{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968847352024922,
  "eval_steps": 500,
  "global_step": 80,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012461059190031152,
      "grad_norm": 25.687093602481305,
      "learning_rate": 2.5e-06,
      "loss": 1.2485,
      "step": 1
    },
    {
      "epoch": 0.06230529595015576,
      "grad_norm": 15.094847025724023,
      "learning_rate": 1.25e-05,
      "loss": 1.1293,
      "step": 5
    },
    {
      "epoch": 0.12461059190031153,
      "grad_norm": 11.363800874975448,
      "learning_rate": 1.9961946980917457e-05,
      "loss": 0.9767,
      "step": 10
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 8.12136266435658,
      "learning_rate": 1.953716950748227e-05,
      "loss": 0.7746,
      "step": 15
    },
    {
      "epoch": 0.24922118380062305,
      "grad_norm": 7.927023181508759,
      "learning_rate": 1.866025403784439e-05,
      "loss": 0.6367,
      "step": 20
    },
    {
      "epoch": 0.3115264797507788,
      "grad_norm": 3.686503615552781,
      "learning_rate": 1.737277336810124e-05,
      "loss": 0.5175,
      "step": 25
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 3.6799656836763077,
      "learning_rate": 1.573576436351046e-05,
      "loss": 0.4736,
      "step": 30
    },
    {
      "epoch": 0.43613707165109034,
      "grad_norm": 1.824779614147191,
      "learning_rate": 1.3826834323650899e-05,
      "loss": 0.4468,
      "step": 35
    },
    {
      "epoch": 0.4984423676012461,
      "grad_norm": 1.6428127656511486,
      "learning_rate": 1.1736481776669307e-05,
      "loss": 0.4412,
      "step": 40
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 1.255700115951508,
      "learning_rate": 9.563806126346643e-06,
      "loss": 0.4362,
      "step": 45
    },
    {
      "epoch": 0.6230529595015576,
      "grad_norm": 1.1763215498923227,
      "learning_rate": 7.411809548974792e-06,
      "loss": 0.4261,
      "step": 50
    },
    {
      "epoch": 0.6853582554517134,
      "grad_norm": 1.0876919118710244,
      "learning_rate": 5.382513867649663e-06,
      "loss": 0.4159,
      "step": 55
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 1.0885014436037381,
      "learning_rate": 3.5721239031346067e-06,
      "loss": 0.4142,
      "step": 60
    },
    {
      "epoch": 0.8099688473520249,
      "grad_norm": 0.9605618048169687,
      "learning_rate": 2.0664665970876496e-06,
      "loss": 0.4068,
      "step": 65
    },
    {
      "epoch": 0.8722741433021807,
      "grad_norm": 0.9420340387839357,
      "learning_rate": 9.369221296335007e-07,
      "loss": 0.4026,
      "step": 70
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 0.8433707190106806,
      "learning_rate": 2.370399288006664e-07,
      "loss": 0.3984,
      "step": 75
    },
    {
      "epoch": 0.9968847352024922,
      "grad_norm": 0.8754132731808941,
      "learning_rate": 0.0,
      "loss": 0.399,
      "step": 80
    },
    {
      "epoch": 0.9968847352024922,
      "eval_loss": 1.1888436079025269,
      "eval_runtime": 0.7161,
      "eval_samples_per_second": 15.361,
      "eval_steps_per_second": 1.396,
      "step": 80
    },
    {
      "epoch": 0.9968847352024922,
      "step": 80,
      "total_flos": 33396055080960.0,
      "train_loss": 0.5449534490704536,
      "train_runtime": 879.8279,
      "train_samples_per_second": 46.577,
      "train_steps_per_second": 0.091
    }
  ],
  "logging_steps": 5,
  "max_steps": 80,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 33396055080960.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}