|
{ |
|
"architectures": [ |
|
"InstructCell" |
|
], |
|
"base_model_config": { |
|
"architectures": [ |
|
"T5ForConditionalGeneration" |
|
], |
|
"d_ff": 3072, |
|
"d_model": 768, |
|
"decoder_start_token_id": 0, |
|
"ignore_index": null, |
|
"model_type": "t5", |
|
"n_positions": 512, |
|
"num_decoder_layers": 12, |
|
"num_heads": 12, |
|
"num_layers": 12, |
|
"num_signal_tokens": 1, |
|
"output_past": true, |
|
"special_tokens_dict": { |
|
"end_tag": "</CELL>", |
|
"placeholder": "<CELL0>", |
|
"signal_tokens": [ |
|
"<CELL1>" |
|
], |
|
"start_tag": "<CELL>" |
|
}, |
|
"special_tokens_index_dict": { |
|
"end_tag": 32101, |
|
"first_signal_token": 32103, |
|
"placeholder": 32102, |
|
"start_tag": 32100 |
|
}, |
|
"task_specific_params": { |
|
"summarization": { |
|
"early_stopping": true, |
|
"length_penalty": 2.0, |
|
"max_length": 200, |
|
"min_length": 30, |
|
"no_repeat_ngram_size": 3, |
|
"num_beams": 4, |
|
"prefix": "summarize: " |
|
}, |
|
"translation_en_to_de": { |
|
"early_stopping": true, |
|
"max_length": 300, |
|
"num_beams": 4, |
|
"prefix": "translate English to German: " |
|
}, |
|
"translation_en_to_fr": { |
|
"early_stopping": true, |
|
"max_length": 300, |
|
"num_beams": 4, |
|
"prefix": "translate English to French: " |
|
}, |
|
"translation_en_to_ro": { |
|
"early_stopping": true, |
|
"max_length": 300, |
|
"num_beams": 4, |
|
"prefix": "translate English to Romanian: " |
|
} |
|
}, |
|
"vocab_size": 32112 |
|
}, |
|
"feature_decoder_config": { |
|
"adaptive_library": true, |
|
"condition_input_dim": 256, |
|
"dropout_rate": 0.1, |
|
"log_variational": true, |
|
"n_hidden": 1024, |
|
"n_latent": 256, |
|
"n_layers": 4, |
|
"use_batch_norm": "none", |
|
"use_layer_norm": "both" |
|
}, |
|
"feature_encoder_config": { |
|
"count_dim": 18961, |
|
"cross_attention_frequency": 1, |
|
"hidden_dropout_prob": 0.1, |
|
"is_q_former_encoder": true, |
|
"num_blocks": 3, |
|
"num_hidden_layers": 4, |
|
"num_key_value_tokens": 6, |
|
"num_query_tokens": 8 |
|
}, |
|
"log_variational": true, |
|
"model_type": "instructcell", |
|
"normalize_total": true, |
|
"tokenizer_use_fast": false, |
|
"torch_dtype": "float32", |
|
"transformers_version": "4.33.3" |
|
} |
|
|