{ "ae_mode": "token", "attn_implementation": null, "auto_map": { "AutoConfig": "modeling_clara.CLaRaConfig", "AutoModel": "modeling_clara.CLaRa" }, "compr_base_model_name": "/mnt/ceph_rbd/model/Mistral-7B-Instruct-v0.2", "compr_every_n_layer": null, "compr_linear_type": "concat", "compr_mlp_hidden_dim": 8096, "compr_model_name": null, "compr_n_layers": 5, "compr_rate": 16, "compr_rms_norm": false, "compr_use_mlp": false, "decoder_model_name": "/mnt/ceph_rbd/model/Mistral-7B-Instruct-v0.2", "device_map": null, "different_mem_tokens": true, "doc_max_length": 256, "generation_top_k": 5, "kbtc_training": false, "load_adapters": true, "load_pretrained_checkpoint": false, "lora": true, "lora_compressor": false, "lora_r": 16, "lora_r_compressor": 16, "max_new_tokens": 128, "model_type": "CLaRa", "optimize_mem_tokens": true, "pad_token_id": 2, "pure_inference": false, "quantization": "no", "sep": true, "stage2_retrieval_top_n": 1, "training_form": "both_separately", "training_stage": "stage1_2", "transformers_version": "4.53.3" }