Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes.
- ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json +139 -0
- ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors +3 -0
- ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin +3 -0
- ivl3-1b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json +314 -0
- ivl3-1b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759861301.bask-pg0309u03a.2090474.0 +3 -0
- ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759861364.bask-pg0309u03a.2090474.1 +3 -0
- ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759976906.bask-pg0309u03a.2090474.2 +3 -0
- ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/config.json +143 -0
- ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/model.safetensors +3 -0
- ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/training_args.bin +3 -0
- ivl3-2b_ss2_2_aa_sr4_cbs/evaluation_metrics.json +182 -0
- ivl3-2b_ss2_2_aa_sr4_cbs/events.out.tfevents.1759899360.bask-pg0309u36a.1966744.0 +3 -0
- ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759899430.bask-pg0309u36a.1966744.1 +3 -0
- ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759989193.bask-pg0309u36a.1966744.2 +3 -0
- ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/config.json +143 -0
- ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/model.safetensors +3 -0
- ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/training_args.bin +3 -0
- ivl3-2b_ss2_2_ce_aa_seq_cbs_1/evaluation_metrics.json +104 -0
- ivl3-2b_ss2_2_ce_aa_seq_cbs_1/events.out.tfevents.1760049897.bask-pg0309u36a.421581.0 +3 -0
- ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760049961.bask-pg0309u36a.421581.1 +3 -0
- ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760090134.bask-pg0309u36a.421581.2 +3 -0
- ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json +143 -0
- ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors +3 -0
- ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin +3 -0
- ivl3-2b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json +182 -0
- ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759877835.bask-pg0309u12a.1530010.0 +3 -0
- ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759878745.bask-pg0309u12a.1549446.0 +3 -0
- ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_23-57-11_bask-pg0309u12a/events.out.tfevents.1759877899.bask-pg0309u12a.1530010.1 +3 -0
- ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759878805.bask-pg0309u12a.1549446.1 +3 -0
- ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759967908.bask-pg0309u12a.1549446.2 +3 -0
- lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_s2_2_vlora_sr/evaluation_metrics.json +134 -0
- lisa-ivl3-2b_s2_2_vlora_sr/events.out.tfevents.1759802135.bask-pg0308u03a.2088475.0 +3 -0
- lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759802218.bask-pg0308u03a.2088475.1 +3 -0
- lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759828851.bask-pg0308u03a.2088475.2 +3 -0
- lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_ss2_2_ce_aa_sr4/evaluation_metrics.json +182 -0
- lisa-ivl3-2b_ss2_2_ce_aa_sr4/events.out.tfevents.1759803265.bask-pg0309u36a.301557.0 +3 -0
- lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759803317.bask-pg0309u36a.301557.1 +3 -0
- lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759893368.bask-pg0309u36a.301557.2 +3 -0
- lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_ss2_2_ce_vlora_sr/evaluation_metrics.json +134 -0
- lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802625.bask-pg0309u03a.1084672.0 +3 -0
- lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802958.bask-pg0309u36a.294001.0 +3 -0
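The commit title indicates these run folders were pushed with huggingface_hub's `upload_folder`. A minimal sketch of how such an upload is typically done is shown below; the local folder path and `repo_id` are placeholders, not values taken from this commit.

```python
# Minimal sketch of uploading a local results folder with huggingface_hub.
# folder_path and repo_id are placeholders, not values from this commit.
from huggingface_hub import HfApi

api = HfApi()  # uses the token from `huggingface-cli login` or the HF_TOKEN env var
api.upload_folder(
    folder_path="./runs_to_upload",        # local directory containing the run folders
    repo_id="your-username/your-repo",     # target repository on the Hub
    repo_type="model",                     # or "dataset", depending on the repo
    commit_message="Upload folder using huggingface_hub",
)
```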
ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json
ADDED
@@ -0,0 +1,139 @@
{
  "architectures": ["InternVL3Self"],
  "auto_map": {
    "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
    "AutoModel": "modeling_internvl_chat.InternVLChatModel",
    "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
  },
  "downsample_ratio": 0.5,
  "dtype": "bfloat16",
  "dynamic_image_size": true,
  "eos_token_id": 151645,
  "force_image_size": 448,
  "hidden_size": 896,
  "image_fold": null,
  "llm_config": {
    "_attn_implementation_autoset": true,
    "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
    "architectures": ["Qwen2ForCausalLM"],
    "attention_dropout": 0.0,
    "bos_token_id": 151643,
    "dtype": "bfloat16",
    "eos_token_id": 151643,
    "hidden_act": "silu",
    "hidden_size": 896,
    "initializer_range": 0.02,
    "intermediate_size": 4864,
    "layer_types": [
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention"
    ],
    "max_position_embeddings": 32768,
    "max_window_layers": 70,
    "model_type": "qwen2",
    "moe_config": null,
    "num_attention_heads": 14,
    "num_hidden_layers": 24,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-06,
    "rope_scaling": {
      "factor": 2.0,
      "rope_type": "dynamic",
      "type": "dynamic"
    },
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "use_bfloat16": true,
    "use_cache": false,
    "use_sliding_window": false,
    "vocab_size": 151676
  },
  "max_dynamic_patch": 12,
  "min_dynamic_patch": 1,
  "model_type": "internvl_chat",
  "output_attentions": false,
  "pad2square": false,
  "pad_token_id": 151643,
  "ps_version": "v2",
  "select_layer": -1,
  "system_message": null,
  "template": "internvl2_5",
  "tie_word_embeddings": false,
  "transformers_version": null,
  "use_backbone_lora": 0,
  "use_llm_lora": 0,
  "use_thumbnail": true,
  "vision_config": {
    "_attn_implementation_autoset": true,
    "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
    "architectures": ["InternVisionModel"],
    "attention_dropout": 0.0,
    "auto_map": {
      "AutoConfig": "configuration_intern_vit.InternVisionConfig",
      "AutoModel": "modeling_intern_vit.InternVisionModel"
    },
    "capacity_factor": 1.2,
    "drop_path_rate": 0.1,
    "dropout": 0.0,
    "dtype": "bfloat16",
    "eval_capacity_factor": 1.4,
    "hidden_act": "gelu",
    "hidden_size": 1024,
    "image_size": 448,
    "initializer_factor": 0.1,
    "initializer_range": 1e-10,
    "intermediate_size": 4096,
    "laux_allreduce": "all_nodes",
    "layer_norm_eps": 1e-06,
    "model_type": "intern_vit_6b",
    "moe_coeff_ratio": 0.5,
    "moe_intermediate_size": 768,
    "moe_output_scale": 4.0,
    "noisy_gate_policy": "RSample_before",
    "norm_type": "layer_norm",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_experts": 8,
    "num_hidden_layers": 24,
    "num_routed_experts": 4,
    "num_shared_experts": 4,
    "patch_size": 14,
    "qk_normalization": false,
    "qkv_bias": true,
    "shared_expert_intermediate_size": 3072,
    "use_bfloat16": true,
    "use_flash_attn": true,
    "use_moe": false,
    "use_residual": true,
    "use_rts": false,
    "use_weighted_residual": false
  }
}
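Because `auto_map` in this config points `AutoConfig`/`AutoModel` at custom `configuration_internvl_chat` and `modeling_internvl_chat` modules, loading the checkpoint goes through `trust_remote_code`. A minimal sketch is below; the local path is a placeholder, and it assumes the custom modeling files referenced by `auto_map` are available next to the checkpoint (they are not part of this upload).

```python
# Minimal sketch of loading this checkpoint. The local path is a placeholder and the
# custom modeling files referenced by auto_map are assumed to sit alongside config.json.
import torch
from transformers import AutoConfig, AutoModel

ckpt_dir = "ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model"  # local copy of this folder
config = AutoConfig.from_pretrained(ckpt_dir, trust_remote_code=True)
model = AutoModel.from_pretrained(
    ckpt_dir,
    config=config,
    torch_dtype=torch.bfloat16,  # matches "dtype": "bfloat16" in the config
    trust_remote_code=True,
)
model.eval()
```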
ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fbe4ec6a49e0b6bc8a746e8f97b9a142098004142e18058d1d0920b1c311a4b4
size 1895760944
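The weight file is stored as a Git LFS pointer, so the `oid` and `size` fields above can be used to check a downloaded copy. A minimal sketch with the standard library (the local file path is a placeholder):

```python
# Minimal sketch: verify a downloaded LFS file against the oid/size in its pointer.
import hashlib

expected_oid = "fbe4ec6a49e0b6bc8a746e8f97b9a142098004142e18058d1d0920b1c311a4b4"
expected_size = 1895760944

h = hashlib.sha256()
n_bytes = 0
with open("model.safetensors", "rb") as f:           # the resolved (downloaded) file
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)
        n_bytes += len(chunk)

assert n_bytes == expected_size, f"size mismatch: {n_bytes}"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
```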
ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5c69c399d40df85749505a203a51b3b6a47ec624ca8587a3db35106d69490369
size 7352
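`training_args.bin` is the serialized training-arguments object that the Hugging Face Trainer writes next to a checkpoint; it can usually be inspected with `torch.load`. A minimal sketch, assuming a local copy and a recent PyTorch (note this is a pickle, so only load files you trust):

```python
# Minimal sketch: inspect the serialized training arguments (a pickled object).
import torch

args = torch.load("training_args.bin", map_location="cpu", weights_only=False)
print(type(args).__name__)  # typically TrainingArguments or a subclass
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```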
ivl3-1b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json
ADDED
@@ -0,0 +1,314 @@
[
  {"val_dataset": "ReasonSeg|val", "epoch": 1.0, "eval_giou": 0.46611911058425903, "eval_ciou": 0.502875030040741},
  {"val_dataset": "ReasonSeg|val", "epoch": 2.0, "eval_giou": 0.45392024517059326, "eval_ciou": 0.5104899406433105},
  {"val_dataset": "ReasonSeg|val", "epoch": 3.0, "eval_giou": 0.4961601495742798, "eval_ciou": 0.5527771711349487},
  {"val_dataset": "ReasonSeg|val", "epoch": 4.0, "eval_giou": 0.4867956042289734, "eval_ciou": 0.5469930171966553},
  {"val_dataset": "ReasonSeg|val", "epoch": 5.0, "eval_giou": 0.5023592114448547, "eval_ciou": 0.5746171474456787},
  {"val_dataset": "ReasonSeg|val", "epoch": 6.0, "eval_giou": 0.5149329900741577, "eval_ciou": 0.5788349509239197},
  {"val_dataset": "ReasonSeg|val", "epoch": 7.0, "eval_giou": 0.4842008650302887, "eval_ciou": 0.5401598215103149},
  {"val_dataset": "ReasonSeg|val", "epoch": 8.0, "eval_giou": 0.4861740469932556, "eval_ciou": 0.509650468826294},
  {"val_dataset": "ReasonSeg|val", "epoch": 9.0, "eval_giou": 0.5150892734527588, "eval_ciou": 0.5765101909637451},
  {"val_dataset": "ReasonSeg|val", "epoch": 10.0, "eval_giou": 0.5048616528511047, "eval_ciou": 0.5617921948432922},
  {"val_dataset": "ReasonSeg|val", "epoch": 11.0, "eval_giou": 0.49304988980293274, "eval_ciou": 0.5348318219184875},
  {"val_dataset": "ReasonSeg|val", "epoch": 12.0, "eval_giou": 0.4974077343940735, "eval_ciou": 0.5312325954437256},
  {"val_dataset": "ReasonSeg|val", "epoch": 13.0, "eval_giou": 0.49648475646972656, "eval_ciou": 0.5170189142227173},
  {"val_dataset": "ReasonSeg|val", "epoch": 14.0, "eval_giou": 0.48747989535331726, "eval_ciou": 0.5097679495811462},
  {"val_dataset": "ReasonSeg|val", "epoch": 15.0, "eval_giou": 0.4950149953365326, "eval_ciou": 0.5840950012207031},
  {"val_dataset": "ReasonSeg|val", "epoch": 16.0, "eval_giou": 0.49667176604270935, "eval_ciou": 0.5483035445213318},
  {"val_dataset": "ReasonSeg|val", "epoch": 17.0, "eval_giou": 0.5069117546081543, "eval_ciou": 0.5292925238609314},
  {"val_dataset": "ReasonSeg|val", "epoch": 18.0, "eval_giou": 0.5001476407051086, "eval_ciou": 0.5825715065002441},
  {"val_dataset": "ReasonSeg|val", "epoch": 19.0, "eval_giou": 0.5190213322639465, "eval_ciou": 0.5663264393806458},
  {"val_dataset": "ReasonSeg|val", "epoch": 20.0, "eval_giou": 0.5078052282333374, "eval_ciou": 0.550085723400116},
  {"val_dataset": "ReasonSeg|val", "epoch": 21.0, "eval_giou": 0.5005829334259033, "eval_ciou": 0.5272146463394165},
  {"val_dataset": "ReasonSeg|val", "epoch": 22.0, "eval_giou": 0.5325579643249512, "eval_ciou": 0.5967816114425659},
  {"val_dataset": "ReasonSeg|val", "epoch": 23.0, "eval_giou": 0.5019457340240479, "eval_ciou": 0.5450933575630188},
  {"val_dataset": "ReasonSeg|val", "epoch": 24.0, "eval_giou": 0.5054593086242676, "eval_ciou": 0.5789456367492676},
  {"val_dataset": "ReasonSeg|val", "epoch": 25.0, "eval_giou": 0.5179678201675415, "eval_ciou": 0.5849955677986145},
  {"val_dataset": "ReasonSeg|val", "epoch": 26.0, "eval_giou": 0.49324876070022583, "eval_ciou": 0.5512343645095825},
  {"val_dataset": "ReasonSeg|val", "epoch": 27.0, "eval_giou": 0.526308536529541, "eval_ciou": 0.5917784571647644},
  {"val_dataset": "ReasonSeg|val", "epoch": 28.0, "eval_giou": 0.5196185111999512, "eval_ciou": 0.5830491781234741},
  {"val_dataset": "ReasonSeg|val", "epoch": 29.0, "eval_giou": 0.5260405540466309, "eval_ciou": 0.5873146057128906},
  {"val_dataset": "ReasonSeg|val", "epoch": 30.0, "eval_giou": 0.5176364779472351, "eval_ciou": 0.5502902865409851},
  {"val_dataset": "ReasonSeg|val", "epoch": 31.0, "eval_giou": 0.5242363810539246, "eval_ciou": 0.5449221134185791},
  {"val_dataset": "ReasonSeg|val", "epoch": 32.0, "eval_giou": 0.5220826864242554, "eval_ciou": 0.548717737197876},
  {"val_dataset": "ReasonSeg|val", "epoch": 33.0, "eval_giou": 0.532138466835022, "eval_ciou": 0.5798225402832031},
  {"val_dataset": "ReasonSeg|val", "epoch": 34.0, "eval_giou": 0.5219024419784546, "eval_ciou": 0.5670239925384521},
  {"val_dataset": "ReasonSeg|val", "epoch": 35.0, "eval_giou": 0.5276386141777039, "eval_ciou": 0.5819328427314758},
  {"val_dataset": "ReasonSeg|val", "epoch": 36.0, "eval_giou": 0.5373537540435791, "eval_ciou": 0.5725668668746948},
  {"val_dataset": "ReasonSeg|val", "epoch": 37.0, "eval_giou": 0.5370974540710449, "eval_ciou": 0.5747125744819641},
  {"val_dataset": "ReasonSeg|val", "epoch": 38.0, "eval_giou": 0.534446120262146, "eval_ciou": 0.5705200433731079},
  {"val_dataset": "ReasonSeg|val", "epoch": 39.0, "eval_giou": 0.5382332801818848, "eval_ciou": 0.5789334177970886},
  {"val_dataset": "ReasonSeg|val", "epoch": 40.0, "eval_giou": 0.5384411811828613, "eval_ciou": 0.5742336511611938},
  {"val_dataset": "ReasonSeg|test", "epoch": 40.0, "eval_giou": 0.5279056429862976, "eval_ciou": 0.5487434267997742},
  {"val_dataset": "refcoco|unc|val", "epoch": 40.0, "eval_giou": 0.8133376836776733, "eval_ciou": 0.8157490491867065},
  {"val_dataset": "refcoco|unc|testA", "epoch": 40.0, "eval_giou": 0.8291460871696472, "eval_ciou": 0.8322128653526306},
  {"val_dataset": "refcoco|unc|testB", "epoch": 40.0, "eval_giou": 0.7959555387496948, "eval_ciou": 0.7960028052330017},
  {"val_dataset": "refcoco+|unc|val", "epoch": 40.0, "eval_giou": 0.7633941173553467, "eval_ciou": 0.7513891458511353},
  {"val_dataset": "refcoco+|unc|testA", "epoch": 40.0, "eval_giou": 0.8041790723800659, "eval_ciou": 0.7988309264183044},
  {"val_dataset": "refcoco+|unc|testB", "epoch": 40.0, "eval_giou": 0.7283210754394531, "eval_ciou": 0.7128502130508423},
  {"val_dataset": "refcocog|umd|test", "epoch": 40.0, "eval_giou": 0.7768380641937256, "eval_ciou": 0.7853620052337646},
  {"val_dataset": "refcocog|umd|val", "epoch": 40.0, "eval_giou": 0.772633969783783, "eval_ciou": 0.7714908719062805},
  {"val_dataset": "grefcoco|unc|val", "epoch": 40.0, "eval_giou": 0.33939608931541443, "eval_ciou": 0.389335036277771},
  {"val_dataset": "grefcoco|unc|testA", "epoch": 40.0, "eval_giou": 0.48885226249694824, "eval_ciou": 0.5180455446243286},
  {"val_dataset": "grefcoco|unc|testB", "epoch": 40.0, "eval_giou": 0.42087891697883606, "eval_ciou": 0.45711269974708557}
]
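`evaluation_metrics.json` is a flat list of per-evaluation records (`val_dataset`, `epoch`, `eval_giou`, `eval_ciou`), so selecting the best ReasonSeg|val epoch is a short scan over the parsed list. A minimal sketch; the file path is a placeholder for a local copy:

```python
# Minimal sketch: find the best ReasonSeg|val epoch by gIoU in evaluation_metrics.json.
import json

with open("evaluation_metrics.json") as f:
    records = json.load(f)

val = [r for r in records if r["val_dataset"] == "ReasonSeg|val"]
best = max(val, key=lambda r: r["eval_giou"])
print(f"best epoch {best['epoch']}: gIoU={best['eval_giou']:.4f}, cIoU={best['eval_ciou']:.4f}")
```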
ivl3-1b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759861301.bask-pg0309u03a.2090474.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bcc1ce8b056a496430e9b93fc159c074fcc2897c8d3ffa4595ca9a42b640bcf2
size 838934
ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759861364.bask-pg0309u03a.2090474.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1aac295c5e250979475524c9c31a0d9338b8acbd61cfbc94c6765763ddf61bd9
size 438663
ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759976906.bask-pg0309u03a.2090474.2
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4f5f9fde132a38c1d35c3ca50825c862adb145378bf3c2fe9c0a241496964c2b
size 1876
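The `events.out.tfevents.*` files above are standard TensorBoard event logs. Besides pointing `tensorboard --logdir` at a run folder, the logged scalars can be read programmatically; a sketch using TensorBoard's EventAccumulator is below, where the run directory and the "train/loss" tag are placeholders (list the returned tags to see what this run actually logged):

```python
# Sketch: read scalar curves from a TensorBoard event log directory.
# The run directory and the "train/loss" tag are placeholders.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a")
ea.Reload()
print(ea.Tags()["scalars"])             # available scalar tags for this run
for event in ea.Scalars("train/loss"):  # placeholder tag name
    print(event.step, event.value)
```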
ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/config.json
ADDED
@@ -0,0 +1,143 @@
{
  "architectures": ["InternVL3Self"],
  "auto_map": {
    "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
    "AutoModel": "modeling_internvl_chat.InternVLChatModel",
    "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
  },
  "downsample_ratio": 0.5,
  "dtype": "bfloat16",
  "dynamic_image_size": true,
  "eos_token_id": 151645,
  "force_image_size": 448,
  "hidden_size": 1536,
  "image_fold": null,
  "llm_config": {
    "_attn_implementation_autoset": true,
    "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
    "architectures": ["Qwen2ForCausalLM"],
    "attention_dropout": 0.0,
    "bos_token_id": 151643,
    "dtype": "bfloat16",
    "eos_token_id": 151643,
    "hidden_act": "silu",
    "hidden_size": 1536,
    "initializer_range": 0.02,
    "intermediate_size": 8960,
    "layer_types": [
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention"
    ],
    "max_position_embeddings": 32768,
    "max_window_layers": 70,
    "model_type": "qwen2",
    "moe_config": null,
    "num_attention_heads": 12,
    "num_hidden_layers": 28,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-06,
    "rope_scaling": {
      "factor": 2.0,
      "rope_type": "dynamic",
      "type": "dynamic"
    },
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "use_bfloat16": true,
    "use_cache": false,
    "use_sliding_window": false,
    "vocab_size": 151676
  },
  "max_dynamic_patch": 12,
  "min_dynamic_patch": 1,
  "model_type": "internvl_chat",
  "output_attentions": false,
  "pad2square": false,
  "pad_token_id": 151643,
  "ps_version": "v2",
  "select_layer": -1,
  "system_message": null,
  "template": "internvl2_5",
  "tie_word_embeddings": false,
  "transformers_version": null,
  "use_backbone_lora": 0,
  "use_llm_lora": 0,
  "use_thumbnail": true,
  "vision_config": {
    "_attn_implementation_autoset": true,
    "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
    "architectures": ["InternVisionModel"],
    "attention_dropout": 0.0,
    "auto_map": {
      "AutoConfig": "configuration_intern_vit.InternVisionConfig",
      "AutoModel": "modeling_intern_vit.InternVisionModel"
    },
    "capacity_factor": 1.2,
    "drop_path_rate": 0.1,
    "dropout": 0.0,
    "dtype": "bfloat16",
    "eval_capacity_factor": 1.4,
    "hidden_act": "gelu",
    "hidden_size": 1024,
    "image_size": 448,
    "initializer_factor": 0.1,
    "initializer_range": 1e-10,
    "intermediate_size": 4096,
    "laux_allreduce": "all_nodes",
    "layer_norm_eps": 1e-06,
    "model_type": "intern_vit_6b",
    "moe_coeff_ratio": 0.5,
    "moe_intermediate_size": 768,
    "moe_output_scale": 4.0,
    "noisy_gate_policy": "RSample_before",
    "norm_type": "layer_norm",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_experts": 8,
    "num_hidden_layers": 24,
    "num_routed_experts": 4,
    "num_shared_experts": 4,
    "patch_size": 14,
    "qk_normalization": false,
    "qkv_bias": true,
    "shared_expert_intermediate_size": 3072,
    "use_bfloat16": true,
    "use_flash_attn": true,
    "use_moe": false,
    "use_residual": true,
    "use_rts": false,
    "use_weighted_residual": false
  }
}
ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:86bb78ca606874d74621c7804c52b7f099d1e33cc07b872caa1ded9beeed24b6
size 4234672656
ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ae2bdf9ab86563d24903333ab4098296993fbaf54682b00a9f64a59b450883f0
size 7352
ivl3-2b_ss2_2_aa_sr4_cbs/evaluation_metrics.json
ADDED
@@ -0,0 +1,182 @@
[
  {"val_dataset": "ReasonSeg|val", "epoch": 1.0, "eval_giou": 0.5472875237464905, "eval_ciou": 0.6169445514678955},
  {"val_dataset": "ReasonSeg|val", "epoch": 2.0, "eval_giou": 0.5826247930526733, "eval_ciou": 0.6471297740936279},
  {"val_dataset": "ReasonSeg|val", "epoch": 3.0, "eval_giou": 0.571751594543457, "eval_ciou": 0.58400559425354},
  {"val_dataset": "ReasonSeg|val", "epoch": 4.0, "eval_giou": 0.6117531061172485, "eval_ciou": 0.7337754368782043},
  {"val_dataset": "ReasonSeg|val", "epoch": 5.0, "eval_giou": 0.603766679763794, "eval_ciou": 0.6782984733581543},
  {"val_dataset": "ReasonSeg|val", "epoch": 6.0, "eval_giou": 0.5942732691764832, "eval_ciou": 0.6343610882759094},
  {"val_dataset": "ReasonSeg|val", "epoch": 7.0, "eval_giou": 0.5888954997062683, "eval_ciou": 0.5462635159492493},
  {"val_dataset": "ReasonSeg|val", "epoch": 8.0, "eval_giou": 0.6029112339019775, "eval_ciou": 0.672645092010498},
  {"val_dataset": "ReasonSeg|val", "epoch": 9.0, "eval_giou": 0.5829816460609436, "eval_ciou": 0.6506213545799255},
  {"val_dataset": "ReasonSeg|val", "epoch": 10.0, "eval_giou": 0.5900739431381226, "eval_ciou": 0.5817593932151794},
  {"val_dataset": "ReasonSeg|val", "epoch": 11.0, "eval_giou": 0.5904648900032043, "eval_ciou": 0.6257218718528748},
  {"val_dataset": "ReasonSeg|val", "epoch": 12.0, "eval_giou": 0.6047389507293701, "eval_ciou": 0.662787139415741},
  {"val_dataset": "ReasonSeg|val", "epoch": 13.0, "eval_giou": 0.632697582244873, "eval_ciou": 0.6868629455566406},
  {"val_dataset": "ReasonSeg|val", "epoch": 14.0, "eval_giou": 0.6089114546775818, "eval_ciou": 0.662788987159729},
  {"val_dataset": "ReasonSeg|val", "epoch": 15.0, "eval_giou": 0.6122798323631287, "eval_ciou": 0.6807273030281067},
  {"val_dataset": "ReasonSeg|val", "epoch": 16.0, "eval_giou": 0.6197442412376404, "eval_ciou": 0.7008298635482788},
  {"val_dataset": "ReasonSeg|val", "epoch": 17.0, "eval_giou": 0.6097345352172852, "eval_ciou": 0.6946455836296082},
  {"val_dataset": "ReasonSeg|val", "epoch": 18.0, "eval_giou": 0.6123113036155701, "eval_ciou": 0.6853691339492798},
  {"val_dataset": "ReasonSeg|val", "epoch": 19.0, "eval_giou": 0.6210272908210754, "eval_ciou": 0.6771239042282104},
  {"val_dataset": "ReasonSeg|val", "epoch": 20.0, "eval_giou": 0.617154061794281, "eval_ciou": 0.6615597605705261},
  {"val_dataset": "ReasonSeg|test", "epoch": 20.0, "eval_giou": 0.61573725938797, "eval_ciou": 0.6199108958244324},
  {"val_dataset": "refcoco|unc|val", "epoch": 20.0, "eval_giou": 0.8253726959228516, "eval_ciou": 0.8281757831573486},
  {"val_dataset": "refcoco|unc|testA", "epoch": 20.0, "eval_giou": 0.839383602142334, "eval_ciou": 0.8423187136650085},
  {"val_dataset": "refcoco|unc|testB", "epoch": 20.0, "eval_giou": 0.8076790571212769, "eval_ciou": 0.8090101480484009},
  {"val_dataset": "refcoco+|unc|val", "epoch": 20.0, "eval_giou": 0.7864928245544434, "eval_ciou": 0.7770533561706543},
  {"val_dataset": "refcoco+|unc|testA", "epoch": 20.0, "eval_giou": 0.8159601092338562, "eval_ciou": 0.8131332993507385},
  {"val_dataset": "refcoco+|unc|testB", "epoch": 20.0, "eval_giou": 0.7557609677314758, "eval_ciou": 0.7428407073020935},
  {"val_dataset": "refcocog|umd|test", "epoch": 20.0, "eval_giou": 0.7897712588310242, "eval_ciou": 0.7977313995361328},
  {"val_dataset": "refcocog|umd|val", "epoch": 20.0, "eval_giou": 0.7885610461235046, "eval_ciou": 0.7968337535858154},
  {"val_dataset": "grefcoco|unc|val", "epoch": 20.0, "eval_giou": 0.35536685585975647, "eval_ciou": 0.41349032521247864}
]
ivl3-2b_ss2_2_aa_sr4_cbs/events.out.tfevents.1759899360.bask-pg0309u36a.1966744.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4316c0df3dd2f18914626eec6a94a4b077712da715160d288c664f9f079ae2a8
size 420319
ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759899430.bask-pg0309u36a.1966744.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f3cb7d64c9b2d4fa9b2938a235243fd234ba896ba3983d796fbfb53e356b0f2
size 223357
ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759989193.bask-pg0309u36a.1966744.2
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5e29cf07dd781a544a2fd697efb429299042fcda8397002c3d4809627b4f141e
size 1548
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/config.json
ADDED
@@ -0,0 +1,143 @@
{
  "architectures": ["InternVL3Self"],
  "auto_map": {
    "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
    "AutoModel": "modeling_internvl_chat.InternVLChatModel",
    "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
  },
  "downsample_ratio": 0.5,
  "dtype": "bfloat16",
  "dynamic_image_size": true,
  "eos_token_id": 151645,
  "force_image_size": 448,
  "hidden_size": 1536,
  "image_fold": null,
  "llm_config": {
    "_attn_implementation_autoset": true,
    "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
    "architectures": ["Qwen2ForCausalLM"],
    "attention_dropout": 0.0,
    "bos_token_id": 151643,
    "dtype": "bfloat16",
    "eos_token_id": 151643,
    "hidden_act": "silu",
    "hidden_size": 1536,
    "initializer_range": 0.02,
    "intermediate_size": 8960,
    "layer_types": [
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention"
    ],
    "max_position_embeddings": 32768,
    "max_window_layers": 70,
    "model_type": "qwen2",
    "moe_config": null,
    "num_attention_heads": 12,
    "num_hidden_layers": 28,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-06,
    "rope_scaling": {
      "factor": 2.0,
      "rope_type": "dynamic",
      "type": "dynamic"
    },
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "use_bfloat16": true,
    "use_cache": false,
    "use_sliding_window": false,
    "vocab_size": 151676
  },
  "max_dynamic_patch": 12,
  "min_dynamic_patch": 1,
  "model_type": "internvl_chat",
  "output_attentions": false,
  "pad2square": false,
  "pad_token_id": 151643,
  "ps_version": "v2",
  "select_layer": -1,
  "system_message": null,
  "template": "internvl2_5",
  "tie_word_embeddings": false,
  "transformers_version": null,
  "use_backbone_lora": 0,
  "use_llm_lora": 0,
  "use_thumbnail": true,
  "vision_config": {
    "_attn_implementation_autoset": true,
    "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
    "architectures": ["InternVisionModel"],
    "attention_dropout": 0.0,
    "auto_map": {
      "AutoConfig": "configuration_intern_vit.InternVisionConfig",
      "AutoModel": "modeling_intern_vit.InternVisionModel"
    },
    "capacity_factor": 1.2,
    "drop_path_rate": 0.1,
    "dropout": 0.0,
    "dtype": "bfloat16",
    "eval_capacity_factor": 1.4,
    "hidden_act": "gelu",
    "hidden_size": 1024,
    "image_size": 448,
    "initializer_factor": 0.1,
    "initializer_range": 1e-10,
    "intermediate_size": 4096,
    "laux_allreduce": "all_nodes",
    "layer_norm_eps": 1e-06,
    "model_type": "intern_vit_6b",
    "moe_coeff_ratio": 0.5,
    "moe_intermediate_size": 768,
    "moe_output_scale": 4.0,
    "noisy_gate_policy": "RSample_before",
    "norm_type": "layer_norm",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_experts": 8,
    "num_hidden_layers": 24,
    "num_routed_experts": 4,
    "num_shared_experts": 4,
    "patch_size": 14,
    "qk_normalization": false,
    "qkv_bias": true,
    "shared_expert_intermediate_size": 3072,
    "use_bfloat16": true,
    "use_flash_attn": true,
    "use_moe": false,
    "use_residual": true,
    "use_rts": false,
    "use_weighted_residual": false
  }
}
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e68b4bf0ac4fb7851d80063b44ba49742267b6ce786b99e0e683a5011880a472
size 4234672656
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1c483d8e4bccc79b2041f4255aa76528d81438cb33e8ab6094f36f23a18c22fe
size 7352
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/evaluation_metrics.json
ADDED
@@ -0,0 +1,104 @@
[
  {"val_dataset": "ReasonSeg|val", "epoch": 0.14180939930424763, "eval_giou": 0.5170324444770813, "eval_ciou": 0.5942871570587158},
  {"val_dataset": "ReasonSeg|val", "epoch": 0.28361879860849526, "eval_giou": 0.5256783366203308, "eval_ciou": 0.6608301997184753},
  {"val_dataset": "ReasonSeg|val", "epoch": 0.4254281979127429, "eval_giou": 0.5604663491249084, "eval_ciou": 0.6650714874267578},
  {"val_dataset": "ReasonSeg|val", "epoch": 0.5672375972169905, "eval_giou": 0.5549952387809753, "eval_ciou": 0.6755744218826294},
  {"val_dataset": "ReasonSeg|val", "epoch": 0.7090469965212381, "eval_giou": 0.5817117691040039, "eval_ciou": 0.6808510422706604},
  {"val_dataset": "ReasonSeg|val", "epoch": 0.8508563958254858, "eval_giou": 0.5875627398490906, "eval_ciou": 0.6574282050132751},
  {"val_dataset": "ReasonSeg|val", "epoch": 0.9926657951297334, "eval_giou": 0.6035187840461731, "eval_ciou": 0.6785731911659241},
  {"val_dataset": "ReasonSeg|test", "epoch": 1.0, "eval_giou": 0.5871202349662781, "eval_ciou": 0.6128882765769958},
  {"val_dataset": "refcoco|unc|val", "epoch": 1.0, "eval_giou": 0.7931445240974426, "eval_ciou": 0.7957648634910583},
  {"val_dataset": "refcoco|unc|testA", "epoch": 1.0, "eval_giou": 0.8122290372848511, "eval_ciou": 0.8159088492393494},
  {"val_dataset": "refcoco|unc|testB", "epoch": 1.0, "eval_giou": 0.771130383014679, "eval_ciou": 0.7735289335250854},
  {"val_dataset": "refcoco+|unc|val", "epoch": 1.0, "eval_giou": 0.7441046833992004, "eval_ciou": 0.7393963932991028},
  {"val_dataset": "refcoco+|unc|testA", "epoch": 1.0, "eval_giou": 0.7816813588142395, "eval_ciou": 0.7822470664978027},
  {"val_dataset": "refcoco+|unc|testB", "epoch": 1.0, "eval_giou": 0.7043335437774658, "eval_ciou": 0.7011061906814575},
  {"val_dataset": "refcocog|umd|test", "epoch": 1.0, "eval_giou": 0.7601777911186218, "eval_ciou": 0.7689756751060486},
  {"val_dataset": "refcocog|umd|val", "epoch": 1.0, "eval_giou": 0.7574023604393005, "eval_ciou": 0.7668260931968689},
  {"val_dataset": "grefcoco|unc|val", "epoch": 1.0, "eval_giou": 0.3440900146961212, "eval_ciou": 0.40447333455085754}
]
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/events.out.tfevents.1760049897.bask-pg0309u36a.421581.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9a814830c21483a873a9fb7aae3df2dd0da596f8e099f805420b4f97ddee661e
size 237250
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760049961.bask-pg0309u36a.421581.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:696272af75f56d031defab3f9ee22e1ff2152eaf31d3d26dd707e500237f3873
size 129472
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760090134.bask-pg0309u36a.421581.2
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b1ef9166bd90585ddfb26787975266c8673785ef08e917d33dcc3947b38b8781
size 1548
ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json
ADDED
@@ -0,0 +1,143 @@
{
  "architectures": ["InternVL3Self"],
  "auto_map": {
    "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
    "AutoModel": "modeling_internvl_chat.InternVLChatModel",
    "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
  },
  "downsample_ratio": 0.5,
  "dtype": "bfloat16",
  "dynamic_image_size": true,
  "eos_token_id": 151645,
  "force_image_size": 448,
  "hidden_size": 1536,
  "image_fold": null,
  "llm_config": {
    "_attn_implementation_autoset": true,
    "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
    "architectures": ["Qwen2ForCausalLM"],
    "attention_dropout": 0.0,
    "bos_token_id": 151643,
    "dtype": "bfloat16",
    "eos_token_id": 151643,
    "hidden_act": "silu",
    "hidden_size": 1536,
    "initializer_range": 0.02,
    "intermediate_size": 8960,
    "layer_types": [
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention",
      "full_attention", "full_attention", "full_attention", "full_attention"
    ],
    "max_position_embeddings": 32768,
    "max_window_layers": 70,
    "model_type": "qwen2",
    "moe_config": null,
    "num_attention_heads": 12,
    "num_hidden_layers": 28,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-06,
    "rope_scaling": {
      "factor": 2.0,
      "rope_type": "dynamic",
      "type": "dynamic"
    },
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "use_bfloat16": true,
    "use_cache": false,
    "use_sliding_window": false,
    "vocab_size": 151676
  },
  "max_dynamic_patch": 12,
  "min_dynamic_patch": 1,
  "model_type": "internvl_chat",
  "output_attentions": false,
  "pad2square": false,
  "pad_token_id": 151643,
  "ps_version": "v2",
  "select_layer": -1,
  "system_message": null,
  "template": "internvl2_5",
  "tie_word_embeddings": false,
  "transformers_version": null,
  "use_backbone_lora": 0,
  "use_llm_lora": 0,
  "use_thumbnail": true,
  "vision_config": {
    "_attn_implementation_autoset": true,
    "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
    "architectures": ["InternVisionModel"],
    "attention_dropout": 0.0,
    "auto_map": {
      "AutoConfig": "configuration_intern_vit.InternVisionConfig",
      "AutoModel": "modeling_intern_vit.InternVisionModel"
    },
    "capacity_factor": 1.2,
    "drop_path_rate": 0.1,
    "dropout": 0.0,
    "dtype": "bfloat16",
    "eval_capacity_factor": 1.4,
    "hidden_act": "gelu",
    "hidden_size": 1024,
    "image_size": 448,
    "initializer_factor": 0.1,
    "initializer_range": 1e-10,
    "intermediate_size": 4096,
    "laux_allreduce": "all_nodes",
    "layer_norm_eps": 1e-06,
    "model_type": "intern_vit_6b",
    "moe_coeff_ratio": 0.5,
    "moe_intermediate_size": 768,
    "moe_output_scale": 4.0,
    "noisy_gate_policy": "RSample_before",
    "norm_type": "layer_norm",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_experts": 8,
    "num_hidden_layers": 24,
    "num_routed_experts": 4,
    "num_shared_experts": 4,
    "patch_size": 14,
    "qk_normalization": false,
    "qkv_bias": true,
    "shared_expert_intermediate_size": 3072,
    "use_bfloat16": true,
    "use_flash_attn": true,
    "use_moe": false,
    "use_residual": true,
    "use_rts": false,
    "use_weighted_residual": false
  }
}
ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8bf3469a88769c9cf80872f702a2b69af7fc3de3e13b5fc72e57313bc7e6fd6d
size 4234672656
ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dbd27079f7bb014416e853c6dd7c8cb2cc3a0ed270d7689a6ce1c20a394b44ed
size 7352
ivl3-2b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json
ADDED
@@ -0,0 +1,182 @@
[
  {"val_dataset": "ReasonSeg|val", "epoch": 1.0, "eval_giou": 0.545238733291626, "eval_ciou": 0.6227507591247559},
  {"val_dataset": "ReasonSeg|val", "epoch": 2.0, "eval_giou": 0.5574519634246826, "eval_ciou": 0.5911077857017517},
  {"val_dataset": "ReasonSeg|val", "epoch": 3.0, "eval_giou": 0.5884220600128174, "eval_ciou": 0.6480932831764221},
  {"val_dataset": "ReasonSeg|val", "epoch": 4.0, "eval_giou": 0.567903459072113, "eval_ciou": 0.6106956601142883},
  {"val_dataset": "ReasonSeg|val", "epoch": 5.0, "eval_giou": 0.5836549401283264, "eval_ciou": 0.6618658900260925},
  {"val_dataset": "ReasonSeg|val", "epoch": 6.0, "eval_giou": 0.6009659767150879, "eval_ciou": 0.601414144039154},
  {"val_dataset": "ReasonSeg|val", "epoch": 7.0, "eval_giou": 0.5822204947471619, "eval_ciou": 0.6209443211555481},
  {"val_dataset": "ReasonSeg|val", "epoch": 8.0, "eval_giou": 0.5958440899848938, "eval_ciou": 0.6226491332054138},
  {"val_dataset": "ReasonSeg|val", "epoch": 9.0, "eval_giou": 0.5826206803321838, "eval_ciou": 0.6304926872253418},
  {"val_dataset": "ReasonSeg|val", "epoch": 10.0, "eval_giou": 0.6073910593986511, "eval_ciou": 0.6384271383285522},
  {"val_dataset": "ReasonSeg|val", "epoch": 11.0, "eval_giou": 0.5889663100242615, "eval_ciou": 0.5859989523887634},
  {"val_dataset": "ReasonSeg|val", "epoch": 12.0, "eval_giou": 0.5989387631416321, "eval_ciou": 0.5917373895645142},
  {"val_dataset": "ReasonSeg|val", "epoch": 13.0, "eval_giou": 0.6207078695297241, "eval_ciou": 0.6845619678497314},
  {"val_dataset": "ReasonSeg|val", "epoch": 14.0, "eval_giou": 0.6023203730583191, "eval_ciou": 0.6288123726844788},
  {"val_dataset": "ReasonSeg|val", "epoch": 15.0, "eval_giou": 0.6015496850013733, "eval_ciou": 0.6256543397903442},
  {"val_dataset": "ReasonSeg|val", "epoch": 16.0, "eval_giou": 0.6224597692489624, "eval_ciou": 0.6749593019485474},
  {"val_dataset": "ReasonSeg|val", "epoch": 17.0, "eval_giou": 0.6151285767555237, "eval_ciou": 0.6607990860939026},
  {"val_dataset": "ReasonSeg|val", "epoch": 18.0, "eval_giou": 0.6089848279953003, "eval_ciou": 0.6714296936988831},
  {"val_dataset": "ReasonSeg|val", "epoch": 19.0, "eval_giou": 0.6193683743476868, "eval_ciou": 0.66315758228302},
  {"val_dataset": "ReasonSeg|val", "epoch": 20.0, "eval_giou": 0.6168807148933411, "eval_ciou": 0.6491692066192627},
  {"val_dataset": "ReasonSeg|test", "epoch": 20.0, "eval_giou": 0.6177780628204346, "eval_ciou": 0.623653769493103},
  {"val_dataset": "refcoco|unc|val", "epoch": 20.0, "eval_giou": 0.823724627494812, "eval_ciou": 0.8269159197807312},
  {"val_dataset": "refcoco|unc|testA", "epoch": 20.0, "eval_giou": 0.8385549783706665, "eval_ciou": 0.8419386148452759},
  {"val_dataset": "refcoco|unc|testB",
|
| 142 |
+
"epoch": 20.0,
|
| 143 |
+
"eval_giou": 0.8077682852745056,
|
| 144 |
+
"eval_ciou": 0.8096453547477722
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"val_dataset": "refcoco+|unc|val",
|
| 148 |
+
"epoch": 20.0,
|
| 149 |
+
"eval_giou": 0.7876097559928894,
|
| 150 |
+
"eval_ciou": 0.7777907848358154
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 154 |
+
"epoch": 20.0,
|
| 155 |
+
"eval_giou": 0.8152230978012085,
|
| 156 |
+
"eval_ciou": 0.8130303025245667
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 160 |
+
"epoch": 20.0,
|
| 161 |
+
"eval_giou": 0.7466538548469543,
|
| 162 |
+
"eval_ciou": 0.7298356294631958
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"val_dataset": "refcocog|umd|test",
|
| 166 |
+
"epoch": 20.0,
|
| 167 |
+
"eval_giou": 0.7895915508270264,
|
| 168 |
+
"eval_ciou": 0.7953519821166992
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"val_dataset": "refcocog|umd|val",
|
| 172 |
+
"epoch": 20.0,
|
| 173 |
+
"eval_giou": 0.7863814830780029,
|
| 174 |
+
"eval_ciou": 0.790006697177887
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"val_dataset": "grefcoco|unc|val",
|
| 178 |
+
"epoch": 20.0,
|
| 179 |
+
"eval_giou": 0.34861522912979126,
|
| 180 |
+
"eval_ciou": 0.4008709788322449
|
| 181 |
+
}
|
| 182 |
+
]
|
ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759877835.bask-pg0309u12a.1530010.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bdf4f69c712289c45dc7f3d2fc1c00080ed867cb126c8893a7cdb9e6410c575
|
| 3 |
+
size 5269
|
ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759878745.bask-pg0309u12a.1549446.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4d3e677ef10d742ca354d04dc083cd8fac9d4f3438fcb2105ec954c1cb39554
|
| 3 |
+
size 420319
|
ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_23-57-11_bask-pg0309u12a/events.out.tfevents.1759877899.bask-pg0309u12a.1530010.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ef6b38d60c181687f36e0184bd79a6ca1ad752797613b8b065873a3b4246d0b
|
| 3 |
+
size 11734
|
ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759878805.bask-pg0309u12a.1549446.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2aca35e8105442e8225c238eb97ea990547dd7850e1d79d809ea06ab4a8bbf52
|
| 3 |
+
size 223363
|
ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759967908.bask-pg0309u12a.1549446.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea46edb0001154b124f3e0221461332c2861bdb219b80ce8118ea43f68f236d3
|
| 3 |
+
size 1548
|
lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30e47cc709ec1ba69f2d9d95a1cb1772d8724fbe12b8127c78a3f24363254eb2
|
| 3 |
+
size 4234672656
|
lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:376cf9ecb749a538a45e36600313b7a343b1da588de84846095f57565d33118a
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_s2_2_vlora_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5275816917419434,
|
| 6 |
+
"eval_ciou": 0.6127659678459167
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5785483121871948,
|
| 12 |
+
"eval_ciou": 0.6731550693511963
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5671323537826538,
|
| 18 |
+
"eval_ciou": 0.6406646966934204
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5716913342475891,
|
| 24 |
+
"eval_ciou": 0.6880943775177002
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.6032226085662842,
|
| 30 |
+
"eval_ciou": 0.669122576713562
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.6153417229652405,
|
| 36 |
+
"eval_ciou": 0.6467410326004028
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.6117483377456665,
|
| 42 |
+
"eval_ciou": 0.6864674687385559
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.613699734210968,
|
| 48 |
+
"eval_ciou": 0.6648719906806946
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.6249428987503052,
|
| 54 |
+
"eval_ciou": 0.6983655691146851
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6295068860054016,
|
| 60 |
+
"eval_ciou": 0.6981709599494934
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.6091115474700928,
|
| 66 |
+
"eval_ciou": 0.6346589922904968
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.7866575121879578,
|
| 72 |
+
"eval_ciou": 0.7891850471496582
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8079327344894409,
|
| 78 |
+
"eval_ciou": 0.8123739361763
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7600301504135132,
|
| 84 |
+
"eval_ciou": 0.7600919008255005
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7380849719047546,
|
| 90 |
+
"eval_ciou": 0.7316219210624695
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7775301337242126,
|
| 96 |
+
"eval_ciou": 0.7753238677978516
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.6858305335044861,
|
| 102 |
+
"eval_ciou": 0.6726531982421875
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.7513312697410583,
|
| 108 |
+
"eval_ciou": 0.7607209086418152
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7468341588973999,
|
| 114 |
+
"eval_ciou": 0.7547957897186279
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"val_dataset": "grefcoco|unc|val",
|
| 118 |
+
"epoch": 10.0,
|
| 119 |
+
"eval_giou": 0.3461865484714508,
|
| 120 |
+
"eval_ciou": 0.4059288203716278
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"val_dataset": "grefcoco|unc|testA",
|
| 124 |
+
"epoch": 10.0,
|
| 125 |
+
"eval_giou": 0.5068609714508057,
|
| 126 |
+
"eval_ciou": 0.5432214736938477
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"val_dataset": "grefcoco|unc|testB",
|
| 130 |
+
"epoch": 10.0,
|
| 131 |
+
"eval_giou": 0.4215650260448456,
|
| 132 |
+
"eval_ciou": 0.46161597967147827
|
| 133 |
+
}
|
| 134 |
+
]
|
lisa-ivl3-2b_s2_2_vlora_sr/events.out.tfevents.1759802135.bask-pg0308u03a.2088475.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9753975d394fb29fe790a725030cfb2eb7164a9c9772c6c58f150b86032ff6d2
|
| 3 |
+
size 213843
|
lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759802218.bask-pg0308u03a.2088475.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb122b4b83d6c98e0202bcb24e7e4d8b2ecf6127e8749eb7ad7e39ee8ee4bee3
|
| 3 |
+
size 116402
|
lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759828851.bask-pg0308u03a.2088475.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:049fe35ce139736f5cff725d493b6559f3cb8ebe44b73c1dada7d658c239b209
|
| 3 |
+
size 1840
|
lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf8abfa0531fab5dd062f2adc5d1998d253a3534af2fbee5966ca5f440b8b56d
|
| 3 |
+
size 4234672656
|
lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6f838b765f1d9e641210de93267752a4124e94ae40523c3d484eed6abea4144
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_ss2_2_ce_aa_sr4/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5319168567657471,
|
| 6 |
+
"eval_ciou": 0.6217775344848633
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5545085668563843,
|
| 12 |
+
"eval_ciou": 0.5805025696754456
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5865544676780701,
|
| 18 |
+
"eval_ciou": 0.5943950414657593
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5605677366256714,
|
| 24 |
+
"eval_ciou": 0.618609607219696
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.5957074165344238,
|
| 30 |
+
"eval_ciou": 0.6184157729148865
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5842458605766296,
|
| 36 |
+
"eval_ciou": 0.5878432989120483
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.5693845748901367,
|
| 42 |
+
"eval_ciou": 0.5511792302131653
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.589174747467041,
|
| 48 |
+
"eval_ciou": 0.601881742477417
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.5905624628067017,
|
| 54 |
+
"eval_ciou": 0.5573661923408508
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6161700487136841,
|
| 60 |
+
"eval_ciou": 0.6625725626945496
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|val",
|
| 64 |
+
"epoch": 11.0,
|
| 65 |
+
"eval_giou": 0.6013333797454834,
|
| 66 |
+
"eval_ciou": 0.6199454665184021
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "ReasonSeg|val",
|
| 70 |
+
"epoch": 12.0,
|
| 71 |
+
"eval_giou": 0.5980168581008911,
|
| 72 |
+
"eval_ciou": 0.6256346106529236
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "ReasonSeg|val",
|
| 76 |
+
"epoch": 13.0,
|
| 77 |
+
"eval_giou": 0.6127091646194458,
|
| 78 |
+
"eval_ciou": 0.6268919706344604
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "ReasonSeg|val",
|
| 82 |
+
"epoch": 14.0,
|
| 83 |
+
"eval_giou": 0.6001991033554077,
|
| 84 |
+
"eval_ciou": 0.6283772587776184
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "ReasonSeg|val",
|
| 88 |
+
"epoch": 15.0,
|
| 89 |
+
"eval_giou": 0.5891889333724976,
|
| 90 |
+
"eval_ciou": 0.6160405278205872
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "ReasonSeg|val",
|
| 94 |
+
"epoch": 16.0,
|
| 95 |
+
"eval_giou": 0.583220899105072,
|
| 96 |
+
"eval_ciou": 0.6409042477607727
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "ReasonSeg|val",
|
| 100 |
+
"epoch": 17.0,
|
| 101 |
+
"eval_giou": 0.5922881364822388,
|
| 102 |
+
"eval_ciou": 0.6401846408843994
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "ReasonSeg|val",
|
| 106 |
+
"epoch": 18.0,
|
| 107 |
+
"eval_giou": 0.5957611799240112,
|
| 108 |
+
"eval_ciou": 0.6475955843925476
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "ReasonSeg|val",
|
| 112 |
+
"epoch": 19.0,
|
| 113 |
+
"eval_giou": 0.606143593788147,
|
| 114 |
+
"eval_ciou": 0.6534363031387329
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"val_dataset": "ReasonSeg|val",
|
| 118 |
+
"epoch": 20.0,
|
| 119 |
+
"eval_giou": 0.6004212498664856,
|
| 120 |
+
"eval_ciou": 0.6504445672035217
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"val_dataset": "ReasonSeg|test",
|
| 124 |
+
"epoch": 20.0,
|
| 125 |
+
"eval_giou": 0.6277483105659485,
|
| 126 |
+
"eval_ciou": 0.6647483706474304
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"val_dataset": "refcoco|unc|val",
|
| 130 |
+
"epoch": 20.0,
|
| 131 |
+
"eval_giou": 0.8239029049873352,
|
| 132 |
+
"eval_ciou": 0.825985848903656
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"val_dataset": "refcoco|unc|testA",
|
| 136 |
+
"epoch": 20.0,
|
| 137 |
+
"eval_giou": 0.8409322500228882,
|
| 138 |
+
"eval_ciou": 0.8456533551216125
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"val_dataset": "refcoco|unc|testB",
|
| 142 |
+
"epoch": 20.0,
|
| 143 |
+
"eval_giou": 0.8107873201370239,
|
| 144 |
+
"eval_ciou": 0.8120189905166626
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"val_dataset": "refcoco+|unc|val",
|
| 148 |
+
"epoch": 20.0,
|
| 149 |
+
"eval_giou": 0.7863824367523193,
|
| 150 |
+
"eval_ciou": 0.7759678959846497
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 154 |
+
"epoch": 20.0,
|
| 155 |
+
"eval_giou": 0.8180026412010193,
|
| 156 |
+
"eval_ciou": 0.8148158192634583
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 160 |
+
"epoch": 20.0,
|
| 161 |
+
"eval_giou": 0.7562121748924255,
|
| 162 |
+
"eval_ciou": 0.7416884899139404
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"val_dataset": "refcocog|umd|test",
|
| 166 |
+
"epoch": 20.0,
|
| 167 |
+
"eval_giou": 0.7904837131500244,
|
| 168 |
+
"eval_ciou": 0.799082338809967
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"val_dataset": "refcocog|umd|val",
|
| 172 |
+
"epoch": 20.0,
|
| 173 |
+
"eval_giou": 0.7864062190055847,
|
| 174 |
+
"eval_ciou": 0.7907365560531616
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"val_dataset": "grefcoco|unc|val",
|
| 178 |
+
"epoch": 20.0,
|
| 179 |
+
"eval_giou": 0.3480578660964966,
|
| 180 |
+
"eval_ciou": 0.40478312969207764
|
| 181 |
+
}
|
| 182 |
+
]
|
lisa-ivl3-2b_ss2_2_ce_aa_sr4/events.out.tfevents.1759803265.bask-pg0309u36a.301557.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:660498ff574c492bf5d679c2ab2000082d9d54adb74cd54a33d31ee5e50ed85d
|
| 3 |
+
size 420319
|
lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759803317.bask-pg0309u36a.301557.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07c9a2e3d5e655d1a3720f020272d78ce223c539dd74f22c129512a6a9c19465
|
| 3 |
+
size 223365
|
lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759893368.bask-pg0309u36a.301557.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9651efafd9183c3de60fe034fc2361230008f6a877418be63c966d679c72114e
|
| 3 |
+
size 1548
|
lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:139fdbe8833d72b40b2877084cee6be809601e8730f050f271995f16007a95f4
|
| 3 |
+
size 4234672656
|
lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7aaa7c7f7fcd6affd89f09fc49b3e6065ac81102006c76127b26eb69297651b1
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_ss2_2_ce_vlora_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5569912195205688,
|
| 6 |
+
"eval_ciou": 0.6086179614067078
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5633984208106995,
|
| 12 |
+
"eval_ciou": 0.6330024600028992
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5652901530265808,
|
| 18 |
+
"eval_ciou": 0.6073711514472961
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5960856080055237,
|
| 24 |
+
"eval_ciou": 0.6793828010559082
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.6029606461524963,
|
| 30 |
+
"eval_ciou": 0.7154921293258667
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5903835892677307,
|
| 36 |
+
"eval_ciou": 0.569995105266571
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.613521158695221,
|
| 42 |
+
"eval_ciou": 0.6969940066337585
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.6117329597473145,
|
| 48 |
+
"eval_ciou": 0.6676673889160156
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.6173275113105774,
|
| 54 |
+
"eval_ciou": 0.6789456009864807
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6149329543113708,
|
| 60 |
+
"eval_ciou": 0.6718701124191284
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.609404444694519,
|
| 66 |
+
"eval_ciou": 0.6132881045341492
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.7893159985542297,
|
| 72 |
+
"eval_ciou": 0.7933170199394226
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8105526566505432,
|
| 78 |
+
"eval_ciou": 0.8163524270057678
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7612078785896301,
|
| 84 |
+
"eval_ciou": 0.7619940042495728
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7366273999214172,
|
| 90 |
+
"eval_ciou": 0.7300769686698914
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7783792614936829,
|
| 96 |
+
"eval_ciou": 0.7807698845863342
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.6872988343238831,
|
| 102 |
+
"eval_ciou": 0.6766538619995117
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.7511187195777893,
|
| 108 |
+
"eval_ciou": 0.7614018321037292
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7464777231216431,
|
| 114 |
+
"eval_ciou": 0.7518000602722168
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"val_dataset": "grefcoco|unc|val",
|
| 118 |
+
"epoch": 10.0,
|
| 119 |
+
"eval_giou": 0.34809717535972595,
|
| 120 |
+
"eval_ciou": 0.40297430753707886
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"val_dataset": "grefcoco|unc|testA",
|
| 124 |
+
"epoch": 10.0,
|
| 125 |
+
"eval_giou": 0.5119317770004272,
|
| 126 |
+
"eval_ciou": 0.5438946485519409
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"val_dataset": "grefcoco|unc|testB",
|
| 130 |
+
"epoch": 10.0,
|
| 131 |
+
"eval_giou": 0.42917221784591675,
|
| 132 |
+
"eval_ciou": 0.46293896436691284
|
| 133 |
+
}
|
| 134 |
+
]
|
lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802625.bask-pg0309u03a.1084672.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a500c6eab72e54c5f9f22ebdab934bd6697b3029ccac498ad538fbad5ec125c8
|
| 3 |
+
size 213843
|
lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802958.bask-pg0309u36a.294001.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8104cc4761ea3f319ab73a87e86d1bacfacc2e1d101f9e6144d48e9575589b0
|
| 3 |
+
size 884
|