UltraDoughnut commited on
Commit
eda79fd
·
verified ·
1 Parent(s): df8062f

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json +139 -0
  2. ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors +3 -0
  3. ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin +3 -0
  4. ivl3-1b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json +314 -0
  5. ivl3-1b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759861301.bask-pg0309u03a.2090474.0 +3 -0
  6. ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759861364.bask-pg0309u03a.2090474.1 +3 -0
  7. ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759976906.bask-pg0309u03a.2090474.2 +3 -0
  8. ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/config.json +143 -0
  9. ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/model.safetensors +3 -0
  10. ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/training_args.bin +3 -0
  11. ivl3-2b_ss2_2_aa_sr4_cbs/evaluation_metrics.json +182 -0
  12. ivl3-2b_ss2_2_aa_sr4_cbs/events.out.tfevents.1759899360.bask-pg0309u36a.1966744.0 +3 -0
  13. ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759899430.bask-pg0309u36a.1966744.1 +3 -0
  14. ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759989193.bask-pg0309u36a.1966744.2 +3 -0
  15. ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/config.json +143 -0
  16. ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/model.safetensors +3 -0
  17. ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/training_args.bin +3 -0
  18. ivl3-2b_ss2_2_ce_aa_seq_cbs_1/evaluation_metrics.json +104 -0
  19. ivl3-2b_ss2_2_ce_aa_seq_cbs_1/events.out.tfevents.1760049897.bask-pg0309u36a.421581.0 +3 -0
  20. ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760049961.bask-pg0309u36a.421581.1 +3 -0
  21. ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760090134.bask-pg0309u36a.421581.2 +3 -0
  22. ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json +143 -0
  23. ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors +3 -0
  24. ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin +3 -0
  25. ivl3-2b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json +182 -0
  26. ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759877835.bask-pg0309u12a.1530010.0 +3 -0
  27. ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759878745.bask-pg0309u12a.1549446.0 +3 -0
  28. ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_23-57-11_bask-pg0309u12a/events.out.tfevents.1759877899.bask-pg0309u12a.1530010.1 +3 -0
  29. ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759878805.bask-pg0309u12a.1549446.1 +3 -0
  30. ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759967908.bask-pg0309u12a.1549446.2 +3 -0
  31. lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/config.json +143 -0
  32. lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/model.safetensors +3 -0
  33. lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/training_args.bin +3 -0
  34. lisa-ivl3-2b_s2_2_vlora_sr/evaluation_metrics.json +134 -0
  35. lisa-ivl3-2b_s2_2_vlora_sr/events.out.tfevents.1759802135.bask-pg0308u03a.2088475.0 +3 -0
  36. lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759802218.bask-pg0308u03a.2088475.1 +3 -0
  37. lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759828851.bask-pg0308u03a.2088475.2 +3 -0
  38. lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/config.json +143 -0
  39. lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/model.safetensors +3 -0
  40. lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/training_args.bin +3 -0
  41. lisa-ivl3-2b_ss2_2_ce_aa_sr4/evaluation_metrics.json +182 -0
  42. lisa-ivl3-2b_ss2_2_ce_aa_sr4/events.out.tfevents.1759803265.bask-pg0309u36a.301557.0 +3 -0
  43. lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759803317.bask-pg0309u36a.301557.1 +3 -0
  44. lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759893368.bask-pg0309u36a.301557.2 +3 -0
  45. lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/config.json +143 -0
  46. lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/model.safetensors +3 -0
  47. lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/training_args.bin +3 -0
  48. lisa-ivl3-2b_ss2_2_ce_vlora_sr/evaluation_metrics.json +134 -0
  49. lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802625.bask-pg0309u03a.1084672.0 +3 -0
  50. lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802958.bask-pg0309u36a.294001.0 +3 -0
ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 896,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 896,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 4864,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention"
56
+ ],
57
+ "max_position_embeddings": 32768,
58
+ "max_window_layers": 70,
59
+ "model_type": "qwen2",
60
+ "moe_config": null,
61
+ "num_attention_heads": 14,
62
+ "num_hidden_layers": 24,
63
+ "num_key_value_heads": 2,
64
+ "rms_norm_eps": 1e-06,
65
+ "rope_scaling": {
66
+ "factor": 2.0,
67
+ "rope_type": "dynamic",
68
+ "type": "dynamic"
69
+ },
70
+ "rope_theta": 1000000.0,
71
+ "sliding_window": null,
72
+ "use_bfloat16": true,
73
+ "use_cache": false,
74
+ "use_sliding_window": false,
75
+ "vocab_size": 151676
76
+ },
77
+ "max_dynamic_patch": 12,
78
+ "min_dynamic_patch": 1,
79
+ "model_type": "internvl_chat",
80
+ "output_attentions": false,
81
+ "pad2square": false,
82
+ "pad_token_id": 151643,
83
+ "ps_version": "v2",
84
+ "select_layer": -1,
85
+ "system_message": null,
86
+ "template": "internvl2_5",
87
+ "tie_word_embeddings": false,
88
+ "transformers_version": null,
89
+ "use_backbone_lora": 0,
90
+ "use_llm_lora": 0,
91
+ "use_thumbnail": true,
92
+ "vision_config": {
93
+ "_attn_implementation_autoset": true,
94
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
95
+ "architectures": [
96
+ "InternVisionModel"
97
+ ],
98
+ "attention_dropout": 0.0,
99
+ "auto_map": {
100
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
101
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
102
+ },
103
+ "capacity_factor": 1.2,
104
+ "drop_path_rate": 0.1,
105
+ "dropout": 0.0,
106
+ "dtype": "bfloat16",
107
+ "eval_capacity_factor": 1.4,
108
+ "hidden_act": "gelu",
109
+ "hidden_size": 1024,
110
+ "image_size": 448,
111
+ "initializer_factor": 0.1,
112
+ "initializer_range": 1e-10,
113
+ "intermediate_size": 4096,
114
+ "laux_allreduce": "all_nodes",
115
+ "layer_norm_eps": 1e-06,
116
+ "model_type": "intern_vit_6b",
117
+ "moe_coeff_ratio": 0.5,
118
+ "moe_intermediate_size": 768,
119
+ "moe_output_scale": 4.0,
120
+ "noisy_gate_policy": "RSample_before",
121
+ "norm_type": "layer_norm",
122
+ "num_attention_heads": 16,
123
+ "num_channels": 3,
124
+ "num_experts": 8,
125
+ "num_hidden_layers": 24,
126
+ "num_routed_experts": 4,
127
+ "num_shared_experts": 4,
128
+ "patch_size": 14,
129
+ "qk_normalization": false,
130
+ "qkv_bias": true,
131
+ "shared_expert_intermediate_size": 3072,
132
+ "use_bfloat16": true,
133
+ "use_flash_attn": true,
134
+ "use_moe": false,
135
+ "use_residual": true,
136
+ "use_rts": false,
137
+ "use_weighted_residual": false
138
+ }
139
+ }
ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbe4ec6a49e0b6bc8a746e8f97b9a142098004142e18058d1d0920b1c311a4b4
3
+ size 1895760944
ivl3-1b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c69c399d40df85749505a203a51b3b6a47ec624ca8587a3db35106d69490369
3
+ size 7352
ivl3-1b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.46611911058425903,
6
+ "eval_ciou": 0.502875030040741
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.45392024517059326,
12
+ "eval_ciou": 0.5104899406433105
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.4961601495742798,
18
+ "eval_ciou": 0.5527771711349487
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.4867956042289734,
24
+ "eval_ciou": 0.5469930171966553
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5023592114448547,
30
+ "eval_ciou": 0.5746171474456787
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5149329900741577,
36
+ "eval_ciou": 0.5788349509239197
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.4842008650302887,
42
+ "eval_ciou": 0.5401598215103149
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.4861740469932556,
48
+ "eval_ciou": 0.509650468826294
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.5150892734527588,
54
+ "eval_ciou": 0.5765101909637451
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.5048616528511047,
60
+ "eval_ciou": 0.5617921948432922
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|val",
64
+ "epoch": 11.0,
65
+ "eval_giou": 0.49304988980293274,
66
+ "eval_ciou": 0.5348318219184875
67
+ },
68
+ {
69
+ "val_dataset": "ReasonSeg|val",
70
+ "epoch": 12.0,
71
+ "eval_giou": 0.4974077343940735,
72
+ "eval_ciou": 0.5312325954437256
73
+ },
74
+ {
75
+ "val_dataset": "ReasonSeg|val",
76
+ "epoch": 13.0,
77
+ "eval_giou": 0.49648475646972656,
78
+ "eval_ciou": 0.5170189142227173
79
+ },
80
+ {
81
+ "val_dataset": "ReasonSeg|val",
82
+ "epoch": 14.0,
83
+ "eval_giou": 0.48747989535331726,
84
+ "eval_ciou": 0.5097679495811462
85
+ },
86
+ {
87
+ "val_dataset": "ReasonSeg|val",
88
+ "epoch": 15.0,
89
+ "eval_giou": 0.4950149953365326,
90
+ "eval_ciou": 0.5840950012207031
91
+ },
92
+ {
93
+ "val_dataset": "ReasonSeg|val",
94
+ "epoch": 16.0,
95
+ "eval_giou": 0.49667176604270935,
96
+ "eval_ciou": 0.5483035445213318
97
+ },
98
+ {
99
+ "val_dataset": "ReasonSeg|val",
100
+ "epoch": 17.0,
101
+ "eval_giou": 0.5069117546081543,
102
+ "eval_ciou": 0.5292925238609314
103
+ },
104
+ {
105
+ "val_dataset": "ReasonSeg|val",
106
+ "epoch": 18.0,
107
+ "eval_giou": 0.5001476407051086,
108
+ "eval_ciou": 0.5825715065002441
109
+ },
110
+ {
111
+ "val_dataset": "ReasonSeg|val",
112
+ "epoch": 19.0,
113
+ "eval_giou": 0.5190213322639465,
114
+ "eval_ciou": 0.5663264393806458
115
+ },
116
+ {
117
+ "val_dataset": "ReasonSeg|val",
118
+ "epoch": 20.0,
119
+ "eval_giou": 0.5078052282333374,
120
+ "eval_ciou": 0.550085723400116
121
+ },
122
+ {
123
+ "val_dataset": "ReasonSeg|val",
124
+ "epoch": 21.0,
125
+ "eval_giou": 0.5005829334259033,
126
+ "eval_ciou": 0.5272146463394165
127
+ },
128
+ {
129
+ "val_dataset": "ReasonSeg|val",
130
+ "epoch": 22.0,
131
+ "eval_giou": 0.5325579643249512,
132
+ "eval_ciou": 0.5967816114425659
133
+ },
134
+ {
135
+ "val_dataset": "ReasonSeg|val",
136
+ "epoch": 23.0,
137
+ "eval_giou": 0.5019457340240479,
138
+ "eval_ciou": 0.5450933575630188
139
+ },
140
+ {
141
+ "val_dataset": "ReasonSeg|val",
142
+ "epoch": 24.0,
143
+ "eval_giou": 0.5054593086242676,
144
+ "eval_ciou": 0.5789456367492676
145
+ },
146
+ {
147
+ "val_dataset": "ReasonSeg|val",
148
+ "epoch": 25.0,
149
+ "eval_giou": 0.5179678201675415,
150
+ "eval_ciou": 0.5849955677986145
151
+ },
152
+ {
153
+ "val_dataset": "ReasonSeg|val",
154
+ "epoch": 26.0,
155
+ "eval_giou": 0.49324876070022583,
156
+ "eval_ciou": 0.5512343645095825
157
+ },
158
+ {
159
+ "val_dataset": "ReasonSeg|val",
160
+ "epoch": 27.0,
161
+ "eval_giou": 0.526308536529541,
162
+ "eval_ciou": 0.5917784571647644
163
+ },
164
+ {
165
+ "val_dataset": "ReasonSeg|val",
166
+ "epoch": 28.0,
167
+ "eval_giou": 0.5196185111999512,
168
+ "eval_ciou": 0.5830491781234741
169
+ },
170
+ {
171
+ "val_dataset": "ReasonSeg|val",
172
+ "epoch": 29.0,
173
+ "eval_giou": 0.5260405540466309,
174
+ "eval_ciou": 0.5873146057128906
175
+ },
176
+ {
177
+ "val_dataset": "ReasonSeg|val",
178
+ "epoch": 30.0,
179
+ "eval_giou": 0.5176364779472351,
180
+ "eval_ciou": 0.5502902865409851
181
+ },
182
+ {
183
+ "val_dataset": "ReasonSeg|val",
184
+ "epoch": 31.0,
185
+ "eval_giou": 0.5242363810539246,
186
+ "eval_ciou": 0.5449221134185791
187
+ },
188
+ {
189
+ "val_dataset": "ReasonSeg|val",
190
+ "epoch": 32.0,
191
+ "eval_giou": 0.5220826864242554,
192
+ "eval_ciou": 0.548717737197876
193
+ },
194
+ {
195
+ "val_dataset": "ReasonSeg|val",
196
+ "epoch": 33.0,
197
+ "eval_giou": 0.532138466835022,
198
+ "eval_ciou": 0.5798225402832031
199
+ },
200
+ {
201
+ "val_dataset": "ReasonSeg|val",
202
+ "epoch": 34.0,
203
+ "eval_giou": 0.5219024419784546,
204
+ "eval_ciou": 0.5670239925384521
205
+ },
206
+ {
207
+ "val_dataset": "ReasonSeg|val",
208
+ "epoch": 35.0,
209
+ "eval_giou": 0.5276386141777039,
210
+ "eval_ciou": 0.5819328427314758
211
+ },
212
+ {
213
+ "val_dataset": "ReasonSeg|val",
214
+ "epoch": 36.0,
215
+ "eval_giou": 0.5373537540435791,
216
+ "eval_ciou": 0.5725668668746948
217
+ },
218
+ {
219
+ "val_dataset": "ReasonSeg|val",
220
+ "epoch": 37.0,
221
+ "eval_giou": 0.5370974540710449,
222
+ "eval_ciou": 0.5747125744819641
223
+ },
224
+ {
225
+ "val_dataset": "ReasonSeg|val",
226
+ "epoch": 38.0,
227
+ "eval_giou": 0.534446120262146,
228
+ "eval_ciou": 0.5705200433731079
229
+ },
230
+ {
231
+ "val_dataset": "ReasonSeg|val",
232
+ "epoch": 39.0,
233
+ "eval_giou": 0.5382332801818848,
234
+ "eval_ciou": 0.5789334177970886
235
+ },
236
+ {
237
+ "val_dataset": "ReasonSeg|val",
238
+ "epoch": 40.0,
239
+ "eval_giou": 0.5384411811828613,
240
+ "eval_ciou": 0.5742336511611938
241
+ },
242
+ {
243
+ "val_dataset": "ReasonSeg|test",
244
+ "epoch": 40.0,
245
+ "eval_giou": 0.5279056429862976,
246
+ "eval_ciou": 0.5487434267997742
247
+ },
248
+ {
249
+ "val_dataset": "refcoco|unc|val",
250
+ "epoch": 40.0,
251
+ "eval_giou": 0.8133376836776733,
252
+ "eval_ciou": 0.8157490491867065
253
+ },
254
+ {
255
+ "val_dataset": "refcoco|unc|testA",
256
+ "epoch": 40.0,
257
+ "eval_giou": 0.8291460871696472,
258
+ "eval_ciou": 0.8322128653526306
259
+ },
260
+ {
261
+ "val_dataset": "refcoco|unc|testB",
262
+ "epoch": 40.0,
263
+ "eval_giou": 0.7959555387496948,
264
+ "eval_ciou": 0.7960028052330017
265
+ },
266
+ {
267
+ "val_dataset": "refcoco+|unc|val",
268
+ "epoch": 40.0,
269
+ "eval_giou": 0.7633941173553467,
270
+ "eval_ciou": 0.7513891458511353
271
+ },
272
+ {
273
+ "val_dataset": "refcoco+|unc|testA",
274
+ "epoch": 40.0,
275
+ "eval_giou": 0.8041790723800659,
276
+ "eval_ciou": 0.7988309264183044
277
+ },
278
+ {
279
+ "val_dataset": "refcoco+|unc|testB",
280
+ "epoch": 40.0,
281
+ "eval_giou": 0.7283210754394531,
282
+ "eval_ciou": 0.7128502130508423
283
+ },
284
+ {
285
+ "val_dataset": "refcocog|umd|test",
286
+ "epoch": 40.0,
287
+ "eval_giou": 0.7768380641937256,
288
+ "eval_ciou": 0.7853620052337646
289
+ },
290
+ {
291
+ "val_dataset": "refcocog|umd|val",
292
+ "epoch": 40.0,
293
+ "eval_giou": 0.772633969783783,
294
+ "eval_ciou": 0.7714908719062805
295
+ },
296
+ {
297
+ "val_dataset": "grefcoco|unc|val",
298
+ "epoch": 40.0,
299
+ "eval_giou": 0.33939608931541443,
300
+ "eval_ciou": 0.389335036277771
301
+ },
302
+ {
303
+ "val_dataset": "grefcoco|unc|testA",
304
+ "epoch": 40.0,
305
+ "eval_giou": 0.48885226249694824,
306
+ "eval_ciou": 0.5180455446243286
307
+ },
308
+ {
309
+ "val_dataset": "grefcoco|unc|testB",
310
+ "epoch": 40.0,
311
+ "eval_giou": 0.42087891697883606,
312
+ "eval_ciou": 0.45711269974708557
313
+ }
314
+ ]
ivl3-1b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759861301.bask-pg0309u03a.2090474.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcc1ce8b056a496430e9b93fc159c074fcc2897c8d3ffa4595ca9a42b640bcf2
3
+ size 838934
ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759861364.bask-pg0309u03a.2090474.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aac295c5e250979475524c9c31a0d9338b8acbd61cfbc94c6765763ddf61bd9
3
+ size 438663
ivl3-1b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_19-21-38_bask-pg0309u03a/events.out.tfevents.1759976906.bask-pg0309u03a.2090474.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f5f9fde132a38c1d35c3ca50825c862adb145378bf3c2fe9c0a241496964c2b
3
+ size 1876
ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86bb78ca606874d74621c7804c52b7f099d1e33cc07b872caa1ded9beeed24b6
3
+ size 4234672656
ivl3-2b_ss2_2_aa_sr4_cbs/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae2bdf9ab86563d24903333ab4098296993fbaf54682b00a9f64a59b450883f0
3
+ size 7352
ivl3-2b_ss2_2_aa_sr4_cbs/evaluation_metrics.json ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5472875237464905,
6
+ "eval_ciou": 0.6169445514678955
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5826247930526733,
12
+ "eval_ciou": 0.6471297740936279
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.571751594543457,
18
+ "eval_ciou": 0.58400559425354
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.6117531061172485,
24
+ "eval_ciou": 0.7337754368782043
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.603766679763794,
30
+ "eval_ciou": 0.6782984733581543
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5942732691764832,
36
+ "eval_ciou": 0.6343610882759094
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5888954997062683,
42
+ "eval_ciou": 0.5462635159492493
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.6029112339019775,
48
+ "eval_ciou": 0.672645092010498
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.5829816460609436,
54
+ "eval_ciou": 0.6506213545799255
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.5900739431381226,
60
+ "eval_ciou": 0.5817593932151794
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|val",
64
+ "epoch": 11.0,
65
+ "eval_giou": 0.5904648900032043,
66
+ "eval_ciou": 0.6257218718528748
67
+ },
68
+ {
69
+ "val_dataset": "ReasonSeg|val",
70
+ "epoch": 12.0,
71
+ "eval_giou": 0.6047389507293701,
72
+ "eval_ciou": 0.662787139415741
73
+ },
74
+ {
75
+ "val_dataset": "ReasonSeg|val",
76
+ "epoch": 13.0,
77
+ "eval_giou": 0.632697582244873,
78
+ "eval_ciou": 0.6868629455566406
79
+ },
80
+ {
81
+ "val_dataset": "ReasonSeg|val",
82
+ "epoch": 14.0,
83
+ "eval_giou": 0.6089114546775818,
84
+ "eval_ciou": 0.662788987159729
85
+ },
86
+ {
87
+ "val_dataset": "ReasonSeg|val",
88
+ "epoch": 15.0,
89
+ "eval_giou": 0.6122798323631287,
90
+ "eval_ciou": 0.6807273030281067
91
+ },
92
+ {
93
+ "val_dataset": "ReasonSeg|val",
94
+ "epoch": 16.0,
95
+ "eval_giou": 0.6197442412376404,
96
+ "eval_ciou": 0.7008298635482788
97
+ },
98
+ {
99
+ "val_dataset": "ReasonSeg|val",
100
+ "epoch": 17.0,
101
+ "eval_giou": 0.6097345352172852,
102
+ "eval_ciou": 0.6946455836296082
103
+ },
104
+ {
105
+ "val_dataset": "ReasonSeg|val",
106
+ "epoch": 18.0,
107
+ "eval_giou": 0.6123113036155701,
108
+ "eval_ciou": 0.6853691339492798
109
+ },
110
+ {
111
+ "val_dataset": "ReasonSeg|val",
112
+ "epoch": 19.0,
113
+ "eval_giou": 0.6210272908210754,
114
+ "eval_ciou": 0.6771239042282104
115
+ },
116
+ {
117
+ "val_dataset": "ReasonSeg|val",
118
+ "epoch": 20.0,
119
+ "eval_giou": 0.617154061794281,
120
+ "eval_ciou": 0.6615597605705261
121
+ },
122
+ {
123
+ "val_dataset": "ReasonSeg|test",
124
+ "epoch": 20.0,
125
+ "eval_giou": 0.61573725938797,
126
+ "eval_ciou": 0.6199108958244324
127
+ },
128
+ {
129
+ "val_dataset": "refcoco|unc|val",
130
+ "epoch": 20.0,
131
+ "eval_giou": 0.8253726959228516,
132
+ "eval_ciou": 0.8281757831573486
133
+ },
134
+ {
135
+ "val_dataset": "refcoco|unc|testA",
136
+ "epoch": 20.0,
137
+ "eval_giou": 0.839383602142334,
138
+ "eval_ciou": 0.8423187136650085
139
+ },
140
+ {
141
+ "val_dataset": "refcoco|unc|testB",
142
+ "epoch": 20.0,
143
+ "eval_giou": 0.8076790571212769,
144
+ "eval_ciou": 0.8090101480484009
145
+ },
146
+ {
147
+ "val_dataset": "refcoco+|unc|val",
148
+ "epoch": 20.0,
149
+ "eval_giou": 0.7864928245544434,
150
+ "eval_ciou": 0.7770533561706543
151
+ },
152
+ {
153
+ "val_dataset": "refcoco+|unc|testA",
154
+ "epoch": 20.0,
155
+ "eval_giou": 0.8159601092338562,
156
+ "eval_ciou": 0.8131332993507385
157
+ },
158
+ {
159
+ "val_dataset": "refcoco+|unc|testB",
160
+ "epoch": 20.0,
161
+ "eval_giou": 0.7557609677314758,
162
+ "eval_ciou": 0.7428407073020935
163
+ },
164
+ {
165
+ "val_dataset": "refcocog|umd|test",
166
+ "epoch": 20.0,
167
+ "eval_giou": 0.7897712588310242,
168
+ "eval_ciou": 0.7977313995361328
169
+ },
170
+ {
171
+ "val_dataset": "refcocog|umd|val",
172
+ "epoch": 20.0,
173
+ "eval_giou": 0.7885610461235046,
174
+ "eval_ciou": 0.7968337535858154
175
+ },
176
+ {
177
+ "val_dataset": "grefcoco|unc|val",
178
+ "epoch": 20.0,
179
+ "eval_giou": 0.35536685585975647,
180
+ "eval_ciou": 0.41349032521247864
181
+ }
182
+ ]
ivl3-2b_ss2_2_aa_sr4_cbs/events.out.tfevents.1759899360.bask-pg0309u36a.1966744.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4316c0df3dd2f18914626eec6a94a4b077712da715160d288c664f9f079ae2a8
3
+ size 420319
ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759899430.bask-pg0309u36a.1966744.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f3cb7d64c9b2d4fa9b2938a235243fd234ba896ba3983d796fbfb53e356b0f2
3
+ size 223357
ivl3-2b_ss2_2_aa_sr4_cbs/runs/Oct08_05-55-56_bask-pg0309u36a/events.out.tfevents.1759989193.bask-pg0309u36a.1966744.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e29cf07dd781a544a2fd697efb429299042fcda8397002c3d4809627b4f141e
3
+ size 1548
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e68b4bf0ac4fb7851d80063b44ba49742267b6ce786b99e0e683a5011880a472
3
+ size 4234672656
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c483d8e4bccc79b2041f4255aa76528d81438cb33e8ab6094f36f23a18c22fe
3
+ size 7352
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/evaluation_metrics.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 0.14180939930424763,
5
+ "eval_giou": 0.5170324444770813,
6
+ "eval_ciou": 0.5942871570587158
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 0.28361879860849526,
11
+ "eval_giou": 0.5256783366203308,
12
+ "eval_ciou": 0.6608301997184753
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 0.4254281979127429,
17
+ "eval_giou": 0.5604663491249084,
18
+ "eval_ciou": 0.6650714874267578
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 0.5672375972169905,
23
+ "eval_giou": 0.5549952387809753,
24
+ "eval_ciou": 0.6755744218826294
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 0.7090469965212381,
29
+ "eval_giou": 0.5817117691040039,
30
+ "eval_ciou": 0.6808510422706604
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 0.8508563958254858,
35
+ "eval_giou": 0.5875627398490906,
36
+ "eval_ciou": 0.6574282050132751
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 0.9926657951297334,
41
+ "eval_giou": 0.6035187840461731,
42
+ "eval_ciou": 0.6785731911659241
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|test",
46
+ "epoch": 1.0,
47
+ "eval_giou": 0.5871202349662781,
48
+ "eval_ciou": 0.6128882765769958
49
+ },
50
+ {
51
+ "val_dataset": "refcoco|unc|val",
52
+ "epoch": 1.0,
53
+ "eval_giou": 0.7931445240974426,
54
+ "eval_ciou": 0.7957648634910583
55
+ },
56
+ {
57
+ "val_dataset": "refcoco|unc|testA",
58
+ "epoch": 1.0,
59
+ "eval_giou": 0.8122290372848511,
60
+ "eval_ciou": 0.8159088492393494
61
+ },
62
+ {
63
+ "val_dataset": "refcoco|unc|testB",
64
+ "epoch": 1.0,
65
+ "eval_giou": 0.771130383014679,
66
+ "eval_ciou": 0.7735289335250854
67
+ },
68
+ {
69
+ "val_dataset": "refcoco+|unc|val",
70
+ "epoch": 1.0,
71
+ "eval_giou": 0.7441046833992004,
72
+ "eval_ciou": 0.7393963932991028
73
+ },
74
+ {
75
+ "val_dataset": "refcoco+|unc|testA",
76
+ "epoch": 1.0,
77
+ "eval_giou": 0.7816813588142395,
78
+ "eval_ciou": 0.7822470664978027
79
+ },
80
+ {
81
+ "val_dataset": "refcoco+|unc|testB",
82
+ "epoch": 1.0,
83
+ "eval_giou": 0.7043335437774658,
84
+ "eval_ciou": 0.7011061906814575
85
+ },
86
+ {
87
+ "val_dataset": "refcocog|umd|test",
88
+ "epoch": 1.0,
89
+ "eval_giou": 0.7601777911186218,
90
+ "eval_ciou": 0.7689756751060486
91
+ },
92
+ {
93
+ "val_dataset": "refcocog|umd|val",
94
+ "epoch": 1.0,
95
+ "eval_giou": 0.7574023604393005,
96
+ "eval_ciou": 0.7668260931968689
97
+ },
98
+ {
99
+ "val_dataset": "grefcoco|unc|val",
100
+ "epoch": 1.0,
101
+ "eval_giou": 0.3440900146961212,
102
+ "eval_ciou": 0.40447333455085754
103
+ }
104
+ ]
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/events.out.tfevents.1760049897.bask-pg0309u36a.421581.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a814830c21483a873a9fb7aae3df2dd0da596f8e099f805420b4f97ddee661e
3
+ size 237250
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760049961.bask-pg0309u36a.421581.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:696272af75f56d031defab3f9ee22e1ff2152eaf31d3d26dd707e500237f3873
3
+ size 129472
ivl3-2b_ss2_2_ce_aa_seq_cbs_1/runs/Oct09_23-44-53_bask-pg0309u36a/events.out.tfevents.1760090134.bask-pg0309u36a.421581.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ef9166bd90585ddfb26787975266c8673785ef08e917d33dcc3947b38b8781
3
+ size 1548
ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bf3469a88769c9cf80872f702a2b69af7fc3de3e13b5fc72e57313bc7e6fd6d
3
+ size 4234672656
ivl3-2b_ss2_2_ce_aa_sr4_cbs/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbd27079f7bb014416e853c6dd7c8cb2cc3a0ed270d7689a6ce1c20a394b44ed
3
+ size 7352
ivl3-2b_ss2_2_ce_aa_sr4_cbs/evaluation_metrics.json ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.545238733291626,
6
+ "eval_ciou": 0.6227507591247559
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5574519634246826,
12
+ "eval_ciou": 0.5911077857017517
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5884220600128174,
18
+ "eval_ciou": 0.6480932831764221
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.567903459072113,
24
+ "eval_ciou": 0.6106956601142883
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5836549401283264,
30
+ "eval_ciou": 0.6618658900260925
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.6009659767150879,
36
+ "eval_ciou": 0.601414144039154
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5822204947471619,
42
+ "eval_ciou": 0.6209443211555481
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.5958440899848938,
48
+ "eval_ciou": 0.6226491332054138
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.5826206803321838,
54
+ "eval_ciou": 0.6304926872253418
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6073910593986511,
60
+ "eval_ciou": 0.6384271383285522
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|val",
64
+ "epoch": 11.0,
65
+ "eval_giou": 0.5889663100242615,
66
+ "eval_ciou": 0.5859989523887634
67
+ },
68
+ {
69
+ "val_dataset": "ReasonSeg|val",
70
+ "epoch": 12.0,
71
+ "eval_giou": 0.5989387631416321,
72
+ "eval_ciou": 0.5917373895645142
73
+ },
74
+ {
75
+ "val_dataset": "ReasonSeg|val",
76
+ "epoch": 13.0,
77
+ "eval_giou": 0.6207078695297241,
78
+ "eval_ciou": 0.6845619678497314
79
+ },
80
+ {
81
+ "val_dataset": "ReasonSeg|val",
82
+ "epoch": 14.0,
83
+ "eval_giou": 0.6023203730583191,
84
+ "eval_ciou": 0.6288123726844788
85
+ },
86
+ {
87
+ "val_dataset": "ReasonSeg|val",
88
+ "epoch": 15.0,
89
+ "eval_giou": 0.6015496850013733,
90
+ "eval_ciou": 0.6256543397903442
91
+ },
92
+ {
93
+ "val_dataset": "ReasonSeg|val",
94
+ "epoch": 16.0,
95
+ "eval_giou": 0.6224597692489624,
96
+ "eval_ciou": 0.6749593019485474
97
+ },
98
+ {
99
+ "val_dataset": "ReasonSeg|val",
100
+ "epoch": 17.0,
101
+ "eval_giou": 0.6151285767555237,
102
+ "eval_ciou": 0.6607990860939026
103
+ },
104
+ {
105
+ "val_dataset": "ReasonSeg|val",
106
+ "epoch": 18.0,
107
+ "eval_giou": 0.6089848279953003,
108
+ "eval_ciou": 0.6714296936988831
109
+ },
110
+ {
111
+ "val_dataset": "ReasonSeg|val",
112
+ "epoch": 19.0,
113
+ "eval_giou": 0.6193683743476868,
114
+ "eval_ciou": 0.66315758228302
115
+ },
116
+ {
117
+ "val_dataset": "ReasonSeg|val",
118
+ "epoch": 20.0,
119
+ "eval_giou": 0.6168807148933411,
120
+ "eval_ciou": 0.6491692066192627
121
+ },
122
+ {
123
+ "val_dataset": "ReasonSeg|test",
124
+ "epoch": 20.0,
125
+ "eval_giou": 0.6177780628204346,
126
+ "eval_ciou": 0.623653769493103
127
+ },
128
+ {
129
+ "val_dataset": "refcoco|unc|val",
130
+ "epoch": 20.0,
131
+ "eval_giou": 0.823724627494812,
132
+ "eval_ciou": 0.8269159197807312
133
+ },
134
+ {
135
+ "val_dataset": "refcoco|unc|testA",
136
+ "epoch": 20.0,
137
+ "eval_giou": 0.8385549783706665,
138
+ "eval_ciou": 0.8419386148452759
139
+ },
140
+ {
141
+ "val_dataset": "refcoco|unc|testB",
142
+ "epoch": 20.0,
143
+ "eval_giou": 0.8077682852745056,
144
+ "eval_ciou": 0.8096453547477722
145
+ },
146
+ {
147
+ "val_dataset": "refcoco+|unc|val",
148
+ "epoch": 20.0,
149
+ "eval_giou": 0.7876097559928894,
150
+ "eval_ciou": 0.7777907848358154
151
+ },
152
+ {
153
+ "val_dataset": "refcoco+|unc|testA",
154
+ "epoch": 20.0,
155
+ "eval_giou": 0.8152230978012085,
156
+ "eval_ciou": 0.8130303025245667
157
+ },
158
+ {
159
+ "val_dataset": "refcoco+|unc|testB",
160
+ "epoch": 20.0,
161
+ "eval_giou": 0.7466538548469543,
162
+ "eval_ciou": 0.7298356294631958
163
+ },
164
+ {
165
+ "val_dataset": "refcocog|umd|test",
166
+ "epoch": 20.0,
167
+ "eval_giou": 0.7895915508270264,
168
+ "eval_ciou": 0.7953519821166992
169
+ },
170
+ {
171
+ "val_dataset": "refcocog|umd|val",
172
+ "epoch": 20.0,
173
+ "eval_giou": 0.7863814830780029,
174
+ "eval_ciou": 0.790006697177887
175
+ },
176
+ {
177
+ "val_dataset": "grefcoco|unc|val",
178
+ "epoch": 20.0,
179
+ "eval_giou": 0.34861522912979126,
180
+ "eval_ciou": 0.4008709788322449
181
+ }
182
+ ]
ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759877835.bask-pg0309u12a.1530010.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bdf4f69c712289c45dc7f3d2fc1c00080ed867cb126c8893a7cdb9e6410c575
3
+ size 5269
ivl3-2b_ss2_2_ce_aa_sr4_cbs/events.out.tfevents.1759878745.bask-pg0309u12a.1549446.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4d3e677ef10d742ca354d04dc083cd8fac9d4f3438fcb2105ec954c1cb39554
3
+ size 420319
ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct07_23-57-11_bask-pg0309u12a/events.out.tfevents.1759877899.bask-pg0309u12a.1530010.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ef6b38d60c181687f36e0184bd79a6ca1ad752797613b8b065873a3b4246d0b
3
+ size 11734
ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759878805.bask-pg0309u12a.1549446.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2aca35e8105442e8225c238eb97ea990547dd7850e1d79d809ea06ab4a8bbf52
3
+ size 223363
ivl3-2b_ss2_2_ce_aa_sr4_cbs/runs/Oct08_00-12-21_bask-pg0309u12a/events.out.tfevents.1759967908.bask-pg0309u12a.1549446.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea46edb0001154b124f3e0221461332c2861bdb219b80ce8118ea43f68f236d3
3
+ size 1548
lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30e47cc709ec1ba69f2d9d95a1cb1772d8724fbe12b8127c78a3f24363254eb2
3
+ size 4234672656
lisa-ivl3-2b_s2_2_vlora_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:376cf9ecb749a538a45e36600313b7a343b1da588de84846095f57565d33118a
3
+ size 7352
lisa-ivl3-2b_s2_2_vlora_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5275816917419434,
6
+ "eval_ciou": 0.6127659678459167
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5785483121871948,
12
+ "eval_ciou": 0.6731550693511963
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5671323537826538,
18
+ "eval_ciou": 0.6406646966934204
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5716913342475891,
24
+ "eval_ciou": 0.6880943775177002
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.6032226085662842,
30
+ "eval_ciou": 0.669122576713562
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.6153417229652405,
36
+ "eval_ciou": 0.6467410326004028
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.6117483377456665,
42
+ "eval_ciou": 0.6864674687385559
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.613699734210968,
48
+ "eval_ciou": 0.6648719906806946
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.6249428987503052,
54
+ "eval_ciou": 0.6983655691146851
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6295068860054016,
60
+ "eval_ciou": 0.6981709599494934
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.6091115474700928,
66
+ "eval_ciou": 0.6346589922904968
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.7866575121879578,
72
+ "eval_ciou": 0.7891850471496582
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8079327344894409,
78
+ "eval_ciou": 0.8123739361763
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7600301504135132,
84
+ "eval_ciou": 0.7600919008255005
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7380849719047546,
90
+ "eval_ciou": 0.7316219210624695
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7775301337242126,
96
+ "eval_ciou": 0.7753238677978516
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.6858305335044861,
102
+ "eval_ciou": 0.6726531982421875
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.7513312697410583,
108
+ "eval_ciou": 0.7607209086418152
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7468341588973999,
114
+ "eval_ciou": 0.7547957897186279
115
+ },
116
+ {
117
+ "val_dataset": "grefcoco|unc|val",
118
+ "epoch": 10.0,
119
+ "eval_giou": 0.3461865484714508,
120
+ "eval_ciou": 0.4059288203716278
121
+ },
122
+ {
123
+ "val_dataset": "grefcoco|unc|testA",
124
+ "epoch": 10.0,
125
+ "eval_giou": 0.5068609714508057,
126
+ "eval_ciou": 0.5432214736938477
127
+ },
128
+ {
129
+ "val_dataset": "grefcoco|unc|testB",
130
+ "epoch": 10.0,
131
+ "eval_giou": 0.4215650260448456,
132
+ "eval_ciou": 0.46161597967147827
133
+ }
134
+ ]
lisa-ivl3-2b_s2_2_vlora_sr/events.out.tfevents.1759802135.bask-pg0308u03a.2088475.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9753975d394fb29fe790a725030cfb2eb7164a9c9772c6c58f150b86032ff6d2
3
+ size 213843
lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759802218.bask-pg0308u03a.2088475.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb122b4b83d6c98e0202bcb24e7e4d8b2ecf6127e8749eb7ad7e39ee8ee4bee3
3
+ size 116402
lisa-ivl3-2b_s2_2_vlora_sr/runs/Oct07_02-55-32_bask-pg0308u03a/events.out.tfevents.1759828851.bask-pg0308u03a.2088475.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:049fe35ce139736f5cff725d493b6559f3cb8ebe44b73c1dada7d658c239b209
3
+ size 1840
lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8abfa0531fab5dd062f2adc5d1998d253a3534af2fbee5966ca5f440b8b56d
3
+ size 4234672656
lisa-ivl3-2b_ss2_2_ce_aa_sr4/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6f838b765f1d9e641210de93267752a4124e94ae40523c3d484eed6abea4144
3
+ size 7352
lisa-ivl3-2b_ss2_2_ce_aa_sr4/evaluation_metrics.json ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5319168567657471,
6
+ "eval_ciou": 0.6217775344848633
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5545085668563843,
12
+ "eval_ciou": 0.5805025696754456
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5865544676780701,
18
+ "eval_ciou": 0.5943950414657593
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5605677366256714,
24
+ "eval_ciou": 0.618609607219696
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5957074165344238,
30
+ "eval_ciou": 0.6184157729148865
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5842458605766296,
36
+ "eval_ciou": 0.5878432989120483
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5693845748901367,
42
+ "eval_ciou": 0.5511792302131653
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.589174747467041,
48
+ "eval_ciou": 0.601881742477417
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.5905624628067017,
54
+ "eval_ciou": 0.5573661923408508
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6161700487136841,
60
+ "eval_ciou": 0.6625725626945496
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|val",
64
+ "epoch": 11.0,
65
+ "eval_giou": 0.6013333797454834,
66
+ "eval_ciou": 0.6199454665184021
67
+ },
68
+ {
69
+ "val_dataset": "ReasonSeg|val",
70
+ "epoch": 12.0,
71
+ "eval_giou": 0.5980168581008911,
72
+ "eval_ciou": 0.6256346106529236
73
+ },
74
+ {
75
+ "val_dataset": "ReasonSeg|val",
76
+ "epoch": 13.0,
77
+ "eval_giou": 0.6127091646194458,
78
+ "eval_ciou": 0.6268919706344604
79
+ },
80
+ {
81
+ "val_dataset": "ReasonSeg|val",
82
+ "epoch": 14.0,
83
+ "eval_giou": 0.6001991033554077,
84
+ "eval_ciou": 0.6283772587776184
85
+ },
86
+ {
87
+ "val_dataset": "ReasonSeg|val",
88
+ "epoch": 15.0,
89
+ "eval_giou": 0.5891889333724976,
90
+ "eval_ciou": 0.6160405278205872
91
+ },
92
+ {
93
+ "val_dataset": "ReasonSeg|val",
94
+ "epoch": 16.0,
95
+ "eval_giou": 0.583220899105072,
96
+ "eval_ciou": 0.6409042477607727
97
+ },
98
+ {
99
+ "val_dataset": "ReasonSeg|val",
100
+ "epoch": 17.0,
101
+ "eval_giou": 0.5922881364822388,
102
+ "eval_ciou": 0.6401846408843994
103
+ },
104
+ {
105
+ "val_dataset": "ReasonSeg|val",
106
+ "epoch": 18.0,
107
+ "eval_giou": 0.5957611799240112,
108
+ "eval_ciou": 0.6475955843925476
109
+ },
110
+ {
111
+ "val_dataset": "ReasonSeg|val",
112
+ "epoch": 19.0,
113
+ "eval_giou": 0.606143593788147,
114
+ "eval_ciou": 0.6534363031387329
115
+ },
116
+ {
117
+ "val_dataset": "ReasonSeg|val",
118
+ "epoch": 20.0,
119
+ "eval_giou": 0.6004212498664856,
120
+ "eval_ciou": 0.6504445672035217
121
+ },
122
+ {
123
+ "val_dataset": "ReasonSeg|test",
124
+ "epoch": 20.0,
125
+ "eval_giou": 0.6277483105659485,
126
+ "eval_ciou": 0.6647483706474304
127
+ },
128
+ {
129
+ "val_dataset": "refcoco|unc|val",
130
+ "epoch": 20.0,
131
+ "eval_giou": 0.8239029049873352,
132
+ "eval_ciou": 0.825985848903656
133
+ },
134
+ {
135
+ "val_dataset": "refcoco|unc|testA",
136
+ "epoch": 20.0,
137
+ "eval_giou": 0.8409322500228882,
138
+ "eval_ciou": 0.8456533551216125
139
+ },
140
+ {
141
+ "val_dataset": "refcoco|unc|testB",
142
+ "epoch": 20.0,
143
+ "eval_giou": 0.8107873201370239,
144
+ "eval_ciou": 0.8120189905166626
145
+ },
146
+ {
147
+ "val_dataset": "refcoco+|unc|val",
148
+ "epoch": 20.0,
149
+ "eval_giou": 0.7863824367523193,
150
+ "eval_ciou": 0.7759678959846497
151
+ },
152
+ {
153
+ "val_dataset": "refcoco+|unc|testA",
154
+ "epoch": 20.0,
155
+ "eval_giou": 0.8180026412010193,
156
+ "eval_ciou": 0.8148158192634583
157
+ },
158
+ {
159
+ "val_dataset": "refcoco+|unc|testB",
160
+ "epoch": 20.0,
161
+ "eval_giou": 0.7562121748924255,
162
+ "eval_ciou": 0.7416884899139404
163
+ },
164
+ {
165
+ "val_dataset": "refcocog|umd|test",
166
+ "epoch": 20.0,
167
+ "eval_giou": 0.7904837131500244,
168
+ "eval_ciou": 0.799082338809967
169
+ },
170
+ {
171
+ "val_dataset": "refcocog|umd|val",
172
+ "epoch": 20.0,
173
+ "eval_giou": 0.7864062190055847,
174
+ "eval_ciou": 0.7907365560531616
175
+ },
176
+ {
177
+ "val_dataset": "grefcoco|unc|val",
178
+ "epoch": 20.0,
179
+ "eval_giou": 0.3480578660964966,
180
+ "eval_ciou": 0.40478312969207764
181
+ }
182
+ ]
lisa-ivl3-2b_ss2_2_ce_aa_sr4/events.out.tfevents.1759803265.bask-pg0309u36a.301557.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660498ff574c492bf5d679c2ab2000082d9d54adb74cd54a33d31ee5e50ed85d
3
+ size 420319
lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759803317.bask-pg0309u36a.301557.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c9a2e3d5e655d1a3720f020272d78ce223c539dd74f22c129512a6a9c19465
3
+ size 223365
lisa-ivl3-2b_ss2_2_ce_aa_sr4/runs/Oct07_03-14-21_bask-pg0309u36a/events.out.tfevents.1759893368.bask-pg0309u36a.301557.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9651efafd9183c3de60fe034fc2361230008f6a877418be63c966d679c72114e
3
+ size 1548
lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:139fdbe8833d72b40b2877084cee6be809601e8730f050f271995f16007a95f4
3
+ size 4234672656
lisa-ivl3-2b_ss2_2_ce_vlora_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aaa7c7f7fcd6affd89f09fc49b3e6065ac81102006c76127b26eb69297651b1
3
+ size 7352
lisa-ivl3-2b_ss2_2_ce_vlora_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5569912195205688,
6
+ "eval_ciou": 0.6086179614067078
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5633984208106995,
12
+ "eval_ciou": 0.6330024600028992
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5652901530265808,
18
+ "eval_ciou": 0.6073711514472961
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5960856080055237,
24
+ "eval_ciou": 0.6793828010559082
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.6029606461524963,
30
+ "eval_ciou": 0.7154921293258667
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5903835892677307,
36
+ "eval_ciou": 0.569995105266571
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.613521158695221,
42
+ "eval_ciou": 0.6969940066337585
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.6117329597473145,
48
+ "eval_ciou": 0.6676673889160156
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.6173275113105774,
54
+ "eval_ciou": 0.6789456009864807
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6149329543113708,
60
+ "eval_ciou": 0.6718701124191284
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.609404444694519,
66
+ "eval_ciou": 0.6132881045341492
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.7893159985542297,
72
+ "eval_ciou": 0.7933170199394226
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8105526566505432,
78
+ "eval_ciou": 0.8163524270057678
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7612078785896301,
84
+ "eval_ciou": 0.7619940042495728
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7366273999214172,
90
+ "eval_ciou": 0.7300769686698914
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7783792614936829,
96
+ "eval_ciou": 0.7807698845863342
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.6872988343238831,
102
+ "eval_ciou": 0.6766538619995117
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.7511187195777893,
108
+ "eval_ciou": 0.7614018321037292
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7464777231216431,
114
+ "eval_ciou": 0.7518000602722168
115
+ },
116
+ {
117
+ "val_dataset": "grefcoco|unc|val",
118
+ "epoch": 10.0,
119
+ "eval_giou": 0.34809717535972595,
120
+ "eval_ciou": 0.40297430753707886
121
+ },
122
+ {
123
+ "val_dataset": "grefcoco|unc|testA",
124
+ "epoch": 10.0,
125
+ "eval_giou": 0.5119317770004272,
126
+ "eval_ciou": 0.5438946485519409
127
+ },
128
+ {
129
+ "val_dataset": "grefcoco|unc|testB",
130
+ "epoch": 10.0,
131
+ "eval_giou": 0.42917221784591675,
132
+ "eval_ciou": 0.46293896436691284
133
+ }
134
+ ]
lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802625.bask-pg0309u03a.1084672.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a500c6eab72e54c5f9f22ebdab934bd6697b3029ccac498ad538fbad5ec125c8
3
+ size 213843
lisa-ivl3-2b_ss2_2_ce_vlora_sr/events.out.tfevents.1759802958.bask-pg0309u36a.294001.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8104cc4761ea3f319ab73a87e86d1bacfacc2e1d101f9e6144d48e9575589b0
3
+ size 884