pcuenq HF Staff committed on
Commit
66334f2
·
verified ·
1 Parent(s): 15665c9

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +229 -0
config.json CHANGED
@@ -37,6 +37,235 @@
37
  "video_context_token": "<video>",
38
  "video_context_token_id": 131081,
39
  "video_pruning_rate": 0.7,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  "vit_hidden_size": 1280,
41
  "quantization_config": {
42
  "config_groups": {
 
37
  "video_context_token": "<video>",
38
  "video_context_token_id": 131081,
39
  "video_pruning_rate": 0.7,
40
+ "vision_config": {
41
+ "adaptor_configs": {},
42
+ "adaptor_names": null,
43
+ "architectures": [
44
+ "RADIOModel"
45
+ ],
46
+ "args": {
47
+ "aa": null,
48
+ "amp": true,
49
+ "amp_dtype": "bfloat16",
50
+ "amp_impl": "native",
51
+ "aug_repeats": 0,
52
+ "aug_splits": 0,
53
+ "bn_eps": null,
54
+ "bn_momentum": null,
55
+ "cache_dir": null,
56
+ "channels_last": false,
57
+ "checkpoint_hist": 10,
58
+ "chk_keep_forever": 100,
59
+ "class_map": "",
60
+ "clip_grad": null,
61
+ "clip_mode": "norm",
62
+ "cls_token_per_teacher": true,
63
+ "coco_annotations_file": "/datasets/coco2017-adlsa/annotations/captions_val2017.json",
64
+ "coco_image_dir": "/datasets/coco2017-adlsa/val2017",
65
+ "color_jitter": 0.4,
66
+ "cooldown_epochs": 0,
67
+ "cpe_max_size": 2048,
68
+ "crd_loss": false,
69
+ "crd_loss_weight": 0.8,
70
+ "crop_pct": null,
71
+ "cutmix": 0.0,
72
+ "cutmix_minmax": null,
73
+ "dataset_download": false,
74
+ "debug_full_knn": false,
75
+ "decay_epochs": 90,
76
+ "decay_milestones": [
77
+ 90,
78
+ 180,
79
+ 270
80
+ ],
81
+ "decay_rate": 0.1,
82
+ "depchain": true,
83
+ "dist_bn": "reduce",
84
+ "dist_norm_weight": 0.0,
85
+ "distributed": true,
86
+ "drop": 0.0,
87
+ "drop_block": null,
88
+ "drop_connect": null,
89
+ "drop_path": null,
90
+ "dtype": "bfloat16",
91
+ "epoch_repeats": 0.0,
92
+ "eval": false,
93
+ "eval_metric": "knn_top1",
94
+ "eval_teacher": false,
95
+ "eval_teacher_only": false,
96
+ "eval_throughput": false,
97
+ "fast_norm": false,
98
+ "fd_loss_fn": "MSE",
99
+ "feature_normalization": "SHIP_NORM",
100
+ "feature_summarizer": "cls_token",
101
+ "feature_upscale_factor": null,
102
+ "force_new_wandb_id": false,
103
+ "force_spectral_reparam": true,
104
+ "freeze_bn": false,
105
+ "fsdp": false,
106
+ "fuser": "",
107
+ "gp": null,
108
+ "grad_accum_steps": 1,
109
+ "grad_checkpointing": false,
110
+ "head_init_bias": null,
111
+ "head_init_scale": null,
112
+ "head_warmup": 5,
113
+ "head_weight_decay": 0.001,
114
+ "hflip": 0.5,
115
+ "img_size": null,
116
+ "in_chans": null,
117
+ "initial_checkpoint": null,
118
+ "input_size": null,
119
+ "interpolation": "",
120
+ "layer_decay": null,
121
+ "local_rank": 0,
122
+ "log_interval": 50,
123
+ "log_mlflow": false,
124
+ "log_wandb": true,
125
+ "loss_auto_balance": false,
126
+ "lr_base": 0.1,
127
+ "lr_base_scale": "",
128
+ "lr_base_size": 256,
129
+ "lr_cycle_decay": 0.5,
130
+ "lr_cycle_limit": 1,
131
+ "lr_cycle_mul": 1.0,
132
+ "lr_k_decay": 1.0,
133
+ "lr_noise": null,
134
+ "lr_noise_pct": 0.67,
135
+ "lr_noise_std": 1.0,
136
+ "mean": null,
137
+ "mesa": false,
138
+ "min_lr": 0,
139
+ "mixup": 0.0,
140
+ "mixup_mode": "batch",
141
+ "mixup_off_epoch": 0,
142
+ "mixup_prob": 1.0,
143
+ "mixup_switch_prob": 0.5,
144
+ "mlp_hidden_size": 1520,
145
+ "mlp_num_inner": 3,
146
+ "mlp_version": "v2",
147
+ "model": "vit_huge_patch16_224",
148
+ "model_kwargs": {},
149
+ "model_norm": false,
150
+ "momentum": 0.9,
151
+ "no_aug": false,
152
+ "no_ddp_bb": true,
153
+ "no_prefetcher": false,
154
+ "no_resume_opt": false,
155
+ "num_classes": null,
156
+ "opt_betas": null,
157
+ "opt_eps": null,
158
+ "patience_epochs": 10,
159
+ "pin_mem": false,
160
+ "prefetcher": true,
161
+ "pretrained": false,
162
+ "rank": 0,
163
+ "ratio": [
164
+ 0.75,
165
+ 1.3333333333333333
166
+ ],
167
+ "recount": 1,
168
+ "recovery_interval": 0,
169
+ "register_multiple": 16,
170
+ "remode": "pixel",
171
+ "reprob": 0.0,
172
+ "reset_loss_state": false,
173
+ "resplit": false,
174
+ "save_images": false,
175
+ "scale": [
176
+ 0.5,
177
+ 1.0
178
+ ],
179
+ "sched": "cosine",
180
+ "seed": 42,
181
+ "smoothing": 0.1,
182
+ "spectral_heads": false,
183
+ "spectral_reparam": false,
184
+ "split_bn": false,
185
+ "start_epoch": null,
186
+ "std": null,
187
+ "stream_teachers": true,
188
+ "sync_bn": false,
189
+ "synchronize_step": false,
190
+ "teachers": [
191
+ {
192
+ "fd_normalize": false,
193
+ "feature_distillation": true,
194
+ "input_size": 378,
195
+ "model": "ViT-H-14-378-quickgelu",
196
+ "name": "clip",
197
+ "pretrained": "dfn5b",
198
+ "type": "open_clip",
199
+ "use_summary": true
200
+ },
201
+ {
202
+ "fd_normalize": false,
203
+ "feature_distillation": true,
204
+ "input_size": 378,
205
+ "model": "ViT-SO400M-14-SigLIP-384",
206
+ "name": "siglip",
207
+ "pretrained": "webli",
208
+ "type": "open_clip",
209
+ "use_summary": true
210
+ },
211
+ {
212
+ "fd_normalize": false,
213
+ "feature_distillation": true,
214
+ "input_size": 378,
215
+ "model": "dinov2_vitg14_reg",
216
+ "name": "dino_v2",
217
+ "type": "dino_v2",
218
+ "use_summary": true
219
+ },
220
+ {
221
+ "fd_normalize": false,
222
+ "feature_distillation": true,
223
+ "input_size": 1024,
224
+ "model": "vit-h",
225
+ "name": "sam",
226
+ "type": "sam",
227
+ "use_summary": false
228
+ }
229
+ ],
230
+ "torchcompile": null,
231
+ "torchscript": false,
232
+ "train_interpolation": "random",
233
+ "train_split": "train",
234
+ "tta": 0,
235
+ "use_coco": false,
236
+ "use_multi_epochs_loader": false,
237
+ "val_ema_only": false,
238
+ "val_split": "val",
239
+ "vflip": 0.0,
240
+ "vitdet_version": 1,
241
+ "wandb_entity": "",
242
+ "wandb_job_type": "",
243
+ "wandb_name": "",
244
+ "wandb_project": "",
245
+ "warmup_lr": 1e-05,
246
+ "warmup_prefix": false,
247
+ "worker_seeding": "all",
248
+ "workers": 8,
249
+ "world_size": 256
250
+ },
251
+ "auto_map": {
252
+ "AutoConfig": "nvidia/C-RADIOv2-H--hf_model.RADIOConfig",
253
+ "AutoModel": "nvidia/C-RADIOv2-H--hf_model.RADIOModel"
254
+ },
255
+ "feature_normalizer_config": null,
256
+ "inter_feature_normalizer_config": null,
257
+ "max_resolution": 2048,
258
+ "model_type": "",
259
+ "patch_size": 16,
260
+ "preferred_resolution": [
261
+ 768,
262
+ 768
263
+ ],
264
+ "torch_dtype": "bfloat16",
265
+ "use_flash_attn": false,
266
+ "version": "radio_v2.5-h",
267
+ "vitdet_window_size": null
268
+ },
269
  "vit_hidden_size": 1280,
270
  "quantization_config": {
271
  "config_groups": {