| | |
| | seed_everything: 0 |
| | trainer: |
| | precision: bf16-mixed |
| | max_steps: 50000 |
| | data: |
| | class_path: lightning_ir.LightningIRDataModule |
| | init_args: |
| | num_workers: 1 |
| | train_batch_size: 64 |
| | shuffle_train: true |
| | train_dataset: |
| | class_path: lightning_ir.RunDataset |
| | init_args: |
| | run_path_or_id: msmarco-passage/train/rank-distillm/set-encoder |
| | depth: 100 |
| | sample_size: 8 |
| | sampling_strategy: log_random |
| | targets: score |
| | normalize_targets: false |
| | model: |
| | class_path: lightning_ir.BiEncoderModule |
| | init_args: |
| | model_name_or_path: bert-base-uncased |
| | config: |
| | class_path: lightning_ir.ColConfig |
| | init_args: |
| | similarity_function: dot |
| | query_expansion: true |
| | attend_to_query_expanded_tokens: true |
| | query_mask_scoring_tokens: null |
| | doc_mask_scoring_tokens: punctuation |
| | query_aggregation_function: mean |
| | normalize: false |
| | add_marker_tokens: false |
| | embedding_dim: 128 |
| | projection: linear |
| | query_pooling_strategy: null |
| | doc_expansion: false |
| | attend_to_doc_expanded_tokens: false |
| | doc_pooling_strategy: null |
| | sparsification: null |
| | query_length: 32 |
| | doc_length: 256 |
| | loss_functions: |
| | - class_path: lightning_ir.SupervisedMarginMSE |
| | - class_path: lightning_ir.KLDivergence |
| | - class_path: lightning_ir.InBatchCrossEntropy |
| | init_args: |
| | pos_sampling_technique: first |
| | neg_sampling_technique: first |
| | max_num_neg_samples: 8 |
| | optimizer: |
| | class_path: torch.optim.AdamW |
| | init_args: |
| | lr: 2.0e-05 |
| | lr_scheduler: |
| | class_path: lightning_ir.LinearLRSchedulerWithLinearWarmup |
| | init_args: |
| | num_warmup_steps: 5000 |
| | final_value: 0.02 |
| | num_delay_steps: 0 |
| |
|