Commit
final push, all released
pritamqu committed Jul 9, 2024
1 parent dc58b7e commit c9fdc55
Showing 112 changed files with 18,175 additions and 717 deletions.
1 change: 1 addition & 0 deletions codes/eval/checkpointing/__init__.py
@@ -0,0 +1 @@
from .utils import *
66 changes: 66 additions & 0 deletions codes/eval/checkpointing/utils.py
@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-

import os
from datetime import datetime

import torch


def create_or_restore_training_state3(args, model, optimizer, logger, amp):
    """Restore model, optimizer, epoch, AMP, and RNG state from a checkpoint, if one exists."""

    def _system_restore(training_state, model, optimizer, amp):
        model.load_state_dict(training_state['model'])
        optimizer.load_state_dict(training_state['optimizer'])
        start_epoch = training_state['epoch'] + 1
        rng = training_state['rng'].cpu()
        torch.random.set_rng_state(rng)
        if amp is not None:
            amp.load_state_dict(training_state['amp'])

        return model, optimizer, start_epoch, amp, rng

    logger.add_line("looking for a checkpoint to restore...")
    path = None
    start_epoch = 0
    rng = torch.random.get_rng_state()
    # args.resume, when set, takes precedence over args.ckpt_dir.
    if os.path.isfile(os.path.join(args.ckpt_dir, 'checkpoint.pth.tar')):
        path = os.path.join(args.ckpt_dir, 'checkpoint.pth.tar')
    if args.resume and os.path.isfile(os.path.join(args.resume, 'checkpoint.pth.tar')):
        path = os.path.join(args.resume, 'checkpoint.pth.tar')

    if path is not None:
        if args.gpu is None:
            training_state = torch.load(path)
        else:
            # Map the checkpoint to the specified single GPU.
            training_state = torch.load(path, map_location='cuda:{}'.format(args.gpu))
        model, optimizer, start_epoch, amp, rng = _system_restore(training_state,
                                                                  model, optimizer, amp)

        logger.add_line(f"training state restored from {path} at epoch: {start_epoch}")
    else:
        logger.add_line(f"No checkpoint found either at resume: {args.resume} or ckpt_dir: {args.ckpt_dir}")

    return model, optimizer, start_epoch, amp, rng


def commit_state3(args, model, optimizer, epoch, amp, rng, logger):
    """Atomically write the current training state to args.ckpt_dir."""

    # Works on the narval and cedar clusters; will work on mist as well.
    temp_path = os.path.join(args.ckpt_dir, "temp.pth.tar")
    model_path = os.path.join(args.ckpt_dir, "checkpoint.pth.tar")

    training_state = {'model': model.state_dict(),
                      'optimizer': optimizer.state_dict(),
                      'epoch': epoch,
                      'rng': rng}
    if amp is not None:
        training_state.update({'amp': amp.state_dict()})

    # First save to a temporary file.
    torch.save(training_state, temp_path)
    # Per the GNU/POSIX semantics of rename, replacing model_path is atomic:
    # the file is either fully updated or left untouched, never half-written,
    # even if the job crashes or is preempted mid-save.
    os.replace(temp_path, model_path)
    msg = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ": Checkpoint saved at " + model_path
    logger.add_line(msg)
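
Note: a minimal usage sketch (not part of this commit) of how these two helpers are typically paired around a training loop: restore once before training, commit once per epoch. The driver function, its arguments, args.num_epochs, and the import path are assumptions based on the file layout and signatures above.

import torch
from codes.eval.checkpointing import create_or_restore_training_state3, commit_state3

def run_training(args, model, optimizer, train_one_epoch, logger, amp=None):
    # Restore model/optimizer/epoch/RNG state if a checkpoint exists,
    # otherwise start from epoch 0 with the current RNG state.
    model, optimizer, start_epoch, amp, rng = create_or_restore_training_state3(
        args, model, optimizer, logger, amp)

    for epoch in range(start_epoch, args.num_epochs):
        train_one_epoch(model, optimizer, epoch)
        # Snapshot the RNG state so a preempted-and-resumed run continues
        # the same random stream, then atomically write the checkpoint.
        rng = torch.random.get_rng_state()
        commit_state3(args, model, optimizer, epoch, amp, rng, logger)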
85 changes: 85 additions & 0 deletions codes/eval/configs/finetune/esc50/config_f1.yaml
@@ -0,0 +1,85 @@
apex: true
apex_opt_level: O1
dataset:
  aud_transform: "strong"
  audio_clip_duration: 4
  audio_fps: 16000.0
  audio_fps_out: 112
  batch_size: 64
  fold: 1
  hop_length: 143
  n_fft: 1024
  n_mels: 80
  name: esc50
  test:
    aud_aug_kwargs:
      normalize: true
      trim_pad: true
      DB_NORM: '_KINETICS'
    aug_mode: val
    clips_per_video: 3
    drop_last: false
    mode: "video"
    split: test-{fold:01d}
    use_shuffle: false

  train:
    aug_mode: "train"
    clips_per_video: 1
    mode: "clip"
    split: train-{fold:01d}
    aud_aug_kwargs: # ** VJ-MASK-RC-TW **
      vol: 0.2
      fmask_len: [0, 10]
      tmask_len: [0, 20]
      num_fmask: 2
      num_tmask: 2
      wrap_window: 20
      voljitter: true
      timewarp: true
      fmask: true
      tmask: true
      randcrop: true
      normalize: true
      trim_pad: true
      DB_NORM: '_KINETICS'
    use_shuffle: true
    drop_last: true
model:
  backbone: # configurations are based on the pretraining
    spec_size: [80, 448]
    patch_spatial: [4, 16]
    apply_cls_token: true
    vit_config: "base"
  classifier:
    num_classes: 50
    use_bn: false
    l2_norm: true
    use_dropout: true
    dropout: 0.5
  fwd_kwargs:
    feat_op: 'pool' # 'cls'
  name: ESC50-fintune
num_workers: 20
hyperparams:
  layer_decay: 0.65
  num_epochs: 100
  optimizer:
    name: "adamw"
    betas: [0.9, 0.999]
    momentum: 0.9 # ignored for adam and adamw
    weight_decay: 0.005
  lr:
    name: "cosine" # fixed, cosine
    warmup_epochs: 10
    warmup_lr: 0
    base_lr: 0.0001
    final_lr: 0
    milestones: [10, 30]
    gamma: 0.1
progress:
  log2tb: false
  print_freq: 10
  wandb: false
sync_bn: false
eval_freq: 1
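
Note: a rough sketch (not from this commit) of one common reading of the lr block above: linear warmup from warmup_lr to base_lr over warmup_epochs, then cosine decay to final_lr; milestones and gamma would only apply to a step-style ("fixed") schedule. The repository's actual scheduler may interpret these fields differently, and the function name here is hypothetical.

import math

def lr_at_epoch(epoch, warmup_epochs=10, warmup_lr=0.0,
                base_lr=1e-4, final_lr=0.0, num_epochs=100):
    # Linear warmup phase from warmup_lr to base_lr.
    if epoch < warmup_epochs:
        return warmup_lr + (base_lr - warmup_lr) * epoch / warmup_epochs
    # Cosine decay from base_lr down to final_lr over the remaining epochs.
    progress = (epoch - warmup_epochs) / (num_epochs - warmup_epochs)
    return final_lr + 0.5 * (base_lr - final_lr) * (1 + math.cos(math.pi * progress))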
85 changes: 85 additions & 0 deletions codes/eval/configs/finetune/esc50/config_f2.yaml
@@ -0,0 +1,85 @@
apex: true
apex_opt_level: O1
dataset:
  aud_transform: "strong"
  audio_clip_duration: 4
  audio_fps: 16000.0
  audio_fps_out: 112
  batch_size: 64
  fold: 2
  hop_length: 143
  n_fft: 1024
  n_mels: 80
  name: esc50
  test:
    aud_aug_kwargs:
      normalize: true
      trim_pad: true
      DB_NORM: '_KINETICS'
    aug_mode: val
    clips_per_video: 3
    drop_last: false
    mode: "video"
    split: test-{fold:01d}
    use_shuffle: false

  train:
    aug_mode: "train"
    clips_per_video: 1
    mode: "clip"
    split: train-{fold:01d}
    aud_aug_kwargs: # ** VJ-MASK-RC-TW **
      vol: 0.2
      fmask_len: [0, 10]
      tmask_len: [0, 20]
      num_fmask: 2
      num_tmask: 2
      wrap_window: 20
      voljitter: true
      timewarp: true
      fmask: true
      tmask: true
      randcrop: true
      normalize: true
      trim_pad: true
      DB_NORM: '_KINETICS'
    use_shuffle: true
    drop_last: true
model:
  backbone: # configurations are based on the pretraining
    spec_size: [80, 448]
    patch_spatial: [4, 16]
    apply_cls_token: true
    vit_config: "base"
  classifier:
    num_classes: 50
    use_bn: false
    l2_norm: true
    use_dropout: true
    dropout: 0.5
  fwd_kwargs:
    feat_op: 'pool' # 'cls'
  name: ESC50-fintune
num_workers: 20
hyperparams:
  layer_decay: 0.65
  num_epochs: 100
  optimizer:
    name: "adamw"
    betas: [0.9, 0.999]
    momentum: 0.9 # ignored for adam and adamw
    weight_decay: 0.005
  lr:
    name: "cosine" # fixed, cosine
    warmup_epochs: 10
    warmup_lr: 0
    base_lr: 0.0001
    final_lr: 0
    milestones: [10, 30]
    gamma: 0.1
progress:
  log2tb: false
  print_freq: 10
  wandb: false
sync_bn: false
eval_freq: 1
85 changes: 85 additions & 0 deletions codes/eval/configs/finetune/esc50/config_f3.yaml
@@ -0,0 +1,85 @@
apex: true
apex_opt_level: O1
dataset:
  aud_transform: "strong"
  audio_clip_duration: 4
  audio_fps: 16000.0
  audio_fps_out: 112
  batch_size: 64
  fold: 3
  hop_length: 143
  n_fft: 1024
  n_mels: 80
  name: esc50
  test:
    aud_aug_kwargs:
      normalize: true
      trim_pad: true
      DB_NORM: '_KINETICS'
    aug_mode: val
    clips_per_video: 3
    drop_last: false
    mode: "video"
    split: test-{fold:01d}
    use_shuffle: false

  train:
    aug_mode: "train"
    clips_per_video: 1
    mode: "clip"
    split: train-{fold:01d}
    aud_aug_kwargs: # ** VJ-MASK-RC-TW **
      vol: 0.2
      fmask_len: [0, 10]
      tmask_len: [0, 20]
      num_fmask: 2
      num_tmask: 2
      wrap_window: 20
      voljitter: true
      timewarp: true
      fmask: true
      tmask: true
      randcrop: true
      normalize: true
      trim_pad: true
      DB_NORM: '_KINETICS'
    use_shuffle: true
    drop_last: true
model:
  backbone: # configurations are based on the pretraining
    spec_size: [80, 448]
    patch_spatial: [4, 16]
    apply_cls_token: true
    vit_config: "base"
  classifier:
    num_classes: 50
    use_bn: false
    l2_norm: true
    use_dropout: true
    dropout: 0.5
  fwd_kwargs:
    feat_op: 'pool' # 'cls'
  name: ESC50-fintune
num_workers: 20
hyperparams:
  layer_decay: 0.65
  num_epochs: 100
  optimizer:
    name: "adamw"
    betas: [0.9, 0.999]
    momentum: 0.9 # ignored for adam and adamw
    weight_decay: 0.005
  lr:
    name: "cosine" # fixed, cosine
    warmup_epochs: 10
    warmup_lr: 0
    base_lr: 0.0001
    final_lr: 0
    milestones: [10, 30]
    gamma: 0.1
progress:
  log2tb: false
  print_freq: 10
  wandb: false
sync_bn: false
eval_freq: 1
