feat: support classification #95

Merged (23 commits, merged Nov 26, 2024)
Changes from 9 commits
Commits (23)
b2bc253 feat: scripts prep classification (n0w0f, Aug 10, 2024)
59fc287 chore: configs for bg and form (n0w0f, Aug 11, 2024)
7ea9c0b configs for llama-run (n0w0f, Aug 12, 2024)
94cc3e9 chore: update config (n0w0f, Aug 13, 2024)
015c83c Merge branch 'main' of https://github.com/lamalab-org/MatText into re… (n0w0f, Aug 13, 2024)
4a572ed Merge branch 'revision-bg' of https://github.com/lamalab-org/MatText … (n0w0f, Aug 13, 2024)
f03074e feat: add classification (n0w0f, Aug 13, 2024)
ea18cf3 chore: update (n0w0f, Aug 13, 2024)
3cafe20 fix: classification benchmarking (n0w0f, Aug 13, 2024)
0dfe9ee Update src/mattext/main.py (kjappelbaum, Aug 14, 2024)
0de6dda Update revision-scripts/mp_classification.py (kjappelbaum, Aug 14, 2024)
07ef612 Update revision-scripts/matbench_is_metal.py (kjappelbaum, Aug 14, 2024)
d872722 chore: remove deduplication, dictionary mapping task, multifold under… (n0w0f, Aug 20, 2024)
f4e8f4e chore: abstract out benchmarking to a base class for reg and classifi… (n0w0f, Aug 20, 2024)
5e5bdd1 chore: abstract out finetuning to base class for reg and classification (n0w0f, Aug 20, 2024)
c6548a8 chore: abstract out inference from reg and classification to base (n0w0f, Aug 20, 2024)
824471a chore: refactor task (n0w0f, Aug 20, 2024)
e26424b chore: improve Mattext Tasks (n0w0f, Aug 20, 2024)
624dd4c chore: improve benchmarking abstraction (n0w0f, Aug 21, 2024)
c45f568 refactor (n0w0f, Aug 21, 2024)
e29825f Merge branch 'main' of https://github.com/lamalab-org/MatText into cl… (n0w0f, Sep 22, 2024)
bfd872c fix: add logger (n0w0f, Sep 22, 2024)
2d5d901 fix: empty gpu vram after each fold (n0w0f, Oct 11, 2024)
33 changes: 33 additions & 0 deletions conf/bandgap.yaml
@@ -0,0 +1,33 @@


hydra:
  job:
    name: bandgap
  run:
    dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
    subdir: ${hydra.job.override_dirname}

# launcher:
#   _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
#   submitit_folder: ${hydra.sweep.dir}/.submitit/%j
#   timeout_min: 3600
#   mem_gb: 160
#   nodes: 1
#   #gpus_per_task: 1
#   gres: gpu:1
#   #gpus_per_node: 2
#   name: ${hydra.job.name}
#   partition: 'gpu'
#   additional_parameters:
#     nodelist: 'gpu[008,013-017]'
#     tasks_per_node: 1

defaults:
  - model: none
  # - override hydra/launcher: submitit_slurm

runs:
  - name: benchmark_run
    tasks: [benchmark]
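Since conf/bandgap.yaml is a standard Hydra primary config, it can be composed programmatically as well as from the CLI. A minimal sketch (not part of this diff), assuming the repository's conf/ tree is available, including the model group referenced by the defaults list:

```python
# Minimal sketch: compose conf/bandgap.yaml with Hydra's compose API and
# inspect the "runs" list it defines. Assumes conf/ is reachable from here.
from hydra import compose, initialize

with initialize(version_base=None, config_path="conf"):
    cfg = compose(config_name="bandgap")
    for run in cfg.runs:
        print(run.name, list(run.tasks))  # -> benchmark_run ['benchmark']
```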
53 changes: 31 additions & 22 deletions conf/benchmark.yaml
@@ -1,24 +1,33 @@
Before:

hydra:
  job:
    name: benchmark
  run:
    dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
    subdir: ${hydra.job.override_dirname}

defaults:
  - model: none

runs:
  - name: benchmark_run
    tasks: [benchmark]

After:

hydra:
  job:
    name: benchmark
  run:
    dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
    subdir: ${hydra.job.override_dirname}

# launcher:
#   _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
#   submitit_folder: ${hydra.sweep.dir}/.submitit/%j
#   timeout_min: 3600
#   mem_gb: 160
#   nodes: 1
#   #gpus_per_task: 1
#   gres: gpu:1
#   #gpus_per_node: 2
#   name: ${hydra.job.name}
#   partition: 'gpu'
#   additional_parameters:
#     nodelist: 'gpu[008,013-017]'
#     tasks_per_node: 1

defaults:
  - model: none
  # - override hydra/launcher: submitit_slurm

runs:
  - name: benchmark_run
    tasks: [benchmark]
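The commented-out block documents how these jobs can be sent to SLURM: re-enable it together with the `override hydra/launcher: submitit_slurm` default and launch with Hydra's multirun flag (`-m`). Below is a hypothetical sketch of the entry-point shape these configs assume; the PR's actual src/mattext/main.py may differ:

```python
# Hypothetical entry-point sketch; MatText's real src/mattext/main.py may
# differ. It composes conf/benchmark.yaml and walks the "runs" list.
import hydra
from omegaconf import DictConfig

@hydra.main(version_base=None, config_path="conf", config_name="benchmark")
def main(cfg: DictConfig) -> None:
    for run in cfg.runs:
        for task in run.tasks:  # e.g. "benchmark"
            print(f"dispatching {task!r} for run {run.name!r}")

if __name__ == "__main__":
    main()
```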
19 changes: 19 additions & 0 deletions conf/bg/atoms.yaml
@@ -0,0 +1,19 @@
# @package _global_
model:
  representation: atom_sequences
  dataset: "bandgap"
  dataset_type: matbench
  special_num_token: False
  checkpoint: n0w0f/MatText-atom-seq-2m
  logging:
    wandb_project: revision-bg

  finetune:
    model_name: revision-bg
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: n0w0f/MatText-atom-seq-2m
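The `# @package _global_` header on these group files matters: when one is selected, its keys merge at the root of the composed config rather than under a `bg` node. A sketch under that assumption, taking conf/bandgap.yaml as the primary config; the `+bg=atoms` override is illustrative:

```python
# Sketch: "+bg=atoms" appends the bg config group; because atoms.yaml
# opens with "# @package _global_", its keys land at the root of cfg,
# not under cfg.bg.
from hydra import compose, initialize

with initialize(version_base=None, config_path="conf"):
    cfg = compose(config_name="bandgap", overrides=["+bg=atoms"])
    print(cfg.model.representation)  # atom_sequences
```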


17 changes: 17 additions & 0 deletions conf/bg/atoms_params.yaml
@@ -0,0 +1,17 @@
# @package _global_
model:
  representation: atom_sequences_plusplus
  dataset: "bandgap"
  dataset_type: matbench
  special_num_token: False
  checkpoint: n0w0f/MatText-atom-seq-plusplus-2m
  logging:
    wandb_project: revision-bg

  finetune:
    model_name: revision-bg
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024


17 changes: 17 additions & 0 deletions conf/bg/cifp1.yaml
@@ -0,0 +1,17 @@
# @package _global_
model:
  representation: cif_p1
  dataset: "bandgap"
  dataset_type: matbench
  special_num_token: False
  checkpoint: n0w0f/MatText-cifp1-2m
  logging:
    wandb_project: revision-bg

  finetune:
    model_name: revision-bg
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 128
    path:
      pretrained_checkpoint: n0w0f/MatText-cifp1-2m
17 changes: 17 additions & 0 deletions conf/bg/cifpsym.yaml
@@ -0,0 +1,17 @@
# @package _global_
model:
  representation: cif_symmetrized
  dataset: "bandgap"
  dataset_type: matbench
  special_num_token: False
  checkpoint: n0w0f/MatText-cifsymmetrized-2m
  logging:
    wandb_project: revision-bg

  finetune:
    model_name: revision-bg
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: n0w0f/MatText-cifsymmetrized-2m
17 changes: 17 additions & 0 deletions conf/bg/composition.yaml
@@ -0,0 +1,17 @@
# @package _global_
model:
  representation: composition
  dataset: "bandgap"
  dataset_type: matbench
  special_num_token: False
  checkpoint: n0w0f/MatText-composition-2m
  logging:
    wandb_project: revision-bg

  finetune:
    model_name: revision-bg
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024


16 changes: 16 additions & 0 deletions conf/bg/crystal_llm.yaml
@@ -0,0 +1,16 @@
# @package _global_
model:
  representation: crystal_text_llm
  dataset: "bandgap"
  dataset_type: matbench
  special_num_token: False
  checkpoint: /home/so87pot/n0w0f/structllm_ckpt/alpaca_ckpt/checkpoint-393000
  logging:
    wandb_project: revision-bg

  finetune:
    model_name: revision-bg
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 256

17 changes: 17 additions & 0 deletions conf/bg/local_env.yaml
@@ -0,0 +1,17 @@
# @package _global_
model:
  representation: local_env
  dataset: "bandgap"
  dataset_type: matbench
  special_num_token: False
  checkpoint: /home/so87pot/n0w0f/structllm_ckpt/santiago_ckpt_rt/checkpoint-95000
  logging:
    wandb_project: revision-bg

  finetune:
    model_name: revision-bg
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 256
    path:
      pretrained_checkpoint: /home/so87pot/n0w0f/structllm_ckpt/santiago_ckpt_rt/checkpoint-95000
17 changes: 17 additions & 0 deletions conf/bg/slices.yaml
@@ -0,0 +1,17 @@
# @package _global_
model:
  representation: slices
  dataset: "bandgap"
  dataset_type: matbench
  special_num_token: False
  checkpoint: n0w0f/MatText-slices-2m
  logging:
    wandb_project: revision-bg

  finetune:
    model_name: revision-bg
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 256
    path:
      pretrained_checkpoint: n0w0f/MatText-slices-2m
17 changes: 17 additions & 0 deletions conf/bg/zmatrix.yaml
@@ -0,0 +1,17 @@
# @package _global_
model:
  representation: zmatrix
  dataset: "bandgap"
  dataset_type: matbench
  special_num_token: False
  checkpoint: n0w0f/MatText-zmatrix-2m
  logging:
    wandb_project: revision-bg

  finetune:
    model_name: revision-bg
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 256
    path:
      pretrained_checkpoint: n0w0f/MatText-zmatrix-2m
13 changes: 13 additions & 0 deletions conf/bg2m/atoms.yaml
@@ -0,0 +1,13 @@
# @package _global_
model:
  representation: atoms_params
  logging:
    wandb_project: 2m_intel_ft

  finetune:
    model_name: 2m_intel_ft
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000
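The bg2m configs pin machine-specific checkpoint paths. With Hydra, any of them can be redirected at compose or CLI time through a dot-path override. A sketch: the `+bg2m=atoms` override and the `/tmp` path are illustrative, not from this PR, and the access path assumes `finetune` nests under `model` as rendered above:

```python
# Sketch: redirect a machine-specific checkpoint with a dot-path override.
# "+bg2m=atoms" and /tmp/checkpoint-1000 are illustrative only.
from hydra import compose, initialize

with initialize(version_base=None, config_path="conf"):
    cfg = compose(
        config_name="benchmark",
        overrides=[
            "+bg2m=atoms",
            "model.finetune.path.pretrained_checkpoint=/tmp/checkpoint-1000",
        ],
    )
    print(cfg.model.finetune.path.pretrained_checkpoint)
```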
13 changes: 13 additions & 0 deletions conf/bg2m/atoms_params.yaml
@@ -0,0 +1,13 @@
# @package _global_
model:
  representation: atoms_params
  logging:
    wandb_project: 2m_intel_ft

  finetune:
    model_name: 2m_intel_ft
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000
13 changes: 13 additions & 0 deletions conf/bg2m/cifp1.yaml
@@ -0,0 +1,13 @@
# @package _global_
model:
  representation: cif_p1
  logging:
    wandb_project: 2m_intel_ft

  finetune:
    model_name: 2m_intel_ft
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 32
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_p1_pt_30k_rt_2/checkpoint-46000
13 changes: 13 additions & 0 deletions conf/bg2m/cifsymmetrized.yaml
@@ -0,0 +1,13 @@
# @package _global_
model:
  representation: cif_symmetrized
  logging:
    wandb_project: 2m_intel_ft

  finetune:
    model_name: 2m_intel_ft
    context_length: 1024
    training_arguments:
      per_device_train_batch_size: 32
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/cif_symmetrized_pt_30k_rt/checkpoint-45000
13 changes: 13 additions & 0 deletions conf/bg2m/composition.yaml
@@ -0,0 +1,13 @@
# @package _global_
model:
  representation: composition
  logging:
    wandb_project: 2m_intel_ft

  finetune:
    model_name: 2m_intel_ft
    context_length: 32
    training_arguments:
      per_device_train_batch_size: 1024
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/composition_pt_30k_rt/checkpoint-1000
13 changes: 13 additions & 0 deletions conf/bg2m/crystal_llm.yaml
@@ -0,0 +1,13 @@
# @package _global_
model:
  representation: crystal_llm_rep
  logging:
    wandb_project: 2m_intel_ft

  finetune:
    model_name: 2m_intel_ft
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/crystal_llm_rep_pt_30k_rt/checkpoint-11000
13 changes: 13 additions & 0 deletions conf/bg2m/local_env.yaml
@@ -0,0 +1,13 @@
# @package _global_
model:
  representation: zmatrix
  logging:
    wandb_project: 2m_intel_ft

  finetune:
    model_name: 2m_intel_ft
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000
13 changes: 13 additions & 0 deletions conf/bg2m/slice.yaml
@@ -0,0 +1,13 @@
# @package _global_
model:
  representation: slice
  logging:
    wandb_project: 2m_intel_ft

  finetune:
    model_name: 2m_intel_ft
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop2/checkpoints/checkpoints/slice_pt_30k_rt/checkpoint-23000
13 changes: 13 additions & 0 deletions conf/bg2m/zmatrix.yaml
@@ -0,0 +1,13 @@
# @package _global_
model:
  representation: zmatrix
  logging:
    wandb_project: 2m_intel_ft

  finetune:
    model_name: 2m_intel_ft
    context_length: 512
    training_arguments:
      per_device_train_batch_size: 64
    path:
      pretrained_checkpoint: /work/so87pot/mattext/megaloop/checkpoints/checkpoints/atoms_params_pt_30k_atoms/checkpoint-1000