Commit f70230e

remove fp8 parameters
xrsrke committed Jan 17, 2025
1 parent 222bd00 commit f70230e
Showing 7 changed files with 0 additions and 286 deletions.
10 changes: 0 additions & 10 deletions src/nanotron/constants.py
@@ -11,14 +11,4 @@
CHECKPOINT_FILE_NAME = "checkpoint_metadata.json"
MODEL_CONFIG_FILE_NAME = "model_config.json"


# TODO(xrsrke): remove this shit
ITERATION_STEP = 1
# TODO(xrsrke): refactor to training stage,
# keep it in the same class as iteration_step

is_ready_to_log = False

# TODO(xrsrke): refactor
CPU_WEIGHTS = {}
ACCUM_GRADS = {}
1 change: 0 additions & 1 deletion src/nanotron/fp8/__init__.py
@@ -2,7 +2,6 @@

from nanotron.fp8.dtypes import DTypes # noqa
from nanotron.fp8.linear import FP8Linear # noqa
from nanotron.fp8.parameter import FP8Parameter # noqa
from nanotron.fp8.tensor import FP8Tensor # noqa

try:
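
With the FP8Parameter re-export dropped above (and the class itself deleted in the next file), downstream code that imported it from the package now fails at import time, while the remaining re-exports are unaffected. A small illustration, assuming nanotron is installed at this commit:

# Removed by this commit -- importing it now raises ImportError:
# from nanotron.fp8 import FP8Parameter

# Still re-exported by src/nanotron/fp8/__init__.py after this change:
from nanotron.fp8 import FP8Linear, FP8Tensor
from nanotron.fp8.dtypes import DTypes
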
115 changes: 0 additions & 115 deletions src/nanotron/fp8/parameter.py

This file was deleted.

1 change: 0 additions & 1 deletion src/nanotron/parallel/parameters.py
@@ -243,7 +243,6 @@ def __repr__(self):

@property
def data(self):
# from nanotron.fp8.parameter import FP8Parameter
return self._data

@data.setter
7 changes: 0 additions & 7 deletions src/nanotron/trainer.py
@@ -216,12 +216,6 @@ def __init__(
if hasattr(p, "_is_future_fp8") and p._is_future_fp8 is True:
constants.CPU_WEIGHTS[n.replace("module.", "")] = p.data.cpu().clone()

# NOTE: sanity check all hash are different
# param_hash = []
# for p in self.model.parameters():
# assert hash(p) not in param_hash
# param_hash.append(hash(p))

# NOTE: if we cast model to FP8 before wrapping it with NanotronParameter,
# then we can create a NanotronParameter that has dtype=[torch.int8, torch.uint8]
# which then it allows us to assign [torch.int8, torch.uint8] gradients to the parameter
@@ -231,7 +225,6 @@
# Please ensure that the gradient and the tensor have the same dtype"
# NOTE: the reason that we cast after initializing the optimizer is that
# we want to create some master weights for fp8 parameters, before quantizing them

if self.config.model.dtype == torch.int8:
self.model = convert_model_to_fp8(self.model, config=self.config)

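The NOTE comments kept in the trainer.py hunk above describe the ordering constraint: the optimizer is created while the parameters are still full precision, so master weights can be made for the FP8 parameters, and only afterwards is the model cast via convert_model_to_fp8. A minimal sketch of that ordering in plain PyTorch -- downcast_parameters is a hypothetical stand-in for nanotron's convert_model_to_fp8, which additionally stores FP8/int8 data and scaling metadata:

import torch
from torch import nn

model = nn.Linear(16, 16)

# 1. Create master weights and the optimizer while parameters are still full precision.
master_weights = {name: p.detach().clone().float() for name, p in model.named_parameters()}
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# 2. Only then downcast the parameters in place (stand-in for the FP8 cast).
def downcast_parameters(module: nn.Module) -> nn.Module:
    for p in module.parameters():
        p.data = p.data.to(torch.bfloat16)
    return module

model = downcast_parameters(model)

Only the ordering matters here; the actual code path is the convert_model_to_fp8(self.model, config=self.config) call shown in the hunk above.
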
151 changes: 0 additions & 151 deletions tests/fp8/_test_fp8_parameter.py

This file was deleted.

1 change: 0 additions & 1 deletion tests/fp8/_test_linear.py
@@ -6,7 +6,6 @@
from nanotron.fp8.constants import FP8_DTYPES, QTYPE_TO_DTYPE
from nanotron.fp8.dtypes import DTypes
from nanotron.fp8.linear import FP8Linear, FP8LinearMeta
from nanotron.fp8.parameter import FP8Parameter
from nanotron.fp8.recipe import FP8LinearRecipe
from nanotron.fp8.tensor import FP8Tensor, convert_tensor_from_fp8
from nanotron.fp8.utils import convert_linear_to_fp8, convert_to_fp8_module, is_overflow_underflow_nan
