From 83c95518d95d709555cf6bc4f3d6346f507b4535 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 27 Feb 2024 16:50:14 +0400
Subject: [PATCH 1/8] Make fixes

---
 src/petals/utils/convert_block.py |  2 +-
 src/petals/utils/peft.py          | 77 ++++++++++++-------------------
 2 files changed, 31 insertions(+), 48 deletions(-)

diff --git a/src/petals/utils/convert_block.py b/src/petals/utils/convert_block.py
index 94d3e29f3..9dde41493 100644
--- a/src/petals/utils/convert_block.py
+++ b/src/petals/utils/convert_block.py
@@ -61,7 +61,7 @@ def convert_block(
     if adapters:
         from petals.utils.peft import add_adapter_to_block, create_lora_adapter, load_peft
 
-        create_lora_adapter(block, quant_type=quant_type)
+        create_lora_adapter(block)
         for adapter_name in adapters:
             adapter_config, adapter_state_dict = load_peft(
                 adapter_name,
diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 5d285efa1..91ecbc778 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -12,19 +12,21 @@ from huggingface_hub import HfFileSystem, get_hf_file_metadata, hf_hub_url
 from peft.config import PeftConfig
 from peft.tuners import lora
-from peft.utils import COMMON_LAYERS_PATTERN, CONFIG_NAME, SAFETENSORS_WEIGHTS_NAME
+from peft.utils import CONFIG_NAME, SAFETENSORS_WEIGHTS_NAME
 from safetensors import safe_open
 from safetensors.torch import load_file
 from transformers.utils import get_file_from_repo
 
 from petals.server.block_utils import resolve_block_dtype
-from petals.utils.convert_block import QuantType
 from petals.utils.disk_cache import allow_cache_reads, allow_cache_writes, free_disk_space_for
 from petals.utils.misc import get_size_in_bytes
 
 logger = get_logger(__name__)
 
 
+COMMON_LAYERS_PATTERN = ["layers", "h", "block", "blocks", "layer"]
+
+
 def check_peft_repository(repo_id: str) -> bool:
     return HfFileSystem().exists(f"{repo_id}/{SAFETENSORS_WEIGHTS_NAME}")
 
 
@@ -157,61 +159,41 @@ def active_adapter(self, value: Optional[str]):
 
 class LoraLinear(AdapterContextMixin, lora.Linear):
     """LoRA linear layer that uses adapter selected via using_adapter"""
+    def __init__(self, base_layer, adapter_name: str):
+        nn.Module.__init__(self)
+        lora.LoraLayer.__init__(self, base_layer)
+
+        self._active_adapter = adapter_name
+        self.is_target_conv_1d_layer = False
 
 
-class LoraLinear8bitLt(AdapterContextMixin, lora.Linear8bitLt):
+# TODO: Check if lora.Linear can be mixed with lora.Linear8bitLt
+class LoraLinear8bitLt(LoraLinear, lora.Linear8bitLt):
     """LoRA linear 8-bit with outliers that uses adapter selected via using_adapter"""
 
 
-class LoraLinear4bit(AdapterContextMixin, lora.Linear4bit):
+# TODO: Check if lora.Linear can be mixed with lora.Linear4bit
+class LoraLinear4bit(LoraLinear, lora.Linear4bit):
     """LoRA linear 4-bit that uses adapter selected via using_adapter"""
 
 
-def create_lora_adapter(block, quant_type: QuantType):
-    for _, module in block.named_modules():
+def create_lora_adapter(block):
+    for module_name, module in block.named_modules():
+        if isinstance(module, LoraLinear):
+            continue
         for child_name, child in module.named_children():
-            lora_wrapped_child = None
-            if not isinstance(child, (nn.Linear, bnb.nn.Linear8bitLt, bnb.nn.Linear4bit)):
-                continue
-            if quant_type == QuantType.INT8:
-                kwargs = {
-                    "has_fp16_weights": False,
-                    "threshold": 6.0,
-                    "bias": hasattr(child, "bias") and child.bias is not None,
-                }
-                lora_wrapped_child = LoraLinear8bitLt(
-                    AdapterContextMixin.ADAPTER_NOT_SET,
-                    child.in_features,
-                    child.out_features,
-                    **kwargs,
-                )
-            elif quant_type == QuantType.NF4:
-                kwargs = {
-                    "compress_statistics": True,
-                    "quant_type": "nf4",
-                    "blocksize": 64,
-                    "bias": hasattr(child, "bias") and child.bias is not None,
-                }
-                lora_wrapped_child = LoraLinear4bit(
-                    AdapterContextMixin.ADAPTER_NOT_SET,
-                    child.in_features,
-                    child.out_features,
-                    **kwargs,
-                )
-                lora_wrapped_child.compute_dtype = child.compute_dtype
-            else:
-                bias = hasattr(child, "bias") and child.bias is not None
-                lora_wrapped_child = LoraLinear(
+            lora_class = None
+            if isinstance(child, nn.Linear):
+                lora_class = LoraLinear
+            elif isinstance(child, bnb.nn.Linear8bitLt):
+                lora_class = LoraLinear8bitLt
+            elif isinstance(child, bnb.nn.Linear4bit):
+                lora_class = LoraLinear4bit
+            if lora_class:
+                lora_wrapped_child = lora_class(
+                    child,
                     AdapterContextMixin.ADAPTER_NOT_SET,
-                    child.in_features,
-                    child.out_features,
-                    bias=bias,
                 )
-            if lora_wrapped_child:
-                lora_wrapped_child.weight = child.weight
-                lora_wrapped_child.bias = child.bias
-                for p in lora_wrapped_child.parameters():
-                    p.requires_grad = False
                 setattr(module, child_name, lora_wrapped_child)
 
 
@@ -240,6 +222,7 @@ def add_adapter_to_block(block, block_index, adapter_name, peft_config, peft_sta
                             adapter_name,
                             peft_config["r"],
                             peft_config["lora_alpha"],
+                            use_rslora=peft_config.get("use_rslora", False),
                             lora_dropout=peft_config["lora_dropout"],
                             init_lora_weights=peft_config["init_lora_weights"],
                         )
@@ -275,7 +258,7 @@ def estimate_adapter_memory_per_block(
     with init_empty_weights(include_buffers=True):
         block = block_config.block_class(block_config)
         base_block_parameters = sum(p.numel() for p in block.parameters())
-        create_lora_adapter(block, quant_type=QuantType.NONE)
+        create_lora_adapter(block)
 
     for adapter in adapters:
         peft_config, peft_state_dict = load_peft(adapter, block_idx=0, **load_peft_kwargs)

From cf4f80a02005f1d7b9f3d6eef517ec2c106bb30b Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 27 Feb 2024 16:53:59 +0400
Subject: [PATCH 2/8] lib number

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index dc0bd4eb6..9bd5fad3b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -47,7 +47,7 @@ install_requires =
     cpufeature>=0.2.0; platform_machine == "x86_64"
     packaging>=20.9
     sentencepiece>=0.1.99
-    peft==0.5.0
+    peft==0.8.2
     safetensors>=0.3.1
     Dijkstar>=2.6.0

From a93221a9f38062beddcc9737eebc2c9b786ae271 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Mon, 4 Mar 2024 13:48:33 +0400
Subject: [PATCH 3/8] Fix inference without adapter

---
 src/petals/utils/peft.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 91ecbc778..989332ec0 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -153,6 +153,12 @@ def active_adapter(self):
     def active_adapter(self, value: Optional[str]):
         assert value == self.ADAPTER_NOT_SET, "active adapter can only be changed via .using_adapter" ""
 
+    @property
+    def active_adapters(self):
+        if self._context_active_adapter == self.ADAPTER_NOT_SET:
+            logger.warning(f"Layer {self} was called without using_adapter. This should only be used for debug")
+        return [self._context_active_adapter]
+
 
 using_adapter = AdapterContextMixin.using_adapter

From 7d9afb0fb48e6340a2018c3b57c8b14ec454182a Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 5 Mar 2024 13:08:06 +0400
Subject: [PATCH 4/8] Fix trainability

---
 src/petals/utils/peft.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 989332ec0..510f90e75 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -155,10 +155,16 @@ def active_adapter(self, value: Optional[str]):
 
     @property
     def active_adapters(self):
-        if self._context_active_adapter == self.ADAPTER_NOT_SET:
-            logger.warning(f"Layer {self} was called without using_adapter. This should only be used for debug")
         return [self._context_active_adapter]
 
+    def set_adapter(self, adapter_names: str | list[str]) -> None:
+        """
+        In PEFT, this function making adapter trainable. However, in Petals environment is not possible now. So,
+        this code remove this functionality.
+        Link to peft code: https://github.com/huggingface/peft/blob/98f4db2c7990ef9c879a0e1da9a28a19a04701ef/src/peft/tuners/tuners_utils.py#L463
+        """
+        pass
+
 
 using_adapter = AdapterContextMixin.using_adapter

From e90612221b78206b801ae69e0801fe3ee9d0d0fe Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 5 Mar 2024 13:45:16 +0400
Subject: [PATCH 5/8] Fix versions

---
 src/petals/utils/peft.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 510f90e75..a47bf7993 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -1,7 +1,7 @@
 import contextlib
 import re
 import time
-from typing import Optional, Sequence, Union
+from typing import List, Optional, Sequence, Union
 
 import bitsandbytes as bnb
 import torch
@@ -157,7 +157,7 @@ def active_adapters(self):
         return [self._context_active_adapter]
 
-    def set_adapter(self, adapter_names: str | list[str]) -> None:
+    def set_adapter(self, adapter_names) -> None:
         """
         In PEFT, this function making adapter trainable. However, in Petals environment is not possible now. So,
         this code remove this functionality.
         Link to peft code: https://github.com/huggingface/peft/blob/98f4db2c7990ef9c879a0e1da9a28a19a04701ef/src/peft/tuners/tuners_utils.py#L463
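Note: patches 3-5 replace PEFT's per-layer adapter bookkeeping with an ambient "active adapter" chosen through AdapterContextMixin.using_adapter, which is why active_adapters returns the context value and set_adapter is a deliberate no-op. The listing below is a minimal, self-contained sketch of that pattern; the class and attribute names are simplified stand-ins for illustration only, not the actual Petals implementation:

    import contextlib


    class AdapterContextSketch:
        """Toy stand-in for the AdapterContextMixin idea (simplified, not the real class)."""

        ADAPTER_NOT_SET = "__ADAPTER_NOT_SET"
        _context_active_adapter = ADAPTER_NOT_SET

        @staticmethod
        @contextlib.contextmanager
        def using_adapter(adapter_name):
            # Select an adapter for everything that runs inside this context.
            prev = AdapterContextSketch._context_active_adapter
            AdapterContextSketch._context_active_adapter = adapter_name
            try:
                yield
            finally:
                AdapterContextSketch._context_active_adapter = prev

        @property
        def active_adapters(self):
            # peft >= 0.8 iterates over .active_adapters (a list) at forward time,
            # hence a one-element list instead of a bare adapter name.
            return [self._context_active_adapter]

        def set_adapter(self, adapter_names):
            # Deliberately a no-op: selection happens only through using_adapter().
            pass


    layer = AdapterContextSketch()
    with AdapterContextSketch.using_adapter("my-lora"):
        print(layer.active_adapters)   # ['my-lora']
    print(layer.active_adapters)       # ['__ADAPTER_NOT_SET']
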
From b14305554706f9e413323db184236b6651f49055 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 5 Mar 2024 13:45:41 +0400
Subject: [PATCH 6/8] style

---
 src/petals/utils/peft.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index a47bf7993..04b2a4a3b 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -171,6 +171,7 @@ def set_adapter(self, adapter_names) -> None:
 
 class LoraLinear(AdapterContextMixin, lora.Linear):
     """LoRA linear layer that uses adapter selected via using_adapter"""
+
     def __init__(self, base_layer, adapter_name: str):
         nn.Module.__init__(self)
         lora.LoraLayer.__init__(self, base_layer)

From 96cf244faea352ec4097a3bc336a27657bf2c0b4 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Fri, 29 Mar 2024 12:23:51 +0100
Subject: [PATCH 7/8] Update comments

Co-authored-by: Max Ryabinin
---
 src/petals/utils/peft.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 04b2a4a3b..bed8dd78c 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -159,8 +159,8 @@ def active_adapters(self):
 
     def set_adapter(self, adapter_names) -> None:
         """
-        In PEFT, this function making adapter trainable. However, in Petals environment is not possible now. So,
-        this code remove this functionality.
+        In PEFT, this function makes the adapter trainable. However, in Petals environment this is not possible now. Thus,
+        this code removes this functionality.
         Link to peft code: https://github.com/huggingface/peft/blob/98f4db2c7990ef9c879a0e1da9a28a19a04701ef/src/peft/tuners/tuners_utils.py#L463
         """
         pass

From 3f66c3615a3a0e727ca4f1bc54ea460db1e13ca8 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Fri, 29 Mar 2024 12:26:41 +0100
Subject: [PATCH 8/8] Remove unnesc todo

---
 src/petals/utils/peft.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index bed8dd78c..3edc9376b 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -180,12 +180,10 @@ def __init__(self, base_layer, adapter_name: str):
         self.is_target_conv_1d_layer = False
 
 
-# TODO: Check if lora.Linear can be mixed with lora.Linear8bitLt
 class LoraLinear8bitLt(LoraLinear, lora.Linear8bitLt):
     """LoRA linear 8-bit with outliers that uses adapter selected via using_adapter"""
 
 
-# TODO: Check if lora.Linear can be mixed with lora.Linear4bit
 class LoraLinear4bit(LoraLinear, lora.Linear4bit):
     """LoRA linear 4-bit that uses adapter selected via using_adapter"""
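
Note: in peft 0.8.x a LoRA layer wraps an existing module as its base_layer instead of being rebuilt with copied weights, which is what lets PATCH 1 dispatch purely on the child module's type. The listing below is a toy sketch of that wrapping pattern on plain nn.Linear layers; WrappedLinear and wrap_linears are hypothetical names used only for illustration, not Petals or PEFT APIs:

    import torch.nn as nn


    class WrappedLinear(nn.Module):
        """Toy counterpart of a base_layer-style LoRA wrapper (illustration only)."""

        def __init__(self, base_layer: nn.Linear):
            super().__init__()
            self.base_layer = base_layer  # original weights stay in place, nothing is copied
            for param in self.base_layer.parameters():
                param.requires_grad = False

        def forward(self, x):
            # A real LoRA wrapper would add the low-rank update of the active adapter here.
            return self.base_layer(x)


    def wrap_linears(block: nn.Module):
        for module in block.modules():
            if isinstance(module, WrappedLinear):
                continue  # same role as the isinstance(module, LoraLinear) guard in PATCH 1
            for name, child in list(module.named_children()):
                if isinstance(child, nn.Linear):
                    setattr(module, name, WrappedLinear(child))


    block = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 4))
    wrap_linears(block)
    print(block)  # both nn.Linear children are now wrapped exactly once

Without the isinstance guard in the traversal, the walk would descend into an already-wrapped layer and wrap its base_layer a second time, which mirrors why PATCH 1 skips modules that are already LoraLinear instances.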