From 83c95518d95d709555cf6bc4f3d6346f507b4535 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 27 Feb 2024 16:50:14 +0400
Subject: [PATCH 1/8] Make fixes

---
 src/petals/utils/convert_block.py |  2 +-
 src/petals/utils/peft.py          | 77 ++++++++++++-------------------
 2 files changed, 31 insertions(+), 48 deletions(-)

diff --git a/src/petals/utils/convert_block.py b/src/petals/utils/convert_block.py
index 94d3e29f3..9dde41493 100644
--- a/src/petals/utils/convert_block.py
+++ b/src/petals/utils/convert_block.py
@@ -61,7 +61,7 @@ def convert_block(
     if adapters:
         from petals.utils.peft import add_adapter_to_block, create_lora_adapter, load_peft
 
-        create_lora_adapter(block, quant_type=quant_type)
+        create_lora_adapter(block)
         for adapter_name in adapters:
             adapter_config, adapter_state_dict = load_peft(
                 adapter_name,
diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 5d285efa1..91ecbc778 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -12,19 +12,21 @@ from huggingface_hub import HfFileSystem, get_hf_file_metadata, hf_hub_url
 from peft.config import PeftConfig
 from peft.tuners import lora
-from peft.utils import COMMON_LAYERS_PATTERN, CONFIG_NAME, SAFETENSORS_WEIGHTS_NAME
+from peft.utils import CONFIG_NAME, SAFETENSORS_WEIGHTS_NAME
 from safetensors import safe_open
 from safetensors.torch import load_file
 from transformers.utils import get_file_from_repo
 
 from petals.server.block_utils import resolve_block_dtype
-from petals.utils.convert_block import QuantType
 from petals.utils.disk_cache import allow_cache_reads, allow_cache_writes, free_disk_space_for
 from petals.utils.misc import get_size_in_bytes
 
 logger = get_logger(__name__)
 
 
+COMMON_LAYERS_PATTERN = ["layers", "h", "block", "blocks", "layer"]
+
+
 def check_peft_repository(repo_id: str) -> bool:
     return HfFileSystem().exists(f"{repo_id}/{SAFETENSORS_WEIGHTS_NAME}")
 
 
@@ -157,61 +159,41 @@ def active_adapter(self, value: Optional[str]):
 
 class LoraLinear(AdapterContextMixin, lora.Linear):
     """LoRA linear layer that uses adapter selected via using_adapter"""
+    def __init__(self, base_layer, adapter_name: str):
+        nn.Module.__init__(self)
+        lora.LoraLayer.__init__(self, base_layer)
+
+        self._active_adapter = adapter_name
+        self.is_target_conv_1d_layer = False
 
 
-class LoraLinear8bitLt(AdapterContextMixin, lora.Linear8bitLt):
+# TODO: Check if lora.Linear can be mixed with lora.Linear8bitLt
+class LoraLinear8bitLt(LoraLinear, lora.Linear8bitLt):
     """LoRA linear 8-bit with outliers that uses adapter selected via using_adapter"""
 
 
-class LoraLinear4bit(AdapterContextMixin, lora.Linear4bit):
+# TODO: Check if lora.Linear can be mixed with lora.Linear4bit
+class LoraLinear4bit(LoraLinear, lora.Linear4bit):
     """LoRA linear 4-bit that uses adapter selected via using_adapter"""
 
 
-def create_lora_adapter(block, quant_type: QuantType):
-    for _, module in block.named_modules():
+def create_lora_adapter(block):
+    for module_name, module in block.named_modules():
+        if isinstance(module, LoraLinear):
+            continue
         for child_name, child in module.named_children():
-            lora_wrapped_child = None
-            if not isinstance(child, (nn.Linear, bnb.nn.Linear8bitLt, bnb.nn.Linear4bit)):
-                continue
-            if quant_type == QuantType.INT8:
-                kwargs = {
-                    "has_fp16_weights": False,
-                    "threshold": 6.0,
-                    "bias": hasattr(child, "bias") and child.bias is not None,
-                }
-                lora_wrapped_child = LoraLinear8bitLt(
-                    AdapterContextMixin.ADAPTER_NOT_SET,
-                    child.in_features,
-                    child.out_features,
-                    **kwargs,
-                )
-            elif quant_type == QuantType.NF4:
-                kwargs = {
-                    "compress_statistics": True,
-                    "quant_type": "nf4",
-                    "blocksize": 64,
-                    "bias": hasattr(child, "bias") and child.bias is not None,
-                }
-                lora_wrapped_child = LoraLinear4bit(
-                    AdapterContextMixin.ADAPTER_NOT_SET,
-                    child.in_features,
-                    child.out_features,
-                    **kwargs,
-                )
-                lora_wrapped_child.compute_dtype = child.compute_dtype
-            else:
-                bias = hasattr(child, "bias") and child.bias is not None
-                lora_wrapped_child = LoraLinear(
+            lora_class = None
+            if isinstance(child, nn.Linear):
+                lora_class = LoraLinear
+            elif isinstance(child, bnb.nn.Linear8bitLt):
+                lora_class = LoraLinear8bitLt
+            elif isinstance(child, bnb.nn.Linear4bit):
+                lora_class = LoraLinear4bit
+            if lora_class:
+                lora_wrapped_child = lora_class(
+                    child,
                     AdapterContextMixin.ADAPTER_NOT_SET,
-                    child.in_features,
-                    child.out_features,
-                    bias=bias,
                 )
-            if lora_wrapped_child:
-                lora_wrapped_child.weight = child.weight
-                lora_wrapped_child.bias = child.bias
-                for p in lora_wrapped_child.parameters():
-                    p.requires_grad = False
                 setattr(module, child_name, lora_wrapped_child)
 
 
@@ -240,6 +222,7 @@ def add_adapter_to_block(block, block_index, adapter_name, peft_config, peft_sta
                             adapter_name,
                             peft_config["r"],
                             peft_config["lora_alpha"],
+                            use_rslora=peft_config.get("use_rslora", False),
                             lora_dropout=peft_config["lora_dropout"],
                             init_lora_weights=peft_config["init_lora_weights"],
                         )
@@ -275,7 +258,7 @@ def estimate_adapter_memory_per_block(
     with init_empty_weights(include_buffers=True):
         block = block_config.block_class(block_config)
         base_block_parameters = sum(p.numel() for p in block.parameters())
-        create_lora_adapter(block, quant_type=QuantType.NONE)
+        create_lora_adapter(block)
 
     for adapter in adapters:
         peft_config, peft_state_dict = load_peft(adapter, block_idx=0, **load_peft_kwargs)

From cf4f80a02005f1d7b9f3d6eef517ec2c106bb30b Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 27 Feb 2024 16:53:59 +0400
Subject: [PATCH 2/8] lib number

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index dc0bd4eb6..9bd5fad3b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -47,7 +47,7 @@ install_requires =
     cpufeature>=0.2.0; platform_machine == "x86_64"
     packaging>=20.9
     sentencepiece>=0.1.99
-    peft==0.5.0
+    peft==0.8.2
     safetensors>=0.3.1
     Dijkstar>=2.6.0

From a93221a9f38062beddcc9737eebc2c9b786ae271 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Mon, 4 Mar 2024 13:48:33 +0400
Subject: [PATCH 3/8] Fix inference without adapter

---
 src/petals/utils/peft.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 91ecbc778..989332ec0 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -153,6 +153,12 @@ def active_adapter(self):
     def active_adapter(self, value: Optional[str]):
         assert value == self.ADAPTER_NOT_SET, "active adapter can only be changed via .using_adapter" ""
 
+    @property
+    def active_adapters(self):
+        if self._context_active_adapter == self.ADAPTER_NOT_SET:
+            logger.warning(f"Layer {self} was called without using_adapter. This should only be used for debug")
+        return [self._context_active_adapter]
+
 
 using_adapter = AdapterContextMixin.using_adapter

From 7d9afb0fb48e6340a2018c3b57c8b14ec454182a Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 5 Mar 2024 13:08:06 +0400
Subject: [PATCH 4/8] Fix trainability

---
 src/petals/utils/peft.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 989332ec0..510f90e75 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -155,10 +155,16 @@ def active_adapter(self, value: Optional[str]):
 
     @property
     def active_adapters(self):
-        if self._context_active_adapter == self.ADAPTER_NOT_SET:
-            logger.warning(f"Layer {self} was called without using_adapter. This should only be used for debug")
         return [self._context_active_adapter]
 
+    def set_adapter(self, adapter_names: str | list[str]) -> None:
+        """
+        In PEFT, this function making adapter trainable. However, in Petals environment is not possible now. So,
+        this code remove this functionality.
+        Link to peft code: https://github.com/huggingface/peft/blob/98f4db2c7990ef9c879a0e1da9a28a19a04701ef/src/peft/tuners/tuners_utils.py#L463
+        """
+        pass
+
 
 using_adapter = AdapterContextMixin.using_adapter

From e90612221b78206b801ae69e0801fe3ee9d0d0fe Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 5 Mar 2024 13:45:16 +0400
Subject: [PATCH 5/8] Fix versions

---
 src/petals/utils/peft.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 510f90e75..a47bf7993 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -1,7 +1,7 @@
 import contextlib
 import re
 import time
-from typing import Optional, Sequence, Union
+from typing import List, Optional, Sequence, Union
 
 import bitsandbytes as bnb
 import torch
@@ -157,7 +157,7 @@ def active_adapters(self):
         return [self._context_active_adapter]
 
-    def set_adapter(self, adapter_names: str | list[str]) -> None:
+    def set_adapter(self, adapter_names) -> None:
         """
         In PEFT, this function making adapter trainable. However, in Petals environment is not possible now. So,
         this code remove this functionality.
         Link to peft code: https://github.com/huggingface/peft/blob/98f4db2c7990ef9c879a0e1da9a28a19a04701ef/src/peft/tuners/tuners_utils.py#L463
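Note: patches 3-5 replace PEFT's per-layer adapter bookkeeping with an ambient "active adapter" chosen through AdapterContextMixin.using_adapter, which is why active_adapters returns the context value and set_adapter is a deliberate no-op. The listing below is a minimal, self-contained sketch of that pattern; the class and attribute names are simplified stand-ins for illustration only, not the actual Petals implementation:

    import contextlib


    class AdapterContextSketch:
        """Toy stand-in for the AdapterContextMixin idea (simplified, not the real class)."""

        ADAPTER_NOT_SET = "__ADAPTER_NOT_SET"
        _context_active_adapter = ADAPTER_NOT_SET

        @staticmethod
        @contextlib.contextmanager
        def using_adapter(adapter_name):
            # Select an adapter for everything that runs inside this context.
            prev = AdapterContextSketch._context_active_adapter
            AdapterContextSketch._context_active_adapter = adapter_name
            try:
                yield
            finally:
                AdapterContextSketch._context_active_adapter = prev

        @property
        def active_adapters(self):
            # peft >= 0.8 iterates over .active_adapters (a list) at forward time,
            # hence a one-element list instead of a bare adapter name.
            return [self._context_active_adapter]

        def set_adapter(self, adapter_names):
            # Deliberately a no-op: selection happens only through using_adapter().
            pass


    layer = AdapterContextSketch()
    with AdapterContextSketch.using_adapter("my-lora"):
        print(layer.active_adapters)   # ['my-lora']
    print(layer.active_adapters)       # ['__ADAPTER_NOT_SET']
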
From b14305554706f9e413323db184236b6651f49055 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 5 Mar 2024 13:45:41 +0400
Subject: [PATCH 6/8] style

---
 src/petals/utils/peft.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index a47bf7993..04b2a4a3b 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -171,6 +171,7 @@ def set_adapter(self, adapter_names) -> None:
 
 class LoraLinear(AdapterContextMixin, lora.Linear):
     """LoRA linear layer that uses adapter selected via using_adapter"""
+
     def __init__(self, base_layer, adapter_name: str):
         nn.Module.__init__(self)
         lora.LoraLayer.__init__(self, base_layer)

From 96cf244faea352ec4097a3bc336a27657bf2c0b4 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Fri, 29 Mar 2024 12:23:51 +0100
Subject: [PATCH 7/8] Update comments

Co-authored-by: Max Ryabinin
---
 src/petals/utils/peft.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index 04b2a4a3b..bed8dd78c 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -159,8 +159,8 @@ def active_adapters(self):
 
     def set_adapter(self, adapter_names) -> None:
         """
-        In PEFT, this function making adapter trainable. However, in Petals environment is not possible now. So,
-        this code remove this functionality.
+        In PEFT, this function makes the adapter trainable. However, in Petals environment this is not possible now. Thus,
+        this code removes this functionality.
         Link to peft code: https://github.com/huggingface/peft/blob/98f4db2c7990ef9c879a0e1da9a28a19a04701ef/src/peft/tuners/tuners_utils.py#L463
         """
         pass

From 3f66c3615a3a0e727ca4f1bc54ea460db1e13ca8 Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Fri, 29 Mar 2024 12:26:41 +0100
Subject: [PATCH 8/8] Remove unnesc todo

---
 src/petals/utils/peft.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/petals/utils/peft.py b/src/petals/utils/peft.py
index bed8dd78c..3edc9376b 100644
--- a/src/petals/utils/peft.py
+++ b/src/petals/utils/peft.py
@@ -180,12 +180,10 @@ def __init__(self, base_layer, adapter_name: str):
         self.is_target_conv_1d_layer = False
 
 
-# TODO: Check if lora.Linear can be mixed with lora.Linear8bitLt
 class LoraLinear8bitLt(LoraLinear, lora.Linear8bitLt):
     """LoRA linear 8-bit with outliers that uses adapter selected via using_adapter"""
 
 
-# TODO: Check if lora.Linear can be mixed with lora.Linear4bit
 class LoraLinear4bit(LoraLinear, lora.Linear4bit):
     """LoRA linear 4-bit that uses adapter selected via using_adapter"""
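
Note: in peft 0.8.x a LoRA layer wraps an existing module as its base_layer instead of being rebuilt with copied weights, which is what lets PATCH 1 dispatch purely on the child module's type. The listing below is a toy sketch of that wrapping pattern on plain nn.Linear layers; WrappedLinear and wrap_linears are hypothetical names used only for illustration, not Petals or PEFT APIs:

    import torch.nn as nn


    class WrappedLinear(nn.Module):
        """Toy counterpart of a base_layer-style LoRA wrapper (illustration only)."""

        def __init__(self, base_layer: nn.Linear):
            super().__init__()
            self.base_layer = base_layer  # original weights stay in place, nothing is copied
            for param in self.base_layer.parameters():
                param.requires_grad = False

        def forward(self, x):
            # A real LoRA wrapper would add the low-rank update of the active adapter here.
            return self.base_layer(x)


    def wrap_linears(block: nn.Module):
        for module in block.modules():
            if isinstance(module, WrappedLinear):
                continue  # same role as the isinstance(module, LoraLinear) guard in PATCH 1
            for name, child in list(module.named_children()):
                if isinstance(child, nn.Linear):
                    setattr(module, name, WrappedLinear(child))


    block = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 4))
    wrap_linears(block)
    print(block)  # both nn.Linear children are now wrapped exactly once

Without the isinstance guard in the traversal, the walk would descend into an already-wrapped layer and wrap its base_layer a second time, which mirrors why PATCH 1 skips modules that are already LoraLinear instances.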