From 3f68aabdb8af66eb9ddf59d334aa6523f3a18700 Mon Sep 17 00:00:00 2001
From: zhuwenxing <wenxing.zhu@zilliz.com>
Date: Mon, 6 Nov 2023 15:20:46 +0800
Subject: [PATCH] [test]Add test for array data type

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
---
 .github/workflows/main.yaml            |   4 +-
 tests/base/client_base.py              |  61 ++++++--
 tests/base/collection_wrapper.py       |  12 ++
 tests/common/common_func.py            |   5 +
 tests/common/common_type.py            |   5 +-
 tests/requirements.txt                 |   2 +-
 tests/testcases/test_create_backup.py  |  85 ++++++++++-
 tests/testcases/test_get_backup.py     |   2 +-
 tests/testcases/test_get_restore.py    |   2 +-
 tests/testcases/test_list_backup.py    |   2 +-
 tests/testcases/test_restore_backup.py | 194 ++++++++++++++++++++++++-
 11 files changed, 343 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index e12cb9d..d514aa2 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -410,7 +410,7 @@ jobs:
       matrix:
         deploy_tools: [docker-compose]
         milvus_mode: [standalone]
-        case_tag: [L0, L1]
+        case_tag: [L0, L1, L2]
         exclude:
           - deploy_tools: helm
             milvus_mode: cluster
@@ -518,7 +518,7 @@ jobs:
         if: ${{ ! success() }}
         uses: actions/upload-artifact@v2
         with:
-          name: api-test-logs-${{ matrix.deploy_tools }}-${{ matrix.milvus_mode }}
+          name: api-test-logs-${{ matrix.deploy_tools }}-${{ matrix.milvus_mode }}-${{ matrix.case_tag }}
           path: |
             ./logs
             ./server.log
diff --git a/tests/base/client_base.py b/tests/base/client_base.py
index 674a5ac..b2e98b5 100644
--- a/tests/base/client_base.py
+++ b/tests/base/client_base.py
@@ -1,4 +1,5 @@
 import sys
+import time
 from pymilvus import DefaultConfig, DataType, db
 
 sys.path.append("..")
@@ -338,37 +339,71 @@ def is_binary_by_schema(self, schema):
             if field.dtype == DataType.FLOAT_VECTOR:
                 return False
 
-    def compare_collections(self, src_name, dist_name, output_fields=None):
+    def compare_collections(self, src_name, dist_name, output_fields=None, verify_by_query=False):
+        """Assert that collection `dist_name` matches collection `src_name`.
+
+        Schema and partition count are always compared. With verify_by_query=False only
+        num_entities is compared; otherwise both collections get an index (best effort), are
+        loaded and queried, and the rows, sorted by the int64 key field, must be equal.
+        """
         if output_fields is None:
-            output_fields = [ct.default_int64_field_name, ct.default_json_field_name]
+            output_fields = ["*"]
         collection_src, _ = self.collection_wrap.init_collection(name=src_name)
         collection_dist, _ = self.collection_wrap.init_collection(name=dist_name)
-        assert collection_src.num_entities == collection_dist.num_entities, \
-            f"collection_src {src_name} num_entities: {collection_src.num_entities} != " \
-            f"collection_dist {dist_name} num_entities: {collection_dist.num_entities}"
+        log.info(f"collection_src schema: {collection_src.schema}")
+        log.info(f"collection_dist schema: {collection_dist.schema}")
         assert collection_src.schema == collection_dist.schema
         # get partitions
         partitions_src = collection_src.partitions
         partitions_dist = collection_dist.partitions
         log.info(f"partitions_src: {partitions_src}, partitions_dist: {partitions_dist}")
         assert len(partitions_src) == len(partitions_dist)
-
+        # get num entities
+        src_num = collection_src.num_entities
+        dist_num = collection_dist.num_entities
+        log.info(f"src_num: {src_num}, dist_num: {dist_num}")
+        if not verify_by_query:
+            assert src_num == dist_num, f"num_entities differ: {src_name}={src_num}, {dist_name}={dist_num}"
+            return
         for coll in [collection_src, collection_dist]:
             is_binary = self.is_binary_by_schema(coll.schema)
-            if is_binary:
-                coll.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index,
-                                  index_name=cf.gen_unique_str())
-            else:
-                coll.create_index(ct.default_float_vec_field_name, ct.default_index, index_name=cf.gen_unique_str())
+            try:
+                if is_binary:
+                    coll.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index,
+                                      index_name=cf.gen_unique_str())
+                else:
+                    coll.create_index(ct.default_float_vec_field_name, ct.default_index, index_name=cf.gen_unique_str())
+            except Exception as e:  # best effort: a failed create_index is logged and load() is still attempted
+                log.error(f"collection {coll.name} create index failed with error: {e}")
             coll.load()
+            time.sleep(5)  # NOTE(review): fixed wait after load(); the reason is not visible here — confirm it is needed
+        # get entities by count
+        src_count = collection_src.query(expr="", output_fields=["count(*)"])
+        dist_count = collection_dist.query(expr="", output_fields=["count(*)"])
+        log.info(f"src count: {src_count}, dist count: {dist_count}")
         src_res = collection_src.query(expr=f'{ct.default_int64_field_name} >= 0',
                                        output_fields=output_fields)
-        log.info(f"src res: {len(src_res)}")
+        log.info(f"src res count: {len(src_res)}")
         dist_res = collection_dist.query(expr=f'{ct.default_int64_field_name} >= 0',
                                          output_fields=output_fields)
-        log.info(f"dist res: {len(dist_res)}")
+        log.info(f"dist res count: {len(dist_res)}")
         assert len(dist_res) == len(src_res)
 
+        # sort by primary key and compare
+        pk_name = ct.default_int64_field_name
+        src_res = sorted(src_res, key=lambda x: x[pk_name])
+        dist_res = sorted(dist_res, key=lambda x: x[pk_name])
+        # symmetric difference, so keys missing on either side are reported
+        diff = sorted({r[pk_name] for r in src_res} ^ {r[pk_name] for r in dist_res})
+        log.info(f"pk diff: {diff}")
+        mismatched = [s[pk_name] for s, d in zip(src_res, dist_res) if s != d]  # asserted after release below
+        for coll in [collection_src, collection_dist]:
+            try:
+                coll.release()
+            except Exception as e:
+                log.error(f"collection {coll.name} release failed with error: {e}")
+        assert not mismatched, f"{len(mismatched)} rows differ, first pks: {mismatched[:10]}"
+
     def check_collection_binary(self, name):
         collection_w, _ = self.collection_wrap.init_collection(name=name)
         field_types = [field.dtype for field in collection_w.schema.fields]
diff --git a/tests/base/collection_wrapper.py b/tests/base/collection_wrapper.py
index 180f514..b061c87 100644
--- a/tests/base/collection_wrapper.py
+++ b/tests/base/collection_wrapper.py
@@ -124,6 +124,18 @@ def insert(self, data, partition_name=None, check_task=None, check_items=None, *
                                        **kwargs).run()
         return res, check_result
 
+    @trace()
+    def upsert(self, data, partition_name=None, check_task=None, check_items=None, **kwargs):
+        """Call Collection.upsert via api_request and run ResponseChecker on the result; returns (res, check_result)."""
+        timeout = kwargs.get("timeout", TIMEOUT)  # use TIMEOUT unless the caller passed a timeout
+        kwargs.update({"timeout": timeout})
+        func_name = sys._getframe().f_code.co_name  # name of this method, handed to ResponseChecker
+        res, check = api_request([self.collection.upsert, data, partition_name], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check,
+                                       dat=data, partition_name=partition_name,  # NOTE(review): keyword is `dat`, not `data` — confirm against ResponseChecker
+                                       **kwargs).run()
+        return res, check_result
+
     # @trace()
     # def flush(self, check_task=None, check_items=None, **kwargs):
     #     #TODO:currently, flush is not supported by sdk in milvus
diff --git a/tests/common/common_func.py b/tests/common/common_func.py
index b764c4c..d90f9ac 100644
--- a/tests/common/common_func.py
+++ b/tests/common/common_func.py
@@ -106,6 +106,11 @@ def gen_json_field(name=ct.default_json_field_name, is_primary=False, descriptio
                                                                 description=description, is_primary=is_primary)
     return json_field
 
+def gen_array_field(name=ct.default_array_field_name, is_primary=False, element_type=DataType.VARCHAR,
+                    description=ct.default_desc, max_capacity=2000, max_length=1500):
+    """Build an ARRAY field schema; max_capacity/max_length default to the formerly hard-coded 2000/1500."""
+    return ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.ARRAY, description=description, is_primary=is_primary, element_type=element_type, max_capacity=max_capacity, max_length=max_length)[0]
+
 
 def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim,
                         description=ct.default_desc):
diff --git a/tests/common/common_type.py b/tests/common/common_type.py
index 6a57cd8..61e1850 100644
--- a/tests/common/common_type.py
+++ b/tests/common/common_type.py
@@ -34,6 +34,7 @@
 default_double_field_name = "double"
 default_string_field_name = "varchar"
 default_json_field_name = "json"
+default_array_field_name = "array"
 default_float_vec_field_name = "float_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"
@@ -73,8 +74,8 @@
 err_msg = "err_msg"
 in_cluster_env = "IN_CLUSTER"
 
-default_flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "L2"}
-default_bin_flat_index = {"index_type": "BIN_FLAT", "params": {}, "metric_type": "JACCARD"}
+default_flat_index = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}}  # NOTE(review): name still says "flat" but the value is now IVF_SQ8 with COSINE — confirm users of this constant expect that
+default_bin_flat_index = {"index_type": "BIN_IVF_FLAT", "params": {"nlist": 128}, "metric_type": "JACCARD"}  # NOTE(review): name still says "bin_flat" but the value is now BIN_IVF_FLAT
 
 """" List of parameters used to pass """
 get_invalid_strs = [
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 0cd9687..2f712e0 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -17,7 +17,7 @@ pytest-print==0.2.1
 pytest-level==0.1.1
 pytest-xdist==2.5.0
 pytest-loguru==0.2.0
-pymilvus==2.2.9.dev18
+pymilvus==2.3.2
 pytest-rerunfailures==9.1.1
 git+https://github.com/Projectplace/pytest-tags
 ndg-httpsclient
diff --git a/tests/testcases/test_create_backup.py b/tests/testcases/test_create_backup.py
index 1408fdf..d10ad90 100644
--- a/tests/testcases/test_create_backup.py
+++ b/tests/testcases/test_create_backup.py
@@ -1,6 +1,6 @@
 import time
 import pytest
-
+from pymilvus import Collection
 from base.client_base import TestcaseBase
 from common import common_func as cf
 from common import common_type as ct
@@ -16,7 +16,7 @@
 
 class TestCreateBackup(TestcaseBase):
     """ Test case of end to end"""
-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("is_async", [True, False])
     @pytest.mark.parametrize("collection_need_to_backup", [1, 2, 3])
     @pytest.mark.parametrize("collection_type", ["binary", "float", "all"])
@@ -67,9 +67,88 @@ def test_milvus_create_backup(self, collection_type, collection_need_to_backup,
             assert len(backup_collections) == collection_need_to_backup
         assert set(names_to_backup).issubset(backup_collections)
 
+    @pytest.mark.parametrize("is_async", [False])
+    @pytest.mark.parametrize("collection_need_to_backup", ["all"])
+    @pytest.mark.parametrize("collection_type", ["binary", "float", "all"])
+    @pytest.mark.parametrize("collection_load_status", ["loaded", "not_loaded"])
+    def test_milvus_create_backup_with_indexed_and_loaded(self, collection_type, collection_need_to_backup, is_async, collection_load_status):
+        """Back up loaded / not-loaded collections and check the state recorded in the backup.
+
+        Creates three collections according to collection_type, loads them (and releases them
+        again for "not_loaded"), creates a backup and asserts that every collection backup
+        reports the same load_state and the same number of indexes as the live collection.
+        The backup must also be returned by list_backup and get_backup.
+        """
+        # prepare data
+        names_origin = []
+        back_up_name = cf.gen_unique_str(backup_prefix)
+        # is_binary flag of each of the three collections created for a collection_type
+        binary_flags = {"all": [True, False, False], "float": [False] * 3, "binary": [True] * 3}
+        for is_binary in binary_flags.get(collection_type, []):
+            names_origin.append(cf.gen_unique_str(prefix))
+            self.prepare_data(names_origin[-1], is_binary=is_binary, check_function=True)
+        log.info(f"name_origin:{names_origin}, back_up_name: {back_up_name}")
+        if collection_load_status == "loaded":
+            for name in names_origin:
+                c = Collection(name=name)
+                c.load()
+        if collection_load_status == "not_loaded":
+            for name in names_origin:
+                c = Collection(name=name)
+                c.load()
+                c.release()
+        collection_info = {}
+        for name in names_origin:
+            res, _ = self.utility_wrap.has_collection(name)
+            assert res is True
+            c = Collection(name=name)
+            index_info = [x.to_dict() for x in c.indexes]
 
+            # an exception from get_replicas() is taken to mean the collection is not loaded
+            loaded = "NotLoad"
+            try:
+                c.get_replicas()
+                loaded = "Loaded"
+            except Exception as e:
+                log.error(f"get replicas failed: {e}")
+            collection_info[name] = {
+                "index_info": index_info,
+                "load_state": loaded
+            }
+        log.info(f"collection_info: {collection_info}")
 
-
+        # create backup
+        payload = {"async": is_async, "backup_name": back_up_name}
+        if collection_need_to_backup == "all":
+            names_to_backup = names_origin
+        else:
+            names_to_backup = names_origin[:collection_need_to_backup]
+            payload["collection_names"] = names_to_backup
+        res = client.create_backup(payload)
+        log.info(f"create backup response: {res}")
+        if is_async:
+            # the wait helper is asserted to return True, so the details are read from get_backup
+            assert client.wait_create_backup_complete(back_up_name) is True
+            res = client.get_backup(back_up_name)
+        backup_info = res["data"]["collection_backups"]
+        # check load state and index info in backup
+        for backup in backup_info:
+            c_name = backup["collection_name"]
+            assert backup["load_state"] == collection_info[c_name]["load_state"]
+            assert len(backup["index_infos"]) == len(collection_info[c_name]["index_info"])
+        res = client.list_backup()
+        log.info(f"list backup response: {res}")
+        if "data" in res:
+            all_backup = [r["name"] for r in res["data"]]
+        else:
+            all_backup = []
+        assert back_up_name in all_backup
+        backup = client.get_backup(back_up_name)
+        assert backup["data"]["name"] == back_up_name
+        backup_collections = [item["collection_name"] for item in backup["data"]["collection_backups"]]
+        if isinstance(collection_need_to_backup, int):
+            assert len(backup_collections) == collection_need_to_backup
+        assert set(names_to_backup).issubset(backup_collections)
 
 
 
diff --git a/tests/testcases/test_get_backup.py b/tests/testcases/test_get_backup.py
index 6dfb40e..df94413 100644
--- a/tests/testcases/test_get_backup.py
+++ b/tests/testcases/test_get_backup.py
@@ -14,7 +14,7 @@
 
 class TestGetBackup(TestcaseBase):
     """ Test case of end to end"""
-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("is_async", [True, False])
     @pytest.mark.parametrize("backup_num", [1, 2, 3])
     def test_milvus_get_backup(self, backup_num, is_async):
diff --git a/tests/testcases/test_get_restore.py b/tests/testcases/test_get_restore.py
index 3f02da1..7fcb870 100644
--- a/tests/testcases/test_get_restore.py
+++ b/tests/testcases/test_get_restore.py
@@ -16,7 +16,7 @@
 
 class TestGetRestore(TestcaseBase):
     """ Test case of end to end"""
-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("is_async", [True, False])
     @pytest.mark.parametrize("restore_num", [1, 2, 3])
     def test_milvus_get_restore(self, restore_num, is_async):
diff --git a/tests/testcases/test_list_backup.py b/tests/testcases/test_list_backup.py
index baecac6..f6ca9cf 100644
--- a/tests/testcases/test_list_backup.py
+++ b/tests/testcases/test_list_backup.py
@@ -14,7 +14,7 @@
 
 class TestListBackup(TestcaseBase):
     """ Test case of end to end"""
-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("is_async", [True, False])
     @pytest.mark.parametrize("backup_num", [1, 2, 3])
     def test_milvus_list_backup(self, backup_num, is_async):
diff --git a/tests/testcases/test_restore_backup.py b/tests/testcases/test_restore_backup.py
index 5b21dd4..d374dd3 100644
--- a/tests/testcases/test_restore_backup.py
+++ b/tests/testcases/test_restore_backup.py
@@ -3,7 +3,7 @@
 import json
 import numpy as np
 from collections import defaultdict
-from pymilvus import db, list_collections, Collection
+from pymilvus import db, list_collections, Collection, DataType
 from base.client_base import TestcaseBase
 from common import common_func as cf
 from common import common_type as ct
@@ -17,7 +17,6 @@
 client = MilvusBackupClient("http://localhost:8080/api/v1")
 
 
-@pytest.mark.tags(CaseLabel.L0)
 class TestRestoreBackup(TestcaseBase):
     """ Test case of end to end"""
 
@@ -27,6 +26,7 @@ class TestRestoreBackup(TestcaseBase):
     @pytest.mark.parametrize("is_async", [True, False])
     @pytest.mark.parametrize("collection_need_to_restore", [1, 2, 3])
     @pytest.mark.parametrize("collection_type", ["binary", "float", "all"])
+    @pytest.mark.tags(CaseLabel.L0)
     def test_milvus_restore_back(self, collection_type, collection_need_to_restore, is_async, is_auto_id, enable_partition, nb):
         # prepare data
         names_origin = []
@@ -48,7 +48,6 @@ def test_milvus_restore_back(self, collection_type, collection_need_to_restore,
             res, _ = self.utility_wrap.has_collection(name)
             assert res is True
         # create backup
-
         names_need_backup = names_origin
         payload = {"async": False, "backup_name": back_up_name, "collection_names": names_need_backup}
         res = client.create_backup(payload)
@@ -86,6 +85,7 @@ def test_milvus_restore_back(self, collection_type, collection_need_to_restore,
     @pytest.mark.parametrize("is_async", [True])
     @pytest.mark.parametrize("collection_need_to_restore", [3])
     @pytest.mark.parametrize("collection_type", ["all"])
+    @pytest.mark.tags(CaseLabel.L0)
     def test_milvus_restore_back_with_multi_partition(self, collection_type, collection_need_to_restore, is_async, is_auto_id, enable_partition, nb):
         # prepare data
         names_origin = []
@@ -137,8 +137,8 @@ def test_milvus_restore_back_with_multi_partition(self, collection_type, collect
             assert name + suffix in res
         for name in restore_collections:
             self.compare_collections(name, name+suffix)
-    
-    @pytest.mark.tags(CaseLabel.L1)
+
+    @pytest.mark.tags(CaseLabel.L0)
     def test_milvus_restore_back_with_db_support(self):
         # prepare data
         self._connect()
@@ -188,7 +188,7 @@ def test_milvus_restore_back_with_db_support(self):
     @pytest.mark.parametrize("include_partition_key", [True, False])
     @pytest.mark.parametrize("include_dynamic", [True, False])
     @pytest.mark.parametrize("include_json", [True, False])
-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L0)
     def test_milvus_restore_back_with_new_feature_support(self, include_json, include_dynamic, include_partition_key):
         self._connect()
         name_origin = cf.gen_unique_str(prefix)
@@ -262,7 +262,7 @@ def test_milvus_restore_back_with_new_feature_support(self, include_json, includ
 
     @pytest.mark.parametrize("drop_db", [True, False])
     @pytest.mark.parametrize("str_json", [True, False])
-    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.tags(CaseLabel.L0)
     def test_milvus_restore_with_db_collections(self, drop_db, str_json):
         # prepare data
         self._connect()
@@ -322,3 +322,183 @@ def test_milvus_restore_with_db_collections(self, drop_db, str_json):
             assert collection_name + suffix in res
             if not drop_db:
                 self.compare_collections(collection_name, collection_name + suffix)
+
+    @pytest.mark.parametrize("include_partition_key", [True, False])
+    @pytest.mark.parametrize("include_dynamic", [True, False])
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_milvus_restore_back_with_array_datatype(self, include_dynamic, include_partition_key):
+        """Back up and restore a collection holding VARCHAR and INT64 array fields.
+
+        The restored collection (original name + suffix) is checked with compare_collections,
+        then the backup is deleted and must no longer be listed by list_backup.
+        """
+        self._connect()
+        name_origin = cf.gen_unique_str(prefix)
+        back_up_name = cf.gen_unique_str(backup_prefix)
+        restored_name = name_origin + suffix
+        # schema: int64 primary key, int64 key, json, two array fields and a 128-dim float vector
+        fields = [
+            cf.gen_int64_field(name="int64", is_primary=True),
+            cf.gen_int64_field(name="key"),
+            cf.gen_json_field(name="json"),
+            cf.gen_array_field(name="var_array", element_type=DataType.VARCHAR),
+            cf.gen_array_field(name="int_array", element_type=DataType.INT64),
+            cf.gen_float_vec_field(name="float_vector", dim=128),
+        ]
+        # partition_key_field is only passed when the partition key is requested
+        schema_kwargs = {"enable_dynamic_field": include_dynamic}
+        if include_partition_key:
+            schema_kwargs["partition_key_field"] = "key"
+        default_schema = cf.gen_collection_schema(fields, **schema_kwargs)
+        collection_w = self.init_collection_wrap(name=name_origin, schema=default_schema, active_trace=True)
+        nb = 3000
+        # first batch: column-based insert, one list per field in schema order
+        columns = [
+            list(range(nb)),
+            [i % 3 for i in range(nb)],
+            [{f"key_{str(i)}": i} for i in range(nb)],
+            [[str(x) for x in range(10)] for _ in range(nb)],
+            [list(range(10)) for _ in range(nb)],
+            [[np.float32(d) for d in range(128)] for _ in range(nb)],
+        ]
+        collection_w.insert(data=columns)
+        if include_dynamic:
+            # second batch: row-based insert, every row carries one extra dynamic key
+            rows = []
+            for i in range(nb, nb * 2):
+                rows.append({
+                    "int64": i,
+                    "key": i % 3,
+                    "json": {f"key_{str(i)}": i},
+                    "var_array": [str(x) for x in range(10)],
+                    "int_array": list(range(10)),
+                    "float_vector": [np.float32(d) for d in range(128)],
+                    f"dynamic_{str(i)}": i,
+                })
+            collection_w.insert(data=rows)
+        res = client.create_backup({"async": False, "backup_name": back_up_name, "collection_names": [name_origin]})
+        log.info(f"create_backup {res}")
+        res = client.list_backup()
+        log.info(f"list_backup {res}")
+        all_backup = [r["name"] for r in res["data"]] if "data" in res else []
+        assert back_up_name in all_backup
+        backup = client.get_backup(back_up_name)
+        assert backup["data"]["name"] == back_up_name
+        backup_collections = [item["collection_name"] for item in backup["data"]["collection_backups"]]
+        assert name_origin in backup_collections
+        restore_payload = {"async": False, "backup_name": back_up_name, "collection_names": [name_origin],
+                           "collection_suffix": suffix}
+        res = client.restore_backup(restore_payload)
+        log.info(f"restore_backup: {res}")
+        res, _ = self.utility_wrap.list_collections()
+        assert restored_name in res
+        self.compare_collections(name_origin, restored_name, output_fields=None)
+        client.delete_backup(back_up_name)
+        res = client.list_backup()
+        all_backup = [r["name"] for r in res["data"]] if "data" in res else []
+        assert back_up_name not in all_backup
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_restore_back_with_delete(self):
+        """Restore a backup taken after deleting rows and compare it with the source by query.
+
+        Inserts 3000 rows and deletes the first 100 primary keys before the backup is created.
+        """
+        self._connect()
+        name_origin = cf.gen_unique_str(prefix)
+        back_up_name = cf.gen_unique_str(backup_prefix)
+        fields = [
+            cf.gen_int64_field(name="int64", is_primary=True),
+            cf.gen_int64_field(name="key"),
+            cf.gen_json_field(name="json"),
+            cf.gen_array_field(name="var_array", element_type=DataType.VARCHAR),
+            cf.gen_array_field(name="int_array", element_type=DataType.INT64),
+            cf.gen_float_vec_field(name="float_vector", dim=128),
+        ]
+        default_schema = cf.gen_collection_schema(fields)
+        collection_w = self.init_collection_wrap(name=name_origin, schema=default_schema, active_trace=True)
+        nb = 3000
+        columns = [
+            list(range(nb)),
+            [i % 3 for i in range(nb)],
+            [{f"key_{str(i)}": i} for i in range(nb)],
+            [[str(x) for x in range(10)] for _ in range(nb)],
+            [list(range(10)) for _ in range(nb)],
+            [[np.float32(d) for d in range(128)] for _ in range(nb)],
+        ]
+        res, _ = collection_w.insert(data=columns)
+        # delete first 100 rows
+        delete_ids = res.primary_keys[:100]
+        collection_w.delete(expr=f"int64 in {delete_ids}")
+        res = client.create_backup({"async": False, "backup_name": back_up_name, "collection_names": [name_origin]})
+        log.info(f"create_backup {res}")
+        res = client.list_backup()
+        log.info(f"list_backup {res}")
+        all_backup = [r["name"] for r in res["data"]] if "data" in res else []
+        assert back_up_name in all_backup
+        backup = client.get_backup(back_up_name)
+        assert backup["data"]["name"] == back_up_name
+        backup_collections = [item["collection_name"] for item in backup["data"]["collection_backups"]]
+        assert name_origin in backup_collections
+        res = client.restore_backup({"async": False, "backup_name": back_up_name, "collection_names": [name_origin],
+                                     "collection_suffix": suffix})
+        log.info(f"restore_backup: {res}")
+        res, _ = self.utility_wrap.list_collections()
+        assert name_origin + suffix in res
+        self.compare_collections(name_origin, name_origin + suffix, output_fields=None, verify_by_query=True)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue 260")
+    def test_milvus_restore_back_with_upsert(self):
+        """Restore a backup taken after an upsert and compare it with the source by query.
+
+        Inserts 3000 rows and upserts primary keys 0..99 before the backup is created.
+        """
+        self._connect()
+        name_origin = cf.gen_unique_str(prefix)
+        back_up_name = cf.gen_unique_str(backup_prefix)
+        fields = [
+            cf.gen_int64_field(name="int64", is_primary=True),
+            cf.gen_int64_field(name="key"),
+            cf.gen_json_field(name="json"),
+            cf.gen_array_field(name="var_array", element_type=DataType.VARCHAR),
+            cf.gen_array_field(name="int_array", element_type=DataType.INT64),
+            cf.gen_float_vec_field(name="float_vector", dim=128),
+        ]
+        default_schema = cf.gen_collection_schema(fields)
+        collection_w = self.init_collection_wrap(name=name_origin, schema=default_schema, active_trace=True)
+        nb = 3000
+        columns = [
+            list(range(nb)),
+            [i % 3 for i in range(nb)],
+            [{f"key_{str(i)}": i} for i in range(nb)],
+            [[str(x) for x in range(10)] for _ in range(nb)],
+            [list(range(10)) for _ in range(nb)],
+            [[np.float32(d) for d in range(128)] for _ in range(nb)],
+        ]
+        collection_w.insert(data=columns)
+        upsert_data = [  # upsert first 100 rows by pk
+            list(range(100)),
+            [i % 3 for i in range(100, 200)],
+            [{f"key_{str(i)}": i} for i in range(100, 200)],
+            [[str(x) for x in range(10, 20)] for _ in range(100)],
+            [list(range(10)) for _ in range(100)],
+            [[np.float32(d) for d in range(128, 128 * 2)] for _ in range(100)],
+        ]
+        collection_w.upsert(data=upsert_data)
+        res = client.create_backup({"async": False, "backup_name": back_up_name, "collection_names": [name_origin]})
+        log.info(f"create_backup {res}")
+        res = client.list_backup()
+        log.info(f"list_backup {res}")
+        all_backup = [r["name"] for r in res["data"]] if "data" in res else []
+        assert back_up_name in all_backup
+        backup = client.get_backup(back_up_name)
+        assert backup["data"]["name"] == back_up_name
+        backup_collections = [item["collection_name"] for item in backup["data"]["collection_backups"]]
+        assert name_origin in backup_collections
+        res = client.restore_backup({"async": False, "backup_name": back_up_name, "collection_names": [name_origin],
+                                     "collection_suffix": suffix})
+        log.info(f"restore_backup: {res}")
+        res, _ = self.utility_wrap.list_collections()
+        assert name_origin + suffix in res
+        self.compare_collections(name_origin, name_origin + suffix, output_fields=None, verify_by_query=True)