Skip to content

Commit

Permalink
test: add index check after restore (#507)
Browse files Browse the repository at this point in the history
* test: add index check after restore

Signed-off-by: zhuwenxing <[email protected]>
  • Loading branch information
zhuwenxing authored Jan 23, 2025
1 parent 70794b6 commit 3fdd33f
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 28 deletions.
20 changes: 10 additions & 10 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ jobs:
deploy_tools: [docker-compose]
milvus_mode: [standalone]
another_milvus_mode: [standalone]
source_image_tag: [2.3-latest, 2.4-latest]
target_image_tag: [master-latest, 2.4-latest]
source_image_tag: [2.4-latest, v2.5.4]
target_image_tag: [master-latest, v2.5.4]
exclude:
- source_image_tag: 2.4-latest
target_image_tag: 2.4-latest
- source_image_tag: v2.5.4
target_image_tag: v2.5.4

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -184,11 +184,11 @@ jobs:
deploy_tools: [docker-compose]
milvus_mode: [standalone]
another_milvus_mode: [standalone]
source_image_tag: [v2.4.17, 2.4-latest]
target_image_tag: [master-latest, 2.4-latest]
source_image_tag: [v2.4.21, v2.5.4]
target_image_tag: [master-latest, v2.5.4]
exclude:
- source_image_tag: 2.4-latest
target_image_tag: 2.4-latest
- source_image_tag: v2.5.4
target_image_tag: v2.5.4

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -561,10 +561,10 @@ jobs:
matrix:
deploy_tools: [docker-compose]
milvus_mode: [standalone]
image_tag: [master-latest, 2.4-latest]
image_tag: [master-latest, v2.5.4]
case_tag: [L0, L1, L2, MASTER]
exclude:
- image_tag: 2.4-latest
- image_tag: v2.5.4
case_tag: MASTER

steps:
Expand Down
50 changes: 33 additions & 17 deletions tests/base/client_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,8 +513,23 @@ def is_binary_by_schema(self, schema):
if field.dtype == DataType.FLOAT_VECTOR:
return False

def compare_indexes(self, src_name, dist_name):
    """Assert that collection *dist_name* carries the same indexes as *src_name*.

    Index descriptions are compared per indexed field, ignoring the
    "collection" entry of ``to_dict()`` (it necessarily differs between the
    two collections). Both collections must index the same set of fields:
    an index present on only one side fails the check instead of being
    silently skipped.

    :param src_name: name of the source (original) collection
    :param dist_name: name of the destination (restored) collection
    """
    collection_src, _ = self.collection_wrap.init_collection(name=src_name)
    collection_dist, _ = self.collection_wrap.init_collection(name=dist_name)
    src_indexes_info = [x.to_dict() for x in collection_src.indexes]
    dist_indexes_info = [x.to_dict() for x in collection_dist.indexes]
    log.info(f"collection_src indexes: {src_indexes_info}")
    log.info(f"collection_dist indexes: {dist_indexes_info}")
    # Build per-field views that drop the "collection" key WITHOUT mutating
    # the original dicts. The previous pop()-based nested loop mutated both
    # sides and would raise KeyError if a field ever matched twice.
    src_by_field = {
        info["field"]: {k: v for k, v in info.items() if k != "collection"}
        for info in src_indexes_info
    }
    dist_by_field = {
        info["field"]: {k: v for k, v in info.items() if k != "collection"}
        for info in dist_indexes_info
    }
    # An index that exists on only one side is a mismatch, not a vacuous pass.
    assert set(src_by_field) == set(dist_by_field), (
        f"indexed fields differ: src={sorted(src_by_field)}, "
        f"dist={sorted(dist_by_field)}"
    )
    for field_name, src_info in src_by_field.items():
        assert src_info == dist_by_field[field_name], (
            f"index mismatch on field {field_name}: "
            f"src={src_info}, dist={dist_by_field[field_name]}"
        )

def compare_collections(
self, src_name, dist_name, output_fields=None, verify_by_query=False
self, src_name, dist_name, output_fields=None, verify_by_query=False, skip_index=False
):
if output_fields is None:
output_fields = ["*"]
Expand All @@ -538,22 +553,23 @@ def compare_collections(
assert src_num == dist_num, f"srs_num: {src_num}, dist_num: {dist_num}"
return
for coll in [collection_src, collection_dist]:
is_binary = self.is_binary_by_schema(coll.schema)
try:
if is_binary:
coll.create_index(
ct.default_binary_vec_field_name,
ct.default_bin_flat_index,
index_name=cf.gen_unique_str(),
)
else:
coll.create_index(
ct.default_float_vec_field_name,
ct.default_index,
index_name=cf.gen_unique_str(),
)
except Exception as e:
log.error(f"collection {coll.name} create index failed with error: {e}")
if not skip_index:
is_binary = self.is_binary_by_schema(coll.schema)
try:
if is_binary:
coll.create_index(
ct.default_binary_vec_field_name,
ct.default_bin_flat_index,
index_name=cf.gen_unique_str(),
)
else:
coll.create_index(
ct.default_float_vec_field_name,
ct.default_index,
index_name=cf.gen_unique_str(),
)
except Exception as e:
log.error(f"collection {coll.name} create index failed with error: {e}")
coll.load()
time.sleep(5)
# get entities by count
Expand Down
1 change: 0 additions & 1 deletion tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
--extra-index-url https://test.pypi.org/simple/
pytest-cov==2.8.1
requests==2.26.0
scikit-learn==1.1.3
Expand Down
110 changes: 110 additions & 0 deletions tests/testcases/test_restore_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,116 @@ def test_milvus_restore_back(
for name in restore_collections:
self.compare_collections(name, name + suffix, verify_by_query=True)

@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("is_auto_id", [True])
@pytest.mark.parametrize("enable_partition", [False])
@pytest.mark.parametrize("is_async", [True, False])
@pytest.mark.parametrize("collection_need_to_restore", [3])
@pytest.mark.parametrize("collection_type", ["all"])
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_restore_back_with_index(
    self,
    collection_type,
    collection_need_to_restore,
    is_async,
    is_auto_id,
    enable_partition,
    nb,
):
    """Back up indexed collections, restore with ``restoreIndex`` enabled,
    and verify the restored collections match the sources.

    Steps:
      1. Create source collections (schema mix chosen by *collection_type*)
         and load *nb* entities into each via ``prepare_data``.
      2. Create indexes on the vector fields of every source collection.
      3. Create a backup covering all source collections.
      4. Restore (all or the first *collection_need_to_restore*) with
         ``restoreIndex=True`` and the ``suffix`` appended to each name.
      5. Assert the restored collections exist, their indexes equal the
         sources' (``compare_indexes``), and their contents match
         (``compare_collections`` with ``skip_index=True`` so verification
         does not create new indexes that would mask step 4's result).
    """
    # prepare data
    names_origin = []
    back_up_name = cf.gen_unique_str(backup_prefix)
    # "all": one binary-vector collection plus two float-vector collections.
    if collection_type == "all":
        for is_binary in [True, False, False]:
            names_origin.append(cf.gen_unique_str(prefix))
            self.prepare_data(
                names_origin[-1],
                nb=nb,
                is_binary=is_binary,
                auto_id=is_auto_id,
                check_function=False,
                enable_partition=enable_partition,
            )
    # "float": three float-vector collections.
    if collection_type == "float":
        for is_binary in [False, False, False]:
            names_origin.append(cf.gen_unique_str(prefix))
            self.prepare_data(
                names_origin[-1],
                nb=nb,
                is_binary=is_binary,
                auto_id=is_auto_id,
                check_function=False,
                enable_partition=enable_partition,
            )
    # "binary": three binary-vector collections.
    if collection_type == "binary":
        for is_binary in [True, True, True]:
            names_origin.append(cf.gen_unique_str(prefix))
            self.prepare_data(
                names_origin[-1],
                nb=nb,
                is_binary=is_binary,
                auto_id=is_auto_id,
                check_function=False,
                enable_partition=enable_partition,
            )
    log.info(f"name_origin:{names_origin}, back_up_name: {back_up_name}")
    # Sanity check: every source collection must exist before backup.
    for name in names_origin:
        res, _ = self.utility_wrap.has_collection(name)
        assert res is True

    # create index for source collection
    # (these indexes are what compare_indexes later expects to find on
    # the restored collections when restoreIndex is set)
    for name in names_origin:
        c = Collection(name)
        create_index_for_vector_fields(c)

    # create backup
    names_need_backup = names_origin
    payload = {
        "async": False,
        "backup_name": back_up_name,
        "collection_names": names_need_backup,
    }
    res = self.client.create_backup(payload)
    log.info(f"create backup response: {res}")
    backup = self.client.get_backup(back_up_name)
    assert backup["data"]["name"] == back_up_name
    # NOTE(review): the comprehension variable shadows the outer `backup`
    # dict; it works because the iterable is evaluated first, but renaming
    # the loop variable would be clearer.
    backup_collections = [
        backup["collection_name"] for backup in backup["data"]["collection_backups"]
    ]
    restore_collections = backup_collections
    if collection_need_to_restore == "all":
        # Restore every backed-up collection under a suffixed name.
        payload = {
            "async": False,
            "backup_name": back_up_name,
            "collection_suffix": suffix,
        }
    else:
        # Restore only the first N backed-up collections.
        restore_collections = names_need_backup[:collection_need_to_restore]
        payload = {
            "async": False,
            "backup_name": back_up_name,
            "collection_suffix": suffix,
            "collection_names": restore_collections,
        }
    # Ask the restore to recreate the source indexes as well.
    payload["restoreIndex"] = True
    t0 = time.time()
    res = self.client.restore_backup(payload)
    restore_id = res["data"]["id"]
    log.info(f"restore_backup: {res}")
    # NOTE(review): the restore payload above always sets "async": False,
    # so when is_async is True this wait should return immediately —
    # confirm whether the payload was meant to use "async": is_async.
    if is_async:
        res = self.client.wait_restore_complete(restore_id)
        assert res is True
    t1 = time.time()
    log.info(f"restore {restore_collections} cost time: {t1 - t0}")
    res, _ = self.utility_wrap.list_collections()
    # Every restored collection must exist under its suffixed name.
    for name in restore_collections:
        assert name + suffix in res
    # Restored indexes must equal the source indexes (the point of this test).
    for name in restore_collections:
        self.compare_indexes(name, name + suffix)
    # Data must match too; skip_index=True so the comparison does not
    # create fresh indexes that would hide a restoreIndex failure.
    for name in restore_collections:
        self.compare_collections(name, name + suffix, verify_by_query=True, skip_index=True)


@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("nb", [3000])
@pytest.mark.parametrize("is_auto_id", [True])
Expand Down

0 comments on commit 3fdd33f

Please sign in to comment.