diff --git a/tests/post_training/data/ptq_reference_data.yaml b/tests/post_training/data/ptq_reference_data.yaml
index e1a1838336b..499dff19fdb 100644
--- a/tests/post_training/data/ptq_reference_data.yaml
+++ b/tests/post_training/data/ptq_reference_data.yaml
@@ -185,15 +185,15 @@ timm/resnet18_backend_TORCH:
 timm/swin_base_patch4_window7_224_backend_FP32:
   metric_value: 0.85274
 timm/swin_base_patch4_window7_224_backend_OV:
-  metric_value: 0.83566
+  metric_value: 0.8366
 timm/swin_base_patch4_window7_224_no_sq_backend_FP32:
   metric_value: 0.85274
 timm/swin_base_patch4_window7_224_no_sq_backend_CUDA_TORCH:
   metric_value: 0.85142
 timm/swin_base_patch4_window7_224_no_sq_backend_ONNX:
-  metric_value: 0.85212
+  metric_value: 0.85158
 timm/swin_base_patch4_window7_224_no_sq_backend_TORCH:
-  metric_value: 0.85178
+  metric_value: 0.85142
 timm/tf_inception_v3_backend_CUDA_TORCH:
   metric_value: 0.77542
 timm/tf_inception_v3_backend_FP32:
diff --git a/tests/post_training/model_scope.py b/tests/post_training/model_scope.py
index 0ad69a54317..0c4d72af5f2 100644
--- a/tests/post_training/model_scope.py
+++ b/tests/post_training/model_scope.py
@@ -249,7 +249,6 @@
             "model_type": ModelType.TRANSFORMER,
         },
         "backends": [BackendType.OV],
-        "batch_size": 32,
     },
     {
         "reported_name": "timm/swin_base_patch4_window7_224_no_sq",
@@ -263,7 +262,6 @@
             ),
         },
         "backends": [BackendType.TORCH, BackendType.CUDA_TORCH, BackendType.ONNX],
-        "batch_size": 128,
     },
     {
         "reported_name": "timm/tf_inception_v3",
diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py
index f53643b6cc3..b8242951be5 100644
--- a/tests/post_training/pipelines/image_classification_timm.py
+++ b/tests/post_training/pipelines/image_classification_timm.py
@@ -122,13 +122,12 @@ def prepare_calibration_dataset(self):
 
     def _validate(self):
         val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
-        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)
+        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False)
 
         dataset_size = len(val_loader)
-
-        # Initialize result tensors for async inference support.
+        # Initialize one result slot per batch for async inference support.
-        predictions = np.zeros((dataset_size))
-        references = -1 * np.ones((dataset_size))
+        predictions = [[] for _ in range(dataset_size)]
+        references = [[] for _ in range(dataset_size)]
 
         core = ov.Core()
 
@@ -160,8 +159,12 @@ def process_result(request, userdata):
                 references[i] = target
 
             infer_queue.wait_all()
-
-        acc_top1 = accuracy_score(predictions, references)
+        # Each slot is expected to hold one batch of results; flatten to per-sample sequences.
+        flatten_predictions, flatten_references = [], []
+        for batch_predictions, batch_references in zip(predictions, references):
+            flatten_predictions.extend(batch_predictions)
+            flatten_references.extend(batch_references)
+        acc_top1 = accuracy_score(flatten_predictions, flatten_references)
 
         self.run_info.metric_name = "Acc@1"
         self.run_info.metric_value = acc_top1