From 8dbf2598fb68f977be31b29290e2550f60b9df26 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sun, 14 Jan 2024 13:22:49 -0800
Subject: [PATCH] contest: vmtest: retry starting a VM

VMs sometimes hang or crash while starting, possibly due to a bug in QEMU.
Retry starting the VM up to 4 times before giving up.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 contest/remote/vmtest.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/contest/remote/vmtest.py b/contest/remote/vmtest.py
index 23579fd..3407354 100755
--- a/contest/remote/vmtest.py
+++ b/contest/remote/vmtest.py
@@ -241,6 +241,26 @@ def bash_prev_retcode(self):
         return int(stdout.split('\n')[1])
 
 
+def new_vm(results_path, vm_id, vm=None, config=None):  # start a VM, retrying on TimeoutError; returns (vm_id, vm)
+    if vm is None:  # no VM passed in: construct one from config
+        vm = VM(config)
+    # For whatever reason starting sometimes hangs / crashes
+    i = 0  # number of failed start attempts so far
+    while True:
+        try:
+            vm.start()
+            vm_id += 1  # bumped only after start() returned without raising
+            vm.dump_log(results_path + '/vm-start-' + str(vm_id))
+            return vm_id, vm
+        except TimeoutError:
+            i += 1
+            if i > 4:  # fifth failure: give up and re-raise the TimeoutError
+                raise
+            print(f"WARNING: VM did not start, retrying {i}/4")
+            vm.dump_log(results_path + '/vm-crashed-' + str(vm_id))  # if start() raised, vm_id is still the caller's value
+            vm.stop()  # stop the failed VM; the loop then calls start() on the same object
+
+
 def test(binfo, rinfo, config):
     print("Run at", datetime.datetime.now())
 
@@ -259,9 +279,8 @@ def test(binfo, rinfo, config):
     vm.build()
     vm.dump_log(results_path + '/build')
 
-    vm.start()
     vm_id = 0
-    vm.dump_log(results_path + '/vm-start-' + str(vm_id))
+    vm_id, vm = new_vm(results_path, vm_id, vm=vm)
 
     dir_path = config.get('local', 'tree_path') + "/tools/testing/selftests/drivers/net/netdevsim"
     for test in os.listdir(dir_path):
@@ -314,10 +333,7 @@ def test(binfo, rinfo, config):
             print("INFO: VM kernel crashed, starting a clean one!")
             vm.stop()
             vm.dump_log(results_path + '/vm-stop-' + str(vm_id))
-            vm = VM(config)
-            vm.start()
-            vm_id += 1
-            vm.dump_log(results_path + '/vm-start-' + str(vm_id))
+            vm_id, vm = new_vm(results_path, vm_id, config=config)
 
     vm.stop()
     vm.dump_log(results_path + '/vm-stop-' + str(vm_id))