Skip to content

Commit

Permalink
idrac: add state check and enable serial console log for idrac and ad…
Browse files Browse the repository at this point in the history
…d ClusterCapabilities
  • Loading branch information
LiliDeng committed Oct 7, 2023
1 parent eceb50d commit d1af46c
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 43 deletions.
8 changes: 7 additions & 1 deletion lisa/sut_orchestrator/baremetal/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from lisa.util import InitializableMixin, subclasses
from lisa.util.logger import get_logger

from ..schema import ClusterSchema
from ..schema import ClientCapabilities, ClientSchema, ClusterSchema


class Cluster(subclasses.BaseClassWithRunbookMixin, InitializableMixin):
Expand All @@ -35,3 +35,9 @@ def get_serial_console(self) -> Type[features.SerialConsole]:

def get_start_stop(self) -> Type[features.StartStop]:
    """Return the StartStop feature class of this cluster type.

    The base implementation is a placeholder that always raises.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError

def get_client_capabilities(self, client: ClientSchema) -> ClientCapabilities:
    """Return the capabilities of the given client.

    The base implementation is a placeholder that always raises.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError

def cleanup(self) -> None:
    """Release or reset whatever the cluster holds.

    The base implementation is a placeholder that always raises.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
148 changes: 131 additions & 17 deletions lisa/sut_orchestrator/baremetal/cluster/idrac.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import base64
import time
import xml.etree.ElementTree as ETree
from typing import Any, Type
from pathlib import Path
from typing import Any, Optional, Type

import redfish # type: ignore
from assertpy import assert_that

from lisa import features, schema
from lisa.environment import Environment
from lisa.util import LisaException
from lisa.util import LisaException, check_till_timeout
from lisa.util.logger import get_logger
from lisa.util.perf_timer import create_timer

from ..platform_ import BareMetalPlatform
from ..schema import ClusterSchema, IdracSchema
from ..schema import ClientCapabilities, ClientSchema, ClusterSchema, IdracSchema
from .cluster import Cluster


Expand All @@ -38,27 +40,52 @@ def _stop(
"baremetal orchestrator does not support hibernate stop"
)
self._login()
if self.cluster.get_power_state() == "Off":
self._log.debug("System is already off.")
return
self.cluster.reset("GracefulShutdown")
self._logout()

def _start(self, wait: bool = True) -> None:
self._login()
if self.cluster.get_power_state() == "On":
self._log.debug("System is already powered on.")
return
self.cluster.reset("On")
self._logout()

def _restart(self, wait: bool = True) -> None:
self._login()
self.cluster.reset("ForceRestart")
self.cluster.reset("ForceRestart", force_run=True)
self._logout()


class IdracSerialConsole(features.SerialConsole):
    """Serial console feature that reads its data from the platform's iDRAC."""

    def _login(self) -> None:
        """Bind the platform's cluster object to this feature and log in."""
        platform: BareMetalPlatform = self._platform  # type: ignore
        self.cluster: Idrac = platform.cluster  # type: ignore
        self.cluster.login()

    def _logout(self) -> None:
        """Re-bind the platform's cluster object and log out of it."""
        platform: BareMetalPlatform = self._platform  # type: ignore
        self.cluster = platform.cluster  # type: ignore
        self.cluster.logout()

    def _get_console_log(self, saved_path: Optional[Path]) -> bytes:
        """Fetch the serial console log, optionally saving a screenshot.

        Args:
            saved_path: directory that receives ``serial_console.png``; when
                falsy, no screenshot is requested.

        Returns:
            The serial console log encoded as UTF-8 bytes.
        """
        self._login()
        try:
            if saved_path:
                # The screenshot arrives base64-encoded; store the raw image.
                screenshot = base64.b64decode(self.cluster.get_server_screen_shot())
                (saved_path / "serial_console.png").write_bytes(screenshot)
            return self.cluster.get_serial_console_log().encode("utf-8")
        finally:
            # Always release the session, even when an export fails.
            self._logout()


class Idrac(Cluster):
# Maps a reset operation name to the power state that reset() expects
# get_power_state() to report once the operation has taken effect.
state_dict = {
"GracefulShutdown": "Off",
"ForceRestart": "On",
"On": "On",
"ForceOff": "Off",
}

def __init__(self, runbook: ClusterSchema) -> None:
super().__init__(runbook)
self.idrac_runbook: IdracSchema = self.runbook
Expand All @@ -68,6 +95,7 @@ def __init__(self, runbook: ClusterSchema) -> None:
).is_equal_to(1)

self.client = self.idrac_runbook.client[0]
self._enable_serial_console()

@classmethod
def type_name(cls) -> str:
Expand All @@ -80,26 +108,80 @@ def type_schema(cls) -> Type[schema.TypedSchema]:
def get_start_stop(self) -> Type[features.StartStop]:
    """Return the StartStop feature class used with this cluster."""
    feature_type = IdracStartStop
    return feature_type

def get_serial_console(self) -> Type[features.SerialConsole]:
    """Return the SerialConsole feature class used with this cluster."""
    feature_type = IdracSerialConsole
    return feature_type

def deploy(self, environment: Environment) -> Any:
self.login()
self._eject_virtual_media()
self._change_boot_order_once("VCD-DVD")
assert self.client.iso_http_url, "iso_http_url is required for idrac client"
if self.get_power_state() == "Off":
self._log.debug("System is already off.")
else:
self.reset("GracefulShutdown")
self._change_boot_order_once("VCD-DVD")
self.reset("ForceOff")
self._insert_virtual_media(self.client.iso_http_url)
self.reset("On")
self.reset("On", force_run=True)
self.logout()

def cleanup(self) -> None:
    """Clear the serial console log inside a login/logout pair.

    The logout runs even when clearing the log raises, so a failed cleanup
    does not leave the session open.
    """
    self.login()
    try:
        self._clear_serial_console_log()
    finally:
        self.logout()

def reset(self, operation: str) -> None:
def get_client_capabilities(self, client: ClientSchema) -> ClientCapabilities:
    """Return the client's capabilities, querying the system when unset.

    Args:
        client: client definition; when its ``capabilities`` is truthy it is
            returned unchanged and no request is made.

    Returns:
        The configured capabilities, or a new ClientCapabilities whose
        ``core_count`` and ``free_memory_mb`` come from the system resource's
        ProcessorSummary and MemorySummary.
    """
    if client.capabilities:
        return client.capabilities

    self.login()
    try:
        response = self.redfish_instance.get(
            "/redfish/v1/Systems/System.Embedded.1/",
        )
        system_info = response.dict
    finally:
        # Release the session even when the query fails.
        self.logout()

    cluster_capabilities = ClientCapabilities()
    cluster_capabilities.core_count = int(
        system_info["ProcessorSummary"]["LogicalProcessorCount"]
    )
    # Convert to MiB before truncating, so a fractional GiB value is not
    # rounded down to a whole GiB first.
    cluster_capabilities.free_memory_mb = int(
        float(system_info["MemorySummary"]["TotalSystemMemoryGiB"]) * 1024
    )
    return cluster_capabilities

def get_serial_console_log(self) -> str:
    """Export the serial console data and return it as text.

    Returns:
        The text of the SerialDataExport response.

    Raises:
        LisaException: if the export request does not return status 200.
    """
    response = self.redfish_instance.post(
        "/redfish/v1/Managers/iDRAC.Embedded.1/SerialInterfaces"
        "/Serial.1/Actions/Oem/DellSerialInterface.SerialDataExport",
        body={},
    )
    # The response is already complete, so its status cannot change;
    # polling it would only delay the failure until a timeout.
    status = int(response.status)
    if status != 200:
        raise LisaException(
            f"failed to export serial console log, response status: {status}"
        )
    return str(response.text)

def get_server_screen_shot(self, file_type: str = "ServerScreenShot") -> str:
    """Request a server screenshot export and return the exported payload.

    Args:
        file_type: value sent as ``FileType`` in the request body.

    Returns:
        The ``ServerScreenShotFile`` field of the response, as a string.
    """
    export_url = (
        "/redfish/v1/Dell/Managers/iDRAC.Embedded.1/DellLCService/Actions/"
        "DellLCService.ExportServerScreenShot"
    )
    export_result = self.redfish_instance.post(
        export_url,
        body={"FileType": file_type},
    )
    self._wait_for_completion(export_result)
    screenshot = export_result.dict["ServerScreenShotFile"]
    return str(screenshot)

def reset(self, operation: str, force_run: bool = False) -> None:
    """Send a ComputerSystem.Reset action and wait for the resulting state.

    Args:
        operation: value sent as ``ResetType``. When it is a key of
            ``state_dict``, the mapped power state is the expected result.
        force_run: when False and the system already reports the expected
            state, nothing is sent.
    """
    tracks_state = operation in self.state_dict
    if tracks_state:
        expected_state = self.state_dict[operation]
        # force_run short-circuits, so the power state is only queried
        # when skipping is allowed.
        if not (force_run or self.get_power_state() != expected_state):
            self._log.debug(f"System is already in {expected_state} state.")
            return

    reset_url = "/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset"
    reset_response = self.redfish_instance.post(
        reset_url,
        body={"ResetType": operation},
    )
    self._wait_for_completion(reset_response)

    if tracks_state:
        check_till_timeout(
            lambda: self.get_power_state() == expected_state,
            timeout_message=(f"wait for client into '{expected_state}' state"),
        )
    self._log.debug(f"{operation} initiated successfully.")

def get_power_state(self) -> str:
Expand Down Expand Up @@ -184,3 +266,35 @@ def _change_boot_order_once(self, boot_from: str) -> None:
self._log.debug("Waiting for boot order override task to complete...")
self._wait_for_completion(response)
self._log.debug(f"Updating boot source to {boot_from} completed")

def _enable_serial_console(self) -> None:
self.login()
response = self.redfish_instance.get(
"/redfish/v1/Managers/iDRAC.Embedded.1/Attributes"
)
if response.dict["Attributes"]["SerialCapture.1.Enable"] == "Disabled":
response = self.redfish_instance.patch(
"/redfish/v1/Managers/iDRAC.Embedded.1/Attributes",
body={"Attributes": {"SerialCapture.1.Enable": "Enabled"}},
)
response = self.redfish_instance.get(
"/redfish/v1/Managers/iDRAC.Embedded.1/Attributes"
)
if response.dict["Attributes"]["SerialCapture.1.Enable"] == "Enabled":
self._log.debug("Serial console enabled successfully.")
else:
raise LisaException("Failed to enable serial console.")
self.logout()

def _clear_serial_console_log(self) -> None:
response = self.redfish_instance.get(
"/redfish/v1/Managers/iDRAC.Embedded.1/Attributes"
)
if response.dict["Attributes"]["SerialCapture.1.Enable"] == "Disabled":
self._log.debug("Serial console is already disabled. No need to clear log.")
response = self.redfish_instance.post(
"/redfish/v1/Managers/iDRAC.Embedded.1/SerialInterfaces"
"/Serial.1/Actions/Oem/DellSerialInterface.SerialDataClear",
body={},
)
self._wait_for_completion(response)
28 changes: 21 additions & 7 deletions lisa/sut_orchestrator/baremetal/cluster/rackmanager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import Any, Type

from lisa import features, schema
Expand All @@ -9,7 +8,7 @@
from lisa.util.logger import get_logger

from ..platform_ import BareMetalPlatform
from ..schema import RackManagerSchema
from ..schema import ClientCapabilities, ClientSchema, RackManagerSchema
from .cluster import Cluster


Expand Down Expand Up @@ -54,19 +53,34 @@ def type_schema(cls) -> Type[schema.TypedSchema]:
def get_start_stop(self) -> Type[features.StartStop]:
    """Return the StartStop feature class used with this cluster."""
    feature_type = RackManagerStartStop
    return feature_type

def connect_to_rack_manager(self) -> None:
    """Connect to the rack manager and keep the result in ``self.rm_node``.

    The runbook's connection is renamed to "rackmanager" before it is
    passed to ``quick_connect``.
    """
    connection = self.rm_runbook.connection
    assert connection, "connection is required for rackmanager"
    connection.name = "rackmanager"
    self.rm_node = quick_connect(connection, logger_name="rackmanager")

def deploy(self, environment: Environment) -> Any:
    """Issue an "off" reset followed by an "on" reset.

    Args:
        environment: not used by this implementation.
    """
    for operation in ("off", "on"):
        self.reset(operation)

def reset(self, operation: str) -> None:
assert self.rm_runbook.connection, "connection is required for rackmanager"
self.rm_runbook.connection.name = "rackmanager"
rm_node = quick_connect(self.rm_runbook.connection, logger_name="rackmanager")
self.connect_to_rack_manager()
assert self.rm_runbook.client, "client is required for rackmanager"
for client in self.rm_runbook.client:
assert (
client.management_port
), "management_port is required for rackmanager client"
rm_node.execute(f"set system {operation} -i {client.management_port}")

self.rm_node.execute(f"set system {operation} -i {client.management_port}")
self._log.debug(f"client has been {operation} successfully")

def get_client_capabilities(self, client: ClientSchema) -> ClientCapabilities:
    """Return the client's configured capabilities, or a zeroed placeholder.

    Args:
        client: client definition; when its ``capabilities`` is truthy it is
            returned unchanged.

    Returns:
        The configured capabilities, or a new ClientCapabilities with
        ``core_count`` and ``free_memory_mb`` both set to 0.
    """
    configured = client.capabilities
    if configured:
        return configured

    placeholder = ClientCapabilities()
    placeholder.core_count = 0
    placeholder.free_memory_mb = 0
    return placeholder

def cleanup(self) -> None:
    """Do nothing; this implementation performs no cleanup."""
    return None
1 change: 1 addition & 0 deletions lisa/sut_orchestrator/baremetal/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def _initialize(self, *args: Any, **kwargs: Any) -> None:
*args,
**kwargs,
)
self._inner.initialize()


class StartStop(ClusterFeature):
Expand Down
Loading

0 comments on commit d1af46c

Please sign in to comment.