MSOutput: factor the three duplicated AlertManager notifications into the
helper methods alertDIDNotFound, alertCampaignNotFound and
alertDatatierNotFound.
MSRuleCleaner: log a warning when the MSOutput transfer document is missing.

Notes for reviewers:
  * getMSOutputTransferInfo: the tape-rule check is kept as an independent
    `if` (as in the pre-image) rather than chained with `elif`. In an
    if/elif/elif chain a workflow whose TransferStatus is 'done' would never
    have its tape rules evaluated. The hunk is therefore +590,15, and the
    post-image blob hash on the MSRuleCleaner.py `index` line predates this
    change.
  * The new helpers call self.sendAlert(...) whereas the removed code called
    self.alertManagerAPI.sendAlert(...). Presumably sendAlert is provided by
    the base class - TODO confirm before merging.
  * NOTE(review): leading indentation in this patch was reconstructed from a
    whitespace-collapsed copy. Verify the context lines against the tree, or
    apply with `git apply --ignore-whitespace`.

diff --git a/src/python/WMCore/MicroService/MSOutput/MSOutput.py b/src/python/WMCore/MicroService/MSOutput/MSOutput.py
index 238d0de8a8..6cafde00e3 100644
--- a/src/python/WMCore/MicroService/MSOutput/MSOutput.py
+++ b/src/python/WMCore/MicroService/MSOutput/MSOutput.py
@@ -698,16 +698,8 @@ def docInfoUpdate(self, msOutDoc):
 
         # if there were containers not found in Rucio, create an email alert
         if notFoundDIDs:
-            # send alert via AlertManager API
-            alertName = "ms-output: output containers not found for workflow: {}".format(msOutDoc["RequestName"])
-            alertSeverity = "high"
-            alertSummary = "[MSOutput] Workflow '{}' has output datasets unknown to Rucio".format(msOutDoc["RequestName"])
-            alertDescription = "Dataset(s): {} cannot be found in Rucio. ".format(notFoundDIDs)
-            alertDescription += "Thus, we are skipping these datasets from the final output "
-            alertDescription += "data placement, such that this workflow can get archived."
-            self.logger.warning(alertDescription)
-            if self.msConfig["sendNotification"]:
-                self.alertManagerAPI.sendAlert(alertName, alertSeverity, alertSummary, alertDescription, self.alertServiceName)
+            # log and send alert via AlertManager API
+            self.alertDIDNotFound(msOutDoc["RequestName"], notFoundDIDs)
 
         try:
             msOutDoc.updateDoc({"OutputMap": updatedOutputMap}, throw=True)
@@ -758,16 +750,8 @@ def canDatasetGoToDisk(self, dataItem, isRelVal=False):
                 msg += "under campaign: {}. Letting it pass though...".format(dataItem['Campaign'])
                 self.logger.warning(msg)
                 return True
-            # send alert via AlertManager API
-            alertName = "ms-output: Campaign not found: {}".format(dataItem['Campaign'])
-            alertSeverity = "high"
-            alertSummary = "[MSOutput] Campaign '{}' not found in central CouchDB".format(dataItem['Campaign'])
-            alertDescription = "Dataset: {} cannot have an output transfer rule ".format(dataItem['Dataset'])
-            alertDescription += "because its campaign: {} cannot be found in central CouchDB.".format(dataItem['Campaign'])
-            alertDescription += " In order to get output data placement working, add it ASAP please."
-            self.logger.critical(alertDescription)
-            if self.msConfig["sendNotification"]:
-                self.alertManagerAPI.sendAlert(alertName, alertSeverity, alertSummary, alertDescription, self.alertServiceName)
+            # log and send alert via AlertManager API
+            self.alertCampaignNotFound(dataItem['Campaign'], dataItem['Dataset'])
             raise
 
         if dataTier in self.uConfig['tiers_to_DDM']['value']:
@@ -775,16 +759,8 @@ def canDatasetGoToDisk(self, dataItem, isRelVal=False):
         elif dataTier in self.uConfig['tiers_no_DDM']['value']:
             return False
         else:
-            # send alert via AlertManager API
-            alertName = "ms-output: Datatier not found: {}".format(dataTier)
-            alertSeverity = "high"
-            alertSummary = "[MSOutput] Datatier not found in the Unified configuration: {}".format(dataTier)
-            alertDescription = "Dataset: {} contains a datatier: {}".format(dataItem['Dataset'], dataTier)
-            alertDescription += " not yet inserted into Unified configuration. "
-            alertDescription += "Please add it ASAP. Letting it pass for now..."
-            self.logger.critical(alertDescription)
-            if self.msConfig["sendNotification"] and not isRelVal:
-                self.alertManagerAPI.sendAlert(alertName, alertSeverity, alertSummary, alertDescription, self.alertServiceName)
+            # log and send alert via AlertManager API
+            self.alertDatatierNotFound(dataTier, dataItem['Dataset'], isRelVal)
             return True
 
     def _getDataVolumeForTape(self, workflow):
@@ -989,3 +965,56 @@ def docCleaner(self, doc):
         of the document
         """
         return doc.clear()
+
+    def alertDIDNotFound(self, wflowName, containerList):
+        """
+        Log a warning and, if notifications are enabled, send an alert about
+        output containers of a given workflow that were not found in Rucio.
+        :param wflowName: string with the workflow name
+        :param containerList: list of container names
+        :return: none
+        """
+        alertName = "ms-output: output containers not found for workflow: {}".format(wflowName)
+        alertSeverity = "high"
+        alertSummary = "[MSOutput] Workflow '{}' has output datasets unknown to Rucio".format(wflowName)
+        alertDescription = "Dataset(s): {} cannot be found in Rucio. ".format(containerList)
+        alertDescription += "Thus, we are skipping these datasets from the final output "
+        alertDescription += "data placement, such that this workflow can get archived."
+        self.logger.warning(alertDescription)
+        if self.msConfig["sendNotification"]:
+            self.sendAlert(alertName, alertSeverity, alertSummary, alertDescription, self.alertServiceName)
+
+    def alertCampaignNotFound(self, campaignName, containerName):
+        """
+        Log as critical and, if enabled, alert on a campaign missing in central CouchDB.
+        :param campaignName: string with the campaign name
+        :param containerName: string with the container name
+        :return: none
+        """
+        alertName = "ms-output: Campaign not found: {}".format(campaignName)
+        alertSeverity = "high"
+        alertSummary = "[MSOutput] Campaign '{}' not found in central CouchDB".format(campaignName)
+        alertDescription = "Dataset: {} cannot have an output transfer rule ".format(containerName)
+        alertDescription += "because its campaign: {} cannot be found in central CouchDB.".format(campaignName)
+        alertDescription += " In order to get output data placement working, add it ASAP please."
+        self.logger.critical(alertDescription)
+        if self.msConfig["sendNotification"]:
+            self.sendAlert(alertName, alertSeverity, alertSummary, alertDescription, self.alertServiceName)
+
+    def alertDatatierNotFound(self, datatierName, containerName, isRelVal):
+        """
+        Log as critical and, if enabled, alert on a datatier missing in the Unified config.
+        :param datatierName: string with the datatier name
+        :param containerName: string with the container name
+        :param isRelVal: boolean; when True the alert is only logged, never sent
+        :return: none
+        """
+        alertName = "ms-output: Datatier not found: {}".format(datatierName)
+        alertSeverity = "high"
+        alertSummary = "[MSOutput] Datatier not found in the Unified configuration: {}".format(datatierName)
+        alertDescription = "Dataset: {} contains a datatier: {}".format(containerName, datatierName)
+        alertDescription += " not yet inserted into Unified configuration. "
+        alertDescription += "Please add it ASAP. Letting it pass for now..."
+        self.logger.critical(alertDescription)
+        if self.msConfig["sendNotification"] and not isRelVal:
+            self.sendAlert(alertName, alertSeverity, alertSummary, alertDescription, self.alertServiceName)
diff --git a/src/python/WMCore/MicroService/MSRuleCleaner/MSRuleCleaner.py b/src/python/WMCore/MicroService/MSRuleCleaner/MSRuleCleaner.py
index 50cf3147f5..7c66941c9f 100644
--- a/src/python/WMCore/MicroService/MSRuleCleaner/MSRuleCleaner.py
+++ b/src/python/WMCore/MicroService/MSRuleCleaner/MSRuleCleaner.py
@@ -590,12 +590,15 @@ def getMSOutputTransferInfo(self, wflow):
             msg += "Error: %s"
             self.logger.exception(msg, wflow['RequestName'], str(ex))
 
-        # Set Transfer status - information fetched from MSOutput only
-        if transferInfo is not None and transferInfo['TransferStatus'] == 'done':
+        if transferInfo is None:
+            msg = f"Workflow {wflow['RequestName']} is still missing the output transfer document."
+            self.logger.warning(msg)
+        elif transferInfo['TransferStatus'] == 'done':
+            # Set Transfer status - information fetched from MSOutput only
             wflow['TransferDone'] = True
 
         # Set Tape rules status - information fetched from Rucio (tape rule ids from MSOutput)
         if transferInfo is not None and transferInfo['OutputMap']:
             tapeRulesStatusList = []
             # For setting 'TransferTape' = True we require either no tape rules for the
             # workflow have been created or all existing tape rules to be in status 'OK',