From 6e910bd795b8441eb435b37944cc656b0a3b4b91 Mon Sep 17 00:00:00 2001 From: Athish Thiruvengadam Date: Thu, 19 Sep 2024 17:22:34 -0500 Subject: [PATCH] RXTE and SUZAKU Text conversions removed utils.py small fix updated trigger_date parsing and calet jsons creation SUZAKU Text conversion RXTE Text conversion added field updated SUZAKU conversions Updated RXTE conversions --- .../notices/rxte/README.md | 19 +++ .../notices/rxte/__init__.py | 0 .../notices/rxte/__main__.py | 4 + .../notices/rxte/conversion.py | 161 ++++++++++++++++++ .../notices/suzaku/README.md | 11 ++ .../notices/suzaku/__init__.py | 0 .../notices/suzaku/__main__.py | 4 + .../notices/suzaku/conversion.py | 98 +++++++++++ 8 files changed, 297 insertions(+) create mode 100644 gcn_classic_text_to_json/notices/rxte/README.md create mode 100644 gcn_classic_text_to_json/notices/rxte/__init__.py create mode 100644 gcn_classic_text_to_json/notices/rxte/__main__.py create mode 100644 gcn_classic_text_to_json/notices/rxte/conversion.py create mode 100644 gcn_classic_text_to_json/notices/suzaku/README.md create mode 100644 gcn_classic_text_to_json/notices/suzaku/__init__.py create mode 100644 gcn_classic_text_to_json/notices/suzaku/__main__.py create mode 100644 gcn_classic_text_to_json/notices/suzaku/conversion.py diff --git a/gcn_classic_text_to_json/notices/rxte/README.md b/gcn_classic_text_to_json/notices/rxte/README.md new file mode 100644 index 0000000..af29ee7 --- /dev/null +++ b/gcn_classic_text_to_json/notices/rxte/README.md @@ -0,0 +1,19 @@ +# RXTE Text Conversion + +Parses through all webpages with RXTE text notices and creates a JSON with GCN schema keywords. Creates a `rxte_jsons` directory inside an `output` directory and saves jsons as `RXTE_{serial_number}_{record_number}.json` where serial_number is a random iterating number with no association to the notices and record_number is the current notice in the webpage. 
+ +### Uses the following fields from the core schema for text notice fields +- `id` → TRIGGER_NUM (PCA Notices Only) +- `ra` → GRB_LOCBURST_RA (PCA) / GRB_RXTE_RA (ASM) +- `dec` → GRB_LOCBURST_DEC (PCA) / GRB_RXTE_DEC (ASM) +- `alert_datetime` → NOTICE_DATE +- `trigger_time` → GRB_DATE, GRB_TIME +- `ra_dec_error` → GRB_RXTE_ERROR + +### Defines the following new fields for the text notice fields (For ASM Notices Only) +- `position_type` → POSITION_TYPE +- `flux_energy_mcrab` → GRB_RXTE_INTEN + +## Caveats +- Some Notices are marked PCA for testing but follow the ASM format. I've manually converted these into ASM Notices. +- There are additional fields associated with the POSITION_TYPE which detail the properties of the error box. I've chosen to not include these fields. diff --git a/gcn_classic_text_to_json/notices/rxte/__init__.py b/gcn_classic_text_to_json/notices/rxte/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gcn_classic_text_to_json/notices/rxte/__main__.py b/gcn_classic_text_to_json/notices/rxte/__main__.py new file mode 100644 index 0000000..baae721 --- /dev/null +++ b/gcn_classic_text_to_json/notices/rxte/__main__.py @@ -0,0 +1,4 @@ +from . import conversion + +if __name__ == "__main__": + conversion.create_all_rxte_jsons() diff --git a/gcn_classic_text_to_json/notices/rxte/conversion.py b/gcn_classic_text_to_json/notices/rxte/conversion.py new file mode 100644 index 0000000..16a33fa --- /dev/null +++ b/gcn_classic_text_to_json/notices/rxte/conversion.py @@ -0,0 +1,161 @@ +import email +import json +import os + +import requests + +from ... 
# Keyword mappings handed to ``conversion.text_to_json``.
# "standard" maps schema field names to text-notice keywords; "additional"
# maps new field names to a (keyword, type-name) pair -- presumably the type
# the value is cast to; confirm against ``conversion.text_to_json``.

# PCA notice whose comments say the burst could not be localized: no position.
input_pca = {
    "standard": {
        "id": "TRIGGER_NUM",
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["GRB_DATE", "GRB_TIME"],
    }
}

# PCA "BURST ALERT" notice: position comes from the GRB_LOCBURST_* keywords.
input_pca_burst_alert = {
    "standard": {
        "id": "TRIGGER_NUM",
        "ra": "GRB_LOCBURST_RA",
        "dec": "GRB_LOCBURST_DEC",
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["GRB_DATE", "GRB_TIME"],
    }
}

# Any other PCA notice: position comes from the GRB_RXTE_* keywords.
input_pca_burst_position = {
    "standard": {
        "id": "TRIGGER_NUM",
        "ra": "GRB_RXTE_RA",
        # Was "GRB_RXTE_RA", which copied the right ascension into `dec`.
        "dec": "GRB_RXTE_DEC",
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["GRB_DATE", "GRB_TIME"],
    },
    "additional": {
        "ra_dec_error": ("GRB_RXTE_ERROR", "float"),
        "flux_energy_mcrab": ("GRB_RXTE_INTEN", "float"),
    },
}

# ASM notice (also used for PCA-labelled notices that carry POSITION_TYPE).
input_asm = {
    "standard": {
        "ra": "GRB_RXTE_RA",
        "dec": "GRB_RXTE_DEC",
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["GRB_DATE", "GRB_TIME"],
    },
    "additional": {
        "position_type": ("POSITION_TYPE", "string"),
        "ra_dec_error": ("GRB_RXTE_ERROR", "float"),
        "flux_energy_mcrab": ("GRB_RXTE_INTEN", "float"),
    },
}


def text_to_json_rxte(notice, input, record_number, instrument):
    """Convert one parsed RXTE notice and attach the RXTE-specific fields.

    Calls ``conversion.text_to_json`` and then adds the schema URL, mission,
    instrument, messenger, record number and alert type.

    Parameters
    -----------
    notice: dict
        The text notice that is being parsed. Its ``COMMENTS`` entry, when
        present, is inspected for the "NOT able to localize" marker.
    input: dict
        The mapping between text notices keywords and GCN schema keywords.
    record_number: int
        The current notice in the webpage being parsed.
    instrument: str
        The RXTE notice type. Either PCA or ASM.

    Returns
    -------
    dictionary
        A dictionary compliant with the associated schema for the mission.

    Notes
    -----
    When the comments state that RXTE-PCA could not localize the burst, the
    position-free ``input_pca`` mapping is used instead of ``input``. The
    common metadata is still attached in that case; previously the function
    returned early without it.
    """
    not_localized = "RXTE-PCA was NOT able to localize this GRB." in notice.get(
        "COMMENTS", ""
    )
    mapping = input_pca if not_localized else input
    output_dict = conversion.text_to_json(notice, mapping)

    output_dict["$schema"] = (
        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/rxte/alert.schema.json"
    )
    output_dict["mission"] = "RXTE"
    output_dict["instrument"] = instrument
    output_dict["messenger"] = "EM"

    output_dict["record_number"] = record_number
    output_dict["alert_type"] = "initial" if record_number == 1 else "update"

    if "ra_dec_error" in output_dict:
        output_dict["systematic_included"] = True

    return output_dict


def iter_rxte_notices(data):
    """Yield the raw text of every notice in an RXTE archive page.

    A notice starts at the literal ``TITLE`` and ends just before the next
    ``"\\n \\n "`` sequence. A trailing notice with no such terminator is not
    yielded.
    """
    start_idx = data.find("TITLE")
    while start_idx != -1:
        end_idx = data.find("\n \n ", start_idx)
        if end_idx == -1:
            return
        yield data[start_idx:end_idx]
        start_idx = data.find("TITLE", end_idx)


def parse_notice(text):
    """Parse the raw text of one notice into a ``{KEYWORD: value}`` dict.

    Slashes are stripped only from lines that contain a ``///////////``
    separator, so slashes inside real values (e.g. ``yy/mm/dd`` dates) are
    kept. All ``COMMENTS`` lines are joined with newlines into one entry,
    which is the empty string when the notice has no comments.
    """
    cleaned = "\n".join(
        line.replace("/", "") if "///////////" in line else line
        for line in text.split("\n")
    )
    message = email.message_from_string(cleaned.strip())
    notice_dict = dict(message)
    # get_all() returns None by default when the header is absent.
    notice_dict["COMMENTS"] = "\n".join(message.get_all("COMMENTS", []))
    return notice_dict


def select_rxte_mapping(notice_dict):
    """Return ``(mapping, instrument)`` for a parsed notice.

    The instrument is the last three characters of the first word of
    ``NOTICE_TYPE``. PCA-labelled notices that carry ``POSITION_TYPE`` are
    treated as ASM notices. Returns ``None`` when the instrument is neither
    PCA nor ASM.
    """
    words = notice_dict.get("NOTICE_TYPE", "").split()
    instrument = words[0][-3:] if words else ""

    if instrument == "ASM":
        return input_asm, "ASM"
    if instrument == "PCA":
        if "POSITION_TYPE" in notice_dict:
            return input_asm, "ASM"
        if "BURST ALERT" in notice_dict.get("TITLE", ""):
            return input_pca_burst_alert, "PCA"
        return input_pca_burst_position, "PCA"
    return None


def create_all_rxte_jsons():
    """Creates a `rxte_jsons` directory and fills it with the json for all RXTE triggers.

    Each trigger page is downloaded, split into notices, and every notice is
    written to ``./output/rxte_jsons/RXTE_{serial}_{record}.json``.

    Raises
    ------
    ValueError
        If a notice's ``NOTICE_TYPE`` is neither PCA nor ASM. Previously this
        surfaced as an unbound ``output`` or silently rewrote the previous
        notice's data under a new file name.
    """
    output_path = "./output/rxte_jsons/"
    os.makedirs(output_path, exist_ok=True)

    archive_link = "https://gcn.gsfc.nasa.gov/rxte_grbs.html"
    prefix = "https://gcn.gsfc.nasa.gov/"
    search_string = "other/.*rxte"
    # Sorted so serial numbers are reproducible between runs (assumes the
    # links are URL strings, as their use with requests.get suggests).
    links = sorted(
        conversion.parse_trigger_links(archive_link, prefix, search_string)
    )

    for sernum, link in enumerate(links, start=1):
        # A timeout keeps one unresponsive page from hanging the whole run.
        data = requests.get(link, timeout=60).text

        for record_number, raw_notice in enumerate(iter_rxte_notices(data), start=1):
            notice_dict = parse_notice(raw_notice)

            selection = select_rxte_mapping(notice_dict)
            if selection is None:
                raise ValueError(
                    f"Unrecognised RXTE NOTICE_TYPE "
                    f"{notice_dict.get('NOTICE_TYPE')!r} in {link}"
                )
            mapping, instrument = selection

            output = text_to_json_rxte(
                notice_dict, mapping, record_number, instrument
            )

            with open(f"{output_path}RXTE_{sernum}_{record_number}.json", "w") as f:
                json.dump(output, f)
# Keyword mapping handed to ``conversion.text_to_json``: schema field name ->
# text-notice keyword(s). (The name shadows the ``input`` builtin but is kept
# because it is a module-level name other code may reference.)
input = {
    "standard": {
        "id": "TRIGGER_NUM",
        "alert_datetime": "NOTICE_DATE",
        "trigger_time": ["TRIGGER_DATE", "TRIGGER_TIME"],
    }
}


def text_to_json_suzaku(notice, input, record_number):
    """Convert one parsed SUZAKU notice and attach the SUZAKU-specific fields.

    Calls ``conversion.text_to_json`` and then adds the fields which cannot be
    dealt with by the general function.

    Parameters
    -----------
    notice: dict
        The text notice that is being parsed. Must contain ``LC_URL``.
    input: dict
        The mapping between text notices keywords and GCN schema keywords.
    record_number: int
        The current notice in the webpage being parsed.

    Returns
    -------
    dictionary
        A dictionary compliant with the associated schema for the mission.

    Raises
    ------
    KeyError
        If ``notice`` has no ``LC_URL`` entry.
    """
    output_dict = conversion.text_to_json(notice, input)

    output_dict["$schema"] = (
        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/suzaku/alert.schema.json"
    )

    output_dict["record_number"] = record_number
    output_dict["alert_type"] = "initial" if record_number == 1 else "update"

    output_dict["mission"] = "SUZAKU"
    output_dict["instrument"] = "WAM"
    output_dict["trigger_type"] = "rate"

    url = notice["LC_URL"]
    output_dict["lightcurve_url"] = f"https://gcn.gsfc.nasa.gov/notices_suz/{url}"

    return output_dict


def iter_suzaku_notices(data):
    """Yield the raw text of every notice in a SUZAKU archive page.

    The first notice starts after the page's first line. Each notice ends
    just before the next ``"\\n \\n "`` sequence, and the following notice
    starts on the line after the next ``///////////`` separator.
    """
    start_idx = data.find("\n") + 1
    while True:
        end_idx = data.find("\n \n ", start_idx)
        if end_idx == -1:
            return
        yield data[start_idx:end_idx]

        separator_idx = data.find("///////////", end_idx)
        if separator_idx == -1:
            return
        newline_idx = data.find("\n", separator_idx)
        if newline_idx == -1:
            # Separator is the last line of the page. Without this guard the
            # start index wrapped back to 0 and the loop never terminated.
            return
        start_idx = newline_idx + 1


def parse_notice(text):
    """Parse the raw text of one notice into a ``{KEYWORD: value}`` dict.

    Slashes are stripped only from lines that contain a ``///////////``
    separator, so slashes inside real values (e.g. ``yy/mm/dd`` dates) are
    kept. All ``COMMENTS`` lines are joined with newlines into one entry,
    which is the empty string when the notice has no comments.
    """
    cleaned = "\n".join(
        line.replace("/", "") if "///////////" in line else line
        for line in text.split("\n")
    )
    message = email.message_from_string(cleaned.strip())
    notice_dict = dict(message)
    # get_all() returns None by default when the header is absent.
    notice_dict["COMMENTS"] = "\n".join(message.get_all("COMMENTS", []))
    return notice_dict


def create_all_suzaku_jsons():
    """Creates a `suzaku_jsons` directory and fills it with the json for all SUZAKU triggers.

    Each trigger page is downloaded, split into notices, and every notice is
    written to ``./output/suzaku_jsons/SUZAKU_WAM_{serial}_{record}.json``.
    """
    output_path = "./output/suzaku_jsons/"
    os.makedirs(output_path, exist_ok=True)

    archive_link = "https://gcn.gsfc.nasa.gov/suzaku_wam.html"
    prefix = "https://gcn.gsfc.nasa.gov/"
    search_string = "other/.*suzaku"
    # Sorted so serial numbers are reproducible between runs (assumes the
    # links are URL strings, as their use with requests.get suggests).
    links = sorted(
        conversion.parse_trigger_links(archive_link, prefix, search_string)
    )

    for sernum, link in enumerate(links, start=1):
        # A timeout keeps one unresponsive page from hanging the whole run.
        data = requests.get(link, timeout=60).text

        for record_number, raw_notice in enumerate(
            iter_suzaku_notices(data), start=1
        ):
            notice_dict = parse_notice(raw_notice)
            output = text_to_json_suzaku(notice_dict, input, record_number)

            with open(
                f"{output_path}SUZAKU_WAM_{sernum}_{record_number}.json", "w"
            ) as f:
                json.dump(output, f)