From a7556ca053c8a9fac3934698e7657e6f9123c0a1 Mon Sep 17 00:00:00 2001 From: Athish Thiruvengadam Date: Thu, 19 Sep 2024 17:22:34 -0500 Subject: [PATCH 1/3] A series of bug fixes removed utils.py small fix updated trigger_date parsing and calet jsons creation editted conversion.py to deal with inputs that have no 'additional' dictionary Fixed ra/dec parsing to be seperate Updated alexis conversion updated poetry updated all modules for search_string Added \n to additional_info in snews and sk_sn added all current conversions to main.py updated id changed url to lightcurve_url; added field Added field to all conversions --- gcn_classic_text_to_json/conversion.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gcn_classic_text_to_json/conversion.py b/gcn_classic_text_to_json/conversion.py index fbb7eca..da16417 100644 --- a/gcn_classic_text_to_json/conversion.py +++ b/gcn_classic_text_to_json/conversion.py @@ -123,14 +123,18 @@ def text_to_json(notice, keywords_dict): notice_ra = keywords_dict["standard"]["ra"] ra_data = notice[notice_ra].split() - if ra_data[0] != "Undefined": + if ra_data[0] == "Undefined": + output["ra"] = None + else: output["ra"] = float(ra_data[0][:-1]) if "dec" in keywords_dict["standard"]: notice_dec = keywords_dict["standard"]["dec"] dec_data = notice[notice_dec].split() - if dec_data[0] != "Undefined": + if dec_data[0] == "Undefined": + output["dec"] = None + else: output["dec"] = float(dec_data[0][:-1]) if "additional" in keywords_dict: From f42c37c3d50200f0b593f7cb2e78f51744c9deab Mon Sep 17 00:00:00 2001 From: Athish Thiruvengadam Date: Mon, 30 Sep 2024 10:55:03 -0500 Subject: [PATCH 2/3] MOA Text conversion --- .../notices/moa/README.md | 26 +++ .../notices/moa/__init__.py | 0 .../notices/moa/__main__.py | 4 + .../notices/moa/conversion.py | 149 ++++++++++++++++++ 4 files changed, 179 insertions(+) create mode 100644 gcn_classic_text_to_json/notices/moa/README.md create mode 100644 
gcn_classic_text_to_json/notices/moa/__init__.py create mode 100644 gcn_classic_text_to_json/notices/moa/__main__.py create mode 100644 gcn_classic_text_to_json/notices/moa/conversion.py diff --git a/gcn_classic_text_to_json/notices/moa/README.md b/gcn_classic_text_to_json/notices/moa/README.md new file mode 100644 index 0000000..9872bcc --- /dev/null +++ b/gcn_classic_text_to_json/notices/moa/README.md @@ -0,0 +1,26 @@ +# MOA Text Conversion + +Parses through all webpages with MOA text notices and creates a JSON with GCN schema keywords. Creates a `moa_jsons` directory inside an `output` directory and saves jsons as `MOA_{serial_number}_{record_number}.json` where serial_number is an arbitrary incrementing number with no association to the notices and record_number is the position of the notice within its webpage (starting at 1). + +### Uses the following fields from the core schema for text notice fields +- `id` → SRC_ID_NUM +- `ra` → SRC_RA +- `dec` → SRC_DEC +- `alert_datetime` → NOTICE_DATE +- `trigger_time` → DISCOVERY_DATE, DISCOVERY_TIME + +### Defines the following new fields for the text notice fields +- `lightcurve_url` → LC_URL +- `max_time` → MAX_DATE, MAX_TIME +- `max_time_error` → MAX_UNCERT +- `cusp_width`, `cusp_width_error` → CUSP_WIDTH +- `u0`, `u0_error` → u0 +- `base_mag`, `base_mag_error` → BASE_MAG +- `max_mag` → MAX_MAG/PEAK_MAG +- `amplification` → AMPLIFICATION + +## Caveats +- `lightcurve_url` has been copied as-is from the GCN text notices, but some of the URLs do not link to lightcurves. +- MAX_MAG and AMPLIFICATION have been provided for some of the notices but not for the others. The corresponding JSON notices include `max_mag` and `amplification` only when the text notice provides a value. +- Additionally, there is a LEAD_TIME in the text notices which is the difference between `trigger_time` and `max_time`. Since this can be calculated from these values, I have chosen to not include this in the JSON notices.
+- Some text notices have a very different formatting like `https://gcn.gsfc.nasa.gov/other/moa/201500099_moa.txt` and `https://gcn.gsfc.nasa.gov/other/moa/_moa.txt` or notices with no information like `https://gcn.gsfc.nasa.gov/other/moa/201400214_moa.txt` and so I've adopted a slightly different parsing for these. diff --git a/gcn_classic_text_to_json/notices/moa/__init__.py b/gcn_classic_text_to_json/notices/moa/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gcn_classic_text_to_json/notices/moa/__main__.py b/gcn_classic_text_to_json/notices/moa/__main__.py new file mode 100644 index 0000000..d63941b --- /dev/null +++ b/gcn_classic_text_to_json/notices/moa/__main__.py @@ -0,0 +1,4 @@ +from . import conversion + +if __name__ == "__main__": + conversion.create_all_moa_jsons() diff --git a/gcn_classic_text_to_json/notices/moa/conversion.py b/gcn_classic_text_to_json/notices/moa/conversion.py new file mode 100644 index 0000000..e9fecde --- /dev/null +++ b/gcn_classic_text_to_json/notices/moa/conversion.py @@ -0,0 +1,149 @@ +import email +import json +import os + +import requests + +from ... import conversion + +input = { + "standard": { + "id": "SRC_ID_NUM", + "ra": "SRC_RA", + "dec": "SRC_DEC", + "alert_datetime": "NOTICE_DATE", + "trigger_time": ["DISCOVERY_DATE", "DISCOVERY_TIME"], + }, + "additional": { + "max_time_error": ("MAX_UNCERT", "float"), + "cusp_width": ("CUSP_WIDTH", "float"), + "u0": ("u0", "float"), + "base_mag": ("BASE_MAG", "float"), + "lightcurve_url": ("LC_URL", "string"), + }, +} + + +def text_to_json_moa(notice, input, record_number): + """Function calls text_to_json and then adds additional fields with cannot be dealt with by the general function. + + Parameters + ----------- + notice: dict + The text notice that is being parsed. + input: dict + The mapping between text notices keywords and GCN schema keywords. + record_number: int + The current notice in the webpage being parsed. 
+ + Returns + ------- + dictionary + A dictionary compliant with the associated schema for the mission.""" + output_dict = conversion.text_to_json(notice, input) + + output_dict["$schema"] = ( + "https://gcn.nasa.gov/schema/main/gcn/notices/classic/moa/alert.schema.json" + ) + output_dict["mission"] = "MOA" + output_dict["record_number"] = record_number + if record_number == 1: + output_dict["alert_type"] = "initial" + else: + output_dict["alert_type"] = "update" + + max_date_data = notice["MAX_DATE"].split() + + max_date = max_date_data[-1] + if max_date == "(yy/mm/dd)": + max_date = "20" + max_date_data[-2] + + max_time_data = notice["MAX_TIME"] + max_time_start_idx = max_time_data.find("{") + max_time_end_idx = max_time_data.find("}", max_time_start_idx) + trigger_time = max_time_data[max_time_start_idx + 1 : max_time_end_idx] + max_datetime = f"{max_date.replace('/', '-', 2)}T{trigger_time}Z" + output_dict["max_time"] = max_datetime + + if "MAX_MAG" in notice: + max_mag = notice["MAX_MAG"].split()[0] + elif "PEAK_MAG" in notice: + max_mag = notice["PEAK_MAG"].split()[0] + if max_mag != "No": + output_dict["max_mag"] = float(max_mag) + + amplification = notice["AMPLIFICATION"].split()[0] + if amplification != "No": + output_dict["amplification"] = float(amplification) + + output_dict["cusp_width_error"] = float(notice["CUSP_WIDTH"].split()[-2]) + + output_dict["u0_error"] = float(notice["u0"].split()[-2]) + + output_dict["base_mag_error"] = float(notice["BASE_MAG"].split()[-2]) + + return output_dict + + +def create_all_moa_jsons(): + """Creates a `moa_jsons` directory and fills it with the json for all MOA triggers.""" + output_path = "./output/moa_jsons/" + if not os.path.exists(output_path): + os.makedirs(output_path) + + archive_link = "https://gcn.gsfc.nasa.gov/moa_events.html" + prefix = "https://gcn.gsfc.nasa.gov/" + search_string = "other/.*moa.txt" + links_set = conversion.parse_trigger_links(archive_link, prefix, search_string) + links_list = 
list(links_set) + + for sernum in range(len(links_list)): + link = links_list[sernum] + data = requests.get(link).text + + if link == "https://gcn.gsfc.nasa.gov/other/moa/201400214_moa.txt": + continue + + record_number = 1 + if ( + link != "https://gcn.gsfc.nasa.gov/other/moa/201500099_moa.txt" + and link != "https://gcn.gsfc.nasa.gov/other/moa/_moa.txt" + ): + start_idx = data.find("\n") + 1 + else: + start_idx = data.find("TITLE") + + while True: + if ( + link != "https://gcn.gsfc.nasa.gov/other/moa/201500099_moa.txt" + and link != "https://gcn.gsfc.nasa.gov/other/moa/_moa.txt" + ): + end_idx = data.find("\n \n ", start_idx) + else: + end_idx = data.find("unavailable", start_idx) + len("unavailable") + 1 + + notice_message = email.message_from_string(data[start_idx:end_idx].strip()) + print(link) + # print(notice_message) + comment = "\n".join(notice_message.get_all("COMMENTS")) + notice_dict = dict(notice_message) + notice_dict["COMMENTS"] = comment + + output = text_to_json_moa(notice_dict, input, record_number) + + with open(f"{output_path}MOA_{sernum+1}_{record_number}.json", "w") as f: + json.dump(output, f) + + record_number += 1 + if ( + link != "https://gcn.gsfc.nasa.gov/other/moa/201500099_moa.txt" + and link != "https://gcn.gsfc.nasa.gov/other/moa/_moa.txt" + ): + temp_start_idx = data.find("///////////", end_idx) + start_idx = data.find("\n", temp_start_idx) + if temp_start_idx == -1: + break + else: + start_idx = data.find("TITLE", end_idx) + if start_idx == -1: + break From 3766d8d19bae74442ecde1d754040232ea1c64b2 Mon Sep 17 00:00:00 2001 From: Athish Thiruvengadam Date: Mon, 30 Sep 2024 15:59:24 -0500 Subject: [PATCH 3/3] MAXI Text conversion Added citation removed print statements --- .../notices/maxi/README.md | 27 +++ .../notices/maxi/__init__.py | 0 .../notices/maxi/__main__.py | 4 + .../notices/maxi/conversion.py | 175 ++++++++++++++++++ .../notices/moa/conversion.py | 2 - 5 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 
gcn_classic_text_to_json/notices/maxi/README.md create mode 100644 gcn_classic_text_to_json/notices/maxi/__init__.py create mode 100644 gcn_classic_text_to_json/notices/maxi/__main__.py create mode 100644 gcn_classic_text_to_json/notices/maxi/conversion.py diff --git a/gcn_classic_text_to_json/notices/maxi/README.md b/gcn_classic_text_to_json/notices/maxi/README.md new file mode 100644 index 0000000..6e24300 --- /dev/null +++ b/gcn_classic_text_to_json/notices/maxi/README.md @@ -0,0 +1,27 @@ +# MAXI Text Conversion + +Parses through all webpages with MAXI text notices and creates a JSON with GCN schema keywords. Creates a `maxi_jsons` directory inside an `output` directory and saves jsons as `MAXI_{serial_number}_{record_number}.json` where serial_number is an arbitrary incrementing number with no association to the notices and record_number is the position of the notice within its webpage (starting at 1). + +### Uses the following fields from the core schema for text notice fields +- `id` → SRC_ID_NUM/EVENT_ID_NUM +- `ra` → SRC_RA/EVENT_RA +- `dec` → SRC_DEC/EVENT_DEC +- `ra_dec_error` → SRC_ERROR/EVENT_ERROR +- `alert_datetime` → NOTICE_DATE +- `trigger_time` → SRC_DATE/EVENT_DATE, SRC_TIME/EVENT_TIME +- `longitude`, `latitude` → ISS_LON_LAT +- `energy_flux` → SRC_FLUX/EVENT_FLUX +- `flux_energy_range` → SRC_EBAND/EVENT_EBAND +- `classification` → SRC_CLASS + +### Defines the following new fields for the text notice fields +- `notice_type` → NOTICE_TYPE +- `source_name` → SRC_NAME +- `duration` → SRC_TSCALE/EVENT_TSCALE +- `rate_snr` → SIGNIFICANCE +- `source_flux_low_band`, `background_flux_low_band`, `source_flux_medium_band`, `background_flux_medium_band`, `source_flux_high_band`, `background_flux_high_band` → BAND_FLUX + +## Caveats +- ISS_LON_LAT is just defined as 0.00, 0.00 for some notices. In this case, I have not added `longitude` and `latitude` to the JSON notices. +- Similarly, sometimes EVENT_FLUX has errors but these are always 0 so again I have not added these.
+- There are a series of links that have empty notices. I have chosen to skip these. diff --git a/gcn_classic_text_to_json/notices/maxi/__init__.py b/gcn_classic_text_to_json/notices/maxi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gcn_classic_text_to_json/notices/maxi/__main__.py b/gcn_classic_text_to_json/notices/maxi/__main__.py new file mode 100644 index 0000000..e6810f6 --- /dev/null +++ b/gcn_classic_text_to_json/notices/maxi/__main__.py @@ -0,0 +1,4 @@ +from . import conversion + +if __name__ == "__main__": + conversion.create_all_maxi_jsons() diff --git a/gcn_classic_text_to_json/notices/maxi/conversion.py b/gcn_classic_text_to_json/notices/maxi/conversion.py new file mode 100644 index 0000000..c59e388 --- /dev/null +++ b/gcn_classic_text_to_json/notices/maxi/conversion.py @@ -0,0 +1,175 @@ +import email +import json +import os + +import requests + +from ... import conversion + +input_known = { + "standard": { + "id": "SRC_ID_NUM", + "alert_datetime": "NOTICE_DATE", + "trigger_time": ["SRC_DATE", "SRC_TIME"], + "ra": "SRC_RA", + "dec": "SRC_DEC", + }, + "additional": { + "ra_dec_error": ("SRC_ERROR", "float"), + "energy_flux": ("SRC_FLUX", "float"), + "duration": ("SRC_TSCALE", "string"), + "source_name": ("SOURCE_NAME", "string"), + }, +} + +input_unknown = { + "standard": { + "id": "EVENT_ID_NUM", + "alert_datetime": "NOTICE_DATE", + "trigger_time": ["EVENT_DATE", "EVENT_TIME"], + "ra": "EVENT_RA", + "dec": "EVENT_DEC", + }, + "additional": { + "ra_dec_error": ("EVENT_ERROR", "float"), + "energy_flux": ("EVENT_FLUX", "float"), + "duration": ("EVENT_TSCALE", "string"), + }, +} + +# From Kawamura et al. 
2018 +conversion_factors = [4e-12, 1.24e-11, 1.65e-11, 8.74e-12] +energy_range_options = [[2, 4], [4, 10], [10, 20], [2, 10]] +source_band_flux = [ + ("source_flux_low_band", "background_flux_low_band"), + ("source_flux_medium_band", "background_flux_medium_band"), + ("source_flux_high_band", "background_flux_high_band"), +] + +bad_links = [ + "https://gcn.gsfc.nasa.gov/other/6743227223.maxi", + "https://gcn.gsfc.nasa.gov/other/6397334289.maxi", + "https://gcn.gsfc.nasa.gov/other/6841168969.maxi", + "https://gcn.gsfc.nasa.gov/other/6731800001.maxi", + "https://gcn.gsfc.nasa.gov/other/6397381732.maxi", + "https://gcn.gsfc.nasa.gov/other/6741178054.maxi", +] + + +def text_to_json_maxi(notice, input, record_number, notice_type): + """Function calls text_to_json and then adds additional fields depeding on the `notice_type`. + + Parameters + ----------- + notice: dict + The text notice that is being parsed. + input: dict + The mapping between text notices keywords and GCN schema keywords. + record_number: int + The current notice in the webpage being parsed. + notice_type: + The type of MAXI notice. 
+ + Returns + ------- + dictionary + A dictionary compliant with the associated schema for the mission.""" + output_dict = conversion.text_to_json(notice, input) + + output_dict["$schema"] = ( + "https://gcn.nasa.gov/schema/main/gcn/notices/classic/maxi/alert.schema.json" + ) + output_dict["notice_type"] = notice_type + output_dict["systematic_included"] = True + + output_dict["record_number"] = record_number + if record_number == 1: + output_dict["alert_type"] = "initial" + else: + output_dict["alert_type"] = "update" + + if notice_type == "Known": + eband_data = notice["SRC_EBAND"].split()[1].split("-") + elif notice_type == "Unknown": + eband_data = notice["EVENT_EBAND"].split()[1].split("-") + eband = [int(eband_data[0]), int(eband_data[1])] + output_dict["flux_energy_range"] = eband + index = energy_range_options.index(eband) + output_dict["energy_flux"] = output_dict["energy_flux"] * conversion_factors[index] + + if notice_type == "Known": + output_dict["classification"] = {notice["SRC_CLASS"].split()[0]: 1} + + band_fluxes = notice["BAND_FLUX"].split("\n") + + for idx in range(len(band_fluxes)): + band_flux_data = band_fluxes[idx].split(",") + + output_dict[source_band_flux[idx][0]] = ( + float(band_flux_data[0][:-1]) * conversion_factors[idx] + ) + output_dict[source_band_flux[idx][1]] = ( + float(band_flux_data[1].split()[0]) * conversion_factors[idx] + ) + + lon_lat_data = notice["ISS_LON_LAT"].split(",") + if lon_lat_data[0] != "0.00": + output_dict["longitude"] = float(lon_lat_data[0]) + if lon_lat_data[1] != " 0.00": + output_dict["latitude"] = float(lon_lat_data[1].split()[0]) + + return output_dict + + +def create_all_maxi_jsons(): + """Creates a `maxi_jsons` directory inside an `output` directory and fills it with the json for all CALET triggers.""" + output_path = "./output/maxi_jsons/" + if not os.path.exists(output_path): + os.makedirs(output_path) + + archive_link = "https://gcn.gsfc.nasa.gov/maxi_grbs.html" + prefix = 
"https://gcn.gsfc.nasa.gov/" + search_string = "other/.*maxi" + links_set = conversion.parse_trigger_links(archive_link, prefix, search_string) + links_list = list(links_set) + + for sernum in range(len(links_list)): + link = links_list[sernum] + data = requests.get(link).text + + if link in bad_links: + continue + + record_number = 1 + start_idx = data.find("\n") + 1 + while True: + end_idx = data.find("\n \n", start_idx) + # Sometimes there is a \n\n isntead of a \n after SRC_NAME + # This messes with the email package + message = data[start_idx:end_idx].strip().replace("\n\n", "\n") + notice_message = email.message_from_string(message) + comment = "\n".join(notice_message.get_all("COMMENTS")) + notice_dict = dict(notice_message) + notice_dict["COMMENTS"] = comment + + notice_type = notice_dict["NOTICE_TYPE"].split()[1] + + if notice_type == "Known": + band_flux = "\n".join(notice_message.get_all("BAND_FLUX")) + notice_dict["BAND_FLUX"] = band_flux + output = text_to_json_maxi( + notice_dict, input_known, record_number, "Known" + ) + elif notice_type == "Unknown": + output = text_to_json_maxi( + notice_dict, input_unknown, record_number, "Unknown" + ) + + with open(f"{output_path}MAXI_{sernum+1}_{record_number}.json", "w") as f: + json.dump(output, f) + + record_number += 1 + temp_start_idx = data.find("///////////", end_idx) + start_idx = data.find("\n", temp_start_idx) + if temp_start_idx == -1: + break diff --git a/gcn_classic_text_to_json/notices/moa/conversion.py b/gcn_classic_text_to_json/notices/moa/conversion.py index e9fecde..fd768ab 100644 --- a/gcn_classic_text_to_json/notices/moa/conversion.py +++ b/gcn_classic_text_to_json/notices/moa/conversion.py @@ -123,8 +123,6 @@ def create_all_moa_jsons(): end_idx = data.find("unavailable", start_idx) + len("unavailable") + 1 notice_message = email.message_from_string(data[start_idx:end_idx].strip()) - print(link) - # print(notice_message) comment = "\n".join(notice_message.get_all("COMMENTS")) notice_dict = 
dict(notice_message) notice_dict["COMMENTS"] = comment