From a7556ca053c8a9fac3934698e7657e6f9123c0a1 Mon Sep 17 00:00:00 2001
From: Athish Thiruvengadam <athisht2@illinois.edu>
Date: Thu, 19 Sep 2024 17:22:34 -0500
Subject: [PATCH 1/3] A series of bug fixes

removed utils.py

small fix

updated trigger_date parsing and calet jsons creation

edited conversion.py to deal with inputs that have no 'additional' dictionary

Fixed ra/dec parsing to be separate

Updated alexis conversion

updated poetry

updated all modules for search_string

 Added \n to additional_info in snews and sk_sn

added all current conversions to main.py

updated id

changed url to lightcurve_url; added  field

Added  field to all conversions
---
 gcn_classic_text_to_json/conversion.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcn_classic_text_to_json/conversion.py b/gcn_classic_text_to_json/conversion.py
index fbb7eca..da16417 100644
--- a/gcn_classic_text_to_json/conversion.py
+++ b/gcn_classic_text_to_json/conversion.py
@@ -123,14 +123,18 @@ def text_to_json(notice, keywords_dict):
         notice_ra = keywords_dict["standard"]["ra"]
         ra_data = notice[notice_ra].split()
 
-        if ra_data[0] != "Undefined":
+        if ra_data[0] == "Undefined":
+            output["ra"] = None
+        else:
             output["ra"] = float(ra_data[0][:-1])
 
     if "dec" in keywords_dict["standard"]:
         notice_dec = keywords_dict["standard"]["dec"]
         dec_data = notice[notice_dec].split()
 
-        if dec_data[0] != "Undefined":
+        if dec_data[0] == "Undefined":
+            output["dec"] = None
+        else:
             output["dec"] = float(dec_data[0][:-1])
 
     if "additional" in keywords_dict:

From f42c37c3d50200f0b593f7cb2e78f51744c9deab Mon Sep 17 00:00:00 2001
From: Athish Thiruvengadam <athisht2@illinois.edu>
Date: Mon, 30 Sep 2024 10:55:03 -0500
Subject: [PATCH 2/3] MOA Text conversion

---
 .../notices/moa/README.md                     |  26 +++
 .../notices/moa/__init__.py                   |   0
 .../notices/moa/__main__.py                   |   4 +
 .../notices/moa/conversion.py                 | 149 ++++++++++++++++++
 4 files changed, 179 insertions(+)
 create mode 100644 gcn_classic_text_to_json/notices/moa/README.md
 create mode 100644 gcn_classic_text_to_json/notices/moa/__init__.py
 create mode 100644 gcn_classic_text_to_json/notices/moa/__main__.py
 create mode 100644 gcn_classic_text_to_json/notices/moa/conversion.py

diff --git a/gcn_classic_text_to_json/notices/moa/README.md b/gcn_classic_text_to_json/notices/moa/README.md
new file mode 100644
index 0000000..9872bcc
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/moa/README.md
@@ -0,0 +1,26 @@
+# MOA Text Conversion
+
+Parses through all webpages with MOA text notices and creates a JSON with GCN schema keywords. Creates a `moa_jsons` directory inside an `output` directory and saves jsons as `MOA_{serial_number}_{record_number}.json` where serial_number is a random iterating number with no association to the notices and record_number is the current notice in the webpage.
+
+### Uses the following fields from the core schema for text notice fields
+- `id` &#8594; SRC_ID_NUM
+- `ra` &#8594; SRC_RA
+- `dec` &#8594; SRC_DEC
+- `alert_datetime` &#8594; NOTICE_DATE
+- `trigger_time` &#8594; DISCOVERY_DATE, DISCOVERY_TIME
+
+### Defines the following new fields for the text notice fields
+- `lightcurve_url` &#8594; LC_URL
+- `max_time` &#8594; MAX_DATE, MAX_TIME
+- `max_time_error` &#8594; MAX_UNCERT
+- `cusp_width`, `cusp_width_error` &#8594; CUSP_WIDTH
+- `u0`, `u0_error` &#8594; u0
+- `base_mag`, `base_mag_error` &#8594; BASE_MAG
+- `max_mag` &#8594; MAX_MAG/PEAK_MAG
+- `amplification` &#8594; AMPLIFICATION
+
+## Caveats
+- `lightcurve_url` has been converted as is from the GCN text notices but some of them do not link to lightcurves.
+- MAX_MAG (or PEAK_MAG) and AMPLIFICATION are provided for some of the notices but not for others. The JSON notices include `max_mag` and `amplification` only when the text notice provides them.
+- Additionally, there is a LEAD_TIME in the text notices which is the difference between `trigger_time` and `max_time`. Since this can be calculated from these values, I have chosen to not include this in the JSON notices.
+- Some text notices have a very different format, like `https://gcn.gsfc.nasa.gov/other/moa/201500099_moa.txt` and `https://gcn.gsfc.nasa.gov/other/moa/_moa.txt`, so I've adopted a slightly different parsing for these. Notices with no information, like `https://gcn.gsfc.nasa.gov/other/moa/201400214_moa.txt`, are skipped.
diff --git a/gcn_classic_text_to_json/notices/moa/__init__.py b/gcn_classic_text_to_json/notices/moa/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gcn_classic_text_to_json/notices/moa/__main__.py b/gcn_classic_text_to_json/notices/moa/__main__.py
new file mode 100644
index 0000000..d63941b
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/moa/__main__.py
@@ -0,0 +1,4 @@
+from . import conversion
+
+if __name__ == "__main__":  # entry point when the package is run as a module (python -m ...notices.moa)
+    conversion.create_all_moa_jsons()
diff --git a/gcn_classic_text_to_json/notices/moa/conversion.py b/gcn_classic_text_to_json/notices/moa/conversion.py
new file mode 100644
index 0000000..e9fecde
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/moa/conversion.py
@@ -0,0 +1,149 @@
+import email
+import json
+import os
+
+import requests
+
+from ... import conversion
+
+input = {  # NOTE(review): this module-level name shadows the builtin input()
+    "standard": {  # schema key -> text notice keyword(s)
+        "id": "SRC_ID_NUM",
+        "ra": "SRC_RA",
+        "dec": "SRC_DEC",
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["DISCOVERY_DATE", "DISCOVERY_TIME"],
+    },
+    "additional": {  # schema key -> (text notice keyword, type name)
+        "max_time_error": ("MAX_UNCERT", "float"),
+        "cusp_width": ("CUSP_WIDTH", "float"),
+        "u0": ("u0", "float"),
+        "base_mag": ("BASE_MAG", "float"),
+        "lightcurve_url": ("LC_URL", "string"),
+    },
+}
+
+
+def text_to_json_moa(notice, input, record_number):
+    """Call text_to_json and then add the MOA-specific fields which cannot be dealt with by the general function.
+
+    Parameters
+    -----------
+    notice: dict
+        The text notice that is being parsed.
+    input: dict
+        The mapping between text notices keywords and GCN schema keywords.
+    record_number: int
+        The current notice in the webpage being parsed.
+
+    Returns
+    -------
+    dictionary
+        A dictionary compliant with the associated schema for the mission."""
+    output_dict = conversion.text_to_json(notice, input)
+
+    output_dict["$schema"] = (
+        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/moa/alert.schema.json"
+    )
+    output_dict["mission"] = "MOA"
+    output_dict["record_number"] = record_number
+    if record_number == 1:  # the first notice on a page is "initial", every later one is "update"
+        output_dict["alert_type"] = "initial"
+    else:
+        output_dict["alert_type"] = "update"
+
+    max_date_data = notice["MAX_DATE"].split()
+
+    max_date = max_date_data[-1]
+    if max_date == "(yy/mm/dd)":  # last token is the format hint; the date is the token before it
+        max_date = "20" + max_date_data[-2]  # presumably a two-digit year, hence the "20" prefix - confirm
+
+    max_time_data = notice["MAX_TIME"]
+    max_time_start_idx = max_time_data.find("{")  # the time string is taken from between "{" and "}"
+    max_time_end_idx = max_time_data.find("}", max_time_start_idx)
+    trigger_time = max_time_data[max_time_start_idx + 1 : max_time_end_idx]  # holds the MAX_TIME clock string, despite the name
+    max_datetime = f"{max_date.replace('/', '-', 2)}T{trigger_time}Z"
+    output_dict["max_time"] = max_datetime
+
+    if "MAX_MAG" in notice:  # the magnitude is stored under either MAX_MAG or PEAK_MAG
+        max_mag = notice["MAX_MAG"].split()[0]
+    elif "PEAK_MAG" in notice:
+        max_mag = notice["PEAK_MAG"].split()[0]
+    if max_mag != "No":  # NOTE(review): max_mag is unbound here if the notice has neither MAX_MAG nor PEAK_MAG
+        output_dict["max_mag"] = float(max_mag)
+
+    amplification = notice["AMPLIFICATION"].split()[0]  # NOTE(review): raises KeyError if AMPLIFICATION is absent
+    if amplification != "No":  # a first token of "No" is skipped (presumably "No data" or similar - confirm)
+        output_dict["amplification"] = float(amplification)
+
+    output_dict["cusp_width_error"] = float(notice["CUSP_WIDTH"].split()[-2])  # error is the second-to-last token
+
+    output_dict["u0_error"] = float(notice["u0"].split()[-2])
+
+    output_dict["base_mag_error"] = float(notice["BASE_MAG"].split()[-2])
+
+    return output_dict
+
+
+def create_all_moa_jsons():
+    """Create the `moa_jsons` directory inside `./output/` and fill it with one JSON file per notice on each MOA trigger page."""
+    output_path = "./output/moa_jsons/"
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    archive_link = "https://gcn.gsfc.nasa.gov/moa_events.html"
+    prefix = "https://gcn.gsfc.nasa.gov/"
+    search_string = "other/.*moa.txt"
+    links_set = conversion.parse_trigger_links(archive_link, prefix, search_string)
+    links_list = list(links_set)  # NOTE(review): if links_set is a set, this order (and so sernum) can differ between runs
+
+    for sernum in range(len(links_list)):
+        link = links_list[sernum]
+        data = requests.get(link).text  # NOTE(review): fetched before the skip check below, so the skipped page is still downloaded
+
+        if link == "https://gcn.gsfc.nasa.gov/other/moa/201400214_moa.txt":  # page skipped entirely
+            continue
+
+        record_number = 1
+        if (
+            link != "https://gcn.gsfc.nasa.gov/other/moa/201500099_moa.txt"
+            and link != "https://gcn.gsfc.nasa.gov/other/moa/_moa.txt"
+        ):
+            start_idx = data.find("\n") + 1  # regular pages: start right after the first line
+        else:
+            start_idx = data.find("TITLE")  # the two special-cased pages: start at the first "TITLE"
+
+        while True:  # one iteration per notice on the page
+            if (
+                link != "https://gcn.gsfc.nasa.gov/other/moa/201500099_moa.txt"
+                and link != "https://gcn.gsfc.nasa.gov/other/moa/_moa.txt"
+            ):
+                end_idx = data.find("\n \n ", start_idx)  # NOTE(review): -1 if not found, which makes the slice below end one character early
+            else:
+                end_idx = data.find("unavailable", start_idx) + len("unavailable") + 1
+
+            notice_message = email.message_from_string(data[start_idx:end_idx].strip())
+            print(link)
+            # print(notice_message)
+            comment = "\n".join(notice_message.get_all("COMMENTS"))
+            notice_dict = dict(notice_message)
+            notice_dict["COMMENTS"] = comment
+
+            output = text_to_json_moa(notice_dict, input, record_number)
+
+            with open(f"{output_path}MOA_{sernum+1}_{record_number}.json", "w") as f:
+                json.dump(output, f)
+
+            record_number += 1
+            if (
+                link != "https://gcn.gsfc.nasa.gov/other/moa/201500099_moa.txt"
+                and link != "https://gcn.gsfc.nasa.gov/other/moa/_moa.txt"
+            ):
+                temp_start_idx = data.find("///////////", end_idx)  # run of slashes that precedes the next notice
+                start_idx = data.find("\n", temp_start_idx)  # computed before the -1 check; unused when the loop breaks
+                if temp_start_idx == -1:
+                    break
+            else:
+                start_idx = data.find("TITLE", end_idx)  # next notice starts at the next "TITLE"
+                if start_idx == -1:
+                    break

From 3766d8d19bae74442ecde1d754040232ea1c64b2 Mon Sep 17 00:00:00 2001
From: Athish Thiruvengadam <athisht2@illinois.edu>
Date: Mon, 30 Sep 2024 15:59:24 -0500
Subject: [PATCH 3/3] MAXI Text conversion

Added citation

removed print statements
---
 .../notices/maxi/README.md                    |  27 +++
 .../notices/maxi/__init__.py                  |   0
 .../notices/maxi/__main__.py                  |   4 +
 .../notices/maxi/conversion.py                | 175 ++++++++++++++++++
 .../notices/moa/conversion.py                 |   2 -
 5 files changed, 206 insertions(+), 2 deletions(-)
 create mode 100644 gcn_classic_text_to_json/notices/maxi/README.md
 create mode 100644 gcn_classic_text_to_json/notices/maxi/__init__.py
 create mode 100644 gcn_classic_text_to_json/notices/maxi/__main__.py
 create mode 100644 gcn_classic_text_to_json/notices/maxi/conversion.py

diff --git a/gcn_classic_text_to_json/notices/maxi/README.md b/gcn_classic_text_to_json/notices/maxi/README.md
new file mode 100644
index 0000000..6e24300
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/maxi/README.md
@@ -0,0 +1,27 @@
+# MAXI Text Conversion
+
+Parses through all webpages with MAXI text notices and creates a JSON with GCN schema keywords. Creates a `maxi_jsons` directory inside an `output` directory and saves jsons as `MAXI_{serial_number}_{record_number}.json` where serial_number is a random iterating number with no association to the notices and record_number is the current notice in the webpage.
+
+### Uses the following fields from the core schema for text notice fields
+- `id` &#8594; SRC_ID_NUM/EVENT_ID_NUM
+- `ra` &#8594; SRC_RA/EVENT_RA
+- `dec` &#8594; SRC_DEC/EVENT_DEC
+- `ra_dec_error`  &#8594; SRC_ERROR/EVENT_ERROR
+- `alert_datetime` &#8594; NOTICE_DATE
+- `trigger_time` &#8594; SRC_DATE/EVENT_DATE, SRC_TIME/EVENT_TIME
+- `latitude`, `longitude` &#8594; ISS_LON_LAT
+- `energy_flux` &#8594; SRC_FLUX/EVENT_FLUX
+- `flux_energy_range` &#8594; SRC_EBAND/EVENT_EBAND
+- `classification`  &#8594; SRC_CLASS
+
+### Defines the following new fields for the text notice fields
+- `notice_type` &#8594; NOTICE_TYPE
+- `source_name` &#8594; SRC_NAME
+- `duration` &#8594; SRC_TSCALE/EVENT_TSCALE
+- `rate_snr` &#8594; SIGNIFICANCE
+- `source_flux_low_band`, `background_flux_low_band`, `source_flux_medium_band`, `background_flux_medium_band`, `source_flux_high_band`, `background_flux_high_band` &#8594; BAND_FLUX
+
+## Caveats
+- ISS_LON_LAT is just defined as 0.00, 0.00 for some notices. In this case, I have not added these values to the notices.
+- Similarly, sometimes EVENT_FLUX has errors but these are always 0 so again I have not added these.
+- There are a series of links that have empty notices. I have chosen to skip these.
diff --git a/gcn_classic_text_to_json/notices/maxi/__init__.py b/gcn_classic_text_to_json/notices/maxi/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gcn_classic_text_to_json/notices/maxi/__main__.py b/gcn_classic_text_to_json/notices/maxi/__main__.py
new file mode 100644
index 0000000..e6810f6
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/maxi/__main__.py
@@ -0,0 +1,4 @@
+from . import conversion
+
+if __name__ == "__main__":  # entry point when the package is run as a module (python -m ...notices.maxi)
+    conversion.create_all_maxi_jsons()
diff --git a/gcn_classic_text_to_json/notices/maxi/conversion.py b/gcn_classic_text_to_json/notices/maxi/conversion.py
new file mode 100644
index 0000000..c59e388
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/maxi/conversion.py
@@ -0,0 +1,175 @@
+import email
+import json
+import os
+
+import requests
+
+from ... import conversion
+
+input_known = {  # keyword mapping passed to text_to_json_maxi for "Known" notices
+    "standard": {
+        "id": "SRC_ID_NUM",
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["SRC_DATE", "SRC_TIME"],
+        "ra": "SRC_RA",
+        "dec": "SRC_DEC",
+    },
+    "additional": {
+        "ra_dec_error": ("SRC_ERROR", "float"),
+        "energy_flux": ("SRC_FLUX", "float"),
+        "duration": ("SRC_TSCALE", "string"),
+        "source_name": ("SOURCE_NAME", "string"),  # NOTE(review): the README maps source_name to SRC_NAME - confirm the keyword
+    },
+}
+
+input_unknown = {  # keyword mapping passed to text_to_json_maxi for "Unknown" notices
+    "standard": {
+        "id": "EVENT_ID_NUM",
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["EVENT_DATE", "EVENT_TIME"],
+        "ra": "EVENT_RA",
+        "dec": "EVENT_DEC",
+    },
+    "additional": {
+        "ra_dec_error": ("EVENT_ERROR", "float"),
+        "energy_flux": ("EVENT_FLUX", "float"),
+        "duration": ("EVENT_TSCALE", "string"),
+    },
+}
+
+# Flux conversion factors, from Kawamura et al. 2018
+conversion_factors = [4e-12, 1.24e-11, 1.65e-11, 8.74e-12]  # index-aligned with energy_range_options
+energy_range_options = [[2, 4], [4, 10], [10, 20], [2, 10]]  # [low, high] energy bands matched against SRC_EBAND/EVENT_EBAND
+source_band_flux = [  # (source key, background key) per BAND_FLUX line, in order
+    ("source_flux_low_band", "background_flux_low_band"),
+    ("source_flux_medium_band", "background_flux_medium_band"),
+    ("source_flux_high_band", "background_flux_high_band"),
+]
+
+bad_links = [  # pages that create_all_maxi_jsons skips
+    "https://gcn.gsfc.nasa.gov/other/6743227223.maxi",
+    "https://gcn.gsfc.nasa.gov/other/6397334289.maxi",
+    "https://gcn.gsfc.nasa.gov/other/6841168969.maxi",
+    "https://gcn.gsfc.nasa.gov/other/6731800001.maxi",
+    "https://gcn.gsfc.nasa.gov/other/6397381732.maxi",
+    "https://gcn.gsfc.nasa.gov/other/6741178054.maxi",
+]
+
+
+def text_to_json_maxi(notice, input, record_number, notice_type):
+    """Call text_to_json and then add additional fields depending on the `notice_type`.
+
+    Parameters
+    -----------
+    notice: dict
+        The text notice that is being parsed.
+    input: dict
+        The mapping between text notices keywords and GCN schema keywords.
+    record_number: int
+        The current notice in the webpage being parsed.
+    notice_type: str
+        The type of MAXI notice, either "Known" or "Unknown".
+
+    Returns
+    -------
+    dictionary
+        A dictionary compliant with the associated schema for the mission."""
+    output_dict = conversion.text_to_json(notice, input)
+
+    output_dict["$schema"] = (
+        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/maxi/alert.schema.json"
+    )
+    output_dict["notice_type"] = notice_type
+    output_dict["systematic_included"] = True
+
+    output_dict["record_number"] = record_number
+    if record_number == 1:  # the first notice on a page is "initial", every later one is "update"
+        output_dict["alert_type"] = "initial"
+    else:
+        output_dict["alert_type"] = "update"
+
+    if notice_type == "Known":  # the second token of the EBAND field is "<low>-<high>"
+        eband_data = notice["SRC_EBAND"].split()[1].split("-")
+    elif notice_type == "Unknown":
+        eband_data = notice["EVENT_EBAND"].split()[1].split("-")
+    eband = [int(eband_data[0]), int(eband_data[1])]  # NOTE(review): eband_data is unbound for any other notice_type
+    output_dict["flux_energy_range"] = eband
+    index = energy_range_options.index(eband)  # raises ValueError if the band is not in energy_range_options
+    output_dict["energy_flux"] = output_dict["energy_flux"] * conversion_factors[index]  # scale by the factor for this band
+
+    if notice_type == "Known":
+        output_dict["classification"] = {notice["SRC_CLASS"].split()[0]: 1}
+
+        band_fluxes = notice["BAND_FLUX"].split("\n")  # one "source, background" line per band
+
+        for idx in range(len(band_fluxes)):  # NOTE(review): more than three lines would index past source_band_flux
+            band_flux_data = band_fluxes[idx].split(",")
+
+            output_dict[source_band_flux[idx][0]] = (
+                float(band_flux_data[0][:-1]) * conversion_factors[idx]
+            )
+            output_dict[source_band_flux[idx][1]] = (
+                float(band_flux_data[1].split()[0]) * conversion_factors[idx]
+            )
+
+        lon_lat_data = notice["ISS_LON_LAT"].split(",")
+        if lon_lat_data[0] != "0.00":  # a value of 0.00 is left out of the output
+            output_dict["longitude"] = float(lon_lat_data[0])
+        if lon_lat_data[1] != " 0.00":  # NOTE(review): exact string match, including the leading space
+            output_dict["latitude"] = float(lon_lat_data[1].split()[0])
+
+    return output_dict
+
+
+def create_all_maxi_jsons():
+    """Creates a `maxi_jsons` directory inside an `output` directory and fills it with the json for all MAXI triggers."""
+    output_path = "./output/maxi_jsons/"
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    archive_link = "https://gcn.gsfc.nasa.gov/maxi_grbs.html"
+    prefix = "https://gcn.gsfc.nasa.gov/"
+    search_string = "other/.*maxi"
+    links_set = conversion.parse_trigger_links(archive_link, prefix, search_string)
+    links_list = list(links_set)  # NOTE(review): if links_set is a set, this order (and so sernum) can differ between runs
+
+    for sernum in range(len(links_list)):
+        link = links_list[sernum]
+        data = requests.get(link).text  # NOTE(review): fetched before the bad_links check, so skipped pages are still downloaded
+
+        if link in bad_links:
+            continue
+
+        record_number = 1
+        start_idx = data.find("\n") + 1  # start right after the first line of the page
+        while True:  # one iteration per notice on the page
+            end_idx = data.find("\n \n", start_idx)  # NOTE(review): -1 if not found, which makes the slice below end one character early
+            # Sometimes there is a \n\n instead of a \n after SRC_NAME
+            # This messes with the email package
+            message = data[start_idx:end_idx].strip().replace("\n\n", "\n")
+            notice_message = email.message_from_string(message)
+            comment = "\n".join(notice_message.get_all("COMMENTS"))  # NOTE(review): get_all returns None when no COMMENTS header exists
+            notice_dict = dict(notice_message)
+            notice_dict["COMMENTS"] = comment
+
+            notice_type = notice_dict["NOTICE_TYPE"].split()[1]  # second word of NOTICE_TYPE selects the mapping
+
+            if notice_type == "Known":
+                band_flux = "\n".join(notice_message.get_all("BAND_FLUX"))  # BAND_FLUX repeats; keep every line
+                notice_dict["BAND_FLUX"] = band_flux
+                output = text_to_json_maxi(
+                    notice_dict, input_known, record_number, "Known"
+                )
+            elif notice_type == "Unknown":  # NOTE(review): any other type leaves `output` unset, or stale from the previous notice
+                output = text_to_json_maxi(
+                    notice_dict, input_unknown, record_number, "Unknown"
+                )
+
+            with open(f"{output_path}MAXI_{sernum+1}_{record_number}.json", "w") as f:
+                json.dump(output, f)
+
+            record_number += 1
+            temp_start_idx = data.find("///////////", end_idx)  # run of slashes that precedes the next notice
+            start_idx = data.find("\n", temp_start_idx)  # computed before the -1 check; unused when the loop breaks
+            if temp_start_idx == -1:
+                break
diff --git a/gcn_classic_text_to_json/notices/moa/conversion.py b/gcn_classic_text_to_json/notices/moa/conversion.py
index e9fecde..fd768ab 100644
--- a/gcn_classic_text_to_json/notices/moa/conversion.py
+++ b/gcn_classic_text_to_json/notices/moa/conversion.py
@@ -123,8 +123,6 @@ def create_all_moa_jsons():
                 end_idx = data.find("unavailable", start_idx) + len("unavailable") + 1
 
             notice_message = email.message_from_string(data[start_idx:end_idx].strip())
-            print(link)
-            # print(notice_message)
             comment = "\n".join(notice_message.get_all("COMMENTS"))
             notice_dict = dict(notice_message)
             notice_dict["COMMENTS"] = comment