From 6e910bd795b8441eb435b37944cc656b0a3b4b91 Mon Sep 17 00:00:00 2001
From: Athish Thiruvengadam <athisht2@illinois.edu>
Date: Thu, 19 Sep 2024 17:22:34 -0500
Subject: [PATCH] RXTE and SUZAKU Text conversions

removed utils.py

small fix

updated trigger_date parsing and calet jsons creation

SUZAKU Text conversion

RXTE Text conversion

added  field

updated SUZAKU conversions

Updated RXTE conversions
---
 .../notices/rxte/README.md                    |  19 +++
 .../notices/rxte/__init__.py                  |   0
 .../notices/rxte/__main__.py                  |   4 +
 .../notices/rxte/conversion.py                | 161 ++++++++++++++++++
 .../notices/suzaku/README.md                  |  11 ++
 .../notices/suzaku/__init__.py                |   0
 .../notices/suzaku/__main__.py                |   4 +
 .../notices/suzaku/conversion.py              |  98 +++++++++++
 8 files changed, 297 insertions(+)
 create mode 100644 gcn_classic_text_to_json/notices/rxte/README.md
 create mode 100644 gcn_classic_text_to_json/notices/rxte/__init__.py
 create mode 100644 gcn_classic_text_to_json/notices/rxte/__main__.py
 create mode 100644 gcn_classic_text_to_json/notices/rxte/conversion.py
 create mode 100644 gcn_classic_text_to_json/notices/suzaku/README.md
 create mode 100644 gcn_classic_text_to_json/notices/suzaku/__init__.py
 create mode 100644 gcn_classic_text_to_json/notices/suzaku/__main__.py
 create mode 100644 gcn_classic_text_to_json/notices/suzaku/conversion.py

diff --git a/gcn_classic_text_to_json/notices/rxte/README.md b/gcn_classic_text_to_json/notices/rxte/README.md
new file mode 100644
index 0000000..af29ee7
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/rxte/README.md
@@ -0,0 +1,19 @@
+# RXTE Text Conversion
+
+Parses all webpages with RXTE text notices and creates a JSON with GCN schema keywords for each notice. Creates a `rxte_jsons` directory inside an `output` directory and saves the JSONs as `RXTE_{serial_number}_{record_number}.json`, where `serial_number` is an arbitrary running counter with no association to the notices and `record_number` is the position of the notice within its webpage.
+
+### Uses the following fields from the core schema for text notice fields
+- `id` &#8594; TRIGGER_NUM (PCA Notices Only)
+- `ra` &#8594; GRB_LOCBURST_RA (PCA) / GRB_RXTE_RA (ASM)
+- `dec` &#8594; GRB_LOCBURST_DEC (PCA) / GRB_RXTE_DEC (ASM)
+- `alert_datetime` &#8594; NOTICE_DATE
+- `trigger_time` &#8594; GRB_DATE, GRB_TIME
+- `ra_dec_error` &#8594; GRB_RXTE_ERROR
+
+### Defines the following new fields for the text notice fields (For ASM Notices Only)
+- `position_type` &#8594; POSITION_TYPE
+- `flux_energy_mcrab` &#8594; GRB_RXTE_INTEN
+
+## Caveats
+- Some Notices are marked PCA for testing but follow the ASM format. I've manually converted these into ASM Notices.
+- There are additional fields associated with POSITION_TYPE that detail the properties of the error box. I've chosen not to include these fields.
diff --git a/gcn_classic_text_to_json/notices/rxte/__init__.py b/gcn_classic_text_to_json/notices/rxte/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gcn_classic_text_to_json/notices/rxte/__main__.py b/gcn_classic_text_to_json/notices/rxte/__main__.py
new file mode 100644
index 0000000..baae721
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/rxte/__main__.py
@@ -0,0 +1,4 @@
+from . import conversion
+
+if __name__ == "__main__":  # Only convert when run as a program, not on import.
+    conversion.create_all_rxte_jsons()
diff --git a/gcn_classic_text_to_json/notices/rxte/conversion.py b/gcn_classic_text_to_json/notices/rxte/conversion.py
new file mode 100644
index 0000000..16a33fa
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/rxte/conversion.py
@@ -0,0 +1,161 @@
+import email
+import json
+import os
+
+import requests
+
+from ... import conversion
+
+input_pca = {  # Mapping for PCA notices that could not be localized (no ra/dec).
+    "standard": {
+        "id": "TRIGGER_NUM",
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["GRB_DATE", "GRB_TIME"],
+    }
+}
+
+input_pca_burst_alert = {  # Mapping for PCA notices whose TITLE has "BURST ALERT".
+    "standard": {
+        "id": "TRIGGER_NUM",
+        "ra": "GRB_LOCBURST_RA",
+        "dec": "GRB_LOCBURST_DEC",
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["GRB_DATE", "GRB_TIME"],
+    }
+}
+
+input_pca_burst_position = {  # Mapping for the remaining (position) PCA notices.
+    "standard": {
+        "id": "TRIGGER_NUM",
+        "ra": "GRB_RXTE_RA",
+        "dec": "GRB_RXTE_DEC",  # declination keyword; must not reuse the RA keyword
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["GRB_DATE", "GRB_TIME"],
+    },
+    "additional": {
+        "ra_dec_error": ("GRB_RXTE_ERROR", "float"),
+        "flux_energy_mcrab": ("GRB_RXTE_INTEN", "float"),
+    },
+}
+
+input_asm = {  # Mapping for ASM notices and PCA-labelled ones with POSITION_TYPE.
+    "standard": {
+        "ra": "GRB_RXTE_RA",
+        "dec": "GRB_RXTE_DEC",
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["GRB_DATE", "GRB_TIME"],
+    },
+    "additional": {
+        "position_type": ("POSITION_TYPE", "string"),
+        "ra_dec_error": ("GRB_RXTE_ERROR", "float"),
+        "flux_energy_mcrab": ("GRB_RXTE_INTEN", "float"),
+    },
+}
+
+
+def text_to_json_rxte(notice, input, record_number, instrument):
+    """Convert one parsed RXTE notice and attach the RXTE-specific metadata.
+
+    Parameters
+    -----------
+    notice: dict
+        The text notice that is being parsed. Must contain a "COMMENTS" key.
+    input: dict
+        The mapping between text notices keywords and GCN schema keywords.
+    record_number: int
+        The current notice in the webpage being parsed (1 marks the initial alert).
+    instrument: str
+        The RXTE notice type. Either PCA or ASM.
+
+    Returns
+    -------
+    dictionary
+        A dictionary compliant with the associated schema for the mission.
+
+    Notes
+    -----
+    Notices whose comments say the PCA could not localize the GRB are converted
+    with `input_pca` (no position fields) instead of `input`; they receive the
+    same `$schema`, mission, instrument and record metadata as every other notice."""
+    # Non-localized notices only swap the mapping; they still get the metadata below.
+    if "RXTE-PCA was NOT able to localize this GRB." in notice["COMMENTS"]:
+        input = input_pca
+    output_dict = conversion.text_to_json(notice, input)
+
+    output_dict["$schema"] = (
+        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/rxte/alert.schema.json"
+    )
+    output_dict["mission"] = "RXTE"
+    output_dict["instrument"] = instrument
+    output_dict["messenger"] = "EM"
+
+    output_dict["record_number"] = record_number
+    output_dict["alert_type"] = "initial" if record_number == 1 else "update"
+
+    # Whenever a positional error was converted, mark it as including systematics.
+    if "ra_dec_error" in output_dict:
+        output_dict["systematic_included"] = True
+
+    return output_dict
+
+
+def create_all_rxte_jsons():
+    """Create a `rxte_jsons` directory and fill it with one json per RXTE notice.
+
+    Each archive page is downloaded, split into individual text notices and every
+    notice is written to `./output/rxte_jsons/RXTE_{serial}_{record}.json`.
+
+    Raises
+    ------
+    ValueError
+        If a notice's NOTICE_TYPE is neither PCA nor ASM."""
+    output_path = "./output/rxte_jsons/"
+    os.makedirs(output_path, exist_ok=True)
+
+    archive_link = "https://gcn.gsfc.nasa.gov/rxte_grbs.html"
+    prefix = "https://gcn.gsfc.nasa.gov/"
+    search_string = "other/.*rxte"
+    links_set = conversion.parse_trigger_links(archive_link, prefix, search_string)
+
+    # Serial numbers just follow the iteration order of the returned links.
+    for sernum, link in enumerate(links_set, start=1):
+        data = requests.get(link).text
+
+        record_number = 1
+        start_idx = data.find("TITLE")
+        while True:
+            # A notice ends at the first "\n \n " run found after its start.
+            end_idx = data.find("\n \n ", start_idx)
+            if end_idx == -1:
+                break
+
+            notice = data[start_idx:end_idx]
+            if "///////////" in notice:
+                notice = notice.replace("/", "")
+
+            notice_message = email.message_from_string(notice.strip())
+            # get_all returns None when the header is absent, so default to [].
+            comments = notice_message.get_all("COMMENTS", [])
+            notice_dict = dict(notice_message)
+            notice_dict["COMMENTS"] = "\n".join(comments)
+
+            instrument = notice_dict["NOTICE_TYPE"].split()[0][-3:]
+            if instrument not in ("PCA", "ASM"):
+                raise ValueError(f"Unknown RXTE notice type {instrument!r}: {link}")
+
+            # Some notices labelled PCA follow the ASM format (they carry POSITION_TYPE).
+            if instrument == "ASM" or "POSITION_TYPE" in notice_dict:
+                mapping, instrument = input_asm, "ASM"
+            elif "BURST ALERT" in notice_dict["TITLE"]:
+                mapping = input_pca_burst_alert
+            else:
+                mapping = input_pca_burst_position
+            output = text_to_json_rxte(notice_dict, mapping, record_number, instrument)
+
+            with open(f"{output_path}RXTE_{sernum}_{record_number}.json", "w") as f:
+                json.dump(output, f)
+
+            record_number += 1
+            start_idx = data.find("TITLE", end_idx)
+            if start_idx == -1:
+                break
diff --git a/gcn_classic_text_to_json/notices/suzaku/README.md b/gcn_classic_text_to_json/notices/suzaku/README.md
new file mode 100644
index 0000000..b1e3012
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/suzaku/README.md
@@ -0,0 +1,11 @@
+# SUZAKU Text Conversion
+
+Parses all webpages with SUZAKU text notices and creates a JSON with GCN schema keywords for each notice. Creates a `suzaku_jsons` directory inside an `output` directory and saves the JSONs as `SUZAKU_WAM_{serial_number}_{record_number}.json`, where `serial_number` is an arbitrary running counter with no association to the notices and `record_number` is the position of the notice within its webpage.
+
+### Uses the following fields from the core schema for text notice fields
+- `id` &#8594; TRIGGER_NUM
+- `alert_datetime` &#8594; NOTICE_DATE
+- `trigger_time` &#8594; TRIGGER_DATE, TRIGGER_TIME
+
+### Defines the following new fields for the text notice fields
+- `lightcurve_url` &#8594; LC_URL
diff --git a/gcn_classic_text_to_json/notices/suzaku/__init__.py b/gcn_classic_text_to_json/notices/suzaku/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gcn_classic_text_to_json/notices/suzaku/__main__.py b/gcn_classic_text_to_json/notices/suzaku/__main__.py
new file mode 100644
index 0000000..a4f1a4b
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/suzaku/__main__.py
@@ -0,0 +1,4 @@
+from . import conversion
+
+if __name__ == "__main__":  # Only convert when run as a program, not on import.
+    conversion.create_all_suzaku_jsons()
diff --git a/gcn_classic_text_to_json/notices/suzaku/conversion.py b/gcn_classic_text_to_json/notices/suzaku/conversion.py
new file mode 100644
index 0000000..c496c04
--- /dev/null
+++ b/gcn_classic_text_to_json/notices/suzaku/conversion.py
@@ -0,0 +1,98 @@
+import email
+import json
+import os
+
+import requests
+
+from ... import conversion
+
+input = {  # Keyword mapping for SUZAKU notices; NOTE: shadows the builtin input().
+    "standard": {
+        "id": "TRIGGER_NUM",
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["TRIGGER_DATE", "TRIGGER_TIME"],
+    }
+}
+
+
+def text_to_json_suzaku(notice, input, record_number):
+    """Convert one SUZAKU notice and add the fields `text_to_json` cannot produce.
+
+    Parameters
+    -----------
+    notice: dict
+        The text notice that is being parsed. Must contain an "LC_URL" key.
+    input: dict
+        The mapping between text notices keywords and GCN schema keywords.
+    record_number: int
+        Position of the notice within its webpage; 1 marks the initial alert.
+
+    Returns
+    -------
+    dictionary
+        A dictionary compliant with the associated schema for the mission."""
+    converted = conversion.text_to_json(notice, input)
+
+    schema_url = (
+        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/suzaku/alert.schema.json"
+    )
+    lightcurve_url = f"https://gcn.gsfc.nasa.gov/notices_suz/{notice['LC_URL']}"
+
+    # Fields that the generic converter does not fill in for SUZAKU notices.
+    converted.update(
+        {
+            "$schema": schema_url,
+            "record_number": record_number,
+            "alert_type": "initial" if record_number == 1 else "update",
+            "mission": "SUZAKU",
+            "instrument": "WAM",
+            "trigger_type": "rate",
+            "lightcurve_url": lightcurve_url,
+        }
+    )
+    return converted
+
+
+def create_all_suzaku_jsons():
+    """Create a `suzaku_jsons` directory and fill it with one json per SUZAKU notice.
+
+    Notices are written to `./output/suzaku_jsons/SUZAKU_WAM_{serial}_{record}.json`."""
+    output_path = "./output/suzaku_jsons/"
+    os.makedirs(output_path, exist_ok=True)
+
+    archive_link = "https://gcn.gsfc.nasa.gov/suzaku_wam.html"
+    prefix = "https://gcn.gsfc.nasa.gov/"
+    search_string = "other/.*suzaku"
+    links_set = conversion.parse_trigger_links(archive_link, prefix, search_string)
+
+    for sernum, link in enumerate(links_set, start=1):
+        data = requests.get(link).text
+
+        record_number = 1
+        start_idx = data.find("\n") + 1  # skip the first line of the page
+        while True:
+            end_idx = data.find("\n \n ", start_idx)
+            if end_idx == -1:
+                break
+
+            notice = data[start_idx:end_idx]
+            if "///////////" in notice:
+                notice = notice.replace("/", "")
+
+            notice_message = email.message_from_string(notice.strip())
+            # get_all returns None when the header is absent, so default to [].
+            comments = notice_message.get_all("COMMENTS", [])
+            notice_dict = dict(notice_message)
+            notice_dict["COMMENTS"] = "\n".join(comments)
+            output = text_to_json_suzaku(notice_dict, input, record_number)
+
+            output_file = f"{output_path}SUZAKU_WAM_{sernum}_{record_number}.json"
+            with open(output_file, "w") as f:
+                json.dump(output, f)
+
+            record_number += 1
+            separator_idx = data.find("///////////", end_idx)
+            newline_idx = data.find("\n", separator_idx)
+            if separator_idx == -1 or newline_idx == -1:
+                break  # no further notice (a -1 newline would restart at index 0)
+            start_idx = newline_idx + 1