nasa-gcn · athish-thiru · Sep 19, 2024 · Sep 29, 2024
diff --git a/gcn_classic_text_to_json/conversion.py b/gcn_classic_text_to_json/conversion.py
@@ -123,14 +123,18 @@ def text_to_json(notice, keywords_dict):
         notice_ra = keywords_dict["standard"]["ra"]
         ra_data = notice[notice_ra].split()
 
-        if ra_data[0] != "Undefined":
+        if ra_data[0] == "Undefined":
+            output["ra"] = None
+        else:
             output["ra"] = float(ra_data[0][:-1])
 
     if "dec" in keywords_dict["standard"]:
         notice_dec = keywords_dict["standard"]["dec"]
         dec_data = notice[notice_dec].split()
 
-        if dec_data[0] != "Undefined":
+        if dec_data[0] == "Undefined":
+            output["dec"] = None
+        else:
             output["dec"] = float(dec_data[0][:-1])
 
     if "additional" in keywords_dict:

diff --git a/gcn_classic_text_to_json/notices/counterpart/README.md b/gcn_classic_text_to_json/notices/counterpart/README.md
@@ -0,0 +1,23 @@
+# Counterpart Text Conversion
+
+Parses through all webpages with Counterpart text notices and creates a JSON with GCN schema keywords. Creates a `counterpart_jsons` directory inside an `output` directory and saves jsons as `COUNTERPART_{serial_number}_{record_number}.json` where serial_number is a random iterating number with no association to the notices and record_number is the current notice in the webpage.
+
+### Uses the following fields from the core schema for text notice fields
+- `id` &#8594; TRIGGER_NUM (GRB_Counterpart only)
+- `ref_ID` &#8594; EVENT_TRIG_NUM (LVC_Counterpart only)
+- `ra` &#8594; CNTRPART_RA
+- `dec` &#8594; CNTRPART_DEC
+- `ra_dec_error` &#8594; CNTRPART_ERROR
+- `alert_datetime` &#8594; NOTICE_DATE
+- `trigger_time` &#8594; OBS_DATE, OBS_TIME
+- `mission`, `instrument` &#8594; TELESCOPE
+
+### Defines the following new fields for the text notice fields
+- `submitter_name` &#8594; SUBMITTER
+- `energy_flux`, `energy_flux_error` &#8594; INTENSITY
+- `flux_energy_range` &#8594; ENERGY
+- `duration` &#8594; OBS_DUR
+- `rank` &#8594; RANK
+
+## Caveats
+- The LVC counterpart notices have two fields called SOURSE_SERNUM and WARN_FLAGS. I could not find what they represented in the documenatation and elected to ignore them.
diff --git a/gcn_classic_text_to_json/notices/counterpart/__init__.py b/gcn_classic_text_to_json/notices/counterpart/__init__.py
diff --git a/gcn_classic_text_to_json/notices/counterpart/__main__.py b/gcn_classic_text_to_json/notices/counterpart/__main__.py
@@ -0,0 +1,4 @@
+from . import conversion
+
+if __name__ == "__main__":
+    conversion.create_all_counterpart_jsons()
diff --git a/gcn_classic_text_to_json/notices/counterpart/conversion.py b/gcn_classic_text_to_json/notices/counterpart/conversion.py
@@ -0,0 +1,131 @@
+import email
+import json
+import os
+
+import requests
+
+from ... import conversion
+
+input_grb = {
+    "standard": {
+        "id": "TRIGGER_NUM",
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["OBS_DATE", "OBS_TIME"],
+        "ra": "CNTRPART_RA",
+        "dec": "CNTRPART_DEC",
+    },
+    "additional": {
+        "ra_dec_error": ("CNTRPART_ERROR", "float"),
+        "submitter_name": ("SUBMITTER", "string"),
+        "energy_flux": ("INTENSITY", "float"),
+        "duration": ("OBS_DUR", "float"),
+    },
+}
+
+input_lvc = {
+    "standard": {
+        "alert_datetime": "NOTICE_DATE",
+        "trigger_time": ["OBS_DATE", "OBS_TIME"],
+        "ra": "CNTRPART_RA",
+        "dec": "CNTRPART_DEC",
+    },
+    "additional": {
+        "ref_ID": ("EVENT_TRIG_NUM", "string"),
+        "ra_dec_error": ("CNTRPART_ERROR", "float"),
+        "submitter_name": ("SUBMITTER", "string"),
+        "energy_flux": ("INTENSITY", "float"),
+        "duration": ("OBS_DUR", "float"),
+        "rank": ("RANK", "int"),
+    },
+}
+
+
+def text_to_json_counterpart(notice, input, record_number, notice_type):
+    """Function calls text_to_json and then adds additional fields for each notice type.
+
+    Parameters
+    -----------
+    notice: dict
+        The text notice that is being parsed.
+    input: dict
+        The mapping between text notices keywords and GCN schema keywords.
+    record_number: int
+        The current notice in the webpage being parsed.
+    notice_type: string
+        Whether it is a GRB or LVC counterpart.
+
+    Returns
+    -------
+    dictionary
+        A dictionary compliant with the associated schema for the mission."""
+    output_dict = conversion.text_to_json(notice, input)
+
+    output_dict["$schema"] = (
+        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/counterpart/alert.schema.json"
+    )
+    output_dict["notice_type"] = f"{notice_type}_Counterpart"
+    output_dict["record_number"] = record_number
+    if record_number == 1:
+        output_dict["alert_type"] = "initial"
+    else:
+        output_dict["alert_type"] = "update"
+
+    telescope_data = notice["TELESCOPE"].split()[0].split("-")
+    output_dict["mission"] = telescope_data[0]
+    output_dict["instrument"] = telescope_data[1]
+
+    output_dict["ra_dec_error"] /= 3600
+    output_dict["energy_flux_error"] = float(notice["INTENSITY"].split()[-2])
+    energy_data = notice["ENERGY"].split()[0].split("-")
+    output_dict["flux_energy_range"] = [float(energy_data[0]), float(energy_data[1])]
+
+    return output_dict
+
+
+def create_all_counterpart_jsons():
+    """Creates a `counterpart_jsons` directory and fills it with the json for all COUNTERPART triggers."""
+    output_path = "./output/counterpart_jsons/"
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    archive_link = "https://gcn.gsfc.nasa.gov/counterpart_tbl.html"
+    prefix = "https://gcn.gsfc.nasa.gov/"
+    search_string = "other/.*counterpart"
+    links_set = conversion.parse_trigger_links(archive_link, prefix, search_string)
+    links_list = list(links_set)
+
+    for sernum in range(len(links_list)):
+        link = links_list[sernum]
+        data = requests.get(link).text
+
+        record_number = 1
+        start_idx = data.find("\n") + 1
+        while True:
+            end_idx = data.find("\n \n ", start_idx)
+            notice_message = email.message_from_string(data[start_idx:end_idx].strip())
+            comment = "\n".join(notice_message.get_all("COMMENTS"))
+            notice_dict = dict(notice_message)
+            notice_dict["COMMENTS"] = comment
+
+            notice_type = ((notice_dict["TITLE"].split()[0]).split("/")[-1]).split("_")[
+                0
+            ]
+            if notice_type == "LVC":
+                output = text_to_json_counterpart(
+                    notice_dict, input_lvc, record_number, "LVC"
+                )
+            elif notice_type == "GRB":
+                output = text_to_json_counterpart(
+                    notice_dict, input_grb, record_number, "GRB"
+                )
+
+            with open(
+                f"{output_path}COUNTERPART_{sernum+1}_{record_number}.json", "w"
+            ) as f:
+                json.dump(output, f)
+
+            record_number += 1
+            temp_start_idx = data.find("///////////", end_idx)
+            start_idx = data.find("\n", temp_start_idx)
+            if temp_start_idx == -1:
+                break