Merge pull request #43 from lincc-frameworks/issue/42/getting-started…

…-download Getting Started notebook for downloader and Logging fixup
lincc-frameworks · Aug 27, 2024 · 79293c8 · 79293c8
2 parents 02df7b5 + 52559df
commit 79293c8
Show file tree

Hide file tree

Showing 3 changed files with 141 additions and 10 deletions.
diff --git a/.gitignore b/.gitignore
@@ -152,3 +152,5 @@ _html/
 # Default save locations for fibad
 data/
 example_model.pth
+# Common location to stash personal or notebook example config
+fibad_config.toml
diff --git a/example_notebooks/GettingStartedDownloader.ipynb b/example_notebooks/GettingStartedDownloader.ipynb
@@ -0,0 +1,75 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import fibad\n",
+    "\n",
+    "# We could be all fancy and use a toml file writer here, but\n",
+    "# I'm trying to show how simple this can be.\n",
+    "fibad_config = \"fibad_config.toml\"\n",
+    "with open(fibad_config, \"w\") as f:\n",
+    "    # This simply contains defaults, but you'll have to change at least\n",
+    "    # username, password, fits_file, and cutout_dir to your liking\n",
+    "    # if you want this to work.\n",
+    "\n",
+    "    f.write(\n",
+    "        \"\"\"\n",
+    "\n",
+    "    [download]\n",
+    "    sw = \"22asec\"\n",
+    "    sh = \"22asec\"\n",
+    "    filter = [\"HSC-G\", \"HSC-R\", \"HSC-I\", \"HSC-Z\", \"HSC-Y\"]\n",
+    "    type = \"coadd\"\n",
+    "    rerun = \"pdr3_wide\"\n",
+    "\n",
+    "    username = \"<Your HSC Username>@local\"\n",
+    "    password = \"<Your HSC password>\"\n",
+    "\n",
+    "    fits_file = \"<Your fits catalog filename goes here>\"\n",
+    "    cutout_dir = \"<The file with the cutouts goes here>\"\n",
+    "\n",
+    "    offset = 0\n",
+    "    num_sources = 10\n",
+    "            \n",
+    "    \"\"\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fibad_instance = fibad.Fibad(config_file=fibad_config)\n",
+    "\n",
+    "fibad_instance.download()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "fibad",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/fibad/fibad.py b/src/fibad/fibad.py
@@ -16,32 +16,87 @@ class Fibad:
 
     verbs = ["train", "predict", "download"]
 
-    def __init__(self, *, config_file: Union[Path, str] = None):
+    def __init__(self, *, config_file: Union[Path, str] = None, setup_logging: bool = True):
         """Initialize fibad. Always applies the default config, and merges it with any provided config file.
 
         Parameters
         ----------
         config_file : Union[Path, str], optional
             filename or pathlib.Path to a config file, by default None
+
+        setup_logging : bool, optional
+            Logging setup for a fibad object is global loggers named "fibad.*" If you want to turn off
+            logging config for "fibad.*" python loggers, pass False here. By default True.
+
+            You may want to set this to True if:
+            - You have multiple Fibad objects in your application or notebook, and would like to control
+              which of their logging configs is used globally. By creating one of your objects with
+              setup_logging=True and the others with setup_logging=False, the single object created with
+              setup_logging=True will control where the log is emitted to and what the threshold level is.
+            - You have another library which needs overall control over python logging's config, and you
+              do not want fibad to alter any global logging config. In this case you should always pass
+              setup_logging=False. Fibad will stil send logs into python logging; however, the other
+              system will be responsible for where those logs are emitted, and what the threshold level
+              is.
+
+            You may want to leave the default of setup_logging=True if:
+            - You have a single Fibad object in use at any time. This is true in most notebook like
+              environments.
         """
         self.config = get_runtime_config(runtime_config_filepath=config_file)
-        general_config = self.config.get("general", {})
 
         # Configure our logger. We do not use __name__ here because that would give us a "fibad.fibad" logger
-        # which would not aggregate logs from fibad.downloadCutout which creates its own logger
-        # for backwards compatibility with its CLI.
+        # which would not aggregate logs from fibad.downloadCutout which creates its own
+        # "fibad.downloadCutout" logger. It uses this name to preserve backwards compatibility with its CLI.
         #
         # Choosing "fibad" as our log name also means that modules like fibad.models, or fibad.dataloaders
         # can get a logger with logging.getLogger(__name__) and will automatically roll up to us.
         #
-        # A downside is that multiple Fibad objects all log the same place and have the ability to clobber
-        # one another's settings, and combine logs with one another.
         self.logger = logging.getLogger("fibad")
 
+        # The logger object we get from logging.getLogger() is a global process-level object.
+        #
+        # This creates some tension around what fibad initialization ought happen when the log handlers for
+        # fibad have already been configured by another object!
+        #
+        # There are two imagined situations at time of writing where this tension occurs:
+        #
+        # 1) A notebook user has a cell with `fibad_instance = fibad.Fibad()` and they run this multiple times
+        #    in the same kernel process. If this person is changing any logging in their fibad config file
+        #    and rerunning (perhaps to debug an issue) they likely expect the most recently __init__()ed
+        #    fibad object to control their logging.
+        #
+        # 2) An application (or notebook) user configures multiple Fibad instance objects intending to use
+        #    several different configs. If the logging configs differ between those objects we don't have a
+        #    clear choice about what the global fibad logging config ought be, unless the user tells us
+        #    somehow.
+        #
+        # In the face of this tension we allow a kwarg to be passed (setup_logging) which defaults to True
+        #
+        # In the default case setup_logging=True, User 1 gets their expected behavior where the most recently
+        # initialized object controls global logging.
+        #
+        # User 2 can initialize some (or all!) of their objects with setup_logging = False to achieve
+        # appropriate logging behavior for their application.
+        #
+        if setup_logging:
+            # Remove all prior handlers
+            for handler in self.logger.handlers:
+                self.logger.removeHandler(handler)
+
+            # Setup our handlers from config
+            self._initialize_log_handlers()
+
+        self.logger.info(f"Runtime Config read from: {config_file}")
+
+    def _initialize_log_handlers(self):
+        """Private initialization helper, Adds handlers and level setting sto the global self.logger object"""
+
+        general_config = self.config.get("general", {})
         # default to warning level
         level = general_config.get("log_level", "warning")
         if not isinstance(level, str):
-            self.logger.setLevel(logging.WARNING)
+            level = logging.WARNING
         else:
             # In python 3.11 and above you can do this:
             #
@@ -57,7 +112,8 @@ def __init__(self, *, config_file: Union[Path, str] = None):
                 "debug": logging.DEBUG,
             }
             level = level_map.get(level.lower(), logging.WARNING)
-            self.logger.setLevel(level)
+
+        self.logger.setLevel(level)
 
         # Default to stderr for destination
         log_destination = general_config.get("log_destination", "stderr")
@@ -79,8 +135,6 @@ def __init__(self, *, config_file: Union[Path, str] = None):
         formatter = logging.Formatter("[%(asctime)s %(name)s:%(levelname)s] %(message)s")
         handler.setFormatter(formatter)
 
-        self.logger.info(f"Runtime Config read from: {config_file}")
-
     def train(self, **kwargs):
         """
         See Fibad.train.run()