Skip to content

Commit

Permalink
Merge branch 'develop' into celery_logging_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
york-stsci authored Jan 17, 2025
2 parents 4d1e697 + 3c7e1a4 commit 69dd4eb
Show file tree
Hide file tree
Showing 8 changed files with 272 additions and 68 deletions.
18 changes: 18 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
## What's Changed

1.3.0 (2024-12-19)
==================

Web Application
~~~~~~~~~~~~~~~
* Exclude source-specific WFSS files from observation page by @bhilbert4 in https://github.com/spacetelescope/jwql/pull/1651
* Switch URL for prog info scraping to use the OPO site by @bhilbert4 in https://github.com/spacetelescope/jwql/pull/1662

Project & API Documentation
~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Added logging configuration to config file, and use it when opening logging by @york-stsci in https://github.com/spacetelescope/jwql/pull/1635
* Fix bad parens in dark monitor model definitions by @bhilbert4 in https://github.com/spacetelescope/jwql/pull/1644
* Add radius keyword to bokeh.figure.circle calls by @bhilbert4 in https://github.com/spacetelescope/jwql/pull/1643
* Remove bokeh templating code by @bhilbert4 in https://github.com/spacetelescope/jwql/pull/1647
* Update Bad Pixel Monitor to use Django DB Models by @mfixstsci in https://github.com/spacetelescope/jwql/pull/1497
* Update Bias Monitor to use Django DB Models by @bsunnquist in https://github.com/spacetelescope/jwql/pull/1503


1.2.11 (2024-08-26)
===================

Expand Down
50 changes: 25 additions & 25 deletions jwql/jwql_monitors/monitor_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,27 @@
import numpy as np
from sqlalchemy.exc import DataError

from jwql.database.database_interface import engine
from jwql.database.database_interface import session
from jwql.database.database_interface import FilesystemCharacteristics
from jwql.database.database_interface import FilesystemGeneral
from jwql.database.database_interface import FilesystemInstrument
from jwql.database.database_interface import CentralStore
from jwql.utils.logging_functions import log_info, log_fail
from jwql.utils.permissions import set_permissions
from jwql.utils.constants import FILESYSTEM_MONITOR_SUBDIRS, FILE_SUFFIX_TYPES, FILTERS_PER_INSTRUMENT, INSTRUMENT_SERVICE_MATCH
from jwql.utils.constants import JWST_INSTRUMENT_NAMES, JWST_INSTRUMENT_NAMES_MIXEDCASE, JWST_INSTRUMENT_NAMES_MIXEDCASE
from jwql.utils.constants import ON_GITHUB_ACTIONS, ON_READTHEDOCS
from jwql.utils.utils import filename_parser
from jwql.utils.utils import get_config
from jwql.utils.monitor_utils import initialize_instrument_monitor, update_monitor_table
from jwql.utils.protect_module import lock_module
from jwql.website.apps.jwql.data_containers import get_instrument_proposals

if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS:
# Need to set up django apps before we can access the models
import django # noqa: E402 (module level import not at top of file)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
django.setup()

# Import * is okay here because this module specifically only contains database models
# for this monitor
from jwql.website.apps.jwql.monitor_models.common import * # noqa: E402 (module level import not at top of file)

SETTINGS = get_config()
FILESYSTEM = SETTINGS['filesystem']
PROPRIETARY_FILESYSTEM = os.path.join(FILESYSTEM, 'proprietary')
Expand All @@ -74,6 +79,7 @@
PREVIEW_IMAGES = SETTINGS['preview_image_filesystem']
THUMBNAILS = SETTINGS['thumbnail_filesystem']
LOGS = SETTINGS['log_dir']
WORKING = SETTINGS['working']


def files_per_filter():
Expand Down Expand Up @@ -232,7 +238,8 @@ def get_area_stats(central_storage_dict):
'logs': LOGS,
'preview_images': PREVIEW_IMAGES,
'thumbnails': THUMBNAILS,
'all': CENTRAL}
'all': CENTRAL,
'working':WORKING}

counteddirs = []

Expand Down Expand Up @@ -368,7 +375,7 @@ def initialize_results_dicts():
A dictionary for the ``central_storage`` database table
"""

now = datetime.datetime.now()
now = datetime.datetime.now(datetime.timezone.utc)

general_results_dict = {}
general_results_dict['date'] = now
Expand Down Expand Up @@ -430,9 +437,9 @@ def update_central_store_database(central_storage_dict):
new_record['size'] = central_storage_dict[area]['size']
new_record['used'] = central_storage_dict[area]['used']
new_record['available'] = central_storage_dict[area]['available']
with engine.begin() as connection:
connection.execute(CentralStore.__table__.insert(), new_record)
session.close()

entry = CentralStorage(**new_record)
entry.save()


def update_characteristics_database(char_info):
Expand All @@ -447,7 +454,7 @@ def update_characteristics_database(char_info):
using that filter/pupil.
"""
logging.info('\tUpdating the characteristics database')
now = datetime.datetime.now()
now = datetime.datetime.now(datetime.timezone.utc)

# Add data to filesystem_instrument table
for instrument in ['nircam', 'niriss', 'nirspec', 'miri']:
Expand All @@ -458,11 +465,9 @@ def update_characteristics_database(char_info):
new_record['instrument'] = instrument
new_record['filter_pupil'] = optics
new_record['obs_per_filter_pupil'] = values
with engine.begin() as connection:
connection.execute(
FilesystemCharacteristics.__table__.insert(), new_record)

session.close()
entry = FilesystemCharacteristics(**new_record)
entry.save()


def update_database(general_results_dict, instrument_results_dict, central_storage_dict):
Expand All @@ -478,8 +483,8 @@ def update_database(general_results_dict, instrument_results_dict, central_stora
"""
logging.info('\tUpdating the database')

with engine.begin() as connection:
connection.execute(FilesystemGeneral.__table__.insert(), general_results_dict)
fs_general_entry = FilesystemGeneral(**general_results_dict)
fs_general_entry.save()

# Add data to filesystem_instrument table
for instrument in JWST_INSTRUMENT_NAMES:
Expand All @@ -493,13 +498,8 @@ def update_database(general_results_dict, instrument_results_dict, central_stora

# Protect against updated enum options that have not been propagated to
# the table definition
try:
with engine.begin() as connection:
connection.execute(FilesystemInstrument.__table__.insert(), new_record)
except DataError as e:
logging.error(e)

session.close()
fs_instrument_entry = FilesystemInstrument(**new_record)
fs_instrument_entry.save()


@lock_module
Expand Down
38 changes: 38 additions & 0 deletions jwql/tests/test_archive_database_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#! /usr/bin/env python

"""Tests for the ``archive_database_update`` module.
Authors
-------
- Bryan Hilbert
Use
---
These tests can be run via the command line (omit the ``-s`` to
suppress verbose output to stdout):
::
pytest -s test_archive_database_update.py
"""


import pytest

from jwql.website.apps.jwql import archive_database_update


def test_filter_rootnames():
    """Check that ``filter_rootnames`` drops the source-based entries and
    returns the remaining filenames in their original order.
    """
    # Exposure-level files that are expected to survive the filter
    first_kept = 'jw01068004001_02102_00001_nrcb4_rate.fits'
    second_kept = 'jw02183117001_03103_00001-seg001_nrca1_rate.fits'

    # Interleave the surviving files with entries the filter should reject
    candidates = [
        'jw06434-c1021_s000001510_nircam_f444w-grismr.fits',
        first_kept,
        'jw06434-c1021_t000_nircam_clear-f090w_segm.fits',
        'jw06434-o001_t000_nircam_clear-f090w_segm.fits',
        second_kept,
    ]

    result = archive_database_update.filter_rootnames(candidates)
    assert result == [first_kept, second_kept]
1 change: 1 addition & 0 deletions jwql/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@
FILE_PROG_ID_LEN = 5
FILE_SEG_LEN = 3
FILE_SOURCE_ID_LEN = 5
FILE_SOURCE_ID_LONG_LEN = 9
FILE_TARG_ID_LEN = 3
FILE_VISIT_GRP_LEN = 2
FILE_VISIT_LEN = 3
Expand Down
116 changes: 96 additions & 20 deletions jwql/website/apps/jwql/archive_database_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,32 +43,45 @@
import logging
import os
import argparse
import re

import numpy as np
import django

from django.apps import apps
from jwql.utils.protect_module import lock_module
from jwql.utils.constants import DEFAULT_MODEL_CHARFIELD

# These lines are needed in order to use the Django models in a standalone
# script (as opposed to code run as a result of a webpage request). If these
# lines are not run, the script will crash when attempting to import the
# Django models in the line below.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
django.setup()

from jwql.website.apps.jwql.models import Archive, Observation, Proposal, RootFileInfo # noqa
from jwql.utils.constants import JWST_INSTRUMENT_NAMES_MIXEDCASE # noqa
from jwql.utils.logging_functions import log_info, log_fail # noqa
from jwql.utils.monitor_utils import initialize_instrument_monitor # noqa
from jwql.utils.constants import MAST_QUERY_LIMIT # noqa
from jwql.utils.utils import filename_parser, filesystem_path, get_config # noqa
from jwql.website.apps.jwql.data_containers import create_archived_proposals_context # noqa
from jwql.website.apps.jwql.data_containers import get_instrument_proposals, get_filenames_by_instrument # noqa
from jwql.website.apps.jwql.data_containers import get_proposal_info, mast_query_filenames_by_instrument, mast_query_by_rootname # noqa

FILESYSTEM = get_config()['filesystem']
from jwql.utils.constants import (DEFAULT_MODEL_CHARFIELD,
FILE_PROG_ID_LEN,
FILE_AC_O_ID_LEN,
FILE_AC_CAR_ID_LEN,
FILE_SOURCE_ID_LONG_LEN,
FILE_TARG_ID_LEN,
JWST_INSTRUMENT_NAMES_MIXEDCASE,
MAST_QUERY_LIMIT,
ON_GITHUB_ACTIONS,
ON_READTHEDOCS
)
from jwql.utils.logging_functions import log_info, log_fail
from jwql.utils.monitor_utils import initialize_instrument_monitor
from jwql.utils.utils import filename_parser, filesystem_path, get_config
from jwql.website.apps.jwql.data_containers import create_archived_proposals_context
from jwql.website.apps.jwql.data_containers import get_instrument_proposals, get_filenames_by_instrument
from jwql.website.apps.jwql.data_containers import (get_proposal_info,
mast_query_filenames_by_instrument,
mast_query_by_rootname
)


if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS:
# These lines are needed in order to use the Django models in a standalone
# script (as opposed to code run as a result of a webpage request). If these
# lines are not run, the script will crash when attempting to import the
# Django models in the line below.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
django.setup()

from jwql.website.apps.jwql.models import Archive, Observation, Proposal, RootFileInfo # noqa
FILESYSTEM = get_config()['filesystem']


@log_info
Expand Down Expand Up @@ -113,6 +126,11 @@ def get_updates(update_database):

# Get set of unique rootnames
all_rootnames = set(['_'.join(f.split('/')[-1].split('_')[:-1]) for f in filenames])

# Filter source-based level 2 files out of the rootnames and filenames
all_rootnames = filter_rootnames(all_rootnames)
filenames = filter_filenames(filenames, all_rootnames)

rootnames = []
for rootname in all_rootnames:
filename_dict = filename_parser(rootname)
Expand Down Expand Up @@ -510,6 +528,64 @@ def fill_empty_rootfileinfo(rootfileinfo_set):
logging.info(f'\tSaved {saved_rootfileinfos} Root File Infos')


def filter_filenames(fnames, roots):
    """Keep only the filenames that contain at least one of the given rootnames

    A filename is retained when any entry of ``roots`` appears in it as a
    substring. Each retained filename appears once per occurrence in
    ``fnames``, and the input order is preserved.

    Parameters
    ----------
    fnames : list
        List of filenames

    roots : list
        List of rootnames

    Returns
    -------
    filtered_fnames : list
        Filtered list of filenames
    """
    # any() stops at the first matching rootname, so a filename is never
    # added more than once even if several rootnames match it.
    return [name for name in fnames if any(stem in name for stem in roots)]


def filter_rootnames(rootnames):
    """Remove rootnames that are known in advance to be unparseable by the
    filename_parser.

    The filtering is done here, rather than inside the filename parser,
    because archive_database_update can hand the parser thousands of
    unrecognized names (e.g. source-based WFSS files). Each of those would
    produce a logging statement and lead to massive log files. Discarding
    the obviously unparseable rootnames first avoids that.

    Examples of names that are removed:
    ``jw06434-c1021_s000001510_nircam_f444w-grismr`` and
    ``jw06434-c1021_t000_nircam_clear-f090w_segm.fits``

    Parameters
    ----------
    rootnames : list
        List of rootnames

    Returns
    -------
    good_rootnames : list
        List of rootnames that do not match the filters
    """
    # Assemble the pattern piece by piece. Doubled braces in the raw
    # f-strings yield literal regex quantifier braces around each length.
    segments = (
        rf"jw(?P<program_id>\d{{{FILE_PROG_ID_LEN}}})",
        rf"-(?P<ac_id>(o\d{{{FILE_AC_O_ID_LEN}}}|(c|a|r)\d{{{FILE_AC_CAR_ID_LEN}}}))",
        rf"_(?P<target_id>(s\d{{{FILE_SOURCE_ID_LONG_LEN}}}|(t)\d{{{FILE_TARG_ID_LEN}}}))",
        r"_(?P<instrument>(nircam|niriss|miri))",
        r"_(?P<optical_elements>((?!_)[\w-])+)",
        r"-",
    )
    source_pattern = re.compile("".join(segments))

    good_rootnames = []
    for candidate in rootnames:
        # match() anchors at the start of the string only
        if source_pattern.match(candidate) is not None:
            continue
        good_rootnames.append(candidate)
    return good_rootnames


@lock_module
def protected_code(update_database, fill_empty_list):
"""Protected code ensures only 1 instance of module will run at any given time
Expand Down
7 changes: 5 additions & 2 deletions jwql/website/apps/jwql/data_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2057,12 +2057,15 @@ def text_scrape(prop_id):
program_meta : dict
Dictionary containing information about program
"""
# Ensure prop_id is a 5-digit string
prop_id = str(prop_id).zfill(5)

# Generate url
url = 'http://www.stsci.edu/cgi-bin/get-proposal-info?id=' + str(prop_id) + '&submit=Go&observatory=JWST'
url = f'https://www.stsci.edu/jwst-program-info/program/?program={prop_id}'
html = BeautifulSoup(requests.get(url).text, 'lxml')
not_available = "not available via this interface" in html.text
not_available |= "temporarily unable" in html.text
not_available |= "internal error" in html.text

program_meta = {}
program_meta['prop_id'] = prop_id
Expand All @@ -2081,7 +2084,7 @@ def text_scrape(prop_id):

links = html.findAll('a')

proposal_type = links[0].contents[0]
proposal_type = links[3].contents[0]

program_meta['prop_type'] = proposal_type

Expand Down
Loading

0 comments on commit 69dd4eb

Please sign in to comment.