diff --git a/.github/workflows/requirements.txt b/.github/workflows/requirements.txt new file mode 100644 index 00000000..0ca24a78 --- /dev/null +++ b/.github/workflows/requirements.txt @@ -0,0 +1,4 @@ +lsst-daf-butler +psycopg2 +botocore +boto3 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 0ade32c7..00000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: test -on: - push: - branches: - - main - tags: - - v* - pull_request: - -jobs: - test: - runs-on: ubuntu-latest - steps: - - name: Checkout the repo - uses: actions/checkout@v3 - - name: Setup python 3.10 - uses: actions/setup-python@v2 - with: - python-version: "3.10" - #- name: docker build - # run: | - # docker build . -f Dockerfile - - name: Install dependencies - run: | - python -m ensurepip - python -m pip install lsst-daf-butler - # pip install pytest-flake8 - # pip install flake8 - # python -m pip install pytest - #- name: install lsstinstall - # run: | - # cd ./tests - # curl -OL https://ls.st/lsstinstall - # chmod u+x lsstinstall - # ./lsstinstall -T "w_2023_19" - # source loadLSST.sh - # # mamba activate lsst-scipipe-6.0.0 - # eups distrib install -t "w_2023_19" daf_butler - # setup -t w_2023_19 daf_butler - # # eups distrib install -t "w_2023_19" lsst_distrib - # python test_move_embargo_args.py - - name: Test with pytest - run: | - # mamba activate lsst-scipipe-6.0.0 - cd ./tests - python test_move_embargo_args.py - # pytest -s test_move_embargo_args.py --log-cli-level 11 diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml new file mode 100644 index 00000000..31664c3c --- /dev/null +++ b/.github/workflows/test_and_build.yml @@ -0,0 +1,50 @@ +name: test +on: + push: + branches: + - main + tags: + - v* + pull_request: + +env: + PROGRAM_NAME: transfer-embargo + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout the repo + uses: actions/checkout@v3 + - name: Setup python 3.11 
+ uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m ensurepip + python -m pip install lsst-daf-butler + - name: Run tests + run: | + cd ./tests + python test_move_embargo_args.py + - name: docker build + run: | + docker build . \ + --tag $PROGRAM_NAME + - name: Log in to Github container registry + run: echo "${{ secrets.GITHUB_TOKEN }}" |docker login ghcr.io -u $ --password-stdin + - name: Push images + run: | + PROGRAM_ID=ghcr.io/${{ github.repository_owner }}/$PROGRAM_NAME + if [[ "${{ github.ref }}" == "refs/pull/"* ]]; then + VERSION=$(echo "${{ github.head_ref }}" | sed -e 's|.*/||') + elif [[ "${{ github.ref }}" == "refs/tags/"* ]]; then + VERSION=$(echo "${{ github.ref_name }}" | sed -e 's|^v||') + else + VERSION=latest + fi + echo VERSION=$VERSION + echo PROGRAM_ID=$PROGRAM_ID + docker tag $PROGRAM_NAME $PROGRAM_ID:$VERSION + docker push $PROGRAM_ID:$VERSION diff --git a/Dockerfile b/Dockerfile index 838849c3..53e97ff9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,26 +1,32 @@ # Dockerfile -COPY ./src /opt/lsst/transfer_embargo -WORKDIR /opt/lsst/transfer_embargo - -FROM python:3.9 - -# ADD move_embargo_scratch.py . 
- -# RUN setup lsst_distrib -t w_2023_19 +FROM python:3.11 +# Copy source code and test files +COPY requirements.txt /opt/lsst/transfer_embargo/ +COPY src/ /opt/lsst/transfer_embargo/src/ +COPY tests_docker/ /opt/lsst/transfer_embargo/tests_docker/ +# Set the working directory +WORKDIR /opt/lsst/transfer_embargo -FROM lsstsqre/newinstall:latest -USER lsst -RUN source loadLSST.bash && mamba install rucio-clients -RUN source loadLSST.bash && eups distrib install -t "w_2023_21" obs_lsst - -# RUN pip install -r requirements.txt - -CMD ["python", "-m", "ensurepip"] -CMD ["python", "-m", "pip", "install", "lsst-daf-butler"] -# this is from the test.yml file -#python -m ensurepip -#python -m pip install lsst-daf-butler - -# CMD ["python", "./move_embargo_scratch.py"] \ No newline at end of file +# List files for debugging +# RUN ls -la /opt/lsst/transfer_embargo/ +# RUN ls -R /opt/lsst/transfer_embargo/src/ +# RUN ls -R /opt/lsst/transfer_embargo/tests_docker/ +# RUN ls -R /opt/lsst/transfer_embargo/tests/data/test_from/ + +RUN pip install -r requirements.txt + +# Define the environment variables +# These are written over if they are re-defined +# by the cronjob or on the command line deploy +# of the pod +ENV FROMREPO "tests_docker/temp_from" +ENV TOREPO "tests_docker/temp_to" +ENV INSTRUMENT "LATISS" +ENV NOW "2020-03-01 23:59:59.999999" +ENV EMBARGO_HRS 1063.08018813861 +ENV MOVE "True" +ENV LOG "True" + +CMD ["/bin/sh", "-c", "python src/move_embargo_args.py \"$FROMREPO\" \"$TOREPO\" \"$INSTRUMENT\" --nowtime \"$NOW\" --embargohours \"$EMBARGO_HRS\" --move \"$MOVE\" --log \"$LOG\""] diff --git a/cronjob_test_deploy_embargo.yaml b/cronjob_test_deploy_embargo.yaml new file mode 100644 index 00000000..68c9f1af --- /dev/null +++ b/cronjob_test_deploy_embargo.yaml @@ -0,0 +1,97 @@ +# This file is a test deployment of the cronjob +# the full deployment can be found: +# 
https://github.com/slaclab/usdf-embargo-deploy/tree/u/beckynevin/transfer-embargo-deploy/kubernetes/overlays/summit +# --- +# apiVersion: v1 +# kind: PersistentVolumeClaim +# metadata: +# name: sdf-group-rubin +# namespace: devel-transfer-embargo +# spec: +# storageClassName: sdf-group-rubin +# accessModes: +# - ReadWriteMany +# resources: +# requests: +# storage: 1Gi +# --- +# apiVersion: v1 +# kind: PersistentVolumeClaim +# metadata: +# name: sdf-data-rubin +# namespace: devel-transfer-embargo +# spec: +# storageClassName: sdf-data-rubin +# accessModes: +# - ReadWriteMany +# resources: +# requests: +# storage: 1Gi +# --- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: transfer-embargo-temp + namespace: devel-transfer-embargo +spec: + schedule: "* * * * *" + jobTemplate: + spec: + template: + spec: + containers: + - name: transfer-embargo-container + image: "ghcr.io/lsst-dm/transfer-embargo:latest" + imagePullPolicy: Always + env: + - name: FROMREPO + value: "./tests_docker/temp_from" + - name: TOREPO + value: "./tests_docker/temp_to" + - name: INSTRUMENT + value: "LATISS" + - name: EMBARGO_HRS + value: "1063" + # - name: datasettype + # value: "raw" + # - name: collection + # value: "LATISS/raw/all" + - name: NOW + value: "2020-03-01 23:59:59.999999" + - name: MOVE + value: "False" + # - name: PGPASSWORD + # valueFrom: + # secretKeyRef: + # name: db-env + # key: pg_password + # - name: PGUSER + # valueFrom: + # secretKeyRef: + # name: db-env + # key: pg_user + # - name: S3_ENDPOINT_URL + # value: http://s3dfrgw.slac.stanford.edu + # - name: AWS_ACCESS_KEY_ID + # valueFrom: + # secretKeyRef: + # name: s3 + # key: s3_access + # - name: AWS_SECRET_ACCESS_KEY + # valueFrom: + # secretKeyRef: + # name: s3 + # key: s3_key + volumeMounts: + - name: sdf-group-rubin + mountPath: /sdf/group/rubin + - name: sdf-data-rubin + mountPath: /sdf/data/rubin + volumes: + - name: sdf-group-rubin + persistentVolumeClaim: + claimName: sdf-group-rubin + - name: sdf-data-rubin + 
persistentVolumeClaim: + claimName: sdf-data-rubin + restartPolicy: OnFailure diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..e05e0759 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +lsst-daf-butler +psycopg2 +botocore +boto3 \ No newline at end of file diff --git a/src/move_embargo_args.py b/src/move_embargo_args.py index e84b8cc0..5a592cc8 100644 --- a/src/move_embargo_args.py +++ b/src/move_embargo_args.py @@ -3,6 +3,7 @@ import astropy.time from lsst.daf.butler import Butler, Timespan from lsst.daf.butler.cli.cliLog import CliLog +import logging def parse_args(): @@ -82,11 +83,9 @@ def parse_args(): # If move is true, then you'll need write # permissions from the fromrepo (embargo) butler = Butler(namespace.fromrepo, writeable=namespace.move) - print("temp from path:", namespace.fromrepo) - print("temp to path:", namespace.torepo) registry = butler.registry dest = Butler(namespace.torepo, writeable=True) - scratch_registry = dest.registry + prompt_registry = dest.registry datasetType = namespace.datasettype collections = namespace.collections move = namespace.move @@ -100,13 +99,26 @@ def parse_args(): now = astropy.time.Time(namespace.nowtime, scale="tai", format="iso") else: now = astropy.time.Time.now().tai - timespan_embargo = Timespan(now - embargo_period, now) + + if namespace.log == "True": + CliLog.initLog(longlog=True) + logger = logging.getLogger("lsst.transfer.embargo") + logger.info("from path: %s", namespace.fromrepo) + logger.info("to path: %s", namespace.torepo) + # the timespan object defines a "forbidden" region of time + # starting at the nowtime minus the embargo period + # and terminating in anything in the future + # this forbidden timespan is used to de-select + # any exposure that overlaps with it + # documentation here: + # https://community.lsst.org/t/constructing-a-where-for-query-dimension-records/6478 + timespan_embargo = Timespan(now - embargo_period, None) # The Dimensions query # If (now - 
embargo period, now) does not overlap # with observation time interval: move # Else: don't move # Save data Ids of these observations into a list - after_embargo = [ + outside_embargo = [ dt.id for dt in registry.queryDimensionRecords( "exposure", @@ -124,11 +136,8 @@ def parse_args(): dataId=dataId, collections=collections, where="exposure.id IN (exposure_ids)", - bind={"exposure_ids": after_embargo}, + bind={"exposure_ids": outside_embargo}, ).expanded() - if namespace.log == "True": - cli_log = CliLog.initLog(longlog=True) - CliLog.setLogLevels([(None, "DEBUG")]) out = dest.transfer_from( butler, source_refs=datasetRefs, @@ -137,5 +146,11 @@ def parse_args(): register_dataset_types=True, transfer_dimensions=True, ) + datasetRefs_moved = prompt_registry.queryDatasets( + datasetType=datasetType, collections=collections + ) + if namespace.log == "True": + ids_moved = [dt.dataId.full["exposure"] for dt in datasetRefs_moved] + logger.info("ids moved: %s", ids_moved) if move == "True": - butler.pruneDatasets(refs=datasetRefs, unstore=True, purge=True) + butler.pruneDatasets(refs=datasetRefs_moved, unstore=True, purge=True) diff --git a/tests/test_move_embargo_args.py b/tests/test_move_embargo_args.py index 8fee2edb..51e107f3 100644 --- a/tests/test_move_embargo_args.py +++ b/tests/test_move_embargo_args.py @@ -60,6 +60,7 @@ def is_it_there( dt.dataId.full["exposure"] for dt in registry_from.queryDatasets(datasetType=..., collections=...) 
] + # verifying the contents of the from butler # if move is on, only the ids_remain should be in temp_from butler if move == "True": @@ -80,6 +81,10 @@ def is_it_there( class TestMoveEmbargoArgs(unittest.TestCase): def setUp(self): + """ + Performs the setup necessary to run + all tests + """ temp_dir = tempfile.TemporaryDirectory() temp_from_path = os.path.join(temp_dir.name, "temp_test_from") temp_to_path = os.path.join(temp_dir.name, "temp_test_to") @@ -97,11 +102,110 @@ def setUp(self): # The above is if we are running 'move', # If copy, it should be both of these # added together - self.log = "False" + self.log = "True" def tearDown(self): + """ + Removes all test files created by tests + """ shutil.rmtree(self.temp_dir.name, ignore_errors=True) + def test_nothing_moves(self): + """ + Nothing should move when the embargo hours falls right on + the oldest exposure + """ + move = "True" + now_time_embargo = "2020-01-17 16:55:11.322700" + embargo_hours = 5596964.255774 / 3600.0 + # IDs that should be moved to temp_to: + ids_moved = [] + # IDs that should stay in the temp_from: + ids_remain = [ + 2019111300059, + 2019111300061, + 2020011700002, + 2020011700003, + 2020011700004, + 2020011700005, + 2020011700006, + ] + is_it_there( + embargo_hours, + now_time_embargo, + ids_remain, + ids_moved, + self.temp_from_path, + self.temp_to_path, + move=move, + log=self.log, + ) + + def test_after_now_01(self): + """ + Verify that exposures after now are not being moved + when the nowtime is right in the middle of the exposures + """ + move = "True" + now_time_embargo = "2020-01-17 16:55:11.322700" + embargo_hours = 0.1 # hours + # IDs that should be moved to temp_to: + ids_moved = [ + 2019111300059, + 2019111300061, + 2020011700002, + 2020011700003, + ] + # IDs that should stay in the temp_from: + ids_remain = [ + 2020011700004, + 2020011700005, + 2020011700006, + ] + is_it_there( + embargo_hours, + now_time_embargo, + ids_remain, + ids_moved, + self.temp_from_path, + 
self.temp_to_path, + move=move, + log=self.log, + ) + + def test_after_now_05(self): + """ + Verify that exposures after now are not being moved + when the nowtime is right in the middle of the exposures + for a slightly longer embargo period (0.5 hours) + """ + move = "True" + now_time_embargo = "2020-01-17 16:55:11.322700" + embargo_hours = 0.5 # hours + # IDs that should be moved to temp_to: + ids_moved = [ + 2019111300059, + 2019111300061, + ] + # IDs that should stay in the temp_from: + ids_remain = [ + 2020011700002, + 2020011700003, + 2020011700004, + 2020011700005, + 2020011700006, + ] + is_it_there( + embargo_hours, + now_time_embargo, + ids_remain, + ids_moved, + self.temp_from_path, + self.temp_to_path, + move=move, + log=self.log, + ) + def test_main_move(self): """ Run move_embargo_args to move some IDs from the fake_from butler @@ -133,7 +237,6 @@ def test_main_move(self): move=move, log=self.log, ) - # os.system("sqlite3 "+self.temp_from_path+"/gen3.sqlite3") def test_main_copy(self): """ diff --git a/tests_docker/create_testto_butler.sh b/tests_docker/create_testto_butler.sh new file mode 100755 index 00000000..add4ee90 --- /dev/null +++ b/tests_docker/create_testto_butler.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Shell script for creating a fake butler that you can use +# for testing. 
+# you also need the lsst_distrib loaded to run this file +# in order to run this file: +# chmod u+x create_testto_butler.sh +# and then ./create_testto_butler.sh + +# setup lsst_distrib +#run this if you need to create scratch butler + +butler create $1 +# butler create $2 \ No newline at end of file diff --git a/tests_docker/temp_to/butler.yaml b/tests_docker/temp_to/butler.yaml new file mode 100644 index 00000000..4e637867 --- /dev/null +++ b/tests_docker/temp_to/butler.yaml @@ -0,0 +1,14 @@ +datastore: + cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore + records: + table: file_datastore_records + root: <butlerRoot> +registry: + db: sqlite:///<butlerRoot>/gen3.sqlite3 + managers: + attributes: lsst.daf.butler.registry.attributes.DefaultButlerAttributeManager + collections: lsst.daf.butler.registry.collections.synthIntKey.SynthIntKeyCollectionManager + datasets: lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID + datastores: lsst.daf.butler.registry.bridge.monolithic.MonolithicDatastoreRegistryBridgeManager + dimensions: lsst.daf.butler.registry.dimensions.static.StaticDimensionRecordStorageManager + opaque: lsst.daf.butler.registry.opaque.ByNameOpaqueTableStorageManager diff --git a/tests_docker/temp_to/gen3.sqlite3 b/tests_docker/temp_to/gen3.sqlite3 new file mode 100644 index 00000000..de1db8a8 Binary files /dev/null and b/tests_docker/temp_to/gen3.sqlite3 differ