Skip to content

Commit

Permalink
Merge pull request #23 from lsst-dm/tickets/DM-40268
Browse files Browse the repository at this point in the history
Tickets/dm 40268
  • Loading branch information
beckynevin authored Sep 28, 2023
2 parents 73d64af + 10e7984 commit 65a8434
Show file tree
Hide file tree
Showing 11 changed files with 341 additions and 81 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
lsst-daf-butler
psycopg2
botocore
boto3
47 changes: 0 additions & 47 deletions .github/workflows/test.yml

This file was deleted.

50 changes: 50 additions & 0 deletions .github/workflows/test_and_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
name: test
on:
push:
branches:
- main
tags:
- v*
pull_request:

env:
PROGRAM_NAME: transfer-embargo

jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout the repo
uses: actions/checkout@v3
- name: Setup python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m ensurepip
python -m pip install lsst-daf-butler
- name: Run tests
run: |
cd ./tests
python test_move_embargo_args.py
- name: docker build
run: |
docker build . \
--tag $PROGRAM_NAME
- name: Log in to Github container registry
run: echo "${{ secrets.GITHUB_TOKEN }}" |docker login ghcr.io -u $ --password-stdin
- name: Push images
run: |
PROGRAM_ID=ghcr.io/${{ github.repository_owner }}/$PROGRAM_NAME
if [[ "${{ github.ref }}" == "refs/pull/"* ]]; then
VERSION=$(echo "${{ github.head_ref }}" | sed -e 's|.*/||')
elif [[ "${{ github.ref }}" == "refs/tags/"* ]]; then
VERSION=$(echo "${{ github.ref_name }}" | sed -e 's|^v||')
else
VERSION=latest
fi
echo VERSION=$VERSION
echo PROGRAM_ID=$PROGRAM_ID
docker tag $PROGRAM_NAME $PROGRAM_ID:$VERSION
docker push $PROGRAM_ID:$VERSION
50 changes: 28 additions & 22 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,26 +1,32 @@
# Dockerfile
COPY ./src /opt/lsst/transfer_embargo
WORKDIR /opt/lsst/transfer_embargo

FROM python:3.9

# ADD move_embargo_scratch.py .

# RUN setup lsst_distrib -t w_2023_19
FROM python:3.11

# Copy source code and test files
COPY requirements.txt /opt/lsst/transfer_embargo/
COPY src/ /opt/lsst/transfer_embargo/src/
COPY tests_docker/ /opt/lsst/transfer_embargo/tests_docker/

# Set the working directory
WORKDIR /opt/lsst/transfer_embargo

FROM lsstsqre/newinstall:latest
USER lsst
RUN source loadLSST.bash && mamba install rucio-clients
RUN source loadLSST.bash && eups distrib install -t "w_2023_21" obs_lsst

# RUN pip install -r requirements.txt

CMD ["python", "-m", "ensurepip"]
CMD ["python", "-m", "pip", "install", "lsst-daf-butler"]
# this is from the test.yml file
#python -m ensurepip
#python -m pip install lsst-daf-butler

# CMD ["python", "./move_embargo_scratch.py"]
# List files for debugging
# RUN ls -la /opt/lsst/transfer_embargo/
# RUN ls -R /opt/lsst/transfer_embargo/src/
# RUN ls -R /opt/lsst/transfer_embargo/tests_docker/
# RUN ls -R /opt/lsst/transfer_embargo/tests/data/test_from/

RUN pip install -r requirements.txt

# Define the environment variables
# These are written over if they are re-defined
# by the cronjob or on the command line deploy
# of the pod
ENV FROMREPO "tests_docker/temp_from"
ENV TOREPO "tests_docker/temp_to"
ENV INSTRUMENT "LATISS"
ENV NOW "2020-03-01 23:59:59.999999"
ENV EMBARGO_HRS 1063.08018813861
ENV MOVE "True"
ENV LOG "True"

CMD ["/bin/sh", "-c", "python src/move_embargo_args.py \"$FROMREPO\" \"$TOREPO\" \"$INSTRUMENT\" --nowtime \"$NOW\" --embargohours \"$EMBARGO_HRS\" --move \"$MOVE\" --log \"$LOG\""]
97 changes: 97 additions & 0 deletions cronjob_test_deploy_embargo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# This file is a test deployment of the cronjob
# the full deployment can be found:
# https://github.com/slaclab/usdf-embargo-deploy/tree/u/beckynevin/transfer-embargo-deploy/kubernetes/overlays/summit
# ---
# apiVersion: v1
# kind: PersistentVolumeClaim
# metadata:
# name: sdf-group-rubin
# namespace: devel-transfer-embargo
# spec:
# storageClassName: sdf-group-rubin
# accessModes:
# - ReadWriteMany
# resources:
# requests:
# storage: 1Gi
# ---
# apiVersion: v1
# kind: PersistentVolumeClaim
# metadata:
# name: sdf-data-rubin
# namespace: devel-transfer-embargo
# spec:
# storageClassName: sdf-data-rubin
# accessModes:
# - ReadWriteMany
# resources:
# requests:
# storage: 1Gi
# ---
apiVersion: batch/v1
kind: CronJob
metadata:
name: transfer-embargo-temp
namespace: devel-transfer-embargo
spec:
schedule: "* * * * *"
jobTemplate:
spec:
template:
spec:
containers:
- name: transfer-embargo-container
image: "ghcr.io/lsst-dm/transfer-embargo:latest"
imagePullPolicy: Always
env:
- name: FROMREPO
value: "./tests_docker/temp_from"
- name: TOREPO
value: "./tests_docker/temp_to"
- name: INSTRUMENT
value: "LATISS"
- name: EMBARGO_HRS
value: "1063"
# - name: datasettype
# value: "raw"
# - name: collection
# value: "LATISS/raw/all"
- name: NOW
value: "2020-03-01 23:59:59.999999"
- name: MOVE
value: "False"
# - name: PGPASSWORD
# valueFrom:
# secretKeyRef:
# name: db-env
# key: pg_password
# - name: PGUSER
# valueFrom:
# secretKeyRef:
# name: db-env
# key: pg_user
# - name: S3_ENDPOINT_URL
# value: http://s3dfrgw.slac.stanford.edu
# - name: AWS_ACCESS_KEY_ID
# valueFrom:
# secretKeyRef:
# name: s3
# key: s3_access
# - name: AWS_SECRET_ACCESS_KEY
# valueFrom:
# secretKeyRef:
# name: s3
# key: s3_key
volumeMounts:
- name: sdf-group-rubin
mountPath: /sdf/group/rubin
- name: sdf-data-rubin
mountPath: /sdf/data/rubin
volumes:
- name: sdf-group-rubin
persistentVolumeClaim:
claimName: sdf-group-rubin
- name: sdf-data-rubin
persistentVolumeClaim:
claimName: sdf-data-rubin
restartPolicy: OnFailure
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
lsst-daf-butler
psycopg2
botocore
boto3
35 changes: 25 additions & 10 deletions src/move_embargo_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import astropy.time
from lsst.daf.butler import Butler, Timespan
from lsst.daf.butler.cli.cliLog import CliLog
import logging


def parse_args():
Expand Down Expand Up @@ -82,11 +83,9 @@ def parse_args():
# If move is true, then you'll need write
# permissions from the fromrepo (embargo)
butler = Butler(namespace.fromrepo, writeable=namespace.move)
print("temp from path:", namespace.fromrepo)
print("temp to path:", namespace.torepo)
registry = butler.registry
dest = Butler(namespace.torepo, writeable=True)
scratch_registry = dest.registry
prompt_registry = dest.registry
datasetType = namespace.datasettype
collections = namespace.collections
move = namespace.move
Expand All @@ -100,13 +99,26 @@ def parse_args():
now = astropy.time.Time(namespace.nowtime, scale="tai", format="iso")
else:
now = astropy.time.Time.now().tai
timespan_embargo = Timespan(now - embargo_period, now)

if namespace.log == "True":
CliLog.initLog(longlog=True)
logger = logging.getLogger("lsst.transfer.embargo")
logger.info("from path: %s", namespace.fromrepo)
logger.info("to path: %s", namespace.torepo)
# the timespan object defines a "forbidden" region of time
# starting at the nowtime minus the embargo period
# and terminating in anything in the future
# this forbidden timespan will be de-select
# any exposure that overlaps with it
# documentation here:
# https://community.lsst.org/t/constructing-a-where-for-query-dimension-records/6478
timespan_embargo = Timespan(now - embargo_period, None)
# The Dimensions query
# If (now - embargo period, now) does not overlap
# with observation time interval: move
# Else: don't move
# Save data Ids of these observations into a list
after_embargo = [
outside_embargo = [
dt.id
for dt in registry.queryDimensionRecords(
"exposure",
Expand All @@ -124,11 +136,8 @@ def parse_args():
dataId=dataId,
collections=collections,
where="exposure.id IN (exposure_ids)",
bind={"exposure_ids": after_embargo},
bind={"exposure_ids": outside_embargo},
).expanded()
if namespace.log == "True":
cli_log = CliLog.initLog(longlog=True)
CliLog.setLogLevels([(None, "DEBUG")])
out = dest.transfer_from(
butler,
source_refs=datasetRefs,
Expand All @@ -137,5 +146,11 @@ def parse_args():
register_dataset_types=True,
transfer_dimensions=True,
)
datasetRefs_moved = prompt_registry.queryDatasets(
datasetType=datasetType, collections=collections
)
if namespace.log == "True":
ids_moved = [dt.dataId.full["exposure"] for dt in datasetRefs_moved]
logger.info("ids moved: %s", ids_moved)
if move == "True":
butler.pruneDatasets(refs=datasetRefs, unstore=True, purge=True)
butler.pruneDatasets(refs=datasetRefs_moved, unstore=True, purge=True)
Loading

0 comments on commit 65a8434

Please sign in to comment.