From cbadcac9267c5fe89eeac8247c1acd630d029771 Mon Sep 17 00:00:00 2001 From: Alice Fage Date: Tue, 3 Dec 2024 16:16:05 +1300 Subject: [PATCH 1/4] feat: publish capture-dates.geojson file --- workflows/raster/README.md | 2 +- workflows/raster/copy.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/raster/README.md b/workflows/raster/README.md index 4cae68cbf..a464d2584 100644 --- a/workflows/raster/README.md +++ b/workflows/raster/README.md @@ -218,7 +218,7 @@ Access permissions are controlled by the [Bucket Sharing Config](https://github. | region | enum | | Region of the dataset | | source | str | s3://linz-imagery-staging/test/sample/ | The URIs (paths) to the s3 source location. Separate multiple source paths with `;` | | target | str | s3://linz-imagery-staging/test/sample_target/ | The URIs (paths) to the s3 target location | -| include | regex | \\.tiff?\$\|\\.json\$\|\\.tfw$\|/capture-area\\.geojson$ | A regular expression to match object path(s) or name(s) from within the source path to include in the copy. | +| include | regex | '\.tiff?$|\.json$|\.tfw$|\Q/capture-area.geojson\E|\Q/capture-dates.geojson\E' | A regular expression to match object path(s) or name(s) from within the source path to include in the copy. | | exclude | regex | | A regular expression to match object path(s) or name(s) from within the source path to exclude from the copy. | | copy_option | enum | --no-clobber |
`--no-clobber`
Skip overwriting existing files.
`--force`
Overwrite all files.
`--force-no-clobber`
Overwrite only changed files, skip unchanged files.
| | flatten | enum | false | Flatten the files in the target location (useful for multiple source locations) | diff --git a/workflows/raster/copy.yaml b/workflows/raster/copy.yaml index 22ce17e0f..34befbfb7 100644 --- a/workflows/raster/copy.yaml +++ b/workflows/raster/copy.yaml @@ -76,7 +76,7 @@ spec: - name: target value: 's3://linz-imagery-staging/test/sample_target/' - name: include - value: '\.tiff?$|\.json$|\.tfw$|/capture-area\.geojson$' + value: '\.tiff?$|\.json$|\.tfw$|\Q/capture-area.geojson\E|\Q/capture-dates.geojson\E' - name: exclude value: '' - name: copy_option From 38ad1c985afc1c0e591e7d3b1c2c9c55c278d949 Mon Sep 17 00:00:00 2001 From: Alice Fage Date: Tue, 3 Dec 2024 16:21:30 +1300 Subject: [PATCH 2/4] fix: formatting --- workflows/raster/README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/workflows/raster/README.md b/workflows/raster/README.md index a464d2584..47ec397a1 100644 --- a/workflows/raster/README.md +++ b/workflows/raster/README.md @@ -211,20 +211,20 @@ Access permissions are controlled by the [Bucket Sharing Config](https://github. ## Workflow Input Parameters -| Parameter | Type | Default | Description | -| -------------------- | ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| user_group | enum | none | Group of users running the workflow | -| ticket | str | | Ticket ID e.g. 'AIP-55' | -| region | enum | | Region of the dataset | -| source | str | s3://linz-imagery-staging/test/sample/ | The URIs (paths) to the s3 source location. 
Separate multiple source paths with `;` | -| target | str | s3://linz-imagery-staging/test/sample_target/ | The URIs (paths) to the s3 target location | -| include | regex | '\.tiff?$|\.json$|\.tfw$|\Q/capture-area.geojson\E|\Q/capture-dates.geojson\E' | A regular expression to match object path(s) or name(s) from within the source path to include in the copy. | -| exclude | regex | | A regular expression to match object path(s) or name(s) from within the source path to exclude from the copy. | -| copy_option | enum | --no-clobber |
`--no-clobber`
Skip overwriting existing files.
`--force`
Overwrite all files.
`--force-no-clobber`
Overwrite only changed files, skip unchanged files.
| -| flatten | enum | false | Flatten the files in the target location (useful for multiple source locations) | -| group | int | 1000 | The maximum number of files for each pod to copy (will use the value of `group` or `group_size` that is reached first). | -| group_size | str | 100Gi | The maximum group size of files for each pod to copy (will use the value of `group` or `group_size` that is reached first). | -| transform | str | `f` | String to be transformed from source to target to renamed filenames, e.g. `f.replace("text to replace", "new_text_to_use")`. Leave as `f` for no transformation. | +| Parameter | Type | Default | Description | +| -------------------- | ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| user_group | enum | none | Group of users running the workflow | +| ticket | str | | Ticket ID e.g. 'AIP-55' | +| region | enum | | Region of the dataset | +| source | str | s3://linz-imagery-staging/test/sample/ | The URIs (paths) to the s3 source location. Separate multiple source paths with `;` | +| target | str | s3://linz-imagery-staging/test/sample_target/ | The URIs (paths) to the s3 target location | +| include | regex | \\.tiff?\$\|\\.json\$\|\\.tfw\$\|\\Q\/capture-area.geojson\\E\|\\Q\/capture-dates.geojson\\E | A regular expression to match object path(s) or name(s) from within the source path to include in the copy. | +| exclude | regex | | A regular expression to match object path(s) or name(s) from within the source path to exclude from the copy. | +| copy_option | enum | --no-clobber |
`--no-clobber`
Skip overwriting existing files.
`--force`
Overwrite all files.
`--force-no-clobber`
Overwrite only changed files, skip unchanged files.
| +| flatten | enum | false | Flatten the files in the target location (useful for multiple source locations) | +| group | int | 1000 | The maximum number of files for each pod to copy (will use the value of `group` or `group_size` that is reached first). | +| group_size | str | 100Gi | The maximum group size of files for each pod to copy (will use the value of `group` or `group_size` that is reached first). | +| transform | str | `f` | String to be transformed from source to target to renamed filenames, e.g. `f.replace("text to replace", "new_text_to_use")`. Leave as `f` for no transformation. | | aws_role_config_path | str | `s3://linz-bucket-config/config-write.elevation.json,s3://linz-bucket-config/config-write.imagery.json,s3://linz-bucket-config/config-write.topographic.json` | s3 URL or comma-separated list of s3 URLs allowing the workflow to write to a target(s). | ## Examples From e65eeb073938ca175ed09b128e0e9bf36234a1b9 Mon Sep 17 00:00:00 2001 From: Alice Fage Date: Tue, 3 Dec 2024 16:48:04 +1300 Subject: [PATCH 3/4] fix: copy regex --- workflows/raster/README.md | 4 ++-- workflows/raster/copy.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/raster/README.md b/workflows/raster/README.md index 47ec397a1..9ef1af5e8 100644 --- a/workflows/raster/README.md +++ b/workflows/raster/README.md @@ -212,13 +212,13 @@ Access permissions are controlled by the [Bucket Sharing Config](https://github. 
## Workflow Input Parameters | Parameter | Type | Default | Description | -| -------------------- | ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| -------------------- | ----- |--------------------------------------------------------------------------------------------------------------------------------------------------------------| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | user_group | enum | none | Group of users running the workflow | | ticket | str | | Ticket ID e.g. 'AIP-55' | | region | enum | | Region of the dataset | | source | str | s3://linz-imagery-staging/test/sample/ | The URIs (paths) to the s3 source location. Separate multiple source paths with `;` | | target | str | s3://linz-imagery-staging/test/sample_target/ | The URIs (paths) to the s3 target location | -| include | regex | \\.tiff?\$\|\\.json\$\|\\.tfw\$\|\\Q\/capture-area.geojson\\E\|\\Q\/capture-dates.geojson\\E | A regular expression to match object path(s) or name(s) from within the source path to include in the copy. | +| include | regex | \\.tiff?\$\|\\.json\$\|\\.tfw$\|/capture-area\\.geojson\$\|/capture-area\\.geojson\$ | A regular expression to match object path(s) or name(s) from within the source path to include in the copy. | | exclude | regex | | A regular expression to match object path(s) or name(s) from within the source path to exclude from the copy. | | copy_option | enum | --no-clobber |
`--no-clobber`
Skip overwriting existing files.
`--force`
Overwrite all files.
`--force-no-clobber`
Overwrite only changed files, skip unchanged files.
| | flatten | enum | false | Flatten the files in the target location (useful for multiple source locations) | diff --git a/workflows/raster/copy.yaml b/workflows/raster/copy.yaml index 34befbfb7..ff74c8cae 100644 --- a/workflows/raster/copy.yaml +++ b/workflows/raster/copy.yaml @@ -76,7 +76,7 @@ spec: - name: target value: 's3://linz-imagery-staging/test/sample_target/' - name: include - value: '\.tiff?$|\.json$|\.tfw$|\Q/capture-area.geojson\E|\Q/capture-dates.geojson\E' + value: '\.tiff?$|\.json$|\.tfw$|/capture-area\.geojson$|/capture-dates\.geojson$' - name: exclude value: '' - name: copy_option From ff07042a0cd143b865f0c2bf40215b7007607507 Mon Sep 17 00:00:00 2001 From: Alice Fage Date: Tue, 3 Dec 2024 16:51:00 +1300 Subject: [PATCH 4/4] fix: escape character --- workflows/raster/README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/workflows/raster/README.md b/workflows/raster/README.md index 9ef1af5e8..0e68c3847 100644 --- a/workflows/raster/README.md +++ b/workflows/raster/README.md @@ -211,20 +211,20 @@ Access permissions are controlled by the [Bucket Sharing Config](https://github. ## Workflow Input Parameters -| Parameter | Type | Default | Description | -| -------------------- | ----- |--------------------------------------------------------------------------------------------------------------------------------------------------------------| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| user_group | enum | none | Group of users running the workflow | -| ticket | str | | Ticket ID e.g. 'AIP-55' | -| region | enum | | Region of the dataset | -| source | str | s3://linz-imagery-staging/test/sample/ | The URIs (paths) to the s3 source location. 
Separate multiple source paths with `;` | -| target | str | s3://linz-imagery-staging/test/sample_target/ | The URIs (paths) to the s3 target location | -| include | regex | \\.tiff?\$\|\\.json\$\|\\.tfw$\|/capture-area\\.geojson\$\|/capture-area\\.geojson\$ | A regular expression to match object path(s) or name(s) from within the source path to include in the copy. | -| exclude | regex | | A regular expression to match object path(s) or name(s) from within the source path to exclude from the copy. | -| copy_option | enum | --no-clobber |
`--no-clobber`
Skip overwriting existing files.
`--force`
Overwrite all files.
`--force-no-clobber`
Overwrite only changed files, skip unchanged files.
| -| flatten | enum | false | Flatten the files in the target location (useful for multiple source locations) | -| group | int | 1000 | The maximum number of files for each pod to copy (will use the value of `group` or `group_size` that is reached first). | -| group_size | str | 100Gi | The maximum group size of files for each pod to copy (will use the value of `group` or `group_size` that is reached first). | -| transform | str | `f` | String to be transformed from source to target to renamed filenames, e.g. `f.replace("text to replace", "new_text_to_use")`. Leave as `f` for no transformation. | +| Parameter | Type | Default | Description | +| -------------------- | ----- |---------------------------------------------------------------------------------------------------------------------------------------------------------------| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| user_group | enum | none | Group of users running the workflow | +| ticket | str | | Ticket ID e.g. 'AIP-55' | +| region | enum | | Region of the dataset | +| source | str | s3://linz-imagery-staging/test/sample/ | The URIs (paths) to the s3 source location. Separate multiple source paths with `;` | +| target | str | s3://linz-imagery-staging/test/sample_target/ | The URIs (paths) to the s3 target location | +| include | regex | \\.tiff?\$\|\\.json\$\|\\.tfw\$\|/capture-area\\.geojson\$\|/capture-dates\\.geojson\$ | A regular expression to match object path(s) or name(s) from within the source path to include in the copy. | +| exclude | regex | | A regular expression to match object path(s) or name(s) from within the source path to exclude from the copy. | +| copy_option | enum | --no-clobber |
`--no-clobber`
Skip overwriting existing files.
`--force`
Overwrite all files.
`--force-no-clobber`
Overwrite only changed files, skip unchanged files.
| +| flatten | enum | false | Flatten the files in the target location (useful for multiple source locations) | +| group | int | 1000 | The maximum number of files for each pod to copy (will use the value of `group` or `group_size` that is reached first). | +| group_size | str | 100Gi | The maximum group size of files for each pod to copy (will use the value of `group` or `group_size` that is reached first). | +| transform | str | `f` | String to be transformed from source to target to renamed filenames, e.g. `f.replace("text to replace", "new_text_to_use")`. Leave as `f` for no transformation. | | aws_role_config_path | str | `s3://linz-bucket-config/config-write.elevation.json,s3://linz-bucket-config/config-write.imagery.json,s3://linz-bucket-config/config-write.topographic.json` | s3 URL or comma-separated list of s3 URLs allowing the workflow to write to a target(s). | ## Examples