Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove user id from filepath #1161

Merged
2 changes: 2 additions & 0 deletions .github/integration/tests/sda/01_install_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ for t in curl expect jq openssh-client postgresql-client xxd; do
apt-get -o DPkg::Lock::Timeout=60 install -y "$t" >/dev/null
fi
done

pip -q install s3cmd
2 changes: 0 additions & 2 deletions .github/integration/tests/sda/10_upload_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ done
## truncate database
psql -U postgres -h postgres -d sda -At -c "TRUNCATE TABLE sda.files, sda.encryption_keys CASCADE;"

pip -q install s3cmd

for file in NA12878.bam NA12878_20k_b37.bam NA12878.bai NA12878_20k_b37.bai; do
curl --retry 100 -s -L -o /shared/$file "https://github.com/ga4gh/htsget-refserver/raw/main/data/gcp/gatk-test-data/wgs_bam/$file"
if [ ! -f "$file.c4gh" ]; then
Expand Down
2 changes: 1 addition & 1 deletion .github/integration/tests/sda/20_ingest-verify_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ for file in NA12878.bam NA12878_20k_b37.bam NA12878.bai NA12878_20k_b37.bai; do
jq -r -c -n \
--arg type ingest \
--arg user [email protected] \
--arg filepath test_dummy.org/"$file.c4gh" \
--arg filepath "$file.c4gh" \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down
4 changes: 2 additions & 2 deletions .github/integration/tests/sda/21_cancel_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ cancel_payload=$(
jq -r -c -n \
--arg type cancel \
--arg user [email protected] \
--arg filepath test_dummy.org/NA12878_20k_b37.bam.c4gh \
--arg filepath NA12878_20k_b37.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down Expand Up @@ -65,7 +65,7 @@ ingest_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user [email protected] \
--arg filepath test_dummy.org/NA12878_20k_b37.bam.c4gh \
--arg filepath NA12878_20k_b37.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down
6 changes: 3 additions & 3 deletions .github/integration/tests/sda/22_error_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ if [ -n "$PGSSLCERT" ]; then
fi

## get correlation id from message
CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'test_dummy.org/NB12878.bam.c4gh';")
CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'NB12878.bam.c4gh';")

properties=$(
jq -c -n \
Expand All @@ -50,7 +50,7 @@ ingest_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user [email protected] \
--arg filepath test_dummy.org/NB12878.bam.c4gh \
--arg filepath NB12878.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down Expand Up @@ -96,7 +96,7 @@ verify_payload=$(
--arg user [email protected] \
--arg archive_path "$CORRID" \
--arg file_id "$CORRID" \
--arg filepath test_dummy.org/NB12878.bam.c4gh \
--arg filepath NB12878.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
--argjson re_verify false \
'$ARGS.named|@base64'
Expand Down
2 changes: 1 addition & 1 deletion .github/integration/tests/sda/30_backup-finalize_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ EOD

# check DB for archive file names
for file in NA12878.bam.c4gh NA12878.bai.c4gh NA12878_20k_b37.bam.c4gh NA12878_20k_b37.bai.c4gh; do
archiveName=$(psql -U postgres -h postgres -d sda -At -c "SELECT archive_file_path from sda.files where submission_file_path = 'test_dummy.org/$file';")
archiveName=$(psql -U postgres -h postgres -d sda -At -c "SELECT archive_file_path from sda.files where submission_file_path = '$file';")
size=$(s3cmd -c direct ls s3://backup/"$archiveName" | tr -s ' ' | cut -d ' ' -f 3)
if [ "$size" -eq 0 ]; then
echo "Failed to get size of $file from backup site"
Expand Down
8 changes: 4 additions & 4 deletions .github/integration/tests/sda/31_cancel_test2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ENC_SHA=$(sha256sum NA12878.bam.c4gh | cut -d' ' -f 1)
ENC_MD5=$(md5sum NA12878.bam.c4gh | cut -d' ' -f 1)

## get correlation id from message
CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'test_dummy.org/NA12878.bam.c4gh';")
CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'NA12878.bam.c4gh';")


properties=$(
Expand All @@ -30,7 +30,7 @@ cancel_payload=$(
jq -r -c -n \
--arg type cancel \
--arg user [email protected] \
--arg filepath test_dummy.org/NA12878.bam.c4gh \
--arg filepath NA12878.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down Expand Up @@ -67,7 +67,7 @@ ingest_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user [email protected] \
--arg filepath test_dummy.org/NA12878.bam.c4gh \
--arg filepath NA12878.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down Expand Up @@ -110,7 +110,7 @@ accession_payload=$(
jq -r -c -n \
--arg type accession \
--arg user [email protected] \
--arg filepath test_dummy.org/NA12878.bam.c4gh \
--arg filepath NA12878.bam.c4gh \
--arg accession_id EGAF74900000001 \
--argjson decrypted_checksums "$decrypted_checksums" \
'$ARGS.named|@base64'
Expand Down
6 changes: 3 additions & 3 deletions .github/integration/tests/sda/32_test_race_condition.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ ENC_SHA=$(sha256sum race_file.c4gh | cut -d' ' -f 1)
ENC_MD5=$(md5sum race_file.c4gh | cut -d' ' -f 1)

## get correlation id from message
CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'test_dummy.org/race_file.c4gh';")
CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'race_file.c4gh';")

properties=$(
jq -c -n \
Expand All @@ -36,7 +36,7 @@ accession_payload=$(
jq -r -c -n \
--arg type accession \
--arg user [email protected] \
--arg filepath test_dummy.org/race_file.c4gh \
--arg filepath race_file.c4gh \
--arg accession_id EGAF74900000099 \
--argjson decrypted_checksums "$decrypted_checksums" \
'$ARGS.named|@base64'
Expand Down Expand Up @@ -74,7 +74,7 @@ ingest_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user [email protected] \
--arg filepath test_dummy.org/race_file.c4gh \
--arg filepath race_file.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down
59 changes: 29 additions & 30 deletions .github/integration/tests/sda/60_api_admin_test.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/sh
#!/bin/bash
set -e
cd shared || true

token="$(curl http://oidc:8080/tokens | jq -r '.[0]')"
# Upload a file and make sure it's listed
## make sure previously uploaded files are listed
result="$(curl -sk -L "http://api:8080/users/[email protected]/files" -H "Authorization: Bearer $token" | jq '. | length')"
if [ "$result" -ne 2 ]; then
echo "wrong number of files returned for user [email protected]"
Expand Down Expand Up @@ -41,25 +41,27 @@ fi

# Re-upload a file under a different name, then try to delete it
s3cmd -c s3cfg put "NA12878.bam.c4gh" s3://test_dummy.org/NC12878.bam.c4gh
stream_size=$(curl -s -u guest:guest http://rabbitmq:15672/api/queues/sda/inbox | jq '.messages_ready')

echo "waiting for upload to complete"
URI=http://rabbitmq:15672
if [ -n "$PGSSLCERT" ]; then
URI=https://rabbitmq:15671
fi
stream_size=$((stream_size + 1))
RETRY_TIMES=0
until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 4 ]; do
echo "waiting for upload to complete"
RETRY_TIMES=$((RETRY_TIMES + 1))
if [ "$RETRY_TIMES" -eq 30 ]; then
echo "::error::Time out while waiting for upload to complete"
exit 1
fi
sleep 2

until [ $((stream_size)) -eq "$(curl -s -u guest:guest http://rabbitmq:15672/api/queues/sda/inbox | jq '.messages_ready')" ]; do
echo "waiting for upload to complete"
RETRY_TIMES=$((RETRY_TIMES + 1))
if [ "$RETRY_TIMES" -eq 30 ]; then
echo "Upload did not complete successfully"
exit 1
fi
sleep 2
done

# get the fileId of the new file
fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/[email protected]/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NC12878.bam.c4gh") | .fileID')"
fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/[email protected]/files" | jq -r '.[] | select(.inboxPath == "NC12878.bam.c4gh") | .fileID')"

output=$(s3cmd -c s3cfg ls s3://test_dummy.org/NC12878.bam.c4gh 2>/dev/null)
if [ -z "$output" ] ; then
Expand Down Expand Up @@ -104,27 +106,23 @@ fi

# Re-upload the file and use the api to ingest it, then try to delete it
s3cmd -c s3cfg put NA12878.bam.c4gh s3://test_dummy.org/NE12878.bam.c4gh

URI=http://rabbitmq:15672
if [ -n "$PGSSLCERT" ]; then
URI=https://rabbitmq:15671
fi
stream_size=$((stream_size + 1))
RETRY_TIMES=0
until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 6 ]; do
echo "waiting for upload to complete"
RETRY_TIMES=$((RETRY_TIMES + 1))
if [ "$RETRY_TIMES" -eq 3 ]; then
echo "::error::Time out while waiting for upload to complete"
#exit 1
break
fi
sleep 2

until [ $((stream_size)) -eq "$(curl -s -u guest:guest $URI/api/queues/sda/inbox | jq '.messages_ready')" ]; do
echo "waiting for upload to complete"
RETRY_TIMES=$((RETRY_TIMES + 1))
if [ "$RETRY_TIMES" -eq 30 ]; then
echo "Upload did not complete successfully"
exit 1
fi
sleep 2
done

# Ingest it
new_payload=$(
jq -c -n \
--arg filepath "test_dummy.org/NE12878.bam.c4gh" \
--arg filepath "NE12878.bam.c4gh" \
--arg user "[email protected]" \
'$ARGS.named'
)
Expand All @@ -135,7 +133,7 @@ if [ "$resp" != "200" ]; then
exit 1
fi

fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/[email protected]/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')"
fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/[email protected]/files" | jq -r '.[] | select(.inboxPath == "NE12878.bam.c4gh") | .fileID')"
# wait for the file to get the correct status
RETRY_TIMES=0

Expand All @@ -150,9 +148,10 @@ until [ "$(psql -U postgres -h postgres -d sda -At -c "select id from sda.file_e
done

# Try to delete file not in inbox
fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/[email protected]/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')"
fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/[email protected]/files" | jq -r '.[] | select(.inboxPath == "NE12878.bam.c4gh") | .fileID')"
resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -X DELETE "http://api:8080/file/[email protected]/$fileid")"
if [ "$resp" != "404" ]; then
echo "Error when deleting the file, expected 404 got: $resp"
exit 1
fi
fi
echo "API admin tests completed successfully"
6 changes: 3 additions & 3 deletions .github/integration/tests/sda/92_handle_file_errors.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ bad_file_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user [email protected] \
--arg filepath test_dummy.org/bad.file.c4gh \
--arg filepath bad.file.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down Expand Up @@ -77,7 +77,7 @@ missing_file_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user [email protected] \
--arg filepath test_dummy.org/missing.file.c4gh \
--arg filepath missing.file.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down Expand Up @@ -125,7 +125,7 @@ truncated_file_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user [email protected] \
--arg filepath test_dummy.org/truncated.c4gh \
--arg filepath truncated.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
Expand Down
2 changes: 2 additions & 0 deletions sda/cmd/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/neicnordic/sensitive-data-archive/internal/broker"
"github.com/neicnordic/sensitive-data-archive/internal/config"
"github.com/neicnordic/sensitive-data-archive/internal/database"
"github.com/neicnordic/sensitive-data-archive/internal/helper"
"github.com/neicnordic/sensitive-data-archive/internal/jsonadapter"
"github.com/neicnordic/sensitive-data-archive/internal/schema"
"github.com/neicnordic/sensitive-data-archive/internal/storage"
Expand Down Expand Up @@ -320,6 +321,7 @@ func deleteFile(c *gin.Context) {
return
}

filePath = helper.UnanonymizeFilepath(filePath, submissionUser)
var RetryTimes = 5
for count := 1; count <= RetryTimes; count++ {
err = inbox.RemoveFile(filePath)
Expand Down
5 changes: 3 additions & 2 deletions sda/cmd/ingest/ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/neicnordic/sensitive-data-archive/internal/broker"
"github.com/neicnordic/sensitive-data-archive/internal/config"
"github.com/neicnordic/sensitive-data-archive/internal/database"
"github.com/neicnordic/sensitive-data-archive/internal/helper"
"github.com/neicnordic/sensitive-data-archive/internal/schema"
"github.com/neicnordic/sensitive-data-archive/internal/storage"

Expand Down Expand Up @@ -259,7 +260,7 @@ func main() {
}
}

file, err := inbox.NewFileReader(message.FilePath)
file, err := inbox.NewFileReader(helper.UnanonymizeFilepath(message.FilePath, message.User))
if err != nil { //nolint:nestif
log.Errorf("Failed to open file to ingest reason: (%s)", err.Error())
if strings.Contains(err.Error(), "no such file or directory") || strings.Contains(err.Error(), "NoSuchKey:") {
Expand Down Expand Up @@ -292,7 +293,7 @@ func main() {
continue
}

fileSize, err := inbox.GetFileSize(message.FilePath)
fileSize, err := inbox.GetFileSize(helper.UnanonymizeFilepath(message.FilePath, message.User))
if err != nil {
log.Errorf("Failed to get file size of file to ingest, reason: (%s)", err.Error())
// Nack message so the server gets notified that something is wrong and requeue the message.
Expand Down
10 changes: 6 additions & 4 deletions sda/cmd/mapper/mapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/neicnordic/sensitive-data-archive/internal/broker"
"github.com/neicnordic/sensitive-data-archive/internal/config"
"github.com/neicnordic/sensitive-data-archive/internal/database"
"github.com/neicnordic/sensitive-data-archive/internal/helper"
"github.com/neicnordic/sensitive-data-archive/internal/schema"
"github.com/neicnordic/sensitive-data-archive/internal/storage"

Expand Down Expand Up @@ -103,13 +104,14 @@ func main() {

for _, aID := range mappings.AccessionIDs {
log.Debugf("Mapped file to dataset (corr-id: %s, datasetid: %s, accessionid: %s)", delivered.CorrelationId, mappings.DatasetID, aID)
aaperis marked this conversation as resolved.
Show resolved Hide resolved
filePath, err := db.GetInboxPath(aID)
fileInfo, err := db.GetFileInfoFromAccessionID(aID)
if err != nil {
log.Errorf("failed to get inbox path for file with stable ID: %s", aID)
log.Errorf("failed to get file info for file with stable ID: %s", aID)
}
err = inbox.RemoveFile(filePath)

err = inbox.RemoveFile(helper.UnanonymizeFilepath(fileInfo.FilePath, fileInfo.User))
if err != nil {
log.Errorf("Remove file from inbox failed, reason: %v", err)
log.Errorf("Remove file from inbox %s failed, reason: %v", fileInfo.FilePath, err)
}
}

Expand Down
Loading
Loading