diff --git a/.github/integration/tests/sda/01_install_dependencies.sh b/.github/integration/tests/sda/01_install_dependencies.sh
index 9020696c1..174d96c6d 100644
--- a/.github/integration/tests/sda/01_install_dependencies.sh
+++ b/.github/integration/tests/sda/01_install_dependencies.sh
@@ -13,3 +13,5 @@ for t in curl expect jq openssh-client postgresql-client xxd; do
apt-get -o DPkg::Lock::Timeout=60 install -y "$t" >/dev/null
fi
done
+
+pip -q install s3cmd
diff --git a/.github/integration/tests/sda/10_upload_test.sh b/.github/integration/tests/sda/10_upload_test.sh
index 640045454..a5b8419de 100644
--- a/.github/integration/tests/sda/10_upload_test.sh
+++ b/.github/integration/tests/sda/10_upload_test.sh
@@ -20,8 +20,6 @@ done
## truncate database
psql -U postgres -h postgres -d sda -At -c "TRUNCATE TABLE sda.files, sda.encryption_keys CASCADE;"
-pip -q install s3cmd
-
for file in NA12878.bam NA12878_20k_b37.bam NA12878.bai NA12878_20k_b37.bai; do
curl --retry 100 -s -L -o /shared/$file "https://github.com/ga4gh/htsget-refserver/raw/main/data/gcp/gatk-test-data/wgs_bam/$file"
if [ ! -f "$file.c4gh" ]; then
diff --git a/.github/integration/tests/sda/20_ingest-verify_test.sh b/.github/integration/tests/sda/20_ingest-verify_test.sh
index f9443cfb3..0c750f0cf 100644
--- a/.github/integration/tests/sda/20_ingest-verify_test.sh
+++ b/.github/integration/tests/sda/20_ingest-verify_test.sh
@@ -36,7 +36,7 @@ for file in NA12878.bam NA12878_20k_b37.bam NA12878.bai NA12878_20k_b37.bai; do
jq -r -c -n \
--arg type ingest \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/"$file.c4gh" \
+ --arg filepath "$file.c4gh" \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
diff --git a/.github/integration/tests/sda/21_cancel_test.sh b/.github/integration/tests/sda/21_cancel_test.sh
index ab6fd64b2..c7482bdba 100644
--- a/.github/integration/tests/sda/21_cancel_test.sh
+++ b/.github/integration/tests/sda/21_cancel_test.sh
@@ -34,7 +34,7 @@ cancel_payload=$(
jq -r -c -n \
--arg type cancel \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/NA12878_20k_b37.bam.c4gh \
+ --arg filepath NA12878_20k_b37.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
@@ -65,7 +65,7 @@ ingest_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/NA12878_20k_b37.bam.c4gh \
+ --arg filepath NA12878_20k_b37.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
diff --git a/.github/integration/tests/sda/22_error_test.sh b/.github/integration/tests/sda/22_error_test.sh
index 461b0f5dc..ea79144e8 100644
--- a/.github/integration/tests/sda/22_error_test.sh
+++ b/.github/integration/tests/sda/22_error_test.sh
@@ -29,7 +29,7 @@ if [ -n "$PGSSLCERT" ]; then
fi
## get correlation id from message
-CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'test_dummy.org/NB12878.bam.c4gh';")
+CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'NB12878.bam.c4gh';")
properties=$(
jq -c -n \
@@ -50,7 +50,7 @@ ingest_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/NB12878.bam.c4gh \
+ --arg filepath NB12878.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
@@ -96,7 +96,7 @@ verify_payload=$(
--arg user test@dummy.com \
--arg archive_path "$CORRID" \
--arg file_id "$CORRID" \
- --arg filepath test_dummy.org/NB12878.bam.c4gh \
+ --arg filepath NB12878.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
--argjson re_verify false \
'$ARGS.named|@base64'
diff --git a/.github/integration/tests/sda/30_backup-finalize_test.sh b/.github/integration/tests/sda/30_backup-finalize_test.sh
index e4a6d4f81..6ad782826 100644
--- a/.github/integration/tests/sda/30_backup-finalize_test.sh
+++ b/.github/integration/tests/sda/30_backup-finalize_test.sh
@@ -94,7 +94,7 @@ EOD
# check DB for archive file names
for file in NA12878.bam.c4gh NA12878.bai.c4gh NA12878_20k_b37.bam.c4gh NA12878_20k_b37.bai.c4gh; do
- archiveName=$(psql -U postgres -h postgres -d sda -At -c "SELECT archive_file_path from sda.files where submission_file_path = 'test_dummy.org/$file';")
+ archiveName=$(psql -U postgres -h postgres -d sda -At -c "SELECT archive_file_path from sda.files where submission_file_path = '$file';")
size=$(s3cmd -c direct ls s3://backup/"$archiveName" | tr -s ' ' | cut -d ' ' -f 3)
if [ "$size" -eq 0 ]; then
echo "Failed to get size of $file from backup site"
diff --git a/.github/integration/tests/sda/31_cancel_test2.sh b/.github/integration/tests/sda/31_cancel_test2.sh
index 74cce3f02..0095a0687 100644
--- a/.github/integration/tests/sda/31_cancel_test2.sh
+++ b/.github/integration/tests/sda/31_cancel_test2.sh
@@ -7,7 +7,7 @@ ENC_SHA=$(sha256sum NA12878.bam.c4gh | cut -d' ' -f 1)
ENC_MD5=$(md5sum NA12878.bam.c4gh | cut -d' ' -f 1)
## get correlation id from message
-CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'test_dummy.org/NA12878.bam.c4gh';")
+CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'NA12878.bam.c4gh';")
properties=$(
@@ -30,7 +30,7 @@ cancel_payload=$(
jq -r -c -n \
--arg type cancel \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/NA12878.bam.c4gh \
+ --arg filepath NA12878.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
@@ -67,7 +67,7 @@ ingest_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/NA12878.bam.c4gh \
+ --arg filepath NA12878.bam.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
@@ -110,7 +110,7 @@ accession_payload=$(
jq -r -c -n \
--arg type accession \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/NA12878.bam.c4gh \
+ --arg filepath NA12878.bam.c4gh \
--arg accession_id EGAF74900000001 \
--argjson decrypted_checksums "$decrypted_checksums" \
'$ARGS.named|@base64'
diff --git a/.github/integration/tests/sda/32_test_race_condition.sh b/.github/integration/tests/sda/32_test_race_condition.sh
index 58260e828..eb04ba8e6 100644
--- a/.github/integration/tests/sda/32_test_race_condition.sh
+++ b/.github/integration/tests/sda/32_test_race_condition.sh
@@ -14,7 +14,7 @@ ENC_SHA=$(sha256sum race_file.c4gh | cut -d' ' -f 1)
ENC_MD5=$(md5sum race_file.c4gh | cut -d' ' -f 1)
## get correlation id from message
-CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'test_dummy.org/race_file.c4gh';")
+CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'race_file.c4gh';")
properties=$(
jq -c -n \
@@ -36,7 +36,7 @@ accession_payload=$(
jq -r -c -n \
--arg type accession \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/race_file.c4gh \
+ --arg filepath race_file.c4gh \
--arg accession_id EGAF74900000099 \
--argjson decrypted_checksums "$decrypted_checksums" \
'$ARGS.named|@base64'
@@ -74,7 +74,7 @@ ingest_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/race_file.c4gh \
+ --arg filepath race_file.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
diff --git a/.github/integration/tests/sda/60_api_admin_test.sh b/.github/integration/tests/sda/60_api_admin_test.sh
index cecf5a86c..0b823bd3b 100644
--- a/.github/integration/tests/sda/60_api_admin_test.sh
+++ b/.github/integration/tests/sda/60_api_admin_test.sh
@@ -1,9 +1,9 @@
-#!/bin/sh
+#!/bin/bash
set -e
cd shared || true
token="$(curl http://oidc:8080/tokens | jq -r '.[0]')"
-# Upload a file and make sure it's listed
+## make sure previously uploaded files are listed
result="$(curl -sk -L "http://api:8080/users/test@dummy.org/files" -H "Authorization: Bearer $token" | jq '. | length')"
if [ "$result" -ne 2 ]; then
echo "wrong number of files returned for user test@dummy.org"
@@ -41,25 +41,27 @@ fi
# Reupload a file under a different name, test to delete it
s3cmd -c s3cfg put "NA12878.bam.c4gh" s3://test_dummy.org/NC12878.bam.c4gh
-echo "waiting for upload to complete"
URI=http://rabbitmq:15672
if [ -n "$PGSSLCERT" ]; then
URI=https://rabbitmq:15671
fi
+stream_size=$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '.messages_ready')
+stream_size=$((stream_size + 1))
RETRY_TIMES=0
-until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 4 ]; do
- echo "waiting for upload to complete"
- RETRY_TIMES=$((RETRY_TIMES + 1))
- if [ "$RETRY_TIMES" -eq 30 ]; then
- echo "::error::Time out while waiting for upload to complete"
- exit 1
- fi
- sleep 2
+
+until [ "$stream_size" -eq "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '.messages_ready')" ]; do
+ echo "waiting for upload to complete"
+ RETRY_TIMES=$((RETRY_TIMES + 1))
+ if [ "$RETRY_TIMES" -eq 30 ]; then
+ echo "Upload did not complete successfully"
+ exit 1
+ fi
+ sleep 2
done
# get the fileId of the new file
-fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NC12878.bam.c4gh") | .fileID')"
+fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "NC12878.bam.c4gh") | .fileID')"
output=$(s3cmd -c s3cfg ls s3://test_dummy.org/NC12878.bam.c4gh 2>/dev/null)
if [ -z "$output" ] ; then
@@ -104,27 +106,23 @@ fi
# Re-upload the file and use the api to ingest it, then try to delete it
s3cmd -c s3cfg put NA12878.bam.c4gh s3://test_dummy.org/NE12878.bam.c4gh
-
-URI=http://rabbitmq:15672
-if [ -n "$PGSSLCERT" ]; then
- URI=https://rabbitmq:15671
-fi
+stream_size=$((stream_size + 1))
RETRY_TIMES=0
-until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 6 ]; do
- echo "waiting for upload to complete"
- RETRY_TIMES=$((RETRY_TIMES + 1))
- if [ "$RETRY_TIMES" -eq 3 ]; then
- echo "::error::Time out while waiting for upload to complete"
- #exit 1
- break
- fi
- sleep 2
+
+until [ "$stream_size" -eq "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '.messages_ready')" ]; do
+ echo "waiting for upload to complete"
+ RETRY_TIMES=$((RETRY_TIMES + 1))
+ if [ "$RETRY_TIMES" -eq 30 ]; then
+ echo "Upload did not complete successfully"
+ exit 1
+ fi
+ sleep 2
done
# Ingest it
new_payload=$(
jq -c -n \
- --arg filepath "test_dummy.org/NE12878.bam.c4gh" \
+ --arg filepath "NE12878.bam.c4gh" \
--arg user "test@dummy.org" \
'$ARGS.named'
)
@@ -135,7 +133,7 @@ if [ "$resp" != "200" ]; then
exit 1
fi
-fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')"
+fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "NE12878.bam.c4gh") | .fileID')"
# wait for the fail to get the correct status
RETRY_TIMES=0
@@ -150,9 +148,10 @@ until [ "$(psql -U postgres -h postgres -d sda -At -c "select id from sda.file_e
done
# Try to delete file not in inbox
-fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')"
+fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "NE12878.bam.c4gh") | .fileID')"
resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -X DELETE "http://api:8080/file/test@dummy.org/$fileid")"
if [ "$resp" != "404" ]; then
echo "Error when deleting the file, expected 404 got: $resp"
exit 1
-fi
\ No newline at end of file
+fi
+echo "API admin tests completed successfully"
diff --git a/.github/integration/tests/sda/92_handle_file_errors.sh b/.github/integration/tests/sda/92_handle_file_errors.sh
index f715ebc90..93d284325 100644
--- a/.github/integration/tests/sda/92_handle_file_errors.sh
+++ b/.github/integration/tests/sda/92_handle_file_errors.sh
@@ -45,7 +45,7 @@ bad_file_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/bad.file.c4gh \
+ --arg filepath bad.file.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
@@ -77,7 +77,7 @@ missing_file_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/missing.file.c4gh \
+ --arg filepath missing.file.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
@@ -125,7 +125,7 @@ truncated_file_payload=$(
jq -r -c -n \
--arg type ingest \
--arg user test@dummy.org \
- --arg filepath test_dummy.org/truncated.c4gh \
+ --arg filepath truncated.c4gh \
--argjson encrypted_checksums "$encrypted_checksums" \
'$ARGS.named|@base64'
)
diff --git a/sda/cmd/api/api.go b/sda/cmd/api/api.go
index 338b9cf00..deddbcabd 100644
--- a/sda/cmd/api/api.go
+++ b/sda/cmd/api/api.go
@@ -24,6 +24,7 @@ import (
"github.com/neicnordic/sensitive-data-archive/internal/broker"
"github.com/neicnordic/sensitive-data-archive/internal/config"
"github.com/neicnordic/sensitive-data-archive/internal/database"
+ "github.com/neicnordic/sensitive-data-archive/internal/helper"
"github.com/neicnordic/sensitive-data-archive/internal/jsonadapter"
"github.com/neicnordic/sensitive-data-archive/internal/schema"
"github.com/neicnordic/sensitive-data-archive/internal/storage"
@@ -320,6 +321,8 @@ func deleteFile(c *gin.Context) {
return
}
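+	// the database stores the anonymized path; restore the "<user>/" inbox prefix before removing the file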
+ filePath = helper.UnanonymizeFilepath(filePath, submissionUser)
var RetryTimes = 5
for count := 1; count <= RetryTimes; count++ {
err = inbox.RemoveFile(filePath)
diff --git a/sda/cmd/ingest/ingest.go b/sda/cmd/ingest/ingest.go
index 5c62310e5..026d43425 100644
--- a/sda/cmd/ingest/ingest.go
+++ b/sda/cmd/ingest/ingest.go
@@ -21,6 +21,7 @@ import (
"github.com/neicnordic/sensitive-data-archive/internal/broker"
"github.com/neicnordic/sensitive-data-archive/internal/config"
"github.com/neicnordic/sensitive-data-archive/internal/database"
+ "github.com/neicnordic/sensitive-data-archive/internal/helper"
"github.com/neicnordic/sensitive-data-archive/internal/schema"
"github.com/neicnordic/sensitive-data-archive/internal/storage"
@@ -259,7 +260,7 @@ func main() {
}
}
- file, err := inbox.NewFileReader(message.FilePath)
+ file, err := inbox.NewFileReader(helper.UnanonymizeFilepath(message.FilePath, message.User))
if err != nil { //nolint:nestif
log.Errorf("Failed to open file to ingest reason: (%s)", err.Error())
if strings.Contains(err.Error(), "no such file or directory") || strings.Contains(err.Error(), "NoSuchKey:") {
@@ -292,7 +293,7 @@ func main() {
continue
}
- fileSize, err := inbox.GetFileSize(message.FilePath)
+ fileSize, err := inbox.GetFileSize(helper.UnanonymizeFilepath(message.FilePath, message.User))
if err != nil {
log.Errorf("Failed to get file size of file to ingest, reason: (%s)", err.Error())
// Nack message so the server gets notified that something is wrong and requeue the message.
diff --git a/sda/cmd/mapper/mapper.go b/sda/cmd/mapper/mapper.go
index a7455342f..631329caa 100644
--- a/sda/cmd/mapper/mapper.go
+++ b/sda/cmd/mapper/mapper.go
@@ -9,6 +9,7 @@ import (
"github.com/neicnordic/sensitive-data-archive/internal/broker"
"github.com/neicnordic/sensitive-data-archive/internal/config"
"github.com/neicnordic/sensitive-data-archive/internal/database"
+ "github.com/neicnordic/sensitive-data-archive/internal/helper"
"github.com/neicnordic/sensitive-data-archive/internal/schema"
"github.com/neicnordic/sensitive-data-archive/internal/storage"
@@ -103,13 +104,14 @@ func main() {
for _, aID := range mappings.AccessionIDs {
log.Debugf("Mapped file to dataset (corr-id: %s, datasetid: %s, accessionid: %s)", delivered.CorrelationId, mappings.DatasetID, aID)
- filePath, err := db.GetInboxPath(aID)
+ fileInfo, err := db.GetFileInfoFromAccessionID(aID)
if err != nil {
- log.Errorf("failed to get inbox path for file with stable ID: %s", aID)
+ log.Errorf("failed to get file info for file with stable ID: %s", aID)
}
- err = inbox.RemoveFile(filePath)
+
+ err = inbox.RemoveFile(helper.UnanonymizeFilepath(fileInfo.FilePath, fileInfo.User))
if err != nil {
- log.Errorf("Remove file from inbox failed, reason: %v", err)
+ log.Errorf("Remove file from inbox %s failed, reason: %v", fileInfo.FilePath, err)
}
}
diff --git a/sda/cmd/s3inbox/proxy.go b/sda/cmd/s3inbox/proxy.go
index 7bc63cf11..6459c226e 100644
--- a/sda/cmd/s3inbox/proxy.go
+++ b/sda/cmd/s3inbox/proxy.go
@@ -21,6 +21,7 @@ import (
"github.com/minio/minio-go/v6/pkg/signer"
"github.com/neicnordic/sensitive-data-archive/internal/broker"
"github.com/neicnordic/sensitive-data-archive/internal/database"
+ "github.com/neicnordic/sensitive-data-archive/internal/helper"
"github.com/neicnordic/sensitive-data-archive/internal/storage"
"github.com/neicnordic/sensitive-data-archive/internal/userauth"
log "github.com/sirupsen/logrus"
@@ -148,8 +149,9 @@ func (p *Proxy) allowedResponse(w http.ResponseWriter, r *http.Request, token jw
username := token.Subject()
rawFilepath := strings.Replace(r.URL.Path, "/"+p.s3.Bucket+"/", "", 1)
+ anonymizedFilepath := helper.AnonymizeFilepath(rawFilepath, username)
- filepath, err := formatUploadFilePath(rawFilepath)
+ filepath, err := formatUploadFilePath(anonymizedFilepath)
if err != nil {
reportError(http.StatusNotAcceptable, err.Error(), w)
@@ -179,7 +181,7 @@ func (p *Proxy) allowedResponse(w http.ResponseWriter, r *http.Request, token jw
// Send message to upstream and set file as uploaded in the database
if p.uploadFinishedSuccessfully(r, s3response) {
log.Debug("create message")
- message, err := p.CreateMessageFromRequest(r, token)
+		message, err := p.CreateMessageFromRequest(r, token, username)
if err != nil {
p.internalServerError(w, r, err.Error())
@@ -262,6 +264,7 @@ func (p *Proxy) checkAndSendMessage(jsonMessage []byte, r *http.Request) error {
}
}
+ log.Debugf("Sending message with id %s", p.fileIds[r.URL.Path])
if err := p.messenger.SendMessage(p.fileIds[r.URL.Path], p.messenger.Conf.Exchange, p.messenger.Conf.RoutingKey, jsonMessage); err != nil {
return fmt.Errorf("error when sending message to broker: %v", err)
}
@@ -449,7 +452,7 @@ func (p *Proxy) detectRequestType(r *http.Request) S3RequestType {
// CreateMessageFromRequest is a function that can take a http request and
// figure out the correct rabbitmq message to send from it.
-func (p *Proxy) CreateMessageFromRequest(r *http.Request, claims jwt.Token) (Event, error) {
+func (p *Proxy) CreateMessageFromRequest(r *http.Request, claims jwt.Token, user string) (Event, error) {
event := Event{}
checksum := Checksum{}
var err error
@@ -461,7 +464,9 @@ func (p *Proxy) CreateMessageFromRequest(r *http.Request, claims jwt.Token) (Eve
// Case for simple upload
event.Operation = "upload"
- event.Filepath = strings.Replace(r.URL.Path, "/"+p.s3.Bucket+"/", "", 1)
+ rawFilepath := strings.Replace(r.URL.Path, "/"+p.s3.Bucket+"/", "", 1)
+ event.Filepath = helper.AnonymizeFilepath(rawFilepath, user)
+
event.Username = claims.Subject()
checksum.Type = "sha256"
event.Checksum = []interface{}{checksum}
diff --git a/sda/cmd/s3inbox/proxy_test.go b/sda/cmd/s3inbox/proxy_test.go
index 08d44de61..7c8fd696c 100644
--- a/sda/cmd/s3inbox/proxy_test.go
+++ b/sda/cmd/s3inbox/proxy_test.go
@@ -416,12 +416,13 @@ func (suite *ProxyTests) TestMessageFormatting() {
r.Header.Set("x-amz-content-sha256", "checksum")
claims := jwt.New()
- assert.NoError(suite.T(), claims.Set("sub", "user@host.domain"))
+ user := "user@host.domain"
+ assert.NoError(suite.T(), claims.Set("sub", user))
// start proxy that denies everything
proxy := NewProxy(suite.S3conf, &helper.AlwaysDeny{}, suite.messenger, suite.database, new(tls.Config))
 	suite.fakeServer.resp = "<ListBucketResult><Name>test</Name><Prefix>/user/new_file.txt</Prefix><KeyCount>1</KeyCount><MaxKeys>2</MaxKeys><IsTruncated>false</IsTruncated><Contents><Key>/user/new_file.txt</Key><LastModified>2020-03-10T13:20:15.000Z</LastModified><ETag>\"0a44282bd39178db9680f24813c41aec-1\"</ETag><Size>1234</Size><StorageClass>STANDARD</StorageClass></Contents></ListBucketResult>"
- msg, err := proxy.CreateMessageFromRequest(r, claims)
+ msg, err := proxy.CreateMessageFromRequest(r, claims, user)
assert.Nil(suite.T(), err)
assert.IsType(suite.T(), Event{}, msg)
@@ -437,7 +438,7 @@ func (suite *ProxyTests) TestMessageFormatting() {
// Test single shot upload
r.Method = "PUT"
- msg, err = proxy.CreateMessageFromRequest(r, jwt.New())
+ msg, err = proxy.CreateMessageFromRequest(r, jwt.New(), user)
assert.Nil(suite.T(), err)
assert.IsType(suite.T(), Event{}, msg)
assert.Equal(suite.T(), "upload", msg.Operation)
@@ -455,6 +456,7 @@ func (suite *ProxyTests) TestDatabaseConnection() {
// PUT a file into the system
filename := "/dummy/db-test-file"
+ anonymFilename := "db-test-file"
r, _ := http.NewRequest("PUT", filename, nil)
w := httptest.NewRecorder()
 	suite.fakeServer.resp = "<ListBucketResult><Name>test</Name><Prefix>/elixirid/db-test-file.txt</Prefix><KeyCount>1</KeyCount><MaxKeys>2</MaxKeys><IsTruncated>false</IsTruncated><Contents><Key>/elixirid/file.txt</Key><LastModified>2020-03-10T13:20:15.000Z</LastModified><ETag>\"0a44282bd39178db9680f24813c41aec-1\"</ETag><Size>5</Size><StorageClass>STANDARD</StorageClass></Contents></ListBucketResult>"
@@ -472,7 +474,7 @@ func (suite *ProxyTests) TestDatabaseConnection() {
// Check that the file is in the database
var fileID string
query := "SELECT id FROM sda.files WHERE submission_file_path = $1;"
- err = db.QueryRow(query, filename[1:]).Scan(&fileID)
+ err = db.QueryRow(query, anonymFilename).Scan(&fileID)
assert.Nil(suite.T(), err, "Failed to query database")
assert.NotNil(suite.T(), fileID, "File not found in database")
diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go
index ea7a39344..a4064a2ae 100644
--- a/sda/internal/database/db_functions.go
+++ b/sda/internal/database/db_functions.go
@@ -553,6 +553,39 @@ func (dbs *SDAdb) GetHeaderForStableID(stableID string) ([]byte, error) {
return header, nil
}
+// GetFileInfoFromAccessionID retrieves the submission user and inbox file path for the file with the given accession (stable) ID
+func (dbs *SDAdb) GetFileInfoFromAccessionID(accessionID string) (SyncData, error) {
+ var (
+ s SyncData
+ err error
+ )
+
+ for count := 1; count <= RetryTimes; count++ {
+ s, err = dbs.getFileInfoFromAccessionID(accessionID)
+ if err == nil {
+ break
+ }
+ time.Sleep(time.Duration(math.Pow(3, float64(count))) * time.Second)
+ }
+
+ return s, err
+}
+
+// getFileInfoFromAccessionID is the actual function performing work for GetFileInfoFromAccessionID
+func (dbs *SDAdb) getFileInfoFromAccessionID(accessionID string) (SyncData, error) {
+ dbs.checkAndReconnectIfNeeded()
+
+ const query = "SELECT submission_user, submission_file_path from sda.files WHERE stable_id = $1;"
+ var data SyncData
+ if err := dbs.DB.QueryRow(query, accessionID).Scan(&data.User, &data.FilePath); err != nil {
+ log.Warnf("Error while searching for id %s: %v", accessionID, err)
+
+ return SyncData{}, err
+ }
+
+ return data, nil
+}
+
// GetSyncData retrieves the file information needed to sync a dataset
func (dbs *SDAdb) GetSyncData(accessionID string) (SyncData, error) {
var (
diff --git a/sda/internal/helper/helper.go b/sda/internal/helper/helper.go
index 426815a73..2489d66f1 100644
--- a/sda/internal/helper/helper.go
+++ b/sda/internal/helper/helper.go
@@ -15,6 +15,7 @@ import (
"net/http"
"os"
"path/filepath"
+ "strings"
"time"
"github.com/lestrrat-go/jwx/v2/jwa"
@@ -421,3 +422,13 @@ func TLScertToFile(filename string, derBytes []byte) error {
return err
}
+
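+// AnonymizeFilepath strips the user's inbox prefix ("<username>/" with "@" replaced by "_") from a file path.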
+func AnonymizeFilepath(path, username string) string {
+	return strings.ReplaceAll(path, strings.Replace(username, "@", "_", 1)+"/", "")
+}
+
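+// UnanonymizeFilepath restores the user's inbox prefix so the file can be located in the inbox backend.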
+func UnanonymizeFilepath(path, username string) string {
+	return strings.Replace(username, "@", "_", 1) + "/" + path
+}
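
The two helpers above are intended to be inverses for paths that sit directly under the user's inbox prefix (the username with "@" replaced by "_"). A minimal round-trip sketch, written as a hypothetical test alongside the helper package (not part of this diff):

package helper

import "testing"

func TestAnonymizeFilepathRoundTrip(t *testing.T) {
	user := "test@dummy.org"
	inboxPath := "test_dummy.org/NA12878.bam.c4gh" // path as stored in the inbox backend

	// stripping the prefix yields the anonymized submission path ...
	anon := AnonymizeFilepath(inboxPath, user)
	if anon != "NA12878.bam.c4gh" {
		t.Errorf("unexpected anonymized path: %s", anon)
	}

	// ... and restoring the prefix gives back the original inbox path
	if got := UnanonymizeFilepath(anon, user); got != inboxPath {
		t.Errorf("expected %s, got %s", inboxPath, got)
	}
}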