diff --git a/.github/integration/tests/sda/01_install_dependencies.sh b/.github/integration/tests/sda/01_install_dependencies.sh
index 9020696c1..174d96c6d 100644
--- a/.github/integration/tests/sda/01_install_dependencies.sh
+++ b/.github/integration/tests/sda/01_install_dependencies.sh
@@ -13,3 +13,5 @@ for t in curl expect jq openssh-client postgresql-client xxd; do
         apt-get -o DPkg::Lock::Timeout=60 install -y "$t" >/dev/null
     fi
 done
+
+pip -q install s3cmd
diff --git a/.github/integration/tests/sda/10_upload_test.sh b/.github/integration/tests/sda/10_upload_test.sh
index 640045454..a5b8419de 100644
--- a/.github/integration/tests/sda/10_upload_test.sh
+++ b/.github/integration/tests/sda/10_upload_test.sh
@@ -20,8 +20,6 @@ done
 ## truncate database
 psql -U postgres -h postgres -d sda -At -c "TRUNCATE TABLE sda.files, sda.encryption_keys CASCADE;"
 
-pip -q install s3cmd
-
 for file in NA12878.bam NA12878_20k_b37.bam NA12878.bai NA12878_20k_b37.bai; do
     curl --retry 100 -s -L -o /shared/$file "https://github.com/ga4gh/htsget-refserver/raw/main/data/gcp/gatk-test-data/wgs_bam/$file"
     if [ ! -f "$file.c4gh" ]; then
diff --git a/.github/integration/tests/sda/20_ingest-verify_test.sh b/.github/integration/tests/sda/20_ingest-verify_test.sh
index f9443cfb3..0c750f0cf 100644
--- a/.github/integration/tests/sda/20_ingest-verify_test.sh
+++ b/.github/integration/tests/sda/20_ingest-verify_test.sh
@@ -36,7 +36,7 @@ for file in NA12878.bam NA12878_20k_b37.bam NA12878.bai NA12878_20k_b37.bai; do
         jq -r -c -n \
             --arg type ingest \
             --arg user test@dummy.org \
-            --arg filepath test_dummy.org/"$file.c4gh" \
+            --arg filepath "$file.c4gh" \
             --argjson encrypted_checksums "$encrypted_checksums" \
             '$ARGS.named|@base64'
     )
diff --git a/.github/integration/tests/sda/21_cancel_test.sh b/.github/integration/tests/sda/21_cancel_test.sh
index ab6fd64b2..c7482bdba 100644
--- a/.github/integration/tests/sda/21_cancel_test.sh
+++ b/.github/integration/tests/sda/21_cancel_test.sh
@@ -34,7 +34,7 @@ cancel_payload=$(
     jq -r -c -n \
         --arg type cancel \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/NA12878_20k_b37.bam.c4gh \
+        --arg filepath NA12878_20k_b37.bam.c4gh \
         --argjson encrypted_checksums "$encrypted_checksums" \
         '$ARGS.named|@base64'
 )
@@ -65,7 +65,7 @@ ingest_payload=$(
     jq -r -c -n \
         --arg type ingest \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/NA12878_20k_b37.bam.c4gh \
+        --arg filepath NA12878_20k_b37.bam.c4gh \
         --argjson encrypted_checksums "$encrypted_checksums" \
         '$ARGS.named|@base64'
 )
diff --git a/.github/integration/tests/sda/22_error_test.sh b/.github/integration/tests/sda/22_error_test.sh
index 461b0f5dc..ea79144e8 100644
--- a/.github/integration/tests/sda/22_error_test.sh
+++ b/.github/integration/tests/sda/22_error_test.sh
@@ -29,7 +29,7 @@ if [ -n "$PGSSLCERT" ]; then
 fi
 
 ## get correlation id from message
-CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'test_dummy.org/NB12878.bam.c4gh';")
+CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'NB12878.bam.c4gh';")
 
 properties=$(
     jq -c -n \
@@ -50,7 +50,7 @@ ingest_payload=$(
     jq -r -c -n \
         --arg type ingest \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/NB12878.bam.c4gh \
+        --arg filepath NB12878.bam.c4gh \
         --argjson encrypted_checksums "$encrypted_checksums" \
         '$ARGS.named|@base64'
 )
@@ -96,7 +96,7 @@ verify_payload=$(
         --arg user test@dummy.com \
         --arg archive_path "$CORRID" \
         --arg file_id "$CORRID" \
-        --arg filepath test_dummy.org/NB12878.bam.c4gh \
+        --arg filepath NB12878.bam.c4gh \
         --argjson encrypted_checksums "$encrypted_checksums" \
         --argjson re_verify false \
         '$ARGS.named|@base64'
diff --git a/.github/integration/tests/sda/30_backup-finalize_test.sh b/.github/integration/tests/sda/30_backup-finalize_test.sh
index e4a6d4f81..6ad782826 100644
--- a/.github/integration/tests/sda/30_backup-finalize_test.sh
+++ b/.github/integration/tests/sda/30_backup-finalize_test.sh
@@ -94,7 +94,7 @@ EOD
 
 # check DB for archive file names
 for file in NA12878.bam.c4gh NA12878.bai.c4gh NA12878_20k_b37.bam.c4gh NA12878_20k_b37.bai.c4gh; do
-    archiveName=$(psql -U postgres -h postgres -d sda -At -c "SELECT archive_file_path from sda.files where submission_file_path = 'test_dummy.org/$file';")
+    archiveName=$(psql -U postgres -h postgres -d sda -At -c "SELECT archive_file_path from sda.files where submission_file_path = '$file';")
     size=$(s3cmd -c direct ls s3://backup/"$archiveName" | tr -s ' ' | cut -d ' ' -f 3)
     if [ "$size" -eq 0 ]; then
         echo "Failed to get size of $file from backup site"
diff --git a/.github/integration/tests/sda/31_cancel_test2.sh b/.github/integration/tests/sda/31_cancel_test2.sh
index 74cce3f02..0095a0687 100644
--- a/.github/integration/tests/sda/31_cancel_test2.sh
+++ b/.github/integration/tests/sda/31_cancel_test2.sh
@@ -7,7 +7,7 @@ ENC_SHA=$(sha256sum NA12878.bam.c4gh | cut -d' ' -f 1)
 ENC_MD5=$(md5sum NA12878.bam.c4gh | cut -d' ' -f 1)
 
 ## get correlation id from message
-CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'test_dummy.org/NA12878.bam.c4gh';")
+CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'NA12878.bam.c4gh';")
 
 
 properties=$(
@@ -30,7 +30,7 @@ cancel_payload=$(
     jq -r -c -n \
         --arg type cancel \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/NA12878.bam.c4gh \
+        --arg filepath NA12878.bam.c4gh \
         --argjson encrypted_checksums "$encrypted_checksums" \
         '$ARGS.named|@base64'
 )
@@ -67,7 +67,7 @@ ingest_payload=$(
     jq -r -c -n \
         --arg type ingest \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/NA12878.bam.c4gh \
+        --arg filepath NA12878.bam.c4gh \
         --argjson encrypted_checksums "$encrypted_checksums" \
         '$ARGS.named|@base64'
 )
@@ -110,7 +110,7 @@ accession_payload=$(
     jq -r -c -n \
         --arg type accession \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/NA12878.bam.c4gh \
+        --arg filepath NA12878.bam.c4gh \
         --arg accession_id EGAF74900000001 \
         --argjson decrypted_checksums "$decrypted_checksums" \
         '$ARGS.named|@base64'
diff --git a/.github/integration/tests/sda/32_test_race_condition.sh b/.github/integration/tests/sda/32_test_race_condition.sh
index 58260e828..eb04ba8e6 100644
--- a/.github/integration/tests/sda/32_test_race_condition.sh
+++ b/.github/integration/tests/sda/32_test_race_condition.sh
@@ -14,7 +14,7 @@ ENC_SHA=$(sha256sum race_file.c4gh | cut -d' ' -f 1)
 ENC_MD5=$(md5sum race_file.c4gh | cut -d' ' -f 1)
 
 ## get correlation id from message
-CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'test_dummy.org/race_file.c4gh';")
+CORRID=$(psql -U postgres -h postgres -d sda -At -c "select id from sda.files where submission_file_path = 'race_file.c4gh';")
 
 properties=$(
     jq -c -n \
@@ -36,7 +36,7 @@ accession_payload=$(
     jq -r -c -n \
        --arg type accession \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/race_file.c4gh \
+        --arg filepath race_file.c4gh \
         --arg accession_id EGAF74900000099 \
         --argjson decrypted_checksums "$decrypted_checksums" \
         '$ARGS.named|@base64'
@@ -74,7 +74,7 @@ ingest_payload=$(
     jq -r -c -n \
         --arg type ingest \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/race_file.c4gh \
+        --arg filepath race_file.c4gh \
         --argjson encrypted_checksums "$encrypted_checksums" \
         '$ARGS.named|@base64'
 )
diff --git a/.github/integration/tests/sda/60_api_admin_test.sh b/.github/integration/tests/sda/60_api_admin_test.sh
index cecf5a86c..0b823bd3b 100644
--- a/.github/integration/tests/sda/60_api_admin_test.sh
+++ b/.github/integration/tests/sda/60_api_admin_test.sh
@@ -1,9 +1,9 @@
-#!/bin/sh
+#!/bin/bash
 set -e
 cd shared || true
 
 token="$(curl http://oidc:8080/tokens | jq -r '.[0]')"
-# Upload a file and make sure it's listed
+## make sure previously uploaded files are listed
 result="$(curl -sk -L "http://api:8080/users/test@dummy.org/files" -H "Authorization: Bearer $token" | jq '. | length')"
 if [ "$result" -ne 2 ]; then
     echo "wrong number of files returned for user test@dummy.org"
@@ -41,25 +41,27 @@ fi
 
 # Reupload a file under a different name, test to delete it
 s3cmd -c s3cfg put "NA12878.bam.c4gh" s3://test_dummy.org/NC12878.bam.c4gh
+stream_size=$(curl -s -u guest:guest http://rabbitmq:15672/api/queues/sda/inbox | jq '.messages_ready')
 
-echo "waiting for upload to complete"
 URI=http://rabbitmq:15672
 if [ -n "$PGSSLCERT" ]; then
     URI=https://rabbitmq:15671
 fi
+stream_size=$((stream_size + 1))
 
 RETRY_TIMES=0
-until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 4 ]; do
-    echo "waiting for upload to complete"
-    RETRY_TIMES=$((RETRY_TIMES + 1))
-    if [ "$RETRY_TIMES" -eq 30 ]; then
-        echo "::error::Time out while waiting for upload to complete"
-        exit 1
-    fi
-    sleep 2
+
+until [ $((stream_size)) -eq "$(curl -s -u guest:guest http://rabbitmq:15672/api/queues/sda/inbox | jq '.messages_ready')" ]; do
+    echo "waiting for upload to complete"
+    RETRY_TIMES=$((RETRY_TIMES + 1))
+    if [ "$RETRY_TIMES" -eq 30 ]; then
+        echo "Upload did not complete successfully"
+        exit 1
+    fi
+    sleep 2
 done
 
 # get the fileId of the new file
-fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NC12878.bam.c4gh") | .fileID')"
+fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "NC12878.bam.c4gh") | .fileID')"
 
 output=$(s3cmd -c s3cfg ls s3://test_dummy.org/NC12878.bam.c4gh 2>/dev/null)
 if [ -z "$output" ] ; then
@@ -104,27 +106,23 @@ fi
 
 # Re-upload the file and use the api to ingest it, then try to delete it
 s3cmd -c s3cfg put NA12878.bam.c4gh s3://test_dummy.org/NE12878.bam.c4gh
-
-URI=http://rabbitmq:15672
-if [ -n "$PGSSLCERT" ]; then
-    URI=https://rabbitmq:15671
-fi
+stream_size=$((stream_size + 1))
 
 RETRY_TIMES=0
-until [ "$(curl -s -k -u guest:guest $URI/api/queues/sda/inbox | jq -r '."messages_ready"')" -eq 6 ]; do
-    echo "waiting for upload to complete"
-    RETRY_TIMES=$((RETRY_TIMES + 1))
-    if [ "$RETRY_TIMES" -eq 3 ]; then
-        echo "::error::Time out while waiting for upload to complete"
-        #exit 1
-        break
-    fi
-    sleep 2
+
+until [ $((stream_size)) -eq "$(curl -s -u guest:guest $URI/api/queues/sda/inbox | jq '.messages_ready')" ]; do
+    echo "waiting for upload to complete"
+    RETRY_TIMES=$((RETRY_TIMES + 1))
+    if [ "$RETRY_TIMES" -eq 30 ]; then
+        echo "Upload did not complete successfully"
+        exit 1
+    fi
+    sleep 2
 done
 
 # Ingest it
 new_payload=$(
     jq -c -n \
-        --arg filepath "test_dummy.org/NE12878.bam.c4gh" \
+        --arg filepath "NE12878.bam.c4gh" \
         --arg user "test@dummy.org" \
         '$ARGS.named'
 )
@@ -135,7 +133,7 @@ if [ "$resp" != "200" ]; then
     exit 1
 fi
 
-fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')"
+fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "NE12878.bam.c4gh") | .fileID')"
 
 # wait for the fail to get the correct status
 RETRY_TIMES=0
@@ -150,9 +148,10 @@ until [ "$(psql -U postgres -h postgres -d sda -At -c "select id from sda.file_e
 done
 
 # Try to delete file not in inbox
-fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "test_dummy.org/NE12878.bam.c4gh") | .fileID')"
+fileid="$(curl -k -L -H "Authorization: Bearer $token" "http://api:8080/users/test@dummy.org/files" | jq -r '.[] | select(.inboxPath == "NE12878.bam.c4gh") | .fileID')"
 resp="$(curl -s -k -L -o /dev/null -w "%{http_code}\n" -H "Authorization: Bearer $token" -X DELETE "http://api:8080/file/test@dummy.org/$fileid")"
 if [ "$resp" != "404" ]; then
     echo "Error when deleting the file, expected 404 got: $resp"
     exit 1
-fi
\ No newline at end of file
+fi
+echo "API admin tests completed successfully"
\ No newline at end of file
diff --git a/.github/integration/tests/sda/92_handle_file_errors.sh b/.github/integration/tests/sda/92_handle_file_errors.sh
index f715ebc90..93d284325 100644
--- a/.github/integration/tests/sda/92_handle_file_errors.sh
+++ b/.github/integration/tests/sda/92_handle_file_errors.sh
@@ -45,7 +45,7 @@ bad_file_payload=$(
     jq -r -c -n \
         --arg type ingest \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/bad.file.c4gh \
+        --arg filepath bad.file.c4gh \
        --argjson encrypted_checksums "$encrypted_checksums" \
         '$ARGS.named|@base64'
 )
@@ -77,7 +77,7 @@ missing_file_payload=$(
     jq -r -c -n \
         --arg type ingest \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/missing.file.c4gh \
+        --arg filepath missing.file.c4gh \
         --argjson encrypted_checksums "$encrypted_checksums" \
         '$ARGS.named|@base64'
 )
@@ -125,7 +125,7 @@ truncated_file_payload=$(
     jq -r -c -n \
         --arg type ingest \
         --arg user test@dummy.org \
-        --arg filepath test_dummy.org/truncated.c4gh \
+        --arg filepath truncated.c4gh \
         --argjson encrypted_checksums "$encrypted_checksums" \
         '$ARGS.named|@base64'
 )
diff --git a/sda/cmd/api/api.go b/sda/cmd/api/api.go
index 338b9cf00..deddbcabd 100644
--- a/sda/cmd/api/api.go
+++ b/sda/cmd/api/api.go
@@ -24,6 +24,7 @@ import (
 	"github.com/neicnordic/sensitive-data-archive/internal/broker"
 	"github.com/neicnordic/sensitive-data-archive/internal/config"
 	"github.com/neicnordic/sensitive-data-archive/internal/database"
+	"github.com/neicnordic/sensitive-data-archive/internal/helper"
 	"github.com/neicnordic/sensitive-data-archive/internal/jsonadapter"
 	"github.com/neicnordic/sensitive-data-archive/internal/schema"
 	"github.com/neicnordic/sensitive-data-archive/internal/storage"
@@ -320,6 +321,7 @@ func deleteFile(c *gin.Context) {
 		return
 	}
 
+	filePath = helper.UnanonymizeFilepath(filePath, submissionUser)
 	var RetryTimes = 5
 	for count := 1; count <= RetryTimes; count++ {
 		err = inbox.RemoveFile(filePath)
diff --git a/sda/cmd/ingest/ingest.go b/sda/cmd/ingest/ingest.go
index 5c62310e5..026d43425 100644
--- a/sda/cmd/ingest/ingest.go
+++ b/sda/cmd/ingest/ingest.go
@@ -21,6 +21,7 @@ import (
 	"github.com/neicnordic/sensitive-data-archive/internal/broker"
 	"github.com/neicnordic/sensitive-data-archive/internal/config"
 	"github.com/neicnordic/sensitive-data-archive/internal/database"
+	"github.com/neicnordic/sensitive-data-archive/internal/helper"
 	"github.com/neicnordic/sensitive-data-archive/internal/schema"
 	"github.com/neicnordic/sensitive-data-archive/internal/storage"
 
@@ -259,7 +260,7 @@ func main() {
 				}
 			}
 
-			file, err := inbox.NewFileReader(message.FilePath)
+			file, err := inbox.NewFileReader(helper.UnanonymizeFilepath(message.FilePath, message.User))
 			if err != nil { //nolint:nestif
 				log.Errorf("Failed to open file to ingest reason: (%s)", err.Error())
 				if strings.Contains(err.Error(), "no such file or directory") || strings.Contains(err.Error(), "NoSuchKey:") {
@@ -292,7 +293,7 @@ func main() {
 				continue
 			}
 
-			fileSize, err := inbox.GetFileSize(message.FilePath)
+			fileSize, err := inbox.GetFileSize(helper.UnanonymizeFilepath(message.FilePath, message.User))
 			if err != nil {
 				log.Errorf("Failed to get file size of file to ingest, reason: (%s)", err.Error())
 				// Nack message so the server gets notified that something is wrong and requeue the message.
diff --git a/sda/cmd/mapper/mapper.go b/sda/cmd/mapper/mapper.go
index a7455342f..631329caa 100644
--- a/sda/cmd/mapper/mapper.go
+++ b/sda/cmd/mapper/mapper.go
@@ -9,6 +9,7 @@ import (
 	"github.com/neicnordic/sensitive-data-archive/internal/broker"
 	"github.com/neicnordic/sensitive-data-archive/internal/config"
 	"github.com/neicnordic/sensitive-data-archive/internal/database"
+	"github.com/neicnordic/sensitive-data-archive/internal/helper"
 	"github.com/neicnordic/sensitive-data-archive/internal/schema"
 	"github.com/neicnordic/sensitive-data-archive/internal/storage"
 
@@ -103,13 +104,14 @@ func main() {
 
 			for _, aID := range mappings.AccessionIDs {
 				log.Debugf("Mapped file to dataset (corr-id: %s, datasetid: %s, accessionid: %s)", delivered.CorrelationId, mappings.DatasetID, aID)
-				filePath, err := db.GetInboxPath(aID)
+				fileInfo, err := db.GetFileInfoFromAccessionID(aID)
 				if err != nil {
-					log.Errorf("failed to get inbox path for file with stable ID: %s", aID)
+					log.Errorf("failed to get file info for file with stable ID: %s", aID)
 				}
-				err = inbox.RemoveFile(filePath)
+
+				err = inbox.RemoveFile(helper.UnanonymizeFilepath(fileInfo.FilePath, fileInfo.User))
 				if err != nil {
-					log.Errorf("Remove file from inbox failed, reason: %v", err)
+					log.Errorf("Remove file from inbox %s failed, reason: %v", fileInfo.FilePath, err)
 				}
 			}
 
diff --git a/sda/cmd/s3inbox/proxy.go b/sda/cmd/s3inbox/proxy.go
index 7bc63cf11..6459c226e 100644
--- a/sda/cmd/s3inbox/proxy.go
+++ b/sda/cmd/s3inbox/proxy.go
@@ -21,6 +21,7 @@ import (
 	"github.com/minio/minio-go/v6/pkg/signer"
 	"github.com/neicnordic/sensitive-data-archive/internal/broker"
 	"github.com/neicnordic/sensitive-data-archive/internal/database"
+	"github.com/neicnordic/sensitive-data-archive/internal/helper"
 	"github.com/neicnordic/sensitive-data-archive/internal/storage"
 	"github.com/neicnordic/sensitive-data-archive/internal/userauth"
 	log "github.com/sirupsen/logrus"
@@ -148,8 +149,9 @@ func (p *Proxy) allowedResponse(w http.ResponseWriter, r *http.Request, token jw
 
 	username := token.Subject()
 	rawFilepath := strings.Replace(r.URL.Path, "/"+p.s3.Bucket+"/", "", 1)
+	anonymizedFilepath := helper.AnonymizeFilepath(rawFilepath, username)
 
-	filepath, err := formatUploadFilePath(rawFilepath)
+	filepath, err := formatUploadFilePath(anonymizedFilepath)
 	if err != nil {
 		reportError(http.StatusNotAcceptable, err.Error(), w)
 
@@ -179,7 +181,7 @@ func (p *Proxy) allowedResponse(w http.ResponseWriter, r *http.Request, token jw
 	// Send message to upstream and set file as uploaded in the database
 	if p.uploadFinishedSuccessfully(r, s3response) {
 		log.Debug("create message")
-		message, err := p.CreateMessageFromRequest(r, token)
+		message, err := p.CreateMessageFromRequest(r, token, anonymizedFilepath)
 		if err != nil {
 			p.internalServerError(w, r, err.Error())
 
@@ -262,6 +264,7 @@ func (p *Proxy) checkAndSendMessage(jsonMessage []byte, r *http.Request) error {
 		}
 	}
 
+	log.Debugf("Sending message with id %s", p.fileIds[r.URL.Path])
 	if err := p.messenger.SendMessage(p.fileIds[r.URL.Path], p.messenger.Conf.Exchange, p.messenger.Conf.RoutingKey, jsonMessage); err != nil {
 		return fmt.Errorf("error when sending message to broker: %v", err)
 	}
@@ -449,7 +452,7 @@ func (p *Proxy) detectRequestType(r *http.Request) S3RequestType {
 
 // CreateMessageFromRequest is a function that can take a http request and
 // figure out the correct rabbitmq message to send from it.
-func (p *Proxy) CreateMessageFromRequest(r *http.Request, claims jwt.Token) (Event, error) {
+func (p *Proxy) CreateMessageFromRequest(r *http.Request, claims jwt.Token, user string) (Event, error) {
 	event := Event{}
 	checksum := Checksum{}
 	var err error
@@ -461,7 +464,9 @@ func (p *Proxy) CreateMessageFromRequest(r *http.Request, claims jwt.Token) (Eve
 
 		// Case for simple upload
 		event.Operation = "upload"
-		event.Filepath = strings.Replace(r.URL.Path, "/"+p.s3.Bucket+"/", "", 1)
+		rawFilepath := strings.Replace(r.URL.Path, "/"+p.s3.Bucket+"/", "", 1)
+		event.Filepath = helper.AnonymizeFilepath(rawFilepath, user)
+
 		event.Username = claims.Subject()
 		checksum.Type = "sha256"
 		event.Checksum = []interface{}{checksum}
diff --git a/sda/cmd/s3inbox/proxy_test.go b/sda/cmd/s3inbox/proxy_test.go
index 08d44de61..7c8fd696c 100644
--- a/sda/cmd/s3inbox/proxy_test.go
+++ b/sda/cmd/s3inbox/proxy_test.go
@@ -416,12 +416,13 @@ func (suite *ProxyTests) TestMessageFormatting() {
 	r.Header.Set("x-amz-content-sha256", "checksum")
 
 	claims := jwt.New()
-	assert.NoError(suite.T(), claims.Set("sub", "user@host.domain"))
+	user := "user@host.domain"
+	assert.NoError(suite.T(), claims.Set("sub", user))
 
 	// start proxy that denies everything
 	proxy := NewProxy(suite.S3conf, &helper.AlwaysDeny{}, suite.messenger, suite.database, new(tls.Config))
 	suite.fakeServer.resp = "test/user/new_file.txt12false/user/new_file.txt2020-03-10T13:20:15.000Z"0a44282bd39178db9680f24813c41aec-1"1234STANDARD"
-	msg, err := proxy.CreateMessageFromRequest(r, claims)
+	msg, err := proxy.CreateMessageFromRequest(r, claims, user)
 	assert.Nil(suite.T(), err)
 	assert.IsType(suite.T(), Event{}, msg)
 
@@ -437,7 +438,7 @@ func (suite *ProxyTests) TestMessageFormatting() {
 
 	// Test single shot upload
 	r.Method = "PUT"
-	msg, err = proxy.CreateMessageFromRequest(r, jwt.New())
+	msg, err = proxy.CreateMessageFromRequest(r, jwt.New(), user)
 	assert.Nil(suite.T(), err)
 	assert.IsType(suite.T(), Event{}, msg)
 	assert.Equal(suite.T(), "upload", msg.Operation)
@@ -455,6 +456,7 @@ func (suite *ProxyTests) TestDatabaseConnection() {
 
 	// PUT a file into the system
 	filename := "/dummy/db-test-file"
+	anonymFilename := "db-test-file"
 	r, _ := http.NewRequest("PUT", filename, nil)
 	w := httptest.NewRecorder()
 	suite.fakeServer.resp = "test/elixirid/db-test-file.txt12false/elixirid/file.txt2020-03-10T13:20:15.000Z"0a44282bd39178db9680f24813c41aec-1"5STANDARD"
@@ -472,7 +474,7 @@ func (suite *ProxyTests) TestDatabaseConnection() {
 	// Check that the file is in the database
 	var fileID string
 	query := "SELECT id FROM sda.files WHERE submission_file_path = $1;"
-	err = db.QueryRow(query, filename[1:]).Scan(&fileID)
+	err = db.QueryRow(query, anonymFilename).Scan(&fileID)
 	assert.Nil(suite.T(), err, "Failed to query database")
 	assert.NotNil(suite.T(), fileID, "File not found in database")
 
diff --git a/sda/internal/database/db_functions.go b/sda/internal/database/db_functions.go
index ea7a39344..a4064a2ae 100644
--- a/sda/internal/database/db_functions.go
+++ b/sda/internal/database/db_functions.go
@@ -553,6 +553,39 @@ func (dbs *SDAdb) GetHeaderForStableID(stableID string) ([]byte, error) {
 	return header, nil
 }
 
+// GetFileInfoFromAccessionID retrieves the file information needed for mapping
+func (dbs *SDAdb) GetFileInfoFromAccessionID(accessionID string) (SyncData, error) {
+	var (
+		s   SyncData
+		err error
+	)
+
+	for count := 1; count <= RetryTimes; count++ {
+		s, err = dbs.getFileInfoFromAccessionID(accessionID)
+		if err == nil {
+			break
+		}
+		time.Sleep(time.Duration(math.Pow(3, float64(count))) * time.Second)
+	}
+
+	return s, err
+}
+
+// getFileInfoFromAccessionID is the actual function performing work for GetFileInfoFromAccessionID
+func (dbs *SDAdb) getFileInfoFromAccessionID(accessionID string) (SyncData, error) {
+	dbs.checkAndReconnectIfNeeded()
+
+	const query = "SELECT submission_user, submission_file_path from sda.files WHERE stable_id = $1;"
+	var data SyncData
+	if err := dbs.DB.QueryRow(query, accessionID).Scan(&data.User, &data.FilePath); err != nil {
+		log.Warnf("Error while searching for id %s: %v", accessionID, err)
+
+		return SyncData{}, err
+	}
+
+	return data, nil
+}
+
 // GetSyncData retrieves the file information needed to sync a dataset
 func (dbs *SDAdb) GetSyncData(accessionID string) (SyncData, error) {
 	var (
diff --git a/sda/internal/helper/helper.go b/sda/internal/helper/helper.go
index 426815a73..2489d66f1 100644
--- a/sda/internal/helper/helper.go
+++ b/sda/internal/helper/helper.go
@@ -15,6 +15,7 @@ import (
 	"net/http"
 	"os"
 	"path/filepath"
+	"strings"
 	"time"
 
 	"github.com/lestrrat-go/jwx/v2/jwa"
@@ -421,3 +422,11 @@ func TLScertToFile(filename string, derBytes []byte) error {
 
 	return err
 }
+
+func AnonymizeFilepath(filepath string, username string) string {
+	return strings.ReplaceAll(filepath, strings.Replace(username, "@", "_", 1)+"/", "")
+}
+
+func UnanonymizeFilepath(filepath string, username string) string {
+	return strings.Replace(username, "@", "_", 1) + "/" + filepath
+}