From edb9f068e1ad1969125c8e6bac597ba194768d22 Mon Sep 17 00:00:00 2001 From: mlatief Date: Mon, 30 Dec 2024 10:30:24 +0200 Subject: [PATCH] fix: allow Unicode object names This allows all Unicode characters that are also valid in XML 1.0 documents. See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html See: https://www.w3.org/TR/REC-xml/#charsets --- .../tenant/0026-unicode-object-names.sql | 4 + src/http/plugins/xml.ts | 4 + src/internal/database/migrations/types.ts | 1 + src/internal/errors/codes.ts | 2 +- src/storage/database/knex.ts | 6 +- src/storage/limits.ts | 12 +- src/test/common.ts | 36 +++++ src/test/object.test.ts | 90 ++++++++++- src/test/s3-protocol.test.ts | 146 ++++++++++++++++++ src/test/tenant.test.ts | 4 +- src/test/tus.test.ts | 132 +++++++++++++++- 11 files changed, 428 insertions(+), 9 deletions(-) create mode 100644 migrations/tenant/0026-unicode-object-names.sql diff --git a/migrations/tenant/0026-unicode-object-names.sql b/migrations/tenant/0026-unicode-object-names.sql new file mode 100644 index 00000000..9b37cb02 --- /dev/null +++ b/migrations/tenant/0026-unicode-object-names.sql @@ -0,0 +1,4 @@ +ALTER TABLE "storage"."objects" + ADD CONSTRAINT objects_name_check + CHECK (name SIMILAR TO '[\x09\x0A\x0D\x20-\xD7FF\xE000-\xFFFD\x00010000-\x0010ffff]+'); + diff --git a/src/http/plugins/xml.ts b/src/http/plugins/xml.ts index 6d474161..8d7a665b 100644 --- a/src/http/plugins/xml.ts +++ b/src/http/plugins/xml.ts @@ -21,6 +21,10 @@ export const xmlParser = fastifyPlugin( isArray: (_: string, jpath: string) => { return opts.parseAsArray?.includes(jpath) }, + tagValueProcessor: (name: string, value: string) => + value.replace(/&#x([0-9a-fA-F]{1,6});/g, (_, str: string) => + String.fromCharCode(Number.parseInt(str, 16)) + ), }) } diff --git a/src/internal/database/migrations/types.ts b/src/internal/database/migrations/types.ts index b283fc2d..6c473237 100644 --- a/src/internal/database/migrations/types.ts +++ b/src/internal/database/migrations/types.ts @@ -24,4 +24,5 @@ export const DBMigration = { 'optimize-search-function': 22, 'operation-function': 23, 'custom-metadata': 24, + 'unicode-object-names': 25, } as const diff --git a/src/internal/errors/codes.ts b/src/internal/errors/codes.ts index 1274b7d5..ef002519 100644 --- a/src/internal/errors/codes.ts +++ b/src/internal/errors/codes.ts @@ -265,7 +265,7 @@ export const ERRORS = { code: ErrorCode.InvalidKey, resource: key, httpStatusCode: 400, - message: `Invalid key: ${key}`, + message: `Invalid key: ${encodeURIComponent(key)}`, originalError: e, }), diff --git a/src/storage/database/knex.ts b/src/storage/database/knex.ts index a8609a10..bbd8f5a8 100644 --- a/src/storage/database/knex.ts +++ b/src/storage/database/knex.ts @@ -877,7 +877,11 @@ export class DBError extends StorageBackendError implements RenderableError { code: pgError.code, }) default: - return ERRORS.DatabaseError(pgError.message, pgError).withMetadata({ + const errorMessage = + pgError.code === '23514' && pgError.constraint === 'objects_name_check' + ? 'Invalid object name' + : pgError.message + return ERRORS.DatabaseError(errorMessage, pgError).withMetadata({ query, code: pgError.code, }) diff --git a/src/storage/limits.ts b/src/storage/limits.ts index cff8627d..b6fd4f2e 100644 --- a/src/storage/limits.ts +++ b/src/storage/limits.ts @@ -47,9 +47,15 @@ export async function isImageTransformationEnabled(tenantId: string) { * @param key */ export function isValidKey(key: string): boolean { - // only allow s3 safe characters and characters which require special handling for now - // https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html - return key.length > 0 && /^(\w|\/|!|-|\.|\*|'|\(|\)| |&|\$|@|=|;|:|\+|,|\?)*$/.test(key) + // Allow any sequence of Unicode characters with UTF-8 encoding, + // except characters not allowed in XML 1.0. + // See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html + // See: https://www.w3.org/TR/REC-xml/#charsets + // + const regex = + /[\0-\x08\x0B\f\x0E-\x1F\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/ + + return key.length > 0 && !regex.test(key) } /** diff --git a/src/test/common.ts b/src/test/common.ts index 2ee80c40..e821892f 100644 --- a/src/test/common.ts +++ b/src/test/common.ts @@ -8,6 +8,42 @@ export const adminApp = app({}) const ENV = process.env +/** + * Should support all Unicode characters with UTF-8 encoding according to AWS S3 object naming guide, including: + * - Safe characters: 0-9 a-z A-Z !-_.*'() + * - Characters that might require special handling: &$@=;/:+,? and Space and ASCII characters \t, \n, and \r. + * - Characters: \{}^%`[]"<>~#| and non-printable ASCII characters (128–255 decimal characters). + * + * The following characters are not allowed: + * - ASCII characters 0x00–0x1F, except 0x09, 0x0A, and 0x0D. + * - Unicode \u{FFFE} and \u{FFFF}. + * - Lone surrogates characters. + * See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html + * See: https://www.w3.org/TR/REC-xml/#charsets + */ +export function getUnicodeObjectName(): string { + const objectName = 'test' + .concat("!-_*.'()") + // Characters that might require special handling + .concat('&$@=;:+,? \x09\x0A\x0D') + // Characters to avoid + .concat('\\{}^%`[]"<>~#|\xFF') + // MinIO max. length for each '/' separated segment is 255 + .concat('/') + .concat([...Array(127).keys()].map((i) => String.fromCodePoint(i + 128)).join('')) + .concat('/') + // Some special Unicode characters + .concat('\u2028\u202F\u{0001FFFF}') + // Some other Unicode characters + .concat('일이삼\u{0001f642}') + + return objectName +} + +export function getInvalidObjectName(): string { + return 'test\x01\x02\x03.txt' +} + export function useMockQueue() { const queueSpy: jest.SpyInstance | undefined = undefined beforeEach(() => { diff --git a/src/test/object.test.ts b/src/test/object.test.ts index 844ebc4c..565d75c8 100644 --- a/src/test/object.test.ts +++ b/src/test/object.test.ts @@ -6,10 +6,11 @@ import app from '../app' import { getConfig, mergeConfig } from '../config' import { signJWT } from '@internal/auth' import { Obj, backends } from '../storage' -import { useMockObject, useMockQueue } from './common' +import { getInvalidObjectName, getUnicodeObjectName, useMockObject, useMockQueue } from './common' import { getServiceKeyUser, getPostgresConnection } from '@internal/database' import { Knex } from 'knex' import { ErrorCode, StorageBackendError } from '@internal/errors' +import { randomUUID } from 'crypto' const { jwtSecret, serviceKey, tenantId } = getConfig() const anonKey = process.env.ANON_KEY || '' @@ -2456,3 +2457,90 @@ describe('testing list objects', () => { expect(responseJSON[1].name).toBe('sadcat-upload23.png') }) }) + +describe('Object key names with Unicode characters', () => { + test('can upload, get, list, and delete', async () => { + const prefix = `test-utf8-${randomUUID()}` + const objectName = getUnicodeObjectName() + + const form = new FormData() + form.append('file', fs.createReadStream(`./src/test/assets/sadcat.jpg`)) + const headers = Object.assign({}, form.getHeaders(), { + authorization: `Bearer ${serviceKey}`, + 'x-upsert': 'true', + }) + + const uploadResponse = await app().inject({ + method: 'POST', + url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`, + headers: { + ...headers, + ...form.getHeaders(), + }, + payload: form, + }) + expect(uploadResponse.statusCode).toBe(200) + + const getResponse = await app().inject({ + method: 'GET', + url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + expect(getResponse.statusCode).toBe(200) + expect(getResponse.headers['etag']).toBe('abc') + expect(getResponse.headers['last-modified']).toBe('Thu, 12 Aug 2021 16:00:00 GMT') + expect(getResponse.headers['cache-control']).toBe('no-cache') + + const listResponse = await app().inject({ + method: 'POST', + url: `/object/list/bucket2`, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + payload: { prefix }, + }) + expect(listResponse.statusCode).toBe(200) + const listResponseJSON = JSON.parse(listResponse.body) + expect(listResponseJSON).toHaveLength(1) + expect(listResponseJSON[0].name).toBe(objectName.split('/')[0]) + + const deleteResponse = await app().inject({ + method: 'DELETE', + url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + expect(deleteResponse.statusCode).toBe(200) + }) + + test('should not upload if the name contains invalid characters', async () => { + const invalidObjectName = getInvalidObjectName() + const form = new FormData() + form.append('file', fs.createReadStream(`./src/test/assets/sadcat.jpg`)) + const headers = Object.assign({}, form.getHeaders(), { + authorization: `Bearer ${serviceKey}`, + 'x-upsert': 'true', + }) + const uploadResponse = await app().inject({ + method: 'POST', + url: `/object/bucket2/${encodeURIComponent(invalidObjectName)}`, + headers: { + ...headers, + ...form.getHeaders(), + }, + payload: form, + }) + expect(uploadResponse.statusCode).toBe(400) + expect(S3Backend.prototype.uploadObject).not.toHaveBeenCalled() + expect(uploadResponse.body).toBe( + JSON.stringify({ + statusCode: '400', + error: 'InvalidKey', + message: `Invalid key: ${encodeURIComponent(invalidObjectName)}`, + }) + ) + }) +}) diff --git a/src/test/s3-protocol.test.ts b/src/test/s3-protocol.test.ts index 6d505298..ef9020d3 100644 --- a/src/test/s3-protocol.test.ts +++ b/src/test/s3-protocol.test.ts @@ -32,6 +32,7 @@ import { randomUUID } from 'crypto' import { getSignedUrl } from '@aws-sdk/s3-request-presigner' import axios from 'axios' import { createPresignedPost } from '@aws-sdk/s3-presigned-post' +import { getInvalidObjectName, getUnicodeObjectName } from './common' const { s3ProtocolAccessKeySecret, s3ProtocolAccessKeyId, storageS3Region } = getConfig() @@ -1402,5 +1403,150 @@ describe('S3 Protocol', () => { expect(resp.ok).toBeTruthy() }) }) + + describe('Object key names with Unicode characters', () => { + it('can be used with MultipartUpload commands', async () => { + const bucketName = await createBucket(client) + const objectName = getUnicodeObjectName() + const createMultiPartUpload = new CreateMultipartUploadCommand({ + Bucket: bucketName, + Key: objectName, + ContentType: 'image/jpg', + CacheControl: 'max-age=2000', + }) + const createMultipartResp = await client.send(createMultiPartUpload) + expect(createMultipartResp.UploadId).toBeTruthy() + const uploadId = createMultipartResp.UploadId + + const listMultipartUploads = new ListMultipartUploadsCommand({ + Bucket: bucketName, + }) + const listMultipartResp = await client.send(listMultipartUploads) + expect(listMultipartResp.Uploads?.length).toBe(1) + expect(listMultipartResp.Uploads?.[0].Key).toBe(objectName) + + const data = Buffer.alloc(1024 * 1024 * 2) + const uploadPart = new UploadPartCommand({ + Bucket: bucketName, + Key: objectName, + ContentLength: data.length, + UploadId: uploadId, + Body: data, + PartNumber: 1, + }) + const uploadPartResp = await client.send(uploadPart) + expect(uploadPartResp.ETag).toBeTruthy() + + const listParts = new ListPartsCommand({ + Bucket: bucketName, + Key: objectName, + UploadId: uploadId, + }) + const listPartsResp = await client.send(listParts) + expect(listPartsResp.Parts?.length).toBe(1) + + const completeMultiPartUpload = new CompleteMultipartUploadCommand({ + Bucket: bucketName, + Key: objectName, + UploadId: uploadId, + MultipartUpload: { + Parts: [ + { + PartNumber: 1, + ETag: uploadPartResp.ETag, + }, + ], + }, + }) + const completeMultipartResp = await client.send(completeMultiPartUpload) + expect(completeMultipartResp.$metadata.httpStatusCode).toBe(200) + expect(completeMultipartResp.Key).toEqual(objectName) + }) + + it('can be used with Put, List, and Delete Object commands', async () => { + const bucketName = await createBucket(client) + const objectName = getUnicodeObjectName() + const putObject = new PutObjectCommand({ + Bucket: bucketName, + Key: objectName, + Body: Buffer.alloc(1024 * 1024 * 1), + }) + const putObjectResp = await client.send(putObject) + expect(putObjectResp.$metadata.httpStatusCode).toEqual(200) + + const listObjects = new ListObjectsCommand({ + Bucket: bucketName, + }) + const listObjectsResp = await client.send(listObjects) + expect(listObjectsResp.Contents?.length).toBe(1) + expect(listObjectsResp.Contents?.[0].Key).toBe(objectName) + + const listObjectsV2 = new ListObjectsV2Command({ + Bucket: bucketName, + }) + const listObjectsV2Resp = await client.send(listObjectsV2) + expect(listObjectsV2Resp.Contents?.length).toBe(1) + expect(listObjectsV2Resp.Contents?.[0].Key).toBe(objectName) + + const getObject = new GetObjectCommand({ + Bucket: bucketName, + Key: objectName, + }) + const getObjectResp = await client.send(getObject) + const getObjectRespData = await getObjectResp.Body?.transformToByteArray() + expect(getObjectRespData).toBeTruthy() + expect(getObjectResp.ETag).toBeTruthy() + + const deleteObjects = new DeleteObjectsCommand({ + Bucket: bucketName, + Delete: { + Objects: [ + { + Key: objectName, + }, + ], + }, + }) + const deleteObjectsResp = await client.send(deleteObjects) + expect(deleteObjectsResp.Errors).toBeFalsy() + expect(deleteObjectsResp.Deleted).toEqual([ + { + Key: objectName, + }, + ]) + + const getObjectDeleted = new GetObjectCommand({ + Bucket: bucketName, + Key: objectName, + }) + try { + await client.send(getObjectDeleted) + throw new Error('Should not reach here') + } catch (e) { + expect((e as S3ServiceException).$metadata.httpStatusCode).toEqual(404) + } + }) + + it('should not upload if the name contains invalid characters', async () => { + const bucketName = await createBucket(client) + const invalidObjectName = getInvalidObjectName() + try { + const putObject = new PutObjectCommand({ + Bucket: bucketName, + Key: invalidObjectName, + Body: Buffer.alloc(1024 * 1024 * 1), + }) + await client.send(putObject) + throw new Error('Should not reach here') + } catch (e) { + expect((e as Error).message).not.toBe('Should not reach here') + expect((e as S3ServiceException).$metadata.httpStatusCode).toEqual(400) + expect((e as S3ServiceException).name).toEqual('InvalidKey') + expect((e as S3ServiceException).message).toEqual( + `Invalid key: ${encodeURIComponent(invalidObjectName)}` + ) + } + }) + }) }) }) diff --git a/src/test/tenant.test.ts b/src/test/tenant.test.ts index d53bc0d1..080b96c8 100644 --- a/src/test/tenant.test.ts +++ b/src/test/tenant.test.ts @@ -16,7 +16,7 @@ const payload = { serviceKey: 'd', jwks: { keys: [] }, migrationStatus: 'COMPLETED', - migrationVersion: 'custom-metadata', + migrationVersion: 'unicode-object-names', tracingMode: 'basic', features: { imageTransformation: { @@ -40,7 +40,7 @@ const payload2 = { serviceKey: 'h', jwks: null, migrationStatus: 'COMPLETED', - migrationVersion: 'custom-metadata', + migrationVersion: 'unicode-object-names', tracingMode: 'basic', features: { imageTransformation: { diff --git a/src/test/tus.test.ts b/src/test/tus.test.ts index cdbcc933..8dc0d388 100644 --- a/src/test/tus.test.ts +++ b/src/test/tus.test.ts @@ -13,7 +13,7 @@ import { logger } from '@internal/monitoring' import { getServiceKeyUser, getPostgresConnection } from '@internal/database' import { getConfig } from '../config' import app from '../app' -import { checkBucketExists } from './common' +import { checkBucketExists, getInvalidObjectName, getUnicodeObjectName } from './common' import { Storage, backends, StorageKnexDB } from '../storage' const { serviceKey, tenantId, storageS3Bucket, storageBackendType } = getConfig() @@ -525,4 +525,134 @@ describe('Tus multipart', () => { } }) }) + + describe('Object key names with Unicode characters', () => { + it('can be uploaded with the TUS protocol', async () => { + const objectName = randomUUID() + '-' + getUnicodeObjectName() + + const bucket = await storage.createBucket({ + id: bucketName, + name: bucketName, + public: true, + }) + + const result = await new Promise((resolve, reject) => { + const upload = new tus.Upload(oneChunkFile, { + endpoint: `${localServerAddress}/upload/resumable`, + onShouldRetry: () => false, + uploadDataDuringCreation: false, + headers: { + authorization: `Bearer ${serviceKey}`, + 'x-upsert': 'true', + }, + metadata: { + bucketName: bucketName, + objectName: objectName, + contentType: 'image/jpeg', + cacheControl: '3600', + metadata: JSON.stringify({ + test1: 'test1', + test2: 'test2', + }), + }, + onError: function (error) { + reject(error) + }, + onSuccess: () => { + resolve(true) + }, + }) + + upload.start() + }) + + expect(result).toEqual(true) + + const dbAsset = await storage.from(bucket.id).findObject(objectName, '*') + expect(dbAsset).toEqual({ + bucket_id: bucket.id, + created_at: expect.any(Date), + id: expect.any(String), + last_accessed_at: expect.any(Date), + metadata: { + cacheControl: 'max-age=3600', + contentLength: 29526, + eTag: '"53e1323c929d57b09b95fbe6d531865c-1"', + httpStatusCode: 200, + lastModified: expect.any(String), + mimetype: 'image/jpeg', + size: 29526, + }, + user_metadata: { + test1: 'test1', + test2: 'test2', + }, + name: objectName, + owner: null, + owner_id: null, + path_tokens: objectName.split('/'), + updated_at: expect.any(Date), + version: expect.any(String), + }) + + const getResponse = await app().inject({ + method: 'GET', + url: `/object/${bucketName}/${encodeURIComponent(objectName)}`, + headers: { + authorization: `Bearer ${serviceKey}`, + }, + }) + expect(getResponse.statusCode).toBe(200) + expect(getResponse.headers['etag']).toBe('"53e1323c929d57b09b95fbe6d531865c-1"') + expect(getResponse.headers['cache-control']).toBe('max-age=3600') + expect(getResponse.headers['content-length']).toBe('29526') + expect(getResponse.headers['content-type']).toBe('image/jpeg') + }) + + it('should not upload if the name contains invalid characters', async () => { + await storage.createBucket({ + id: bucketName, + name: bucketName, + public: true, + }) + + const invalidObjectName = randomUUID() + '-' + getInvalidObjectName() + try { + await new Promise((resolve, reject) => { + const upload = new tus.Upload(oneChunkFile, { + endpoint: `${localServerAddress}/upload/resumable`, + onShouldRetry: () => false, + uploadDataDuringCreation: false, + headers: { + authorization: `Bearer ${serviceKey}`, + 'x-upsert': 'true', + }, + metadata: { + bucketName: bucketName, + objectName: invalidObjectName, + contentType: 'image/jpeg', + cacheControl: '3600', + }, + onError: function (error) { + reject(error) + }, + onSuccess: () => { + resolve(true) + }, + }) + + upload.start() + }) + + throw new Error('it should error with invalid key') + } catch (e) { + expect((e as Error).message).not.toEqual('it should error with invalid key') + const err = e as DetailedError + expect(err.originalResponse.getStatus()).toEqual(400) + expect(err.originalResponse.getBody()).toEqual( + `Invalid key: ${encodeURIComponent(invalidObjectName)}` + ) + } + }) + }) })