Skip to content

Commit

Permalink
fix: allow UTF-8 encoded object names
Browse files Browse the repository at this point in the history
  • Loading branch information
mlatief committed Jan 21, 2025
1 parent cf23e7d commit 5a2bb11
Show file tree
Hide file tree
Showing 12 changed files with 563 additions and 8 deletions.
162 changes: 162 additions & 0 deletions migrations/tenant/0026-unicode-object-names.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
ALTER TABLE "storage"."objects"
ADD CONSTRAINT objects_name_check
CHECK (name SIMILAR TO '[\x09\x0A\x0D\x20-\xD7FF\xE000-\xFFFD\x00010000-\x0010ffff]+');

CREATE OR REPLACE FUNCTION storage.search (
prefix TEXT,
bucketname TEXT,
limits INT DEFAULT 100,
levels INT DEFAULT 1,
offsets INT DEFAULT 0,
search TEXT DEFAULT '',
sortcolumn TEXT DEFAULT 'name',
sortorder TEXT DEFAULT 'asc'
) RETURNS TABLE (
name TEXT,
id UUID,
updated_at TIMESTAMPTZ,
created_at TIMESTAMPTZ,
last_accessed_at TIMESTAMPTZ,
metadata JSONB
)
AS $$
DECLARE
v_order_by TEXT;
v_sort_order TEXT;
BEGIN
CASE
WHEN sortcolumn = 'name' THEN
v_order_by = 'name';
WHEN sortcolumn = 'updated_at' THEN
v_order_by = 'updated_at';
WHEN sortcolumn = 'created_at' THEN
v_order_by = 'created_at';
WHEN sortcolumn = 'last_accessed_at' THEN
v_order_by = 'last_accessed_at';
ELSE
v_order_by = 'name';
END CASE;

CASE
WHEN sortorder = 'asc' THEN
v_sort_order = 'asc';
WHEN sortorder = 'desc' THEN
v_sort_order = 'desc';
ELSE
v_sort_order = 'asc';
END CASE;

v_order_by = v_order_by || ' ' || v_sort_order;

RETURN QUERY EXECUTE
'WITH folders AS (
SELECT path_tokens[$1] AS folder
FROM storage.objects
WHERE STARTS_WITH(LOWER(objects.name), $2 || $3)
AND bucket_id = $4
AND ARRAY_LENGTH(objects.path_tokens, 1) <> $1
GROUP BY folder
ORDER BY folder ' || v_sort_order || '
)
(SELECT folder AS "name",
NULL AS id,
NULL AS updated_at,
NULL AS created_at,
NULL AS last_accessed_at,
NULL AS metadata FROM folders)
UNION ALL
(SELECT path_tokens[$1] AS "name",
id,
updated_at,
created_at,
last_accessed_at,
metadata
FROM storage.objects
WHERE STARTS_WITH(LOWER(objects.name), $2 || $3)
AND bucket_id = $4
AND ARRAY_LENGTH(objects.path_tokens, 1) = $1
ORDER BY ' || v_order_by || ')
LIMIT $5
OFFSET $6' USING levels, LOWER(prefix), LOWER(search), bucketname, limits, offsets;
END;
$$ LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION storage.list_objects_with_delimiter(bucket_id TEXT, prefix_param TEXT, delimiter_param TEXT, max_keys INTEGER DEFAULT 100, start_after TEXT DEFAULT '', next_token TEXT DEFAULT '')
RETURNS TABLE (name TEXT, id UUID, metadata JSONB, updated_at TIMESTAMPTZ) AS
$$
BEGIN
RETURN QUERY EXECUTE
'SELECT DISTINCT ON(name COLLATE "C") * FROM (
SELECT
CASE
WHEN POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1)) > 0 THEN
SUBSTRING(name FROM 1 for LENGTH($1) + POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1)))
ELSE
name
END AS name, id, metadata, updated_at
FROM
storage.objects
WHERE
bucket_id = $5 AND
STARTS_WITH(LOWER(name), $1) AND
CASE
WHEN $6 != '''' THEN
name COLLATE "C" > $6
ELSE true END
AND CASE
WHEN $4 != '''' THEN
CASE
WHEN POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1)) > 0 THEN
SUBSTRING(name FROM 1 FOR LENGTH($1) + POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1))) COLLATE "C" > $4
ELSE
name COLLATE "C" > $4
END
ELSE
TRUE
END
ORDER BY
name COLLATE "C" ASC) AS e ORDER BY name COLLATE "C" LIMIT $3'
USING LOWER(prefix_param), delimiter_param, max_keys, next_token, bucket_id, start_after;
END;
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION storage.list_multipart_uploads_with_delimiter(bucket_id text, prefix_param text, delimiter_param text, max_keys integer default 100, next_key_token text DEFAULT '', next_upload_token text default '')
RETURNS TABLE (key text, id text, created_at timestamptz) AS
$$
BEGIN
RETURN QUERY EXECUTE
'SELECT DISTINCT ON(key COLLATE "C") * FROM (
SELECT
CASE
WHEN POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1)) > 0 THEN
SUBSTRING(key FROM 1 FOR LENGTH($1) + POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1)))
ELSE
key
END AS key, id, created_at
FROM
storage.s3_multipart_uploads
WHERE
bucket_id = $5 AND
STARTS_WITH(LOWER(key), $1) AND
CASE
WHEN $4 != '''' AND $6 = '''' THEN
CASE
WHEN POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1)) > 0 THEN
SUBSTRING(key FROM 1 FOR LENGTH($1) + POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1))) COLLATE "C" > $4
ELSE
key COLLATE "C" > $4
END
ELSE
TRUE
END AND
CASE
WHEN $6 != '''' THEN
id COLLATE "C" > $6
ELSE
TRUE
END
ORDER BY
key COLLATE "C" ASC, created_at ASC) AS e ORDER BY key COLLATE "C" LIMIT $3'
USING LOWER(prefix_param), delimiter_param, max_keys, next_key_token, bucket_id, next_upload_token;
END;
$$ LANGUAGE plpgsql;
4 changes: 4 additions & 0 deletions src/http/plugins/xml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ export const xmlParser = fastifyPlugin(
isArray: (_: string, jpath: string) => {
return opts.parseAsArray?.includes(jpath)
},
tagValueProcessor: (name: string, value: string) =>
value.replace(/&#x([0-9a-fA-F]{1,6});/g, (_, str: string) =>
String.fromCharCode(Number.parseInt(str, 16))
),
})
}

Expand Down
1 change: 1 addition & 0 deletions src/internal/database/migrations/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ export const DBMigration = {
'optimize-search-function': 22,
'operation-function': 23,
'custom-metadata': 24,
'unicode-object-names': 25,
} as const
2 changes: 1 addition & 1 deletion src/internal/errors/codes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ export const ERRORS = {
code: ErrorCode.InvalidKey,
resource: key,
httpStatusCode: 400,
message: `Invalid key: ${key}`,
message: `Invalid key: ${encodeURIComponent(key)}`,
originalError: e,
}),

Expand Down
7 changes: 7 additions & 0 deletions src/storage/database/knex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,13 @@ export class DBError extends StorageBackendError implements RenderableError {
query,
code: pgError.code,
})
case '23514':
if (pgError.constraint === 'objects_name_check') {
return ERRORS.DatabaseError('Invalid object name', pgError).withMetadata({
query,
code: pgError.code,
})
}
default:
return ERRORS.DatabaseError(pgError.message, pgError).withMetadata({
query,
Expand Down
12 changes: 9 additions & 3 deletions src/storage/limits.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,15 @@ export async function isImageTransformationEnabled(tenantId: string) {
* @param key
*/
export function isValidKey(key: string): boolean {
// only allow s3 safe characters and characters which require special handling for now
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
return key.length > 0 && /^(\w|\/|!|-|\.|\*|'|\(|\)| |&|\$|@|=|;|:|\+|,|\?)*$/.test(key)
// Allow any sequence of Unicode characters with UTF-8 encoding,
// except characters not allowed in XML 1.0.
// See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
// See: https://www.w3.org/TR/REC-xml/#charsets
//
const regex =
/[\0-\x08\x0B\f\x0E-\x1F\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/

return key.length > 0 && !regex.test(key)
}

/**
Expand Down
34 changes: 34 additions & 0 deletions src/storage/protocols/s3/s3-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,9 @@ export class S3ProtocolHandler {
throw ERRORS.InvalidUploadId()
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

await uploader.canUpload({
bucketId: Bucket as string,
objectName: Key as string,
Expand Down Expand Up @@ -601,6 +604,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingContentLength()
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

const bucket = await this.storage.asSuperUser().findBucket(Bucket, 'file_size_limit')
const maxFileSize = await getFileSizeLimit(this.storage.db.tenantId, bucket?.file_size_limit)

Expand Down Expand Up @@ -755,6 +761,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingParameter('Key')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

const multipart = await this.storage.db
.asSuperUser()
.findMultipartUpload(UploadId, 'id,version')
Expand Down Expand Up @@ -797,6 +806,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingParameter('Bucket')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

const object = await this.storage
.from(Bucket)
.findObject(Key, 'metadata,user_metadata,created_at,updated_at')
Expand Down Expand Up @@ -836,6 +848,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingParameter('Key')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

const object = await this.storage.from(Bucket).findObject(Key, 'id')

if (!object) {
Expand Down Expand Up @@ -864,6 +879,9 @@ export class S3ProtocolHandler {
const bucket = command.Bucket as string
const key = command.Key as string

mustBeValidBucketName(bucket)
mustBeValidKey(key)

const object = await this.storage.from(bucket).findObject(key, 'version,user_metadata')
const response = await this.storage.backend.getObject(
storageS3Bucket,
Expand Down Expand Up @@ -916,6 +934,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingParameter('Key')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

await this.storage.from(Bucket).deleteObject(Key)

return {}
Expand Down Expand Up @@ -947,6 +968,9 @@ export class S3ProtocolHandler {
return {}
}

mustBeValidBucketName(Bucket)
Delete.Objects.forEach((o) => mustBeValidKey(o.Key))

const deletedResult = await this.storage
.from(Bucket)
.deleteObjects(Delete.Objects.map((o) => o.Key || ''))
Expand Down Expand Up @@ -1012,6 +1036,11 @@ export class S3ProtocolHandler {
throw ERRORS.MissingParameter('CopySource')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)
mustBeValidBucketName(sourceBucket)
mustBeValidKey(sourceKey)

const copyResult = await this.storage.from(sourceBucket).copyObject({
sourceKey,
destinationBucket: Bucket,
Expand Down Expand Up @@ -1142,6 +1171,11 @@ export class S3ProtocolHandler {
throw ERRORS.NoSuchKey('')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)
mustBeValidBucketName(sourceBucketName)
mustBeValidKey(sourceKey)

// Check if copy source exists
const copySource = await this.storage.db.findObject(
sourceBucketName,
Expand Down
37 changes: 37 additions & 0 deletions src/test/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,43 @@ export const adminApp = app({})

const ENV = process.env

/**
* Should support all Unicode characters with UTF-8 encoding according to AWS S3 object naming guide, including:
* - Safe characters: 0-9 a-z A-Z !-_.*'()
* - Characters that might require special handling: &$@=;/:+,? and Space and ASCII characters \t, \n, and \r.
* - Characters: \{}^%`[]"<>~#| and non-printable ASCII characters (128–255 decimal characters).
* - Astral code points
*
* The following characters are not allowed:
* - ASCII characters 0x00–0x1F, except 0x09, 0x0A, and 0x0D.
* - Unicode \u{FFFE} and \u{FFFF}.
* - Lone surrogate characters.
* See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
* See: https://www.w3.org/TR/REC-xml/#charsets
*/
export function getUnicodeObjectName(): string {
const objectName = 'test'
.concat("!-_*.'()")
// Characters that might require special handling
.concat('&$@=;:+,? \x09\x0A\x0D')
// Characters to avoid
.concat('\\{}^%`[]"<>~#|\xFF')
// MinIO max. length for each '/' separated segment is 255
.concat('/')
.concat([...Array(127).keys()].map((i) => String.fromCodePoint(i + 128)).join(''))
.concat('/')
// Some special Unicode characters
.concat('\u2028\u202F\u{0001FFFF}')
// Some other Unicode characters
.concat('일이삼\u{0001f642}')

return objectName
}

export function getBadObjectName(): string {
return 'test '.concat('\x01\x02\x03')
}

export function useMockQueue() {
const queueSpy: jest.SpyInstance | undefined = undefined
beforeEach(() => {
Expand Down
Loading

0 comments on commit 5a2bb11

Please sign in to comment.