Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: allow UTF-8 encoded object names #605

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 162 additions & 0 deletions migrations/tenant/0026-unicode-object-names.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
ALTER TABLE "storage"."objects"
ADD CONSTRAINT objects_name_check
CHECK (name SIMILAR TO '[\x09\x0A\x0D\x20-\xD7FF\xE000-\xFFFD\x00010000-\x0010ffff]+');

CREATE OR REPLACE FUNCTION storage.search (
prefix TEXT,
bucketname TEXT,
limits INT DEFAULT 100,
levels INT DEFAULT 1,
offsets INT DEFAULT 0,
search TEXT DEFAULT '',
sortcolumn TEXT DEFAULT 'name',
sortorder TEXT DEFAULT 'asc'
) RETURNS TABLE (
name TEXT,
id UUID,
updated_at TIMESTAMPTZ,
created_at TIMESTAMPTZ,
last_accessed_at TIMESTAMPTZ,
metadata JSONB
)
AS $$
DECLARE
v_order_by TEXT;
v_sort_order TEXT;
BEGIN
CASE
WHEN sortcolumn = 'name' THEN
v_order_by = 'name';
WHEN sortcolumn = 'updated_at' THEN
v_order_by = 'updated_at';
WHEN sortcolumn = 'created_at' THEN
v_order_by = 'created_at';
WHEN sortcolumn = 'last_accessed_at' THEN
v_order_by = 'last_accessed_at';
ELSE
v_order_by = 'name';
END CASE;

CASE
WHEN sortorder = 'asc' THEN
v_sort_order = 'asc';
WHEN sortorder = 'desc' THEN
v_sort_order = 'desc';
ELSE
v_sort_order = 'asc';
END CASE;

v_order_by = v_order_by || ' ' || v_sort_order;

RETURN QUERY EXECUTE
'WITH folders AS (
SELECT path_tokens[$1] AS folder
FROM storage.objects
WHERE STARTS_WITH(LOWER(objects.name), $2 || $3)
AND bucket_id = $4
AND ARRAY_LENGTH(objects.path_tokens, 1) <> $1
GROUP BY folder
ORDER BY folder ' || v_sort_order || '
)
(SELECT folder AS "name",
NULL AS id,
NULL AS updated_at,
NULL AS created_at,
NULL AS last_accessed_at,
NULL AS metadata FROM folders)
UNION ALL
(SELECT path_tokens[$1] AS "name",
id,
updated_at,
created_at,
last_accessed_at,
metadata
FROM storage.objects
WHERE STARTS_WITH(LOWER(objects.name), $2 || $3)
AND bucket_id = $4
AND ARRAY_LENGTH(objects.path_tokens, 1) = $1
ORDER BY ' || v_order_by || ')
LIMIT $5
OFFSET $6' USING levels, LOWER(prefix), LOWER(search), bucketname, limits, offsets;
END;
$$ LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION storage.list_objects_with_delimiter(bucket_id TEXT, prefix_param TEXT, delimiter_param TEXT, max_keys INTEGER DEFAULT 100, start_after TEXT DEFAULT '', next_token TEXT DEFAULT '')
RETURNS TABLE (name TEXT, id UUID, metadata JSONB, updated_at TIMESTAMPTZ) AS
$$
BEGIN
RETURN QUERY EXECUTE
'SELECT DISTINCT ON(name COLLATE "C") * FROM (
SELECT
CASE
WHEN POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1)) > 0 THEN
SUBSTRING(name FROM 1 for LENGTH($1) + POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1)))
ELSE
name
END AS name, id, metadata, updated_at
FROM
storage.objects
WHERE
bucket_id = $5 AND
STARTS_WITH(LOWER(name), $1) AND
CASE
WHEN $6 != '''' THEN
name COLLATE "C" > $6
ELSE true END
AND CASE
WHEN $4 != '''' THEN
CASE
WHEN POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1)) > 0 THEN
SUBSTRING(name FROM 1 FOR LENGTH($1) + POSITION($2 IN SUBSTRING(name FROM LENGTH($1) + 1))) COLLATE "C" > $4
ELSE
name COLLATE "C" > $4
END
ELSE
TRUE
END
ORDER BY
name COLLATE "C" ASC) AS e ORDER BY name COLLATE "C" LIMIT $3'
USING LOWER(prefix_param), delimiter_param, max_keys, next_token, bucket_id, start_after;
END;
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION storage.list_multipart_uploads_with_delimiter(bucket_id text, prefix_param text, delimiter_param text, max_keys integer default 100, next_key_token text DEFAULT '', next_upload_token text default '')
RETURNS TABLE (key text, id text, created_at timestamptz) AS
$$
BEGIN
RETURN QUERY EXECUTE
'SELECT DISTINCT ON(key COLLATE "C") * FROM (
SELECT
CASE
WHEN POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1)) > 0 THEN
SUBSTRING(key FROM 1 FOR LENGTH($1) + POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1)))
ELSE
key
END AS key, id, created_at
FROM
storage.s3_multipart_uploads
WHERE
bucket_id = $5 AND
STARTS_WITH(LOWER(key), $1) AND
CASE
WHEN $4 != '''' AND $6 = '''' THEN
CASE
WHEN POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1)) > 0 THEN
SUBSTRING(key FROM 1 FOR LENGTH($1) + POSITION($2 IN SUBSTRING(key FROM LENGTH($1) + 1))) COLLATE "C" > $4
ELSE
key COLLATE "C" > $4
END
ELSE
TRUE
END AND
CASE
WHEN $6 != '''' THEN
id COLLATE "C" > $6
ELSE
TRUE
END
ORDER BY
key COLLATE "C" ASC, created_at ASC) AS e ORDER BY key COLLATE "C" LIMIT $3'
USING LOWER(prefix_param), delimiter_param, max_keys, next_key_token, bucket_id, next_upload_token;
END;
$$ LANGUAGE plpgsql;
4 changes: 4 additions & 0 deletions src/http/plugins/xml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ export const xmlParser = fastifyPlugin(
isArray: (_: string, jpath: string) => {
return opts.parseAsArray?.includes(jpath)
},
tagValueProcessor: (name: string, value: string) =>
value.replace(/&#x([0-9a-fA-F]{1,6});/g, (_, str: string) =>
String.fromCharCode(Number.parseInt(str, 16))
),
})
}

Expand Down
1 change: 1 addition & 0 deletions src/internal/database/migrations/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ export const DBMigration = {
'optimize-search-function': 22,
'operation-function': 23,
'custom-metadata': 24,
'unicode-object-names': 25,
} as const
2 changes: 1 addition & 1 deletion src/internal/errors/codes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ export const ERRORS = {
code: ErrorCode.InvalidKey,
resource: key,
httpStatusCode: 400,
message: `Invalid key: ${key}`,
message: `Invalid key: ${encodeURIComponent(key)}`,
originalError: e,
}),

Expand Down
6 changes: 5 additions & 1 deletion src/storage/database/knex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,11 @@ export class DBError extends StorageBackendError implements RenderableError {
code: pgError.code,
})
default:
return ERRORS.DatabaseError(pgError.message, pgError).withMetadata({
const errorMessage =
pgError.code === '23514' && pgError.constraint === 'objects_name_check'
? 'Invalid object name'
: pgError.message
return ERRORS.DatabaseError(errorMessage, pgError).withMetadata({
query,
code: pgError.code,
})
Expand Down
12 changes: 9 additions & 3 deletions src/storage/limits.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,15 @@ export async function isImageTransformationEnabled(tenantId: string) {
* @param key
*/
export function isValidKey(key: string): boolean {
// only allow s3 safe characters and characters which require special handling for now
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
return key.length > 0 && /^(\w|\/|!|-|\.|\*|'|\(|\)| |&|\$|@|=|;|:|\+|,|\?)*$/.test(key)
// Allow any sequence of Unicode characters with UTF-8 encoding,
// except characters not allowed in XML 1.0.
// See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
// See: https://www.w3.org/TR/REC-xml/#charsets
//
const regex =
/[\0-\x08\x0B\f\x0E-\x1F\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/

return key.length > 0 && !regex.test(key)
}

/**
Expand Down
34 changes: 34 additions & 0 deletions src/storage/protocols/s3/s3-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,9 @@ export class S3ProtocolHandler {
throw ERRORS.InvalidUploadId()
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

await uploader.canUpload({
bucketId: Bucket as string,
objectName: Key as string,
Expand Down Expand Up @@ -601,6 +604,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingContentLength()
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

const bucket = await this.storage.asSuperUser().findBucket(Bucket, 'file_size_limit')
const maxFileSize = await getFileSizeLimit(this.storage.db.tenantId, bucket?.file_size_limit)

Expand Down Expand Up @@ -755,6 +761,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingParameter('Key')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

const multipart = await this.storage.db
.asSuperUser()
.findMultipartUpload(UploadId, 'id,version')
Expand Down Expand Up @@ -797,6 +806,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingParameter('Bucket')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

const object = await this.storage
.from(Bucket)
.findObject(Key, 'metadata,user_metadata,created_at,updated_at')
Expand Down Expand Up @@ -836,6 +848,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingParameter('Key')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

const object = await this.storage.from(Bucket).findObject(Key, 'id')

if (!object) {
Expand Down Expand Up @@ -864,6 +879,9 @@ export class S3ProtocolHandler {
const bucket = command.Bucket as string
const key = command.Key as string

mustBeValidBucketName(bucket)
mustBeValidKey(key)

const object = await this.storage.from(bucket).findObject(key, 'version,user_metadata')
const response = await this.storage.backend.getObject(
storageS3Bucket,
Expand Down Expand Up @@ -916,6 +934,9 @@ export class S3ProtocolHandler {
throw ERRORS.MissingParameter('Key')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)

await this.storage.from(Bucket).deleteObject(Key)

return {}
Expand Down Expand Up @@ -947,6 +968,9 @@ export class S3ProtocolHandler {
return {}
}

mustBeValidBucketName(Bucket)
Delete.Objects.forEach((o) => mustBeValidKey(o.Key))

const deletedResult = await this.storage
.from(Bucket)
.deleteObjects(Delete.Objects.map((o) => o.Key || ''))
Expand Down Expand Up @@ -1017,6 +1041,11 @@ export class S3ProtocolHandler {
command.MetadataDirective = 'COPY'
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)
mustBeValidBucketName(sourceBucket)
mustBeValidKey(sourceKey)

const copyResult = await this.storage.from(sourceBucket).copyObject({
sourceKey,
destinationBucket: Bucket,
Expand Down Expand Up @@ -1147,6 +1176,11 @@ export class S3ProtocolHandler {
throw ERRORS.NoSuchKey('')
}

mustBeValidBucketName(Bucket)
mustBeValidKey(Key)
mustBeValidBucketName(sourceBucketName)
mustBeValidKey(sourceKey)

// Check if copy source exists
const copySource = await this.storage.db.findObject(
sourceBucketName,
Expand Down
37 changes: 37 additions & 0 deletions src/test/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,43 @@ export const adminApp = app({})

const ENV = process.env

/**
* Should support all Unicode characters with UTF-8 encoding according to AWS S3 object naming guide, including:
* - Safe characters: 0-9 a-z A-Z !-_.*'()
* - Characters that might require special handling: &$@=;/:+,? and Space and ASCII characters \t, \n, and \r.
* - Characters: \{}^%`[]"<>~#| and non-printable ASCII characters (128–255 decimal characters).
* - Astral code points
*
* The following characters are not allowed:
* - ASCII characters 0x00–0x1F, except 0x09, 0x0A, and 0x0D.
* - Unicode \u{FFFE} and \u{FFFF}.
* - Lone surrogate characters.
* See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
* See: https://www.w3.org/TR/REC-xml/#charsets
*/
export function getUnicodeObjectName(): string {
const objectName = 'test'
.concat("!-_*.'()")
// Characters that might require special handling
.concat('&$@=;:+,? \x09\x0A\x0D')
// Characters to avoid
.concat('\\{}^%`[]"<>~#|\xFF')
// MinIO max. length for each '/' separated segment is 255
.concat('/')
.concat([...Array(127).keys()].map((i) => String.fromCodePoint(i + 128)).join(''))
.concat('/')
// Some special Unicode characters
.concat('\u2028\u202F\u{0001FFFF}')
// Some other Unicode characters
.concat('일이삼\u{0001f642}')

return objectName
}

export function getBadObjectName(): string {
return 'test '.concat('\x01\x02\x03')
}

export function useMockQueue() {
const queueSpy: jest.SpyInstance | undefined = undefined
beforeEach(() => {
Expand Down
Loading