Skip to content

Commit

Permalink
Normalize instances of UTF to Unicode
Browse files Browse the repository at this point in the history
Signed-off-by: Jono Yang <[email protected]>
  • Loading branch information
JonoYang committed Aug 25, 2024
1 parent 1fcb356 commit 22c1913
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 26 deletions.
2 changes: 2 additions & 0 deletions src/scancode/cli_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,8 @@ def streamline_scanned_file(scanned_file, remove_file_date=False, normalize_file
if normalize_file_type and (file_type := scanned_file.get('file_type')):
file_type_segments = file_type.split()
normalized_file_type = file_type_segments[0]
if 'UTF' in normalized_file_type:
normalized_file_type = 'Unicode'
scanned_file['file_type'] = normalized_file_type


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4252,7 +4252,7 @@
"md5": "301dfe021b3b4076b9f8d49577205b44",
"sha256": "ff6dfac01c9b7ad9fcb5e646db83b482f5f720d981f0ca6c68828c5aa4ec784b",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "Java",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -4333,7 +4333,7 @@
"md5": "5165fdeefda7a55c13e44c5e56cac920",
"sha256": "8553411bf58f4ac35fc9e7d6142f11fc2fbef33e50a77f514a253135807afd44",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "Java",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -4425,7 +4425,7 @@
"md5": "53a91ff66fdc4d812d7656b4e807bfd2",
"sha256": "ec1427fc2f7e322e6a4d5d99f8119310d6586aaaf5b30b2904b4ccd27966e120",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "Java",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -4506,7 +4506,7 @@
"md5": "52540f80f5c22d8d13627c57b76d44f4",
"sha256": "c3bcefbbb2706f65410b4bb91d531e2ec461fa4586135becc8865adeca3385c8",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "Java",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -4553,7 +4553,7 @@
"md5": "a0b4e3f4d679a98d11d75e7e27e894af",
"sha256": "5c0f94fc518daca08e74fd117fef09d6dd090b0fd623a47edde451be3aed28c7",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "Java",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -4600,7 +4600,7 @@
"md5": "cae07c80e6f79864de002700bf9ab02f",
"sha256": "fe96061d23b37c98913379d54c5388cffdc99239807c2caf8c96d7d10321d085",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "Java",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -4681,7 +4681,7 @@
"md5": "ffb481f2a0c6262d0f7d1e9a4681a6ca",
"sha256": "5470e41b0bfba6adb43649215df756f1a4a5173ceed6127af1b38801651efde2",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "Java",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -4728,7 +4728,7 @@
"md5": "effc6856ef85a9250fb1a470792b3f38",
"sha256": "165da86bfdf296cd5a0a3e20c1d1ee86d70ecb8a1fa579d6f8cadad8eee85878",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": null,
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -4819,7 +4819,7 @@
"md5": "92011414f344e34f711e77bac40e4bc4",
"sha256": "e533accabdc7434f3905ced32c4fba755f707929ad73df6e07172cd5a786e023",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -4911,7 +4911,7 @@
"md5": "4ed53ac605f16247ab7d571670f2351d",
"sha256": "b09e2a43e9960d3d28d37d24f10dfbc462a4cb376da54ba571ac9a8874911ed5",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -5003,7 +5003,7 @@
"md5": "807b91d2bf5e18de555e56de37e487d1",
"sha256": "3980fa5633b16f944641bf2fba3f49b8b9b9de6ffea7be1e142792393bf1a867",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -5095,7 +5095,7 @@
"md5": "57f047ea87f405486a94bc5a56ba7fcf",
"sha256": "963aabe87f6a51ca9c237669034a9fdecd71df7350eaf30bdf0718f63c5a94f8",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": null,
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -5158,7 +5158,7 @@
"md5": "62b51527599b11b32361699c75b05683",
"sha256": "8b54b0b90570e4b0d5b8c8520e4b5a8258ae15849ec1919f57da093f5df84f38",
"mime_type": "application/json",
"file_type": "JSON text data",
"file_type": "JSON",
"programming_language": null,
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -8686,7 +8686,7 @@
"md5": "4e58eb393ad904c1de81a9ca5b9e392c",
"sha256": "8e6da5a880c0547bac5b71f0f123164511ae17e3c7d7f424e4ec5b44e1cae731",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": null,
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -8761,7 +8761,7 @@
"md5": "48c4037f16b4670795fdf72e88cc278c",
"sha256": "fc1682e787ccc8d9eb83eb0b1f5acb59d22aa6ce1bff7c749a9dc315237b2240",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -8853,7 +8853,7 @@
"md5": "f11ed826baf25f2bfa9c610313460036",
"sha256": "ef7bf500e1accf7e91352788c92b5c6663bab52e2cd6c42284496518e4a5f054",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -8945,7 +8945,7 @@
"md5": "d8821cd288e2be7fd83cdcac22a427ce",
"sha256": "a3c2fa63c7e730bdd008bc07b7d3865dc76781082b77a03d52a918ce1c7d3459",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -9076,7 +9076,7 @@
"md5": "dbd621f76cb27e2fd8b8e2d9d985ffcf",
"sha256": "8b257a625c537736ee3ac2321b4ef56ba8a6fa4f922a35b1a0e992f9327d39e9",
"mime_type": "text/plain",
"file_type": "ASCII text, with CRLF line terminators",
"file_type": "ASCII",
"programming_language": "C#",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -9129,7 +9129,7 @@
"md5": "661652a0568e25d12fc9bfad2fdabfb2",
"sha256": "f3144ba734429b825e22c2f9048412e78c907e0142f2cdc629879b3a0fa63c89",
"mime_type": "text/plain",
"file_type": "Unicode text, UTF-8 text, with CRLF line terminators",
"file_type": "Unicode",
"programming_language": "C#",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -9249,7 +9249,7 @@
"md5": "4f0d2f55d43d9466750350f8b27f0302",
"sha256": "1551b8fb1c2f1f3cc92c8e4f31730ebdde0c766caeca6d670a7758b55e3804f3",
"mime_type": "text/plain",
"file_type": "ASCII text, with CRLF line terminators",
"file_type": "ASCII",
"programming_language": "GAS",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -9385,7 +9385,7 @@
"md5": "d570bd029ee2362f2a0927c87999773b",
"sha256": "03fb435669b57aa90fe00c4dbc12d8492ef87b6243427d1e1831006994ee1f80",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -9460,7 +9460,7 @@
"md5": "2913c8ea7fb43a0f469bb2797c820a95",
"sha256": "4f8db049a7156b8a4616a6c9df0b5ee09571f0ac2eb253e111f2fb727fb033ea",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -9574,7 +9574,7 @@
"md5": "8b897171ea0767232e586086bc94518c",
"sha256": "ee2eef602cba7c4ba350617f4154de50cb4cbf274c7dd773130aca0775d5d9cd",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -9655,7 +9655,7 @@
"md5": "07497e2688dad9406386f0534a0bbfca",
"sha256": "64fef1f0a7cd69eae744ad2fa754ee8568a5715588e38b7a3fa6e11eaeaec97e",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -9741,7 +9741,7 @@
"md5": "2a0ea6a99e31fb0989209a027476038d",
"sha256": "dc7224c7f079d237acf510ff40282cf6d50352c851d4e2957d227e2a3b57eb4d",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down Expand Up @@ -9833,7 +9833,7 @@
"md5": "807b91d2bf5e18de555e56de37e487d1",
"sha256": "3980fa5633b16f944641bf2fba3f49b8b9b9de6ffea7be1e142792393bf1a867",
"mime_type": "text/plain",
"file_type": "ASCII text",
"file_type": "ASCII",
"programming_language": "C",
"is_binary": false,
"is_text": true,
Expand Down

0 comments on commit 22c1913

Please sign in to comment.