-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathConvertUTF.h
83 lines (62 loc) · 2.46 KB
/
ConvertUTF.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#pragma once
namespace Util
{

/* ---------------------------------------------------------------------
    Code-unit and helper types used by the UTF conversion routines
    declared below.

    These definitions are compiler-specific: each typedef only promises
    the minimum width noted beside it.
    The C standard does not guarantee that wchar_t has at least
    16 bits, so wchar_t is no less portable than unsigned short!
    All should be unsigned values to avoid sign extension during
    bit mask & shift operations.
------------------------------------------------------------------------ */
typedef unsigned long  UTF32;   /* at least 32 bits */
typedef unsigned short UTF16;   /* at least 16 bits */
typedef unsigned char  UTF8;    /* typically 8 bits */
typedef unsigned char  Boolean; /* 0 or 1 */
typedef unsigned char  Byte;

/* Some fundamental constants.

   Macros are expanded without regard to namespace scope, so every
   replacement list names ::Util::UTF32 explicitly; that keeps the
   constants usable from code that has no using-directive for Util and
   stops them from binding to some unrelated UTF32 that happens to be in
   scope. Each expansion is also fully parenthesised so that it stays a
   single operand next to tightly binding operators such as sizeof. */
#define UNI_REPLACEMENT_CHAR ((::Util::UTF32)0x0000FFFD)
#define UNI_MAX_BMP          ((::Util::UTF32)0x0000FFFF)
#define UNI_MAX_UTF16        ((::Util::UTF32)0x0010FFFF)
#define UNI_MAX_UTF32        ((::Util::UTF32)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32  ((::Util::UTF32)0x0010FFFF)

/* Error categories. Not referenced by any declaration in this namespace
   block; NOTE(review): presumably consumed by callers or by the
   implementation file - confirm before changing. */
enum ConversionError
{
    partialCharacter,
    badEncoding
};

/* Outcome reported by every Convert* routine below. */
enum ConversionResult
{
    /* conversion successful */
    conversionOK,
    /* partial character in source, but hit end */
    sourceExhausted,
    /* insufficient room in target for conversion */
    targetExhausted,
    /* source sequence is illegal/malformed */
    sourceIllegal
};

/* Strictness selector passed to every Convert* routine below.
   NOTE(review): the exact difference between the two modes is defined by
   the implementation file, which is not visible from this header. */
enum ConversionFlags
{
    strictConversion,
    lenientConversion
};

/* ---------------------------------------------------------------------
    Conversion entry points (declared here, defined elsewhere).

    All six converters share one parameter shape:
        sourceStart  address of the caller's read pointer into the source
                     buffer (passed by address, so the callee is able to
                     update it)
        sourceEnd    end of the source buffer
        targetStart  address of the caller's write pointer into the
                     target buffer (passed by address, as above)
        targetEnd    end of the target buffer
        flags        strictConversion or lenientConversion
    and report their outcome as a ConversionResult.

    NOTE(review): this header does not show whether the end pointers are
    one-past-the-end, or where the start pointers are left when a call
    does not return conversionOK - confirm against the implementation.
------------------------------------------------------------------------ */
ConversionResult ConvertUTF8toUTF16(
    const UTF8 **sourceStart, const UTF8 *sourceEnd,
    UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags );

ConversionResult ConvertUTF16toUTF8(
    const UTF16 **sourceStart, const UTF16 *sourceEnd,
    UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags );

ConversionResult ConvertUTF8toUTF32(
    const UTF8 **sourceStart, const UTF8 *sourceEnd,
    UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags );

ConversionResult ConvertUTF32toUTF8(
    const UTF32 **sourceStart, const UTF32 *sourceEnd,
    UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags );

ConversionResult ConvertUTF16toUTF32(
    const UTF16 **sourceStart, const UTF16 *sourceEnd,
    UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags );

ConversionResult ConvertUTF32toUTF16(
    const UTF32 **sourceStart, const UTF32 *sourceEnd,
    UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags );

/* Takes the start and end of a UTF-8 byte range and returns a Boolean
   (0 or 1). NOTE(review): presumably non-zero means the range holds a
   well-formed UTF-8 sequence - confirm against the implementation. */
Boolean isLegalUTF8Sequence( const UTF8 *source, const UTF8 *sourceEnd );

} /* namespace Util */