Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
UTF8 Namespace Reference

Functions

const U8 * validateString (const U8 *nextChar, const U8 *endChar)
 
template<typename String >
void encodeCodepoint (U32 codepoint, String &outString)
 

Function Documentation

◆ encodeCodepoint()

template<typename String >
void UTF8::encodeCodepoint ( U32 codepoint,
String & outString )
inline

Definition at line 73 of file UTF8.h.

74 {
75 if(codepoint < 0x80)
76 {
77 outString += char(codepoint);
78 }
79 else if(codepoint < 0x800)
80 {
81 outString += char((codepoint >> 6) & 0x1F) | 0xC0;
82 outString += char((codepoint & 0x3F) | 0x80);
83 }
84 else if(codepoint < 0x10000)
85 {
86 outString += char((codepoint >> 12) & 0x0F) | 0xE0;
87 outString += char((codepoint >> 6) & 0x3F) | 0x80;
88 outString += char((codepoint & 0x3F) | 0x80);
89 }
90 else
91 {
92 WAVM_ASSERT_THROW(codepoint < 0x200000);
93 outString += char((codepoint >> 18) & 0x07) | 0xF0;
94 outString += char((codepoint >> 12) & 0x3F) | 0x80;
95 outString += char((codepoint >> 6) & 0x3F) | 0x80;
96 outString += char((codepoint & 0x3F) | 0x80);
97 }
98 }
#define WAVM_ASSERT_THROW(cond)
Definition Errors.h:29

◆ validateString()

// Definition at line 8 of file UTF8.h.
//
// Checks that the bytes in [nextChar, endChar) are a valid UTF-8 encoding.
// The valid ranges are taken from table 3-7 in the Unicode Standard 9.0:
// "Well-Formed UTF-8 Byte Sequences"
//
// nextChar: pointer to the first byte to validate.
// endChar:  pointer one past the last byte to validate.
//
// Returns endChar if the whole range is well-formed. Otherwise returns a pointer to the lead
// byte of the first sequence that is ill-formed or truncated by the end of the range.
inline const U8* UTF8::validateString(const U8* nextChar, const U8* endChar)
{
	while(nextChar != endChar)
	{
		const U8 leadByte = nextChar[0];

		// Single-byte (ASCII) sequence.
		if(leadByte < 0x80)
		{
			++nextChar;
			continue;
		}

		// The lead byte fixes the sequence length and the allowed range of the second byte.
		// Any further continuation bytes must always lie in 0x80..0xbf.
		int numBytes = 0;
		U8 minSecondByte = 0x80;
		U8 maxSecondByte = 0xbf;
		if(leadByte >= 0xc2 && leadByte <= 0xdf) { numBytes = 2; }
		else if(leadByte >= 0xe0 && leadByte <= 0xef)
		{
			numBytes = 3;
			// 0xe0: a second byte below 0xa0 would be an overlong encoding.
			if(leadByte == 0xe0) { minSecondByte = 0xa0; }
			// 0xed: a second byte above 0x9f would encode a UTF-16 surrogate (U+D800..U+DFFF).
			else if(leadByte == 0xed) { maxSecondByte = 0x9f; }
		}
		else if(leadByte >= 0xf0 && leadByte <= 0xf4)
		{
			numBytes = 4;
			// 0xf0: a second byte below 0x90 would be an overlong encoding.
			if(leadByte == 0xf0) { minSecondByte = 0x90; }
			// 0xf4: a second byte above 0x8f would exceed U+10FFFF.
			else if(leadByte == 0xf4) { maxSecondByte = 0x8f; }
		}
		else
		{
			// 0x80..0xc1 and 0xf5..0xff never start a well-formed sequence.
			return nextChar;
		}

		// Compare the remaining length instead of forming nextChar + k, so no pointer beyond
		// endChar is ever computed.
		if(endChar - nextChar < numBytes) { return nextChar; }

		if(nextChar[1] < minSecondByte || nextChar[1] > maxSecondByte) { return nextChar; }
		for(int byteIndex = 2; byteIndex < numBytes; ++byteIndex)
		{
			if(nextChar[byteIndex] < 0x80 || nextChar[byteIndex] > 0xbf) { return nextChar; }
		}

		nextChar += numBytes;
	}
	return nextChar;
}
Here is the caller graph for this function: