26static const unsigned kCodepointRanges[] = {
242#define UTF8_ACCEPT 0u
244static const unsigned char utf8d[] = {
247 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
248 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
249 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
250 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
251 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
252 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
253 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
254 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
258 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
259 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
260 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
261 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
262 12,36,12,12,12,12,12,12,12,12,12,12,
265static unsigned inline decode(
unsigned*
state,
unsigned* codep,
unsigned byte) {
266 unsigned type = utf8d[
byte];
269 (
byte & 0x3fu) | (*codep << 6) :
270 (0xffu >> type) & (
byte);
287 for (
const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
288 for (
unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
291 const char* encodedStr =
os.GetString();
295 unsigned decodedCodepoint = 0;
298 unsigned decodedCount = 0;
299 for (
const char*
s = encodedStr; *
s; ++
s)
300 if (!decode(&
state, &decodedCodepoint,
static_cast<unsigned char>(*
s))) {
311 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
317 unsigned decodedCodepoint;
321 if (!result || codepoint != decodedCodepoint)
322 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
340 for (
const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
341 for (
unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
354 unsigned decodedCodepoint = 0;
358 if (!decode(&
state, &decodedCodepoint,
static_cast<unsigned char>(*
s)))
362 if (codepoint <= 0xFFFF)
366 *
p++ =
static_cast<UTF16<>::Ch>(0xD7C0 + (decodedCodepoint >> 10));
367 *
p++ =
static_cast<UTF16<>::Ch>(0xDC00 + (decodedCodepoint & 0x3FF));
377 unsigned decodedCodepoint;
381 if (!result || codepoint != decodedCodepoint)
382 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
399 for (
const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
400 for (
unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
408 unsigned decodedCodepoint;
412 if (!result || codepoint != decodedCodepoint)
413 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
430 for (
unsigned codepoint = 0; codepoint < 128; codepoint++) {
436 unsigned decodedCodepoint;
438 if (!result || codepoint != decodedCodepoint)
439 std::cout << std::hex << codepoint <<
" " << decodedCodepoint << std::endl;
const Ch * GetString() const
#define EXPECT_EQ(val1, val2)
#define EXPECT_TRUE(condition)
#define TEST(test_case_name, test_name)
static const AddressFrame byte(8)
static bool Validate(InputStream &is, OutputStream &os)
static bool Decode(InputStream &is, unsigned *codepoint)
static void Encode(OutputStream &os, unsigned codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
static bool Validate(InputStream &is, OutputStream &os)
static void Encode(OutputStream &os, unsigned codepoint)
static void Encode(OutputStream &os, unsigned codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
static bool Validate(InputStream &is, OutputStream &os)
int StrCmp(const Ch *s1, const Ch *s2)