28#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
58 template<
typename octet_type>
61 return static_cast<uint8_t>(0xff & oc);
63 template<
typename u16_type>
66 return static_cast<uint16_t>(0xffff & oc);
68 template<
typename octet_type>
74 template <
typename u16>
80 template <
typename u16>
86 template <
typename u16>
92 template <
typename u32>
98 template <
typename octet_iterator>
99 inline typename std::iterator_traits<octet_iterator>::difference_type
105 else if ((lead >> 5) == 0x6)
107 else if ((lead >> 4) == 0xe)
109 else if ((lead >> 3) == 0x1e)
115 template <
typename octet_difference_type>
122 else if (cp < 0x800) {
126 else if (cp < 0x10000) {
137 template <
typename octet_iterator>
// Error-propagation helper: calls increase_safely(IT, END) and, when the result
// is anything other than UTF8_OK, returns that utf_error from the *enclosing*
// function. Because the expansion contains a `return ret;`, it is only usable
// inside a function whose return type accepts a utf_error.
// The expansion is a plain brace block (not do { } while (0)), so `ret` is
// scoped to the block and no trailing semicolon is required at the call site.
// The macro is removed again with a matching #undef further down the file.
149 #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
152 template <
typename octet_iterator>
163 template <
typename octet_iterator>
173 code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
178 template <
typename octet_iterator>
192 code_point += (*it) & 0x3f;
197 template <
typename octet_iterator>
215 code_point += (*it) & 0x3f;
220 #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
222 template <
typename octet_iterator>
227 octet_iterator original_it = it;
231 typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
274 template <
typename octet_iterator>
287 template <
typename octet_iterator>
290 octet_iterator result = start;
291 while (result != end) {
299 template <
typename octet_iterator>
300 inline bool is_valid(octet_iterator start, octet_iterator end)
305 template <
typename octet_iterator>
316 template <
typename octet_iterator>
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END)
const uint32_t CODE_POINT_MAX
bool is_lead_surrogate(u16 cp)
bool is_trail_surrogate(u16 cp)
bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
bool is_surrogate(u16 cp)
const uint32_t SURROGATE_OFFSET
utf_error increase_safely(octet_iterator &it, octet_iterator end)
Helper for the get_sequence_x functions.
const uint16_t TRAIL_SURROGATE_MAX
const uint16_t LEAD_OFFSET
utf_error get_sequence_1(octet_iterator &it, octet_iterator end, uint32_t &code_point)
The get_sequence_x functions decode UTF-8 sequences of length x.
const uint16_t TRAIL_SURROGATE_MIN
bool is_code_point_valid(u32 cp)
utf_error get_sequence_2(octet_iterator &it, octet_iterator end, uint32_t &code_point)
uint16_t mask16(u16_type oc)
bool is_trail(octet_type oc)
utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t &code_point)
utf_error get_sequence_3(octet_iterator &it, octet_iterator end, uint32_t &code_point)
const uint16_t LEAD_SURROGATE_MIN
utf_error get_sequence_4(octet_iterator &it, octet_iterator end, uint32_t &code_point)
std::iterator_traits< octet_iterator >::difference_type sequence_length(octet_iterator lead_it)
uint8_t mask8(octet_type oc)
const uint16_t LEAD_SURROGATE_MAX
bool starts_with_bom(octet_iterator it, octet_iterator end)
const uint8_t bom[]
The library API: functions intended to be called by users.
bool is_valid(octet_iterator start, octet_iterator end)
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
bool is_bom(octet_iterator it)