Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
utf8 Namespace Reference

Namespaces

namespace  internal
 
namespace  unchecked
 

Classes

class  exception
 
class  invalid_code_point
 
class  invalid_utf16
 
class  invalid_utf8
 
class  iterator
 
class  not_enough_room
 

Functions

template<typename octet_iterator >
octet_iterator append (uint32_t cp, octet_iterator result)
 The library API - functions intended to be called by the users.
 
template<typename octet_iterator , typename output_iterator >
output_iterator replace_invalid (octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
 
template<typename octet_iterator , typename output_iterator >
output_iterator replace_invalid (octet_iterator start, octet_iterator end, output_iterator out)
 
template<typename octet_iterator >
uint32_t next (octet_iterator &it, octet_iterator end)
 
template<typename octet_iterator >
uint32_t peek_next (octet_iterator it, octet_iterator end)
 
template<typename octet_iterator >
uint32_t prior (octet_iterator &it, octet_iterator start)
 
template<typename octet_iterator >
uint32_t previous (octet_iterator &it, octet_iterator pass_start)
 Deprecated in versions that include "prior"; prefer prior() in new code.
 
template<typename octet_iterator , typename distance_type >
void advance (octet_iterator &it, distance_type n, octet_iterator end)
 
template<typename octet_iterator >
std::iterator_traits< octet_iterator >::difference_type distance (octet_iterator first, octet_iterator last)
 
template<typename u16bit_iterator , typename octet_iterator >
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
 
template<typename u16bit_iterator , typename octet_iterator >
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
 
template<typename octet_iterator , typename u32bit_iterator >
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
 
template<typename octet_iterator , typename u32bit_iterator >
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
 
template<typename octet_iterator >
octet_iterator find_invalid (octet_iterator start, octet_iterator end)
 
template<typename octet_iterator >
bool is_valid (octet_iterator start, octet_iterator end)
 
template<typename octet_iterator >
bool starts_with_bom (octet_iterator it, octet_iterator end)
 
template<typename octet_iterator >
bool is_bom (octet_iterator it)
 

Variables

const uint8_t bom [] = {0xef, 0xbb, 0xbf}
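 The three octets of the UTF-8 byte order mark (BOM), compared against by is_bom() and starts_with_bom(). Part of the library API intended to be called by the users.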
 

Function Documentation

◆ advance()

template<typename octet_iterator , typename distance_type >
void utf8::advance ( octet_iterator & it,
distance_type n,
octet_iterator end )

Definition at line 190 of file checked.h.

191 {
192 for (distance_type i = 0; i < n; ++i)
193 utf8::next(it, end);
194 }
uint32_t next(octet_iterator &it, octet_iterator end)
Definition checked.h:137
Here is the call graph for this function:

◆ append()

template<typename octet_iterator >
octet_iterator utf8::append ( uint32_t cp,
octet_iterator result )

Definition at line 73 of file checked.h.

74 {
75 if (!utf8::internal::is_code_point_valid(cp))
76 throw invalid_code_point(cp);
77
78 if (cp < 0x80) // one octet
79 *(result++) = static_cast<uint8_t>(cp);
80 else if (cp < 0x800) { // two octets
81 *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
82 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
83 }
84 else if (cp < 0x10000) { // three octets
85 *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
86 *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
87 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
88 }
89 else { // four octets
90 *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
91 *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
92 *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
93 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
94 }
95 return result;
96 }
bool is_code_point_valid(u32 cp)
Definition core.h:93
unsigned char uint8_t
Definition stdint.h:124
Here is the call graph for this function:
Here is the caller graph for this function:

◆ distance()

template<typename octet_iterator >
std::iterator_traits< octet_iterator >::difference_type utf8::distance ( octet_iterator first,
octet_iterator last )

Definition at line 198 of file checked.h.

199 {
200 typename std::iterator_traits<octet_iterator>::difference_type dist;
201 for (dist = 0; first < last; ++dist)
202 utf8::next(first, last);
203 return dist;
204 }
Here is the call graph for this function:

◆ find_invalid()

template<typename octet_iterator >
octet_iterator utf8::find_invalid ( octet_iterator start,
octet_iterator end )

Definition at line 288 of file core.h.

289 {
290 octet_iterator result = start;
291 while (result != end) {
292 utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
293 if (err_code != internal::UTF8_OK)
294 return result;
295 }
296 return result;
297 }
utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition core.h:223
Here is the call graph for this function:
Here is the caller graph for this function:

◆ is_bom()

template<typename octet_iterator >
bool utf8::is_bom ( octet_iterator it)
inline

Definition at line 317 of file core.h.

318 {
319 return (
320 (utf8::internal::mask8(*it++)) == bom[0] &&
321 (utf8::internal::mask8(*it++)) == bom[1] &&
322 (utf8::internal::mask8(*it)) == bom[2]
323 );
324 }
uint8_t mask8(octet_type oc)
Definition core.h:59
Here is the call graph for this function:

◆ is_valid()

template<typename octet_iterator >
bool utf8::is_valid ( octet_iterator start,
octet_iterator end )
inline

Definition at line 300 of file core.h.

301 {
302 return (utf8::find_invalid(start, end) == end);
303 }
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
Definition core.h:288
Here is the call graph for this function:
Here is the caller graph for this function:

◆ next()

template<typename octet_iterator >
uint32_t utf8::next ( octet_iterator & it,
octet_iterator end )

Definition at line 137 of file checked.h.

138 {
139 uint32_t cp = 0;
140 internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
141 switch (err_code) {
142 case internal::UTF8_OK :
143 break;
144 case internal::NOT_ENOUGH_ROOM :
145 throw not_enough_room();
146 case internal::INVALID_LEAD :
147 case internal::INCOMPLETE_SEQUENCE :
148 case internal::OVERLONG_SEQUENCE :
149 throw invalid_utf8(*it);
150 case internal::INVALID_CODE_POINT :
151 throw invalid_code_point(cp);
152 }
153 return cp;
154 }
unsigned int uint32_t
Definition stdint.h:126
Here is the call graph for this function:
Here is the caller graph for this function:

◆ peek_next()

template<typename octet_iterator >
uint32_t utf8::peek_next ( octet_iterator it,
octet_iterator end )

Definition at line 157 of file checked.h.

158 {
159 return utf8::next(it, end);
160 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ previous()

template<typename octet_iterator >
uint32_t utf8::previous ( octet_iterator & it,
octet_iterator pass_start )

Definition at line 179 of file checked.h.

180 {
181 octet_iterator end = it;
182 while (utf8::internal::is_trail(*(--it)))
183 if (it == pass_start)
184 throw invalid_utf8(*it); // error - no lead byte in the sequence
185 octet_iterator temp = it;
186 return utf8::next(temp, end);
187 }
bool is_trail(octet_type oc)
Definition core.h:69
Here is the call graph for this function:

◆ prior()

template<typename octet_iterator >
uint32_t utf8::prior ( octet_iterator & it,
octet_iterator start )

Definition at line 163 of file checked.h.

164 {
165 // can't do much if it == start
166 if (it == start)
167 throw not_enough_room();
168
169 octet_iterator end = it;
170 // Go back until we hit either a lead octet or start
171 while (utf8::internal::is_trail(*(--it)))
172 if (it == start)
173 throw invalid_utf8(*it); // error - no lead byte in the sequence
174 return utf8::peek_next(it, end);
175 }
uint32_t peek_next(octet_iterator it, octet_iterator end)
Definition checked.h:157
Here is the call graph for this function:
Here is the caller graph for this function:

◆ replace_invalid() [1/2]

template<typename octet_iterator , typename output_iterator >
output_iterator utf8::replace_invalid ( octet_iterator start,
octet_iterator end,
output_iterator out )
inline

Definition at line 130 of file checked.h.

131 {
132 static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
133 return utf8::replace_invalid(start, end, out, replacement_marker);
134 }
uint16_t mask16(u16_type oc)
Definition core.h:64
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
Definition checked.h:99
Here is the call graph for this function:

◆ replace_invalid() [2/2]

template<typename octet_iterator , typename output_iterator >
output_iterator utf8::replace_invalid ( octet_iterator start,
octet_iterator end,
output_iterator out,
uint32_t replacement )

Definition at line 99 of file checked.h.

100 {
101 while (start != end) {
102 octet_iterator sequence_start = start;
103 internal::utf_error err_code = utf8::internal::validate_next(start, end);
104 switch (err_code) {
105 case internal::UTF8_OK :
106 for (octet_iterator it = sequence_start; it != start; ++it)
107 *out++ = *it;
108 break;
109 case internal::NOT_ENOUGH_ROOM:
110 throw not_enough_room();
111 case internal::INVALID_LEAD:
112 out = utf8::append (replacement, out);
113 ++start;
114 break;
115 case internal::INCOMPLETE_SEQUENCE:
116 case internal::OVERLONG_SEQUENCE:
117 case internal::INVALID_CODE_POINT:
118 out = utf8::append (replacement, out);
119 ++start;
120 // just one replacement mark for the sequence
121 while (start != end && utf8::internal::is_trail(*start))
122 ++start;
123 break;
124 }
125 }
126 return out;
127 }
octet_iterator append(uint32_t cp, octet_iterator result)
The library API - functions intended to be called by the users.
Definition checked.h:73
Here is the call graph for this function:
Here is the caller graph for this function:

◆ starts_with_bom()

template<typename octet_iterator >
bool utf8::starts_with_bom ( octet_iterator it,
octet_iterator end )
inline

Definition at line 306 of file core.h.

307 {
308 return (
309 ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
310 ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
311 ((it != end) && (utf8::internal::mask8(*it)) == bom[2])
312 );
313 }
Here is the call graph for this function:

◆ utf16to8()

template<typename u16bit_iterator , typename octet_iterator >
octet_iterator utf8::utf16to8 ( u16bit_iterator start,
u16bit_iterator end,
octet_iterator result )

Definition at line 207 of file checked.h.

208 {
209 while (start != end) {
210 uint32_t cp = utf8::internal::mask16(*start++);
211 // Take care of surrogate pairs first
212 if (utf8::internal::is_lead_surrogate(cp)) {
213 if (start != end) {
214 uint32_t trail_surrogate = utf8::internal::mask16(*start++);
215 if (utf8::internal::is_trail_surrogate(trail_surrogate))
216 cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
217 else
218 throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
219 }
220 else
221 throw invalid_utf16(static_cast<uint16_t>(cp));
222
223 }
224 // Lone trail surrogate
225 else if (utf8::internal::is_trail_surrogate(cp))
226 throw invalid_utf16(static_cast<uint16_t>(cp));
227
228 result = utf8::append(cp, result);
229 }
230 return result;
231 }
bool is_lead_surrogate(u16 cp)
Definition core.h:75
bool is_trail_surrogate(u16 cp)
Definition core.h:81
unsigned short uint16_t
Definition stdint.h:125
Here is the call graph for this function:

◆ utf32to8()

template<typename octet_iterator , typename u32bit_iterator >
octet_iterator utf8::utf32to8 ( u32bit_iterator start,
u32bit_iterator end,
octet_iterator result )

Definition at line 249 of file checked.h.

250 {
251 while (start != end)
252 result = utf8::append(*(start++), result);
253
254 return result;
255 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ utf8to16()

template<typename u16bit_iterator , typename octet_iterator >
u16bit_iterator utf8::utf8to16 ( octet_iterator start,
octet_iterator end,
u16bit_iterator result )

Definition at line 234 of file checked.h.

235 {
236 while (start != end) {
237 uint32_t cp = utf8::next(start, end);
238 if (cp > 0xffff) { //make a surrogate pair
239 *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
240 *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
241 }
242 else
243 *result++ = static_cast<uint16_t>(cp);
244 }
245 return result;
246 }
Here is the call graph for this function:

◆ utf8to32()

template<typename octet_iterator , typename u32bit_iterator >
u32bit_iterator utf8::utf8to32 ( octet_iterator start,
octet_iterator end,
u32bit_iterator result )

Definition at line 258 of file checked.h.

259 {
260 while (start != end)
261 (*result++) = utf8::next(start, end);
262
263 return result;
264 }
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ bom

const uint8_t utf8::bom[] = {0xef, 0xbb, 0xbf}

Definition at line 285 of file core.h.

285 const uint8_t bom[] = {0xef, 0xbb, 0xbf};