Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
utf8.cpp
Go to the documentation of this file.
1#include "fc/utf8.hpp"
2
3#include "utf8/checked.h"
4#include "utf8/core.h"
5
6#include <fc/log/logger.hpp>
8
9namespace fc {
10
// Lowercase hexadecimal digit characters; indexed with a 4-bit value (0-15) to obtain its hex digit.
11 inline constexpr char hex_digits[] = "0123456789abcdef";
12
13 bool is_utf8( const std::string& str )
14 {
15 return utf8::is_valid( str.begin(), str.end() );
16 }
17
18 // tweaked utf8::find_invalid that also considers provided range as invalid
19 // @param invalid_range, indicates additional invalid values
20 // @return [iterator to found invalid char, the value found if in range of provided pair invalid_range otherwise UINT32_MAX]
21 template <typename octet_iterator>
22 std::pair<octet_iterator, uint32_t> find_invalid(octet_iterator start, octet_iterator end,
23 const std::pair<uint32_t, uint32_t>& invalid_range)
24 {
25 FC_ASSERT( invalid_range.first <= invalid_range.second );
26 octet_iterator result = start;
28 while( result != end ) {
29 octet_iterator itr = result;
31 if( err_code != utf8::internal::UTF8_OK )
32 return {result, UINT32_MAX};
33 if( value >= invalid_range.first && value <= invalid_range.second )
34 return {itr, value};
35 }
36 return {result, UINT32_MAX};
37 }
38
39
40 bool is_valid_utf8( const std::string_view& str ) {
41 const auto invalid_range = std::make_pair<uint32_t, uint32_t>(0x80, 0x9F);
42 auto [itr, v] = find_invalid( str.begin(), str.end(), invalid_range );
43 return itr == str.end();
44 }
45
46 // escape 0x80-0x9F C1 control characters
47 string prune_invalid_utf8( const std::string_view& str ) {
48 const auto invalid_range = std::make_pair<uint32_t, uint32_t>(0x80, 0x9F);
49 auto [itr, v] = find_invalid( str.begin(), str.end(), invalid_range );
50 if( itr == str.end() ) return std::string( str );
51
52 string result;
53 auto escape = [&result](uint32_t v) { // v is [0x80-0x9F]
54 result += "\\u00";
55 result += hex_digits[v >> 4u];
56 result += hex_digits[v & 15u];
57 };
58
59 result = string( str.begin(), itr );
60 if( v != UINT32_MAX ) escape(v);
61 while( itr != str.end() ) {
62 ++itr;
63 auto start = itr;
64 std::tie(itr, v) = find_invalid( start, str.end(), invalid_range );
65 result += string( start, itr );
66 if( v != UINT32_MAX ) escape(v);
67 }
68 return result;
69 }
70
71 void decodeUtf8(const std::string& input, std::wstring* storage)
72 {
73 FC_ASSERT(storage != nullptr);
74
75 utf8::utf8to32(input.begin(), input.end(), std::back_inserter(*storage));
76 }
77
78 void encodeUtf8(const std::wstring& input, std::string* storage)
79 {
80 FC_ASSERT(storage != nullptr);
81
82 utf8::utf32to8(input.begin(), input.end(), std::back_inserter(*storage));
83 }
84
85}
Defines exceptions used by fc.
#define FC_ASSERT(TEST,...)
Checks a condition and throws an assert_exception if the test is FALSE.
namespace sysio::chain
Definition authority.cpp:3
std::string string
Definition string.hpp:10
void decodeUtf8(const std::string &input, std::wstring *storage)
Definition utf8.cpp:71
bool is_valid_utf8(const std::string_view &str)
Definition utf8.cpp:40
void encodeUtf8(const std::wstring &input, std::string *storage)
Definition utf8.cpp:78
bool is_utf8(const std::string &str)
Definition utf8.cpp:13
constexpr char hex_digits[]
Definition utf8.cpp:11
std::pair< octet_iterator, uint32_t > find_invalid(octet_iterator start, octet_iterator end, const std::pair< uint32_t, uint32_t > &invalid_range)
Definition utf8.cpp:22
std::string prune_invalid_utf8(const std::string_view &str)
Definition utf8.cpp:47
utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t &code_point)
Definition core.h:223
octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result)
Definition checked.h:249
u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result)
Definition checked.h:258
bool is_valid(octet_iterator start, octet_iterator end)
Definition core.h:300
#define value
Definition pkcs11.h:157
unsigned int uint32_t
Definition stdint.h:126
#define UINT32_MAX
Definition stdint.h:188