Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
encodedstream.h
Go to the documentation of this file.
1// Tencent is pleased to support the open source community by making RapidJSON available.
2//
3// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4//
5// Licensed under the MIT License (the "License"); you may not use this file except
6// in compliance with the License. You may obtain a copy of the License at
7//
8// http://opensource.org/licenses/MIT
9//
10// Unless required by applicable law or agreed to in writing, software distributed
11// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13// specific language governing permissions and limitations under the License.
14
15#ifndef RAPIDJSON_ENCODEDSTREAM_H_
16#define RAPIDJSON_ENCODEDSTREAM_H_
17
18#include "stream.h"
19#include "memorystream.h"
20
21#ifdef __GNUC__
22RAPIDJSON_DIAG_PUSH
23RAPIDJSON_DIAG_OFF(effc++)
24#endif
25
26#ifdef __clang__
27RAPIDJSON_DIAG_PUSH
28RAPIDJSON_DIAG_OFF(padded)
29#endif
30
32
34
38template <typename Encoding, typename InputByteStream>
40 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
41public:
42 typedef typename Encoding::Ch Ch;
43
44 EncodedInputStream(InputByteStream& is) : is_(is) {
45 current_ = Encoding::TakeBOM(is_);
46 }
47
48 Ch Peek() const { return current_; }
49 Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; }
50 size_t Tell() const { return is_.Tell(); }
51
52 // Not implemented
53 void Put(Ch) { RAPIDJSON_ASSERT(false); }
54 void Flush() { RAPIDJSON_ASSERT(false); }
55 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
56 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
57
58private:
60 EncodedInputStream& operator=(const EncodedInputStream&);
61
62 InputByteStream& is_;
63 Ch current_;
64};
65
67template <>
69public:
70 typedef UTF8<>::Ch Ch;
71
73 if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take();
74 if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take();
75 if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take();
76 }
77 Ch Peek() const { return is_.Peek(); }
78 Ch Take() { return is_.Take(); }
79 size_t Tell() const { return is_.Tell(); }
80
81 // Not implemented: in this specialization the output members are silent no-ops (they do not assert)
82 void Put(Ch) {}
83 void Flush() {}
84 Ch* PutBegin() { return 0; }
85 size_t PutEnd(Ch*) { return 0; }
86
88
89private:
91 EncodedInputStream& operator=(const EncodedInputStream&);
92};
93
95
99template <typename Encoding, typename OutputByteStream>
101 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
102public:
103 typedef typename Encoding::Ch Ch;
104
105 EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) {
106 if (putBOM)
107 Encoding::PutBOM(os_);
108 }
109
110 void Put(Ch c) { Encoding::Put(os_, c); }
111 void Flush() { os_.Flush(); }
112
113 // Not implemented
114 Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
115 Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
116 size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
117 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
118 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
119
120private:
122 EncodedOutputStream& operator=(const EncodedOutputStream&);
123
124 OutputByteStream& os_;
125};
126
127#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
128
130
134template <typename CharType, typename InputByteStream>
136 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
137public:
138 typedef CharType Ch;
139
141
// Wraps the byte stream `is`. `type` is the caller's initial encoding (kUTF8 by default);
// DetectType() may replace it after inspecting the leading bytes of the stream.
145 AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
146 RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
147 DetectType();
// One decoder per encoding, in the order listed by RAPIDJSON_ENCODINGS_FUNC, indexed by type_.
// NOTE(review): assumes the UTFType enumerator values follow that same order - confirm in encodings.h.
148 static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
149 takeFunc_ = f[type_];
150 current_ = takeFunc_(*is_); // Decode the first character up front so Peek() can return it.
151 }
152
153 UTFType GetType() const { return type_; }
154 bool HasBOM() const { return hasBOM_; }
155
156 Ch Peek() const { return current_; }
157 Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; }
158 size_t Tell() const { return is_->Tell(); }
159
160 // Not implemented
161 void Put(Ch) { RAPIDJSON_ASSERT(false); }
162 void Flush() { RAPIDJSON_ASSERT(false); }
163 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
164 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
165
166private:
168 AutoUTFInputStream& operator=(const AutoUTFInputStream&);
169
170 // Detect the encoding type from a BOM, or failing that from the RFC 4627 null-byte pattern.
// Updates type_ and hasBOM_, and consumes the BOM bytes from the stream when one is found.
// If neither a BOM nor a known pattern matches, type_ keeps the value passed to the constructor.
171 void DetectType() {
172 // BOM (Byte Order Mark):
173 // 00 00 FE FF UTF-32BE
174 // FF FE 00 00 UTF-32LE
175 // FE FF UTF-16BE
176 // FF FE UTF-16LE
177 // EF BB BF UTF-8
178
179 const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4());
// No bytes to inspect: keep the caller-supplied type_ and hasBOM_ == false.
// NOTE(review): presumably Peek4() returns null when fewer than four bytes remain - confirm against the stream class.
180 if (!c)
181 return;
182
// Pack the four bytes with c[0] in the low byte, so the constants below read
// "backwards" relative to the byte order in the table above.
// NOTE(review): c[3] is promoted to int before "<< 24", so a value >= 0x80 shifts into the sign bit - confirm this is acceptable for the targeted language standard.
183 unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
184 hasBOM_ = false;
// The four-byte UTF-32 checks must come first: the UTF-32LE BOM (FF FE 00 00) shares its
// first two bytes with the UTF-16LE BOM (FF FE) and would otherwise match the 16-bit test.
185 if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
186 else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
187 else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); }
188 else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); }
189 else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); }
190
191 // RFC 4627: Section 3
192 // "Since the first two characters of a JSON text will always be ASCII
193 // characters [RFC0020], it is possible to determine whether an octet
194 // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
195 // at the pattern of nulls in the first four octets."
196 // 00 00 00 xx UTF-32BE
197 // 00 xx 00 xx UTF-16BE
198 // xx 00 00 00 UTF-32LE
199 // xx 00 xx 00 UTF-16LE
200 // xx xx xx xx UTF-8
201
202 if (!hasBOM_) {
// Bit i of pattern is set when byte i is non-zero (bit 0 = first byte).
203 int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
204 switch (pattern) {
205 case 0x08: type_ = kUTF32BE; break;
206 case 0x0A: type_ = kUTF16BE; break;
207 case 0x01: type_ = kUTF32LE; break;
208 case 0x05: type_ = kUTF16LE; break;
209 case 0x0F: type_ = kUTF8; break;
210 default: break; // Use type defined by user.
211 }
212 }
213
214 // Runtime check that the character type is wide enough for the detected encoding. This is enforced only by assertion.
215 if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
216 if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
217 }
218
219 typedef Ch (*TakeFunc)(InputByteStream& is);
220 InputByteStream* is_;
221 UTFType type_;
222 Ch current_;
223 TakeFunc takeFunc_;
224 bool hasBOM_;
225};
226
228
232template <typename CharType, typename OutputByteStream>
234 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
235public:
236 typedef CharType Ch;
237
239
// Wraps the byte stream `os`, encoding every character written through Put() as `type`.
// When `putBOM` is true, the BOM for that encoding is written to `os` during construction.
244 AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
245 RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
246
247 // Runtime check that the character type is wide enough for the requested encoding. This is enforced only by assertion.
248 if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
249 if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
250
// One encoder per encoding, in the order listed by RAPIDJSON_ENCODINGS_FUNC, indexed by type_.
// NOTE(review): assumes the UTFType enumerator values follow that same order - confirm in encodings.h.
251 static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) };
252 putFunc_ = f[type_];
253
254 if (putBOM)
255 PutBOM();
256 }
257
258 UTFType GetType() const { return type_; }
259
260 void Put(Ch c) { putFunc_(*os_, c); }
261 void Flush() { os_->Flush(); }
262
263 // Not implemented
264 Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
265 Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
266 size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
267 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
268 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
269
270private:
272 AutoUTFOutputStream& operator=(const AutoUTFOutputStream&);
273
// Writes the BOM for the selected encoding to the wrapped stream by
// dispatching through a per-encoding table indexed by type_.
274 void PutBOM() {
275 typedef void (*PutBOMFunc)(OutputByteStream&);
276 static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
277 f[type_](*os_);
278 }
279
280 typedef void (*PutFunc)(OutputByteStream&, Ch);
281
282 OutputByteStream* os_;
283 UTFType type_;
284 PutFunc putFunc_;
285};
286
287#undef RAPIDJSON_ENCODINGS_FUNC
288
290
291#ifdef __clang__
292RAPIDJSON_DIAG_POP
293#endif
294
295#ifdef __GNUC__
296RAPIDJSON_DIAG_POP
297#endif
298
299#endif // RAPIDJSON_ENCODEDSTREAM_H_
Input stream wrapper with dynamically bound encoding and automatic encoding detection.
size_t PutEnd(Ch *)
size_t Tell() const
AutoUTFInputStream(InputByteStream &is, UTFType type=kUTF8)
Constructor.
bool HasBOM() const
UTFType GetType() const
Output stream wrapper with dynamically bound encoding and automatic encoding detection.
AutoUTFOutputStream(OutputByteStream &os, UTFType type, bool putBOM)
Constructor.
UTFType GetType() const
size_t Tell() const
Input byte stream wrapper with a statically bound encoding.
EncodedInputStream(InputByteStream &is)
size_t PutEnd(Ch *)
size_t Tell() const
Output byte stream wrapper with statically bound encoding.
size_t Tell() const
EncodedOutputStream(OutputByteStream &os, bool putBOM=true)
#define RAPIDJSON_ENCODINGS_FUNC(x)
UTFType
Runtime-specified UTF encoding type of a stream.
Definition encodings.h:603
@ kUTF32BE
UTF-32 big endian.
Definition encodings.h:608
@ kUTF16BE
UTF-16 big endian.
Definition encodings.h:606
@ kUTF8
UTF-8.
Definition encodings.h:604
@ kUTF32LE
UTF-32 little endian.
Definition encodings.h:607
@ kUTF16LE
UTF-16 little endian.
Definition encodings.h:605
os_t os
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:406
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:121
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:124
Definition UTF8.h:7
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition rapidjson.h:445
Represents an in-memory input byte stream.
UTF-8 encoding.
Definition encodings.h:96
if(ppFunctionList==NULL)