Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
Catch::XmlEncode Class Reference

#include <catch_xmlwriter.h>

Public Types

enum  ForWhat { ForTextNodes , ForAttributes }
 

Public Member Functions

 XmlEncode (std::string const &str, ForWhat forWhat=ForTextNodes)
 
void encodeTo (std::ostream &os) const
 

Friends

std::ostream & operator<< (std::ostream &os, XmlEncode const &xmlEncode)
 

Detailed Description

Definition at line 18 of file catch_xmlwriter.h.

Member Enumeration Documentation

◆ ForWhat

Enumerator
ForTextNodes 
ForAttributes 

Definition at line 20 of file catch_xmlwriter.h.

Constructor & Destructor Documentation

◆ XmlEncode()

Catch::XmlEncode::XmlEncode ( std::string const & str,
ForWhat forWhat = ForTextNodes )

Definition at line 56 of file catch_xmlwriter.cpp.

57 : m_str( str ),
58 m_forWhat( forWhat )
59 {}

Member Function Documentation

◆ encodeTo()

void Catch::XmlEncode::encodeTo ( std::ostream & os) const

Definition at line 61 of file catch_xmlwriter.cpp.

61 {
62 // Apostrophe escaping not necessary if we always use " to write attributes
63 // (see: http://www.w3.org/TR/xml/#syntax)
64
65 for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) {
66 uchar c = m_str[idx];
67 switch (c) {
68 case '<': os << "&lt;"; break;
69 case '&': os << "&amp;"; break;
70
71 case '>':
72 // See: http://www.w3.org/TR/xml/#syntax
73 if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']')
74 os << "&gt;";
75 else
76 os << c;
77 break;
78
79 case '\"':
80 if (m_forWhat == ForAttributes)
81 os << "&quot;";
82 else
83 os << c;
84 break;
85
86 default:
87 // Check for control characters and invalid utf-8
88
89 // Escape control characters in standard ascii
90 // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
91 if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) {
92 hexEscapeChar(os, c);
93 break;
94 }
95
96 // Plain ASCII: Write it to stream
97 if (c < 0x7F) {
98 os << c;
99 break;
100 }
101
102 // UTF-8 territory
103 // Check if the encoding is valid and if it is not, hex escape bytes.
104 // Important: We do not check the exact decoded values for validity, only the encoding format
105 // First check that this byte is a valid lead byte:
106 // This means that it is not encoded as 1111 1XXX
107 // Or as 10XX XXXX
108 if (c < 0xC0 ||
109 c >= 0xF8) {
110 hexEscapeChar(os, c);
111 break;
112 }
113
114 auto encBytes = trailingBytes(c);
115 // Are there enough bytes left to avoid accessing out-of-bounds memory?
116 if (idx + encBytes - 1 >= m_str.size()) {
117 hexEscapeChar(os, c);
118 break;
119 }
120 // The header is valid, check data
121 // The next encBytes bytes must together be a valid utf-8
122 // This means: bitpattern 10XX XXXX and the extracted value is sane (ish)
123 bool valid = true;
124 uint32_t value = headerValue(c);
125 for (std::size_t n = 1; n < encBytes; ++n) {
126 uchar nc = m_str[idx + n];
127 valid &= ((nc & 0xC0) == 0x80);
128 value = (value << 6) | (nc & 0x3F);
129 }
130
131 if (
132 // Wrong bit pattern of following bytes
133 (!valid) ||
134 // Overlong encodings
135 (value < 0x80) ||
136 (0x80 <= value && value < 0x800 && encBytes > 2) ||
137 (0x800 < value && value < 0x10000 && encBytes > 3) ||
138 // Encoded value out of range
139 (value >= 0x110000)
140 ) {
141 hexEscapeChar(os, c);
142 break;
143 }
144
145 // If we got here, this is in fact a valid(ish) utf-8 sequence
146 for (std::size_t n = 0; n < encBytes; ++n) {
147 os << m_str[idx + n];
148 }
149 idx += encBytes - 1;
150 break;
151 }
152 }
153 }
os_t os
unsigned char uchar
#define value
Definition pkcs11.h:157
unsigned int uint32_t
Definition stdint.h:126

Friends And Related Symbol Documentation

◆ operator<<

std::ostream & operator<< ( std::ostream & os,
XmlEncode const & xmlEncode )
friend

Definition at line 155 of file catch_xmlwriter.cpp.

155 {
156 xmlEncode.encodeTo( os );
157 return os;
158 }

The documentation for this class was generated from the following files: