Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
NFA Namespace Reference

Classes

struct  Builder
 
struct  DFAState
 
struct  Machine
 
struct  NFAState
 
struct  StateTransitionsByChar
 

Typedefs

typedef std::vector< StateIndexStateSet
 
typedef DenseStaticIntSet< U8, 256 > CharSet
 
typedef I16 StateIndex
 

Enumerations

enum  { edgeDoesntConsumeInputFlag = (StateIndex)0x4000 , unmatchedCharacterTerminal = (StateIndex)0x8000 , maximumTerminalStateIndex = (StateIndex)0xbfff }
 

Functions

template<typename Element >
void addUnique (std::vector< Element > &vector, const Element &element)
 
BuildercreateBuilder ()
 
StateIndex addState (Builder *builder)
 
void addEdge (Builder *builder, StateIndex initialState, const CharSet &predicate, StateIndex nextState)
 
void addEpsilonEdge (Builder *builder, StateIndex initialState, StateIndex nextState)
 
StateIndex getNonTerminalEdge (Builder *builder, StateIndex initialState, char c)
 
std::vector< DFAStateconvertToDFA (Builder *builder)
 
char nibbleToHexChar (U8 value)
 
std::string escapeString (const std::string &string)
 
std::string getGraphEdgeCharLabel (Uptr charIndex)
 
std::string getGraphEdgeLabel (const CharSet &charSet)
 
std::string dumpNFAGraphViz (const Builder *builder)
 

Typedef Documentation

◆ CharSet

Definition at line 11 of file NFA.h.

◆ StateIndex

Definition at line 14 of file NFA.h.

◆ StateSet

typedef std::vector<StateIndex> NFA::StateSet

Definition at line 14 of file NFA.cpp.

Enumeration Type Documentation

◆ anonymous enum

anonymous enum
Enumerator
edgeDoesntConsumeInputFlag 
unmatchedCharacterTerminal 
maximumTerminalStateIndex 

Definition at line 16 of file NFA.h.

17 {
18 edgeDoesntConsumeInputFlag = (StateIndex)0x4000, // A flag that's set on terminal DFA state transitions that don't consume any input
19 unmatchedCharacterTerminal = (StateIndex)0x8000, // An implicit terminal state that indicates the DFA didn't recognize the input
20 maximumTerminalStateIndex = (StateIndex)0xbfff, // Should be the largest negative number that doesn't have edgeDoesntConsumeInputFlag set.
21 };
@ maximumTerminalStateIndex
Definition NFA.h:20
@ edgeDoesntConsumeInputFlag
Definition NFA.h:18
I16 StateIndex
Definition NFA.h:14

Function Documentation

◆ addEdge()

void NFA::addEdge ( Builder * builder,
StateIndex initialState,
const CharSet & predicate,
StateIndex nextState )

Definition at line 54 of file NFA.cpp.

55 {
56 CharSet& transitionPredicate = builder->nfaStates[initialState].nextStateToPredicateMap[nextState];
57 transitionPredicate = transitionPredicate | predicate;
58 }
std::vector< NFAState > nfaStates
Definition NFA.cpp:37

◆ addEpsilonEdge()

void NFA::addEpsilonEdge ( Builder * builder,
StateIndex initialState,
StateIndex nextState )

Definition at line 60 of file NFA.cpp.

61 {
62 addUnique(builder->nfaStates[initialState].epsilonNextStates,nextState);
63 }
void addUnique(std::vector< Element > &vector, const Element &element)
Definition NFA.cpp:17
Here is the call graph for this function:

◆ addState()

StateIndex NFA::addState ( Builder * builder)

Definition at line 47 of file NFA.cpp.

48 {
49 WAVM_ASSERT_THROW(builder->nfaStates.size() < INT16_MAX);
50 builder->nfaStates.emplace_back();
51 return StateIndex(builder->nfaStates.size() - 1);
52 }
#define WAVM_ASSERT_THROW(cond)
Definition Errors.h:29
#define INT16_MAX
Definition stdint.h:181
Here is the caller graph for this function:

◆ addUnique()

template<typename Element >
void NFA::addUnique ( std::vector< Element > & vector,
const Element & element )

Definition at line 17 of file NFA.cpp.

18 {
19 for(const auto& existingElement : vector)
20 {
21 if(existingElement == element)
22 {
23 return;
24 }
25 }
26 vector.push_back(element);
27 }
Here is the caller graph for this function:

◆ convertToDFA()

std::vector< DFAState > NFA::convertToDFA ( Builder * builder)

Definition at line 90 of file NFA.cpp.

91 {
92 Timing::Timer timer;
93
94 std::vector<DFAState> dfaStates;
95 std::map<StateSet,StateIndex> nfaStateSetToDFAStateMap;
96 std::vector<StateSet> dfaStateToNFAStateSetMap;
97 std::vector<StateIndex> pendingDFAStates;
98
99 nfaStateSetToDFAStateMap.emplace(StateSet {0},(StateIndex)0);
100 dfaStateToNFAStateSetMap.emplace_back(StateSet {0});
101 dfaStates.emplace_back();
102 pendingDFAStates.push_back((StateIndex)0);
103
104 Uptr maxLocalStates = 0;
105 Uptr maxDFANextStates = 0;
106
107 while(pendingDFAStates.size())
108 {
109 const StateIndex currentDFAStateIndex = pendingDFAStates.back();
110 pendingDFAStates.pop_back();
111
112 const StateSet currentStateSet = dfaStateToNFAStateSetMap[currentDFAStateIndex];
113
114 // Expand the set of current states to include all states reachable by epsilon transitions from the current states.
115 StateSet epsilonClosureCurrentStateSet = currentStateSet;
116 for(Uptr scanIndex = 0;scanIndex < epsilonClosureCurrentStateSet.size();++scanIndex)
117 {
118 StateIndex scanState = epsilonClosureCurrentStateSet[scanIndex];
119 if(scanState >= 0)
120 {
121 for(auto epsilonNextState : builder->nfaStates[scanState].epsilonNextStates)
122 {
123 addUnique(epsilonClosureCurrentStateSet,epsilonNextState);
124 }
125 }
126 }
127
128 // Find the subset of the non-terminal states in the current set.
129 StateSet nonTerminalCurrentStateSet;
131 bool hasCurrentTerminalState = false;
132 for(auto stateIndex : epsilonClosureCurrentStateSet)
133 {
134 if(stateIndex >= 0)
135 {
136 addUnique(nonTerminalCurrentStateSet,stateIndex);
137 }
138 else
139 {
140 if(hasCurrentTerminalState)
141 {
142 Errors::fatalf("NFA has multiple possible terminal states for the same input");
143 }
144 hasCurrentTerminalState = true;
145 currentTerminalState = stateIndex | edgeDoesntConsumeInputFlag;
146 }
147 }
148
149 // Build a compact index of the states following all states in the current set.
150 std::map<StateIndex,StateIndex> stateIndexToLocalStateIndexMap;
151 std::vector<StateIndex> localStateIndexToStateIndexMap;
152 for(auto stateIndex : nonTerminalCurrentStateSet)
153 {
154 const NFAState& nfaState = builder->nfaStates[stateIndex];
155 for(auto transition : nfaState.nextStateToPredicateMap)
156 {
157 if(!stateIndexToLocalStateIndexMap.count(transition.first))
158 {
159 stateIndexToLocalStateIndexMap.emplace(transition.first,(StateIndex)localStateIndexToStateIndexMap.size());
160 localStateIndexToStateIndexMap.emplace_back(transition.first);
161 }
162 }
163 }
164
165 if(!stateIndexToLocalStateIndexMap.count(currentTerminalState))
166 {
167 stateIndexToLocalStateIndexMap.emplace(currentTerminalState,(StateIndex)localStateIndexToStateIndexMap.size());
168 localStateIndexToStateIndexMap.emplace_back(currentTerminalState);
169 }
170
171 enum { numSupportedLocalStates = 64 };
173
174 const Uptr numLocalStates = stateIndexToLocalStateIndexMap.size();
175 WAVM_ASSERT_THROW(numLocalStates <= numSupportedLocalStates);
176 maxLocalStates = std::max<Uptr>(maxLocalStates,numLocalStates);
177
178 // Combine the [nextState][char] transition maps for current states and transpose to [char][nextState]
179 // After building the compact index of referenced states, the nextState set can be represented as a 64-bit mask.
180 LocalStateSet charToLocalStateSet[256];
181 for(auto stateIndex : nonTerminalCurrentStateSet)
182 {
183 const NFAState& nfaState = builder->nfaStates[stateIndex];
184 for(auto transition : nfaState.nextStateToPredicateMap)
185 {
186 for(Uptr charIndex = 0;charIndex < 256;++charIndex)
187 {
188 if(transition.second.contains((char)charIndex))
189 {
190 charToLocalStateSet[charIndex].add(stateIndexToLocalStateIndexMap.at(transition.first));
191 }
192 }
193 }
194 }
195
196 const LocalStateSet currentTerminalStateLocalSet(stateIndexToLocalStateIndexMap.at(currentTerminalState));
197 for(Uptr charIndex = 0;charIndex < 256;++charIndex)
198 {
199 if(charToLocalStateSet[charIndex].isEmpty())
200 {
201 charToLocalStateSet[charIndex] = currentTerminalStateLocalSet;
202 }
203 }
204
205 // Find the set of unique local state sets that follow this state set.
206 std::vector<LocalStateSet> uniqueLocalNextStateSets;
207 for(Uptr charIndex = 0;charIndex < 256;++charIndex)
208 {
209 const LocalStateSet localStateSet = charToLocalStateSet[charIndex];
210 if(!localStateSet.isEmpty())
211 {
212 addUnique(uniqueLocalNextStateSets,localStateSet);
213 }
214 }
215
216 // For each unique local state set that follows this state set, find or create a corresponding DFA state.
217 std::map<LocalStateSet,StateIndex> localStateSetToDFAStateIndexMap;
218 for(auto localNextStateSet : uniqueLocalNextStateSets)
219 {
220 // Convert the local state set bit mask to a global NFA state set.
221 StateSet nextStateSet;
222 {
223 LocalStateSet residualLocalStateSet = localNextStateSet;
224 while(true)
225 {
226 const StateIndex localStateIndex = residualLocalStateSet.getSmallestMember();
227 if(localStateIndex == numSupportedLocalStates) { break; }
228
229 nextStateSet.push_back(localStateIndexToStateIndexMap.at(localStateIndex));
230 residualLocalStateSet.remove(localStateIndex);
231 };
232 }
233
234 if(nextStateSet.size() == 1 && *nextStateSet.begin() < 0)
235 {
236 localStateSetToDFAStateIndexMap.insert(std::make_pair(localNextStateSet,*nextStateSet.begin()));
237 }
238 else
239 {
240 // Find an existing DFA state corresponding to this NFA state set.
241 auto nextDFAStateIt = nfaStateSetToDFAStateMap.find(nextStateSet);
242 StateIndex nextDFAStateIndex;
243 if(nextDFAStateIt != nfaStateSetToDFAStateMap.end())
244 {
245 localStateSetToDFAStateIndexMap.emplace(localNextStateSet,nextDFAStateIt->second);
246 }
247 else
248 {
249 // If no corresponding DFA state existing yet, create a new one and add it to the queue
250 // of pending states to process.
251 nextDFAStateIndex = (StateIndex)dfaStates.size();
252 localStateSetToDFAStateIndexMap.emplace(localNextStateSet,nextDFAStateIndex);
253 nfaStateSetToDFAStateMap.insert(std::make_pair(nextStateSet,nextDFAStateIndex));
254 dfaStateToNFAStateSetMap.emplace_back(nextStateSet);
255 dfaStates.emplace_back();
256 pendingDFAStates.push_back(nextDFAStateIndex);
257 }
258 }
259 }
260
261 // Set up the DFA transition map.
262 DFAState& dfaState = dfaStates[nfaStateSetToDFAStateMap[currentStateSet]];
263 for(auto localStateSetToDFAStateIndex : localStateSetToDFAStateIndexMap)
264 {
265 const LocalStateSet localStateSet = localStateSetToDFAStateIndex.first;
266 const StateIndex nextDFAStateIndex = localStateSetToDFAStateIndex.second;
267 for(Uptr charIndex = 0;charIndex < 256;++charIndex)
268 {
269 if(charToLocalStateSet[charIndex] == localStateSet)
270 {
271 dfaState.nextStateByChar[charIndex] = nextDFAStateIndex;
272 }
273 }
274 }
275 maxDFANextStates = std::max(maxDFANextStates,(Uptr)uniqueLocalNextStateSets.size());
276 };
277
278 Timing::logTimer("translated NFA->DFA",timer);
279 Log::printf(Log::Category::metrics," translated NFA with %u states to DFA with %u states\n",builder->nfaStates.size(),dfaStates.size());
280 Log::printf(Log::Category::metrics," maximum number of states following a NFA state set: %u\n",maxLocalStates);
281 Log::printf(Log::Category::metrics," maximum number of states following a DFA state: %u\n",maxDFANextStates);
282
283 return dfaStates;
284 }
PointerIntHelper< sizeof(size_t)>::UnsignedIntType Uptr
Definition BasicTypes.h:22
void fatalf(const char *messageFormat,...)
Definition Errors.h:12
LOGGING_API void printf(Category category, const char *format,...)
Definition Logging.cpp:30
std::vector< StateIndex > StateSet
Definition NFA.cpp:14
@ unmatchedCharacterTerminal
Definition NFA.h:19
void logTimer(const char *context, Timer &timer)
Definition Timing.h:28
Here is the call graph for this function:
Here is the caller graph for this function:

◆ createBuilder()

Builder * NFA::createBuilder ( )

Definition at line 40 of file NFA.cpp.

41 {
42 auto builder = new Builder();
43 addState(builder);
44 return builder;
45 }
StateIndex addState(Builder *builder)
Definition NFA.cpp:47
Here is the call graph for this function:
Here is the caller graph for this function:

◆ dumpNFAGraphViz()

std::string NFA::dumpNFAGraphViz ( const Builder * builder)

Definition at line 467 of file NFA.cpp.

468 {
469 std::string result;
470 result += "digraph {\n";
471 std::set<StateIndex> terminalStates;
472 for(Uptr stateIndex = 0;stateIndex < builder->nfaStates.size();++stateIndex)
473 {
474 const NFAState& nfaState = builder->nfaStates[stateIndex];
475
476 result += "state" + std::to_string(stateIndex) + "[shape=square label=\"" + std::to_string(stateIndex) + "\"];\n";
477
478 for(const auto& statePredicatePair : nfaState.nextStateToPredicateMap)
479 {
480 std::string edgeLabel = getGraphEdgeLabel(statePredicatePair.second);
481 std::string nextStateName = statePredicatePair.first < 0
482 ? "terminal" + std::to_string(-statePredicatePair.first)
483 : "state" + std::to_string(statePredicatePair.first);
484 result += "state" + std::to_string(stateIndex)
485 + " -> "
486 + nextStateName + "[label=\"" + escapeString(edgeLabel) + "\"];\n";
487 if(statePredicatePair.first < 0)
488 {
489 terminalStates.emplace(statePredicatePair.first);
490 }
491 }
492
493 for(auto epsilonNextState : nfaState.epsilonNextStates)
494 {
495 std::string nextStateName = epsilonNextState < 0
496 ? "terminal" + std::to_string(-epsilonNextState)
497 : "state" + std::to_string(epsilonNextState);
498 result += "state" + std::to_string(stateIndex) + " -> " + nextStateName + "[label=\"&epsilon;\"];\n";
499 }
500 }
501 for(auto terminalState : terminalStates)
502 {
503 result += "terminal" + std::to_string(-terminalState)
504 + "[shape=octagon label=\"" + std::to_string(maximumTerminalStateIndex - terminalState) + "\"];\n";
505 }
506 result += "}\n";
507 return result;
508 }
std::string getGraphEdgeLabel(const CharSet &charSet)
Definition NFA.cpp:436
std::string escapeString(const std::string &string)
Definition NFA.cpp:399
Definition name.hpp:106
std::vector< StateIndex > epsilonNextStates
Definition NFA.cpp:32
std::map< StateIndex, CharSet > nextStateToPredicateMap
Definition NFA.cpp:31
Here is the call graph for this function:
Here is the caller graph for this function:

◆ escapeString()

std::string NFA::escapeString ( const std::string & string)

Definition at line 399 of file NFA.cpp.

400 {
401 std::string result;
402 for(Uptr charIndex = 0;charIndex < string.size();++charIndex)
403 {
404 auto c = string[charIndex];
405 if(c == '\\') { result += "\\\\"; }
406 else if(c == '\"') { result += "\\\""; }
407 else if(c == '\n') { result += "\\n"; }
408 else if(c < 0x20 || c > 0x7e)
409 {
410 result += "\\\\";
411 result += nibbleToHexChar((c & 0xf0) >> 4);
412 result += nibbleToHexChar((c & 0x0f) >> 0);
413 }
414 else { result += c; }
415 }
416 return result;
417 }
char nibbleToHexChar(U8 value)
Definition NFA.cpp:397
Here is the call graph for this function:
Here is the caller graph for this function:

◆ getGraphEdgeCharLabel()

std::string NFA::getGraphEdgeCharLabel ( Uptr charIndex)

Definition at line 419 of file NFA.cpp.

420 {
421 switch(charIndex)
422 {
423 case '^': return "\\^";
424 case '\f': return "\\f";
425 case '\r': return "\\r";
426 case '\n': return "\\n";
427 case '\t': return "\\t";
428 case '\'': return "\\\'";
429 case '\"': return "\\\"";
430 case '\\': return "\\\\";
431 case '-': return "\\-";
432 default: return std::string(1,(char)charIndex);
433 };
434 }
Here is the caller graph for this function:

◆ getGraphEdgeLabel()

std::string NFA::getGraphEdgeLabel ( const CharSet & charSet)

Definition at line 436 of file NFA.cpp.

437 {
438 std::string edgeLabel;
439 U8 numSetChars = 0;
440 for(Uptr charIndex = 0;charIndex < 256;++charIndex)
441 {
442 if(charSet.contains((char)charIndex)) { ++numSetChars; }
443 }
444 if(numSetChars > 1) { edgeLabel += "["; }
445 const bool isNegative = numSetChars >= 100;
446 if(isNegative) { edgeLabel += "^"; }
447 for(Uptr charIndex = 0;charIndex < 256;++charIndex)
448 {
449 if(charSet.contains((char)charIndex) != isNegative)
450 {
451 edgeLabel += getGraphEdgeCharLabel(charIndex);
452 if(charIndex + 2 < 256
453 && charSet.contains((char)charIndex+1) != isNegative
454 && charSet.contains((char)charIndex+2) != isNegative)
455 {
456 edgeLabel += "-";
457 charIndex += 2;
458 while(charIndex + 1 < 256 && charSet.contains((char)charIndex+1) != isNegative) { ++charIndex; }
459 edgeLabel += getGraphEdgeCharLabel(charIndex);
460 }
461 }
462 }
463 if(numSetChars > 1) { edgeLabel += "]"; }
464 return edgeLabel;
465 }
uint8_t U8
Definition BasicTypes.h:5
std::string getGraphEdgeCharLabel(Uptr charIndex)
Definition NFA.cpp:419
bool contains(Index index) const
Here is the call graph for this function:
Here is the caller graph for this function:

◆ getNonTerminalEdge()

StateIndex NFA::getNonTerminalEdge ( Builder * builder,
StateIndex initialState,
char c )

Definition at line 65 of file NFA.cpp.

66 {
67 for(const auto& nextStateToPredicatePair : builder->nfaStates[initialState].nextStateToPredicateMap)
68 {
69 if(nextStateToPredicatePair.first >= 0 && nextStateToPredicatePair.second.contains((U8)c))
70 {
71 return nextStateToPredicatePair.first;
72 }
73 }
74
76 }

◆ nibbleToHexChar()

char NFA::nibbleToHexChar ( U8 value)

Definition at line 397 of file NFA.cpp.

397{ return value < 10 ? ('0' + value) : 'a' + value - 10; }
#define value
Definition pkcs11.h:157
Here is the caller graph for this function: