91 {
93
94 std::vector<DFAState> dfaStates;
95 std::map<StateSet,StateIndex> nfaStateSetToDFAStateMap;
96 std::vector<StateSet> dfaStateToNFAStateSetMap;
97 std::vector<StateIndex> pendingDFAStates;
98
99 nfaStateSetToDFAStateMap.emplace(
StateSet {0},(StateIndex)0);
100 dfaStateToNFAStateSetMap.emplace_back(StateSet {0});
101 dfaStates.emplace_back();
102 pendingDFAStates.push_back((StateIndex)0);
103
104 Uptr maxLocalStates = 0;
105 Uptr maxDFANextStates = 0;
106
107 while(pendingDFAStates.size())
108 {
109 const StateIndex currentDFAStateIndex = pendingDFAStates.back();
110 pendingDFAStates.pop_back();
111
112 const StateSet currentStateSet = dfaStateToNFAStateSetMap[currentDFAStateIndex];
113
114
115 StateSet epsilonClosureCurrentStateSet = currentStateSet;
116 for(
Uptr scanIndex = 0;scanIndex < epsilonClosureCurrentStateSet.size();++scanIndex)
117 {
118 StateIndex scanState = epsilonClosureCurrentStateSet[scanIndex];
119 if(scanState >= 0)
120 {
121 for(
auto epsilonNextState : builder->
nfaStates[scanState].epsilonNextStates)
122 {
123 addUnique(epsilonClosureCurrentStateSet,epsilonNextState);
124 }
125 }
126 }
127
128
129 StateSet nonTerminalCurrentStateSet;
131 bool hasCurrentTerminalState = false;
132 for(auto stateIndex : epsilonClosureCurrentStateSet)
133 {
134 if(stateIndex >= 0)
135 {
136 addUnique(nonTerminalCurrentStateSet,stateIndex);
137 }
138 else
139 {
140 if(hasCurrentTerminalState)
141 {
142 Errors::fatalf(
"NFA has multiple possible terminal states for the same input");
143 }
144 hasCurrentTerminalState = true;
146 }
147 }
148
149
150 std::map<StateIndex,StateIndex> stateIndexToLocalStateIndexMap;
151 std::vector<StateIndex> localStateIndexToStateIndexMap;
152 for(auto stateIndex : nonTerminalCurrentStateSet)
153 {
154 const NFAState& nfaState = builder->
nfaStates[stateIndex];
155 for(auto transition : nfaState.nextStateToPredicateMap)
156 {
157 if(!stateIndexToLocalStateIndexMap.count(transition.first))
158 {
159 stateIndexToLocalStateIndexMap.emplace(transition.first,(StateIndex)localStateIndexToStateIndexMap.size());
160 localStateIndexToStateIndexMap.emplace_back(transition.first);
161 }
162 }
163 }
164
165 if(!stateIndexToLocalStateIndexMap.count(currentTerminalState))
166 {
167 stateIndexToLocalStateIndexMap.emplace(currentTerminalState,(StateIndex)localStateIndexToStateIndexMap.size());
168 localStateIndexToStateIndexMap.emplace_back(currentTerminalState);
169 }
170
171 enum { numSupportedLocalStates = 64 };
173
174 const Uptr numLocalStates = stateIndexToLocalStateIndexMap.size();
176 maxLocalStates = std::max<Uptr>(maxLocalStates,numLocalStates);
177
178
179
180 LocalStateSet charToLocalStateSet[256];
181 for(auto stateIndex : nonTerminalCurrentStateSet)
182 {
183 const NFAState& nfaState = builder->
nfaStates[stateIndex];
184 for(auto transition : nfaState.nextStateToPredicateMap)
185 {
186 for(
Uptr charIndex = 0;charIndex < 256;++charIndex)
187 {
188 if(transition.second.contains((char)charIndex))
189 {
190 charToLocalStateSet[charIndex].add(stateIndexToLocalStateIndexMap.at(transition.first));
191 }
192 }
193 }
194 }
195
196 const LocalStateSet currentTerminalStateLocalSet(stateIndexToLocalStateIndexMap.at(currentTerminalState));
197 for(
Uptr charIndex = 0;charIndex < 256;++charIndex)
198 {
199 if(charToLocalStateSet[charIndex].isEmpty())
200 {
201 charToLocalStateSet[charIndex] = currentTerminalStateLocalSet;
202 }
203 }
204
205
206 std::vector<LocalStateSet> uniqueLocalNextStateSets;
207 for(
Uptr charIndex = 0;charIndex < 256;++charIndex)
208 {
209 const LocalStateSet localStateSet = charToLocalStateSet[charIndex];
210 if(!localStateSet.isEmpty())
211 {
212 addUnique(uniqueLocalNextStateSets,localStateSet);
213 }
214 }
215
216
217 std::map<LocalStateSet,StateIndex> localStateSetToDFAStateIndexMap;
218 for(auto localNextStateSet : uniqueLocalNextStateSets)
219 {
220
222 {
223 LocalStateSet residualLocalStateSet = localNextStateSet;
224 while(true)
225 {
226 const StateIndex localStateIndex = residualLocalStateSet.getSmallestMember();
227 if(localStateIndex == numSupportedLocalStates) { break; }
228
229 nextStateSet.push_back(localStateIndexToStateIndexMap.at(localStateIndex));
230 residualLocalStateSet.remove(localStateIndex);
231 };
232 }
233
234 if(nextStateSet.size() == 1 && *nextStateSet.begin() < 0)
235 {
236 localStateSetToDFAStateIndexMap.insert(std::make_pair(localNextStateSet,*nextStateSet.begin()));
237 }
238 else
239 {
240
241 auto nextDFAStateIt = nfaStateSetToDFAStateMap.find(nextStateSet);
243 if(nextDFAStateIt != nfaStateSetToDFAStateMap.end())
244 {
245 localStateSetToDFAStateIndexMap.emplace(localNextStateSet,nextDFAStateIt->second);
246 }
247 else
248 {
249
250
251 nextDFAStateIndex = (
StateIndex)dfaStates.size();
252 localStateSetToDFAStateIndexMap.emplace(localNextStateSet,nextDFAStateIndex);
253 nfaStateSetToDFAStateMap.insert(std::make_pair(nextStateSet,nextDFAStateIndex));
254 dfaStateToNFAStateSetMap.emplace_back(nextStateSet);
255 dfaStates.emplace_back();
256 pendingDFAStates.push_back(nextDFAStateIndex);
257 }
258 }
259 }
260
261
262 DFAState& dfaState = dfaStates[nfaStateSetToDFAStateMap[currentStateSet]];
263 for(auto localStateSetToDFAStateIndex : localStateSetToDFAStateIndexMap)
264 {
265 const LocalStateSet localStateSet = localStateSetToDFAStateIndex.first;
266 const StateIndex nextDFAStateIndex = localStateSetToDFAStateIndex.second;
267 for(
Uptr charIndex = 0;charIndex < 256;++charIndex)
268 {
269 if(charToLocalStateSet[charIndex] == localStateSet)
270 {
271 dfaState.nextStateByChar[charIndex] = nextDFAStateIndex;
272 }
273 }
274 }
275 maxDFANextStates = std::max(maxDFANextStates,(
Uptr)uniqueLocalNextStateSets.size());
276 };
277
282
283 return dfaStates;
284 }
PointerIntHelper< sizeof(size_t)>::UnsignedIntType Uptr
void fatalf(const char *messageFormat,...)
LOGGING_API void printf(Category category, const char *format,...)
std::vector< StateIndex > StateSet
@ unmatchedCharacterTerminal
void logTimer(const char *context, Timer &timer)