1 module gamma.parsgen.lalr1.LRMachine; 2 3 import gamma.grammar.Alternative; 4 import gamma.grammar.Grammar; 5 import gamma.grammar.GrammarProperties; 6 import gamma.grammar.Nonterminal; 7 import gamma.grammar.Rule; 8 import gamma.grammar.Symbol; 9 import gamma.grammar.SymbolNode; 10 import gamma.grammar.Terminal; 11 import gamma.parsgen.lalr1.LRItem; 12 import gamma.util.Indexed; 13 import log; 14 15 /** 16 * Computes an LR(0) machine from a given context-free grammar. 17 * The resulting LR(0) machine is "ordered" in the sense of 18 * 19 * J. Röhrich: "Methods for the Automatic Construction of Error Correcting Parsers", 20 * Acta Informatica 13, 115--139 (1980). 21 * 22 * @author SöKa 23 */ 24 class LRMachine 25 { 26 private Grammar grammar; 27 28 private GrammarProperties grammarProperties; 29 30 private Transition[] transitions; // Transition elements 31 32 private Transition[] ntTransitions; // Transition elements 33 34 private State[] states; // State elements 35 36 private State[State] hashedStates; // HashMap of objects in states (mapped to themselves) 37 38 private LRItem[][] items; // [lhs.index][alt#] 39 40 private int nextItemId; 41 42 private bool[] itemTraversed; 43 44 Transition[] getNontTransitions() 45 { 46 return ntTransitions.dup; 47 } 48 49 Transition[] getTransitions() 50 { 51 return transitions.dup; 52 } 53 54 Transition getTransition(State state, Symbol label) 55 { 56 foreach (transition; state.outgoing) 57 if (transition.label == label) 58 return transition; 59 return null; 60 } 61 62 /** 63 * Returns an unmodifiable List of the computed LR(0) machine's LRMachine.State 64 * objects. All states in this List are guaranteed to have distinct index values 65 * 0..(<i>n</i>-1) where <i>n</i> is the number of states in the List, 66 * and each state's index reflects the state's position in the List. 67 */ 68 State[] getStates() 69 { 70 return states.dup; 71 } 72 73 private interface LRItemProcessor 74 { 75 void process(LRItem item); 76 } 77 78 class State : Indexed 79 { 80 size_t index_; // set when state is added to the lrMachine's state list 81 82 LRItem[] kernelItems; 83 84 LRItem[] orderedClosureItems; 85 86 Transition[] outgoing; 87 88 this(LRItem[] kernelItems) 89 { 90 // Caller guarantees that (ordered) kernelItems[] of this state contain no duplicates! 91 this.index_ = -1; 92 this.kernelItems = kernelItems; 93 this.orderedClosureItems = null; 94 this.outgoing = null; 95 } 96 97 public size_t index() const 98 { 99 return this.index_; 100 } 101 102 public override bool opEquals(Object x) const 103 { 104 if (!cast(State) x) 105 return false; 106 107 State s = cast(State) x; 108 109 if (this.kernelItems.length != s.kernelItems.length) 110 return false; 111 for (size_t i = 0, n = this.kernelItems.length; i < n; ++i) 112 if (this.kernelItems[i] != s.kernelItems[i]) 113 return false; 114 return true; 115 } 116 117 public override size_t toHash() const 118 { 119 size_t hc = this.kernelItems.length; 120 121 for (size_t i = 0, n = this.kernelItems.length; i < n; ++i) 122 hc = (hc * 379 + this.kernelItems[i].index) % 13579; 123 return hc; 124 } 125 126 private void forEachOrderedClosureItem0(LRItem item, LRItemProcessor itemProcessor) 127 { 128 itemTraversed[item.index] = true; 129 130 itemProcessor.process(item); 131 132 // Recursively depth-first process closure items for item. Enumeration of closure items is according to 133 // 134 // J. Röhrich: "Methods for the Automatic Construction of Error Correcting Parsers", 135 // Acta Informatica 13, 115--139 (1980) 136 // 137 // such that the closure item belonging to the continuation grammar precedes the other closure items. 138 if (cast(Nonterminal) item.symbolBehindDot) 139 { 140 LRItem [] closureItems = this.outer.items[(cast(Nonterminal) item.symbolBehindDot).index]; 141 142 for (size_t j = 0, n = closureItems.length; j < n; ++j) 143 { 144 if (closureItems[j].altIsInContinuationGrammar && !itemTraversed[closureItems[j].index]) 145 forEachOrderedClosureItem0(closureItems[j], itemProcessor); 146 } 147 for (size_t j = 0, n = closureItems.length; j < n; ++j) 148 { 149 if (!closureItems[j].altIsInContinuationGrammar && !itemTraversed[closureItems[j].index]) 150 forEachOrderedClosureItem0(closureItems[j], itemProcessor); 151 } 152 } 153 } 154 155 void forEachOrderedClosureItem(LRItemProcessor itemProcessor) 156 { 157 itemTraversed[] = false; 158 159 for (size_t i = 0, m = this.kernelItems.length; i < m; ++i) 160 { 161 // process kernel item 162 forEachOrderedClosureItem0(this.kernelItems[i], itemProcessor); 163 } 164 } 165 166 void computeTransitions() 167 { 168 import std.algorithm : sort, SwapStrategy; 169 170 /* 171 * Compute ordered closure of this.kernelItems[]. 172 * Note that this.kernelItems[] contains no duplicate items, 173 * and that the "ordered closure" algorithm will, therefore, also produce no duplicates! 174 */ 175 LRItem[] orderedClosureItemList; 176 177 forEachOrderedClosureItem( 178 new class LRItemProcessor 179 { 180 public void process(LRItem item) 181 { 182 orderedClosureItemList ~= item; 183 } 184 } 185 ); 186 this.orderedClosureItems = cast(LRItem []) orderedClosureItemList; 187 188 trace!"state %s%-(\n%s%)"(index, this.orderedClosureItems); 189 190 /* 191 * Move dot right in all (ordered) items of this state; collect the resulting items, 192 * and sort them by access symbol. 193 * The number of outgoing transitions can be computed from the result. 194 */ 195 LRItem[] newItems; 196 197 for (size_t i = 0, n = this.orderedClosureItems.length; i < n; ++i) 198 { 199 if (!orderedClosureItems[i].complete) 200 newItems ~= orderedClosureItems[i].nextItem; 201 } 202 203 /* 204 * Any new states / item sets at all? 205 */ 206 if (newItems.length == 0) 207 return; 208 209 /* 210 * Sort items in newItems[] by access symbol, then by whether the item's dotted 211 * alternatives belong to the chosen Röhrich continuation grammar or not, and 212 * finally by index. 213 * We rely on the fact that the Arrays.sort(Object[], Comparator) algorithm is a 214 * "stable" sort, i.e. it does not change the order of equal members w.r.t. the 215 * given Comparator. 216 */ 217 LRItem[] sortedItems = cast(LRItem []) newItems; 218 219 sortedItems.sort!((a, b) => itemSetPartitioner(a, b) < 0, SwapStrategy.stable); 220 221 /* 222 * Now inspect the items. Groups of them belong to one target state, and there 223 * are no duplicate items. Check if target state is new or already known (kernel 224 * items suffice for this check). Build corresponding Transition objects and put 225 * them into the vector of outgoing transitions. 226 */ 227 int m = 0; 228 229 for (size_t i = 0, n = sortedItems.length; i < n;) 230 { 231 size_t i0 = i; 232 Symbol accessSymbol = sortedItems[i].symbolPrecedingDot; 233 234 ++i; 235 while (i < n && sortedItems[i].symbolPrecedingDot == accessSymbol) 236 ++i; 237 238 LRItem[] newKernelItems = new LRItem[i - i0]; 239 240 newKernelItems[0 .. i - i0] = sortedItems[i0 .. i]; 241 State toState = new State(newKernelItems); 242 243 // Lookup hashedStates to see if toState is a known state 244 State toState1 = hashedStates.get(toState, null); 245 246 if (toState1 is null) 247 addState(toState); 248 else 249 toState = toState1; 250 251 trace!"GOTO(%s, %s) = %s"(this.index, accessSymbol, toState.index); 252 253 addTransition(new Transition(transitions.length, this, toState, accessSymbol)); 254 ++m; 255 } 256 257 this.outgoing = new Transition [m]; 258 for (size_t i = 0, j = transitions.length - m; i < m; ++i, ++j) 259 this.outgoing[i] = transitions[j]; 260 } 261 262 private void print() 263 { 264 trace!"state %s%-(\n%s%)"(index, orderedClosureItems); 265 } 266 } 267 268 class Transition : Indexed 269 { 270 const size_t index_; 271 272 State from; 273 274 State to; 275 276 Symbol label; 277 278 this(size_t index, State from, State to, Symbol label) 279 { 280 this.index_ = index; 281 this.from = from; 282 this.to = to; 283 this.label = label; 284 } 285 286 public size_t index() const 287 { 288 return this.index_; 289 } 290 } 291 292 this(Grammar grammar, GrammarProperties grammarProperties) 293 { 294 this.grammar = grammar; 295 this.grammarProperties = grammarProperties; 296 } 297 298 void computeLR0Machine() 299 { 300 this.nextItemId = 0; 301 302 this.transitions = null; 303 this.ntTransitions = null; 304 this.states = null; 305 hashedStates = null; 306 307 // Generate all LR(0) items 308 this.items = new LRItem[][this.grammar.nonterminals.length]; 309 foreach (lhs; this.grammar.nonterminals) 310 { 311 Rule rule = this.grammar.ruleOf(lhs); 312 this.items[lhs.index] = new LRItem[rule.alternatives.length]; 313 for (size_t i = 0, n2 = rule.alternatives.length; i < n2; ++i) 314 { 315 Alternative alt = rule.alternatives[i]; 316 LRItem item = null; 317 318 for (size_t j = alt.rhs.length + 1; j > 0; --j) 319 item = new LRItem(this.nextItemId++, alt, 320 grammarProperties.isFirstProductiveAlternative(alt), j - 1, item); 321 this.items[lhs.index][i] = item; 322 } 323 } 324 325 this.itemTraversed = new bool[numberOfItems]; 326 addState(initialState); 327 for (int stateNumber = 0; stateNumber < this.states.length; ++stateNumber) 328 { 329 State state = this.states[stateNumber]; 330 state.computeTransitions; 331 } 332 this.itemTraversed = null; 333 334 // Unfortunately, we cannot drop all items and item sets here (as we could 335 // with Pennello/DeRemer's original algorithm) because output generation 336 // according to Röhrich error handling and caller-controlled conflict 337 // resolution require that we keep ordered item lists at the states. 338 } 339 340 private State initialState() 341 { 342 LRItem[] kernelItems = [this.items[grammar.startSymbol.index][0]]; 343 344 return new State(kernelItems); 345 } 346 347 private void addTransition(Transition transition) 348 { 349 this.transitions ~= transition; 350 if (cast(Nonterminal) transition.label) 351 this.ntTransitions ~= transition; 352 } 353 354 private void addState(State state) 355 { 356 state.index_ = this.states.length; 357 this.states ~= state; 358 this.hashedStates[state] = state; 359 } 360 361 private int numberOfItems() 362 { 363 return this.nextItemId; 364 } 365 366 // private LRItem shiftedDotItemOf(LRItem item) 367 // { 368 // return item.nextItem; 369 // } 370 } 371 372 private int itemSetPartitioner(LRItem i1, LRItem i2) 373 { 374 import std.conv : to; 375 376 if (i1.symbolPrecedingDot !is null && i2.symbolPrecedingDot !is null 377 && i1.symbolPrecedingDot != i2.symbolPrecedingDot) 378 { 379 int n1; 380 int n2; 381 382 if (cast(Terminal) i1.symbolPrecedingDot) 383 n1 = - (cast(Terminal) i1.symbolPrecedingDot).index.to!int - 1; 384 else 385 n1 = (cast(Nonterminal) i1.symbolPrecedingDot).index.to!int; 386 if (cast(Terminal) i2.symbolPrecedingDot) 387 n2 = - (cast(Terminal) i2.symbolPrecedingDot).index.to!int - 1; 388 else 389 n2 = (cast(Nonterminal) i2.symbolPrecedingDot).index.to!int; 390 return n1 - n2; 391 } 392 393 return 0; 394 } 395 396 @("compute LR(0) machine") 397 unittest 398 { 399 import gamma.grammar.GrammarBuilder : TestGrammarBuilder; 400 import std.conv : to; 401 402 with (TestGrammarBuilder()) 403 { 404 rule("G: E = E | f"); 405 rule("E: T | E + T"); 406 rule("T: f | T * f"); 407 408 auto grammarProperties = new GrammarProperties(grammar); 409 410 with (new LRMachine(grammar, grammarProperties)) 411 { 412 computeLR0Machine; 413 414 auto states = getStates; 415 416 assert(states.length == 10); 417 // state 0 418 assert(states[0].orderedClosureItems.to!(string[]) == 419 [ 420 `G -> . E "=" E`, 421 `E => . T`, 422 `T => . "f"`, 423 `T -> . T "*" "f"`, 424 `E -> . E "+" T` 425 ]); 426 assert(getTransition(states[0], symbol("f")).to == states[1]); 427 assert(getTransition(states[0], symbol("E")).to == states[2]); 428 assert(getTransition(states[0], symbol("T")).to == states[3]); 429 // state 1 430 assert(states[1].orderedClosureItems.to!(string[]) == 431 [ 432 `T => "f" .`, 433 ]); 434 // state 2 435 assert(states[2].orderedClosureItems.to!(string[]) == 436 [ 437 `G -> E . "=" E`, 438 `E -> E . "+" T`, 439 ]); 440 assert(getTransition(states[2], symbol("+")).to == states[4]); 441 assert(getTransition(states[2], symbol("=")).to == states[5]); 442 // state 3 443 assert(states[3].orderedClosureItems.to!(string[]) == 444 [ 445 `E => T .`, 446 `T -> T . "*" "f"`, 447 ]); 448 assert(getTransition(states[3], symbol("*")).to == states[6]); 449 // state 4 450 assert(states[4].orderedClosureItems.to!(string[]) == 451 [ 452 `E -> E "+" . T`, 453 `T => . "f"`, 454 `T -> . T "*" "f"`, 455 ]); 456 assert(getTransition(states[4], symbol("f")).to == states[1]); 457 assert(getTransition(states[4], symbol("T")).to == states[7]); 458 // state 5 459 assert(states[5].orderedClosureItems.to!(string[]) == 460 [ 461 `G -> E "=" . E`, 462 `E => . T`, 463 `T => . "f"`, 464 `T -> . T "*" "f"`, 465 `E -> . E "+" T`, 466 ]); 467 assert(getTransition(states[5], symbol("f")).to == states[1]); 468 assert(getTransition(states[5], symbol("E")).to == states[8]); 469 assert(getTransition(states[5], symbol("T")).to == states[3]); 470 // state 6 471 assert(states[6].orderedClosureItems.to!(string[]) == 472 [ 473 `T -> T "*" . "f"`, 474 ]); 475 assert(getTransition(states[6], symbol("f")).to == states[9]); 476 // state 7 477 assert(states[7].orderedClosureItems.to!(string[]) == 478 [ 479 `E -> E "+" T .`, 480 `T -> T . "*" "f"`, 481 ]); 482 assert(getTransition(states[7], symbol("*")).to == states[6]); 483 // state 8 484 assert(states[8].orderedClosureItems.to!(string[]) == 485 [ 486 `G -> E "=" E .`, 487 `E -> E . "+" T`, 488 ]); 489 assert(getTransition(states[8], symbol("+")).to == states[4]); 490 // state 9 491 assert(states[9].orderedClosureItems.to!(string[]) == 492 [ 493 `T -> T "*" "f" .`, 494 ]); 495 } 496 } 497 }