gamma.parsgen.lalr1.LRMachine source code

1 module gamma.parsgen.lalr1.LRMachine;
2 
3 import gamma.grammar.Alternative;
4 import gamma.grammar.Grammar;
5 import gamma.grammar.GrammarProperties;
6 import gamma.grammar.Nonterminal;
7 import gamma.grammar.Rule;
8 import gamma.grammar.Symbol;
9 import gamma.grammar.SymbolNode;
10 import gamma.grammar.Terminal;
11 import gamma.parsgen.lalr1.LRItem;
12 import gamma.util.Indexed;
13 import log;
14 
15 /**
16  * Computes an LR(0) machine from a given context-free grammar.
17  * The resulting LR(0) machine is "ordered" in the sense of
18  *
19  *     J. Röhrich: "Methods for the Automatic Construction of Error Correcting Parsers",
20  *     Acta Informatica 13, 115--139 (1980).
21  *
22  * @author SöKa
23  */
24 class LRMachine
25 {
26     private Grammar grammar;
27 
28     private GrammarProperties grammarProperties;
29 
30     private Transition[] transitions; // Transition elements
31 
32     private Transition[] ntTransitions; // Transition elements
33 
34     private State[] states; // State elements
35 
36     private State[State] hashedStates; // HashMap of objects in states (mapped to themselves)
37 
38     private LRItem[][] items; // [lhs.index][alt#]
39 
40     private int nextItemId;
41 
42     private bool[] itemTraversed;
43 
44     Transition[] getNontTransitions()
45     {
46         return ntTransitions.dup;
47     }
48 
49     Transition[] getTransitions()
50     {
51         return transitions.dup;
52     }
53 
54     Transition getTransition(State state, Symbol label)
55     {
56         foreach (transition; state.outgoing)
57             if (transition.label == label)
58                 return transition;
59         return null;
60     }
61 
62     /**
63      * Returns an unmodifiable List of the computed LR(0) machine's LRMachine.State
64      * objects. All states in this List are guaranteed to have distinct index values
65      * 0..(<i>n</i>-1) where <i>n</i> is the number of states in the List,
66      * and each state's index reflects the state's position in the List.
67      */
68     State[] getStates()
69     {
70         return states.dup;
71     }
72 
73     private interface LRItemProcessor
74     {
75         void process(LRItem item);
76     }
77 
78     class State : Indexed
79     {
80         size_t index_; // set when state is added to the lrMachine's state list
81 
82         LRItem[] kernelItems;
83 
84         LRItem[] orderedClosureItems;
85 
86         Transition[] outgoing;
87 
88         this(LRItem[] kernelItems)
89         {
90             // Caller guarantees that (ordered) kernelItems[] of this state contain no duplicates!
91             this.index_ = -1;
92             this.kernelItems = kernelItems;
93             this.orderedClosureItems = null;
94             this.outgoing = null;
95         }
96 
97         public size_t index() const
98         {
99             return this.index_;
100         }
101 
102         public override bool opEquals(Object x) const
103         {
104             if (!cast(State) x)
105                 return false;
106 
107             State s = cast(State) x;
108 
109             if (this.kernelItems.length != s.kernelItems.length)
110                 return false;
111             for (size_t i = 0, n = this.kernelItems.length; i < n; ++i)
112                 if (this.kernelItems[i] != s.kernelItems[i])
113                     return false;
114             return true;
115         }
116 
117         public override size_t toHash() const
118         {
119             size_t hc = this.kernelItems.length;
120 
121             for (size_t i = 0, n = this.kernelItems.length; i < n; ++i)
122                 hc = (hc * 379 + this.kernelItems[i].index) % 13579;
123             return hc;
124         }
125 
126         private void forEachOrderedClosureItem0(LRItem item, LRItemProcessor itemProcessor)
127         {
128             itemTraversed[item.index] = true;
129 
130             itemProcessor.process(item);
131 
132             // Recursively depth-first process closure items for item. Enumeration of closure items is according to
133             //
134             //   J. Röhrich: "Methods for the Automatic Construction of Error Correcting Parsers",
135             //   Acta Informatica 13, 115--139 (1980)
136             //
137             // such that the closure item belonging to the continuation grammar precedes the other closure items.
138             if (cast(Nonterminal) item.symbolBehindDot)
139             {
140                 LRItem [] closureItems = this.outer.items[(cast(Nonterminal) item.symbolBehindDot).index];
141 
142                 for (size_t j = 0, n = closureItems.length; j < n; ++j)
143                 {
144                     if (closureItems[j].altIsInContinuationGrammar && !itemTraversed[closureItems[j].index])
145                         forEachOrderedClosureItem0(closureItems[j], itemProcessor);
146                 }
147                 for (size_t j = 0, n = closureItems.length; j < n; ++j)
148                 {
149                     if (!closureItems[j].altIsInContinuationGrammar && !itemTraversed[closureItems[j].index])
150                         forEachOrderedClosureItem0(closureItems[j], itemProcessor);
151                 }
152             }
153         }
154 
155         void forEachOrderedClosureItem(LRItemProcessor itemProcessor)
156         {
157             itemTraversed[] = false;
158 
159             for (size_t i = 0, m = this.kernelItems.length; i < m; ++i)
160             {
161                 // process kernel item
162                 forEachOrderedClosureItem0(this.kernelItems[i], itemProcessor);
163             }
164         }
165 
166         void computeTransitions()
167         {
168             import std.algorithm : sort, SwapStrategy;
169 
170             /*
171              * Compute ordered closure of this.kernelItems[].
172              * Note that this.kernelItems[] contains no duplicate items,
173              * and that the "ordered closure" algorithm will, therefore, also produce no duplicates!
174              */
175             LRItem[] orderedClosureItemList;
176 
177             forEachOrderedClosureItem(
178                 new class LRItemProcessor
179                 {
180                     public void process(LRItem item)
181                     {
182                         orderedClosureItemList ~= item;
183                     }
184                 }
185                 );
186             this.orderedClosureItems = cast(LRItem []) orderedClosureItemList;
187 
188             trace!"state %s%-(\n%s%)"(index, this.orderedClosureItems);
189 
190             /*
191              * Move dot right in all (ordered) items of this state; collect the resulting items,
192              * and sort them by access symbol.
193              * The number of outgoing transitions can be computed from the result.
194              */
195             LRItem[] newItems;
196 
197             for (size_t i = 0, n = this.orderedClosureItems.length; i < n; ++i)
198             {
199                 if (!orderedClosureItems[i].complete)
200                     newItems ~= orderedClosureItems[i].nextItem;
201             }
202 
203             /*
204              * Any new states / item sets at all?
205              */
206             if (newItems.length == 0)
207                 return;
208 
209             /*
210              * Sort items in newItems[] by access symbol, then by whether the item's dotted
211              * alternatives belong to the chosen Röhrich continuation grammar or not, and
212              * finally by index.
213              * We rely on the fact that the Arrays.sort(Object[], Comparator) algorithm is a
214              * "stable" sort, i.e. it does not change the order of equal members w.r.t. the
215              * given Comparator.
216              */
217             LRItem[] sortedItems = cast(LRItem []) newItems;
218 
219             sortedItems.sort!((a, b) => itemSetPartitioner(a, b) < 0, SwapStrategy.stable);
220 
221             /*
222              * Now inspect the items. Groups of them belong to one target state, and there
223              * are no duplicate items. Check if target state is new or already known (kernel
224              * items suffice for this check). Build corresponding Transition objects and put
225              * them into the vector of outgoing transitions.
226              */
227             int m = 0;
228 
229             for (size_t i = 0, n = sortedItems.length; i < n;)
230             {
231                 size_t i0 = i;
232                 Symbol accessSymbol = sortedItems[i].symbolPrecedingDot;
233 
234                 ++i;
235                 while (i < n && sortedItems[i].symbolPrecedingDot == accessSymbol)
236                     ++i;
237 
238                 LRItem[] newKernelItems = new LRItem[i - i0];
239 
240                 newKernelItems[0 .. i - i0] = sortedItems[i0 .. i];
241                 State toState = new State(newKernelItems);
242 
243                 // Lookup hashedStates to see if toState is a known state
244                 State toState1 = hashedStates.get(toState, null);
245 
246                 if (toState1 is null)
247                     addState(toState);
248                 else
249                     toState = toState1;
250 
251                 trace!"GOTO(%s, %s) = %s"(this.index, accessSymbol, toState.index);
252 
253                 addTransition(new Transition(transitions.length, this, toState, accessSymbol));
254                 ++m;
255             }
256 
257             this.outgoing = new Transition [m];
258             for (size_t i = 0, j = transitions.length - m; i < m; ++i, ++j)
259                 this.outgoing[i] = transitions[j];
260         }
261 
262         private void print()
263         {
264             trace!"state %s%-(\n%s%)"(index, orderedClosureItems);
265         }
266     }
267 
268     class Transition : Indexed
269     {
270         const size_t index_;
271 
272         State from;
273 
274         State to;
275 
276         Symbol label;
277 
278         this(size_t index, State from, State to, Symbol label)
279         {
280             this.index_ = index;
281             this.from = from;
282             this.to = to;
283             this.label = label;
284         }
285 
286         public size_t index() const
287         {
288             return this.index_;
289         }
290     }
291 
292     this(Grammar grammar, GrammarProperties grammarProperties)
293     {
294         this.grammar = grammar;
295         this.grammarProperties = grammarProperties;
296     }
297 
298     void computeLR0Machine()
299     {
300         this.nextItemId = 0;
301 
302         this.transitions = null;
303         this.ntTransitions = null;
304         this.states = null;
305         hashedStates = null;
306 
307         // Generate all LR(0) items
308         this.items = new LRItem[][this.grammar.nonterminals.length];
309         foreach (lhs; this.grammar.nonterminals)
310         {
311             Rule rule = this.grammar.ruleOf(lhs);
312             this.items[lhs.index] = new LRItem[rule.alternatives.length];
313             for (size_t i = 0, n2 = rule.alternatives.length; i < n2; ++i)
314             {
315                 Alternative alt = rule.alternatives[i];
316                 LRItem item = null;
317 
318                 for (size_t j = alt.rhs.length + 1; j > 0; --j)
319                     item = new LRItem(this.nextItemId++, alt,
320                         grammarProperties.isFirstProductiveAlternative(alt), j - 1, item);
321                 this.items[lhs.index][i] = item;
322             }
323         }
324 
325         this.itemTraversed = new bool[numberOfItems];
326         addState(initialState);
327         for (int stateNumber = 0; stateNumber < this.states.length; ++stateNumber)
328         {
329             State state = this.states[stateNumber];
330             state.computeTransitions;
331         }
332         this.itemTraversed = null;
333 
334         // Unfortunately, we cannot drop all items and item sets here (as we could
335         // with Pennello/DeRemer's original algorithm) because output generation
336         // according to Röhrich error handling and caller-controlled conflict
337         // resolution require that we keep ordered item lists at the states.
338     }
339 
340     private State initialState()
341     {
342         LRItem[] kernelItems = [this.items[grammar.startSymbol.index][0]];
343 
344         return new State(kernelItems);
345     }
346 
347     private void addTransition(Transition transition)
348     {
349         this.transitions ~= transition;
350         if (cast(Nonterminal) transition.label)
351             this.ntTransitions ~= transition;
352     }
353 
354     private void addState(State state)
355     {
356         state.index_ = this.states.length;
357         this.states ~= state;
358         this.hashedStates[state] = state;
359     }
360 
361     private int numberOfItems()
362     {
363         return this.nextItemId;
364     }
365 
366 //    private LRItem shiftedDotItemOf(LRItem item)
367 //    {
368 //        return item.nextItem;
369 //    }
370 }
371 
372 private int itemSetPartitioner(LRItem i1, LRItem i2)
373 {
374     import std.conv : to;
375 
376     if (i1.symbolPrecedingDot !is null && i2.symbolPrecedingDot !is null
377         && i1.symbolPrecedingDot != i2.symbolPrecedingDot)
378     {
379         int n1;
380         int n2;
381 
382         if (cast(Terminal) i1.symbolPrecedingDot)
383             n1 = - (cast(Terminal) i1.symbolPrecedingDot).index.to!int - 1;
384         else
385             n1 = (cast(Nonterminal) i1.symbolPrecedingDot).index.to!int;
386         if (cast(Terminal) i2.symbolPrecedingDot)
387             n2 = - (cast(Terminal) i2.symbolPrecedingDot).index.to!int - 1;
388         else
389             n2 = (cast(Nonterminal) i2.symbolPrecedingDot).index.to!int;
390         return n1 - n2;
391     }
392 
393     return 0;
394 }
395 
396 @("compute LR(0) machine")
397 unittest
398 {
399     import gamma.grammar.GrammarBuilder : TestGrammarBuilder;
400     import std.conv : to;
401 
402     with (TestGrammarBuilder())
403     {
404         rule("G: E = E | f");
405         rule("E: T | E + T");
406         rule("T: f | T * f");
407 
408         auto grammarProperties = new GrammarProperties(grammar);
409 
410         with (new LRMachine(grammar, grammarProperties))
411         {
412             computeLR0Machine;
413 
414             auto states = getStates;
415 
416             assert(states.length == 10);
417             // state 0
418             assert(states[0].orderedClosureItems.to!(string[]) ==
419                 [
420                     `G -> . E "=" E`,
421                     `E => . T`,
422                     `T => . "f"`,
423                     `T -> . T "*" "f"`,
424                     `E -> . E "+" T`
425                 ]);
426             assert(getTransition(states[0], symbol("f")).to == states[1]);
427             assert(getTransition(states[0], symbol("E")).to == states[2]);
428             assert(getTransition(states[0], symbol("T")).to == states[3]);
429             // state 1
430             assert(states[1].orderedClosureItems.to!(string[]) ==
431                 [
432                     `T => "f" .`,
433                 ]);
434             // state 2
435             assert(states[2].orderedClosureItems.to!(string[]) ==
436                 [
437                     `G -> E . "=" E`,
438                     `E -> E . "+" T`,
439                 ]);
440             assert(getTransition(states[2], symbol("+")).to == states[4]);
441             assert(getTransition(states[2], symbol("=")).to == states[5]);
442             // state 3
443             assert(states[3].orderedClosureItems.to!(string[]) ==
444                 [
445                     `E => T .`,
446                     `T -> T . "*" "f"`,
447                 ]);
448             assert(getTransition(states[3], symbol("*")).to == states[6]);
449             // state 4
450             assert(states[4].orderedClosureItems.to!(string[]) ==
451                 [
452                     `E -> E "+" . T`,
453                     `T => . "f"`,
454                     `T -> . T "*" "f"`,
455                 ]);
456             assert(getTransition(states[4], symbol("f")).to == states[1]);
457             assert(getTransition(states[4], symbol("T")).to == states[7]);
458             // state 5
459             assert(states[5].orderedClosureItems.to!(string[]) ==
460                 [
461                     `G -> E "=" . E`,
462                     `E => . T`,
463                     `T => . "f"`,
464                     `T -> . T "*" "f"`,
465                     `E -> . E "+" T`,
466                 ]);
467             assert(getTransition(states[5], symbol("f")).to == states[1]);
468             assert(getTransition(states[5], symbol("E")).to == states[8]);
469             assert(getTransition(states[5], symbol("T")).to == states[3]);
470             // state 6
471             assert(states[6].orderedClosureItems.to!(string[]) ==
472                 [
473                     `T -> T "*" . "f"`,
474                 ]);
475             assert(getTransition(states[6], symbol("f")).to == states[9]);
476             // state 7
477             assert(states[7].orderedClosureItems.to!(string[]) ==
478                 [
479                     `E -> E "+" T .`,
480                     `T -> T . "*" "f"`,
481                 ]);
482             assert(getTransition(states[7], symbol("*")).to == states[6]);
483             // state 8
484             assert(states[8].orderedClosureItems.to!(string[]) ==
485                 [
486                     `G -> E "=" E .`,
487                     `E -> E . "+" T`,
488                 ]);
489             assert(getTransition(states[8], symbol("+")).to == states[4]);
490             // state 9
491             assert(states[9].orderedClosureItems.to!(string[]) ==
492                 [
493                     `T -> T "*" "f" .`,
494                 ]);
495         }
496     }
497 }