1 module gamma.grammar.GrammarProperties;
2 
3 import gamma.grammar.hyper.Operator;
4 import gamma.grammar.hyper.Option;
5 import gamma.grammar.hyper.Repetition;
6 import gamma.grammar.Alternative;
7 import gamma.grammar.Grammar;
8 import gamma.grammar.Node;
9 import gamma.grammar.Nonterminal;
10 import gamma.grammar.Rule;
11 import gamma.grammar.Symbol;
12 import gamma.grammar.SymbolNode;
13 import gamma.grammar.Terminal;
14 import log;
15 import std.algorithm;
16 import std.bitmanip : BitArray;
17 import std.range;
18 
19 version (unittest) import gamma.grammar.GrammarBuilder;
20 
21 public class GrammarProperties
22 {
23     /**
24      * The grammar for which all properties are computed.
25      */
26     private Grammar grammar_;
27 
28     /**
29      * A flag for each nonterminal which is true if the nonterminal produces the
30      * empty word.
31      */
32     private BitArray nullableNonterminals;
33 
34     /**
35      * A flag for each nonterminal which is true if the nonterminal produces
36      * terminals or the empty word.
37      */
38     private BitArray productiveNonterminals;
39 
40     /**
41      * Contains a list of alternatives for every nonterminal where it occurs on the rhs
42      * <code>nontOccurrences</code> : Nonterminal.index() -> List of Alternatives
43      */
44     private Alternative[][] nontOccurrences;
45 
46     /**
47      * Maps each (lhs) Nonterminal.index() to the first (or "simplest")
48      * Alternative which proved its left hand side to be productive.
49      */
50     private Alternative[] firstProductiveAlternative;
51 
52     /**
53      * A flag for each nonterminal which is true if the nonterminal is reachable
54      * from the grammar's start symbol by applying derivation rules.
55      */
56     private BitArray reachableNonterminals;
57 
58     /**
59      * A flag for each terminal which is true if the terminal is reachable from
60      * the grammar's start symbol by applying derivation rules.
61      */
62     private BitArray reachableTerminals;
63 
64     /**
65      * A flag for each nonterminal which is true if the nobtrminal defines
66      * a predicate
67      */
68     private BitArray predicateNonterminals;
69 
70     public this(Grammar grammar)
71     {
72         this.grammar_ = grammar;
73     }
74 
75     /**
76      * Returns whether the symbol is nullable or not.
77      */
78     public bool isNullable(Symbol symbol)
79     in (cast(Nonterminal) symbol || cast(Terminal) symbol)
80     {
81         if (nullableNonterminals.empty)
82             computeNullablesAndProductives;
83 
84         return cast(Nonterminal) symbol
85             && nullableNonterminals[(cast(Nonterminal) symbol).index];
86     }
87 
88     /**
89      * Returns whether the symbol is productive or not.
90      */
91     public bool isProductive(Symbol symbol)
92     in (cast(Nonterminal) symbol || cast(Terminal) symbol)
93     {
94         if (productiveNonterminals.empty)
95             computeNullablesAndProductives;
96 
97         return cast(Terminal) symbol
98             || productiveNonterminals[(cast(Nonterminal) symbol).index];
99     }
100 
101     /**
102      * Returns whether the symbol can be reached from the grammar's
103      * start symbol by applying derivation rules.
104      */
105     public bool isReachable(Symbol symbol)
106     in (cast(Nonterminal) symbol || cast(Terminal) symbol)
107     {
108         if (reachableNonterminals.empty)
109             computeReachables;
110 
111         if (cast(Nonterminal) symbol)
112             return reachableNonterminals[(cast(Nonterminal) symbol).index];
113         else
114             return reachableTerminals[(cast(Terminal) symbol).index];
115     }
116 
117     /**
118      * Returns true if the given symbol is strong nullable.
119      * A symbol is strong nullable if and only if it derives only the empty word.
120      *
121      * @author kuniss 16.10.2004
122      */
123     public bool isStrongNullable(Symbol symbol)
124     {
125         if (this.predicateNonterminals.empty)
126             computeStrongNullables;
127 
128         return (cast(Nonterminal) symbol)
129             && predicateNonterminals[(cast(Nonterminal) symbol).index];
130     }
131 
132     /**
133      * Return whether the given Alternative is the first (or "simplest") in its
134      * Rule structure which has been proven to be productive.
135      */
136     public bool isFirstProductiveAlternative(Alternative alternative)
137     {
138         if (productiveNonterminals.empty)
139             computeNullablesAndProductives;
140 
141         return firstProductiveAlternative[(cast(Nonterminal) alternative.lhs.symbol).index] == alternative;
142     }
143 
144     /**
145      * Return the first (or "simplest") Alternative in the given Rule structure
146      * which has been proven to be productive.
147      */
148     public Alternative firstProductiveAlternativeFor(Rule rule)
149     {
150         if (productiveNonterminals.empty)
151             computeNullablesAndProductives;
152 
153         Alternative[] alternatives = rule.alternatives;
154 
155         if (alternatives.empty)
156             return null;
157 
158         return firstProductiveAlternative[(cast(Nonterminal) alternatives.front.lhs.symbol).index];
159     }
160 
161     /**
162      * Returns whether the grammar is reduced.
163      */
164     public bool isReduced()
165     {
166         if (reachableNonterminals.empty)
167             computeReachables;
168 
169         if (productiveNonterminals.empty)
170             computeNullablesAndProductives;
171 
172         return grammar.terminals.all!(terminal => isReachable(terminal))
173             && grammar.nonterminals.all!(nonterminal => isReachable(nonterminal))
174             && grammar.nonterminals.all!(nonterminal => isProductive(nonterminal));
175     }
176 
177     /**
178      * Creates the map this.nontOccurrences,
179      * a list of alternatives for every nonterminal where it occurs on the rhs
180      * @see GrammarProperties.nontOccurrences
181      */
182     private void createNontOccurrences()
183     {
184         this.nontOccurrences = new Alternative[][this.grammar_.nonterminals.length];
185         foreach (lhs; this.grammar_.nonterminals)
186         {
187             foreach (alternative; this.grammar_.ruleOf(lhs).alternatives)
188             {
189                 foreach (i; 0 .. alternative.rhs.length)
190                 {
191                     auto node = cast(Node) alternative.rhs[i];
192 
193                     if (cast(SymbolNode) node)
194                     {
195                         auto symbol = cast(Symbol) (cast(SymbolNode) node).symbol;
196 
197                         if (cast(Nonterminal) symbol)
198                             this.nontOccurrences[(cast(Nonterminal) symbol).index] ~= alternative;
199                     }
200                 }
201             }
202         }
203     }
204 
205     /**
206      * Computes the set of nullable and productive nonterminals.
207      */
208     private void computeNullablesAndProductives()
209     {
210         /*
211          * create map:
212          * <code>alt2Count</code> : Alternative -> number of symbols and operators in the alternative
213          */
214         size_t[Alternative] alt2Count;
215 
216         foreach (lhs; this.grammar_.nonterminals)
217         {
218             foreach (alternative; this.grammar_.ruleOf(lhs).alternatives)
219                 alt2Count[alternative] = alternative.rhs.length;
220         }
221         if (this.nontOccurrences.empty)
222             createNontOccurrences;
223 
224         firstProductiveAlternative = new Alternative[this.grammar_.nonterminals.length];
225 
226         /*
227          * Determine nullable nonterminals.
228          * - the stack <code>nontStack</code> contains all nonterminals
229          *   which have been marked as nullable and for which this property
230          *   has to be propagated to the alternatives where they occur on
231          *   right hand sides;
232          * - alt2Count contains the number of symbols and operators which
233          *   are potentially not nullable
234          * 1. find all Repetition and Option operators and decrement
235          *     alt2Count for them
236          * 2. all nonterminals with empty alternatives on the right
237          *     hand side are marked and put on a stack
238          * 3. the nullable property is propagated until the stack is empty
239          */
240         nullableNonterminals = BitArray();
241         nullableNonterminals.length = this.grammar_.nonterminals.length;
242 
243         // find all Repetition and Option operators;
244         // they have implicit empty rules
245         foreach (lhs; this.grammar_.nonterminals)
246         {
247             foreach (alt; this.grammar_.ruleOf(lhs).alternatives)
248             {
249                 int op = 0;
250 
251                 for (size_t i = 0, rhsLen = alt2Count[alt]; i < rhsLen; i++)
252                 {
253                     auto node = cast(Node) alt.rhs[i];
254 
255                     if (cast(Repetition) node || cast(Option) node)
256                         ++op;
257                 }
258                 if (op > 0)
259                     alt2Count[alt] -= op;
260             }
261         }
262 
263         Nonterminal[] nontStack;
264 
265         // find all empty alternatives,
266         // initialize nullableNonterminals with them and put them on the stack
267         foreach (nont; this.grammar_.nonterminals)
268         {
269             foreach (alternative; this.grammar_.ruleOf(nont).alternatives)
270             {
271                 if (alt2Count[alternative] == 0 && !nullableNonterminals[nont.index])
272                 {
273                     // empty alternative
274                     nontStack ~= nont;
275                     nullableNonterminals[nont.index] = true;
276                     if (firstProductiveAlternative[nont.index] is null)
277                         firstProductiveAlternative[nont.index] = alternative;
278                 }
279             }
280         }
281 
282         // propagate nullable nonterminal
283         while (!nontStack.empty)
284         {
285             Nonterminal nont = nontStack.back;
286 
287             nontStack.popBack;
288             foreach (alternative; nontOccurrences[nont.index])
289             {
290                 size_t count = alt2Count[alternative];
291 
292                 --count;
293                 alt2Count[alternative] = count;
294                 if (count == 0) // the alternative is nullable
295                 {
296                     // mark the lhs nonterminal and put it on the stack
297                     assert(cast(Nonterminal) alternative.lhs.symbol);
298 
299                     auto lhs = cast(Nonterminal) alternative.lhs.symbol;
300 
301                     if (!nullableNonterminals[lhs.index])
302                     {
303                         nontStack ~= lhs;
304                         nullableNonterminals[lhs.index] = true;
305                         if (firstProductiveAlternative[lhs.index] is null)
306                             firstProductiveAlternative[lhs.index] = alternative;
307                     }
308                 }
309             }
310         }
311 
312         trace!"nullable nonterminals:%-(\n%s%)"
313             (this.grammar_.nonterminals.filter!(nonterminal => isNullable(nonterminal)));
314 
315         /*
316          * Compute productive nonterminals.
317          * - the stack <code>nontStack</code> is reused but now it contains
318          *   all nonterminals which has been marked as productive and for
319          *   which this property has to be propagated to the alternatives
320          *   where they occur on right hand sides.
321          * 1. all nullable nonterminals are productive
322          * 2. all nonterminals which produces terminals directly are marked
323          *     as productive and put on the stack
324          * 3. the productive property is propagated until the stack is empty
325          */
326         productiveNonterminals = BitArray();
327         productiveNonterminals.length = this.grammar_.nonterminals.length;
328 
329         // initialize productiveNonterminals with nullable nonterminals
330         foreach (nont; this.grammar_.nonterminals)
331         {
332             if (nullableNonterminals[nont.index])
333                 productiveNonterminals[nont.index] = true;
334         }
335 
336         // initialize productiveNonterminals with nonterminals which produces
337         // terminals directly
338         foreach (nont; this.grammar_.nonterminals)
339         {
340             foreach (alternative; this.grammar_.ruleOf(nont).alternatives)
341             {
342                 size_t t = 0; // the number of terminals in the alternative
343 
344                 for (size_t i = 0, n = alternative.rhs.length; i < n; ++i)
345                 {
346                     auto node = cast(Node) alternative.rhs[i];
347 
348                     if (cast(SymbolNode) node)
349                         if (cast(Terminal) (cast(SymbolNode) node).symbol)
350                             ++t;
351                 }
352                 if (t > 0)
353                 {
354                     t = alt2Count[alternative] - t;
355                     alt2Count[alternative] = t;
356                     if (t == 0 && !productiveNonterminals[nont.index])
357                     {
358                         nontStack ~= nont;
359                         productiveNonterminals[nont.index] = true;
360                         if (firstProductiveAlternative[nont.index] is null)
361                             firstProductiveAlternative[nont.index] = alternative;
362                     }
363                 }
364             }
365         }
366 
367         // propagate productive nonterminals (make transitive closure)
368         while (!nontStack.empty)
369         {
370             Nonterminal nont = nontStack.back;
371 
372             nontStack.popBack;
373             foreach (alternative; nontOccurrences[nont.index])
374             {
375                 size_t count = alt2Count[alternative];
376 
377                 --count;
378                 alt2Count[alternative] = count;
379                 if (count == 0)
380                 // the alternative is productive, mark the lhs nonterminal and
381                 // put it on the stack
382                 {
383                     assert(cast(Nonterminal) alternative.lhs.symbol);
384 
385                     auto lhs = cast(Nonterminal) alternative.lhs.symbol;
386 
387                     if (!productiveNonterminals[lhs.index])
388                     {
389                         nontStack ~= lhs;
390                         productiveNonterminals[lhs.index] = true;
391                         if (firstProductiveAlternative[lhs.index] is null)
392                             firstProductiveAlternative[lhs.index] = alternative;
393                     }
394                 }
395             }
396         }
397 
398         trace!"productive nonterminals:%-(\n%s%)"
399             (this.grammar_.nonterminals.filter!(nonterminal => isProductive(nonterminal)));
400     }
401 
402     /**
403      * Computes the set <code>reachableNonterminals</code> of nonterminals
404      * reached from the grammar's start symbol by applying derivation rules.
405      */
406     private void computeReachables()
407     {
408         reachableNonterminals = BitArray();
409         reachableNonterminals.length = this.grammar_.nonterminals.length;
410         reachableTerminals = BitArray();
411         reachableTerminals.length = this.grammar_.terminals.length;
412 
413         traverseReachables(this.grammar_.startSymbol);
414 
415         trace!"reachable nonterminals:%-(\n%s%)"
416             (this.grammar_.nonterminals.filter!(nonterminal => isReachable(nonterminal)));
417         trace!"reachable terminals:%-(\n%s%)"
418             (this.grammar_.terminals.filter!(terminal => isReachable(terminal)));
419     }
420 
421     /**
422      * Depth-first traversal of the symbols occurring on right hand sides (if
423      * any) of the given <code>symbol</code>.
424      */
425     private void traverseReachables(Symbol symbol)
426     in (cast(Nonterminal) symbol || cast(Terminal) symbol)
427     {
428         if (cast(Nonterminal) symbol)
429         {
430             auto nonterminal = cast(Nonterminal) symbol;
431 
432             if (!reachableNonterminals[nonterminal.index])
433             {
434                 reachableNonterminals[nonterminal.index] = true;
435 
436                 foreach (alt; this.grammar_.ruleOf(nonterminal).alternatives)
437                 {
438                     for (size_t i = 0, n = alt.rhs.length; i < n; ++i)
439                     {
440                         auto node = cast(Node) alt.rhs[i];
441 
442                         if (cast(SymbolNode) node)
443                             traverseReachables(cast(Symbol) (cast(SymbolNode) node).symbol);
444                         else
445                         {
446                             // for EBNF operators some walk-arounds are
447                             // necessary to get the lhs nonterminal
448                             // assert(node instanceof grammar_.hyper.Operator);
449                             // asser(((Operator) node).rule.alternatives.size > 0);
450                             auto firstAlt = cast(Alternative) (cast(Operator) node).rule.alternatives.front;
451 
452                             traverseReachables(firstAlt.lhs.symbol);
453                         }
454                     }
455                 }
456             }
457         }
458         else
459         {
460             if (!reachableTerminals[(cast(Terminal) symbol).index])
461                 reachableTerminals[(cast(Terminal) symbol).index] = true;
462         }
463     }
464 
465     /**
466      * Computes the set of strong nullable nonterminals
467      * TODO has to be integrated into computeNullablesAndProductives()
468      * @author kuniss 16.10.2004
469      */
470     private void computeStrongNullables()
471     {
472         if (this.nontOccurrences.empty)
473             createNontOccurrences;
474 
475         Nonterminal[] nonStrongNullableStack; // all nonterminals which are definitely not strong nullable
476         BitArray strongNullablesComplementSet;
477 
478         strongNullablesComplementSet.length = this.grammar_.nonterminals.length;
479 
480         // initialize stack and the complement set
481         foreach (lhs; this.grammar_.nonterminals)
482         {
483             if (!isNullable(lhs))
484             {
485                 nonStrongNullableStack ~= lhs;
486                 strongNullablesComplementSet[lhs.index] = true;
487             }
488             else
489             {
490                  foreach (alternative; this.grammar_.ruleOf(lhs).alternatives)
491                  {
492                     foreach (node; alternative.rhs)
493                     {
494                         if (cast(SymbolNode) node)
495                         {
496                             auto symbol = cast(Symbol) (cast(SymbolNode) node).symbol;
497 
498                             if (cast(Terminal) symbol)
499                             {
500                                 if (!strongNullablesComplementSet[lhs.index])
501                                 {
502                                     nonStrongNullableStack ~= lhs;
503                                     strongNullablesComplementSet[lhs.index] = true;
504                                 }
505                                 break; // node loop
506                             }
507                         }
508                     }
509                 }
510             }
511         }
512 
513         while (!nonStrongNullableStack.empty)
514         {
515             // assert nonStrongNullableStack.top instanceof Nonterminal
516             Nonterminal nont = nonStrongNullableStack.back;
517             Alternative[] altList = this.nontOccurrences[nont.index];
518 
519             nonStrongNullableStack.popBack;
520             foreach (alt; altList)
521             {
522                 // assert alt.lhs.symbol instanceof Nonterminal
523                 auto lhs = cast(Nonterminal) alt.lhs.symbol;
524 
525                 if (!strongNullablesComplementSet[lhs.index])
526                 {
527                     nonStrongNullableStack ~= lhs;
528                     strongNullablesComplementSet[lhs.index] = true;
529                 }
530             }
531 
532             this.predicateNonterminals = this.productiveNonterminals.dup;
533             this.predicateNonterminals -= strongNullablesComplementSet;
534         }
535 
536         trace!"predicates:%-(\n%s%)"
537             (this.grammar_.nonterminals.filter!(nonterminal => isStrongNullable(nonterminal)));
538     }
539 
540     public Grammar grammar()
541     {
542         return this.grammar_;
543     }
544 }
545 
546 @("compute properties of degenerate grammar")
547 unittest
548 {
549     with (TestGrammarBuilder())
550     {
551         rule("A: A");
552         rule("B: | B");
553 
554         with (new GrammarProperties(grammar))
555         {
556             assert(!isNullable(symbol("A")));
557             assert(isNullable(symbol("B")));
558             assert(isStrongNullable(symbol("B")));
559 
560             assert(!isProductive(symbol("A")));
561             assert(isProductive(symbol("B")));
562 
563             assert(isReachable(symbol("A")));
564             assert(!isReachable(symbol("B")));
565 
566             assert(!isReduced);
567         }
568     }
569 }
570 
571 @("compute properties of grammar from Tremblay and Sorenson")
572 unittest
573 {
574     with (TestGrammarBuilder())
575     {
576         rule("G: E = E | f");
577         rule("E: T | E + T");
578         rule("T: f | T * f");
579 
580         with (new GrammarProperties(grammar))
581         {
582             assert(!isNullable(symbol("G")));
583             assert(!isNullable(symbol("E")));
584             assert(!isNullable(symbol("T")));
585             assert(!isNullable(symbol("f")));
586             assert(!isNullable(symbol("=")));
587             assert(!isNullable(symbol("+")));
588             assert(!isNullable(symbol("*")));
589 
590             assert(isProductive(symbol("G")));
591             assert(isProductive(symbol("E")));
592             assert(isProductive(symbol("T")));
593             assert(isProductive(symbol("f")));
594             assert(isProductive(symbol("=")));
595             assert(isProductive(symbol("+")));
596             assert(isProductive(symbol("*")));
597 
598             assert(isReachable(symbol("G")));
599             assert(isReachable(symbol("E")));
600             assert(isReachable(symbol("T")));
601             assert(isReachable(symbol("f")));
602             assert(isReachable(symbol("=")));
603             assert(isReachable(symbol("+")));
604             assert(isReachable(symbol("*")));
605 
606             assert(isReduced);
607         }
608     }
609 }