1 module gamma.input.epsilang.Analyzer;
2 
3 import gamma.input.epsilang.Scanner;
4 import gamma.grammar.Alternative;
5 import gamma.grammar.Grammar;
6 import gamma.grammar.GrammarBuilder;
7 import gamma.grammar.Node;
8 import gamma.grammar.Nonterminal;
9 import gamma.grammar.Rule;
10 import gamma.grammar.SymbolNode;
11 import gamma.grammar.Terminal;
12 import gamma.grammar.hyper.Group;
13 import gamma.grammar.hyper.HyperSymbolNode;
14 import gamma.grammar.hyper.Operator;
15 import gamma.grammar.hyper.Option;
16 import gamma.grammar.hyper.Repetition;
17 import gamma.grammar.hyper.RepetitionAlternative;
18 import gamma.util.Position;
19 import std.stdio;
20 import std.typecons;
21 
22 public class Analyzer
23 {
24     private Scanner scanner;
25 
26     private char token;
27 
28     private Position lastPosition = null;
29 
30     private bool formalParams;
31 
32     private bool spareActualParams;
33 
34     private bool undecidedActualParams;
35 
36     private Position paramsPosition;
37 
38     private GrammarBuilder metaGrammarBuilder;
39 
40     private GrammarBuilder hyperGrammarBuilder;
41 
42     private Nonterminal startSymbol;
43 
44     private bool[Nonterminal]lexicalMetaNonterminals;
45 
46     private bool[Nonterminal] lexicalHyperNonterminals;
47 
48     /**
49      * Creates an analyzer for the given file.
50      */
51     public this(File input)
52     {
53         this.scanner = new Scanner(input);
54         this.token = this.scanner.read;
55     }
56 
57     private void markError(string message)
58     {
59         Position position = this.scanner.getPosition;
60 
61         if (position != this.lastPosition)
62         {
63             position.markError(message);
64             this.lastPosition = position;
65         }
66     }
67 
68     /**
69      * Specification:
70      *     { WhiteSpaceRule | MetaRule | HyperRule }.
71      */
72     public void parseSpecification()
73     {
74         for (;;)
75         {
76             if (this.token == ':')
77             {
78                 parseWhiteSpaceRule;
79             }
80             else if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE)
81             {
82                 const representation = this.scanner.getRepresentation;
83                 Position position = this.scanner.getPosition;
84                 const variableToken = this.token;
85 
86                 this.token = this.scanner.read;
87                 if (this.token == Scanner.NUMBER)
88                 {
89                     markError("unexpected number");
90                     this.token = this.scanner.read;
91                 }
92                 if (this.token != '=' && this.token != ':' && this.token != '<')
93                 {
94                     markError("unexpected symbol");
95                     if (this.token != '.' && this.token != Scanner.END)
96                         this.token = this.scanner.read;
97                 }
98                 if (this.token == '=')
99                 {
100                     Nonterminal nonterminal = this.metaGrammarBuilder.buildNonterminal(representation);
101                     SymbolNode lhs = new SymbolNode(nonterminal, position);
102 
103                     if (variableToken == Scanner.LEXICAL_VARIABLE)
104                         this.lexicalMetaNonterminals[nonterminal] = true;
105 
106                     parseMetaRule(lhs);
107                 }
108                 else if (this.token == ':' || this.token == '<')
109                 {
110                     Nonterminal nonterminal = this.hyperGrammarBuilder.buildNonterminal(representation);
111                     SymbolNode lhs = new HyperSymbolNode(nonterminal, null, position);
112 
113                     if (variableToken == Scanner.LEXICAL_VARIABLE)
114                         this.lexicalHyperNonterminals[nonterminal] = true;
115 
116                     if ( variableToken == Scanner.SYNTACTIC_VARIABLE )
117                         this.startSymbol = nonterminal;
118 
119                     parseHyperRule(lhs);
120                 }
121             }
122             else if (this.token == Scanner.END)
123                 break;
124             else
125             {  // sync
126                 markError("start of some rule expected");
127                 while (this.token != '.' && this.token != Scanner.END)
128                     this.token = this.scanner.read;
129                 if (this.token == '.')
130                     this.token = this.scanner.read;
131             }
132         }
133     }
134 
135     /**
136      * WhiteSpaceRule:
137      *     ":" WhiteSpaceDefinition { "|" WhiteSpaceDefinition } ".".
138      */
139     private void parseWhiteSpaceRule()
140     in (this.token == ':')
141     {
142         this.token = this.scanner.read;
143 
144         for (;;)
145         {
146             if (this.token == Scanner.LITERAL)
147                 parseWhiteSpaceDefinition;
148             else
149                 markError("white space definition expected");
150             if (this.token != '|' && this.token != '.' && this.token != Scanner.END)
151             {  // sync
152                 markError("unexpected symbol");
153                 do
154                     this.token = this.scanner.read;
155                 while (this.token != '|' && this.token != '.' && this.token != Scanner.END);
156             }
157             if (this.token == '|')
158                 this.token = this.scanner.read;
159             else
160                 break;
161         }
162 
163         assert(this.token == '.' || this.token == Scanner.END);
164 
165         if (this.token == '.')
166             this.token = this.scanner.read;
167         else
168             markError(`symbol "." expected`);
169     }
170 
171     /**
172      * WhiteSpaceDefinition:
173      *     string                  ! white space
174      *   | string "~"              ! comment that extends to end of line
175      *   | string "~" string       ! comment in brackets
176      *   | string "~" "~" string.  ! nesting comment in brackets
177      */
178     private void parseWhiteSpaceDefinition()
179     in (this.token == Scanner.LITERAL)
180     {
181         this.token = this.scanner.read;
182 
183         if (this.token == '~')
184         {
185             bool nestingComment = false;
186 
187             this.token = this.scanner.read;
188             if (this.token == '~')
189             {
190                 nestingComment = true;
191                 this.token = this.scanner.read;
192             }
193             if (this.token == Scanner.LITERAL)
194                 this.token = this.scanner.read;
195             else if (nestingComment)
196                 markError("closing bracket for nesting comment expected");
197         }
198     }
199 
200     /**
201      * MetaRule:
202      *     ident "=" MetaExpr ".".
203      *
204      * @param lhs  the identifier occurrence for the left-hand side
205      */
206     private void parseMetaRule(SymbolNode lhs)
207     in (this.token == '=')
208     {
209         Position position = this.scanner.getPosition();
210 
211         this.token = this.scanner.read;
212         parseMetaExpr(lhs, position);
213 
214         assert(this.token == '.' || this.token == Scanner.END);
215 
216         if (this.token == '.')
217             this.token = this.scanner.read;
218         else
219             markError(`symbol "." expected`);
220     }
221 
222     /**
223      * MetaExpr:
224      *     MetaTerm { "|" MetaTerm }.
225      *
226      * @param lhs       the identifier occurrence for the left-hand side
227      * @param position  the position for the first alternative
228      */
229     private void parseMetaExpr(SymbolNode lhs, Position position)
230     {
231         for (;;)
232         {
233             Node[] rhs = parseMetaTerm;
234             Alternative alternative = new Alternative(lhs, rhs, position);
235 
236             this.metaGrammarBuilder.add(alternative);
237 
238             assert(this.token == '|' || this.token == '.' || this.token == Scanner.END);
239 
240             if (this.token != '|')
241                 break;
242             position = this.scanner.getPosition;
243             this.token = this.scanner.read;
244         }
245     }
246 
247     /**
248      * MetaTerm:
249      *     { ident | string }.
250      *
251      * @return  the list of occurrences of identifiers and strings
252      */
253     private Node[] parseMetaTerm()
254     {
255         Node[] nodes;
256 
257         for (;;)
258             if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE)
259             {
260                 const representation = this.scanner.getRepresentation;
261                 Nonterminal nonterminal = this.metaGrammarBuilder.buildNonterminal(representation);
262                 Position position = this.scanner.getPosition;
263 
264                 nodes ~= new SymbolNode(nonterminal, position);
265                 this.token = this.scanner.read;
266                 if (this.token == Scanner.NUMBER)
267                 {
268                     markError("unexpected number");
269                     this.token = this.scanner.read;
270                 }
271             }
272             else if (this.token == Scanner.LITERAL)
273             {
274                 const representation = this.scanner.getRepresentation;
275                 Terminal terminal = this.metaGrammarBuilder.buildTerminal(representation);
276                 Position position = this.scanner.getPosition;
277 
278                 nodes ~= new SymbolNode(terminal, position);
279                 this.token = this.scanner.read;
280             }
281             else if (this.token == '|' || this.token == '.' || this.token == Scanner.END)
282                 break;
283             else
284             {  // sync
285                 markError("unexpected symbol");
286                 do
287                     this.token = this.scanner.read;
288                 while (this.token != '|' && this.token != '.' && this.token != Scanner.END);
289             }
290 
291         return nodes;
292     }
293 
294     /**
295      * HyperRule:
296      *     ident [ FormalParams ] ":" HyperExpr ".".
297      *
298      * @param lhs  the identifier occurrence for the left-hand side
299      */
300     private void parseHyperRule(SymbolNode lhs)
301     in (this.token == ':' || this.token == '<')
302     {
303         bool formalParams = false;
304         Position position = null;
305 
306         if (this.token == '<')
307         {
308             this.token = this.scanner.read;
309             parseFormalParams;
310             formalParams = true;
311         }
312         if (this.token == ':')
313         {
314             position = this.scanner.getPosition;
315             this.token = this.scanner.read;
316         } else
317             markError(`symbol ":" expected`);
318 
319         Alternative[] alternatives = parseHyperExpr(lhs,
320             formalParams ? No.formalParamsAllowed : Yes.formalParamsAllowed, No.repetition,
321             position);
322 
323         foreach (alternative; alternatives)
324             this.hyperGrammarBuilder.add(alternative);
325 
326         assert(this.token == ')' || this.token == ']' || this.token == '}'
327             || this.token == '.' || this.token == Scanner.END);
328 
329         if (this.token == '.')
330             this.token = this.scanner.read;
331         else
332             markError(`symbol "." expected`);
333     }
334 
335     /**
336      * HyperExpr:
337      *     [ FormalParams ] HyperTerm [ ActualParams ]
338      *     { "|" [ FormalParams ] HyperTerm [ ActualParams ] }.
339      *
340      * @param lhs  the identifier occurrence for the left-hand side
341      * @return     the list of alternatives
342      */
343     private Alternative[] parseHyperExpr(SymbolNode lhs,
344         Flag!"formalParamsAllowed" formalParamsAllowed, Flag!"repetition" repetition,
345         Position position)
346     {
347         Alternative[] alternatives;
348         bool formalParams = false;
349 
350         for (bool firstRound = true;; firstRound = false)
351         {
352             bool spareActualParams = false;
353             Position paramsPosition = null;
354 
355             if (this.token == '<')
356             {
357                 paramsPosition = this.scanner.getPosition;
358                 this.token = this.scanner.read;
359                 if (this.token == '+' || this.token == '-')
360                 {
361                     parseFormalParams;
362                     if (!formalParamsAllowed || !firstRound && !formalParams)
363                         paramsPosition.
364                             markError("unexpected formal parameters");
365                     else
366                         formalParams = true;
367                 }
368                 else
369                 {
370                     parseActualParams;
371                     if (formalParams)
372                         paramsPosition.
373                             markError("formal parameters expected");
374                     else
375                         spareActualParams = true;
376                 }
377             }
378             else if (formalParams)
379                 markError("formal parameters expected");
380 
381             Node[] rhs = parseHyperTerm(spareActualParams, paramsPosition);
382 
383             Alternative alternative;
384 
385             if (repetition)
386                 alternative = new RepetitionAlternative(lhs, rhs, null, position);
387             else
388                 alternative = new Alternative(lhs, rhs, position);
389             alternatives ~= alternative;
390 
391             if (repetition && formalParams)
392             {
393                 if (!this.undecidedActualParams && !this.spareActualParams)
394                     markError("actual parameters expected");
395             }
396             else
397                 if (this.spareActualParams)
398                     this.paramsPosition.markError("unexpected actual parameters");
399 
400             assert(this.token == ')' || this.token == ']' || this.token == '}'
401                 || this.token == '|' || this.token == '.' || this.token == Scanner.END);
402 
403             if (this.token != '|')
404                 break;
405             position = this.scanner.getPosition;
406             this.token = this.scanner.read;
407         }
408 
409         this.formalParams = formalParams;
410         return alternatives;
411     }
412 
413     /**
414      * HyperTerm:
415      *     { ident [ ActualParams ]
416      *   | string
417      *   | [ ActualParams ] ( "(" HyperExpr ")"
418      *                      | "[" HyperExpr "]" [ FormalParams ]
419      *                      | "{" HyperExpr "}" [ FormalParams ]
420      *                      )
421      *   }.
422      *
423      * @return  the list of occurrences of identifiers and strings
424      */
425     private Node[] parseHyperTerm(bool spareActualParams, Position paramsPosition)
426     {
427         Node[] nodes;
428         bool undecidedActualParams = false;
429 
430         for (;;)
431         {
432             if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE
433                 || this.token == Scanner.LITERAL || this.token == '<')
434             {
435                 undecidedActualParams = false;
436                 if (spareActualParams)
437                 {
438                     paramsPosition.markError("unexpected actual parameters");
439                     spareActualParams = false;
440                     paramsPosition = null;
441                 }
442                 if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE)
443                 {
444                     const representation = this.scanner.getRepresentation;
445                     Nonterminal nonterminal = this.hyperGrammarBuilder.buildNonterminal(representation);
446                     Position position = this.scanner.getPosition;
447                     Node node = new HyperSymbolNode (nonterminal, null, position);
448 
449                     nodes ~= node;
450                     this.token = this.scanner.read;
451                     if (this.token == Scanner.NUMBER)
452                     {
453                         markError("unexpected number");
454                         this.token = this.scanner.read;
455                     }
456                     if (this.token == '<')
457                     {
458                         this.token = this.scanner.read;
459                         parseActualParams;
460                         undecidedActualParams = true;
461                     }
462                 }
463                 else if (this.token == Scanner.LITERAL)
464                 {
465                     const representation = this.scanner.getRepresentation;
466                     Terminal terminal = this.hyperGrammarBuilder.buildTerminal(representation);
467                     Position position = this.scanner.getPosition;
468                     Node node = new SymbolNode(terminal, position);
469 
470                     nodes ~= node;
471                     this.token = this.scanner.read;
472                 }
473                 else if (this.token == '<')
474                 {
475                     this.token = this.scanner.read;
476                     parseActualParams;
477                     spareActualParams = true;
478                     paramsPosition = this.scanner.getPosition;
479                 }
480             }
481             else if (this.token == '(' || this.token == '[' || this.token == '{')
482             {
483                 const open = this.token;
484                 Position position = this.scanner.getPosition;
485 
486                 this.token = this.scanner.read;
487 
488                 Nonterminal identifier = hyperGrammarBuilder.buildGeneratedNonterminal;
489                 SymbolNode lhs = new HyperSymbolNode (identifier, null, position);
490                 Alternative[] alternatives = parseHyperExpr(lhs,
491                     Yes.formalParamsAllowed, (open == '{') ? Yes.repetition : No.repetition,
492                     position);
493                 Rule rule = new Rule(alternatives);
494                 Operator operator = null;
495 
496                 assert(this.token == ')' || this.token == ']' || this.token == '}'
497                     || this.token == '|' || this.token == '.' || this.token == Scanner.END);
498 
499                 if (open == '(')
500                 {
501                     if (this.token != ')')
502                         markError(`symbol ")" expected`);
503                     operator = new Group(null, rule, position);
504                 }
505                 else if (open == '[')
506                 {
507                     if (this.token != ']')
508                         markError(`symbol "]" expected`);
509                     operator = new Option(null, rule, null, position);
510                 }
511                 else if (open == '{')
512                 {
513                     if (this.token != '}')
514                         markError(`symbol "}" expected`);
515                     operator = new Repetition(null, rule, null, position);
516                 }
517                 nodes ~= operator;
518 
519                 if (this.token == ')' || this.token == ']' || this.token == '}')
520                     this.token = this.scanner.read;
521                 if (open != '(' && this.formalParams)
522                 {
523                     if (this.token == '<')
524                     {
525                         this.token = this.scanner.read;
526                         parseFormalParams;
527                     }
528                     else
529                         markError("formal parameters expected");
530                 }
531                 if (this.formalParams)
532                 {
533                     // FIXME: also OK for EBNF expression at beginning when LHS has no formal parameter
534                     if (!undecidedActualParams && !spareActualParams && false)
535                         position.markError("actual parameters expected");
536                 }
537                 else
538                     if (spareActualParams)
539                         paramsPosition.markError("unexpected actual parameters");
540                 undecidedActualParams = false;
541                 spareActualParams = false;
542                 paramsPosition = null;
543             }
544             else if (this.token == ')' || this.token == ']' || this.token == '}'
545                 || this.token == '|' || this.token == '.' || this.token == Scanner.END)
546                 break;
547             else
548             {  // sync
549                 markError("unexpected symbol");
550                 do
551                     this.token = this.scanner.read;
552                 while (this.token != '(' && this.token != '[' && this.token != '{'
553                     && this.token != '|' && this.token != '.' && this.token != Scanner.END);
554             }
555         }
556 
557         this.spareActualParams = spareActualParams;
558         this.undecidedActualParams = undecidedActualParams;
559         this.paramsPosition = paramsPosition;
560         return nodes;
561     }
562 
563     /**
564      * ActualParams:
565      *     "<" AffixForm { "," AffixForm } ">".
566      */
567     private void parseActualParams()
568     {
569         for (;;)
570         {
571             if (this.token == '+' || this.token == '-')
572             {
573                 markError(`symbol "+" or "-" not allowed in actual parameters`);
574                 this.token = this.scanner.read;
575             }
576             parseAffixForm;
577             if (this.token != ',' && this.token != '>'
578                 && this.token != '.' && this.token != Scanner.END)
579             {  // sync
580                 markError("unexpected symbol");
581                 do
582                     this.token = this.scanner.read;
583                 while (this.token != ',' && this.token != '>'
584                     && this.token != '.' && this.token != Scanner.END);
585             }
586             if (this.token == ',')
587                 this.token = this.scanner.read;
588             else
589                 break;
590         }
591 
592         assert(this.token == '>' || this.token == '.' || this.token == Scanner.END);
593 
594         if (this.token == '>')
595             this.token = this.scanner.read;
596         else
597             markError(`symbol ">" expected`);
598     }
599 
600     /**
601      * FormalParams:
602      *     "<" ( "+" | "-" ) ( AffixForm ":" ident | Variable )
603      *     { "," ( "+" | "-" ) ( AffixForm ":" ident | Variable ) } ">".
604      */
605     private void parseFormalParams()
606     {
607         for (;;)
608         {
609             if (this.token == '+' || this.token == '-')
610             {
611                 this.token = this.scanner.read;
612             }
613             else
614                 markError(`symbol "+" or "-" expected`);
615 
616             const isVariable = parseAffixForm;
617 
618             if (this.token == ':')
619             {
620                 this.token = this.scanner.read;
621                 if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE)
622                 {
623                     this.token = this.scanner.read;
624                     if (this.token == Scanner.NUMBER)
625                     {
626                         markError("unexpected number");
627                         this.token = this.scanner.read;
628                     }
629                 }
630                 else
631                     markError("meta-nonterminal expected");
632             }
633             else if (!isVariable)
634                 markError(`symbol ":" expected`);
635             if (this.token != ',' && this.token != '>'
636                 && this.token != '.' && this.token != Scanner.END)
637             {  // sync
638                 markError("unexpected symbol");
639                 do
640                     this.token = this.scanner.read;
641                 while (this.token != ',' && this.token != '>'
642                     && this.token != '.' && this.token != Scanner.END);
643             }
644             if (this.token == ',')
645                 this.token = this.scanner.read;
646             else
647                 break;
648         }
649         if (this.token == '>')
650             this.token = this.scanner.read;
651         else
652             markError(`symbol ">" expected`);
653     }
654 
655     /**
656      * AffixForm:
657      *     { string | Variable }.
658      */
659     private bool parseAffixForm()
660     {
661         bool isVariable = false;
662 
663         for (bool firstRound = true;; firstRound = false)
664         {
665             if (this.token == Scanner.LITERAL)
666             {
667                 this.token = this.scanner.read;
668                 isVariable = false;
669             }
670             else if (this.token == '!'
671                 || this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE)
672             {
673                 parseVariable;
674                 isVariable = firstRound;
675             }
676             else
677                 break;
678         }
679         return isVariable;
680     }
681 
682     /**
683      * Variable:
684      *     [ "!" ] ident [ number ].
685      */
686     private void parseVariable()
687     in (this.token == '!'
688         || this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE)
689     {
690         if (this.token == '!')
691             this.token = this.scanner.read;
692         if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE)
693         {
694             this.token = this.scanner.read;
695             if (this.token == Scanner.NUMBER)
696                 this.token = this.scanner.read;
697         }
698         else
699             markError("meta-variable expected");
700     }
701 
702     public int getErrorCount() const
703     {
704         return this.scanner.getErrorCount;
705     }
706 
707     public Grammar yieldMetaGrammar()
708     {
709         if (this.scanner.getErrorCount == 0
710             && this.metaGrammarBuilder.grammarIsWellDefined)
711         {
712             return this.metaGrammarBuilder.getGrammar;
713         }
714         else
715         {
716             this.metaGrammarBuilder.markErrors;
717             return null;
718         }
719     }
720 
721     public Grammar yieldHyperGrammar()
722     {
723         if (this.scanner.getErrorCount == 0 && this.startSymbol !is null
724             && this.hyperGrammarBuilder.grammarIsWellDefined)
725         {
726             return this.hyperGrammarBuilder.getGrammar(this.startSymbol);
727         }
728         else
729         {
730             this.hyperGrammarBuilder.markErrors;
731             return null;
732         }
733     }
734 
735     public bool[Nonterminal] getLexicalHyperNonterminals()
736     {
737         return lexicalHyperNonterminals;
738     }
739 }