1 module gamma.input.epsilang.Analyzer; 2 3 import gamma.input.epsilang.Scanner; 4 import gamma.grammar.Alternative; 5 import gamma.grammar.Grammar; 6 import gamma.grammar.GrammarBuilder; 7 import gamma.grammar.Node; 8 import gamma.grammar.Nonterminal; 9 import gamma.grammar.Rule; 10 import gamma.grammar.SymbolNode; 11 import gamma.grammar.Terminal; 12 import gamma.grammar.hyper.Group; 13 import gamma.grammar.hyper.HyperSymbolNode; 14 import gamma.grammar.hyper.Operator; 15 import gamma.grammar.hyper.Option; 16 import gamma.grammar.hyper.Repetition; 17 import gamma.grammar.hyper.RepetitionAlternative; 18 import gamma.util.Position; 19 import std.stdio; 20 import std.typecons; 21 22 public class Analyzer 23 { 24 private Scanner scanner; 25 26 private char token; 27 28 private Position lastPosition = null; 29 30 private bool formalParams; 31 32 private bool spareActualParams; 33 34 private bool undecidedActualParams; 35 36 private Position paramsPosition; 37 38 private GrammarBuilder metaGrammarBuilder; 39 40 private GrammarBuilder hyperGrammarBuilder; 41 42 private Nonterminal startSymbol; 43 44 private bool[Nonterminal]lexicalMetaNonterminals; 45 46 private bool[Nonterminal] lexicalHyperNonterminals; 47 48 /** 49 * Creates an analyzer for the given file. 50 */ 51 public this(File input) 52 { 53 this.scanner = new Scanner(input); 54 this.token = this.scanner.read; 55 } 56 57 private void markError(string message) 58 { 59 Position position = this.scanner.getPosition; 60 61 if (position != this.lastPosition) 62 { 63 position.markError(message); 64 this.lastPosition = position; 65 } 66 } 67 68 /** 69 * Specification: 70 * { WhiteSpaceRule | MetaRule | HyperRule }. 71 */ 72 public void parseSpecification() 73 { 74 for (;;) 75 { 76 if (this.token == ':') 77 { 78 parseWhiteSpaceRule; 79 } 80 else if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE) 81 { 82 const representation = this.scanner.getRepresentation; 83 Position position = this.scanner.getPosition; 84 const variableToken = this.token; 85 86 this.token = this.scanner.read; 87 if (this.token == Scanner.NUMBER) 88 { 89 markError("unexpected number"); 90 this.token = this.scanner.read; 91 } 92 if (this.token != '=' && this.token != ':' && this.token != '<') 93 { 94 markError("unexpected symbol"); 95 if (this.token != '.' && this.token != Scanner.END) 96 this.token = this.scanner.read; 97 } 98 if (this.token == '=') 99 { 100 Nonterminal nonterminal = this.metaGrammarBuilder.buildNonterminal(representation); 101 SymbolNode lhs = new SymbolNode(nonterminal, position); 102 103 if (variableToken == Scanner.LEXICAL_VARIABLE) 104 this.lexicalMetaNonterminals[nonterminal] = true; 105 106 parseMetaRule(lhs); 107 } 108 else if (this.token == ':' || this.token == '<') 109 { 110 Nonterminal nonterminal = this.hyperGrammarBuilder.buildNonterminal(representation); 111 SymbolNode lhs = new HyperSymbolNode(nonterminal, null, position); 112 113 if (variableToken == Scanner.LEXICAL_VARIABLE) 114 this.lexicalHyperNonterminals[nonterminal] = true; 115 116 if ( variableToken == Scanner.SYNTACTIC_VARIABLE ) 117 this.startSymbol = nonterminal; 118 119 parseHyperRule(lhs); 120 } 121 } 122 else if (this.token == Scanner.END) 123 break; 124 else 125 { // sync 126 markError("start of some rule expected"); 127 while (this.token != '.' && this.token != Scanner.END) 128 this.token = this.scanner.read; 129 if (this.token == '.') 130 this.token = this.scanner.read; 131 } 132 } 133 } 134 135 /** 136 * WhiteSpaceRule: 137 * ":" WhiteSpaceDefinition { "|" WhiteSpaceDefinition } ".". 138 */ 139 private void parseWhiteSpaceRule() 140 in (this.token == ':') 141 { 142 this.token = this.scanner.read; 143 144 for (;;) 145 { 146 if (this.token == Scanner.LITERAL) 147 parseWhiteSpaceDefinition; 148 else 149 markError("white space definition expected"); 150 if (this.token != '|' && this.token != '.' && this.token != Scanner.END) 151 { // sync 152 markError("unexpected symbol"); 153 do 154 this.token = this.scanner.read; 155 while (this.token != '|' && this.token != '.' && this.token != Scanner.END); 156 } 157 if (this.token == '|') 158 this.token = this.scanner.read; 159 else 160 break; 161 } 162 163 assert(this.token == '.' || this.token == Scanner.END); 164 165 if (this.token == '.') 166 this.token = this.scanner.read; 167 else 168 markError(`symbol "." expected`); 169 } 170 171 /** 172 * WhiteSpaceDefinition: 173 * string ! white space 174 * | string "~" ! comment that extends to end of line 175 * | string "~" string ! comment in brackets 176 * | string "~" "~" string. ! nesting comment in brackets 177 */ 178 private void parseWhiteSpaceDefinition() 179 in (this.token == Scanner.LITERAL) 180 { 181 this.token = this.scanner.read; 182 183 if (this.token == '~') 184 { 185 bool nestingComment = false; 186 187 this.token = this.scanner.read; 188 if (this.token == '~') 189 { 190 nestingComment = true; 191 this.token = this.scanner.read; 192 } 193 if (this.token == Scanner.LITERAL) 194 this.token = this.scanner.read; 195 else if (nestingComment) 196 markError("closing bracket for nesting comment expected"); 197 } 198 } 199 200 /** 201 * MetaRule: 202 * ident "=" MetaExpr ".". 203 * 204 * @param lhs the identifier occurrence for the left-hand side 205 */ 206 private void parseMetaRule(SymbolNode lhs) 207 in (this.token == '=') 208 { 209 Position position = this.scanner.getPosition(); 210 211 this.token = this.scanner.read; 212 parseMetaExpr(lhs, position); 213 214 assert(this.token == '.' || this.token == Scanner.END); 215 216 if (this.token == '.') 217 this.token = this.scanner.read; 218 else 219 markError(`symbol "." expected`); 220 } 221 222 /** 223 * MetaExpr: 224 * MetaTerm { "|" MetaTerm }. 225 * 226 * @param lhs the identifier occurrence for the left-hand side 227 * @param position the position for the first alternative 228 */ 229 private void parseMetaExpr(SymbolNode lhs, Position position) 230 { 231 for (;;) 232 { 233 Node[] rhs = parseMetaTerm; 234 Alternative alternative = new Alternative(lhs, rhs, position); 235 236 this.metaGrammarBuilder.add(alternative); 237 238 assert(this.token == '|' || this.token == '.' || this.token == Scanner.END); 239 240 if (this.token != '|') 241 break; 242 position = this.scanner.getPosition; 243 this.token = this.scanner.read; 244 } 245 } 246 247 /** 248 * MetaTerm: 249 * { ident | string }. 250 * 251 * @return the list of occurrences of identifiers and strings 252 */ 253 private Node[] parseMetaTerm() 254 { 255 Node[] nodes; 256 257 for (;;) 258 if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE) 259 { 260 const representation = this.scanner.getRepresentation; 261 Nonterminal nonterminal = this.metaGrammarBuilder.buildNonterminal(representation); 262 Position position = this.scanner.getPosition; 263 264 nodes ~= new SymbolNode(nonterminal, position); 265 this.token = this.scanner.read; 266 if (this.token == Scanner.NUMBER) 267 { 268 markError("unexpected number"); 269 this.token = this.scanner.read; 270 } 271 } 272 else if (this.token == Scanner.LITERAL) 273 { 274 const representation = this.scanner.getRepresentation; 275 Terminal terminal = this.metaGrammarBuilder.buildTerminal(representation); 276 Position position = this.scanner.getPosition; 277 278 nodes ~= new SymbolNode(terminal, position); 279 this.token = this.scanner.read; 280 } 281 else if (this.token == '|' || this.token == '.' || this.token == Scanner.END) 282 break; 283 else 284 { // sync 285 markError("unexpected symbol"); 286 do 287 this.token = this.scanner.read; 288 while (this.token != '|' && this.token != '.' && this.token != Scanner.END); 289 } 290 291 return nodes; 292 } 293 294 /** 295 * HyperRule: 296 * ident [ FormalParams ] ":" HyperExpr ".". 297 * 298 * @param lhs the identifier occurrence for the left-hand side 299 */ 300 private void parseHyperRule(SymbolNode lhs) 301 in (this.token == ':' || this.token == '<') 302 { 303 bool formalParams = false; 304 Position position = null; 305 306 if (this.token == '<') 307 { 308 this.token = this.scanner.read; 309 parseFormalParams; 310 formalParams = true; 311 } 312 if (this.token == ':') 313 { 314 position = this.scanner.getPosition; 315 this.token = this.scanner.read; 316 } else 317 markError(`symbol ":" expected`); 318 319 Alternative[] alternatives = parseHyperExpr(lhs, 320 formalParams ? No.formalParamsAllowed : Yes.formalParamsAllowed, No.repetition, 321 position); 322 323 foreach (alternative; alternatives) 324 this.hyperGrammarBuilder.add(alternative); 325 326 assert(this.token == ')' || this.token == ']' || this.token == '}' 327 || this.token == '.' || this.token == Scanner.END); 328 329 if (this.token == '.') 330 this.token = this.scanner.read; 331 else 332 markError(`symbol "." expected`); 333 } 334 335 /** 336 * HyperExpr: 337 * [ FormalParams ] HyperTerm [ ActualParams ] 338 * { "|" [ FormalParams ] HyperTerm [ ActualParams ] }. 339 * 340 * @param lhs the identifier occurrence for the left-hand side 341 * @return the list of alternatives 342 */ 343 private Alternative[] parseHyperExpr(SymbolNode lhs, 344 Flag!"formalParamsAllowed" formalParamsAllowed, Flag!"repetition" repetition, 345 Position position) 346 { 347 Alternative[] alternatives; 348 bool formalParams = false; 349 350 for (bool firstRound = true;; firstRound = false) 351 { 352 bool spareActualParams = false; 353 Position paramsPosition = null; 354 355 if (this.token == '<') 356 { 357 paramsPosition = this.scanner.getPosition; 358 this.token = this.scanner.read; 359 if (this.token == '+' || this.token == '-') 360 { 361 parseFormalParams; 362 if (!formalParamsAllowed || !firstRound && !formalParams) 363 paramsPosition. 364 markError("unexpected formal parameters"); 365 else 366 formalParams = true; 367 } 368 else 369 { 370 parseActualParams; 371 if (formalParams) 372 paramsPosition. 373 markError("formal parameters expected"); 374 else 375 spareActualParams = true; 376 } 377 } 378 else if (formalParams) 379 markError("formal parameters expected"); 380 381 Node[] rhs = parseHyperTerm(spareActualParams, paramsPosition); 382 383 Alternative alternative; 384 385 if (repetition) 386 alternative = new RepetitionAlternative(lhs, rhs, null, position); 387 else 388 alternative = new Alternative(lhs, rhs, position); 389 alternatives ~= alternative; 390 391 if (repetition && formalParams) 392 { 393 if (!this.undecidedActualParams && !this.spareActualParams) 394 markError("actual parameters expected"); 395 } 396 else 397 if (this.spareActualParams) 398 this.paramsPosition.markError("unexpected actual parameters"); 399 400 assert(this.token == ')' || this.token == ']' || this.token == '}' 401 || this.token == '|' || this.token == '.' || this.token == Scanner.END); 402 403 if (this.token != '|') 404 break; 405 position = this.scanner.getPosition; 406 this.token = this.scanner.read; 407 } 408 409 this.formalParams = formalParams; 410 return alternatives; 411 } 412 413 /** 414 * HyperTerm: 415 * { ident [ ActualParams ] 416 * | string 417 * | [ ActualParams ] ( "(" HyperExpr ")" 418 * | "[" HyperExpr "]" [ FormalParams ] 419 * | "{" HyperExpr "}" [ FormalParams ] 420 * ) 421 * }. 422 * 423 * @return the list of occurrences of identifiers and strings 424 */ 425 private Node[] parseHyperTerm(bool spareActualParams, Position paramsPosition) 426 { 427 Node[] nodes; 428 bool undecidedActualParams = false; 429 430 for (;;) 431 { 432 if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE 433 || this.token == Scanner.LITERAL || this.token == '<') 434 { 435 undecidedActualParams = false; 436 if (spareActualParams) 437 { 438 paramsPosition.markError("unexpected actual parameters"); 439 spareActualParams = false; 440 paramsPosition = null; 441 } 442 if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE) 443 { 444 const representation = this.scanner.getRepresentation; 445 Nonterminal nonterminal = this.hyperGrammarBuilder.buildNonterminal(representation); 446 Position position = this.scanner.getPosition; 447 Node node = new HyperSymbolNode (nonterminal, null, position); 448 449 nodes ~= node; 450 this.token = this.scanner.read; 451 if (this.token == Scanner.NUMBER) 452 { 453 markError("unexpected number"); 454 this.token = this.scanner.read; 455 } 456 if (this.token == '<') 457 { 458 this.token = this.scanner.read; 459 parseActualParams; 460 undecidedActualParams = true; 461 } 462 } 463 else if (this.token == Scanner.LITERAL) 464 { 465 const representation = this.scanner.getRepresentation; 466 Terminal terminal = this.hyperGrammarBuilder.buildTerminal(representation); 467 Position position = this.scanner.getPosition; 468 Node node = new SymbolNode(terminal, position); 469 470 nodes ~= node; 471 this.token = this.scanner.read; 472 } 473 else if (this.token == '<') 474 { 475 this.token = this.scanner.read; 476 parseActualParams; 477 spareActualParams = true; 478 paramsPosition = this.scanner.getPosition; 479 } 480 } 481 else if (this.token == '(' || this.token == '[' || this.token == '{') 482 { 483 const open = this.token; 484 Position position = this.scanner.getPosition; 485 486 this.token = this.scanner.read; 487 488 Nonterminal identifier = hyperGrammarBuilder.buildGeneratedNonterminal; 489 SymbolNode lhs = new HyperSymbolNode (identifier, null, position); 490 Alternative[] alternatives = parseHyperExpr(lhs, 491 Yes.formalParamsAllowed, (open == '{') ? Yes.repetition : No.repetition, 492 position); 493 Rule rule = new Rule(alternatives); 494 Operator operator = null; 495 496 assert(this.token == ')' || this.token == ']' || this.token == '}' 497 || this.token == '|' || this.token == '.' || this.token == Scanner.END); 498 499 if (open == '(') 500 { 501 if (this.token != ')') 502 markError(`symbol ")" expected`); 503 operator = new Group(null, rule, position); 504 } 505 else if (open == '[') 506 { 507 if (this.token != ']') 508 markError(`symbol "]" expected`); 509 operator = new Option(null, rule, null, position); 510 } 511 else if (open == '{') 512 { 513 if (this.token != '}') 514 markError(`symbol "}" expected`); 515 operator = new Repetition(null, rule, null, position); 516 } 517 nodes ~= operator; 518 519 if (this.token == ')' || this.token == ']' || this.token == '}') 520 this.token = this.scanner.read; 521 if (open != '(' && this.formalParams) 522 { 523 if (this.token == '<') 524 { 525 this.token = this.scanner.read; 526 parseFormalParams; 527 } 528 else 529 markError("formal parameters expected"); 530 } 531 if (this.formalParams) 532 { 533 // FIXME: also OK for EBNF expression at beginning when LHS has no formal parameter 534 if (!undecidedActualParams && !spareActualParams && false) 535 position.markError("actual parameters expected"); 536 } 537 else 538 if (spareActualParams) 539 paramsPosition.markError("unexpected actual parameters"); 540 undecidedActualParams = false; 541 spareActualParams = false; 542 paramsPosition = null; 543 } 544 else if (this.token == ')' || this.token == ']' || this.token == '}' 545 || this.token == '|' || this.token == '.' || this.token == Scanner.END) 546 break; 547 else 548 { // sync 549 markError("unexpected symbol"); 550 do 551 this.token = this.scanner.read; 552 while (this.token != '(' && this.token != '[' && this.token != '{' 553 && this.token != '|' && this.token != '.' && this.token != Scanner.END); 554 } 555 } 556 557 this.spareActualParams = spareActualParams; 558 this.undecidedActualParams = undecidedActualParams; 559 this.paramsPosition = paramsPosition; 560 return nodes; 561 } 562 563 /** 564 * ActualParams: 565 * "<" AffixForm { "," AffixForm } ">". 566 */ 567 private void parseActualParams() 568 { 569 for (;;) 570 { 571 if (this.token == '+' || this.token == '-') 572 { 573 markError(`symbol "+" or "-" not allowed in actual parameters`); 574 this.token = this.scanner.read; 575 } 576 parseAffixForm; 577 if (this.token != ',' && this.token != '>' 578 && this.token != '.' && this.token != Scanner.END) 579 { // sync 580 markError("unexpected symbol"); 581 do 582 this.token = this.scanner.read; 583 while (this.token != ',' && this.token != '>' 584 && this.token != '.' && this.token != Scanner.END); 585 } 586 if (this.token == ',') 587 this.token = this.scanner.read; 588 else 589 break; 590 } 591 592 assert(this.token == '>' || this.token == '.' || this.token == Scanner.END); 593 594 if (this.token == '>') 595 this.token = this.scanner.read; 596 else 597 markError(`symbol ">" expected`); 598 } 599 600 /** 601 * FormalParams: 602 * "<" ( "+" | "-" ) ( AffixForm ":" ident | Variable ) 603 * { "," ( "+" | "-" ) ( AffixForm ":" ident | Variable ) } ">". 604 */ 605 private void parseFormalParams() 606 { 607 for (;;) 608 { 609 if (this.token == '+' || this.token == '-') 610 { 611 this.token = this.scanner.read; 612 } 613 else 614 markError(`symbol "+" or "-" expected`); 615 616 const isVariable = parseAffixForm; 617 618 if (this.token == ':') 619 { 620 this.token = this.scanner.read; 621 if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE) 622 { 623 this.token = this.scanner.read; 624 if (this.token == Scanner.NUMBER) 625 { 626 markError("unexpected number"); 627 this.token = this.scanner.read; 628 } 629 } 630 else 631 markError("meta-nonterminal expected"); 632 } 633 else if (!isVariable) 634 markError(`symbol ":" expected`); 635 if (this.token != ',' && this.token != '>' 636 && this.token != '.' && this.token != Scanner.END) 637 { // sync 638 markError("unexpected symbol"); 639 do 640 this.token = this.scanner.read; 641 while (this.token != ',' && this.token != '>' 642 && this.token != '.' && this.token != Scanner.END); 643 } 644 if (this.token == ',') 645 this.token = this.scanner.read; 646 else 647 break; 648 } 649 if (this.token == '>') 650 this.token = this.scanner.read; 651 else 652 markError(`symbol ">" expected`); 653 } 654 655 /** 656 * AffixForm: 657 * { string | Variable }. 658 */ 659 private bool parseAffixForm() 660 { 661 bool isVariable = false; 662 663 for (bool firstRound = true;; firstRound = false) 664 { 665 if (this.token == Scanner.LITERAL) 666 { 667 this.token = this.scanner.read; 668 isVariable = false; 669 } 670 else if (this.token == '!' 671 || this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE) 672 { 673 parseVariable; 674 isVariable = firstRound; 675 } 676 else 677 break; 678 } 679 return isVariable; 680 } 681 682 /** 683 * Variable: 684 * [ "!" ] ident [ number ]. 685 */ 686 private void parseVariable() 687 in (this.token == '!' 688 || this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE) 689 { 690 if (this.token == '!') 691 this.token = this.scanner.read; 692 if (this.token == Scanner.SYNTACTIC_VARIABLE || this.token == Scanner.LEXICAL_VARIABLE) 693 { 694 this.token = this.scanner.read; 695 if (this.token == Scanner.NUMBER) 696 this.token = this.scanner.read; 697 } 698 else 699 markError("meta-variable expected"); 700 } 701 702 public int getErrorCount() const 703 { 704 return this.scanner.getErrorCount; 705 } 706 707 public Grammar yieldMetaGrammar() 708 { 709 if (this.scanner.getErrorCount == 0 710 && this.metaGrammarBuilder.grammarIsWellDefined) 711 { 712 return this.metaGrammarBuilder.getGrammar; 713 } 714 else 715 { 716 this.metaGrammarBuilder.markErrors; 717 return null; 718 } 719 } 720 721 public Grammar yieldHyperGrammar() 722 { 723 if (this.scanner.getErrorCount == 0 && this.startSymbol !is null 724 && this.hyperGrammarBuilder.grammarIsWellDefined) 725 { 726 return this.hyperGrammarBuilder.getGrammar(this.startSymbol); 727 } 728 else 729 { 730 this.hyperGrammarBuilder.markErrors; 731 return null; 732 } 733 } 734 735 public bool[Nonterminal] getLexicalHyperNonterminals() 736 { 737 return lexicalHyperNonterminals; 738 } 739 }