module epsilon.lexgen;

import EAG = epsilon.eag;
import epsilon.settings;
import io : Input, read;
import log;
import runtime;
import std.stdio;

// Token number given to the first hyper-terminal; later ones follow consecutively.
private const firstUserTok = 3;
// Lower bound used for the maximum token length written into the generated lexer.
private const lenOfPredefinedToken = 8;
// Character classes, indexed by character code and filled in by the module constructor.
private bool[256] IsIdent;
private bool[256] IsSymbol;

/**
 * Writes the lexer module for the analysed grammar.
 *
 * The template "lexgen.fix.d" is copied to the output file; at every '$'
 * marker a generated fragment is inserted instead: the module name, the
 * maximum token length plus one, the number of tokens, one `Enter(...)` line
 * per hyper-terminal, and the module name again.
 *
 * Params:
 *     settings = used to resolve the path of the output file
 * Returns:
 *     the name of the written file
 */
public string Generate(Settings settings)
in (EAG.Performed(EAG.analysed))
{
    Input Fix;
    File output;
    int Term;
    int MaxTokLen;
    int Len;
    bool Error;

    /**
     * Checks that Str is a well-formed quoted token.
     *
     * On success Len receives the number of characters between the quotes;
     * otherwise an error is logged, Error is set and Len stays 0.
     */
    void TestToken(string Str, ref int Len)
    {
        import std.string : toStringz;

        int i;

        void Err(string Msg)
        {
            Error = true;
            error!"token %s %s"(Str, Msg);
        }

        // Zero-terminated copy, so that the end of the token shows up as a 0 character.
        const s = Str.toStringz;

        Len = 0;
        if (s[0] != '\'' && s[0] != '"' && s[0] != '`' || s[1] == 0 || s[1] == s[0])
        {
            Err("must be non empty string");
            return;
        }
        if (s[1] == '\'' || s[1] == '"' || s[1] == '`' || s[1] == ' ')
        {
            // a single quote character or blank
            i = 2;
        }
        else if ((s[0] == '\'' || s[0] == '"') && s[1] == '\\')
        {
            // An escape sequence needs a character after the backslash;
            // without one, s[3] would lie beyond the terminating zero.
            if (s[2] == 0)
            {
                Err("contains illegal char");
                return;
            }
            i = 3;
        }
        else if (s[0] == '`' && s[1] == '\\')
        {
            // inside backquotes the backslash stands for itself
            i = 2;
        }
        else if (IsIdent[s[1]])
        {
            i = 2;
            while (IsIdent[s[i]])
                ++i;
        }
        else if (IsSymbol[s[1]])
        {
            i = 2;
            while (IsSymbol[s[i]])
                ++i;
        }
        else
        {
            Err("contains illegal char");
            return;
        }
        // s[i] must be the closing quote, directly followed by the end of the string.
        if (s[i] != s[0] || s[i + 1] != 0)
        {
            Err("contains illegal char");
            return;
        }
        Len = i - 1;
    }

    /**
     * Copies the template to the output up to, but not including, the next
     * occurrence of Term, and then skips that marker.
     *
     * Throws: an exception (via enforce) when a 0 character is read from the
     * template before the marker is found.
     */
    void InclFix(char Term)
    {
        import std.conv : to;
        import std.exception : enforce;

        char c = Fix.front.to!char;

        while (c != Term)
        {
            enforce(c != 0,
                    "error: unexpected end of lexgen.fix.d");

            output.write(c);
            Fix.popFront;
            c = Fix.front.to!char;
        }
        Fix.popFront;
    }

    info!"LexGen writing %s"(EAG.BaseName);
    Error = false;
    MaxTokLen = lenOfPredefinedToken;
    // Validate every hyper-terminal and determine the longest token.
    for (Term = EAG.firstHTerm; Term < EAG.NextHTerm; ++Term)
    {
        const Str = EAG.symbolTable.symbol(EAG.HTerm[Term].Id);

        TestToken(Str, Len);
        if (Len > MaxTokLen)
            MaxTokLen = Len;
    }
    if (Error)
        assert(0, "TODO: error handling for lexer generator");

    enum fixName = "lexgen.fix.d";
    const name = EAG.BaseName ~ "Lex";
    const fileName = settings.path(name ~ ".d");

    // The template text is embedded at compile time by the import expression.
    Fix = Input(fixName, import(fixName));
    output = File(fileName, "w");
    InclFix('$');
    output.write(name);
    InclFix('$');
    output.write(MaxTokLen + 1);
    InclFix('$');
    output.write(EAG.NextHTerm - EAG.firstHTerm + firstUserTok);
    InclFix('$');
    for (Term = EAG.firstHTerm; Term < EAG.NextHTerm; ++Term)
        output.writeln("Enter(", Term - EAG.firstHTerm + firstUserTok, ", ", EAG.HTerm[Term].Id.repr, ");");
    InclFix('$');
    output.write(name);
    InclFix('$');
    output.close;
    return fileName;
}

/**
 * Returns the text of the symbol with the given id in the form in which it
 * is written into the generated source: a symbol that starts with a single
 * quote has its first and last character dropped and the remainder is
 * formatted as an escaped, double-quoted string literal; any other symbol is
 * returned unchanged.
 */
private string repr(int id)
{
    import std.range : dropBackOne, dropOne, front, only;
    import std.format : format;

    const value = EAG.symbolTable.symbol(id);

    if (value.front == '\'')
    {
        // "%(%s%)" formats the single element of the range as a quoted, escaped string.
        return format!"%(%s%)"(only(value.dropOne.dropBackOne));
    }
    return value;
}

/**
 * Fills the character class tables: IsIdent holds for ASCII letters and
 * digits; IsSymbol holds for every character above the blank that is neither
 * an identifier character nor one of the quote characters ' " `.
 */
static this() @nogc nothrow @safe
{
    for (int i = 0; i < IsIdent.length; ++i)
        IsIdent[i] = false;
    for (int i = 'A'; i <= 'Z'; ++i)
        IsIdent[i] = true;
    for (int i = 'a'; i <= 'z'; ++i)
        IsIdent[i] = true;
    for (int i = '0'; i <= '9'; ++i)
        IsIdent[i] = true;
    for (int i = 0; i <= ' '; ++i)
        IsSymbol[i] = false;
    for (int i = ' ' + 1; i < IsSymbol.length; ++i)
        IsSymbol[i] = !IsIdent[i];
    IsSymbol['\''] = false;
    IsSymbol['"'] = false;
    IsSymbol['`'] = false;
}