1 module epsilon.lexgen;
2 
3 import EAG = epsilon.eag;
4 import epsilon.settings;
5 import io : Input, read;
6 import log;
7 import runtime;
8 import std.stdio;
9 
private
{
    /// Offset added to a terminal's position when numbering the generated tokens.
    const firstUserTok = 3;

    /// Starting value for the maximum token length computed by `Generate`.
    const lenOfPredefinedToken = 8;

    /// Character table: `true` for ASCII letters and digits (filled by the module constructor).
    bool[256] IsIdent;

    /// Character table: `true` for characters above the space character that are
    /// neither identifier characters nor one of the quotes ' " ` (filled by the
    /// module constructor).
    bool[256] IsSymbol;
}
14 
/**
 * Writes the lexer module for the analysed grammar.
 *
 * All terminals of the grammar are validated first. Afterwards the template
 * "lexgen.fix.d" is copied to the output file; the template is split at '$'
 * characters and the gaps are filled, in this order, with: the module name
 * (`EAG.BaseName ~ "Lex"`), the maximum token length plus one, the number of
 * terminals plus `firstUserTok`, one `Enter(...)` line per terminal, and the
 * module name again.
 *
 * Params:
 *     settings = used to resolve the path of the generated ".d" file
 *
 * Returns: the path of the generated file
 *
 * Throws: an exception (via `enforce`) when the template ends before an
 *         expected '$' separator is found
 */
public string Generate(Settings settings)
in (EAG.Performed(EAG.analysed))
{
    Input Fix;
    File output;
    int Term;
    int MaxTokLen;
    int Len;
    bool Error;

    /**
     * Validates the quoted token `Str`.
     *
     * On success `Len` receives the number of characters between the quotes;
     * on failure an error is logged, `Error` is set and `Len` is 0.
     */
    void TestToken(string Str, ref int Len)
    {
        import std.string : toStringz;

        int i;

        // logs the problem and remembers that at least one token was rejected
        void Err(string Msg)
        {
            Error = true;
            error!"token %s %s"(Str, Msg);
        }

        // zero-terminated copy: the scan below relies on hitting the terminator
        const s = Str.toStringz;

        Len = 0;
        // the token must start with a quote and must not be empty
        if (s[0] != '\'' && s[0] != '"' && s[0] != '`' || s[1] == 0 || s[1] == s[0])
        {
            Err("must be non empty string");
            return;
        }
        // i is set to the index where the closing quote is expected
        if (s[1] == '\'' || s[1] == '"' || s[1] == '`' || s[1] == ' ')
        {
            // a single quote character or a single space
            i = 2;
        }
        else if ((s[0] == '\'' || s[0] == '"') && s[1] == '\\')
        {
            // an escape sequence occupies two characters; when the string
            // already ends after the backslash, stay on the terminator so
            // that the final check does not read past the end of the buffer
            i = (s[2] == 0) ? 2 : 3;
        }
        else if (s[0] == '`' && s[1] == '\\')
        {
            // inside backticks the backslash is taken as a single character
            i = 2;
        }
        else if (IsIdent[s[1]])
        {
            // a run of identifier characters
            i = 2;
            while (IsIdent[s[i]])
                ++i;
        }
        else if (IsSymbol[s[1]])
        {
            // a run of symbol characters
            i = 2;
            while (IsSymbol[s[i]])
                ++i;
        }
        else
        {
            Err("contains illegal char");
            return;
        }
        // the closing quote must match the opening one and end the string
        if (s[i] != s[0] || s[i + 1] != 0)
        {
            Err("contains illegal char");
            return;
        }
        Len = i - 1;
    }

    /**
     * Copies the template to the output up to (excluding) the next occurrence
     * of `Term` and skips that separator.
     */
    void InclFix(char Term)
    {
        import std.conv : to;
        import std.exception : enforce;

        char c = Fix.front.to!char;

        while (c != Term)
        {
            // a zero character means the template ended before the separator
            enforce(c != 0,
                    "error: unexpected end of lexgen.fix.d");

            output.write(c);
            Fix.popFront;
            c = Fix.front.to!char;
        }
        Fix.popFront;
    }

    info!"LexGen writing %s"(EAG.BaseName);
    Error = false;
    MaxTokLen = lenOfPredefinedToken;
    // validate every terminal and determine the longest token
    for (Term = EAG.firstHTerm; Term < EAG.NextHTerm; ++Term)
    {
        const Str = EAG.symbolTable.symbol(EAG.HTerm[Term].Id);

        TestToken(Str, Len);
        if (Len > MaxTokLen)
            MaxTokLen = Len;
    }
    if (Error)
        assert(0, "TODO: error handling for lexer generator");

    enum fixName = "lexgen.fix.d";
    const name = EAG.BaseName ~ "Lex";
    const fileName = settings.path(name ~ ".d");

    // the template is embedded at compile time via a string import
    Fix = Input(fixName, import(fixName));
    output = File(fileName, "w");
    InclFix('$');
    output.write(name);
    InclFix('$');
    output.write(MaxTokLen + 1);
    InclFix('$');
    output.write(EAG.NextHTerm - EAG.firstHTerm + firstUserTok);
    InclFix('$');
    // token numbers of the terminals start at firstUserTok
    for (Term = EAG.firstHTerm; Term < EAG.NextHTerm; ++Term)
        output.writeln("Enter(", Term - EAG.firstHTerm + firstUserTok, ", ", EAG.HTerm[Term].Id.repr, ");");
    InclFix('$');
    output.write(name);
    InclFix('$');
    output.close;
    return fileName;
}
136 
/**
 * Returns the text of the symbol `id` as it is written into the generated code.
 *
 * A symbol that starts with a single quote is stripped of its first and last
 * character and re-formatted as an escaped string literal; any other symbol
 * is returned unchanged.
 */
private string repr(int id)
{
    import std.format : format;
    import std.range : dropBackOne, dropOne, front, only;

    const symbol = EAG.symbolTable.symbol(id);

    // only single-quoted symbols need to be converted
    if (symbol.front != '\'')
        return symbol;

    return format!"%(%s%)"(only(symbol.dropOne.dropBackOne));
}
151 
/// Fills the character tables `IsIdent` and `IsSymbol`.
static this() @nogc nothrow @safe
{
    // identifier characters: ASCII letters and digits
    IsIdent[] = false;
    IsIdent['A' .. 'Z' + 1] = true;
    IsIdent['a' .. 'z' + 1] = true;
    IsIdent['0' .. '9' + 1] = true;

    // symbol characters: everything above the space character that is not an
    // identifier character ...
    IsSymbol[0 .. ' ' + 1] = false;
    foreach (code; ' ' + 1 .. IsSymbol.length)
        IsSymbol[code] = !IsIdent[code];

    // ... except for the three quote characters
    foreach (quote; "'\"`")
        IsSymbol[quote] = false;
}