module gamma.input.epsilang.Scanner;

import gamma.util.Position;
import log;
import std.ascii;
import std.conv;
import std.range;
import std.stdio;

/**
 * Scanner that splits the complete contents of an input file into tokens.
 *
 * Every token is coded as a single character: either one of the constants
 * END, LITERAL, NUMBER, LEXICAL_VARIABLE, SYNTACTIC_VARIABLE, or the
 * character itself for anything else. Additional token data is available
 * through getRepresentation, getValue and getPosition after each call to read.
 */
class Scanner
{
    /**
     * Position within the scanner's source that is able to print an error
     * message together with the offending source line and a '^' marker
     * below the offending column.
     */
    private class MarkedLinePrinting : Position
    {
        /// Index into the source of the enclosing scanner.
        private size_t pos;

        this(size_t pos)
        {
            this.pos = pos;
        }

        /**
         * Logs the message for this position and increments the error count
         * of the enclosing scanner.
         *
         * Params:
         *     message = text to report
         */
        public void markError(string message)
        {
            // number of recorded line beginnings that are <= pos, i.e. the 1-based line number
            const lineNumber = this.outer.lineBeginPos.assumeSorted.lowerBound(this.pos + 1).length;
            const beginPos = this.outer.lineBeginPos[lineNumber - 1];
            const endPos = findLineSeparator(this.pos);

            if (beginPos <= this.pos)
            {
                char[] quotation = this.outer.source[beginPos .. endPos];
                char[] mark = new char[this.pos - beginPos + 1];

                // copy tabs so that the marker lines up with the quoted source line
                foreach (i; 0 .. mark.length - 1)
                    if (this.outer.source[beginPos + i] == '\t')
                        mark[i] = '\t';
                    else
                        mark[i] = ' ';
                mark[mark.length - 1] = '^';
                error!"%s: %s\n%s\n%s"(lineNumber, message, quotation, mark);
            }
            else
                // defensive fallback: beginPos was selected as a line beginning <= pos
                error!"%s: %s"(lineNumber, message);
            ++this.outer.errorCount;
        }

        /// Two positions are equal if they have the same index within the same scanner.
        public override bool opEquals(Object o)
        {
            if (auto that = cast(MarkedLinePrinting) o)
                return this.pos == that.pos && this.scanner == that.scanner;
            return false;
        }

        public override size_t toHash() nothrow @safe
        {
            return this.pos;
        }

        private Scanner scanner()
        {
            return this.outer;
        }
    }

    /// Token code for the end of the input; also used as sentinel at the end of the source.
    static const char END = 0;

    /// Token code for a string literal; its text is available through getRepresentation.
    static const char LITERAL = '"';

    /// Token code for a number; its value is available through getValue.
    static const char NUMBER = '0';

    /// Token code for a variable starting with a lower-case letter.
    static const char LEXICAL_VARIABLE = 'a';

    /// Token code for a variable starting with an upper-case letter.
    static const char SYNTACTIC_VARIABLE = 'A';

    /// Complete input, terminated by the END sentinel.
    private char[] source;

    /// Index of the next character to be scanned.
    private size_t pos = 0;

    /// Position of the most recently recognized token.
    private Position position;

    /// Text of the most recently recognized literal or variable.
    private string representation;

    /// Value of the most recently recognized number.
    private int value;

    /// Indices at which the lines seen so far begin, in ascending order.
    private size_t[] lineBeginPos;

    private int errorCount = 0;

    /**
     * Reads the complete input into memory and appends the END sentinel.
     *
     * Params:
     *     input = file to be scanned
     */
    this(File input)
    {
        import std.algorithm : joiner;
        import std.array : array;

        this.source = cast(char[]) input.byChunk(4096).joiner.array;
        this.source ~= END;
        this.lineBeginPos ~= this.pos;
    }

    /**
     * Skips white space and comments, then recognizes the next token.
     *
     * Returns: the recognized token, coded as a single character
     */
    char read()
    {
        import std.format : format;

        char scanToken()
        {
            char c;

            for (;;)
            {
                c = this.source[this.pos];
                if (c == '\n' || c == '\r')
                    skipLine;
                else if (isWhite(c))
                    ++this.pos;
                else if (c == '/')
                {
                    if (this.source[this.pos + 1] == '/')
                        skipLine;
                    else if (this.source[this.pos + 1] == '*')
                        readComment;
                    else
                        break;
                }
                else if (c == '*')
                    // skip unsupported lexical-variable marker
                    ++this.pos;
                else
                    break;
            }
            this.position = new MarkedLinePrinting(this.pos);
            this.representation = null;
            if (c == '"')
            {
                this.representation = readLiteral;
                return LITERAL;
            }
            else if (isDigit(c))
            {
                this.value = readNumber;
                return NUMBER;
            }
            else if (isLower(c))
            {
                this.representation = readVariable;
                return LEXICAL_VARIABLE;
            }
            else if (isUpper(c))
            {
                this.representation = readVariable;
                return SYNTACTIC_VARIABLE;
            }
            else if (c != END)
            {
                ++this.pos;
                return c;
            }
            else
                return END;
        }

        const c = scanToken;

        if (levels & Level.trace)
        {
            switch (c)
            {
            case END:
                getPosition.markError("END");
                break;
            case LITERAL:
                getPosition.markError(format!"LITERAL: %s"(getRepresentation));
                break;
            case NUMBER:
                getPosition.markError(format!"NUMBER: %s"(getValue));
                break;
            case LEXICAL_VARIABLE:
                getPosition.markError(format!"LEXICAL VARIABLE: %s"(getRepresentation));
                break;
            case SYNTACTIC_VARIABLE:
                getPosition.markError(format!"SYNTACTIC VARIABLE: %s"(getRepresentation));
                break;
            default:
                getPosition.markError(format!"CHAR: %s"(c));
                break;
            }
            // markError counted the trace output as an error: undo that
            --this.errorCount;
        }
        return c;
    }

    /**
     * Skips a block comment; block comments may be nested.
     * Reports an error at the beginning of the comment if the input ends
     * before the comment is terminated.
     */
    private void readComment()
    in (this.source[this.pos] == '/' && this.source[this.pos + 1] == '*')
    {
        const start = this.pos;
        int level = 1;
        // c1 is the character at this.pos, c2 the one following it;
        // c1 starts as a blank so that the opening "/*" is not matched again
        char c1 = ' ';
        char c2;

        ++this.pos;
        for (;;)
        {
            c2 = this.source[++this.pos];
            if (c1 == '/' && c2 == '*')
            {
                ++this.pos;
                ++level;
                c1 = this.source[this.pos];
            }
            else if (c1 == '*' && c2 == '/')
            {
                ++this.pos;
                if (--level == 0)
                    return;
                c1 = this.source[this.pos];
            }
            else
                c1 = c2;
            // go through skipLine so that line beginnings inside the comment get recorded
            while (c1 == '\n' || c1 == '\r')
            {
                skipLine;
                c1 = this.source[this.pos];
            }
            if (c1 == END)
            {
                Position position = new MarkedLinePrinting(start);

                position.markError("comment not terminated at end of input");
                return;
            }
        }
    }

    /**
     * Reads an escape sequence starting with a backslash.
     *
     * Returns: the character denoted by the escape sequence,
     *     or 0 if the escape sequence is illegal
     */
    private char readEscapeSequence()
    in (this.source[this.pos] == '\\')
    {
        const start = this.pos;
        char c = this.source[++this.pos];
        int code = 0;
        bool overflow = false;

        switch (c)
        {
        case 'b':
            c = '\b';
            break;
        case 't':
            c = '\t';
            break;
        case 'n':
            c = '\n';
            break;
        case 'f':
            c = '\f';
            break;
        case 'r':
            c = '\r';
            break;
        case '"':
            c = '"';
            break;
        case '\'':
            c = '\'';
            break;
        case '\\':
            c = '\\';
            break;
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
            while (isOctalDigit(this.source[this.pos]))
                ++this.pos;
            // the number of digits is not limited, so the conversion may overflow;
            // treat that like any other value that does not fit into a character
            try
            {
                code = this.source[start + 1 .. this.pos].to!int(8);
            }
            catch (ConvException)
            {
                overflow = true;
            }
            if (overflow || code > 0xff)
            {
                Position position = new MarkedLinePrinting(start);

                position.markError("octal character constant out of range");
                return 0;
            }
            return cast(char) code;
        case 'u':
            for (int i = 0; i < 4; ++i)
            {
                if (!isHexDigit(this.source[++this.pos]))
                {
                    Position position = new MarkedLinePrinting(this.pos);

                    position.markError("hexadecimal digit expected");
                    return 0;
                }
            }
            // exactly four validated hexadecimal digits: the conversion cannot fail
            code = this.source[start + 2 .. this.pos + 1].to!int(16);
            // NOTE(review): values above 0xff are truncated to a single char here
            // instead of being encoded as UTF-8 - confirm whether this is intended
            c = cast(char) code;
            break;
        default:
            if (c != '\n' && c != '\r' && c != END)
            {
                Position position = new MarkedLinePrinting(this.pos);

                position.markError("illegal escape character");
            }
            // the offending character is not consumed: the caller continues with it
            return 0;
        }
        ++this.pos;
        return c;
    }

    /**
     * Reads a string literal enclosed in double quotes, replacing escape sequences.
     * Reports an error for an empty literal and for a literal that is not
     * terminated on the same line.
     *
     * Returns: the text between the quotes, or the empty string if the
     *     literal is not terminated
     */
    private string readLiteral()
    in (this.source[this.pos] == '"')
    {
        string text;
        const begin = this.pos + 1;
        char c;

        // fast path: scan up to the first character that needs special treatment
        do
        {
            c = this.source[++this.pos];
        }
        while (c != '\\' && c != '"' && c != '\n' && c != '\r' && c != END);
        if (c == '\\')
        {
            char[] buffer;

            buffer ~= this.source[begin .. this.pos];
            do
            {
                if (c == '\\')
                {
                    // NOTE(review): for an illegal escape sequence this appends the
                    // returned 0 character to the text - confirm whether this is intended
                    buffer ~= readEscapeSequence;
                    c = this.source[this.pos];
                }
                else
                {
                    buffer ~= c;
                    c = this.source[++this.pos];
                }
            }
            while (c != '"' && c != '\n' && c != '\r' && c != END);
            text = buffer.idup;
        }
        else
        {
            if (c == '"' && this.pos == begin)
                this.position.markError("illegal empty string");
            text = this.source[begin .. this.pos].idup;
        }
        if (c != '"')
        {
            this.position.markError("string not terminated at end of line");
            text = "";
        }
        else
            ++this.pos;
        return text;
    }

    /**
     * Reads a sequence of decimal digits.
     * Reports an error if the number is not within the range [0, 9999];
     * this includes digit sequences that are too long to fit into an int.
     *
     * Returns: the value of the number, or 0 if it is out of range
     */
    private int readNumber()
    in (isDigit(this.source[this.pos]))
    {
        const begin = this.pos;
        int number = 0;
        bool overflow = false;

        while (isDigit(this.source[this.pos]))
            ++this.pos;
        // the slice consists of digits only, so the conversion can only fail on overflow
        try
        {
            number = this.source[begin .. this.pos].to!int;
        }
        catch (ConvException)
        {
            overflow = true;
        }
        if (overflow || number > 9999)
        {
            position.markError("number out of range [0, 9999]");
            number = 0;
        }
        return number;
    }

    /**
     * Reads a sequence of ASCII letters.
     *
     * Returns: the letters read
     */
    private string readVariable()
    in (isAlpha(this.source[this.pos]))
    {
        const begin = this.pos;

        while (isAlpha(this.source[this.pos]))
            ++this.pos;
        return this.source[begin .. this.pos].idup;
    }

    /**
     * Skips the rest of the current line including its line separator
     * ("\n", "\r" or "\r\n") and records the beginning of the next line.
     * At the end of the input the scanner stays on the END sentinel.
     */
    private void skipLine()
    {
        this.pos = findLineSeparator(this.pos);
        if (this.source[this.pos] != END)
        {
            if (this.source[this.pos] == '\r' && this.source[this.pos + 1] == '\n')
                ++this.pos;
            ++this.pos;
            this.lineBeginPos ~= this.pos;
        }
    }

    /**
     * Params:
     *     pos = index at which the search starts
     *
     * Returns: the index of the first '\n', '\r' or END sentinel at or after pos
     */
    private size_t findLineSeparator(size_t pos)
    {
        char c = this.source[pos];

        while (c != '\n' && c != '\r' && c != END)
            c = this.source[++pos];
        return pos;
    }

    /// Returns: the position of the most recently recognized token
    Position getPosition()
    {
        return this.position;
    }

    /// Returns: the text of the most recently recognized literal or variable
    string getRepresentation() const
    {
        return this.representation;
    }

    /// Returns: the value of the most recently recognized number
    int getValue() const
    {
        return this.value;
    }

    /// Returns: the number of errors reported so far
    int getErrorCount() const
    {
        return this.errorCount;
    }
}