// SDLang-D // Written in the D programming language. module sdlang.lexer; import std.algorithm; import std.array; static import std.ascii; import std.base64; import std.bigint; import std.conv; import std.datetime; import std.file; import std.format; import std.traits; import std.typecons; import std.uni; import std.utf; import std.variant; import sdlang.exception; import sdlang.symbol; import sdlang.token; import sdlang.util; alias sdlang.util.startsWith startsWith; Token[] lexFile(string filename) { auto source = cast(string)read(filename); return lexSource(source, filename); } Token[] lexSource(string source, string filename=null) { auto lexer = scoped!Lexer(source, filename); // Can't use 'std.array.array(Range)' because 'lexer' is scoped // and therefore cannot have its reference copied. Appender!(Token[]) tokens; foreach(tok; lexer) tokens.put(tok); return tokens.data; } // Kind of a poor-man's yield, but fast. // Only to be used inside Lexer.popFront (and Lexer.this). private template accept(string symbolName) { static assert(symbolName != "Value", "Value symbols must also take a value."); enum accept = acceptImpl!(symbolName, "null"); } private template accept(string symbolName, string value) { static assert(symbolName == "Value", "Only a Value symbol can take a value."); enum accept = acceptImpl!(symbolName, value); } private template accept(string symbolName, string value, string startLocation, string endLocation) { static assert(symbolName == "Value", "Only a Value symbol can take a value."); enum accept = (" { _front = makeToken!"~symbolName.stringof~"; _front.value = "~value~"; _front.location = "~(startLocation==""? "tokenStart" : startLocation)~"; _front.data = source[ "~(startLocation==""? "tokenStart.index" : startLocation)~" .. "~(endLocation==""? "location.index" : endLocation)~" ]; return; } ").replace("\n", ""); } private template acceptImpl(string symbolName, string value) { enum acceptImpl = (" { _front = makeToken!"~symbolName.stringof~"; _front.value = "~value~"; return; } ").replace("\n", ""); } class Lexer { string source; string filename; Location location; /// Location of current character in source private dchar ch; // Current character private dchar nextCh; // Lookahead character private size_t nextPos; // Position of lookahead character (an index into source) private bool hasNextCh; // If false, then there's no more lookahead, just EOF private size_t posAfterLookahead; // Position after lookahead character (an index into source) private Location tokenStart; // The starting location of the token being lexed // Length so far of the token being lexed, not including current char private size_t tokenLength; // Length in UTF-8 code units private size_t tokenLength32; // Length in UTF-32 code units // Slight kludge: // If a numeric fragment is found after a Date (separated by arbitrary // whitespace), it could be the "hours" part of a DateTime, or it could // be a separate numeric literal that simply follows a plain Date. If the // latter, then the Date must be emitted, but numeric fragment that was // found after it needs to be saved for the the lexer's next iteration. // // It's a slight kludge, and could instead be implemented as a slightly // kludgey parser hack, but it's the only situation where SDLang's lexing // needs to lookahead more than one character, so this is good enough. private struct LookaheadTokenInfo { bool exists = false; string numericFragment = ""; bool isNegative = false; Location tokenStart; } private LookaheadTokenInfo lookaheadTokenInfo; this(string source=null, string filename=null) { this.filename = filename; this.source = source; _front = Token(symbol!"Error", Location()); lookaheadTokenInfo = LookaheadTokenInfo.init; if( source.startsWith( ByteOrderMarks[BOM.UTF8] ) ) { source = source[ ByteOrderMarks[BOM.UTF8].length .. $ ]; this.source = source; } foreach(bom; ByteOrderMarks) if( source.startsWith(bom) ) error(Location(filename,0,0,0), "SDL spec only supports UTF-8, not UTF-16 or UTF-32"); if(source == "") mixin(accept!"EOF"); // Prime everything hasNextCh = true; nextCh = source.decode(posAfterLookahead); advanceChar(ErrorOnEOF.Yes); location = Location(filename, 0, 0, 0); popFront(); } @property bool empty() { return _front.symbol == symbol!"EOF"; } Token _front; @property Token front() { return _front; } @property bool isEOF() { return location.index == source.length && !lookaheadTokenInfo.exists; } private void error(string msg) { error(location, msg); } //TODO: Take varargs and use output range sink. private void error(Location loc, string msg) { throw new ParseException(loc, "Error: "~msg); } private Token makeToken(string symbolName)() { auto tok = Token(symbol!symbolName, tokenStart); tok.data = tokenData; return tok; } private @property string tokenData() { return source[ tokenStart.index .. location.index ]; } /// Check the lookahead character private bool lookahead(dchar ch) { return hasNextCh && nextCh == ch; } private bool lookahead(bool function(dchar) condition) { return hasNextCh && condition(nextCh); } private static bool isNewline(dchar ch) { return ch == '\n' || ch == '\r' || ch == lineSep || ch == paraSep; } /// Returns the length of the newline sequence, or zero if the current /// character is not a newline /// /// Note that there are only single character sequences and the two /// character sequence `\r\n` as used on Windows. private size_t isAtNewline() { if(ch == '\n' || ch == lineSep || ch == paraSep) return 1; else if(ch == '\r') return lookahead('\n') ? 2 : 1; else return 0; } /// Is 'ch' a valid base 64 character? private bool isBase64(dchar ch) { if(ch >= 'A' && ch <= 'Z') return true; if(ch >= 'a' && ch <= 'z') return true; if(ch >= '0' && ch <= '9') return true; return ch == '+' || ch == '/' || ch == '='; } /// Is the current character one that's allowed /// immediately *after* an int/float literal? private bool isEndOfNumber() { if(isEOF) return true; return !isDigit(ch) && ch != ':' && ch != '_' && !isAlpha(ch); } /// Is current character the last one in an ident? private bool isEndOfIdentCached = false; private bool _isEndOfIdent; private bool isEndOfIdent() { if(!isEndOfIdentCached) { if(!hasNextCh) _isEndOfIdent = true; else _isEndOfIdent = !isIdentChar(nextCh); isEndOfIdentCached = true; } return _isEndOfIdent; } /// Is 'ch' a character that's allowed *somewhere* in an identifier? private bool isIdentChar(dchar ch) { if(isAlpha(ch)) return true; else if(isNumber(ch)) return true; else return ch == '-' || ch == '_' || ch == '.' || ch == '$'; } private bool isDigit(dchar ch) { return ch >= '0' && ch <= '9'; } private enum KeywordResult { Accept, // Keyword is matched Continue, // Keyword is not matched *yet* Failed, // Keyword doesn't match } private KeywordResult checkKeyword(dstring keyword32) { // Still within length of keyword if(tokenLength32 < keyword32.length) { if(ch == keyword32[tokenLength32]) return KeywordResult.Continue; else return KeywordResult.Failed; } // At position after keyword else if(tokenLength32 == keyword32.length) { if(isEOF || !isIdentChar(ch)) { debug assert(tokenData == to!string(keyword32)); return KeywordResult.Accept; } else return KeywordResult.Failed; } assert(0, "Fell off end of keyword to check"); } enum ErrorOnEOF { No, Yes } /// Advance one code point. private void advanceChar(ErrorOnEOF errorOnEOF) { if(auto cnt = isAtNewline()) { if (cnt == 1) location.line++; location.col = 0; } else location.col++; location.index = nextPos; nextPos = posAfterLookahead; ch = nextCh; if(!hasNextCh) { if(errorOnEOF == ErrorOnEOF.Yes) error("Unexpected end of file"); return; } tokenLength32++; tokenLength = location.index - tokenStart.index; if(nextPos == source.length) { nextCh = dchar.init; hasNextCh = false; return; } nextCh = source.decode(posAfterLookahead); isEndOfIdentCached = false; } /// Advances the specified amount of characters private void advanceChar(size_t count, ErrorOnEOF errorOnEOF) { while(count-- > 0) advanceChar(errorOnEOF); } void popFront() { // -- Main Lexer ------------- eatWhite(); if(isEOF) mixin(accept!"EOF"); tokenStart = location; tokenLength = 0; tokenLength32 = 0; isEndOfIdentCached = false; if(lookaheadTokenInfo.exists) { tokenStart = lookaheadTokenInfo.tokenStart; auto prevLATokenInfo = lookaheadTokenInfo; lookaheadTokenInfo = LookaheadTokenInfo.init; lexNumeric(prevLATokenInfo); return; } if(ch == '=') { advanceChar(ErrorOnEOF.No); mixin(accept!"="); } else if(ch == '{') { advanceChar(ErrorOnEOF.No); mixin(accept!"{"); } else if(ch == '}') { advanceChar(ErrorOnEOF.No); mixin(accept!"}"); } else if(ch == ':') { advanceChar(ErrorOnEOF.No); mixin(accept!":"); } else if(ch == ';') { advanceChar(ErrorOnEOF.No); mixin(accept!"EOL"); } else if(auto cnt = isAtNewline()) { advanceChar(cnt, ErrorOnEOF.No); mixin(accept!"EOL"); } else if(isAlpha(ch) || ch == '_') lexIdentKeyword(); else if(ch == '"') lexRegularString(); else if(ch == '`') lexRawString(); else if(ch == '\'') lexCharacter(); else if(ch == '[') lexBinary(); else if(ch == '-' || ch == '.' || isDigit(ch)) lexNumeric(); else { if(ch == ',') error("Unexpected comma: SDLang is not a comma-separated format."); else if(std.ascii.isPrintable(ch)) error(text("Unexpected: ", ch)); else error("Unexpected character code 0x%02X".format(ch)); advanceChar(ErrorOnEOF.No); } } /// Lex Ident or Keyword private void lexIdentKeyword() { assert(isAlpha(ch) || ch == '_'); // Keyword struct Key { dstring name; Value value; bool failed = false; } static Key[5] keywords; static keywordsInited = false; if(!keywordsInited) { // Value (as a std.variant-based type) can't be statically inited keywords[0] = Key("true", Value(true )); keywords[1] = Key("false", Value(false)); keywords[2] = Key("on", Value(true )); keywords[3] = Key("off", Value(false)); keywords[4] = Key("null", Value(null )); keywordsInited = true; } foreach(ref key; keywords) key.failed = false; auto numKeys = keywords.length; do { foreach(ref key; keywords) if(!key.failed) { final switch(checkKeyword(key.name)) { case KeywordResult.Accept: mixin(accept!("Value", "key.value")); case KeywordResult.Continue: break; case KeywordResult.Failed: key.failed = true; numKeys--; break; } } if(numKeys == 0) { lexIdent(); return; } advanceChar(ErrorOnEOF.No); } while(!isEOF); foreach(ref key; keywords) if(!key.failed) if(key.name.length == tokenLength32+1) mixin(accept!("Value", "key.value")); mixin(accept!"Ident"); } /// Lex Ident private void lexIdent() { if(tokenLength == 0) assert(isAlpha(ch) || ch == '_'); while(!isEOF && isIdentChar(ch)) advanceChar(ErrorOnEOF.No); mixin(accept!"Ident"); } /// Lex regular string private void lexRegularString() { assert(ch == '"'); Appender!string buf; size_t spanStart = nextPos; // Doesn't include current character void updateBuf() { if(location.index == spanStart) return; buf.put( source[spanStart..location.index] ); } advanceChar(ErrorOnEOF.Yes); while(ch != '"') { if(ch == '\\') { updateBuf(); bool wasEscSequence = true; if(hasNextCh) { switch(nextCh) { case 'n': buf.put('\n'); break; case 'r': buf.put('\r'); break; case 't': buf.put('\t'); break; case '"': buf.put('\"'); break; case '\\': buf.put('\\'); break; default: wasEscSequence = false; break; } } if(wasEscSequence) { advanceChar(ErrorOnEOF.Yes); spanStart = nextPos; } else { eatWhite(false); spanStart = location.index; } } else if(isNewline(ch)) error("Unescaped newlines are only allowed in raw strings, not regular strings."); advanceChar(ErrorOnEOF.Yes); } updateBuf(); advanceChar(ErrorOnEOF.No); // Skip closing double-quote mixin(accept!("Value", "buf.data")); } /// Lex raw string private void lexRawString() { assert(ch == '`'); do advanceChar(ErrorOnEOF.Yes); while(ch != '`'); advanceChar(ErrorOnEOF.No); // Skip closing back-tick mixin(accept!("Value", "tokenData[1..$-1]")); } /// Lex character literal private void lexCharacter() { assert(ch == '\''); advanceChar(ErrorOnEOF.Yes); // Skip opening single-quote dchar value; if(ch == '\\') { advanceChar(ErrorOnEOF.Yes); // Skip escape backslash switch(ch) { case 'n': value = '\n'; break; case 'r': value = '\r'; break; case 't': value = '\t'; break; case '\'': value = '\''; break; case '\\': value = '\\'; break; default: error("Invalid escape sequence."); } } else if(isNewline(ch)) error("Newline not alowed in character literal."); else value = ch; advanceChar(ErrorOnEOF.Yes); // Skip the character itself if(ch == '\'') advanceChar(ErrorOnEOF.No); // Skip closing single-quote else error("Expected closing single-quote."); mixin(accept!("Value", "value")); } /// Lex base64 binary literal private void lexBinary() { assert(ch == '['); advanceChar(ErrorOnEOF.Yes); void eatBase64Whitespace() { while(!isEOF && isWhite(ch)) { if(isNewline(ch)) advanceChar(ErrorOnEOF.Yes); if(!isEOF && isWhite(ch)) eatWhite(); } } eatBase64Whitespace(); // Iterates all valid base64 characters, ending at ']'. // Skips all whitespace. Throws on invalid chars. struct Base64InputRange { Lexer lexer; private bool isInited = false; private int numInputCharsMod4 = 0; @property bool empty() { if(lexer.ch == ']') { if(numInputCharsMod4 != 0) lexer.error("Length of Base64 encoding must be a multiple of 4. ("~to!string(numInputCharsMod4)~")"); return true; } return false; } @property dchar front() { return lexer.ch; } void popFront() { auto lex = lexer; if(!isInited) { if(lexer.isBase64(lexer.ch)) { numInputCharsMod4++; numInputCharsMod4 %= 4; } isInited = true; } lex.advanceChar(lex.ErrorOnEOF.Yes); eatBase64Whitespace(); if(lex.isEOF) lex.error("Unexpected end of file."); if(lex.ch != ']') { if(!lex.isBase64(lex.ch)) lex.error("Invalid character in base64 binary literal."); numInputCharsMod4++; numInputCharsMod4 %= 4; } } } // This is a slow ugly hack. It's necessary because Base64.decode // currently requires the source to have known length. //TODO: Remove this when DMD issue #9543 is fixed. dchar[] tmpBuf = array(Base64InputRange(this)); Appender!(ubyte[]) outputBuf; // Ugly workaround for DMD issue #9102 //TODO: Remove this when DMD #9102 is fixed struct OutputBuf { void put(ubyte ch) { outputBuf.put(ch); } } try //Base64.decode(Base64InputRange(this), OutputBuf()); Base64.decode(tmpBuf, OutputBuf()); catch(Base64Exception e) error("Invalid character in base64 binary literal."); advanceChar(ErrorOnEOF.No); // Skip ']' mixin(accept!("Value", "outputBuf.data")); } private BigInt toBigInt(bool isNegative, string absValue) { auto num = BigInt(absValue); assert(num >= 0); if(isNegative) num = -num; return num; } /// Lex [0-9]+, but without emitting a token. /// This is used by the other numeric parsing functions. private string lexNumericFragment() { if(!isDigit(ch)) error("Expected a digit 0-9."); auto spanStart = location.index; do { advanceChar(ErrorOnEOF.No); } while(!isEOF && isDigit(ch)); return source[spanStart..location.index]; } /// Lex anything that starts with 0-9 or '-'. Ints, floats, dates, etc. private void lexNumeric(LookaheadTokenInfo laTokenInfo = LookaheadTokenInfo.init) { bool isNegative; string firstFragment; if(laTokenInfo.exists) { firstFragment = laTokenInfo.numericFragment; isNegative = laTokenInfo.isNegative; } else { assert(ch == '-' || ch == '.' || isDigit(ch)); // Check for negative isNegative = ch == '-'; if(isNegative) advanceChar(ErrorOnEOF.Yes); // Some floating point with omitted leading zero? if(ch == '.') { lexFloatingPoint(""); return; } firstFragment = lexNumericFragment(); } // Long integer (64-bit signed)? if(ch == 'L' || ch == 'l') { advanceChar(ErrorOnEOF.No); // BigInt(long.min) is a workaround for DMD issue #9548 auto num = toBigInt(isNegative, firstFragment); if(num < BigInt(long.min) || num > long.max) error(tokenStart, "Value doesn't fit in 64-bit signed long integer: "~to!string(num)); mixin(accept!("Value", "num.toLong()")); } // Float (32-bit signed)? else if(ch == 'F' || ch == 'f') { auto value = to!float(tokenData); advanceChar(ErrorOnEOF.No); mixin(accept!("Value", "value")); } // Double float (64-bit signed) with suffix? else if((ch == 'D' || ch == 'd') && !lookahead(':') ) { auto value = to!double(tokenData); advanceChar(ErrorOnEOF.No); mixin(accept!("Value", "value")); } // Decimal (128+ bits signed)? else if( (ch == 'B' || ch == 'b') && (lookahead('D') || lookahead('d')) ) { auto value = to!real(tokenData); advanceChar(ErrorOnEOF.No); advanceChar(ErrorOnEOF.No); mixin(accept!("Value", "value")); } // Some floating point? else if(ch == '.') lexFloatingPoint(firstFragment); // Some date? else if(ch == '/' && hasNextCh && isDigit(nextCh)) lexDate(isNegative, firstFragment); // Some time span? else if(ch == ':' || ch == 'd') lexTimeSpan(isNegative, firstFragment); // Integer (32-bit signed)? else if(isEndOfNumber()) { auto num = toBigInt(isNegative, firstFragment); if(num < int.min || num > int.max) error(tokenStart, "Value doesn't fit in 32-bit signed integer: "~to!string(num)); mixin(accept!("Value", "num.toInt()")); } // Invalid suffix else error("Invalid integer suffix."); } /// Lex any floating-point literal (after the initial numeric fragment was lexed) private void lexFloatingPoint(string firstPart) { assert(ch == '.'); advanceChar(ErrorOnEOF.No); auto secondPart = lexNumericFragment(); try { // Double float (64-bit signed) with suffix? if(ch == 'D' || ch == 'd') { auto value = to!double(tokenData); advanceChar(ErrorOnEOF.No); mixin(accept!("Value", "value")); } // Float (32-bit signed)? else if(ch == 'F' || ch == 'f') { auto value = to!float(tokenData); advanceChar(ErrorOnEOF.No); mixin(accept!("Value", "value")); } // Decimal (128+ bits signed)? else if(ch == 'B' || ch == 'b') { auto value = to!real(tokenData); advanceChar(ErrorOnEOF.Yes); if(!isEOF && (ch == 'D' || ch == 'd')) { advanceChar(ErrorOnEOF.No); if(isEndOfNumber()) mixin(accept!("Value", "value")); } error("Invalid floating point suffix."); } // Double float (64-bit signed) without suffix? else if(isEOF || !isIdentChar(ch)) { auto value = to!double(tokenData); mixin(accept!("Value", "value")); } // Invalid suffix else error("Invalid floating point suffix."); } catch(ConvException e) error("Invalid floating point literal."); } private Date makeDate(bool isNegative, string yearStr, string monthStr, string dayStr) { BigInt biTmp; biTmp = BigInt(yearStr); if(isNegative) biTmp = -biTmp; if(biTmp < int.min || biTmp > int.max) error(tokenStart, "Date's year is out of range. (Must fit within a 32-bit signed int.)"); auto year = biTmp.toInt(); biTmp = BigInt(monthStr); if(biTmp < 1 || biTmp > 12) error(tokenStart, "Date's month is out of range."); auto month = biTmp.toInt(); biTmp = BigInt(dayStr); if(biTmp < 1 || biTmp > 31) error(tokenStart, "Date's month is out of range."); auto day = biTmp.toInt(); return Date(year, month, day); } private DateTimeFrac makeDateTimeFrac( bool isNegative, Date date, string hourStr, string minuteStr, string secondStr, string millisecondStr ) { BigInt biTmp; biTmp = BigInt(hourStr); if(biTmp < int.min || biTmp > int.max) error(tokenStart, "Datetime's hour is out of range."); auto numHours = biTmp.toInt(); biTmp = BigInt(minuteStr); if(biTmp < 0 || biTmp > int.max) error(tokenStart, "Datetime's minute is out of range."); auto numMinutes = biTmp.toInt(); int numSeconds = 0; if(secondStr != "") { biTmp = BigInt(secondStr); if(biTmp < 0 || biTmp > int.max) error(tokenStart, "Datetime's second is out of range."); numSeconds = biTmp.toInt(); } int millisecond = 0; if(millisecondStr != "") { biTmp = BigInt(millisecondStr); if(biTmp < 0 || biTmp > int.max) error(tokenStart, "Datetime's millisecond is out of range."); millisecond = biTmp.toInt(); if(millisecondStr.length == 1) millisecond *= 100; else if(millisecondStr.length == 2) millisecond *= 10; } Duration fracSecs = millisecond.msecs; auto offset = hours(numHours) + minutes(numMinutes) + seconds(numSeconds); if(isNegative) { offset = -offset; fracSecs = -fracSecs; } return DateTimeFrac(DateTime(date) + offset, fracSecs); } private Duration makeDuration( bool isNegative, string dayStr, string hourStr, string minuteStr, string secondStr, string millisecondStr ) { BigInt biTmp; long day = 0; if(dayStr != "") { biTmp = BigInt(dayStr); if(biTmp < long.min || biTmp > long.max) error(tokenStart, "Time span's day is out of range."); day = biTmp.toLong(); } biTmp = BigInt(hourStr); if(biTmp < long.min || biTmp > long.max) error(tokenStart, "Time span's hour is out of range."); auto hour = biTmp.toLong(); biTmp = BigInt(minuteStr); if(biTmp < long.min || biTmp > long.max) error(tokenStart, "Time span's minute is out of range."); auto minute = biTmp.toLong(); biTmp = BigInt(secondStr); if(biTmp < long.min || biTmp > long.max) error(tokenStart, "Time span's second is out of range."); auto second = biTmp.toLong(); long millisecond = 0; if(millisecondStr != "") { biTmp = BigInt(millisecondStr); if(biTmp < long.min || biTmp > long.max) error(tokenStart, "Time span's millisecond is out of range."); millisecond = biTmp.toLong(); if(millisecondStr.length == 1) millisecond *= 100; else if(millisecondStr.length == 2) millisecond *= 10; } auto duration = dur!"days" (day) + dur!"hours" (hour) + dur!"minutes"(minute) + dur!"seconds"(second) + dur!"msecs" (millisecond); if(isNegative) duration = -duration; return duration; } // This has to reproduce some weird corner case behaviors from the // original Java version of SDL. So some of this may seem weird. private Nullable!Duration getTimeZoneOffset(string str) { if(str.length < 2) return Nullable!Duration(); // Unknown timezone if(str[0] != '+' && str[0] != '-') return Nullable!Duration(); // Unknown timezone auto isNegative = str[0] == '-'; string numHoursStr; string numMinutesStr; if(str[1] == ':') { numMinutesStr = str[1..$]; numHoursStr = ""; } else { numMinutesStr = str.find(':'); numHoursStr = str[1 .. $-numMinutesStr.length]; } long numHours = 0; long numMinutes = 0; bool isUnknown = false; try { switch(numHoursStr.length) { case 0: if(numMinutesStr.length == 3) { numHours = 0; numMinutes = to!long(numMinutesStr[1..$]); } else isUnknown = true; break; case 1: case 2: if(numMinutesStr.length == 0) { numHours = to!long(numHoursStr); numMinutes = 0; } else if(numMinutesStr.length == 3) { numHours = to!long(numHoursStr); numMinutes = to!long(numMinutesStr[1..$]); } else isUnknown = true; break; default: if(numMinutesStr.length == 0) { // Yes, this is correct numHours = 0; numMinutes = to!long(numHoursStr[1..$]); } else isUnknown = true; break; } } catch(ConvException e) isUnknown = true; if(isUnknown) return Nullable!Duration(); // Unknown timezone auto timeZoneOffset = hours(numHours) + minutes(numMinutes); if(isNegative) timeZoneOffset = -timeZoneOffset; // Timezone valid return Nullable!Duration(timeZoneOffset); } /// Lex date or datetime (after the initial numeric fragment was lexed) private void lexDate(bool isDateNegative, string yearStr) { assert(ch == '/'); // Lex months advanceChar(ErrorOnEOF.Yes); // Skip '/' auto monthStr = lexNumericFragment(); // Lex days if(ch != '/') error("Invalid date format: Missing days."); advanceChar(ErrorOnEOF.Yes); // Skip '/' auto dayStr = lexNumericFragment(); auto date = makeDate(isDateNegative, yearStr, monthStr, dayStr); if(!isEndOfNumber() && ch != '/') error("Dates cannot have suffixes."); // Date? if(isEOF) mixin(accept!("Value", "date")); auto endOfDate = location; while( !isEOF && ( ch == '\\' || ch == '/' || (isWhite(ch) && !isNewline(ch)) ) ) { if(ch == '\\' && hasNextCh && isNewline(nextCh)) { advanceChar(ErrorOnEOF.Yes); if(isAtNewline()) advanceChar(ErrorOnEOF.Yes); advanceChar(ErrorOnEOF.No); } eatWhite(); } // Date? if(isEOF || (!isDigit(ch) && ch != '-')) mixin(accept!("Value", "date", "", "endOfDate.index")); auto startOfTime = location; // Is time negative? bool isTimeNegative = ch == '-'; if(isTimeNegative) advanceChar(ErrorOnEOF.Yes); // Lex hours auto hourStr = ch == '.'? "" : lexNumericFragment(); // Lex minutes if(ch != ':') { // No minutes found. Therefore we had a plain Date followed // by a numeric literal, not a DateTime. lookaheadTokenInfo.exists = true; lookaheadTokenInfo.numericFragment = hourStr; lookaheadTokenInfo.isNegative = isTimeNegative; lookaheadTokenInfo.tokenStart = startOfTime; mixin(accept!("Value", "date", "", "endOfDate.index")); } advanceChar(ErrorOnEOF.Yes); // Skip ':' auto minuteStr = lexNumericFragment(); // Lex seconds, if exists string secondStr; if(ch == ':') { advanceChar(ErrorOnEOF.Yes); // Skip ':' secondStr = lexNumericFragment(); } // Lex milliseconds, if exists string millisecondStr; if(ch == '.') { advanceChar(ErrorOnEOF.Yes); // Skip '.' millisecondStr = lexNumericFragment(); } auto dateTimeFrac = makeDateTimeFrac(isTimeNegative, date, hourStr, minuteStr, secondStr, millisecondStr); // Lex zone, if exists if(ch == '-') { advanceChar(ErrorOnEOF.Yes); // Skip '-' auto timezoneStart = location; if(!isAlpha(ch)) error("Invalid timezone format."); while(!isEOF && !isWhite(ch)) advanceChar(ErrorOnEOF.No); auto timezoneStr = source[timezoneStart.index..location.index]; if(timezoneStr.startsWith("GMT")) { auto isoPart = timezoneStr["GMT".length..$]; auto offset = getTimeZoneOffset(isoPart); if(offset.isNull()) { // Unknown time zone mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)")); } else { auto timezone = new immutable SimpleTimeZone(offset.get()); mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezone)")); } } try { auto timezone = TimeZone.getTimeZone(timezoneStr); if(timezone) mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezone)")); } catch(TimeException e) { // Time zone not found. So just move along to "Unknown time zone" below. } // Unknown time zone mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)")); } if(!isEndOfNumber()) error("Date-Times cannot have suffixes."); mixin(accept!("Value", "dateTimeFrac")); } /// Lex time span (after the initial numeric fragment was lexed) private void lexTimeSpan(bool isNegative, string firstPart) { assert(ch == ':' || ch == 'd'); string dayStr = ""; string hourStr; // Lexed days? bool hasDays = ch == 'd'; if(hasDays) { dayStr = firstPart; advanceChar(ErrorOnEOF.Yes); // Skip 'd' // Lex hours if(ch != ':') error("Invalid time span format: Missing hours."); advanceChar(ErrorOnEOF.Yes); // Skip ':' hourStr = lexNumericFragment(); } else hourStr = firstPart; // Lex minutes if(ch != ':') error("Invalid time span format: Missing minutes."); advanceChar(ErrorOnEOF.Yes); // Skip ':' auto minuteStr = lexNumericFragment(); // Lex seconds if(ch != ':') error("Invalid time span format: Missing seconds."); advanceChar(ErrorOnEOF.Yes); // Skip ':' auto secondStr = lexNumericFragment(); // Lex milliseconds, if exists string millisecondStr = ""; if(ch == '.') { advanceChar(ErrorOnEOF.Yes); // Skip '.' millisecondStr = lexNumericFragment(); } if(!isEndOfNumber()) error("Time spans cannot have suffixes."); auto duration = makeDuration(isNegative, dayStr, hourStr, minuteStr, secondStr, millisecondStr); mixin(accept!("Value", "duration")); } /// Advances past whitespace and comments private void eatWhite(bool allowComments=true) { // -- Comment/Whitepace Lexer ------------- enum State { normal, lineComment, // Got "#" or "//" or "--", Eating everything until newline blockComment, // Got "/*", Eating everything until "*/" } if(isEOF) return; Location commentStart; State state = State.normal; bool consumeNewlines = false; bool hasConsumedNewline = false; while(true) { final switch(state) { case State.normal: if(ch == '\\') { commentStart = location; consumeNewlines = true; hasConsumedNewline = false; } else if(ch == '#') { if(!allowComments) return; commentStart = location; state = State.lineComment; continue; } else if(ch == '/' || ch == '-') { commentStart = location; if(lookahead(ch)) { if(!allowComments) return; advanceChar(ErrorOnEOF.No); state = State.lineComment; continue; } else if(ch == '/' && lookahead('*')) { if(!allowComments) return; advanceChar(ErrorOnEOF.No); state = State.blockComment; continue; } else return; // Done } else if(isAtNewline()) { if(consumeNewlines) hasConsumedNewline = true; else return; // Done } else if(!isWhite(ch)) { if(consumeNewlines) { if(hasConsumedNewline) return; // Done else error("Only whitespace can come between a line-continuation backslash and the following newline."); } else return; // Done } break; case State.lineComment: if(lookahead(&isNewline)) state = State.normal; break; case State.blockComment: if(ch == '*' && lookahead('/')) { advanceChar(ErrorOnEOF.No); state = State.normal; } break; } advanceChar(ErrorOnEOF.No); if(isEOF) { // Reached EOF if(consumeNewlines && !hasConsumedNewline) error("Missing newline after line-continuation backslash."); else if(state == State.blockComment) error(commentStart, "Unterminated block comment."); else return; // Done, reached EOF } } } } version(unittest) { import std.stdio; version(Have_unit_threaded) import unit_threaded; else { enum DontTest; } private auto loc = Location("filename", 0, 0, 0); private auto loc2 = Location("a", 1, 1, 1); @("lexer: EOL") unittest { assert([Token(symbol!"EOL",loc) ] == [Token(symbol!"EOL",loc) ] ); assert([Token(symbol!"EOL",loc,Value(7),"A")] == [Token(symbol!"EOL",loc2,Value(7),"B")] ); } private int numErrors = 0; @DontTest private void testLex(string source, Token[] expected, bool test_locations = false, string file=__FILE__, size_t line=__LINE__) { Token[] actual; try actual = lexSource(source, "filename"); catch(ParseException e) { numErrors++; stderr.writeln(file, "(", line, "): testLex failed on: ", source); stderr.writeln(" Expected:"); stderr.writeln(" ", expected); stderr.writeln(" Actual: ParseException thrown:"); stderr.writeln(" ", e.msg); return; } bool is_same = actual == expected; if (is_same && test_locations) { is_same = actual.map!(t => t.location).equal(expected.map!(t => t.location)); } if(!is_same) { numErrors++; stderr.writeln(file, "(", line, "): testLex failed on: ", source); stderr.writeln(" Expected:"); stderr.writeln(" ", expected); stderr.writeln(" Actual:"); stderr.writeln(" ", actual); if(expected.length > 1 || actual.length > 1) { stderr.writeln(" expected.length: ", expected.length); stderr.writeln(" actual.length: ", actual.length); if(actual.length == expected.length) foreach(i; 0..actual.length) if(actual[i] != expected[i]) { stderr.writeln(" Unequal at index #", i, ":"); stderr.writeln(" Expected:"); stderr.writeln(" ", expected[i]); stderr.writeln(" Actual:"); stderr.writeln(" ", actual[i]); } } } } private void testLexThrows(string file=__FILE__, size_t line=__LINE__)(string source) { bool hadException = false; Token[] actual; try actual = lexSource(source, "filename"); catch(ParseException e) hadException = true; if(!hadException) { numErrors++; stderr.writeln(file, "(", line, "): testLex failed on: ", source); stderr.writeln(" Expected ParseException"); stderr.writeln(" Actual:"); stderr.writeln(" ", actual); } } } @("sdlang lexer") unittest { testLex("", []); testLex(" ", []); testLex("\\\n", []); testLex("/*foo*/", []); testLex("/* multiline \n comment */", []); testLex("/* * */", []); testLexThrows("/* "); testLex(":", [ Token(symbol!":", loc) ]); testLex("=", [ Token(symbol!"=", loc) ]); testLex("{", [ Token(symbol!"{", loc) ]); testLex("}", [ Token(symbol!"}", loc) ]); testLex(";", [ Token(symbol!"EOL",loc) ]); testLex("\n", [ Token(symbol!"EOL",loc) ]); testLex("foo", [ Token(symbol!"Ident",loc,Value(null),"foo") ]); testLex("_foo", [ Token(symbol!"Ident",loc,Value(null),"_foo") ]); testLex("foo.bar", [ Token(symbol!"Ident",loc,Value(null),"foo.bar") ]); testLex("foo-bar", [ Token(symbol!"Ident",loc,Value(null),"foo-bar") ]); testLex("foo.", [ Token(symbol!"Ident",loc,Value(null),"foo.") ]); testLex("foo-", [ Token(symbol!"Ident",loc,Value(null),"foo-") ]); testLexThrows(".foo"); testLex("foo bar", [ Token(symbol!"Ident",loc,Value(null),"foo"), Token(symbol!"Ident",loc,Value(null),"bar"), ]); testLex("foo \\ \n \n bar", [ Token(symbol!"Ident",loc,Value(null),"foo"), Token(symbol!"Ident",loc,Value(null),"bar"), ]); testLex("foo \\ \n \\ \n bar", [ Token(symbol!"Ident",loc,Value(null),"foo"), Token(symbol!"Ident",loc,Value(null),"bar"), ]); testLexThrows("foo \\ "); testLexThrows("foo \\ bar"); testLexThrows("foo \\ \n \\ "); testLexThrows("foo \\ \n \\ bar"); testLex("foo : = { } ; \n bar \n", [ Token(symbol!"Ident",loc,Value(null),"foo"), Token(symbol!":",loc), Token(symbol!"=",loc), Token(symbol!"{",loc), Token(symbol!"}",loc), Token(symbol!"EOL",loc), Token(symbol!"EOL",loc), Token(symbol!"Ident",loc,Value(null),"bar"), Token(symbol!"EOL",loc), ]); testLexThrows("<"); testLexThrows("*"); testLexThrows(`\`); // Integers testLex( "7", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]); testLex( "-7", [ Token(symbol!"Value",loc,Value(cast( int)-7)) ]); testLex( "7L", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]); testLex( "7l", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]); testLex("-7L", [ Token(symbol!"Value",loc,Value(cast(long)-7)) ]); testLex( "0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]); testLex( "-0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]); testLex("7/**/", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]); testLex("7#", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]); testLex("7 A", [ Token(symbol!"Value",loc,Value(cast(int)7)), Token(symbol!"Ident",loc,Value( null),"A"), ]); testLexThrows("7A"); testLexThrows("-A"); testLexThrows(`-""`); testLex("7;", [ Token(symbol!"Value",loc,Value(cast(int)7)), Token(symbol!"EOL",loc), ]); // Floats testLex("1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]); testLex("1.2f" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]); testLex("1.2" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]); testLex("1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]); testLex("1.2d" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]); testLex("1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]); testLex("1.2bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]); testLex("1.2Bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]); testLex("1.2bD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]); testLex(".2F" , [ Token(symbol!"Value",loc,Value(cast( float)0.2)) ]); testLex(".2" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]); testLex(".2D" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]); testLex(".2BD", [ Token(symbol!"Value",loc,Value(cast( real)0.2)) ]); testLex("-1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-1.2)) ]); testLex("-1.2" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]); testLex("-1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]); testLex("-1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-1.2)) ]); testLex("-.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-0.2)) ]); testLex("-.2" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]); testLex("-.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]); testLex("-.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-0.2)) ]); testLex( "0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]); testLex( "0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]); testLex( "0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]); testLex("-0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]); testLex("-0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]); testLex("-0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]); testLex( "7F" , [ Token(symbol!"Value",loc,Value(cast( float)7.0)) ]); testLex( "7D" , [ Token(symbol!"Value",loc,Value(cast(double)7.0)) ]); testLex( "7BD" , [ Token(symbol!"Value",loc,Value(cast( real)7.0)) ]); testLex( "0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]); testLex( "0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]); testLex( "0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]); testLex("-0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]); testLex("-0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]); testLex("-0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]); testLex("1.2 F", [ Token(symbol!"Value",loc,Value(cast(double)1.2)), Token(symbol!"Ident",loc,Value( null),"F"), ]); testLexThrows("1.2A"); testLexThrows("1.2B"); testLexThrows("1.2BDF"); testLex("1.2;", [ Token(symbol!"Value",loc,Value(cast(double)1.2)), Token(symbol!"EOL",loc), ]); testLex("1.2F;", [ Token(symbol!"Value",loc,Value(cast(float)1.2)), Token(symbol!"EOL",loc), ]); testLex("1.2BD;", [ Token(symbol!"Value",loc,Value(cast(real)1.2)), Token(symbol!"EOL",loc), ]); // Booleans and null testLex("true", [ Token(symbol!"Value",loc,Value( true)) ]); testLex("false", [ Token(symbol!"Value",loc,Value(false)) ]); testLex("on", [ Token(symbol!"Value",loc,Value( true)) ]); testLex("off", [ Token(symbol!"Value",loc,Value(false)) ]); testLex("null", [ Token(symbol!"Value",loc,Value( null)) ]); testLex("TRUE", [ Token(symbol!"Ident",loc,Value(null),"TRUE") ]); testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]); testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]); testLex("tru", [ Token(symbol!"Ident",loc,Value(null),"tru") ]); testLex("truX", [ Token(symbol!"Ident",loc,Value(null),"truX") ]); testLex("trueX", [ Token(symbol!"Ident",loc,Value(null),"trueX") ]); // Raw Backtick Strings testLex("`hello world`", [ Token(symbol!"Value",loc,Value(`hello world` )) ]); testLex("` hello world `", [ Token(symbol!"Value",loc,Value(` hello world ` )) ]); testLex("`hello \\t world`", [ Token(symbol!"Value",loc,Value(`hello \t world`)) ]); testLex("`hello \\n world`", [ Token(symbol!"Value",loc,Value(`hello \n world`)) ]); testLex("`hello \n world`", [ Token(symbol!"Value",loc,Value("hello \n world")) ]); testLex("`hello \r\n world`", [ Token(symbol!"Value",loc,Value("hello \r\n world")) ]); testLex("`hello \"world\"`", [ Token(symbol!"Value",loc,Value(`hello "world"` )) ]); testLexThrows("`foo"); testLexThrows("`"); // Double-Quote Strings testLex(`"hello world"`, [ Token(symbol!"Value",loc,Value("hello world" )) ]); testLex(`" hello world "`, [ Token(symbol!"Value",loc,Value(" hello world " )) ]); testLex(`"hello \t world"`, [ Token(symbol!"Value",loc,Value("hello \t world")) ]); testLex(`"hello \n world"`, [ Token(symbol!"Value",loc,Value("hello \n world")) ]); testLex("\"hello \\\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]); testLex("\"hello \\ \n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]); testLex("\"hello \\ \n\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]); testLex(`"\"hello world\""`, [ Token(symbol!"Value",loc,Value(`"hello world"` )) ]); testLex(`""`, [ Token(symbol!"Value",loc,Value("" )) ]); // issue #34 testLexThrows("\"hello \n world\""); testLexThrows(`"foo`); testLexThrows(`"`); // Characters testLex("'a'", [ Token(symbol!"Value",loc,Value(cast(dchar) 'a')) ]); testLex("'\\n'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\n')) ]); testLex("'\\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]); testLex("'\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]); testLex("'\\''", [ Token(symbol!"Value",loc,Value(cast(dchar)'\'')) ]); testLex(`'\\'`, [ Token(symbol!"Value",loc,Value(cast(dchar)'\\')) ]); testLexThrows("'a"); testLexThrows("'aa'"); testLexThrows("''"); testLexThrows("'\\\n'"); testLexThrows("'\n'"); testLexThrows(`'\`); testLexThrows(`'\'`); testLexThrows("'"); // Unicode testLex("日本語", [ Token(symbol!"Ident",loc,Value(null), "日本語") ]); testLex("`おはよう、日本。`", [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]); testLex(`"おはよう、日本。"`, [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]); testLex("'月'", [ Token(symbol!"Value",loc,Value("月"d.dup[0])) ]); // Base64 Binary testLex("[aGVsbG8gd29ybGQ=]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]); testLex("[ aGVsbG8gd29ybGQ= ]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]); testLex("[\n aGVsbG8g \n \n d29ybGQ= \n]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]); testLexThrows("[aGVsbG8gd29ybGQ]"); // Ie: Not multiple of 4 testLexThrows("[ aGVsbG8gd29ybGQ ]"); // Date testLex( "1999/12/5", [ Token(symbol!"Value",loc,Value(Date( 1999, 12, 5))) ]); testLex( "2013/2/22", [ Token(symbol!"Value",loc,Value(Date( 2013, 2, 22))) ]); testLex("-2013/2/22", [ Token(symbol!"Value",loc,Value(Date(-2013, 2, 22))) ]); testLexThrows("7/"); testLexThrows("2013/2/22a"); testLexThrows("2013/2/22f"); testLex("1999/12/5\n", [ Token(symbol!"Value",loc,Value(Date(1999, 12, 5))), Token(symbol!"EOL",loc), ]); // DateTime, no timezone testLex( "2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); testLex( "2013/2/22 \t 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); testLex( "2013/2/22/*foo*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); testLex( "2013/2/22 /*foo*/ \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); testLex( "2013/2/22 /*foo*/ \\\n\n \n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); testLex( "2013/2/22 /*foo*/ \\\n\\\n \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); testLex( "2013/2/22/*foo*/\\\n/*bar*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); testLex("-2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 7, 53, 0)))) ]); testLex( "2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]); testLex("-2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]); testLex( "2013/2/22 07:53:34", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34)))) ]); testLex( "2013/2/22 07:53:34.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs))) ]); testLex( "2013/2/22 07:53:34.12", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 120.msecs))) ]); testLex( "2013/2/22 07:53:34.1", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 100.msecs))) ]); testLex( "2013/2/22 07:53.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs))) ]); testLex( "2013/2/22 34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0)))) ]); testLex( "2013/2/22 34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds(77), 123.msecs))) ]); testLex( "2013/2/22 34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0), 123.msecs))) ]); testLex( "2013/2/22 -34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0)))) ]); testLex( "2013/2/22 -34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds(77), -123.msecs))) ]); testLex( "2013/2/22 -34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), -123.msecs))) ]); testLexThrows("2013/2/22 07:53a"); testLexThrows("2013/2/22 07:53f"); testLexThrows("2013/2/22 07:53:34.123a"); testLexThrows("2013/2/22 07:53:34.123f"); testLexThrows("2013/2/22a 07:53"); testLex(`2013/2/22 "foo"`, [ Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), Token(symbol!"Value",loc,Value("foo")), ]); testLex("2013/2/22 07", [ Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), Token(symbol!"Value",loc,Value(cast(int)7)), ]); testLex("2013/2/22 1.2F", [ Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), Token(symbol!"Value",loc,Value(cast(float)1.2)), ]); testLex("2013/2/22 .2F", [ Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), Token(symbol!"Value",loc,Value(cast(float)0.2)), ]); testLex("2013/2/22 -1.2F", [ Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), Token(symbol!"Value",loc,Value(cast(float)-1.2)), ]); testLex("2013/2/22 -.2F", [ Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), Token(symbol!"Value",loc,Value(cast(float)-0.2)), ]); // DateTime, with known timezone testLex( "2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]); testLex("-2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]); testLex( "2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]); testLex("-2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]); testLex( "2013/2/22 07:53-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]); testLex( "2013/2/22 07:53-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); testLex( "2013/2/22 07:53:34-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(0) )))) ]); testLex( "2013/2/22 07:53:34-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]); testLex( "2013/2/22 07:53:34-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); testLex( "2013/2/22 07:53:34.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone( hours(0) )))) ]); testLex( "2013/2/22 07:53:34.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]); testLex( "2013/2/22 07:53:34.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); testLex( "2013/2/22 07:53.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone( hours(0) )))) ]); testLex( "2013/2/22 07:53.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]); testLex( "2013/2/22 07:53.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); testLex( "2013/2/22 -34:65-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); // DateTime, with Java SDLang's occasionally weird interpretation of some // "not quite ISO" variations of the "GMT with offset" timezone strings. Token testTokenSimpleTimeZone(Duration d) { auto dateTime = DateTime(2013, 2, 22, 7, 53, 0); auto tz = new immutable SimpleTimeZone(d); return Token( symbol!"Value", loc, Value(SysTime(dateTime,tz)) ); } Token testTokenUnknownTimeZone(string tzName) { auto dateTime = DateTime(2013, 2, 22, 7, 53, 0); auto frac = 0.msecs; return Token( symbol!"Value", loc, Value(DateTimeFracUnknownZone(dateTime,frac,tzName)) ); } testLex("2013/2/22 07:53-GMT+", [ testTokenUnknownTimeZone("GMT+") ]); testLex("2013/2/22 07:53-GMT+:", [ testTokenUnknownTimeZone("GMT+:") ]); testLex("2013/2/22 07:53-GMT+:3", [ testTokenUnknownTimeZone("GMT+:3") ]); testLex("2013/2/22 07:53-GMT+:03", [ testTokenSimpleTimeZone(minutes(3)) ]); testLex("2013/2/22 07:53-GMT+:003", [ testTokenUnknownTimeZone("GMT+:003") ]); testLex("2013/2/22 07:53-GMT+4", [ testTokenSimpleTimeZone(hours(4)) ]); testLex("2013/2/22 07:53-GMT+4:", [ testTokenUnknownTimeZone("GMT+4:") ]); testLex("2013/2/22 07:53-GMT+4:3", [ testTokenUnknownTimeZone("GMT+4:3") ]); testLex("2013/2/22 07:53-GMT+4:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]); testLex("2013/2/22 07:53-GMT+4:003", [ testTokenUnknownTimeZone("GMT+4:003") ]); testLex("2013/2/22 07:53-GMT+04", [ testTokenSimpleTimeZone(hours(4)) ]); testLex("2013/2/22 07:53-GMT+04:", [ testTokenUnknownTimeZone("GMT+04:") ]); testLex("2013/2/22 07:53-GMT+04:3", [ testTokenUnknownTimeZone("GMT+04:3") ]); testLex("2013/2/22 07:53-GMT+04:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]); testLex("2013/2/22 07:53-GMT+04:03abc", [ testTokenUnknownTimeZone("GMT+04:03abc") ]); testLex("2013/2/22 07:53-GMT+04:003", [ testTokenUnknownTimeZone("GMT+04:003") ]); testLex("2013/2/22 07:53-GMT+004", [ testTokenSimpleTimeZone(minutes(4)) ]); testLex("2013/2/22 07:53-GMT+004:", [ testTokenUnknownTimeZone("GMT+004:") ]); testLex("2013/2/22 07:53-GMT+004:3", [ testTokenUnknownTimeZone("GMT+004:3") ]); testLex("2013/2/22 07:53-GMT+004:03", [ testTokenUnknownTimeZone("GMT+004:03") ]); testLex("2013/2/22 07:53-GMT+004:003", [ testTokenUnknownTimeZone("GMT+004:003") ]); testLex("2013/2/22 07:53-GMT+0004", [ testTokenSimpleTimeZone(minutes(4)) ]); testLex("2013/2/22 07:53-GMT+0004:", [ testTokenUnknownTimeZone("GMT+0004:") ]); testLex("2013/2/22 07:53-GMT+0004:3", [ testTokenUnknownTimeZone("GMT+0004:3") ]); testLex("2013/2/22 07:53-GMT+0004:03", [ testTokenUnknownTimeZone("GMT+0004:03") ]); testLex("2013/2/22 07:53-GMT+0004:003", [ testTokenUnknownTimeZone("GMT+0004:003") ]); testLex("2013/2/22 07:53-GMT+00004", [ testTokenSimpleTimeZone(minutes(4)) ]); testLex("2013/2/22 07:53-GMT+00004:", [ testTokenUnknownTimeZone("GMT+00004:") ]); testLex("2013/2/22 07:53-GMT+00004:3", [ testTokenUnknownTimeZone("GMT+00004:3") ]); testLex("2013/2/22 07:53-GMT+00004:03", [ testTokenUnknownTimeZone("GMT+00004:03") ]); testLex("2013/2/22 07:53-GMT+00004:003", [ testTokenUnknownTimeZone("GMT+00004:003") ]); // DateTime, with unknown timezone testLex( "2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), 0.msecs, "Bogus/Foo")), "2013/2/22 07:53-Bogus/Foo") ]); testLex("-2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 7, 53, 0), 0.msecs, "Bogus/Foo"))) ]); testLex( "2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), 0.msecs, "Bogus/Foo"))) ]); testLex("-2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), 0.msecs, "Bogus/Foo"))) ]); testLex( "2013/2/22 07:53:34-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), 0.msecs, "Bogus/Foo"))) ]); testLex( "2013/2/22 07:53:34.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, "Bogus/Foo"))) ]); testLex( "2013/2/22 07:53.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, "Bogus/Foo"))) ]); // Time Span testLex( "12:14:42", [ Token(symbol!"Value",loc,Value( days( 0)+hours(12)+minutes(14)+seconds(42)+msecs( 0))) ]); testLex("-12:14:42", [ Token(symbol!"Value",loc,Value(-days( 0)-hours(12)-minutes(14)-seconds(42)-msecs( 0))) ]); testLex( "00:09:12", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 9)+seconds(12)+msecs( 0))) ]); testLex( "00:00:01.023", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 0)+seconds( 1)+msecs( 23))) ]); testLex( "23d:05:21:23.532", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(532))) ]); testLex( "23d:05:21:23.53", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(530))) ]); testLex( "23d:05:21:23.5", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(500))) ]); testLex("-23d:05:21:23.532", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(532))) ]); testLex("-23d:05:21:23.5", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(500))) ]); testLex( "23d:05:21:23", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs( 0))) ]); testLexThrows("12:14:42a"); testLexThrows("23d:05:21:23.532a"); testLexThrows("23d:05:21:23.532f"); // Combination testLex("foo. 7", [ Token(symbol!"Ident",loc,Value( null),"foo."), Token(symbol!"Value",loc,Value(cast(int)7)) ]); testLex(` namespace:person "foo" "bar" 1 23L name.first="ひとみ" name.last="Smith" { namespace:age 37; namespace:favorite_color "blue" // comment somedate 2013/2/22 07:53 -- comment inventory /* comment */ { socks } } `, [ Token(symbol!"EOL",loc,Value(null),"\n"), Token(symbol!"Ident", loc, Value( null ), "namespace"), Token(symbol!":", loc, Value( null ), ":"), Token(symbol!"Ident", loc, Value( null ), "person"), Token(symbol!"Value", loc, Value( "foo" ), `"foo"`), Token(symbol!"Value", loc, Value( "bar" ), `"bar"`), Token(symbol!"Value", loc, Value( cast( int) 1 ), "1"), Token(symbol!"Value", loc, Value( cast(long)23 ), "23L"), Token(symbol!"Ident", loc, Value( null ), "name.first"), Token(symbol!"=", loc, Value( null ), "="), Token(symbol!"Value", loc, Value( "ひとみ" ), `"ひとみ"`), Token(symbol!"Ident", loc, Value( null ), "name.last"), Token(symbol!"=", loc, Value( null ), "="), Token(symbol!"Value", loc, Value( "Smith" ), `"Smith"`), Token(symbol!"{", loc, Value( null ), "{"), Token(symbol!"EOL", loc, Value( null ), "\n"), Token(symbol!"Ident", loc, Value( null ), "namespace"), Token(symbol!":", loc, Value( null ), ":"), Token(symbol!"Ident", loc, Value( null ), "age"), Token(symbol!"Value", loc, Value( cast(int)37 ), "37"), Token(symbol!"EOL", loc, Value( null ), ";"), Token(symbol!"Ident", loc, Value( null ), "namespace"), Token(symbol!":", loc, Value( null ), ":"), Token(symbol!"Ident", loc, Value( null ), "favorite_color"), Token(symbol!"Value", loc, Value( "blue" ), `"blue"`), Token(symbol!"EOL", loc, Value( null ), "\n"), Token(symbol!"Ident", loc, Value( null ), "somedate"), Token(symbol!"Value", loc, Value( DateTimeFrac(DateTime(2013, 2, 22, 7, 53, 0)) ), "2013/2/22 07:53"), Token(symbol!"EOL", loc, Value( null ), "\n"), Token(symbol!"EOL", loc, Value( null ), "\n"), Token(symbol!"Ident", loc, Value(null), "inventory"), Token(symbol!"{", loc, Value(null), "{"), Token(symbol!"EOL", loc, Value(null), "\n"), Token(symbol!"Ident", loc, Value(null), "socks"), Token(symbol!"EOL", loc, Value(null), "\n"), Token(symbol!"}", loc, Value(null), "}"), Token(symbol!"EOL", loc, Value(null), "\n"), Token(symbol!"}", loc, Value(null), "}"), Token(symbol!"EOL", loc, Value(null), "\n"), ]); if(numErrors > 0) stderr.writeln(numErrors, " failed test(s)"); } @("lexer: Regression test issue #8") unittest { testLex(`"\n \n"`, [ Token(symbol!"Value",loc,Value("\n \n"),`"\n \n"`) ]); testLex(`"\t\t"`, [ Token(symbol!"Value",loc,Value("\t\t"),`"\t\t"`) ]); testLex(`"\n\n"`, [ Token(symbol!"Value",loc,Value("\n\n"),`"\n\n"`) ]); } @("lexer: Regression test issue #11") unittest { void test(string input) { testLex( input, [ Token(symbol!"EOL", loc, Value(null), "\n"), Token(symbol!"Ident",loc,Value(null), "a") ] ); } test("//X\na"); test("//\na"); test("--\na"); test("#\na"); } @("ast: Regression test issue #28") unittest { enum offset = 1; // workaround for an of-by-one error for line numbers testLex("test", [ Token(symbol!"Ident", Location("filename", 0, 0, 0), Value(null), "test") ], true); testLex("\ntest", [ Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\n"), Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test") ], true); testLex("\rtest", [ Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"), Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test") ], true); testLex("\r\ntest", [ Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"), Token(symbol!"Ident", Location("filename", 1, 0, 2), Value(null), "test") ], true); testLex("\r\n\ntest", [ Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"), Token(symbol!"EOL", Location("filename", 1, 0, 2), Value(null), "\n"), Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test") ], true); testLex("\r\r\ntest", [ Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"), Token(symbol!"EOL", Location("filename", 1, 0, 1), Value(null), "\r\n"), Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test") ], true); }