aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sdlang/lexer.d
diff options
context:
space:
mode:
Diffstat (limited to 'src/sdlang/lexer.d')
-rw-r--r--src/sdlang/lexer.d58
1 files changed, 28 insertions, 30 deletions
diff --git a/src/sdlang/lexer.d b/src/sdlang/lexer.d
index 91e0a7d..3788188 100644
--- a/src/sdlang/lexer.d
+++ b/src/sdlang/lexer.d
@@ -5,20 +5,19 @@ module sdlang.lexer;
import std.algorithm;
import std.array;
+static import std.ascii;
import std.base64;
import std.bigint;
import std.conv;
import std.datetime;
import std.file;
-// import std.stream : ByteOrderMarks, BOM;
+import std.format;
import std.traits;
import std.typecons;
import std.uni;
import std.utf;
import std.variant;
-import undead.stream : ByteOrderMarks, BOM;
-
import sdlang.exception;
import sdlang.symbol;
import sdlang.token;
@@ -111,7 +110,7 @@ class Lexer
// found after it needs to be saved for the lexer's next iteration.
//
// It's a slight kludge, and could instead be implemented as a slightly
- // kludgey parser hack, but it's the only situation where SDL's lexing
+ // kludgey parser hack, but it's the only situation where SDLang's lexing
// needs to lookahead more than one character, so this is good enough.
private struct LookaheadTokenInfo
{
@@ -172,9 +171,10 @@ class Lexer
error(location, msg);
}
+ //TODO: Take varargs and use output range sink.
private void error(Location loc, string msg)
{
- throw new SDLangParseException(loc, "Error: "~msg);
+ throw new ParseException(loc, "Error: "~msg);
}
private Token makeToken(string symbolName)()
@@ -442,8 +442,14 @@ class Lexer
else
{
+ if(ch == ',')
+ error("Unexpected comma: SDLang is not a comma-separated format.");
+ else if(std.ascii.isPrintable(ch))
+ error(text("Unexpected: ", ch));
+ else
+ error("Unexpected character code 0x%02X".format(ch));
+
advanceChar(ErrorOnEOF.No);
- error("Syntax error");
}
}
@@ -734,8 +740,7 @@ class Lexer
//Base64.decode(Base64InputRange(this), OutputBuf());
Base64.decode(tmpBuf, OutputBuf());
- //TODO: Starting with dmd 2.062, this should be a Base64Exception
- catch(Exception e)
+ catch(Base64Exception e)
error("Invalid character in base64 binary literal.");
advanceChar(ErrorOnEOF.No); // Skip ']'
@@ -1455,13 +1460,17 @@ class Lexer
}
}
-version(sdlangUnittest)
+version(unittest)
{
import std.stdio;
+ version(Have_unit_threaded) import unit_threaded;
+ else { enum DontTest; }
+
private auto loc = Location("filename", 0, 0, 0);
private auto loc2 = Location("a", 1, 1, 1);
+ @("lexer: EOL")
unittest
{
assert([Token(symbol!"EOL",loc) ] == [Token(symbol!"EOL",loc) ] );
@@ -1469,18 +1478,19 @@ version(sdlangUnittest)
}
private int numErrors = 0;
+ @DontTest
private void testLex(string source, Token[] expected, bool test_locations = false, string file=__FILE__, size_t line=__LINE__)
{
Token[] actual;
try
actual = lexSource(source, "filename");
- catch(SDLangParseException e)
+ catch(ParseException e)
{
numErrors++;
stderr.writeln(file, "(", line, "): testLex failed on: ", source);
stderr.writeln(" Expected:");
stderr.writeln(" ", expected);
- stderr.writeln(" Actual: SDLangParseException thrown:");
+ stderr.writeln(" Actual: ParseException thrown:");
stderr.writeln(" ", e.msg);
return;
}
@@ -1524,26 +1534,23 @@ version(sdlangUnittest)
Token[] actual;
try
actual = lexSource(source, "filename");
- catch(SDLangParseException e)
+ catch(ParseException e)
hadException = true;
if(!hadException)
{
numErrors++;
stderr.writeln(file, "(", line, "): testLex failed on: ", source);
- stderr.writeln(" Expected SDLangParseException");
+ stderr.writeln(" Expected ParseException");
stderr.writeln(" Actual:");
stderr.writeln(" ", actual);
}
}
}
-version(sdlangUnittest)
+@("sdlang lexer")
unittest
{
- writeln("Unittesting sdlang lexer...");
- stdout.flush();
-
testLex("", []);
testLex(" ", []);
testLex("\\\n", []);
@@ -1856,7 +1863,7 @@ unittest
testLex( "2013/2/22 -34:65-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
- // DateTime, with Java SDL's occasionally weird interpretation of some
+ // DateTime, with Java SDLang's occasionally weird interpretation of some
// "not quite ISO" variations of the "GMT with offset" timezone strings.
Token testTokenSimpleTimeZone(Duration d)
{
@@ -2001,23 +2008,17 @@ unittest
stderr.writeln(numErrors, " failed test(s)");
}
-version(sdlangUnittest)
+@("lexer: Regression test issue #8")
unittest
{
- writeln("lexer: Regression test issue #8...");
- stdout.flush();
-
testLex(`"\n \n"`, [ Token(symbol!"Value",loc,Value("\n \n"),`"\n \n"`) ]);
testLex(`"\t\t"`, [ Token(symbol!"Value",loc,Value("\t\t"),`"\t\t"`) ]);
testLex(`"\n\n"`, [ Token(symbol!"Value",loc,Value("\n\n"),`"\n\n"`) ]);
}
-version(sdlangUnittest)
+@("lexer: Regression test issue #11")
unittest
{
- writeln("lexer: Regression test issue #11...");
- stdout.flush();
-
void test(string input)
{
testLex(
@@ -2035,12 +2036,9 @@ unittest
test("#\na");
}
-version(sdlangUnittest)
+@("ast: Regression test issue #28")
unittest
{
- writeln("lexer: Regression test issue #28...");
- stdout.flush();
-
enum offset = 1; // workaround for an off-by-one error for line numbers
testLex("test", [
Token(symbol!"Ident", Location("filename", 0, 0, 0), Value(null), "test")