diff options
author | Ralph Amissah <ralph@amissah.com> | 2016-06-16 01:49:06 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2019-04-04 14:48:18 -0400 |
commit | 8ab7e935913c102fb039110e20b71f698a68c6ee (patch) | |
tree | 3472debd16ce656a57150399ce666e248565f011 /src/sdlang/parser.d | |
parent | step4.1 as step4 but extract header meta & make on first reading in document (diff) |
step5 sdlang used for config files and doc headers
Diffstat (limited to 'src/sdlang/parser.d')
-rw-r--r-- | src/sdlang/parser.d | 551 |
1 files changed, 551 insertions, 0 deletions
diff --git a/src/sdlang/parser.d b/src/sdlang/parser.d new file mode 100644 index 0000000..ed8084a --- /dev/null +++ b/src/sdlang/parser.d @@ -0,0 +1,551 @@ +// SDLang-D +// Written in the D programming language. + +module sdlang.parser; + +import std.file; + +import libInputVisitor; + +import sdlang.ast; +import sdlang.exception; +import sdlang.lexer; +import sdlang.symbol; +import sdlang.token; +import sdlang.util; + +/// Returns root tag. +Tag parseFile(string filename) +{ + auto source = cast(string)read(filename); + return parseSource(source, filename); +} + +/// Returns root tag. The optional 'filename' parameter can be included +/// so that the SDL document's filename (if any) can be displayed with +/// any syntax error messages. +Tag parseSource(string source, string filename=null) +{ + auto lexer = new Lexer(source, filename); + auto parser = DOMParser(lexer); + return parser.parseRoot(); +} + +/++ +Parses an SDL document using StAX/Pull-style. Returns an InputRange with +element type ParserEvent. + +The pullParseFile version reads a file and parses it, while pullParseSource +parses a string passed in. The optional 'filename' parameter in pullParseSource +can be included so that the SDL document's filename (if any) can be displayed +with any syntax error messages. + +Warning! The FileStartEvent and FileEndEvent events *might* be removed later. +See $(LINK https://github.com/Abscissa/SDLang-D/issues/17) + +Example: +------------------ +parent 12 attr="q" { + childA 34 + childB 56 +} +lastTag +------------------ + +The ParserEvent sequence emitted for that SDL document would be as +follows (indented for readability): +------------------ +FileStartEvent + TagStartEvent (parent) + ValueEvent (12) + AttributeEvent (attr, "q") + TagStartEvent (childA) + ValueEvent (34) + TagEndEvent + TagStartEvent (childB) + ValueEvent (56) + TagEndEvent + TagEndEvent + TagStartEvent (lastTag) + TagEndEvent +FileEndEvent +------------------ + +Example: +------------------ +foreach(event; pullParseFile("stuff.sdl")) +{ + import std.stdio; + + if(event.peek!FileStartEvent()) + writeln("FileStartEvent, starting! "); + + else if(event.peek!FileEndEvent()) + writeln("FileEndEvent, done! "); + + else if(auto e = event.peek!TagStartEvent()) + writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location); + + else if(event.peek!TagEndEvent()) + writeln("TagEndEvent"); + + else if(auto e = event.peek!ValueEvent()) + writeln("ValueEvent: ", e.value); + + else if(auto e = event.peek!AttributeEvent()) + writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value); + + else // Shouldn't happen + throw new Exception("Received unknown parser event"); +} +------------------ ++/ +auto pullParseFile(string filename) +{ + auto source = cast(string)read(filename); + return parseSource(source, filename); +} + +///ditto +auto pullParseSource(string source, string filename=null) +{ + auto lexer = new Lexer(source, filename); + auto parser = PullParser(lexer); + return inputVisitor!ParserEvent( parser ); +} + +/// The element of the InputRange returned by pullParseFile and pullParseSource: +alias ParserEvent = std.variant.Algebraic!( + FileStartEvent, + FileEndEvent, + TagStartEvent, + TagEndEvent, + ValueEvent, + AttributeEvent, +); + +/// Event: Start of file +struct FileStartEvent +{ + Location location; +} + +/// Event: End of file +struct FileEndEvent +{ + Location location; +} + +/// Event: Start of tag +struct TagStartEvent +{ + Location location; + string namespace; + string name; +} + +/// Event: End of tag +struct TagEndEvent +{ + //Location location; +} + +/// Event: Found a Value in the current tag +struct ValueEvent +{ + Location location; + Value value; +} + +/// Event: Found an Attribute in the current tag +struct AttributeEvent +{ + Location location; + string namespace; + string name; + Value value; +} + +// The actual pull parser +private struct PullParser +{ + private Lexer lexer; + + private struct IDFull + { + string namespace; + string name; + } + + private void error(string msg) + { + error(lexer.front.location, msg); + } + + private void error(Location loc, string msg) + { + throw new SDLangParseException(loc, "Error: "~msg); + } + + private InputVisitor!(PullParser, ParserEvent) v; + + void visit(InputVisitor!(PullParser, ParserEvent) v) + { + this.v = v; + parseRoot(); + } + + private void emit(Event)(Event event) + { + v.yield( ParserEvent(event) ); + } + + /// <Root> ::= <Tags> EOF (Lookaheads: Anything) + private void parseRoot() + { + //trace("Starting parse of file: ", lexer.filename); + //trace(__FUNCTION__, ": <Root> ::= <Tags> EOF (Lookaheads: Anything)"); + + auto startLocation = Location(lexer.filename, 0, 0, 0); + emit( FileStartEvent(startLocation) ); + + parseTags(); + + auto token = lexer.front; + if(!token.matches!"EOF"()) + error("Expected end-of-file, not " ~ token.symbol.name); + + emit( FileEndEvent(token.location) ); + } + + /// <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value) + /// | EOL <Tags> (Lookaheads: EOL) + /// | {empty} (Lookaheads: Anything else, except '{') + void parseTags() + { + //trace("Enter ", __FUNCTION__); + while(true) + { + auto token = lexer.front; + if(token.matches!"Ident"() || token.matches!"Value"()) + { + //trace(__FUNCTION__, ": <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value)"); + parseTag(); + continue; + } + else if(token.matches!"EOL"()) + { + //trace(__FUNCTION__, ": <Tags> ::= EOL <Tags> (Lookaheads: EOL)"); + lexer.popFront(); + continue; + } + else if(token.matches!"{"()) + { + error("Anonymous tags must have at least one value. They cannot just have children and attributes only."); + } + else + { + //trace(__FUNCTION__, ": <Tags> ::= {empty} (Lookaheads: Anything else, except '{')"); + break; + } + } + } + + /// <Tag> + /// ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident) + /// | <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value) + void parseTag() + { + auto token = lexer.front; + if(token.matches!"Ident"()) + { + //trace(__FUNCTION__, ": <Tag> ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident)"); + //trace("Found tag named: ", tag.fullName); + auto id = parseIDFull(); + emit( TagStartEvent(token.location, id.namespace, id.name) ); + } + else if(token.matches!"Value"()) + { + //trace(__FUNCTION__, ": <Tag> ::= <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value)"); + //trace("Found anonymous tag."); + emit( TagStartEvent(token.location, null, null) ); + } + else + error("Expected tag name or value, not " ~ token.symbol.name); + + if(lexer.front.matches!"="()) + error("Anonymous tags must have at least one value. They cannot just have attributes and children only."); + + parseValues(); + parseAttributes(); + parseOptChild(); + parseTagTerminator(); + + emit( TagEndEvent() ); + } + + /// <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident) + IDFull parseIDFull() + { + auto token = lexer.front; + if(token.matches!"Ident"()) + { + //trace(__FUNCTION__, ": <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident)"); + lexer.popFront(); + return parseIDSuffix(token.data); + } + else + { + error("Expected namespace or identifier, not " ~ token.symbol.name); + assert(0); + } + } + + /// <IDSuffix> + /// ::= ':' Ident (Lookaheads: ':') + /// ::= {empty} (Lookaheads: Anything else) + IDFull parseIDSuffix(string firstIdent) + { + auto token = lexer.front; + if(token.matches!":"()) + { + //trace(__FUNCTION__, ": <IDSuffix> ::= ':' Ident (Lookaheads: ':')"); + lexer.popFront(); + token = lexer.front; + if(token.matches!"Ident"()) + { + lexer.popFront(); + return IDFull(firstIdent, token.data); + } + else + { + error("Expected name, not " ~ token.symbol.name); + assert(0); + } + } + else + { + //trace(__FUNCTION__, ": <IDSuffix> ::= {empty} (Lookaheads: Anything else)"); + return IDFull("", firstIdent); + } + } + + /// <Values> + /// ::= Value <Values> (Lookaheads: Value) + /// | {empty} (Lookaheads: Anything else) + void parseValues() + { + while(true) + { + auto token = lexer.front; + if(token.matches!"Value"()) + { + //trace(__FUNCTION__, ": <Values> ::= Value <Values> (Lookaheads: Value)"); + parseValue(); + continue; + } + else + { + //trace(__FUNCTION__, ": <Values> ::= {empty} (Lookaheads: Anything else)"); + break; + } + } + } + + /// Handle Value terminals that aren't part of an attribute + void parseValue() + { + auto token = lexer.front; + if(token.matches!"Value"()) + { + //trace(__FUNCTION__, ": (Handle Value terminals that aren't part of an attribute)"); + auto value = token.value; + //trace("In tag '", parent.fullName, "', found value: ", value); + emit( ValueEvent(token.location, value) ); + + lexer.popFront(); + } + else + error("Expected value, not "~token.symbol.name); + } + + /// <Attributes> + /// ::= <Attribute> <Attributes> (Lookaheads: Ident) + /// | {empty} (Lookaheads: Anything else) + void parseAttributes() + { + while(true) + { + auto token = lexer.front; + if(token.matches!"Ident"()) + { + //trace(__FUNCTION__, ": <Attributes> ::= <Attribute> <Attributes> (Lookaheads: Ident)"); + parseAttribute(); + continue; + } + else + { + //trace(__FUNCTION__, ": <Attributes> ::= {empty} (Lookaheads: Anything else)"); + break; + } + } + } + + /// <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident) + void parseAttribute() + { + //trace(__FUNCTION__, ": <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident)"); + auto token = lexer.front; + if(!token.matches!"Ident"()) + error("Expected attribute name, not "~token.symbol.name); + + auto id = parseIDFull(); + + token = lexer.front; + if(!token.matches!"="()) + error("Expected '=' after attribute name, not "~token.symbol.name); + + lexer.popFront(); + token = lexer.front; + if(!token.matches!"Value"()) + error("Expected attribute value, not "~token.symbol.name); + + //trace("In tag '", parent.fullName, "', found attribute '", attr.fullName, "'"); + emit( AttributeEvent(token.location, id.namespace, id.name, token.value) ); + + lexer.popFront(); + } + + /// <OptChild> + /// ::= '{' EOL <Tags> '}' (Lookaheads: '{') + /// | {empty} (Lookaheads: Anything else) + void parseOptChild() + { + auto token = lexer.front; + if(token.matches!"{") + { + //trace(__FUNCTION__, ": <OptChild> ::= '{' EOL <Tags> '}' (Lookaheads: '{')"); + lexer.popFront(); + token = lexer.front; + if(!token.matches!"EOL"()) + error("Expected newline or semicolon after '{', not "~token.symbol.name); + + lexer.popFront(); + parseTags(); + + token = lexer.front; + if(!token.matches!"}"()) + error("Expected '}' after child tags, not "~token.symbol.name); + lexer.popFront(); + } + else + { + //trace(__FUNCTION__, ": <OptChild> ::= {empty} (Lookaheads: Anything else)"); + // Do nothing, no error. + } + } + + /// <TagTerminator> + /// ::= EOL (Lookahead: EOL) + /// | {empty} (Lookahead: EOF) + void parseTagTerminator() + { + auto token = lexer.front; + if(token.matches!"EOL") + { + //trace(__FUNCTION__, ": <TagTerminator> ::= EOL (Lookahead: EOL)"); + lexer.popFront(); + } + else if(token.matches!"EOF") + { + //trace(__FUNCTION__, ": <TagTerminator> ::= {empty} (Lookahead: EOF)"); + // Do nothing + } + else + error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name); + } +} + +private struct DOMParser +{ + Lexer lexer; + + Tag parseRoot() + { + auto currTag = new Tag(null, null, "root"); + currTag.location = Location(lexer.filename, 0, 0, 0); + + auto parser = PullParser(lexer); + auto eventRange = inputVisitor!ParserEvent( parser ); + foreach(event; eventRange) + { + if(auto e = event.peek!TagStartEvent()) + { + auto newTag = new Tag(currTag, e.namespace, e.name); + newTag.location = e.location; + + currTag = newTag; + } + else if(event.peek!TagEndEvent()) + { + currTag = currTag.parent; + + if(!currTag) + parser.error("Internal Error: Received an extra TagEndEvent"); + } + else if(auto e = event.peek!ValueEvent()) + { + currTag.add(e.value); + } + else if(auto e = event.peek!AttributeEvent()) + { + auto attr = new Attribute(e.namespace, e.name, e.value, e.location); + currTag.add(attr); + } + else if(event.peek!FileStartEvent()) + { + // Do nothing + } + else if(event.peek!FileEndEvent()) + { + // There shouldn't be another parent. + if(currTag.parent) + parser.error("Internal Error: Unexpected end of file, not enough TagEndEvent"); + } + else + parser.error("Internal Error: Received unknown parser event"); + } + + return currTag; + } +} + +// Other parser tests are part of the AST's tests over in the ast module. + +// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16 +version(sdlangUnittest) +unittest +{ + import std.stdio; + writeln("parser: Regression test issue #16..."); + stdout.flush(); + + // Shouldn't crash + foreach(event; pullParseSource(`tag "data"`)) + { + event.peek!FileStartEvent(); + } +} + +// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31 +// "Escape sequence results in range violation error" +version(sdlangUnittest) +unittest +{ + import std.stdio; + writeln("parser: Regression test issue #31..."); + stdout.flush(); + + // Shouldn't get a Range violation + parseSource(`test "\"foo\""`); +} |