// SDLang-D
// Written in the D programming language.

module sdlang.parser;

import std.file;

import libInputVisitor;
import taggedalgebraic;

import sdlang.ast;
import sdlang.exception;
import sdlang.lexer;
import sdlang.symbol;
import sdlang.token;
import sdlang.util;

/++
Reads the file `filename` and parses its entire contents as an SDLang
document, building a DOM.

Returns: The root tag.

Throws: ParseException on a syntax error. Anything thrown by
`std.file.read` (e.g. when the file cannot be read) is propagated.
+/
Tag parseFile(string filename)
{
	auto source = cast(string)read(filename);
	return parseSource(source, filename);
}

/++
Parses `source` as an SDLang document, building a DOM.

The optional `filename` parameter can be included so that the SDLang
document's filename (if any) can be displayed with any syntax error messages.

Returns: The root tag.

Throws: ParseException on a syntax error.
+/
Tag parseSource(string source, string filename=null)
{
	auto lexer = new Lexer(source, filename);
	auto parser = DOMParser(lexer);
	return parser.parseRoot();
}

/++
Parses an SDL document using StAX/Pull-style. Returns an InputRange with
element type ParserEvent.

The pullParseFile version reads a file and parses it, while pullParseSource
parses a string passed in. The optional `filename` parameter in pullParseSource
can be included so that the SDLang document's filename (if any) can be displayed
with any syntax error messages.

Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
and removed as of SDLang-D v0.10.0.

Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
As of SDLang-D v0.10.0, it is now a
$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
so usage has changed somewhat.

Example:
------------------
parent 12 attr="q" {
	childA 34
	childB 56
}
lastTag
------------------

The ParserEvent sequence emitted for that SDL document would be as
follows (indented for readability):
------------------
TagStartEvent (parent)
	ValueEvent (12)
	AttributeEvent (attr, "q")
	TagStartEvent (childA)
		ValueEvent (34)
	TagEndEvent
	TagStartEvent (childB)
		ValueEvent (56)
	TagEndEvent
TagEndEvent
TagStartEvent (lastTag)
TagEndEvent
------------------
+/
auto pullParseFile(string filename)
{
	auto source = cast(string)read(filename);

	// Must go through the pull parser: calling parseSource here would build
	// and return a DOM Tag rather than the documented range of ParserEvent.
	return pullParseSource(source, filename);
}

///ditto
auto pullParseSource(string source, string filename=null)
{
	auto lexer = new Lexer(source, filename);
	auto parser = PullParser(lexer);
	return inputVisitor!ParserEvent( parser );
}

///
@("pullParseFile/pullParseSource example")
unittest
{
	// stuff.sdl
	immutable stuffSdl = `
name "sdlang-d"
description "An SDL (Simple Declarative Language) library for D."
homepage "http://github.com/Abscissa/SDLang-D"

configuration "library" {
	targetType "library"
}
`;

	import std.stdio;

	foreach(event; pullParseSource(stuffSdl))
	final switch(event.kind)
	{
	case ParserEvent.Kind.tagStart:
		auto e = cast(TagStartEvent) event;
		writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location);
		break;

	case ParserEvent.Kind.tagEnd:
		auto e = cast(TagEndEvent) event;
		writeln("TagEndEvent");
		break;

	case ParserEvent.Kind.value:
		auto e = cast(ValueEvent) event;
		writeln("ValueEvent: ", e.value);
		break;

	case ParserEvent.Kind.attribute:
		auto e = cast(AttributeEvent) event;
		writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value);
		break;
	}
}

// Backing union for ParserEvent; the member names become the values of
// ParserEvent.Kind.
private union ParserEventUnion
{
	TagStartEvent  tagStart;
	TagEndEvent    tagEnd;
	ValueEvent     value;
	AttributeEvent attribute;
}

/++
The element of the InputRange returned by pullParseFile and pullParseSource.

This is a tagged union, built from the following:
-------
alias ParserEvent = TaggedAlgebraic!ParserEventUnion;
private union ParserEventUnion
{
	TagStartEvent  tagStart;
	TagEndEvent    tagEnd;
	ValueEvent     value;
	AttributeEvent attribute;
}
-------

Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
and removed as of SDLang-D v0.10.0.

Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
As of SDLang-D v0.10.0, it is now a
$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
so usage has changed somewhat.
+/
alias ParserEvent = TaggedAlgebraic!ParserEventUnion;

///
@("ParserEvent example")
unittest
{
	// Create
	ParserEvent event1 = TagStartEvent();
	ParserEvent event2 = TagEndEvent();
	ParserEvent event3 = ValueEvent();
	ParserEvent event4 = AttributeEvent();

	// Check type
	assert(event1.kind == ParserEvent.Kind.tagStart);
	assert(event2.kind == ParserEvent.Kind.tagEnd);
	assert(event3.kind == ParserEvent.Kind.value);
	assert(event4.kind == ParserEvent.Kind.attribute);

	// Cast to base type
	auto e1 = cast(TagStartEvent) event1;
	auto e2 = cast(TagEndEvent) event2;
	auto e3 = cast(ValueEvent) event3;
	auto e4 = cast(AttributeEvent) event4;
	//auto noGood = cast(AttributeEvent) event1; // AssertError: event1 is a TagStartEvent, not AttributeEvent.

	// Use as base type.
	// In many cases, no casting is even needed.
	event1.name = "foo";
	//auto noGood = event3.name; // AssertError: ValueEvent doesn't have a member 'name'.

	// Final switch is supported:
	final switch(event1.kind)
	{
		case ParserEvent.Kind.tagStart:  break;
		case ParserEvent.Kind.tagEnd:    break;
		case ParserEvent.Kind.value:     break;
		case ParserEvent.Kind.attribute: break;
	}
}

/// Event: Start of tag
struct TagStartEvent
{
	Location location;
	string namespace;
	string name;
}

/// Event: End of tag
struct TagEndEvent
{
	//Location location;
}

/// Event: Found a Value in the current tag
struct ValueEvent
{
	Location location;
	Value value;
}

/// Event: Found an Attribute in the current tag
struct AttributeEvent
{
	Location location;
	string namespace;
	string name;
	Value value;
}

// The actual pull parser.
//
// A recursive-descent parser over the Lexer's token stream. Each parseXxx
// method handles one grammar production (given in its doc comment) and
// reports what it finds through emit() instead of building a tree.
private struct PullParser
{
	private Lexer lexer;

	// A possibly namespace-qualified identifier. `namespace` is "" when the
	// identifier was written without a "namespace:" prefix.
	private struct IDFull
	{
		string namespace;
		string name;
	}

	// Reports a syntax error at the location of the current token.
	private void error(string msg)
	{
		error(lexer.front.location, msg);
	}

	// Reports a syntax error at `loc`. Always throws ParseException.
	private void error(Location loc, string msg)
	{
		throw new ParseException(loc, "Error: "~msg);
	}

	// The visitor that events are yielded to; set by visit().
	private InputVisitor!(PullParser, ParserEvent) v;

	// Entry point for InputVisitor (presumably invoked by libInputVisitor when
	// the range is first consumed): remembers the visitor so emit() can yield
	// to it, then parses the whole document.
	void visit(InputVisitor!(PullParser, ParserEvent) v)
	{
		this.v = v;
		parseRoot();
	}

	// Wraps `event` in a ParserEvent and yields it to the visitor.
	private void emit(Event)(Event event)
	{
		v.yield( ParserEvent(event) );
	}

	/// <Root> ::= <Tags> EOF  (Lookaheads: Anything)
	private void parseRoot()
	{
		parseTags();

		auto token = lexer.front;
		if(token.matches!":"())
		{
			// A ':' where a tag was expected means the namespace in front of
			// it is missing. Peek at the next token only to tailor the message.
			lexer.popFront();
			token = lexer.front;
			if(token.matches!"Ident"())
			{
				error("Missing namespace. If you don't wish to use a namespace, then say '"~token.data~"', not ':"~token.data~"'");
				assert(0);
			}
			else
			{
				error("Missing namespace. If you don't wish to use a namespace, then omit the ':'");
				assert(0);
			}
		}
		else if(!token.matches!"EOF"())
			error("Expected a tag or end-of-file, not " ~ token.symbol.name);
	}

	/// <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)
	///        |   EOL   <Tags>  (Lookaheads: EOL)
	///        |   {empty}       (Lookaheads: Anything else, except '{')
	void parseTags()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Ident"() || token.matches!"Value"())
			{
				parseTag();
				continue;
			}
			else if(token.matches!"EOL"())
			{
				// Blank line / stray terminator: skip it.
				lexer.popFront();
				continue;
			}
			else if(token.matches!"{"())
			{
				error("Found start of child block, but no tag name. If you intended an anonymous "~
				"tag, you must have at least one value before any attributes or child tags.");
			}
			else
			{
				// Anything else ends the tag list; the caller decides whether
				// that token is legal here.
				break;
			}
		}
	}

	/// <Tag>
	///     ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)
	///     |   <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)
	///
	/// Emits a TagStartEvent, then the events of the tag's values, attributes
	/// and children, then a TagEndEvent.
	void parseTag()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			auto id = parseIDFull();
			emit( TagStartEvent(token.location, id.namespace, id.name) );
		}
		else if(token.matches!"Value"())
		{
			// Anonymous tag: the value is left in the lexer so that
			// parseValues() below emits it.
			emit( TagStartEvent(token.location, null, null) );
		}
		else
			error("Expected tag name or value, not " ~ token.symbol.name);

		// An '=' directly after the first identifier means that identifier was
		// really an attribute name, i.e. the tag itself has no name.
		if(lexer.front.matches!"="())
			error("Found attribute, but no tag name. If you intended an anonymous "~
			"tag, you must have at least one value before any attributes.");

		parseValues();
		parseAttributes();
		parseOptChild();
		parseTagTerminator();

		emit( TagEndEvent() );
	}

	/// <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)
	IDFull parseIDFull()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			lexer.popFront();
			return parseIDSuffix(token.data);
		}
		else
		{
			error("Expected namespace or identifier, not " ~ token.symbol.name);
			assert(0);
		}
	}

	/// <IDSuffix>
	///     ::= ':' Ident  (Lookaheads: ':')
	///     ::= {empty}    (Lookaheads: Anything else)
	///
	/// `firstIdent` is the identifier already consumed by parseIDFull: it is
	/// the namespace if a ':' follows, otherwise it is the name itself.
	IDFull parseIDSuffix(string firstIdent)
	{
		auto token = lexer.front;
		if(token.matches!":"())
		{
			lexer.popFront();
			token = lexer.front;
			if(token.matches!"Ident"())
			{
				lexer.popFront();
				return IDFull(firstIdent, token.data);
			}
			else
			{
				error("Expected name, not " ~ token.symbol.name);
				assert(0);
			}
		}
		else
		{
			return IDFull("", firstIdent);
		}
	}

	/// <Values>
	///     ::= Value <Values>  (Lookaheads: Value)
	///     |   {empty}         (Lookaheads: Anything else)
	void parseValues()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Value"())
			{
				parseValue();
				continue;
			}
			else
			{
				break;
			}
		}
	}

	/// Handle Value terminals that aren't part of an attribute
	void parseValue()
	{
		auto token = lexer.front;
		if(token.matches!"Value"())
		{
			auto value = token.value;
			emit( ValueEvent(token.location, value) );

			lexer.popFront();
		}
		else
			error("Expected value, not "~token.symbol.name);
	}

	/// <Attributes>
	///     ::= <Attribute> <Attributes>  (Lookaheads: Ident)
	///     |   {empty}                   (Lookaheads: Anything else)
	void parseAttributes()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Ident"())
			{
				parseAttribute();
				continue;
			}
			else
			{
				break;
			}
		}
	}

	/// <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)
	void parseAttribute()
	{
		auto token = lexer.front;
		if(!token.matches!"Ident"())
			error("Expected attribute name, not "~token.symbol.name);

		auto id = parseIDFull();

		token = lexer.front;
		if(!token.matches!"="())
			error("Expected '=' after attribute name, not "~token.symbol.name);

		lexer.popFront();
		token = lexer.front;
		if(!token.matches!"Value"())
			error("Expected attribute value, not "~token.symbol.name);

		// The event's location is that of the value token, not of the name.
		emit( AttributeEvent(token.location, id.namespace, id.name, token.value) );

		lexer.popFront();
	}

	/// <OptChild>
	///      ::= '{' EOL <Tags> '}'  (Lookaheads: '{')
	///      |   {empty}             (Lookaheads: Anything else)
	void parseOptChild()
	{
		auto token = lexer.front;
		if(token.matches!"{")
		{
			lexer.popFront();
			token = lexer.front;
			if(!token.matches!"EOL"())
				error("Expected newline or semicolon after '{', not "~token.symbol.name);

			lexer.popFront();
			parseTags();

			token = lexer.front;
			if(!token.matches!"}"())
				error("Expected '}' after child tags, not "~token.symbol.name);
			lexer.popFront();
		}
		else
		{
			// Do nothing, no error.
		}
	}

	/// <TagTerminator>
	///     ::= EOL      (Lookahead: EOL)
	///     |   {empty}  (Lookahead: EOF)
	void parseTagTerminator()
	{
		auto token = lexer.front;
		if(token.matches!"EOL")
		{
			lexer.popFront();
		}
		else if(token.matches!"EOF")
		{
			// Do nothing: EOF is left in place for parseRoot to see.
		}
		else
			error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name);
	}
}

// Builds a DOM (a tree of Tag objects) by consuming the PullParser's events.
private struct DOMParser
{
	Lexer lexer;

	// Parses the whole document and returns the root tag: a tag named "root"
	// with no parent, under which every top-level tag of the document is
	// created.
	Tag parseRoot()
	{
		auto currTag = new Tag(null, null, "root");
		currTag.location = Location(lexer.filename, 0, 0, 0);

		auto parser = PullParser(lexer);
		auto eventRange = inputVisitor!ParserEvent( parser );

		// currTag always refers to the tag currently being filled in:
		// tagStart descends into a new child, tagEnd climbs back to the parent.
		foreach(event; eventRange)
		final switch(event.kind)
		{
		case ParserEvent.Kind.tagStart:
			auto newTag = new Tag(currTag, event.namespace, event.name);
			newTag.location = event.location;

			currTag = newTag;
			break;

		case ParserEvent.Kind.tagEnd:
			currTag = currTag.parent;

			// Climbing above the root means the events were unbalanced.
			if(!currTag)
				parser.error("Internal Error: Received an extra TagEndEvent");
			break;

		case ParserEvent.Kind.value:
			currTag.add((cast(ValueEvent)event).value);
			break;

		case ParserEvent.Kind.attribute:
			auto e = cast(AttributeEvent) event;
			auto attr = new Attribute(e.namespace, e.name, e.value, e.location);
			currTag.add(attr);
			break;
		}

		return currTag;
	}
}

// Other parser tests are part of the AST's tests over in the ast module.
// Regression test, issue #13: https://github.com/Abscissa/SDLang-D/issues/13
// "Incorrectly accepts ":tagname" (blank namespace, tagname prefixed with colon)"
@("parser: Regression test issue #13")
unittest
{
	import std.exception : assertThrown;

	// A leading ':' with nothing in front of it must be rejected, whether it
	// is followed by an identifier or by a value.
	foreach(badSource; [`:test`, `:4`])
		assertThrown!ParseException(parseSource(badSource));
}

// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16
@("parser: Regression test issue #16")
unittest
{
	// Walking the events of a minimal document, and casting the tag-start
	// event to its concrete type, shouldn't crash.
	auto events = pullParseSource(`tag "data"`);
	foreach(event; events)
	{
		if(event.kind != ParserEvent.Kind.tagStart)
			continue;

		auto startEvent = cast(TagStartEvent) event;
	}
}

// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31
// "Escape sequence results in range violation error"
@("parser: Regression test issue #31")
unittest
{
	// A string value made up of escaped quotes shouldn't cause a Range violation.
	immutable escapedQuotes = `test "\"foo\""`;
	parseSource(escapedQuotes);
}