diff options
author | Ralph Amissah <ralph@amissah.com> | 2016-10-01 14:12:13 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2019-04-10 15:14:13 -0400 |
commit | ba1712e77b31704fd9ba16d14e15518e7a7dd104 (patch) | |
tree | 1a0d3233fb611b68dbf43e098a41a0d9378e9ace /src/sdlang/parser.d | |
parent | update sdlang, start looking to using dub remote dependencies (diff) |
0.7.0 using dub remote dependencies (local src related to sdlang removed)
Diffstat (limited to 'src/sdlang/parser.d')
-rw-r--r-- | src/sdlang/parser.d | 628 |
1 files changed, 0 insertions, 628 deletions
diff --git a/src/sdlang/parser.d b/src/sdlang/parser.d deleted file mode 100644 index c9b8d4f..0000000 --- a/src/sdlang/parser.d +++ /dev/null @@ -1,628 +0,0 @@ -// SDLang-D -// Written in the D programming language. - -module sdlang.parser; - -import std.file; - -import libInputVisitor; -import taggedalgebraic; - -import sdlang.ast; -import sdlang.exception; -import sdlang.lexer; -import sdlang.symbol; -import sdlang.token; -import sdlang.util; - -/// Returns root tag. -Tag parseFile(string filename) -{ - auto source = cast(string)read(filename); - return parseSource(source, filename); -} - -/// Returns root tag. The optional `filename` parameter can be included -/// so that the SDLang document's filename (if any) can be displayed with -/// any syntax error messages. -Tag parseSource(string source, string filename=null) -{ - auto lexer = new Lexer(source, filename); - auto parser = DOMParser(lexer); - return parser.parseRoot(); -} - -/++ -Parses an SDL document using StAX/Pull-style. Returns an InputRange with -element type ParserEvent. - -The pullParseFile version reads a file and parses it, while pullParseSource -parses a string passed in. The optional `filename` parameter in pullParseSource -can be included so that the SDLang document's filename (if any) can be displayed -with any syntax error messages. - -Note: The old FileStartEvent and FileEndEvent events -$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnessecary) -and removed as of SDLang-D v0.10.0. - -Note: Previously, in SDLang-D v0.9.x, ParserEvent was a -$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic). -As of SDLang-D v0.10.0, it is now a -$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic), -so usage has changed somewhat. - -Example: ------------------- -parent 12 attr="q" { - childA 34 - childB 56 -} -lastTag ------------------- - -The ParserEvent sequence emitted for that SDL document would be as -follows (indented for readability): ------------------- -TagStartEvent (parent) - ValueEvent (12) - AttributeEvent (attr, "q") - TagStartEvent (childA) - ValueEvent (34) - TagEndEvent - TagStartEvent (childB) - ValueEvent (56) - TagEndEvent -TagEndEvent -TagStartEvent (lastTag) -TagEndEvent ------------------- -+/ -auto pullParseFile(string filename) -{ - auto source = cast(string)read(filename); - return parseSource(source, filename); -} - -///ditto -auto pullParseSource(string source, string filename=null) -{ - auto lexer = new Lexer(source, filename); - auto parser = PullParser(lexer); - return inputVisitor!ParserEvent( parser ); -} - -/// -@("pullParseFile/pullParseSource example") -unittest -{ - // stuff.sdl - immutable stuffSdl = ` - name "sdlang-d" - description "An SDL (Simple Declarative Language) library for D." - homepage "http://github.com/Abscissa/SDLang-D" - - configuration "library" { - targetType "library" - } - `; - - import std.stdio; - - foreach(event; pullParseSource(stuffSdl)) - final switch(event.kind) - { - case ParserEvent.Kind.tagStart: - auto e = cast(TagStartEvent) event; - writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location); - break; - - case ParserEvent.Kind.tagEnd: - auto e = cast(TagEndEvent) event; - writeln("TagEndEvent"); - break; - - case ParserEvent.Kind.value: - auto e = cast(ValueEvent) event; - writeln("ValueEvent: ", e.value); - break; - - case ParserEvent.Kind.attribute: - auto e = cast(AttributeEvent) event; - writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value); - break; - } -} - -private union ParserEventUnion -{ - TagStartEvent tagStart; - TagEndEvent tagEnd; - ValueEvent value; - AttributeEvent attribute; -} - -/++ -The element of the InputRange returned by pullParseFile and pullParseSource. - -This is a tagged union, built from the following: -------- -alias ParserEvent = TaggedAlgebraic!ParserEventUnion; -private union ParserEventUnion -{ - TagStartEvent tagStart; - TagEndEvent tagEnd; - ValueEvent value; - AttributeEvent attribute; -} -------- - -Note: The old FileStartEvent and FileEndEvent events -$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnessecary) -and removed as of SDLang-D v0.10.0. - -Note: Previously, in SDLang-D v0.9.x, ParserEvent was a -$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic). -As of SDLang-D v0.10.0, it is now a -$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic), -so usage has changed somewhat. -+/ -alias ParserEvent = TaggedAlgebraic!ParserEventUnion; - -/// -@("ParserEvent example") -unittest -{ - // Create - ParserEvent event1 = TagStartEvent(); - ParserEvent event2 = TagEndEvent(); - ParserEvent event3 = ValueEvent(); - ParserEvent event4 = AttributeEvent(); - - // Check type - assert(event1.kind == ParserEvent.Kind.tagStart); - assert(event2.kind == ParserEvent.Kind.tagEnd); - assert(event3.kind == ParserEvent.Kind.value); - assert(event4.kind == ParserEvent.Kind.attribute); - - // Cast to base type - auto e1 = cast(TagStartEvent) event1; - auto e2 = cast(TagEndEvent) event2; - auto e3 = cast(ValueEvent) event3; - auto e4 = cast(AttributeEvent) event4; - //auto noGood = cast(AttributeEvent) event1; // AssertError: event1 is a TagStartEvent, not AttributeEvent. - - // Use as base type. - // In many cases, no casting is even needed. - event1.name = "foo"; - //auto noGood = event3.name; // AssertError: ValueEvent doesn't have a member 'name'. - - // Final switch is supported: - final switch(event1.kind) - { - case ParserEvent.Kind.tagStart: break; - case ParserEvent.Kind.tagEnd: break; - case ParserEvent.Kind.value: break; - case ParserEvent.Kind.attribute: break; - } -} - -/// Event: Start of tag -struct TagStartEvent -{ - Location location; - string namespace; - string name; -} - -/// Event: End of tag -struct TagEndEvent -{ - //Location location; -} - -/// Event: Found a Value in the current tag -struct ValueEvent -{ - Location location; - Value value; -} - -/// Event: Found an Attribute in the current tag -struct AttributeEvent -{ - Location location; - string namespace; - string name; - Value value; -} - -// The actual pull parser -private struct PullParser -{ - private Lexer lexer; - - private struct IDFull - { - string namespace; - string name; - } - - private void error(string msg) - { - error(lexer.front.location, msg); - } - - private void error(Location loc, string msg) - { - throw new ParseException(loc, "Error: "~msg); - } - - private InputVisitor!(PullParser, ParserEvent) v; - - void visit(InputVisitor!(PullParser, ParserEvent) v) - { - this.v = v; - parseRoot(); - } - - private void emit(Event)(Event event) - { - v.yield( ParserEvent(event) ); - } - - /// <Root> ::= <Tags> EOF (Lookaheads: Anything) - private void parseRoot() - { - //trace("Starting parse of file: ", lexer.filename); - //trace(__FUNCTION__, ": <Root> ::= <Tags> EOF (Lookaheads: Anything)"); - - auto startLocation = Location(lexer.filename, 0, 0, 0); - - parseTags(); - - auto token = lexer.front; - if(token.matches!":"()) - { - lexer.popFront(); - token = lexer.front; - if(token.matches!"Ident"()) - { - error("Missing namespace. If you don't wish to use a namespace, then say '"~token.data~"', not ':"~token.data~"'"); - assert(0); - } - else - { - error("Missing namespace. If you don't wish to use a namespace, then omit the ':'"); - assert(0); - } - } - else if(!token.matches!"EOF"()) - error("Expected a tag or end-of-file, not " ~ token.symbol.name); - } - - /// <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value) - /// | EOL <Tags> (Lookaheads: EOL) - /// | {empty} (Lookaheads: Anything else, except '{') - void parseTags() - { - //trace("Enter ", __FUNCTION__); - while(true) - { - auto token = lexer.front; - if(token.matches!"Ident"() || token.matches!"Value"()) - { - //trace(__FUNCTION__, ": <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value)"); - parseTag(); - continue; - } - else if(token.matches!"EOL"()) - { - //trace(__FUNCTION__, ": <Tags> ::= EOL <Tags> (Lookaheads: EOL)"); - lexer.popFront(); - continue; - } - else if(token.matches!"{"()) - { - error("Found start of child block, but no tag name. If you intended an anonymous "~ - "tag, you must have at least one value before any attributes or child tags."); - } - else - { - //trace(__FUNCTION__, ": <Tags> ::= {empty} (Lookaheads: Anything else, except '{')"); - break; - } - } - } - - /// <Tag> - /// ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident) - /// | <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value) - void parseTag() - { - auto token = lexer.front; - if(token.matches!"Ident"()) - { - //trace(__FUNCTION__, ": <Tag> ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident)"); - //trace("Found tag named: ", tag.fullName); - auto id = parseIDFull(); - emit( TagStartEvent(token.location, id.namespace, id.name) ); - } - else if(token.matches!"Value"()) - { - //trace(__FUNCTION__, ": <Tag> ::= <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value)"); - //trace("Found anonymous tag."); - emit( TagStartEvent(token.location, null, null) ); - } - else - error("Expected tag name or value, not " ~ token.symbol.name); - - if(lexer.front.matches!"="()) - error("Found attribute, but no tag name. If you intended an anonymous "~ - "tag, you must have at least one value before any attributes."); - - parseValues(); - parseAttributes(); - parseOptChild(); - parseTagTerminator(); - - emit( TagEndEvent() ); - } - - /// <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident) - IDFull parseIDFull() - { - auto token = lexer.front; - if(token.matches!"Ident"()) - { - //trace(__FUNCTION__, ": <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident)"); - lexer.popFront(); - return parseIDSuffix(token.data); - } - else - { - error("Expected namespace or identifier, not " ~ token.symbol.name); - assert(0); - } - } - - /// <IDSuffix> - /// ::= ':' Ident (Lookaheads: ':') - /// ::= {empty} (Lookaheads: Anything else) - IDFull parseIDSuffix(string firstIdent) - { - auto token = lexer.front; - if(token.matches!":"()) - { - //trace(__FUNCTION__, ": <IDSuffix> ::= ':' Ident (Lookaheads: ':')"); - lexer.popFront(); - token = lexer.front; - if(token.matches!"Ident"()) - { - lexer.popFront(); - return IDFull(firstIdent, token.data); - } - else - { - error("Expected name, not " ~ token.symbol.name); - assert(0); - } - } - else - { - //trace(__FUNCTION__, ": <IDSuffix> ::= {empty} (Lookaheads: Anything else)"); - return IDFull("", firstIdent); - } - } - - /// <Values> - /// ::= Value <Values> (Lookaheads: Value) - /// | {empty} (Lookaheads: Anything else) - void parseValues() - { - while(true) - { - auto token = lexer.front; - if(token.matches!"Value"()) - { - //trace(__FUNCTION__, ": <Values> ::= Value <Values> (Lookaheads: Value)"); - parseValue(); - continue; - } - else - { - //trace(__FUNCTION__, ": <Values> ::= {empty} (Lookaheads: Anything else)"); - break; - } - } - } - - /// Handle Value terminals that aren't part of an attribute - void parseValue() - { - auto token = lexer.front; - if(token.matches!"Value"()) - { - //trace(__FUNCTION__, ": (Handle Value terminals that aren't part of an attribute)"); - auto value = token.value; - //trace("In tag '", parent.fullName, "', found value: ", value); - emit( ValueEvent(token.location, value) ); - - lexer.popFront(); - } - else - error("Expected value, not "~token.symbol.name); - } - - /// <Attributes> - /// ::= <Attribute> <Attributes> (Lookaheads: Ident) - /// | {empty} (Lookaheads: Anything else) - void parseAttributes() - { - while(true) - { - auto token = lexer.front; - if(token.matches!"Ident"()) - { - //trace(__FUNCTION__, ": <Attributes> ::= <Attribute> <Attributes> (Lookaheads: Ident)"); - parseAttribute(); - continue; - } - else - { - //trace(__FUNCTION__, ": <Attributes> ::= {empty} (Lookaheads: Anything else)"); - break; - } - } - } - - /// <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident) - void parseAttribute() - { - //trace(__FUNCTION__, ": <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident)"); - auto token = lexer.front; - if(!token.matches!"Ident"()) - error("Expected attribute name, not "~token.symbol.name); - - auto id = parseIDFull(); - - token = lexer.front; - if(!token.matches!"="()) - error("Expected '=' after attribute name, not "~token.symbol.name); - - lexer.popFront(); - token = lexer.front; - if(!token.matches!"Value"()) - error("Expected attribute value, not "~token.symbol.name); - - //trace("In tag '", parent.fullName, "', found attribute '", attr.fullName, "'"); - emit( AttributeEvent(token.location, id.namespace, id.name, token.value) ); - - lexer.popFront(); - } - - /// <OptChild> - /// ::= '{' EOL <Tags> '}' (Lookaheads: '{') - /// | {empty} (Lookaheads: Anything else) - void parseOptChild() - { - auto token = lexer.front; - if(token.matches!"{") - { - //trace(__FUNCTION__, ": <OptChild> ::= '{' EOL <Tags> '}' (Lookaheads: '{')"); - lexer.popFront(); - token = lexer.front; - if(!token.matches!"EOL"()) - error("Expected newline or semicolon after '{', not "~token.symbol.name); - - lexer.popFront(); - parseTags(); - - token = lexer.front; - if(!token.matches!"}"()) - error("Expected '}' after child tags, not "~token.symbol.name); - lexer.popFront(); - } - else - { - //trace(__FUNCTION__, ": <OptChild> ::= {empty} (Lookaheads: Anything else)"); - // Do nothing, no error. - } - } - - /// <TagTerminator> - /// ::= EOL (Lookahead: EOL) - /// | {empty} (Lookahead: EOF) - void parseTagTerminator() - { - auto token = lexer.front; - if(token.matches!"EOL") - { - //trace(__FUNCTION__, ": <TagTerminator> ::= EOL (Lookahead: EOL)"); - lexer.popFront(); - } - else if(token.matches!"EOF") - { - //trace(__FUNCTION__, ": <TagTerminator> ::= {empty} (Lookahead: EOF)"); - // Do nothing - } - else - error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name); - } -} - -private struct DOMParser -{ - Lexer lexer; - - Tag parseRoot() - { - auto currTag = new Tag(null, null, "root"); - currTag.location = Location(lexer.filename, 0, 0, 0); - - auto parser = PullParser(lexer); - auto eventRange = inputVisitor!ParserEvent( parser ); - - foreach(event; eventRange) - final switch(event.kind) - { - case ParserEvent.Kind.tagStart: - auto newTag = new Tag(currTag, event.namespace, event.name); - newTag.location = event.location; - - currTag = newTag; - break; - - case ParserEvent.Kind.tagEnd: - currTag = currTag.parent; - - if(!currTag) - parser.error("Internal Error: Received an extra TagEndEvent"); - break; - - case ParserEvent.Kind.value: - currTag.add((cast(ValueEvent)event).value); - break; - - case ParserEvent.Kind.attribute: - auto e = cast(AttributeEvent) event; - auto attr = new Attribute(e.namespace, e.name, e.value, e.location); - currTag.add(attr); - break; - } - - return currTag; - } -} - -// Other parser tests are part of the AST's tests over in the ast module. - -// Regression test, issue #13: https://github.com/Abscissa/SDLang-D/issues/13 -// "Incorrectly accepts ":tagname" (blank namespace, tagname prefixed with colon)" -@("parser: Regression test issue #13") -unittest -{ - import std.exception; - assertThrown!ParseException(parseSource(`:test`)); - assertThrown!ParseException(parseSource(`:4`)); -} - -// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16 -@("parser: Regression test issue #16") -unittest -{ - // Shouldn't crash - foreach(event; pullParseSource(`tag "data"`)) - { - if(event.kind == ParserEvent.Kind.tagStart) - auto e = cast(TagStartEvent) event; - } -} - -// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31 -// "Escape sequence results in range violation error" -@("parser: Regression test issue #31") -unittest -{ - // Shouldn't get a Range violation - parseSource(`test "\"foo\""`); -} |