step5 sdlang used for config files and doc headers

author: Ralph Amissah <ralph@amissah.com> 2016-06-16 01:49:06 -0400
committer: Ralph Amissah <ralph@amissah.com> 2019-04-04 14:48:18 -0400
commit: 8ab7e935913c102fb039110e20b71f698a68c6ee (patch)
tree: 3472debd16ce656a57150399ce666e248565f011 /src/sdlang/parser.d
parent: step4.1 as step4 but extract header meta & make on first reading in document (diff)
1 files changed, 551 insertions, 0 deletions
diff --git a/src/sdlang/parser.d b/src/sdlang/parser.d
new file mode 100644
index 0000000..ed8084a
--- /dev/null
+++ b/src/sdlang/parser.d
@@ -0,0 +1,551 @@
+// SDLang-D
+// Written in the D programming language.
+
+module sdlang.parser;
+
+import std.file;
+
+import libInputVisitor;
+
+import sdlang.ast;
+import sdlang.exception;
+import sdlang.lexer;
+import sdlang.symbol;
+import sdlang.token;
+import sdlang.util;
+
+/// Reads the file 'filename' and parses its contents. Returns root tag.
+Tag parseFile(string filename)
+{
+	auto contents = cast(string) read(filename);
+	return parseSource(contents, filename);
+}
+
+/// Parses SDL text held in memory and returns the root tag. Pass the
+/// document's 'filename' (optional) if it should be displayed with any
+/// syntax error messages.
+Tag parseSource(string source, string filename=null)
+{
+	auto tokens = new Lexer(source, filename);
+	auto domParser = DOMParser(tokens);
+	return domParser.parseRoot();
+}
+
+/++
+Parses an SDL document using StAX/Pull-style. Returns an InputRange with
+element type ParserEvent.
+
+The pullParseFile version reads a file and parses it, while pullParseSource
+parses a string passed in. The optional 'filename' parameter in pullParseSource
+can be included so that the SDL document's filename (if any) can be displayed
+with any syntax error messages.
+
+Warning! The FileStartEvent and FileEndEvent events *might* be removed later.
+See $(LINK https://github.com/Abscissa/SDLang-D/issues/17)
+
+Example:
+------------------
+parent 12 attr="q" {
+	childA 34
+	childB 56
+}
+lastTag
+------------------
+
+The ParserEvent sequence emitted for that SDL document would be as
+follows (indented for readability):
+------------------
+FileStartEvent
+	TagStartEvent (parent)
+		ValueEvent (12)
+		AttributeEvent (attr, "q")
+		TagStartEvent (childA)
+			ValueEvent (34)
+		TagEndEvent
+		TagStartEvent (childB)
+			ValueEvent (56)
+		TagEndEvent
+	TagEndEvent
+	TagStartEvent  (lastTag)
+	TagEndEvent
+FileEndEvent
+------------------
+
+Example:
+------------------
+foreach(event; pullParseFile("stuff.sdl"))
+{
+	import std.stdio;
+
+	if(event.peek!FileStartEvent())
+		writeln("FileStartEvent, starting! ");
+
+	else if(event.peek!FileEndEvent())
+		writeln("FileEndEvent, done! ");
+
+	else if(auto e = event.peek!TagStartEvent())
+		writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location);
+
+	else if(event.peek!TagEndEvent())
+		writeln("TagEndEvent");
+
+	else if(auto e = event.peek!ValueEvent())
+		writeln("ValueEvent: ", e.value);
+
+	else if(auto e = event.peek!AttributeEvent())
+		writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value);
+
+	else // Shouldn't happen
+		throw new Exception("Received unknown parser event");
+}
+------------------
++/
+auto pullParseFile(string filename)
+{
+	auto source = cast(string)read(filename); // whole file, read up front
+	return pullParseSource(source, filename); // range of ParserEvent (was parseSource, which returns a DOM Tag)
+}
+
+///ditto
+auto pullParseSource(string source, string filename=null)
+{
+	// The lexer feeds the pull parser; inputVisitor exposes it as a range.
+	auto pull = PullParser(new Lexer(source, filename));
+	return inputVisitor!ParserEvent( pull );
+}
+
+/// The element of the InputRange returned by pullParseFile and pullParseSource:
+alias ParserEvent = std.variant.Algebraic!( // NOTE(review): std.variant is not in this module's visible import list - confirm it is reachable
+	FileStartEvent, // holds exactly one of the event structs listed here
+	FileEndEvent,
+	TagStartEvent,
+	TagEndEvent,
+	ValueEvent,
+	AttributeEvent,
+);
+
+/// Event: Start of file
+struct FileStartEvent
+{
+	Location location; // the parser in this module emits Location(lexer.filename, 0, 0, 0)
+}
+
+/// Event: End of file
+struct FileEndEvent
+{
+	Location location; // the parser in this module emits the location of the EOF token
+}
+
+/// Event: Start of tag
+struct TagStartEvent
+{
+	Location location; // location of the tag's first token
+	string namespace; // "" for a named tag without a namespace; null for an anonymous tag
+	string name; // null for an anonymous tag (one that starts with a value)
+}
+
+/// Event: End of tag
+struct TagEndEvent
+{
+	//Location location; // disabled: this event currently carries no data
+}
+
+/// Event: Found a Value in the current tag
+struct ValueEvent
+{
+	Location location; // location of the Value token
+	Value value; // the token's value, passed through unchanged
+}
+
+/// Event: Found an Attribute in the current tag
+struct AttributeEvent
+{
+	Location location; // location of the attribute's value token, not of its name
+	string namespace; // "" when the attribute name has no "namespace:" prefix
+	string name; // attribute name without the namespace
+	Value value; // the value found after '='
+}
+
+// The actual pull parser
+private struct PullParser
+{
+	private Lexer lexer;
+	
+	private struct IDFull
+	{
+		string namespace;
+		string name;
+	}
+	
+	/// Reports 'msg' as a parse error located at the lexer's current token.
+	private void error(string msg) { error(lexer.front.location, msg); }
+
+	/// Reports 'msg' as a parse error at 'loc'; always throws an
+	/// SDLangParseException whose message is prefixed with "Error: ".
+	private void error(Location loc, string msg)
+	{
+		throw new SDLangParseException(loc, "Error: " ~ msg);
+	}
+	
+	/// Visitor handed to visit(); emit() yields events through it.
+	private InputVisitor!(PullParser, ParserEvent) v;
+
+	/// Stores the visitor, then parses the whole document from the root.
+	void visit(InputVisitor!(PullParser, ParserEvent) v)
+	{
+		this.v = v;
+		parseRoot();
+	}
+
+	/// Wraps 'event' in a ParserEvent and yields it through the visitor.
+	private void emit(Event)(Event event) { v.yield( ParserEvent(event) ); }
+	
+	/// <Root> ::= <Tags> EOF  (Lookaheads: Anything)
+	///
+	/// Emits FileStartEvent, then the events of every top-level tag, then
+	/// FileEndEvent. Anything left over before EOF is a parse error.
+	private void parseRoot()
+	{
+		// The document itself is reported as starting at position zero.
+		emit( FileStartEvent(Location(lexer.filename, 0, 0, 0)) );
+
+		parseTags();
+
+		// parseTags stops at the first token that cannot start a tag.
+		auto last = lexer.front;
+		if(!last.matches!"EOF"())
+			error("Expected end-of-file, not " ~ last.symbol.name);
+		emit( FileEndEvent(last.location) );
+	}
+
+	/// <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)
+	///        |   EOL   <Tags>  (Lookaheads: EOL)
+	///        |   {empty}       (Lookaheads: Anything else, except '{')
+	///
+	/// Consumes tags and empty lines until the lookahead can start neither.
+	/// The terminating token is left in the lexer for the caller to check.
+	void parseTags()
+	{
+		for(;;)
+		{
+			// Dispatch on one token of lookahead.
+			auto next = lexer.front;
+
+			// <Tags> ::= EOL <Tags>: skip the terminator and look again.
+			if(next.matches!"EOL"())
+			{
+				lexer.popFront();
+				continue;
+			}
+
+			// A bare '{' would be an anonymous tag with no value: reject it.
+			if(next.matches!"{"())
+				error("Anonymous tags must have at least one value. They cannot just have children and attributes only.");
+
+			// <Tags> ::= {empty}: not the start of a tag, so we are done.
+			if(!next.matches!"Ident"() && !next.matches!"Value"())
+				break;
+
+			// <Tags> ::= <Tag> <Tags>
+			parseTag();
+		}
+	}
+
+	/// <Tag>
+	///     ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)
+	///     |   <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)
+	void parseTag()
+	{
+		auto first = lexer.front;
+		const isNamed = first.matches!"Ident"();
+		if(!isNamed && !first.matches!"Value"())
+			error("Expected tag name or value, not " ~ first.symbol.name);
+
+		if(isNamed)
+		{
+			// Named tag: consume "name" or "namespace:name" before announcing it.
+			auto tagId = parseIDFull();
+			emit( TagStartEvent(first.location, tagId.namespace, tagId.name) );
+		}
+		else
+		{
+			// Anonymous tag: the value stays in the lexer for parseValues().
+			emit( TagStartEvent(first.location, null, null) );
+		}
+
+		// An '=' here means the identifier just read was really an attribute name.
+		if(lexer.front.matches!"="())
+			error("Anonymous tags must have at least one value. They cannot just have attributes and children only.");
+
+		parseValues();
+		parseAttributes();
+		parseOptChild();
+		parseTagTerminator();
+		emit( TagEndEvent() );
+	}
+
+	/// <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)
+	///
+	/// Reads "name" or "namespace:name" and returns both parts.
+	IDFull parseIDFull()
+	{
+		auto ident = lexer.front;
+		if(!ident.matches!"Ident"())
+		{
+			error("Expected namespace or identifier, not " ~ ident.symbol.name);
+			assert(0); // error() always throws
+		}
+
+		// Consume it; the suffix decides whether it was a namespace or a name.
+		lexer.popFront();
+		return parseIDSuffix(ident.data);
+	}
+
+	/// <IDSuffix>
+	///     ::= ':' Ident  (Lookaheads: ':')
+	///     |   {empty}    (Lookaheads: Anything else)
+	///
+	/// Params:
+	///     firstIdent = the identifier already consumed by parseIDFull.
+	/// Returns: IDFull(firstIdent, name) when a ':' follows, otherwise
+	///     IDFull("", firstIdent).
+	IDFull parseIDSuffix(string firstIdent)
+	{
+		// <IDSuffix> ::= {empty}: no namespace, firstIdent is the name.
+		if(!lexer.front.matches!":"())
+			return IDFull("", firstIdent);
+
+		// <IDSuffix> ::= ':' Ident: firstIdent was the namespace.
+		lexer.popFront();
+
+		auto nameToken = lexer.front;
+		if(!nameToken.matches!"Ident"())
+		{
+			error("Expected name, not " ~ nameToken.symbol.name);
+			assert(0); // error() always throws
+		}
+
+		// Consume the name as well.
+		lexer.popFront();
+		return IDFull(firstIdent, nameToken.data);
+	}
+
+	/// <Values>
+	///     ::= Value <Values>  (Lookaheads: Value)
+	///     |   {empty}         (Lookaheads: Anything else)
+	///
+	/// Emits one ValueEvent per consecutive Value token. Stops, without
+	/// consuming anything, at the first token that is not a Value.
+	void parseValues()
+	{
+		for(;;)
+		{
+			auto next = lexer.front;
+
+			// <Values> ::= {empty}: leave the token in the lexer for
+			// whichever rule the caller tries next.
+			if(!next.matches!"Value"())
+				return;
+
+			// <Values> ::= Value <Values>
+			parseValue();
+		}
+	}
+
+	/// Handle Value terminals that aren't part of an attribute.
+	///
+	/// Emits a ValueEvent for the current Value token and then consumes it;
+	/// any other token is a parse error.
+	void parseValue()
+	{
+		auto valueToken = lexer.front;
+		if(!valueToken.matches!"Value"())
+			error("Expected value, not "~valueToken.symbol.name);
+
+		// Emit first, then advance past the value.
+		auto value = valueToken.value;
+		emit( ValueEvent(valueToken.location, value) );
+
+		lexer.popFront();
+	}
+
+	/// <Attributes>
+	///     ::= <Attribute> <Attributes>  (Lookaheads: Ident)
+	///     |   {empty}                   (Lookaheads: Anything else)
+	///
+	/// Emits one AttributeEvent per attribute. Stops, without consuming
+	/// anything, at the first token that is not an Ident.
+	void parseAttributes()
+	{
+		for(;;)
+		{
+			auto next = lexer.front;
+
+			// <Attributes> ::= {empty}: leave the token in the lexer for
+			// whichever rule the caller tries next.
+			if(!next.matches!"Ident"())
+				return;
+
+			// <Attributes> ::= <Attribute> <Attributes>
+			parseAttribute();
+		}
+	}
+
+	/// <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)
+	///
+	/// Emits one AttributeEvent; its location is that of the value token.
+	void parseAttribute()
+	{
+		auto nameToken = lexer.front;
+		if(!nameToken.matches!"Ident"())
+			error("Expected attribute name, not "~nameToken.symbol.name);
+		auto attrId = parseIDFull();
+
+		auto equals = lexer.front;
+		if(!equals.matches!"="())
+			error("Expected '=' after attribute name, not "~equals.symbol.name);
+		lexer.popFront();
+
+		auto valueToken = lexer.front;
+		if(!valueToken.matches!"Value"())
+			error("Expected attribute value, not "~valueToken.symbol.name);
+
+		// Emit first, then advance past the value.
+		auto attrEvent = AttributeEvent(valueToken.location, attrId.namespace, attrId.name, valueToken.value);
+		emit( attrEvent );
+		lexer.popFront();
+	}
+
+	/// <OptChild>
+	///      ::= '{' EOL <Tags> '}'  (Lookaheads: '{')
+	///      |   {empty}             (Lookaheads: Anything else)
+	///
+	/// Parses an optional '{' ... '}' block of child tags, consuming both
+	/// braces. Does nothing when the lookahead is not '{'.
+	void parseOptChild()
+	{
+		// <OptChild> ::= {empty}: no block follows, and that is not an error.
+		if(!lexer.front.matches!"{"())
+			return;
+
+		// <OptChild> ::= '{' EOL <Tags> '}'
+		lexer.popFront();
+		// An EOL token must directly follow the opening brace.
+		auto afterOpen = lexer.front;
+		if(!afterOpen.matches!"EOL"())
+			error("Expected newline or semicolon after '{', not "~afterOpen.symbol.name);
+		lexer.popFront();
+
+		parseTags();
+
+		// parseTags leaves its terminating token in place; it must be '}'.
+		auto closing = lexer.front;
+		if(!closing.matches!"}"())
+			error("Expected '}' after child tags, not "~closing.symbol.name);
+		lexer.popFront();
+	}
+	
+	/// <TagTerminator>
+	///     ::= EOL      (Lookahead: EOL)
+	///     |   {empty}  (Lookahead: EOF)
+	///
+	/// A tag must be followed by EOL, which is consumed, or by EOF, which is
+	/// left in the lexer. Anything else is a parse error.
+	void parseTagTerminator()
+	{
+		auto terminator = lexer.front;
+
+		// <TagTerminator> ::= {empty}
+		if(terminator.matches!"EOF"())
+			return;
+
+		if(!terminator.matches!"EOL"())
+			error("Expected end of tag (newline, semicolon or end-of-file), not " ~ terminator.symbol.name);
+
+		lexer.popFront(); // <TagTerminator> ::= EOL
+	}
+}
+
+/// Builds a DOM (a tree of Tag objects) from the pull parser's events.
+private struct DOMParser
+{
+	/// Token source; handed to the PullParser created in parseRoot.
+	Lexer lexer;
+
+	/// Parses the whole document and returns the root tag, which is named
+	/// "root" and is passed as the parent of every top-level tag.
+	Tag parseRoot()
+	{
+		auto current = new Tag(null, null, "root");
+		current.location = Location(lexer.filename, 0, 0, 0);
+
+		// Replay the pull parser's events into a tree.
+		auto parser = PullParser(lexer);
+		auto events = inputVisitor!ParserEvent( parser );
+		foreach(event; events)
+		{
+			if(event.peek!FileStartEvent())
+				continue; // nothing to build for the start of the file
+
+			if(auto start = event.peek!TagStartEvent())
+			{
+				// Descend: the new tag becomes the one being filled in.
+				auto child = new Tag(current, start.namespace, start.name);
+				child.location = start.location;
+				current = child;
+			}
+			else if(auto val = event.peek!ValueEvent())
+				current.add(val.value);
+			else if(auto found = event.peek!AttributeEvent())
+				current.add(new Attribute(found.namespace, found.name, found.value, found.location));
+			else if(event.peek!TagEndEvent())
+			{
+				// Ascend to the parent; running out of parents means the
+				// event stream had more ends than starts.
+				current = current.parent;
+				if(!current)
+					parser.error("Internal Error: Received an extra TagEndEvent");
+			}
+			else if(event.peek!FileEndEvent())
+			{
+				// Every started tag must have ended, leaving us at the root.
+				if(current.parent)
+					parser.error("Internal Error: Unexpected end of file, not enough TagEndEvent");
+			}
+			else
+				parser.error("Internal Error: Received unknown parser event");
+		}
+
+		return current;
+	}
+}
+
+// Other parser tests are part of the AST's tests over in the ast module.
+
+// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16
+// Peeking at events from the pull parser shouldn't crash.
+version(sdlangUnittest)
+unittest
+{
+	import std.stdio : stdout, writeln;
+
+	writeln("parser: Regression test issue #16...");
+	stdout.flush();
+
+	auto events = pullParseSource(`tag "data"`);
+	foreach(event; events)
+		event.peek!FileStartEvent();
+}
+
+// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31
+// "Escape sequence results in range violation error"
+version(sdlangUnittest)
+unittest
+{
+	import std.stdio : stdout, writeln;
+	writeln("parser: Regression test issue #31...");
+	stdout.flush();
+
+	// Escaped quotes inside a string shouldn't cause a Range violation.
+	parseSource(`test "\"foo\""`);
+}
author	Ralph Amissah <ralph@amissah.com>	2016-06-16 01:49:06 -0400
committer	Ralph Amissah <ralph@amissah.com>	2019-04-04 14:48:18 -0400
commit	8ab7e935913c102fb039110e20b71f698a68c6ee (patch)
tree	3472debd16ce656a57150399ce666e248565f011 /src/sdlang/parser.d
parent	step4.1 as step4 but extract header meta & make on first reading in document (diff)