From 9bec897cdada305cae8ce78809dc3f9fe9cf8776 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 14 Jun 2016 23:25:36 -0400 Subject: step4.1 as step4 but extract header meta & make on first reading in document --- src/sdp/ao_header_extract.d | 334 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 334 insertions(+) create mode 100644 src/sdp/ao_header_extract.d (limited to 'src/sdp/ao_header_extract.d') diff --git a/src/sdp/ao_header_extract.d b/src/sdp/ao_header_extract.d new file mode 100644 index 0000000..7858406 --- /dev/null +++ b/src/sdp/ao_header_extract.d @@ -0,0 +1,334 @@ +/+ + extract header return json ++/ +template SiSUheaderExtract() { + private import + std.exception, + std.regex, + std.utf, + std.conv : to; + private import + ao_rgx; // ao_defaults.d + struct HeaderDocMetadataMakeJson { + mixin SiSUrgxInitFlags; + mixin RgxInit; + auto rgx = Rgx(); + enum State { off, on } + string hm, hs; + auto header_metadata_and_make_jsonstr( + string header, + JSONValue[string] dochead_meta, + JSONValue[string] dochead_make + ) + in { } + body { + scope(exit) { + destroy(header); + destroy(dochead_meta); + destroy(dochead_make); + } + if (auto t = match(header, rgx.head_main)) { + char[][] obj_spl = split( + cast(char[]) header, + rgx.line_delimiter_ws_strip + ); + auto hm = to!string(t.captures[1]); + if (match(hm, rgx.main_headers)) { + foreach (line; obj_spl) { + if (auto m = match(line, rgx.head_main)) { + if (!empty(m.captures[2])) { + if (hm == "creator") { + dochead_meta[hm]["author"].str = + to!string(m.captures[2]); + } else if (hm == "title") { + dochead_meta[hm]["main"].str = + to!string(m.captures[2]); + } else if (hm == "publisher") { + dochead_meta[hm]["name"].str = + to!string(m.captures[2]); + } + } + } else if (auto s = match(line, rgx.head_sub)) { + if (!empty(s.captures[2])) { + auto hs = to!string(s.captures[1]); + if ((hm == "make" ) + && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) { + switch (hm) { + case "make": + if (match(hs, rgx.subhead_make)) { + if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) { + dochead_make[hm][hs].str = to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + default: + break; + } + } else if (dochead_meta[hm].type() == JSON_TYPE.OBJECT) { + switch (hm) { + case "creator": + if (match(hs, rgx.subhead_creator)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "title": + if (match(hs, rgx.subhead_title)) { + if ((hs == "subtitle") + && (dochead_meta[hm]["sub"].type() == JSON_TYPE.STRING)) { + dochead_meta[hm]["sub"].str = + to!string(s.captures[2]); + } else if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "rights": + if (match(hs, rgx.subhead_rights)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "date": + if (match(hs, rgx.subhead_date)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "original": + if (match(hs, rgx.subhead_original)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "classify": + if (match(hs, rgx.subhead_classify)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "identifier": + if (match(hs, rgx.subhead_identifier)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "notes": + if (match(hs, rgx.subhead_notes)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "publisher": + if (match(hs, rgx.subhead_publisher)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "links": + destroy(hm); + destroy(hs); + // if (match(hs, rgx.subhead_links)) { + // if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + // dochead_meta[hm][hs].str = to!string(s.captures[2]); + // } + // } else { + // writeln("not a valid header type:", hm, ":", hs); + // destroy(hm); + // destroy(hs); + // } + break; + default: + break; + } + } + } + } + } + } else { + writeln("not a valid header type:", hm); + } + } + auto t = tuple(dochead_meta, dochead_make); + static assert(!isTypeTuple!(t)); + return t; + } + private auto header_extract( + char[] line, + ref int[string] line_occur, + ref string[string] an_object, + ref int[string] type + ) { + if (matchFirst(line, rgx.header_make)) { + /+ matched header_make +/ + debug(header1) { // header + // tell_l("yellow", line); + } + type["header"] = State.on; + type["header_make"] = State.on; + type["header_meta"] = State.off; + ++line_occur["header_make"]; + an_object["obj"] ~= line ~= "\n"; + } else if (matchFirst(line, rgx.header_meta)) { + /+ matched header_metadata +/ + debug(header1) { // header + // tell_l("yellow", line); + } + type["header"] = State.on; + type["header_make"] = State.off; + type["header_meta"] = State.on; + ++line_occur["header_meta"]; + an_object["obj"] ~= line ~= "\n"; + } else if (type["header_make"] == State.on + && (line_occur["header_make"] > State.off)) { + /+ header_make flag set +/ + if (matchFirst(line, rgx.header_sub)) { + /+ sub-header +/ + debug(header1) { + // tell_l("yellow", line); + } + // type["header"] = State.on; + ++line_occur["header_make"]; + an_object["obj"] ~= line ~= "\n"; + } + } else if (type["header_meta"] == State.on + && (line_occur["header_meta"] > State.off)) { + /+ header_metadata flag set +/ + if (matchFirst(line, rgx.header_sub)) { + /+ sub-header +/ + debug(header1) { + // tell_l("yellow", line); + } + ++line_occur["header_meta"]; + an_object["obj"] ~= line ~= "\n"; + } + } + // return 0; + return an_object; + } + auto header_set_common( + ref int[string] line_occur, + ref string[string] an_object, + ref int[string] type + ) { + // line_occur["header"] = State.off; + line_occur["header_make"] = State.off; + line_occur["header_meta"] = State.off; + type["header"] = State.off; + // type["header_make"] = State.off; + // type["header_meta"] = State.off; + an_object.remove("obj"); + an_object.remove("is"); + an_object.remove("attrib"); + } + private auto headerContentJSON(in char[] src_header) { + auto type = flags_type_init; + type = [ + "header" : State.off, + "header_make" : State.off, + "header_meta" : State.off, + ]; + string[string] an_object; + int[string] line_occur; + auto dochead_make = parseJSON(header_make_jsonstr).object; + auto dochead_meta = parseJSON(header_meta_jsonstr).object; + auto set_header = HeaderDocMetadataMakeJson(); + char[][] source_header_arr = + split(cast(char[]) src_header, rgx.line_delimiter); + foreach(header_line; source_header_arr) { + if (auto m = matchFirst(header_line, rgx.comment)) { + /+ matched comment +/ + debug(comment) { + // tell_l("blue", header_line); + } + header_set_common(line_occur, an_object, type); + // type["header_make"] = State.off; + // type["header_meta"] = State.off; + } else if ((matchFirst(header_line, rgx.header)) + || (type["header_make"] == State.on + && (line_occur["header_make"] > State.off)) + || (type["header_meta"] == State.on + && (line_occur["header_meta"] > State.off))) { + if (header_line.length == 0) { + /+ header_make instructions (current line empty) +/ + auto dochead_metadata_and_make = + set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); + static assert(!isTypeTuple!(dochead_metadata_and_make)); + dochead_meta = dochead_metadata_and_make[0]; + dochead_make = dochead_metadata_and_make[1]; + header_set_common(line_occur, an_object, type); + type["header_make"] = State.off; + type["header_meta"] = State.off; + writeln(dochead_metadata_and_make); + } else { + an_object = header_extract(header_line, line_occur, an_object, type); + } + } else { + // writeln(__LINE__); + } + } + auto t = tuple( + dochead_make, + dochead_meta, + ); + return t; + } + } +} -- cgit v1.2.3