aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sdp/ao_header_extract.d
diff options
context:
space:
mode:
authorRalph Amissah <ralph@amissah.com>2016-06-14 23:25:36 -0400
committerRalph Amissah <ralph@amissah.com>2019-04-04 14:48:18 -0400
commit9bec897cdada305cae8ce78809dc3f9fe9cf8776 (patch)
treed05d0e4f2d9a9b2c5273e0a33bbbdf84580543b2 /src/sdp/ao_header_extract.d
parentstep4 some additional work (diff)
step4.1 as step4 but extract header meta & make on first reading in documentdoc-reform_v0.0.4
Diffstat (limited to 'src/sdp/ao_header_extract.d')
-rw-r--r--src/sdp/ao_header_extract.d334
1 files changed, 334 insertions, 0 deletions
diff --git a/src/sdp/ao_header_extract.d b/src/sdp/ao_header_extract.d
new file mode 100644
index 0000000..7858406
--- /dev/null
+++ b/src/sdp/ao_header_extract.d
@@ -0,0 +1,334 @@
+/+
+ extract header return json
++/
+template SiSUheaderExtract() {
+ private import
+ std.exception,
+ std.regex,
+ std.utf,
+ std.conv : to;
+ private import
+ ao_rgx; // ao_defaults.d
+ struct HeaderDocMetadataMakeJson {
+ mixin SiSUrgxInitFlags;
+ mixin RgxInit;
+ auto rgx = Rgx();
+ enum State { off, on }
+ string hm, hs;
+ auto header_metadata_and_make_jsonstr(
+ string header,
+ JSONValue[string] dochead_meta,
+ JSONValue[string] dochead_make
+ )
+ in { }
+ body {
+ scope(exit) {
+ destroy(header);
+ destroy(dochead_meta);
+ destroy(dochead_make);
+ }
+ if (auto t = match(header, rgx.head_main)) {
+ char[][] obj_spl = split(
+ cast(char[]) header,
+ rgx.line_delimiter_ws_strip
+ );
+ auto hm = to!string(t.captures[1]);
+ if (match(hm, rgx.main_headers)) {
+ foreach (line; obj_spl) {
+ if (auto m = match(line, rgx.head_main)) {
+ if (!empty(m.captures[2])) {
+ if (hm == "creator") {
+ dochead_meta[hm]["author"].str =
+ to!string(m.captures[2]);
+ } else if (hm == "title") {
+ dochead_meta[hm]["main"].str =
+ to!string(m.captures[2]);
+ } else if (hm == "publisher") {
+ dochead_meta[hm]["name"].str =
+ to!string(m.captures[2]);
+ }
+ }
+ } else if (auto s = match(line, rgx.head_sub)) {
+ if (!empty(s.captures[2])) {
+ auto hs = to!string(s.captures[1]);
+ if ((hm == "make" )
+ && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) {
+ switch (hm) {
+ case "make":
+ if (match(hs, rgx.subhead_make)) {
+ if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_make[hm][hs].str = to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ default:
+ break;
+ }
+ } else if (dochead_meta[hm].type() == JSON_TYPE.OBJECT) {
+ switch (hm) {
+ case "creator":
+ if (match(hs, rgx.subhead_creator)) {
+ if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_meta[hm][hs].str =
+ to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ case "title":
+ if (match(hs, rgx.subhead_title)) {
+ if ((hs == "subtitle")
+ && (dochead_meta[hm]["sub"].type() == JSON_TYPE.STRING)) {
+ dochead_meta[hm]["sub"].str =
+ to!string(s.captures[2]);
+ } else if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_meta[hm][hs].str =
+ to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ case "rights":
+ if (match(hs, rgx.subhead_rights)) {
+ if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_meta[hm][hs].str =
+ to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ case "date":
+ if (match(hs, rgx.subhead_date)) {
+ if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_meta[hm][hs].str =
+ to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ case "original":
+ if (match(hs, rgx.subhead_original)) {
+ if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_meta[hm][hs].str =
+ to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ case "classify":
+ if (match(hs, rgx.subhead_classify)) {
+ if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_meta[hm][hs].str =
+ to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ case "identifier":
+ if (match(hs, rgx.subhead_identifier)) {
+ if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_meta[hm][hs].str =
+ to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ case "notes":
+ if (match(hs, rgx.subhead_notes)) {
+ if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_meta[hm][hs].str =
+ to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ case "publisher":
+ if (match(hs, rgx.subhead_publisher)) {
+ if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ dochead_meta[hm][hs].str =
+ to!string(s.captures[2]);
+ }
+ } else {
+ writeln("not a valid header type:", hm, ":", hs);
+ destroy(hm);
+ destroy(hs);
+ }
+ break;
+ case "links":
+ destroy(hm);
+ destroy(hs);
+ // if (match(hs, rgx.subhead_links)) {
+ // if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+ // dochead_meta[hm][hs].str = to!string(s.captures[2]);
+ // }
+ // } else {
+ // writeln("not a valid header type:", hm, ":", hs);
+ // destroy(hm);
+ // destroy(hs);
+ // }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+ } else {
+ writeln("not a valid header type:", hm);
+ }
+ }
+ auto t = tuple(dochead_meta, dochead_make);
+ static assert(!isTypeTuple!(t));
+ return t;
+ }
+ private auto header_extract(
+ char[] line,
+ ref int[string] line_occur,
+ ref string[string] an_object,
+ ref int[string] type
+ ) {
+ if (matchFirst(line, rgx.header_make)) {
+ /+ matched header_make +/
+ debug(header1) { // header
+ // tell_l("yellow", line);
+ }
+ type["header"] = State.on;
+ type["header_make"] = State.on;
+ type["header_meta"] = State.off;
+ ++line_occur["header_make"];
+ an_object["obj"] ~= line ~= "\n";
+ } else if (matchFirst(line, rgx.header_meta)) {
+ /+ matched header_metadata +/
+ debug(header1) { // header
+ // tell_l("yellow", line);
+ }
+ type["header"] = State.on;
+ type["header_make"] = State.off;
+ type["header_meta"] = State.on;
+ ++line_occur["header_meta"];
+ an_object["obj"] ~= line ~= "\n";
+ } else if (type["header_make"] == State.on
+ && (line_occur["header_make"] > State.off)) {
+ /+ header_make flag set +/
+ if (matchFirst(line, rgx.header_sub)) {
+ /+ sub-header +/
+ debug(header1) {
+ // tell_l("yellow", line);
+ }
+ // type["header"] = State.on;
+ ++line_occur["header_make"];
+ an_object["obj"] ~= line ~= "\n";
+ }
+ } else if (type["header_meta"] == State.on
+ && (line_occur["header_meta"] > State.off)) {
+ /+ header_metadata flag set +/
+ if (matchFirst(line, rgx.header_sub)) {
+ /+ sub-header +/
+ debug(header1) {
+ // tell_l("yellow", line);
+ }
+ ++line_occur["header_meta"];
+ an_object["obj"] ~= line ~= "\n";
+ }
+ }
+ // return 0;
+ return an_object;
+ }
+ auto header_set_common(
+ ref int[string] line_occur,
+ ref string[string] an_object,
+ ref int[string] type
+ ) {
+ // line_occur["header"] = State.off;
+ line_occur["header_make"] = State.off;
+ line_occur["header_meta"] = State.off;
+ type["header"] = State.off;
+ // type["header_make"] = State.off;
+ // type["header_meta"] = State.off;
+ an_object.remove("obj");
+ an_object.remove("is");
+ an_object.remove("attrib");
+ }
+ private auto headerContentJSON(in char[] src_header) {
+ auto type = flags_type_init;
+ type = [
+ "header" : State.off,
+ "header_make" : State.off,
+ "header_meta" : State.off,
+ ];
+ string[string] an_object;
+ int[string] line_occur;
+ auto dochead_make = parseJSON(header_make_jsonstr).object;
+ auto dochead_meta = parseJSON(header_meta_jsonstr).object;
+ auto set_header = HeaderDocMetadataMakeJson();
+ char[][] source_header_arr =
+ split(cast(char[]) src_header, rgx.line_delimiter);
+ foreach(header_line; source_header_arr) {
+ if (auto m = matchFirst(header_line, rgx.comment)) {
+ /+ matched comment +/
+ debug(comment) {
+ // tell_l("blue", header_line);
+ }
+ header_set_common(line_occur, an_object, type);
+ // type["header_make"] = State.off;
+ // type["header_meta"] = State.off;
+ } else if ((matchFirst(header_line, rgx.header))
+ || (type["header_make"] == State.on
+ && (line_occur["header_make"] > State.off))
+ || (type["header_meta"] == State.on
+ && (line_occur["header_meta"] > State.off))) {
+ if (header_line.length == 0) {
+ /+ header_make instructions (current line empty) +/
+ auto dochead_metadata_and_make =
+ set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make);
+ static assert(!isTypeTuple!(dochead_metadata_and_make));
+ dochead_meta = dochead_metadata_and_make[0];
+ dochead_make = dochead_metadata_and_make[1];
+ header_set_common(line_occur, an_object, type);
+ type["header_make"] = State.off;
+ type["header_meta"] = State.off;
+ writeln(dochead_metadata_and_make);
+ } else {
+ an_object = header_extract(header_line, line_occur, an_object, type);
+ }
+ } else {
+ // writeln(__LINE__);
+ }
+ }
+ auto t = tuple(
+ dochead_make,
+ dochead_meta,
+ );
+ return t;
+ }
+ }
+}