From 9bec897cdada305cae8ce78809dc3f9fe9cf8776 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 14 Jun 2016 23:25:36 -0400 Subject: step4.1 as step4 but extract header meta & make on first reading in document --- src/sdp/ao_abstract_doc_source.d | 553 +++++++-------------------------------- 1 file changed, 93 insertions(+), 460 deletions(-) (limited to 'src/sdp/ao_abstract_doc_source.d') diff --git a/src/sdp/ao_abstract_doc_source.d b/src/sdp/ao_abstract_doc_source.d index 7539755..ca435ff 100644 --- a/src/sdp/ao_abstract_doc_source.d +++ b/src/sdp/ao_abstract_doc_source.d @@ -29,7 +29,6 @@ template SiSUdocAbstraction() { string[string] an_object, processing; auto set_abstract_object = ObjectAbstractSet(); - auto set_header = HeaderDocMetadataMakeJson(); auto note_section = NotesSection(); /+ enum +/ @@ -121,8 +120,11 @@ template SiSUdocAbstraction() { // mixin SiSUdocAbstractionFunctions; /+ ↓ abstract marked up document +/ - auto abstract_doc_source(char[][] markup_sourcefile_content) { - + auto abstract_doc_source( + char[][] markup_sourcefile_content, + JSONValue[string] dochead_make_json, + JSONValue[string] dochead_meta_json + ) { /+ ↓ abstraction init +/ scope(success) { } @@ -134,9 +136,11 @@ template SiSUdocAbstraction() { destroy(processing); destroy(biblio_arr_json); } + line_occur = [ + "heading" : 0, + "para" : 0, + ]; auto type = flags_type_init; - auto dochead_make = parseJSON(header_make_jsonstr).object; - auto dochead_meta = parseJSON(header_metadata_jsonstr).object; mixin ScreenTxtColors; int tell_lo(string color, int obj_cite_number, in char[] line) { writefln( @@ -242,8 +246,9 @@ template SiSUdocAbstraction() { /+ block object: code +/ code_block(line, an_object, type); continue; - } else if (!matchFirst(line, rgx.regular_parse_skip)) { - /+ object other than code block object (includes regular text paragraph) +/ + } else if (!matchFirst(line, rgx.skip_code_block_from_regular_parse)) { + /+ object other than "code block" object + (includes regular text paragraph, headings & blocks other than code) +/ if (((matchFirst(line, rgx.heading_biblio) || (type["heading_biblio"] == State.on))) && (!matchFirst(line, rgx.heading)) @@ -291,7 +296,7 @@ template SiSUdocAbstraction() { continue; } else if (!line.empty) { /+ line not empty +/ - /+ non blocks (headers, paragraphs) & closed blocks +/ + /+ non blocks (headings, paragraphs) & closed blocks +/ assert( !line.empty, "line tested, line not empty surely" @@ -330,26 +335,17 @@ template SiSUdocAbstraction() { set_abstract_object.contents_comment(strip(an_object["obj"])); header_set_common(line_occur, an_object, type); processing.remove("verse"); - type["header_make"] = State.off; - type["header_metadata"] = State.off; ++counter; - } else if ((matchFirst(line, rgx.header_make)) - || (matchFirst(line, rgx.header_metadata)) - || (type["header_make"] == State.on - && (line_occur["header_make"] > State.off)) - || (type["header_metadata"] == State.on - && (line_occur["header_metadata"] > State.off))) { - header_extract(line, line_occur, an_object, type); } else if (((line_occur["para"] == State.off) && (line_occur["heading"] == State.off)) && ((type["para"] == State.off) && (type["heading"] == State.off))) { /+ heading or para but neither flag nor line exists +/ - if ((to!string(dochead_make["make"]["headings"]).length > 2) + if ((to!string(dochead_make_json["make"]["headings"]).length > 2) && (type["make_headings"] == State.off)) { /+ heading found +/ auto dochead_make_headings = - to!string(dochead_make["make"]["headings"]); + to!string(dochead_make_json["make"]["headings"]); heading_found(line, dochead_make_headings, heading_match_str, heading_match_rgx, type); } if ((type["make_headings"] == State.on) @@ -362,27 +358,11 @@ template SiSUdocAbstraction() { } if (matchFirst(line, rgx.heading)) { /+ heading match +/ - heading_match(line, line_occur, an_object, lv, collapsed_lev, type, dochead_meta); + heading_matched(line, line_occur, an_object, lv, collapsed_lev, type, dochead_meta_json); } else if (line_occur["para"] == State.off) { /+ para match +/ - para_match(line, an_object, indent, bullet, type); - } - } else if (line_occur["header_make"] > State.off) { - /+ header_make +/ - // should be caught by sub-header - debug(header) { - tell_l("red", line); + para_match(line, an_object, indent, bullet, type, line_occur); } - an_object["obj"] ~= line ~= "\n"; - ++line_occur["header_make"]; - } else if (line_occur["header_metadata"] > State.off) { - /+ header_metadata +/ - // should be caught by sub-header - debug(header) { // para - tell_l("red", line); - } - an_object["obj"] ~= line ~= "\n"; - ++line_occur["header_metadata"]; } else if (line_occur["heading"] > State.off) { /+ heading +/ debug(heading) { // heading @@ -403,7 +383,7 @@ template SiSUdocAbstraction() { /+ line empty, with blocks flag +/ block_flag_line_empty(line, an_object, contents_the_objects, bookindex_unordered_hashes, obj_cite_number, node, counter, type, obj_cite_number_poem); // watch } else { - /+ line empty +/ + /+ line empty +/ /+ line.empty, post contents, empty variables: +/ assert( line.empty, @@ -413,29 +393,7 @@ template SiSUdocAbstraction() { (type["blocks"] == State.off), "code block status: none" ); - if ((type["header_make"] == State.on) - && (line_occur["header_make"] > State.off)) { - /+ header_make instructions (current line empty) +/ - auto dochead_metadata_and_make = - set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); - static assert(!isTypeTuple!(dochead_metadata_and_make)); - dochead_meta = dochead_metadata_and_make[0]; - dochead_make = dochead_metadata_and_make[1]; - header_set_common(line_occur, an_object, type); - processing.remove("verse"); - } else if ((type["header_metadata"] == State.on) - && (line_occur["header_metadata"] > State.off)) { - /+ header_metadata (current line empty) +/ - auto dochead_metadata_and_make = - set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); - static assert(!isTypeTuple!(dochead_meta)); - dochead_meta = dochead_metadata_and_make[0]; - dochead_make = dochead_metadata_and_make[1]; - header_set_common(line_occur, an_object, type); - type["header_make"] = State.off; - type["header_metadata"] = State.off; - processing.remove("verse"); - } else if ((type["heading"] == State.on) + if ((type["heading"] == State.on) && (line_occur["heading"] > State.off)) { /+ heading object (current line empty) +/ obj_cite_number = obj_cite_number_emit(type["obj_cite_number_status"]); @@ -474,8 +432,6 @@ template SiSUdocAbstraction() { // writeln(m.hit, "\n"); } header_set_common(line_occur, an_object, type); - type["header_make"] = State.off; - type["header_metadata"] = State.off; an_object.remove("lev"); an_object.remove("lev_markup_number"); // an_object["lev_markup_number"]="9"; @@ -518,8 +474,6 @@ template SiSUdocAbstraction() { // bullet // ); header_set_common(line_occur, an_object, type); - type["header_make"] = State.off; - type["header_metadata"] = State.off; indent["first"] = "0"; indent["second"] = "0"; bullet = false; @@ -535,21 +489,24 @@ template SiSUdocAbstraction() { } // close else for line empty } // close else for not the above } // close after non code, other blocks or regular text - if (((contents_the_objects[$-1].is_a == "para") - || (contents_the_objects[$-1].is_a == "heading")) - && (counter-1 > previous_count)) { - if (match(contents_the_objects[$-1].object, - rgx.inline_notes_delimiter_al_regular_number_note)) { - // endnotes/ footnotes for - // doc objects other than paragraphs & headings - // various forms of grouped text - previous_count=contents_the_objects.length -1; - note_section.gather_notes_for_endnote_section( - contents_the_objects, - contents_the_objects.length -1 - ); - // notes[notepoint]=note_section.notes_section(contents_the_objects, counter-1); - // notepoint +=1; + /+ unless (contents_the_objects.length == 0) ? +/ + if (contents_the_objects.length > 0) { + if (((contents_the_objects[$-1].is_a == "para") + || (contents_the_objects[$-1].is_a == "heading")) + && (counter-1 > previous_count)) { + if (match(contents_the_objects[$-1].object, + rgx.inline_notes_delimiter_al_regular_number_note)) { + // endnotes/ footnotes for + // doc objects other than paragraphs & headings + // various forms of grouped text + previous_count=contents_the_objects.length -1; + note_section.gather_notes_for_endnote_section( + contents_the_objects, + contents_the_objects.length -1 + ); + // notes[notepoint]=note_section.notes_section(contents_the_objects, counter-1); + // notepoint +=1; + } } } } /+ ← closed: loop markup document/text line by line +/ @@ -669,25 +626,22 @@ template SiSUdocAbstraction() { // struct Document { // char content; // char head_make; - // char head_metadata; + // char head_meta; // char bookindex_section; // char biblio; // } // struct Document { // char content; // char head_make; - // char head_metadata; + // char head_meta; // char bookindex_section; // char biblio; // } - auto t = - tuple( - document_the, - dochead_make, - dochead_meta, - bookindex_unordered_hashes, - biblio_ordered - ); + auto t = tuple( + document_the, + bookindex_unordered_hashes, + biblio_ordered + ); return t; /+ post loop markup document/text ↑ +/ @@ -707,13 +661,8 @@ template SiSUdocAbstraction() { ref int[string] type ) { // line_occur["header"] = State.off; - line_occur["header_make"] = State.off; - line_occur["header_metadata"] = State.off; line_occur["heading"] = State.off; line_occur["para"]= State.off; - type["header"] = State.off; - // type["header_make"] = State.off; - // type["header_metadata"] = State.off; type["heading"] = State.off; type["para"] = State.off; object_reset(an_object); @@ -1668,62 +1617,6 @@ template SiSUdocAbstraction() { } } } - auto header_extract( - char[] line, - ref int[string] line_occur, - ref string[string] an_object, - ref int[string] type - ) { - if (matchFirst(line, rgx.header_make)) { - /+ matched header_make +/ - debug(header1) { // header - tell_l("yellow", line); - } - type["header"] = State.on; - type["header_make"] = State.on; - type["header_metadata"] = State.off; - type["heading"] = State.off; - type["para"] = State.off; - ++line_occur["header_make"]; - an_object["obj"] ~= line ~= "\n"; - } else if (matchFirst(line, rgx.header_metadata)) { - /+ matched header_metadata +/ - debug(header1) { // header - tell_l("yellow", line); - } - type["header"] = State.on; - type["header_make"] = State.off; - type["header_metadata"] = State.on; - type["heading"] = State.off; - type["para"] = State.off; - ++line_occur["header_metadata"]; - an_object["obj"] ~= line ~= "\n"; - } else if (type["header_make"] == State.on - && (line_occur["header_make"] > State.off)) { - /+ header_make flag set +/ - if (matchFirst(line, rgx.header_sub)) { - /+ sub-header +/ - debug(header1) { - tell_l("yellow", line); - } - // type["header"] = State.on; - ++line_occur["header_make"]; - an_object["obj"] ~= line ~= "\n"; - } - } else if (type["header_metadata"] == State.on - && (line_occur["header_metadata"] > State.off)) { - /+ header_metadata flag set +/ - if (matchFirst(line, rgx.header_sub)) { - /+ sub-header +/ - debug(header1) { - tell_l("yellow", line); - } - ++line_occur["header_metadata"]; - an_object["obj"] ~= line ~= "\n"; - } - } - return 0; - } auto heading_found( char[] line, string dochead_make_headings, @@ -1868,31 +1761,31 @@ template SiSUdocAbstraction() { } } } - auto heading_match( + auto heading_matched( char[] line, ref int[string] line_occur, ref string[string] an_object, ref int[string] lv, ref int[string] collapsed_lev, ref int[string] type, - ref JSONValue[string] dochead_meta + ref JSONValue[string] dochead_meta_json ) { if (auto m = match(line, rgx.heading)) { /+ heading match +/ type["heading"] = State.on; - type["header"] = State.off; - type["header_make"] = State.off; - type["header_metadata"] = State.off; type["heading_biblio"] = State.off; type["para"] = State.off; ++line_occur["heading"]; an_object["obj"] ~= line ~= "\n"; an_object["lev"] ~= m.captures[1]; + // writeln("an object level: ", an_object); assertions_doc_structure(an_object, lv); // includes most of the logic for collapsed levels switch (an_object["lev"]) { case "A": - an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_title, to!string(dochead_meta["title"]["main"])); - an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_author, to!string(dochead_meta["creator"]["author"])); + an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_title, to!string(dochead_meta_json["title"]["main"])); + an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_author, to!string(dochead_meta_json["creator"]["author"])); + // an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_title, to!string(parseJSON(dochead_meta_json["title"]["main"]))); + // an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_author, to!string(parseJSON(dochead_meta_json["creator"]["author"]))); collapsed_lev["h0"] = 1; an_object["lev_collapsed_number"] = to!string(collapsed_lev["h0"]); @@ -2014,7 +1907,8 @@ template SiSUdocAbstraction() { ref string[string] an_object, ref string[string] indent, ref bool bullet, - ref int[string] type + ref int[string] type, + ref int[string] line_occur ) { if (line_occur["para"] == State.off) { /+ para matches +/ @@ -2141,28 +2035,29 @@ template SiSUdocAbstraction() { } invariant() { } - string header_make(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - obj_txt["attrib"] = " \"use\": \"head\"," - ~ " \"of\": \"header\"," - ~ " \"is\": \"header_make\""; - return obj_txt["attrib"]; - } - invariant() { - } - string header_metadata(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - obj_txt["attrib"] = " \"use\": \"head\"," - ~ " \"of\": \"header\"," - ~ " \"is\": \"header_metadata\""; - return obj_txt["attrib"]; - } - invariant() { - } + /+ revist +/ + // string header_make(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // obj_txt["attrib"] = " \"use\": \"head\"," + // ~ " \"of\": \"header\"," + // ~ " \"is\": \"header_make\""; + // return obj_txt["attrib"]; + // } + // invariant() { + // } + // string header_meta(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // obj_txt["attrib"] = " \"use\": \"head\"," + // ~ " \"of\": \"header\"," + // ~ " \"is\": \"header_metadata\""; + // return obj_txt["attrib"]; + // } + // invariant() { + // } string code(string obj_txt_in) in { } body { @@ -2350,22 +2245,23 @@ template SiSUdocAbstraction() { } invariant() { } - string header_make(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - return obj_txt["munge"]; - } - invariant() { - } - string header_metadata(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - return obj_txt["munge"]; - } - invariant() { - } + /+ revisit +/ + // string header_make(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // return obj_txt["munge"]; + // } + // invariant() { + // } + // string header_meta(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // return obj_txt["munge"]; + // } + // invariant() { + // } string code(string obj_txt_in) in { } body { @@ -2438,12 +2334,6 @@ template SiSUdocAbstraction() { ? obj_txt["munge"] : strip(obj_txt["munge"]); switch (obj_is_) { - case "header_make": - obj_txt["munge"]=munge.header_make(obj_txt["munge"]); - break; - case "header_metadata": - obj_txt["munge"]=munge.header_metadata(obj_txt["munge"]); - break; case "heading": obj_txt["munge"]=munge.heading(obj_txt["munge"]); break; @@ -2500,12 +2390,6 @@ template SiSUdocAbstraction() { obj_attrib.remove("json"); obj_attrib["json"] ="{"; switch (obj_is_) { - case "header_make": - obj_attrib["json"] ~= attrib.header_make(obj_raw); - break; - case "header_metadata": - obj_attrib["json"] ~= attrib.header_metadata(obj_raw); - break; case "heading": obj_attrib["json"] ~= attrib.heading(obj_raw); // break; @@ -2575,257 +2459,6 @@ template SiSUdocAbstraction() { invariant() { } } - struct HeaderDocMetadataMakeJson { - // class HeaderMetadataMakeHash : AssertHeaderMetadataMakeJson { - auto rgx = Rgx(); - string hm, hs; - auto header_metadata_and_make_jsonstr( - string header, - JSONValue[string] dochead_meta, - JSONValue[string] dochead_make - ) - in { } - body { - scope(exit) { - destroy(header); - destroy(dochead_meta); - destroy(dochead_make); - } - if (auto t = match(header, rgx.head_main)) { - char[][] obj_spl = split( - cast(char[]) header, - rgx.line_delimiter_ws_strip - ); - auto hm = to!string(t.captures[1]); - if (match(hm, rgx.main_headers)) { - foreach (line; obj_spl) { - if (auto m = match(line, rgx.head_main)) { - if (!empty(m.captures[2])) { - if (hm == "creator") { - dochead_meta[hm]["author"].str = - to!string(m.captures[2]); - } else if (hm == "title") { - dochead_meta[hm]["main"].str = - to!string(m.captures[2]); - } else if (hm == "publisher") { - dochead_meta[hm]["name"].str = - to!string(m.captures[2]); - } - } - } else if (auto s = match(line, rgx.head_sub)) { - if (!empty(s.captures[2])) { - auto hs = to!string(s.captures[1]); - if ((hm == "make" ) - && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) { - switch (hm) { - case "make": - if (match(hs, rgx.subhead_make)) { - if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) { - dochead_make[hm][hs].str = to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - default: - break; - } - } else if (dochead_meta[hm].type() == JSON_TYPE.OBJECT) { - switch (hm) { - case "creator": - if (match(hs, rgx.subhead_creator)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "title": - if (match(hs, rgx.subhead_title)) { - if ((hs == "subtitle") - && (dochead_meta[hm]["sub"].type() == JSON_TYPE.STRING)) { - dochead_meta[hm]["sub"].str = - to!string(s.captures[2]); - } else if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "rights": - if (match(hs, rgx.subhead_rights)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "date": - if (match(hs, rgx.subhead_date)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "original": - if (match(hs, rgx.subhead_original)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "classify": - if (match(hs, rgx.subhead_classify)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "identifier": - if (match(hs, rgx.subhead_identifier)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "notes": - if (match(hs, rgx.subhead_notes)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "publisher": - if (match(hs, rgx.subhead_publisher)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "links": - destroy(hm); - destroy(hs); - // if (match(hs, rgx.subhead_links)) { - // if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - // dochead_meta[hm][hs].str = to!string(s.captures[2]); - // } - // } else { - // writeln("not a valid header type:", hm, ":", hs); - // destroy(hm); - // destroy(hs); - // } - break; - default: - break; - } - } - } - } - } - } else { - writeln("not a valid header type:", hm); - } - } - auto t = tuple(dochead_meta, dochead_make); - static assert(!isTypeTuple!(t)); - return t; - } - // invariant() { - // } - } - class HeaderMetadataMakeHash { - // class HeaderMetadataMakeHash : AssertHeaderMetadataMakeHash { - auto rgx = Rgx(); - string header_main; - string[string] head; - string[string] header_topic_hash(string header) - in { } - body { - if (auto t = match(header, rgx.head_main)) { - char[][] obj_spl = split( - cast(char[]) header, - rgx.line_delimiter_ws_strip - ); - auto header_main = to!string(t.captures[1]); - head[header_main] = "{"; - foreach (line; obj_spl) { - if (auto m = match(line, rgx.head_main)) { - if (!empty(m.captures[2])) { - head[header_main] ~= - "\"" ~ header_main ~ - "\": \"" ~ - to!string(m.captures[2]) ~ - "\","; - } - } else if (auto s = match(line, rgx.head_sub)) { - head[header_main] ~= "\"" ~ s.captures[1] ~ "\":"; - if (!empty(s.captures[2])) { - head[header_main] ~= "\"" ~ s.captures[2] ~ "\","; - } - } - } - head[header_main] = replaceFirst( - head[header_main], - rgx.tailing_comma, - "" - ); - head[header_main] ~= "}"; - debug(headerjson) { - JSONValue j = parseJSON(head[header_main]); - assert( - (j.type == JSON_TYPE.OBJECT) - ); - } - } - return head; - } - invariant() { - } - } struct BookIndexNuggetHash { // class BookIndexNuggetHash : AssertBookIndexNuggetHash { string main_term, sub_term, sub_term_bits; -- cgit v1.2.3