From 9bec897cdada305cae8ce78809dc3f9fe9cf8776 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 14 Jun 2016 23:25:36 -0400 Subject: step4.1 as step4 but extract header meta & make on first reading in document --- dub.sdl | 4 +- makefile | 5 +- maker.org | 14 +- org/ao_abstract_doc_source.org | 597 ++++++--------------------------------- org/ao_defaults.org | 37 +-- org/ao_header_extract.org | 438 ++++++++++++++++++++++++++++ org/ao_read_source_files.org | 115 ++++---- org/output.org | 4 +- org/sdp.org | 116 ++++---- src/sdp.d | 81 +++--- src/sdp/ao_abstract_doc_source.d | 553 ++++++------------------------------ src/sdp/ao_defaults.d | 6 +- src/sdp/ao_header_extract.d | 334 ++++++++++++++++++++++ src/sdp/ao_read_source_files.d | 103 +++---- src/sdp/ao_rgx.d | 27 +- src/sdp/output_hub.d | 4 +- views/version.txt | 2 +- 17 files changed, 1230 insertions(+), 1210 deletions(-) create mode 100644 org/ao_header_extract.org create mode 100644 src/sdp/ao_header_extract.d diff --git a/dub.sdl b/dub.sdl index 1af641b..7aa4621 100644 --- a/dub.sdl +++ b/dub.sdl @@ -7,7 +7,7 @@ license "AGPL-3+" targetPath "./bin" sourcePath "./src" stringImportPaths "./views" -buildRequirements "disallowDeprecations" +buildRequirements "disallowDeprecations" "allowWarnings" configuration "sdp-release" { name "release" targetType "executable" @@ -52,6 +52,8 @@ configuration "sdp-tmp" { targetName "sdp-tmp" debugVersions "tmp" debugVersions "crap" + debugVersions "header" + debugVersions "header1" buildOptions "debugMode" "debugInfo" "optimize" postGenerateCommands "/usr/bin/notify-send -t 0 'D (debug) executable ready' 'sdp-tmp'" } diff --git a/makefile b/makefile index a6c8b0b..283c44e 100644 --- a/makefile +++ b/makefile @@ -148,8 +148,9 @@ clean: rm $(PRG_BINDIR)/* expunge: - rm -rf $(PRG_BINDIR); \ - rm -rf $(PRG_SRCDIR)/$(PRG_NAME); + rm -f $(PRG_SRCDIR)/*; \ + rm -rf $(PRG_SRCDIR)/$(PRG_NAME); \ + rm -rf $(PRG_BINDIR); distclean: expunge diff --git a/maker.org b/maker.org index 2cc2ea7..5c7a0d3 100644 --- a/maker.org +++ b/maker.org @@ -109,6 +109,7 @@ Set debug flags using DMD standard flag -debug= e.g.: footnotesdone group header + header_and_content header1 headerjson headermakejson @@ -349,8 +350,9 @@ clean: rm $(PRG_BINDIR)/* expunge: - rm -rf $(PRG_BINDIR); \ - rm -rf $(PRG_SRCDIR)/$(PRG_NAME); + rm -f $(PRG_SRCDIR)/*; \ + rm -rf $(PRG_SRCDIR)/$(PRG_NAME); \ + rm -rf $(PRG_BINDIR); distclean: expunge @@ -435,7 +437,7 @@ license "AGPL-3+" targetPath "./bin" sourcePath "./src" stringImportPaths "./views" -buildRequirements "disallowDeprecations" +buildRequirements "disallowDeprecations" "allowWarnings" configuration "sdp-release" { name "release" targetType "executable" @@ -480,6 +482,8 @@ configuration "sdp-tmp" { targetName "sdp-tmp" debugVersions "tmp" debugVersions "crap" + debugVersions "header" + debugVersions "header1" buildOptions "debugMode" "debugInfo" "optimize" postGenerateCommands "/usr/bin/notify-send -t 0 'D (debug) executable ready' 'sdp-tmp'" } @@ -493,6 +497,10 @@ configuration "sdp-debug-clean" { } #+END_SRC +dflags platform="dmd" "-v --force -de -w -J./views -I./src/sdp -of./bin/sdp" +dflags platform="dmd" "-O -release" +#buildOptions "-O -release" + *** +dub.json+ :json: **** +sdp+ diff --git a/org/ao_abstract_doc_source.org b/org/ao_abstract_doc_source.org index 3c82dba..557cda0 100644 --- a/org/ao_abstract_doc_source.org +++ b/org/ao_abstract_doc_source.org @@ -53,7 +53,6 @@ ObjComposite[] contents_the_objects; string[string] an_object, processing; auto set_abstract_object = ObjectAbstractSet(); -auto set_header = HeaderDocMetadataMakeJson(); auto note_section = NotesSection(); /+ enum +/ @@ -157,9 +156,11 @@ scope(exit) { destroy(processing); destroy(biblio_arr_json); } +line_occur = [ + "heading" : 0, + "para" : 0, +]; auto type = flags_type_init; -auto dochead_make = parseJSON(header_make_jsonstr).object; -auto dochead_meta = parseJSON(header_metadata_jsonstr).object; mixin ScreenTxtColors; int tell_lo(string color, int obj_cite_number, in char[] line) { writefln( @@ -284,8 +285,9 @@ if (type["code"] == TriState.on) { #+name: abs_in_loop_body_00 #+BEGIN_SRC d -} else if (!matchFirst(line, rgx.regular_parse_skip)) { - /+ object other than code block object (includes regular text paragraph) +/ +} else if (!matchFirst(line, rgx.skip_code_block_from_regular_parse)) { + /+ object other than "code block" object + (includes regular text paragraph, headings & blocks other than code) +/ #+END_SRC ***** within block group [+1] :block:active: @@ -394,7 +396,7 @@ if (matchFirst(line, rgx.block_open)) { #+BEGIN_SRC d } else if (!line.empty) { /+ line not empty +/ - /+ non blocks (headers, paragraphs) & closed blocks +/ + /+ non blocks (headings, paragraphs) & closed blocks +/ #+END_SRC ******* asserts :assert: @@ -451,22 +453,8 @@ if (matchFirst(line, rgx.block_open)) { set_abstract_object.contents_comment(strip(an_object["obj"])); header_set_common(line_occur, an_object, type); processing.remove("verse"); - type["header_make"] = State.off; - type["header_metadata"] = State.off; ++counter; #+END_SRC -******** matched: header make :header: -#+name: abs_in_loop_body_not_block_obj -#+BEGIN_SRC d - } else if ((matchFirst(line, rgx.header_make)) - || (matchFirst(line, rgx.header_metadata)) - || (type["header_make"] == State.on - && (line_occur["header_make"] > State.off)) - || (type["header_metadata"] == State.on - && (line_occur["header_metadata"] > State.off))) { - header_extract(line, line_occur, an_object, type); -#+END_SRC - ******** flag not set & line not exist: heading or para :heading:paragraph: #+name: abs_in_loop_body_not_block_obj #+BEGIN_SRC d @@ -475,11 +463,11 @@ if (matchFirst(line, rgx.block_open)) { && ((type["para"] == State.off) && (type["heading"] == State.off))) { /+ heading or para but neither flag nor line exists +/ - if ((to!string(dochead_make["make"]["headings"]).length > 2) + if ((to!string(dochead_make_json["make"]["headings"]).length > 2) && (type["make_headings"] == State.off)) { /+ heading found +/ auto dochead_make_headings = - to!string(dochead_make["make"]["headings"]); + to!string(dochead_make_json["make"]["headings"]); heading_found(line, dochead_make_headings, heading_match_str, heading_match_rgx, type); } if ((type["make_headings"] == State.on) @@ -492,36 +480,13 @@ if (matchFirst(line, rgx.block_open)) { } if (matchFirst(line, rgx.heading)) { /+ heading match +/ - heading_match(line, line_occur, an_object, lv, collapsed_lev, type, dochead_meta); + heading_matched(line, line_occur, an_object, lv, collapsed_lev, type, dochead_meta_json); } else if (line_occur["para"] == State.off) { /+ para match +/ - para_match(line, an_object, indent, bullet, type); - } -#+END_SRC -******** line exist: header make :header:make: -#+name: abs_in_loop_body_not_block_obj -#+BEGIN_SRC d - } else if (line_occur["header_make"] > State.off) { - /+ header_make +/ - // should be caught by sub-header - debug(header) { - tell_l("red", line); - } - an_object["obj"] ~= line ~= "\n"; - ++line_occur["header_make"]; -#+END_SRC -******** line exist: header metadata :header:metadata: -#+name: abs_in_loop_body_not_block_obj -#+BEGIN_SRC d - } else if (line_occur["header_metadata"] > State.off) { - /+ header_metadata +/ - // should be caught by sub-header - debug(header) { // para - tell_l("red", line); + para_match(line, an_object, indent, bullet, type, line_occur); } - an_object["obj"] ~= line ~= "\n"; - ++line_occur["header_metadata"]; #+END_SRC + ******** line exist: heading :heading: #+name: abs_in_loop_body_not_block_obj #+BEGIN_SRC d @@ -560,7 +525,7 @@ if (matchFirst(line, rgx.block_open)) { #+name: abs_in_loop_body_not_block_obj #+BEGIN_SRC d } else { -/+ line empty +/ + /+ line empty +/ #+END_SRC ******* assert line empty :assert: @@ -578,45 +543,11 @@ assert( ); #+END_SRC -******* header_make instructions :header:make:instructions: - -#+name: abs_in_loop_body_not_block_obj_line_empty -#+BEGIN_SRC d -if ((type["header_make"] == State.on) -&& (line_occur["header_make"] > State.off)) { - /+ header_make instructions (current line empty) +/ - auto dochead_metadata_and_make = - set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); - static assert(!isTypeTuple!(dochead_metadata_and_make)); - dochead_meta = dochead_metadata_and_make[0]; - dochead_make = dochead_metadata_and_make[1]; - header_set_common(line_occur, an_object, type); - processing.remove("verse"); -#+END_SRC - -******* header_metadata :header:metadata: - -#+name: abs_in_loop_body_not_block_obj_line_empty -#+BEGIN_SRC d -} else if ((type["header_metadata"] == State.on) -&& (line_occur["header_metadata"] > State.off)) { - /+ header_metadata (current line empty) +/ - auto dochead_metadata_and_make = - set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); - static assert(!isTypeTuple!(dochead_meta)); - dochead_meta = dochead_metadata_and_make[0]; - dochead_make = dochead_metadata_and_make[1]; - header_set_common(line_occur, an_object, type); - type["header_make"] = State.off; - type["header_metadata"] = State.off; - processing.remove("verse"); -#+END_SRC - ******* heading object :heading:object: #+name: abs_in_loop_body_not_block_obj_line_empty #+BEGIN_SRC d -} else if ((type["heading"] == State.on) +if ((type["heading"] == State.on) && (line_occur["heading"] > State.off)) { /+ heading object (current line empty) +/ obj_cite_number = obj_cite_number_emit(type["obj_cite_number_status"]); @@ -655,8 +586,6 @@ if ((type["header_make"] == State.on) // writeln(m.hit, "\n"); } header_set_common(line_occur, an_object, type); - type["header_make"] = State.off; - type["header_metadata"] = State.off; an_object.remove("lev"); an_object.remove("lev_markup_number"); // an_object["lev_markup_number"]="9"; @@ -705,8 +634,6 @@ if ((type["header_make"] == State.on) // bullet // ); header_set_common(line_occur, an_object, type); - type["header_make"] = State.off; - type["header_metadata"] = State.off; indent["first"] = "0"; indent["second"] = "0"; bullet = false; @@ -734,21 +661,24 @@ if ((type["header_make"] == State.on) #+name: abs_in_loop_body_01 #+BEGIN_SRC d -if (((contents_the_objects[$-1].is_a == "para") -|| (contents_the_objects[$-1].is_a == "heading")) -&& (counter-1 > previous_count)) { - if (match(contents_the_objects[$-1].object, - rgx.inline_notes_delimiter_al_regular_number_note)) { - // endnotes/ footnotes for - // doc objects other than paragraphs & headings - // various forms of grouped text - previous_count=contents_the_objects.length -1; - note_section.gather_notes_for_endnote_section( - contents_the_objects, - contents_the_objects.length -1 - ); - // notes[notepoint]=note_section.notes_section(contents_the_objects, counter-1); - // notepoint +=1; +/+ unless (contents_the_objects.length == 0) ? +/ +if (contents_the_objects.length > 0) { + if (((contents_the_objects[$-1].is_a == "para") + || (contents_the_objects[$-1].is_a == "heading")) + && (counter-1 > previous_count)) { + if (match(contents_the_objects[$-1].object, + rgx.inline_notes_delimiter_al_regular_number_note)) { + // endnotes/ footnotes for + // doc objects other than paragraphs & headings + // various forms of grouped text + previous_count=contents_the_objects.length -1; + note_section.gather_notes_for_endnote_section( + contents_the_objects, + contents_the_objects.length -1 + ); + // notes[notepoint]=note_section.notes_section(contents_the_objects, counter-1); + // notepoint +=1; + } } } #+END_SRC @@ -903,14 +833,14 @@ destroy(bookindex_section); // struct Document { // char content; // char head_make; -// char head_metadata; +// char head_meta; // char bookindex_section; // char biblio; // } // struct Document { // char content; // char head_make; -// char head_metadata; +// char head_meta; // char bookindex_section; // char biblio; // } @@ -920,14 +850,11 @@ destroy(bookindex_section); #+name: abs_post #+BEGIN_SRC d -auto t = - tuple( - document_the, - dochead_make, - dochead_meta, - bookindex_unordered_hashes, - biblio_ordered - ); +auto t = tuple( + document_the, + bookindex_unordered_hashes, + biblio_ordered +); return t; #+END_SRC @@ -958,13 +885,8 @@ auto header_set_common( ref int[string] type ) { // line_occur["header"] = State.off; - line_occur["header_make"] = State.off; - line_occur["header_metadata"] = State.off; line_occur["heading"] = State.off; line_occur["para"]= State.off; - type["header"] = State.off; - // type["header_make"] = State.off; - // type["header_metadata"] = State.off; type["heading"] = State.off; type["para"] = State.off; object_reset(an_object); @@ -2074,67 +1996,6 @@ auto book_index( } #+END_SRC -** header extract :header: - -#+name: abs_functions -#+BEGIN_SRC d -auto header_extract( - char[] line, - ref int[string] line_occur, - ref string[string] an_object, - ref int[string] type -) { - if (matchFirst(line, rgx.header_make)) { - /+ matched header_make +/ - debug(header1) { // header - tell_l("yellow", line); - } - type["header"] = State.on; - type["header_make"] = State.on; - type["header_metadata"] = State.off; - type["heading"] = State.off; - type["para"] = State.off; - ++line_occur["header_make"]; - an_object["obj"] ~= line ~= "\n"; - } else if (matchFirst(line, rgx.header_metadata)) { - /+ matched header_metadata +/ - debug(header1) { // header - tell_l("yellow", line); - } - type["header"] = State.on; - type["header_make"] = State.off; - type["header_metadata"] = State.on; - type["heading"] = State.off; - type["para"] = State.off; - ++line_occur["header_metadata"]; - an_object["obj"] ~= line ~= "\n"; - } else if (type["header_make"] == State.on - && (line_occur["header_make"] > State.off)) { - /+ header_make flag set +/ - if (matchFirst(line, rgx.header_sub)) { - /+ sub-header +/ - debug(header1) { - tell_l("yellow", line); - } - // type["header"] = State.on; - ++line_occur["header_make"]; - an_object["obj"] ~= line ~= "\n"; - } - } else if (type["header_metadata"] == State.on - && (line_occur["header_metadata"] > State.off)) { - /+ header_metadata flag set +/ - if (matchFirst(line, rgx.header_sub)) { - /+ sub-header +/ - debug(header1) { - tell_l("yellow", line); - } - ++line_occur["header_metadata"]; - an_object["obj"] ~= line ~= "\n"; - } - } - return 0; -} -#+END_SRC ** heading or paragraph :heading:paragraph: *** heading found :heading: @@ -2297,31 +2158,31 @@ auto heading_make_set( #+name: abs_functions #+BEGIN_SRC d -auto heading_match( +auto heading_matched( char[] line, ref int[string] line_occur, ref string[string] an_object, ref int[string] lv, ref int[string] collapsed_lev, ref int[string] type, - ref JSONValue[string] dochead_meta + ref JSONValue[string] dochead_meta_json ) { if (auto m = match(line, rgx.heading)) { /+ heading match +/ type["heading"] = State.on; - type["header"] = State.off; - type["header_make"] = State.off; - type["header_metadata"] = State.off; type["heading_biblio"] = State.off; type["para"] = State.off; ++line_occur["heading"]; an_object["obj"] ~= line ~= "\n"; an_object["lev"] ~= m.captures[1]; + // writeln("an object level: ", an_object); assertions_doc_structure(an_object, lv); // includes most of the logic for collapsed levels switch (an_object["lev"]) { case "A": - an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_title, to!string(dochead_meta["title"]["main"])); - an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_author, to!string(dochead_meta["creator"]["author"])); + an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_title, to!string(dochead_meta_json["title"]["main"])); + an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_author, to!string(dochead_meta_json["creator"]["author"])); + // an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_title, to!string(parseJSON(dochead_meta_json["title"]["main"]))); + // an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_author, to!string(parseJSON(dochead_meta_json["creator"]["author"]))); collapsed_lev["h0"] = 1; an_object["lev_collapsed_number"] = to!string(collapsed_lev["h0"]); @@ -2449,7 +2310,8 @@ auto para_match( ref string[string] an_object, ref string[string] indent, ref bool bullet, - ref int[string] type + ref int[string] type, + ref int[string] line_occur ) { if (line_occur["para"] == State.off) { /+ para matches +/ @@ -2587,28 +2449,29 @@ struct ObjAttributes { } invariant() { } - string header_make(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - obj_txt["attrib"] = " \"use\": \"head\"," - ~ " \"of\": \"header\"," - ~ " \"is\": \"header_make\""; - return obj_txt["attrib"]; - } - invariant() { - } - string header_metadata(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - obj_txt["attrib"] = " \"use\": \"head\"," - ~ " \"of\": \"header\"," - ~ " \"is\": \"header_metadata\""; - return obj_txt["attrib"]; - } - invariant() { - } + /+ revist +/ + // string header_make(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // obj_txt["attrib"] = " \"use\": \"head\"," + // ~ " \"of\": \"header\"," + // ~ " \"is\": \"header_make\""; + // return obj_txt["attrib"]; + // } + // invariant() { + // } + // string header_meta(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // obj_txt["attrib"] = " \"use\": \"head\"," + // ~ " \"of\": \"header\"," + // ~ " \"is\": \"header_metadata\""; + // return obj_txt["attrib"]; + // } + // invariant() { + // } string code(string obj_txt_in) in { } body { @@ -2802,22 +2665,23 @@ struct ObjInlineMarkupMunge { } invariant() { } - string header_make(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - return obj_txt["munge"]; - } - invariant() { - } - string header_metadata(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - return obj_txt["munge"]; - } - invariant() { - } + /+ revisit +/ + // string header_make(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // return obj_txt["munge"]; + // } + // invariant() { + // } + // string header_meta(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // return obj_txt["munge"]; + // } + // invariant() { + // } string code(string obj_txt_in) in { } body { @@ -2896,12 +2760,6 @@ struct ObjInlineMarkup { ? obj_txt["munge"] : strip(obj_txt["munge"]); switch (obj_is_) { - case "header_make": - obj_txt["munge"]=munge.header_make(obj_txt["munge"]); - break; - case "header_metadata": - obj_txt["munge"]=munge.header_metadata(obj_txt["munge"]); - break; case "heading": obj_txt["munge"]=munge.heading(obj_txt["munge"]); break; @@ -2964,12 +2822,6 @@ struct ObjAttrib { obj_attrib.remove("json"); obj_attrib["json"] ="{"; switch (obj_is_) { - case "header_make": - obj_attrib["json"] ~= attrib.header_make(obj_raw); - break; - case "header_metadata": - obj_attrib["json"] ~= attrib.header_metadata(obj_raw); - break; case "heading": obj_attrib["json"] ~= attrib.heading(obj_raw); // break; @@ -3041,270 +2893,6 @@ struct ObjAttrib { } #+END_SRC -*** header document metadata :document:metadata: -**** header document metadata in json :json: - -#+name: ao_emitters -#+BEGIN_SRC d -struct HeaderDocMetadataMakeJson { -// class HeaderMetadataMakeHash : AssertHeaderMetadataMakeJson { - auto rgx = Rgx(); - string hm, hs; - auto header_metadata_and_make_jsonstr( - string header, - JSONValue[string] dochead_meta, - JSONValue[string] dochead_make - ) - in { } - body { - scope(exit) { - destroy(header); - destroy(dochead_meta); - destroy(dochead_make); - } - if (auto t = match(header, rgx.head_main)) { - char[][] obj_spl = split( - cast(char[]) header, - rgx.line_delimiter_ws_strip - ); - auto hm = to!string(t.captures[1]); - if (match(hm, rgx.main_headers)) { - foreach (line; obj_spl) { - if (auto m = match(line, rgx.head_main)) { - if (!empty(m.captures[2])) { - if (hm == "creator") { - dochead_meta[hm]["author"].str = - to!string(m.captures[2]); - } else if (hm == "title") { - dochead_meta[hm]["main"].str = - to!string(m.captures[2]); - } else if (hm == "publisher") { - dochead_meta[hm]["name"].str = - to!string(m.captures[2]); - } - } - } else if (auto s = match(line, rgx.head_sub)) { - if (!empty(s.captures[2])) { - auto hs = to!string(s.captures[1]); - if ((hm == "make" ) - && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) { - switch (hm) { - case "make": - if (match(hs, rgx.subhead_make)) { - if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) { - dochead_make[hm][hs].str = to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - default: - break; - } - } else if (dochead_meta[hm].type() == JSON_TYPE.OBJECT) { - switch (hm) { - case "creator": - if (match(hs, rgx.subhead_creator)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "title": - if (match(hs, rgx.subhead_title)) { - if ((hs == "subtitle") - && (dochead_meta[hm]["sub"].type() == JSON_TYPE.STRING)) { - dochead_meta[hm]["sub"].str = - to!string(s.captures[2]); - } else if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "rights": - if (match(hs, rgx.subhead_rights)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "date": - if (match(hs, rgx.subhead_date)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "original": - if (match(hs, rgx.subhead_original)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "classify": - if (match(hs, rgx.subhead_classify)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "identifier": - if (match(hs, rgx.subhead_identifier)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "notes": - if (match(hs, rgx.subhead_notes)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "publisher": - if (match(hs, rgx.subhead_publisher)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "links": - destroy(hm); - destroy(hs); - // if (match(hs, rgx.subhead_links)) { - // if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - // dochead_meta[hm][hs].str = to!string(s.captures[2]); - // } - // } else { - // writeln("not a valid header type:", hm, ":", hs); - // destroy(hm); - // destroy(hs); - // } - break; - default: - break; - } - } - } - } - } - } else { - writeln("not a valid header type:", hm); - } - } - auto t = tuple(dochead_meta, dochead_make); - static assert(!isTypeTuple!(t)); - return t; - } - // invariant() { - // } -} -#+END_SRC - -**** header document metadata as hash :hash: - -#+name: ao_emitters -#+BEGIN_SRC d -class HeaderMetadataMakeHash { -// class HeaderMetadataMakeHash : AssertHeaderMetadataMakeHash { - auto rgx = Rgx(); - string header_main; - string[string] head; - string[string] header_topic_hash(string header) - in { } - body { - if (auto t = match(header, rgx.head_main)) { - char[][] obj_spl = split( - cast(char[]) header, - rgx.line_delimiter_ws_strip - ); - auto header_main = to!string(t.captures[1]); - head[header_main] = "{"; - foreach (line; obj_spl) { - if (auto m = match(line, rgx.head_main)) { - if (!empty(m.captures[2])) { - head[header_main] ~= - "\"" ~ header_main ~ - "\": \"" ~ - to!string(m.captures[2]) ~ - "\","; - } - } else if (auto s = match(line, rgx.head_sub)) { - head[header_main] ~= "\"" ~ s.captures[1] ~ "\":"; - if (!empty(s.captures[2])) { - head[header_main] ~= "\"" ~ s.captures[2] ~ "\","; - } - } - } - head[header_main] = replaceFirst( - head[header_main], - rgx.tailing_comma, - "" - ); - head[header_main] ~= "}"; - debug(headerjson) { - JSONValue j = parseJSON(head[header_main]); - assert( - (j.type == JSON_TYPE.OBJECT) - ); - } - } - return head; - } - invariant() { - } -} -#+END_SRC - *** book index :book:index: **** book index nugget hash :hash:nugget: @@ -4451,8 +4039,11 @@ template SiSUdocAbstraction() { <> /+ ↓ abstract marked up document +/ - auto abstract_doc_source(char[][] markup_sourcefile_content) { - + auto abstract_doc_source( + char[][] markup_sourcefile_content, + JSONValue[string] dochead_make_json, + JSONValue[string] dochead_meta_json + ) { /+ ↓ abstraction init +/ <> /+ abstraction init ↑ +/ diff --git a/org/ao_defaults.org b/org/ao_defaults.org index 00fce41..dabc47f 100644 --- a/org/ao_defaults.org +++ b/org/ao_defaults.org @@ -19,7 +19,7 @@ ** template: header :header: #+name: ao_defaults_templates #+BEGIN_SRC d -template SiSUheader() { +template SiSUheaderSkel() { auto header_make_jsonstr = `{ "make": { "cover_image" : "", @@ -37,7 +37,7 @@ template SiSUheader() { "css" : "" } }`; - auto header_metadata_jsonstr = `{ + auto header_meta_jsonstr = `{ "creator": { "author" : "", "translator" : "", @@ -192,7 +192,7 @@ template SiSUrgxInitFlags() { int[string] flags_type_init = [ "make_headings" : 0, "header_make" : 0, - "header_metadata" : 0, + "header_meta" : 0, "heading" : 0, "heading_biblio" : 0, "para" : 0, @@ -348,13 +348,6 @@ template InternalMarkup() { /+ misc +/ static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); -static src_pth = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ss[tm])$`); -static src_fn = - ctRegex!(`^([a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); -static src_fn_master = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ssm)$`); -static src_fn_find_inserts = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ss[im])$`); -// static ssm_fn = ctRegex!(`^[a-zA-Z0-9._-]+[.]ssm$`); -static line_delimiter = ctRegex!("\n"); // static arr_delimiter = ctRegex!(`\s*[;]\s*`); static within_quotes = ctRegex!(`"(.+?)"`); static make_heading_delimiter = ctRegex!(`[;][ ]*`); @@ -364,6 +357,7 @@ static name_delimiter = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`); static book_index_go = ctRegex!("([0-9]+)(?:-[0-9]+)?"); static trailing_comma = ctRegex!(",[ ]*$"); static trailing_linebreak = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m"); +static line_delimiter = ctRegex!("\n"); static line_delimiter_ws_strip = ctRegex!("[ ]*\n[ ]*"); static line_delimiter_only = ctRegex!("^\n"); static para_delimiter = ctRegex!("\n[ ]*\n+"); @@ -371,11 +365,16 @@ static levels_markup = ctRegex!(`^[A-D1-4]$`); static levels_numbered = ctRegex!(`^[0-9]$`); static levels_numbered_headings = ctRegex!(`^[0-7]$`); #+END_SRC -** markup insert file :insert:file: +** filename (and path) matching (including markup insert file) :insert:file:path:filename: #+name: ao_rgx #+BEGIN_SRC d -/+ insert markup file +/ -// static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*([a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); +static src_pth = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); +static src_fn = + ctRegex!(`^([a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); +static src_fn_master = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssm)$`); +static src_fn_text = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]sst)$`); +static src_fn_insert = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssi)$`); +static src_fn_find_inserts = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); // static insert_ssi_or_sst_fn = ctRegex!(`^<<\s*[a-zA-Z0-9._-]+[.]ss[ti]`); #+END_SRC @@ -384,6 +383,7 @@ static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(? #+BEGIN_SRC d /+ comments +/ static comment = ctRegex!(`^%+ `); +static comments = ctRegex!(`^%+ |^%+$`); #+END_SRC ** header :header: #+name: ao_rgx @@ -391,17 +391,18 @@ static comment = ctRegex!(`^%+ `); /+ header +/ static header = ctRegex!(`^@([a-z_]+):(?:\s|$)`); static header_make = ctRegex!(`^@(make):(?:\s|$)`); -static header_metadata = ctRegex!(`^@([a-z_]+):(?:\s|$)`); +static header_meta = ctRegex!(`^@([a-z_]+):(?:\s|$)`); static header_sub = ctRegex!(`^[ ]+:([a-z_]+):\s`); -static head_main = ctRegex!(`^@([a-z_]+):\s*(.*)`, "m"); -static head_sub = ctRegex!(`^[ ]*:([a-z_]+):\s+(.+)`, "m"); +static head_main = ctRegex!(`^@(?P
[a-z_]+):\s*(?P.*)`, "m"); +static head_sub = ctRegex!(`^[ ]*:(?P[a-z_]+):\s+(?P.+)`, "m"); static head_value_title = ctRegex!(`@title`); static head_value_author = ctRegex!(`@author`); #+END_SRC -** header & paragraph operators :paragraph:operator: +** heading & paragraph operators :paragraph:operator: #+name: ao_rgx #+BEGIN_SRC d /+ heading & paragraph operators +/ +static heading_a = ctRegex!(`^:?[A][~] `, "m"); static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?) `); static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); @@ -541,7 +542,7 @@ static obj_cite_number_block_marks = ctRegex!(`^--[+~-]#$`); #+name: ao_rgx #+BEGIN_SRC d /+ ignore outside code blocks +/ -static regular_parse_skip = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info +static skip_code_block_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info #+END_SRC ** line & page breaks :break: #+name: ao_rgx diff --git a/org/ao_header_extract.org b/org/ao_header_extract.org new file mode 100644 index 0000000..d075c7c --- /dev/null +++ b/org/ao_header_extract.org @@ -0,0 +1,438 @@ +#+TITLE: sdp header extract +#+AUTHOR: Ralph Amissah +#+EMAIL: ralph.amissah@gmail.com +#+STARTUP: indent +#+LANGUAGE: en +#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t +#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc +#+OPTIONS: author:nil email:nil creator:nil timestamp:nil +#+PROPERTY: header-args :padline no :exports code :noweb yes +#+EXPORT_SELECT_TAGS: export +#+EXPORT_EXCLUDE_TAGS: noexport +#+FILETAGS: :sdp:niu:ao: +#+TAGS: assert(a) class(c) debug(d) mixin(m) sdp(s) tangle(T) template(t) WEB(W) noexport(n) + +[[./sdp.org][sdp]] [[./][org/]] +* header + +// mixin SiSUheader; +// auto set_header = HeaderDocMetadataMakeJson(); // reintroduce + +** header document metadata in json :json: + +#+name: ao_markup_header_extract +#+BEGIN_SRC d +auto header_metadata_and_make_jsonstr( + string header, + JSONValue[string] dochead_meta, + JSONValue[string] dochead_make +) +in { } +body { + scope(exit) { + destroy(header); + destroy(dochead_meta); + destroy(dochead_make); + } + if (auto t = match(header, rgx.head_main)) { + char[][] obj_spl = split( + cast(char[]) header, + rgx.line_delimiter_ws_strip + ); + auto hm = to!string(t.captures[1]); + if (match(hm, rgx.main_headers)) { + foreach (line; obj_spl) { + if (auto m = match(line, rgx.head_main)) { + if (!empty(m.captures[2])) { + if (hm == "creator") { + dochead_meta[hm]["author"].str = + to!string(m.captures[2]); + } else if (hm == "title") { + dochead_meta[hm]["main"].str = + to!string(m.captures[2]); + } else if (hm == "publisher") { + dochead_meta[hm]["name"].str = + to!string(m.captures[2]); + } + } + } else if (auto s = match(line, rgx.head_sub)) { + if (!empty(s.captures[2])) { + auto hs = to!string(s.captures[1]); + if ((hm == "make" ) + && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) { + switch (hm) { + case "make": + if (match(hs, rgx.subhead_make)) { + if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) { + dochead_make[hm][hs].str = to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + default: + break; + } + } else if (dochead_meta[hm].type() == JSON_TYPE.OBJECT) { + switch (hm) { + case "creator": + if (match(hs, rgx.subhead_creator)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "title": + if (match(hs, rgx.subhead_title)) { + if ((hs == "subtitle") + && (dochead_meta[hm]["sub"].type() == JSON_TYPE.STRING)) { + dochead_meta[hm]["sub"].str = + to!string(s.captures[2]); + } else if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "rights": + if (match(hs, rgx.subhead_rights)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "date": + if (match(hs, rgx.subhead_date)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "original": + if (match(hs, rgx.subhead_original)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "classify": + if (match(hs, rgx.subhead_classify)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "identifier": + if (match(hs, rgx.subhead_identifier)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "notes": + if (match(hs, rgx.subhead_notes)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "publisher": + if (match(hs, rgx.subhead_publisher)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "links": + destroy(hm); + destroy(hs); + // if (match(hs, rgx.subhead_links)) { + // if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + // dochead_meta[hm][hs].str = to!string(s.captures[2]); + // } + // } else { + // writeln("not a valid header type:", hm, ":", hs); + // destroy(hm); + // destroy(hs); + // } + break; + default: + break; + } + } + } + } + } + } else { + writeln("not a valid header type:", hm); + } + } + auto t = tuple(dochead_meta, dochead_make); + static assert(!isTypeTuple!(t)); + return t; +} +#+END_SRC + +** header extract +#+name: ao_markup_header_extract +#+BEGIN_SRC d +private auto header_extract( + char[] line, + ref int[string] line_occur, + ref string[string] an_object, + ref int[string] type +) { + if (matchFirst(line, rgx.header_make)) { + /+ matched header_make +/ + debug(header1) { // header + // tell_l("yellow", line); + } + type["header"] = State.on; + type["header_make"] = State.on; + type["header_meta"] = State.off; + ++line_occur["header_make"]; + an_object["obj"] ~= line ~= "\n"; + } else if (matchFirst(line, rgx.header_meta)) { + /+ matched header_metadata +/ + debug(header1) { // header + // tell_l("yellow", line); + } + type["header"] = State.on; + type["header_make"] = State.off; + type["header_meta"] = State.on; + ++line_occur["header_meta"]; + an_object["obj"] ~= line ~= "\n"; + } else if (type["header_make"] == State.on + && (line_occur["header_make"] > State.off)) { + /+ header_make flag set +/ + if (matchFirst(line, rgx.header_sub)) { + /+ sub-header +/ + debug(header1) { + // tell_l("yellow", line); + } + // type["header"] = State.on; + ++line_occur["header_make"]; + an_object["obj"] ~= line ~= "\n"; + } + } else if (type["header_meta"] == State.on + && (line_occur["header_meta"] > State.off)) { + /+ header_metadata flag set +/ + if (matchFirst(line, rgx.header_sub)) { + /+ sub-header +/ + debug(header1) { + // tell_l("yellow", line); + } + ++line_occur["header_meta"]; + an_object["obj"] ~= line ~= "\n"; + } + } + // return 0; + return an_object; +} +#+END_SRC + +** header array :header: +#+name: ao_markup_header_extract +#+BEGIN_SRC d +auto header_set_common( + ref int[string] line_occur, + ref string[string] an_object, + ref int[string] type +) { + // line_occur["header"] = State.off; + line_occur["header_make"] = State.off; + line_occur["header_meta"] = State.off; + type["header"] = State.off; + // type["header_make"] = State.off; + // type["header_meta"] = State.off; + an_object.remove("obj"); + an_object.remove("is"); + an_object.remove("attrib"); +} +private auto headerContentJSON(in char[] src_header) { + auto type = flags_type_init; + type = [ + "header" : State.off, + "header_make" : State.off, + "header_meta" : State.off, + ]; + string[string] an_object; + int[string] line_occur; + auto dochead_make = parseJSON(header_make_jsonstr).object; + auto dochead_meta = parseJSON(header_meta_jsonstr).object; + auto set_header = HeaderDocMetadataMakeJson(); + char[][] source_header_arr = + split(cast(char[]) src_header, rgx.line_delimiter); + foreach(header_line; source_header_arr) { + if (auto m = matchFirst(header_line, rgx.comment)) { + /+ matched comment +/ + debug(comment) { + // tell_l("blue", header_line); + } + header_set_common(line_occur, an_object, type); + // type["header_make"] = State.off; + // type["header_meta"] = State.off; + } else if ((matchFirst(header_line, rgx.header)) + || (type["header_make"] == State.on + && (line_occur["header_make"] > State.off)) + || (type["header_meta"] == State.on + && (line_occur["header_meta"] > State.off))) { + if (header_line.length == 0) { + /+ header_make instructions (current line empty) +/ + auto dochead_metadata_and_make = + set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); + static assert(!isTypeTuple!(dochead_metadata_and_make)); + dochead_meta = dochead_metadata_and_make[0]; + dochead_make = dochead_metadata_and_make[1]; + header_set_common(line_occur, an_object, type); + type["header_make"] = State.off; + type["header_meta"] = State.off; + writeln(dochead_metadata_and_make); + } else { + an_object = header_extract(header_line, line_occur, an_object, type); + } + } else { + // writeln(__LINE__); + } + } + auto t = tuple( + dochead_make, + dochead_meta, + ); + return t; +} +#+END_SRC + +** +header document metadata+ :document:metadata: +*** +within abstraction loop+ + +**** +line exist: header make+ :header:make: +# #+name: abs_in_loop_body_not_block_obj +# #+BEGIN_SRC d +# } else if (line_occur["header_make"] > State.off) { +# /+ header_make +/ +# // should be caught by sub-header +# debug(header) { +# tell_l("red", line); +# } +# an_object["obj"] ~= line ~= "\n"; +# ++line_occur["header_make"]; +# #+END_SRC + +**** +line exist: header metadata+ :header:metadata: +# #+name: abs_in_loop_body_not_block_obj +# #+BEGIN_SRC d +# } else if (line_occur["header_meta"] > State.off) { +# /+ header_metadata +/ +# // should be caught by sub-header +# debug(header) { // para +# tell_l("red", line); +# } +# an_object["obj"] ~= line ~= "\n"; +# ++line_occur["header_meta"]; +# #+END_SRC + +**** +header_make instructions+ :header:make:instructions: + +# #+name: abs_in_loop_body_not_block_obj_line_empty +# #+BEGIN_SRC d +# if ((type["header_make"] == State.on) +# && (line_occur["header_make"] > State.off)) { +# /+ header_make instructions (current line empty) +/ +# auto dochead_metadata_and_make = +# set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); +# static assert(!isTypeTuple!(dochead_metadata_and_make)); +# dochead_meta = dochead_metadata_and_make[0]; +# dochead_make = dochead_metadata_and_make[1]; +# header_set_common(line_occur, an_object, type); +# processing.remove("verse"); +# #+END_SRC + +**** +header_metadata+ :header:metadata: + +# #+name: abs_in_loop_body_not_block_obj_line_empty +# #+BEGIN_SRC d +# } else if ((type["header_meta"] == State.on) +# && (line_occur["header_meta"] > State.off)) { +# /+ header_meta (current line empty) +/ +# auto dochead_metadata_and_make = +# set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); +# static assert(!isTypeTuple!(dochead_metadata_and_make)); +# dochead_meta = dochead_metadata_and_make[0]; +# dochead_make = dochead_metadata_and_make[1]; +# header_set_common(line_occur, an_object, type); +# type["header_make"] = State.off; +# type["header_meta"] = State.off; +# processing.remove("verse"); +# #+END_SRC + +* tangles (code structure) :tangle: +** ao_markup_header_extract.d: :ao_markup_header_extract.d: +#+BEGIN_SRC d :tangle ../src/sdp/ao_header_extract.d +/+ + extract header return json ++/ +template SiSUheaderExtract() { + private import + std.exception, + std.regex, + std.utf, + std.conv : to; + private import + ao_rgx; // ao_defaults.d + struct HeaderDocMetadataMakeJson { + mixin SiSUrgxInitFlags; + mixin RgxInit; + auto rgx = Rgx(); + enum State { off, on } + string hm, hs; + <> + } +} +#+END_SRC diff --git a/org/ao_read_source_files.org b/org/ao_read_source_files.org index 26f25fb..2d41105 100644 --- a/org/ao_read_source_files.org +++ b/org/ao_read_source_files.org @@ -12,9 +12,10 @@ #+FILETAGS: :sdp:niu:ao: #+TAGS: assert(a) class(c) debug(d) mixin(m) sdp(s) tangle(T) template(t) WEB(W) noexport(n) -* read file, get raw markup source :markup: [[./sdp.org][sdp]] [[./][org/]] +* get markup source, read file :source:markup: + ** [#A] read file, source string :string: #+name: ao_markup_source_raw #+BEGIN_SRC d @@ -79,10 +80,21 @@ catch (ErrnoException ex) { // } } +** array[0..1]: split header content :array: +#+name: ao_markup_source_raw +#+BEGIN_SRC d +final private char[][] header0Content1(in string src_text) { + /+ split string on first match of "^:?A~\s" into [header, content] tuple +/ + char[][] header_and_content = + split(cast(char[]) src_text, rgx.heading_a); + return header_and_content; +} +#+END_SRC + ** source line array :array: #+name: ao_markup_source_raw #+BEGIN_SRC d -final private char[][] markupSourceLineArray(in string src_text) { +final private char[][] markupSourceLineArray(in char[] src_text) { char[][] source_line_arr = split(cast(char[]) src_text, rgx.line_delimiter); return source_line_arr; @@ -95,6 +107,24 @@ final private char[][] markupSourceLineArray(in string src_text) { #+name: ao_markup_source_raw #+BEGIN_SRC d +auto markupSourceHeaderContentRawLineTupleArray(in string fn_src, Regex!(char) rgx_file ) { + enforce( + match(fn_src, rgx_file), + "not a sisu markup filename" + ); + auto source_txt_str = readInMarkupSource(fn_src); + auto hc = header0Content1(source_txt_str); + auto header = hc[0]; + char[] la; + la ~= "A~ "; + char[] source_txt = la ~ hc[1]; + auto source_line_arr = markupSourceLineArray(source_txt); + auto t = tuple( + header, + source_line_arr + ); + return t; +} final char[][] markupSourceContentRawLineArray(in string fn_src, Regex!(char) rgx_file ) { enforce( match(fn_src, rgx_file), @@ -106,7 +136,7 @@ final char[][] markupSourceContentRawLineArray(in string fn_src, Regex!(char) rg } #+END_SRC -* get markup source, master file & inserts :inserts: +* get markup source, master file & inserts :masterfile:inserts: [[./sdp.org][sdp]] [[./][org/]] ** scan inserts (sub-document) source :scan_insert_src: @@ -115,11 +145,6 @@ final char[][] markupSourceContentRawLineArray(in string fn_src, Regex!(char) rg #+BEGIN_SRC d char[][] contents_insert; auto type1 = flags_type_init; -mixin ScreenTxtColors; -int tell_l(string color, in char[] line) { - writeln(scr_txt_marker[color], line); - return 0; -} auto fn_pth_full = match(fn_src, rgx.src_pth); auto markup_src_file_path = fn_pth_full.captures[1]; #+END_SRC @@ -129,7 +154,7 @@ auto markup_src_file_path = fn_pth_full.captures[1]; #+BEGIN_SRC d if (type1["curly_code"] == 1) { type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; if (matchFirst(line, rgx.block_curly_code_close)) { type1["curly_code"] = 0; } @@ -137,11 +162,11 @@ if (type1["curly_code"] == 1) { } else if (matchFirst(line, rgx.block_curly_code_open)) { type1["curly_code"] = 1; type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; contents_insert ~= line; } else if (type1["tic_code"] == 1) { type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; if (matchFirst(line, rgx.block_tic_close)) { type1["tic_code"] = 0; } @@ -149,25 +174,25 @@ if (type1["curly_code"] == 1) { } else if (matchFirst(line, rgx.block_tic_code_open)) { type1["tic_code"] = 1; type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; contents_insert ~= line; } else if ( (type1["header_make"] == 1) && matchFirst(line, rgx.header_sub) ) { type1["header_make"] = 1; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; // cont_dynamic_array ~= "% " ~ line; } else if ( - (type1["header_metadata"] == 1) + (type1["header_meta"] == 1) && matchFirst(line, rgx.header_sub) ) { - type1["header_metadata"] = 1; + type1["header_meta"] = 1; type1["header_make"] = 0; // cont_dynamic_array ~= "% " ~ line; } else if (auto m = match(line, rgx.insert_src_fn_ssi_or_sst)) { type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; auto insert_fn = m.captures[2]; auto insert_sub_pth = m.captures[1]; auto fn_src_insert = @@ -196,7 +221,7 @@ if (type1["curly_code"] == 1) { +/ } else { type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; contents_insert ~= line; } #+END_SRC @@ -213,62 +238,29 @@ return contents_insert; #+BEGIN_SRC d char[][] contents; auto type = flags_type_init; -mixin ScreenTxtColors; -int tell_l(string color, in char[] line) { - writeln(scr_txt_marker[color], line); - return 0; -} auto fn_pth_full = match(fn_src, rgx.src_pth); auto markup_src_file_path = fn_pth_full.captures[1]; #+END_SRC -*** loop insert (document) +*** loop master scan for inserts (insert documents) #+name: ao_master_doc_scan_for_insert_filenames_loop #+BEGIN_SRC d if (type["curly_code"] == 1) { - type["header_make"] = 0; - type["header_metadata"] = 0; if (matchFirst(line, rgx.block_curly_code_close)) { type["curly_code"] = 0; } contents ~= line; } else if (matchFirst(line, rgx.block_curly_code_open)) { type["curly_code"] = 1; - type["header_make"] = 0; - type["header_metadata"] = 0; contents ~= line; } else if (type["tic_code"] == 1) { - type["header_make"] = 0; - type["header_metadata"] = 0; if (matchFirst(line, rgx.block_tic_close)) { type["tic_code"] = 0; } contents ~= line; } else if (matchFirst(line, rgx.block_tic_code_open)) { type["tic_code"] = 1; - type["header_make"] = 0; - type["header_metadata"] = 0; - contents ~= line; -} else if ( - (type["header_make"] == 1) - && matchFirst(line, rgx.header_sub) -) { - contents ~= line; -} else if ( - (type["header_metadata"] == 1) - && matchFirst(line, rgx.header_sub) -) { - contents ~= line; -} else if (matchFirst(line, rgx.header_make)) { - type["header_make"] = 1; - type["header_metadata"] = 0; - contents ~= line; -} else if (matchFirst(line, rgx.header_metadata)) { - type["header_make"] = 0; - type["header_metadata"] = 1; contents ~= line; } else if (auto m = match(line, rgx.insert_src_fn_ssi_or_sst)) { - type["header_make"] = 0; - type["header_metadata"] = 0; auto insert_fn = m.captures[2]; auto insert_sub_pth = m.captures[1]; auto fn_src_insert = @@ -300,8 +292,6 @@ if (type["curly_code"] == 1) { 7. add line to new array; +/ } else { - type["header_make"] = 0; - type["header_metadata"] = 0; contents ~= line; } #+END_SRC @@ -338,22 +328,29 @@ template SiSUmarkupRaw() { mixin RgxInit; auto rgx = Rgx(); struct MarkupRaw { - final char[][] sourceContent(in string fn_src) { + auto sourceContent(in string fn_src) { auto raw = MarkupRawUnit(); - auto sourcefile_content = - raw.markupSourceContentRawLineArray(fn_src, rgx.src_pth); + auto t = + raw.markupSourceHeaderContentRawLineTupleArray(fn_src, rgx.src_pth); + auto header_content_raw = t[0]; + auto sourcefile_content = t[1]; if (match(fn_src, rgx.src_fn_master)) { auto ins = Inserts(); sourcefile_content = - ins.scan_master_doc_source_for_insert_filenames(sourcefile_content, fn_src); + ins.scan_master_src_for_insert_files_and_import_content(sourcefile_content, fn_src); // auto ins = SiSUdocInserts.Inserts(); } - return sourcefile_content; + t = tuple( + header_content_raw, + sourcefile_content + ); + return t; } } private struct MarkupRawUnit { private import std.file; + enum State { off, on } <> } struct Inserts { @@ -370,7 +367,7 @@ template SiSUmarkupRaw() { } // end src subdoc (inserts) loop <> } - auto scan_master_doc_source_for_insert_filenames( + auto scan_master_src_for_insert_files_and_import_content( char[][] sourcefile_content, string fn_src ) { diff --git a/org/output.org b/org/output.org index fa16447..3575558 100644 --- a/org/output.org +++ b/org/output.org @@ -1447,8 +1447,8 @@ struct SDPoutputHTML { auto ref const C contents, string[][string][string] bookindex_unordered_hashes, JSONValue[] biblio, - JSONValue[string] dochead_make, - JSONValue[string] dochead_meta, + // JSONValue[string] dochead_make, + // JSONValue[string] dochead_meta, string fn_src, bool[string] opt_action_bool ) { diff --git a/org/sdp.org b/org/sdp.org index 0b2fc53..80b7a45 100644 --- a/org/sdp.org +++ b/org/sdp.org @@ -28,7 +28,7 @@ struct Version { int minor; int patch; } -enum ver = Version(0, 4, 0); +enum ver = Version(0, 4, 1); #+END_SRC ** pre loop init @@ -60,11 +60,12 @@ import compile_time_info, // sdp/compile_time_info.d ao_abstract_doc_source, // sdp/ao_abstract_doc_source.d ao_defaults, // sdp/ao_defaults.d + ao_header_extract, // sdp/ao_header_extract.d ao_read_source_files, // sdp/ao_read_source_files.d ao_output_debugs, // sdp/ao_output_debugs.d - output_hub, // output_hub.d ao_rgx, // sdp/ao_rgx.d - ao_ansi_colors; // sdp/ao_ansi_colors.d + ao_ansi_colors, // sdp/ao_ansi_colors.d + output_hub; // output_hub.d // std.conv; #+END_SRC @@ -94,27 +95,6 @@ private import std.conv : to; #+END_SRC -**** version.txt :version: - -#+NAME: sdp_version_mixin -#+BEGIN_SRC d -mixin(import("version.txt")); -#+END_SRC - -*** mixin :mixin: - -#+NAME: sdp_args -#+BEGIN_SRC d -mixin SiSUheader; -mixin SiSUbiblio; -mixin SiSUrgxInitFlags; -mixin SiSUmarkupRaw; -mixin SiSUdocAbstraction; -mixin SiSUoutputDebugs; -mixin SiSUoutputHub; -mixin ScreenTxtColors; -#+END_SRC - *** sdp output :output: #+NAME: sdp_args #+BEGIN_SRC d @@ -123,15 +103,14 @@ struct SDPoutput { auto ref const S contents, string[][string][string] bookindex_unordered_hashes, JSONValue[] biblio, - JSONValue[string] dochead_make, - JSONValue[string] dochead_meta, + // JSONValue[string] dochead_make_json, + // JSONValue[string] dochead_meta_json, string fn_src, bool[string] opt_action_bool ) { mixin ScreenTxtColors; mixin RgxInit; mixin SiSUoutputHub; - // mixin SiSUoutput; auto rgx = Rgx(); uint return_ = 0; if (opt_action_bool["source"]) { @@ -149,8 +128,8 @@ struct SDPoutput { if (opt_action_bool["html"]) { auto html=SDPoutputHTML(); html.css_write; - // html.scroll(contents, bookindex_unordered_hashes, biblio, fn_src, opt_action_bool); - html.scroll(contents, bookindex_unordered_hashes, biblio, dochead_make, dochead_meta, fn_src, opt_action_bool); + html.scroll(contents, bookindex_unordered_hashes, biblio, fn_src, opt_action_bool); + // html.scroll(contents, bookindex_unordered_hashes, biblio, dochead_make_json, dochead_meta_json, fn_src, opt_action_bool); } if (opt_action_bool["epub"]) { writeln("epub processing"); @@ -172,18 +151,42 @@ struct SDPoutput { } #+END_SRC +**** version.txt :version: + +#+NAME: sdp_version_mixin +#+BEGIN_SRC d +mixin(import("version.txt")); +#+END_SRC + +*** mixin :mixin: + +#+NAME: sdp_args +#+BEGIN_SRC d +mixin SiSUheaderSkel; +mixin SiSUheaderExtract; +mixin SiSUbiblio; +mixin SiSUrgxInitFlags; +// mixin SiSUconfiguration; +mixin SiSUmarkupRaw; +mixin SiSUdocAbstraction; +mixin SiSUoutputDebugs; +mixin SiSUoutputHub; +mixin ScreenTxtColors; +#+END_SRC + *** init :init: #+NAME: sdp_args #+BEGIN_SRC d auto raw = MarkupRaw(); +auto head = HeaderDocMetadataMakeJson(); auto abs = Abstraction(); auto dbg = SDPoutputDebugs(); auto output = SDPoutput(); /+ struct DocumentParts { string[string][] contents; - JSONValue[string] metadata_json; + JSONValue[string] meta_json; JSONValue[string] make_json; string[][string][string] bookindex_unordered_hashes; JSONValue[] biblio; @@ -202,8 +205,7 @@ auto rgx = Rgx(); scope(success) { debug(checkdoc) { writefln( - "%s~ run complete, ok ~ %s (sdp-%s.%s.%s, %s v%s, %s %s)", - scr_txt_color["cyan"], scr_txt_color["off"], + "~ run complete, ok ~ (sdp-%s.%s.%s, %s v%s, %s %s)", ver.major, ver.minor, ver.patch, __VENDOR__, __VERSION__, bits, os, @@ -299,10 +301,7 @@ foreach(arg; args) { #+BEGIN_SRC d scope(success) { debug(checkdoc) { - writefln( - "%s~ document complete, ok ~%s", - scr_txt_color["green"], scr_txt_color["off"], - ); + writeln("~ document complete, ok ~"); } // stderr.writeln("0"); } @@ -329,22 +328,37 @@ enforce( #+NAME: sdp_each_file_do #+BEGIN_SRC d /+ ↓ read file +/ -auto sourcefile_content = - raw.sourceContent(fn_src); +auto header_and_content_tuple = raw.sourceContent(fn_src); +static assert(!isTypeTuple!(header_and_content_tuple)); +auto header = header_and_content_tuple[0]; +auto sourcefile_content = header_and_content_tuple[1]; +debug(header_and_content) { + writeln(header); + writeln(header_and_content_tuple.length); + writeln(sourcefile_content[0]); +} +#+END_SRC + +**** [#A] read doc header: metadata & make :doc:header:metadata:make: +#+NAME: sdp_each_file_do +#+BEGIN_SRC d +/+ ↓ headers metadata & make +/ +auto header_content = head.headerContentJSON(header); +static assert(!isTypeTuple!(header_content)); +auto dochead_make_json = header_content[0]; +auto dochead_meta_json = header_content[1]; #+END_SRC **** [#A] processing: document abstraction, tuple :processing: #+NAME: sdp_each_file_do #+BEGIN_SRC d /+ ↓ porcess document, return abstraction as tuple +/ -auto t = abs.abstract_doc_source(sourcefile_content); +auto t = abs.abstract_doc_source(sourcefile_content, dochead_make_json, dochead_meta_json); static assert(!isTypeTuple!(t)); auto doc_ao_contents = t[0]; // contents ~ endnotes ~ bookindex; // static assert(!isIterable!(doc_ao_contents)); -auto doc_ao_metadata_json = t[1]; -auto doc_ao_make_json = t[2]; -auto doc_ao_bookindex_unordered_hashes = t[3]; -auto doc_ao_biblio = t[4]; +auto doc_ao_bookindex_unordered_hashes = t[1]; +auto doc_ao_biblio = t[2]; // destroy(t); #+END_SRC @@ -358,8 +372,8 @@ debug(checkdoc) { // checkbook & dumpdoc doc_ao_contents, doc_ao_bookindex_unordered_hashes, doc_ao_biblio, - doc_ao_make_json, - doc_ao_metadata_json, + dochead_make_json, + dochead_meta_json, fn_src, opt_action_bool ); @@ -367,7 +381,7 @@ debug(checkdoc) { // checkbook & dumpdoc #+END_SRC **** TODO process outputs :outputs: -***** [#A] html :html: + #+NAME: sdp_each_file_do #+BEGIN_SRC d /+ ↓ output hub +/ @@ -375,8 +389,8 @@ output.hub( doc_ao_contents, doc_ao_bookindex_unordered_hashes, doc_ao_biblio, - doc_ao_make_json, - doc_ao_metadata_json, + // doc_ao_make_json, + // doc_ao_meta_json, fn_src, opt_action_bool ); @@ -395,9 +409,9 @@ scope(exit) { destroy(sourcefile_content); destroy(t); destroy(doc_ao_contents); - destroy(doc_ao_make_json); - destroy(doc_ao_metadata_json); - destroy(doc_ao_bookindex_unordered_hashes); + // destroy(doc_ao_make_json); + // destroy(doc_ao_meta_json); + // destroy(doc_ao_bookindex_unordered_hashes); destroy(doc_ao_biblio); destroy(fn_src); } diff --git a/src/sdp.d b/src/sdp.d index 624b239..d17c379 100755 --- a/src/sdp.d +++ b/src/sdp.d @@ -8,11 +8,12 @@ import compile_time_info, // sdp/compile_time_info.d ao_abstract_doc_source, // sdp/ao_abstract_doc_source.d ao_defaults, // sdp/ao_defaults.d + ao_header_extract, // sdp/ao_header_extract.d ao_read_source_files, // sdp/ao_read_source_files.d ao_output_debugs, // sdp/ao_output_debugs.d - output_hub, // output_hub.d ao_rgx, // sdp/ao_rgx.d - ao_ansi_colors; // sdp/ao_ansi_colors.d + ao_ansi_colors, // sdp/ao_ansi_colors.d + output_hub; // output_hub.d // std.conv; /+ sdp sisu document parser +/ private import @@ -40,28 +41,19 @@ mixin CompileTimeInfo; mixin RgxInit; void main(string[] args) { - mixin SiSUheader; - mixin SiSUbiblio; - mixin SiSUrgxInitFlags; - mixin SiSUmarkupRaw; - mixin SiSUdocAbstraction; - mixin SiSUoutputDebugs; - mixin SiSUoutputHub; - mixin ScreenTxtColors; struct SDPoutput { auto hub(S)( auto ref const S contents, string[][string][string] bookindex_unordered_hashes, JSONValue[] biblio, - JSONValue[string] dochead_make, - JSONValue[string] dochead_meta, + // JSONValue[string] dochead_make_json, + // JSONValue[string] dochead_meta_json, string fn_src, bool[string] opt_action_bool ) { mixin ScreenTxtColors; mixin RgxInit; mixin SiSUoutputHub; - // mixin SiSUoutput; auto rgx = Rgx(); uint return_ = 0; if (opt_action_bool["source"]) { @@ -79,8 +71,8 @@ void main(string[] args) { if (opt_action_bool["html"]) { auto html=SDPoutputHTML(); html.css_write; - // html.scroll(contents, bookindex_unordered_hashes, biblio, fn_src, opt_action_bool); - html.scroll(contents, bookindex_unordered_hashes, biblio, dochead_make, dochead_meta, fn_src, opt_action_bool); + html.scroll(contents, bookindex_unordered_hashes, biblio, fn_src, opt_action_bool); + // html.scroll(contents, bookindex_unordered_hashes, biblio, dochead_make_json, dochead_meta_json, fn_src, opt_action_bool); } if (opt_action_bool["epub"]) { writeln("epub processing"); @@ -100,14 +92,25 @@ void main(string[] args) { return return_; } } + mixin SiSUheaderSkel; + mixin SiSUheaderExtract; + mixin SiSUbiblio; + mixin SiSUrgxInitFlags; + // mixin SiSUconfiguration; + mixin SiSUmarkupRaw; + mixin SiSUdocAbstraction; + mixin SiSUoutputDebugs; + mixin SiSUoutputHub; + mixin ScreenTxtColors; auto raw = MarkupRaw(); + auto head = HeaderDocMetadataMakeJson(); auto abs = Abstraction(); auto dbg = SDPoutputDebugs(); auto output = SDPoutput(); /+ struct DocumentParts { string[string][] contents; - JSONValue[string] metadata_json; + JSONValue[string] meta_json; JSONValue[string] make_json; string[][string][string] bookindex_unordered_hashes; JSONValue[] biblio; @@ -120,8 +123,7 @@ void main(string[] args) { scope(success) { debug(checkdoc) { writefln( - "%s~ run complete, ok ~ %s (sdp-%s.%s.%s, %s v%s, %s %s)", - scr_txt_color["cyan"], scr_txt_color["off"], + "~ run complete, ok ~ (sdp-%s.%s.%s, %s v%s, %s %s)", ver.major, ver.minor, ver.patch, __VENDOR__, __VERSION__, bits, os, @@ -203,10 +205,7 @@ void main(string[] args) { if (!empty(fn_src)) { scope(success) { debug(checkdoc) { - writefln( - "%s~ document complete, ok ~%s", - scr_txt_color["green"], scr_txt_color["off"], - ); + writeln("~ document complete, ok ~"); } // stderr.writeln("0"); } @@ -226,17 +225,27 @@ void main(string[] args) { "not a sisu markup filename" ); /+ ↓ read file +/ - auto sourcefile_content = - raw.sourceContent(fn_src); + auto header_and_content_tuple = raw.sourceContent(fn_src); + static assert(!isTypeTuple!(header_and_content_tuple)); + auto header = header_and_content_tuple[0]; + auto sourcefile_content = header_and_content_tuple[1]; + debug(header_and_content) { + writeln(header); + writeln(header_and_content_tuple.length); + writeln(sourcefile_content[0]); + } + /+ ↓ headers metadata & make +/ + auto header_content = head.headerContentJSON(header); + static assert(!isTypeTuple!(header_content)); + auto dochead_make_json = header_content[0]; + auto dochead_meta_json = header_content[1]; /+ ↓ porcess document, return abstraction as tuple +/ - auto t = abs.abstract_doc_source(sourcefile_content); + auto t = abs.abstract_doc_source(sourcefile_content, dochead_make_json, dochead_meta_json); static assert(!isTypeTuple!(t)); auto doc_ao_contents = t[0]; // contents ~ endnotes ~ bookindex; // static assert(!isIterable!(doc_ao_contents)); - auto doc_ao_metadata_json = t[1]; - auto doc_ao_make_json = t[2]; - auto doc_ao_bookindex_unordered_hashes = t[3]; - auto doc_ao_biblio = t[4]; + auto doc_ao_bookindex_unordered_hashes = t[1]; + auto doc_ao_biblio = t[2]; // destroy(t); /+ ↓ document parts +/ debug(checkdoc) { // checkbook & dumpdoc @@ -244,8 +253,8 @@ void main(string[] args) { doc_ao_contents, doc_ao_bookindex_unordered_hashes, doc_ao_biblio, - doc_ao_make_json, - doc_ao_metadata_json, + dochead_make_json, + dochead_meta_json, fn_src, opt_action_bool ); @@ -255,8 +264,8 @@ void main(string[] args) { doc_ao_contents, doc_ao_bookindex_unordered_hashes, doc_ao_biblio, - doc_ao_make_json, - doc_ao_metadata_json, + // doc_ao_make_json, + // doc_ao_meta_json, fn_src, opt_action_bool ); @@ -270,9 +279,9 @@ void main(string[] args) { destroy(sourcefile_content); destroy(t); destroy(doc_ao_contents); - destroy(doc_ao_make_json); - destroy(doc_ao_metadata_json); - destroy(doc_ao_bookindex_unordered_hashes); + // destroy(doc_ao_make_json); + // destroy(doc_ao_meta_json); + // destroy(doc_ao_bookindex_unordered_hashes); destroy(doc_ao_biblio); destroy(fn_src); } diff --git a/src/sdp/ao_abstract_doc_source.d b/src/sdp/ao_abstract_doc_source.d index 7539755..ca435ff 100644 --- a/src/sdp/ao_abstract_doc_source.d +++ b/src/sdp/ao_abstract_doc_source.d @@ -29,7 +29,6 @@ template SiSUdocAbstraction() { string[string] an_object, processing; auto set_abstract_object = ObjectAbstractSet(); - auto set_header = HeaderDocMetadataMakeJson(); auto note_section = NotesSection(); /+ enum +/ @@ -121,8 +120,11 @@ template SiSUdocAbstraction() { // mixin SiSUdocAbstractionFunctions; /+ ↓ abstract marked up document +/ - auto abstract_doc_source(char[][] markup_sourcefile_content) { - + auto abstract_doc_source( + char[][] markup_sourcefile_content, + JSONValue[string] dochead_make_json, + JSONValue[string] dochead_meta_json + ) { /+ ↓ abstraction init +/ scope(success) { } @@ -134,9 +136,11 @@ template SiSUdocAbstraction() { destroy(processing); destroy(biblio_arr_json); } + line_occur = [ + "heading" : 0, + "para" : 0, + ]; auto type = flags_type_init; - auto dochead_make = parseJSON(header_make_jsonstr).object; - auto dochead_meta = parseJSON(header_metadata_jsonstr).object; mixin ScreenTxtColors; int tell_lo(string color, int obj_cite_number, in char[] line) { writefln( @@ -242,8 +246,9 @@ template SiSUdocAbstraction() { /+ block object: code +/ code_block(line, an_object, type); continue; - } else if (!matchFirst(line, rgx.regular_parse_skip)) { - /+ object other than code block object (includes regular text paragraph) +/ + } else if (!matchFirst(line, rgx.skip_code_block_from_regular_parse)) { + /+ object other than "code block" object + (includes regular text paragraph, headings & blocks other than code) +/ if (((matchFirst(line, rgx.heading_biblio) || (type["heading_biblio"] == State.on))) && (!matchFirst(line, rgx.heading)) @@ -291,7 +296,7 @@ template SiSUdocAbstraction() { continue; } else if (!line.empty) { /+ line not empty +/ - /+ non blocks (headers, paragraphs) & closed blocks +/ + /+ non blocks (headings, paragraphs) & closed blocks +/ assert( !line.empty, "line tested, line not empty surely" @@ -330,26 +335,17 @@ template SiSUdocAbstraction() { set_abstract_object.contents_comment(strip(an_object["obj"])); header_set_common(line_occur, an_object, type); processing.remove("verse"); - type["header_make"] = State.off; - type["header_metadata"] = State.off; ++counter; - } else if ((matchFirst(line, rgx.header_make)) - || (matchFirst(line, rgx.header_metadata)) - || (type["header_make"] == State.on - && (line_occur["header_make"] > State.off)) - || (type["header_metadata"] == State.on - && (line_occur["header_metadata"] > State.off))) { - header_extract(line, line_occur, an_object, type); } else if (((line_occur["para"] == State.off) && (line_occur["heading"] == State.off)) && ((type["para"] == State.off) && (type["heading"] == State.off))) { /+ heading or para but neither flag nor line exists +/ - if ((to!string(dochead_make["make"]["headings"]).length > 2) + if ((to!string(dochead_make_json["make"]["headings"]).length > 2) && (type["make_headings"] == State.off)) { /+ heading found +/ auto dochead_make_headings = - to!string(dochead_make["make"]["headings"]); + to!string(dochead_make_json["make"]["headings"]); heading_found(line, dochead_make_headings, heading_match_str, heading_match_rgx, type); } if ((type["make_headings"] == State.on) @@ -362,27 +358,11 @@ template SiSUdocAbstraction() { } if (matchFirst(line, rgx.heading)) { /+ heading match +/ - heading_match(line, line_occur, an_object, lv, collapsed_lev, type, dochead_meta); + heading_matched(line, line_occur, an_object, lv, collapsed_lev, type, dochead_meta_json); } else if (line_occur["para"] == State.off) { /+ para match +/ - para_match(line, an_object, indent, bullet, type); - } - } else if (line_occur["header_make"] > State.off) { - /+ header_make +/ - // should be caught by sub-header - debug(header) { - tell_l("red", line); + para_match(line, an_object, indent, bullet, type, line_occur); } - an_object["obj"] ~= line ~= "\n"; - ++line_occur["header_make"]; - } else if (line_occur["header_metadata"] > State.off) { - /+ header_metadata +/ - // should be caught by sub-header - debug(header) { // para - tell_l("red", line); - } - an_object["obj"] ~= line ~= "\n"; - ++line_occur["header_metadata"]; } else if (line_occur["heading"] > State.off) { /+ heading +/ debug(heading) { // heading @@ -403,7 +383,7 @@ template SiSUdocAbstraction() { /+ line empty, with blocks flag +/ block_flag_line_empty(line, an_object, contents_the_objects, bookindex_unordered_hashes, obj_cite_number, node, counter, type, obj_cite_number_poem); // watch } else { - /+ line empty +/ + /+ line empty +/ /+ line.empty, post contents, empty variables: +/ assert( line.empty, @@ -413,29 +393,7 @@ template SiSUdocAbstraction() { (type["blocks"] == State.off), "code block status: none" ); - if ((type["header_make"] == State.on) - && (line_occur["header_make"] > State.off)) { - /+ header_make instructions (current line empty) +/ - auto dochead_metadata_and_make = - set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); - static assert(!isTypeTuple!(dochead_metadata_and_make)); - dochead_meta = dochead_metadata_and_make[0]; - dochead_make = dochead_metadata_and_make[1]; - header_set_common(line_occur, an_object, type); - processing.remove("verse"); - } else if ((type["header_metadata"] == State.on) - && (line_occur["header_metadata"] > State.off)) { - /+ header_metadata (current line empty) +/ - auto dochead_metadata_and_make = - set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); - static assert(!isTypeTuple!(dochead_meta)); - dochead_meta = dochead_metadata_and_make[0]; - dochead_make = dochead_metadata_and_make[1]; - header_set_common(line_occur, an_object, type); - type["header_make"] = State.off; - type["header_metadata"] = State.off; - processing.remove("verse"); - } else if ((type["heading"] == State.on) + if ((type["heading"] == State.on) && (line_occur["heading"] > State.off)) { /+ heading object (current line empty) +/ obj_cite_number = obj_cite_number_emit(type["obj_cite_number_status"]); @@ -474,8 +432,6 @@ template SiSUdocAbstraction() { // writeln(m.hit, "\n"); } header_set_common(line_occur, an_object, type); - type["header_make"] = State.off; - type["header_metadata"] = State.off; an_object.remove("lev"); an_object.remove("lev_markup_number"); // an_object["lev_markup_number"]="9"; @@ -518,8 +474,6 @@ template SiSUdocAbstraction() { // bullet // ); header_set_common(line_occur, an_object, type); - type["header_make"] = State.off; - type["header_metadata"] = State.off; indent["first"] = "0"; indent["second"] = "0"; bullet = false; @@ -535,21 +489,24 @@ template SiSUdocAbstraction() { } // close else for line empty } // close else for not the above } // close after non code, other blocks or regular text - if (((contents_the_objects[$-1].is_a == "para") - || (contents_the_objects[$-1].is_a == "heading")) - && (counter-1 > previous_count)) { - if (match(contents_the_objects[$-1].object, - rgx.inline_notes_delimiter_al_regular_number_note)) { - // endnotes/ footnotes for - // doc objects other than paragraphs & headings - // various forms of grouped text - previous_count=contents_the_objects.length -1; - note_section.gather_notes_for_endnote_section( - contents_the_objects, - contents_the_objects.length -1 - ); - // notes[notepoint]=note_section.notes_section(contents_the_objects, counter-1); - // notepoint +=1; + /+ unless (contents_the_objects.length == 0) ? +/ + if (contents_the_objects.length > 0) { + if (((contents_the_objects[$-1].is_a == "para") + || (contents_the_objects[$-1].is_a == "heading")) + && (counter-1 > previous_count)) { + if (match(contents_the_objects[$-1].object, + rgx.inline_notes_delimiter_al_regular_number_note)) { + // endnotes/ footnotes for + // doc objects other than paragraphs & headings + // various forms of grouped text + previous_count=contents_the_objects.length -1; + note_section.gather_notes_for_endnote_section( + contents_the_objects, + contents_the_objects.length -1 + ); + // notes[notepoint]=note_section.notes_section(contents_the_objects, counter-1); + // notepoint +=1; + } } } } /+ ← closed: loop markup document/text line by line +/ @@ -669,25 +626,22 @@ template SiSUdocAbstraction() { // struct Document { // char content; // char head_make; - // char head_metadata; + // char head_meta; // char bookindex_section; // char biblio; // } // struct Document { // char content; // char head_make; - // char head_metadata; + // char head_meta; // char bookindex_section; // char biblio; // } - auto t = - tuple( - document_the, - dochead_make, - dochead_meta, - bookindex_unordered_hashes, - biblio_ordered - ); + auto t = tuple( + document_the, + bookindex_unordered_hashes, + biblio_ordered + ); return t; /+ post loop markup document/text ↑ +/ @@ -707,13 +661,8 @@ template SiSUdocAbstraction() { ref int[string] type ) { // line_occur["header"] = State.off; - line_occur["header_make"] = State.off; - line_occur["header_metadata"] = State.off; line_occur["heading"] = State.off; line_occur["para"]= State.off; - type["header"] = State.off; - // type["header_make"] = State.off; - // type["header_metadata"] = State.off; type["heading"] = State.off; type["para"] = State.off; object_reset(an_object); @@ -1668,62 +1617,6 @@ template SiSUdocAbstraction() { } } } - auto header_extract( - char[] line, - ref int[string] line_occur, - ref string[string] an_object, - ref int[string] type - ) { - if (matchFirst(line, rgx.header_make)) { - /+ matched header_make +/ - debug(header1) { // header - tell_l("yellow", line); - } - type["header"] = State.on; - type["header_make"] = State.on; - type["header_metadata"] = State.off; - type["heading"] = State.off; - type["para"] = State.off; - ++line_occur["header_make"]; - an_object["obj"] ~= line ~= "\n"; - } else if (matchFirst(line, rgx.header_metadata)) { - /+ matched header_metadata +/ - debug(header1) { // header - tell_l("yellow", line); - } - type["header"] = State.on; - type["header_make"] = State.off; - type["header_metadata"] = State.on; - type["heading"] = State.off; - type["para"] = State.off; - ++line_occur["header_metadata"]; - an_object["obj"] ~= line ~= "\n"; - } else if (type["header_make"] == State.on - && (line_occur["header_make"] > State.off)) { - /+ header_make flag set +/ - if (matchFirst(line, rgx.header_sub)) { - /+ sub-header +/ - debug(header1) { - tell_l("yellow", line); - } - // type["header"] = State.on; - ++line_occur["header_make"]; - an_object["obj"] ~= line ~= "\n"; - } - } else if (type["header_metadata"] == State.on - && (line_occur["header_metadata"] > State.off)) { - /+ header_metadata flag set +/ - if (matchFirst(line, rgx.header_sub)) { - /+ sub-header +/ - debug(header1) { - tell_l("yellow", line); - } - ++line_occur["header_metadata"]; - an_object["obj"] ~= line ~= "\n"; - } - } - return 0; - } auto heading_found( char[] line, string dochead_make_headings, @@ -1868,31 +1761,31 @@ template SiSUdocAbstraction() { } } } - auto heading_match( + auto heading_matched( char[] line, ref int[string] line_occur, ref string[string] an_object, ref int[string] lv, ref int[string] collapsed_lev, ref int[string] type, - ref JSONValue[string] dochead_meta + ref JSONValue[string] dochead_meta_json ) { if (auto m = match(line, rgx.heading)) { /+ heading match +/ type["heading"] = State.on; - type["header"] = State.off; - type["header_make"] = State.off; - type["header_metadata"] = State.off; type["heading_biblio"] = State.off; type["para"] = State.off; ++line_occur["heading"]; an_object["obj"] ~= line ~= "\n"; an_object["lev"] ~= m.captures[1]; + // writeln("an object level: ", an_object); assertions_doc_structure(an_object, lv); // includes most of the logic for collapsed levels switch (an_object["lev"]) { case "A": - an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_title, to!string(dochead_meta["title"]["main"])); - an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_author, to!string(dochead_meta["creator"]["author"])); + an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_title, to!string(dochead_meta_json["title"]["main"])); + an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_author, to!string(dochead_meta_json["creator"]["author"])); + // an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_title, to!string(parseJSON(dochead_meta_json["title"]["main"]))); + // an_object["obj"]=replaceFirst(an_object["obj"], rgx.head_value_author, to!string(parseJSON(dochead_meta_json["creator"]["author"]))); collapsed_lev["h0"] = 1; an_object["lev_collapsed_number"] = to!string(collapsed_lev["h0"]); @@ -2014,7 +1907,8 @@ template SiSUdocAbstraction() { ref string[string] an_object, ref string[string] indent, ref bool bullet, - ref int[string] type + ref int[string] type, + ref int[string] line_occur ) { if (line_occur["para"] == State.off) { /+ para matches +/ @@ -2141,28 +2035,29 @@ template SiSUdocAbstraction() { } invariant() { } - string header_make(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - obj_txt["attrib"] = " \"use\": \"head\"," - ~ " \"of\": \"header\"," - ~ " \"is\": \"header_make\""; - return obj_txt["attrib"]; - } - invariant() { - } - string header_metadata(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - obj_txt["attrib"] = " \"use\": \"head\"," - ~ " \"of\": \"header\"," - ~ " \"is\": \"header_metadata\""; - return obj_txt["attrib"]; - } - invariant() { - } + /+ revist +/ + // string header_make(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // obj_txt["attrib"] = " \"use\": \"head\"," + // ~ " \"of\": \"header\"," + // ~ " \"is\": \"header_make\""; + // return obj_txt["attrib"]; + // } + // invariant() { + // } + // string header_meta(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // obj_txt["attrib"] = " \"use\": \"head\"," + // ~ " \"of\": \"header\"," + // ~ " \"is\": \"header_metadata\""; + // return obj_txt["attrib"]; + // } + // invariant() { + // } string code(string obj_txt_in) in { } body { @@ -2350,22 +2245,23 @@ template SiSUdocAbstraction() { } invariant() { } - string header_make(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - return obj_txt["munge"]; - } - invariant() { - } - string header_metadata(string obj_txt_in) - in { } - body { - obj_txt["munge"]=obj_txt_in; - return obj_txt["munge"]; - } - invariant() { - } + /+ revisit +/ + // string header_make(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // return obj_txt["munge"]; + // } + // invariant() { + // } + // string header_meta(string obj_txt_in) + // in { } + // body { + // obj_txt["munge"]=obj_txt_in; + // return obj_txt["munge"]; + // } + // invariant() { + // } string code(string obj_txt_in) in { } body { @@ -2438,12 +2334,6 @@ template SiSUdocAbstraction() { ? obj_txt["munge"] : strip(obj_txt["munge"]); switch (obj_is_) { - case "header_make": - obj_txt["munge"]=munge.header_make(obj_txt["munge"]); - break; - case "header_metadata": - obj_txt["munge"]=munge.header_metadata(obj_txt["munge"]); - break; case "heading": obj_txt["munge"]=munge.heading(obj_txt["munge"]); break; @@ -2500,12 +2390,6 @@ template SiSUdocAbstraction() { obj_attrib.remove("json"); obj_attrib["json"] ="{"; switch (obj_is_) { - case "header_make": - obj_attrib["json"] ~= attrib.header_make(obj_raw); - break; - case "header_metadata": - obj_attrib["json"] ~= attrib.header_metadata(obj_raw); - break; case "heading": obj_attrib["json"] ~= attrib.heading(obj_raw); // break; @@ -2575,257 +2459,6 @@ template SiSUdocAbstraction() { invariant() { } } - struct HeaderDocMetadataMakeJson { - // class HeaderMetadataMakeHash : AssertHeaderMetadataMakeJson { - auto rgx = Rgx(); - string hm, hs; - auto header_metadata_and_make_jsonstr( - string header, - JSONValue[string] dochead_meta, - JSONValue[string] dochead_make - ) - in { } - body { - scope(exit) { - destroy(header); - destroy(dochead_meta); - destroy(dochead_make); - } - if (auto t = match(header, rgx.head_main)) { - char[][] obj_spl = split( - cast(char[]) header, - rgx.line_delimiter_ws_strip - ); - auto hm = to!string(t.captures[1]); - if (match(hm, rgx.main_headers)) { - foreach (line; obj_spl) { - if (auto m = match(line, rgx.head_main)) { - if (!empty(m.captures[2])) { - if (hm == "creator") { - dochead_meta[hm]["author"].str = - to!string(m.captures[2]); - } else if (hm == "title") { - dochead_meta[hm]["main"].str = - to!string(m.captures[2]); - } else if (hm == "publisher") { - dochead_meta[hm]["name"].str = - to!string(m.captures[2]); - } - } - } else if (auto s = match(line, rgx.head_sub)) { - if (!empty(s.captures[2])) { - auto hs = to!string(s.captures[1]); - if ((hm == "make" ) - && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) { - switch (hm) { - case "make": - if (match(hs, rgx.subhead_make)) { - if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) { - dochead_make[hm][hs].str = to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - default: - break; - } - } else if (dochead_meta[hm].type() == JSON_TYPE.OBJECT) { - switch (hm) { - case "creator": - if (match(hs, rgx.subhead_creator)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "title": - if (match(hs, rgx.subhead_title)) { - if ((hs == "subtitle") - && (dochead_meta[hm]["sub"].type() == JSON_TYPE.STRING)) { - dochead_meta[hm]["sub"].str = - to!string(s.captures[2]); - } else if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "rights": - if (match(hs, rgx.subhead_rights)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "date": - if (match(hs, rgx.subhead_date)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "original": - if (match(hs, rgx.subhead_original)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "classify": - if (match(hs, rgx.subhead_classify)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "identifier": - if (match(hs, rgx.subhead_identifier)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "notes": - if (match(hs, rgx.subhead_notes)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "publisher": - if (match(hs, rgx.subhead_publisher)) { - if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - dochead_meta[hm][hs].str = - to!string(s.captures[2]); - } - } else { - writeln("not a valid header type:", hm, ":", hs); - destroy(hm); - destroy(hs); - } - break; - case "links": - destroy(hm); - destroy(hs); - // if (match(hs, rgx.subhead_links)) { - // if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { - // dochead_meta[hm][hs].str = to!string(s.captures[2]); - // } - // } else { - // writeln("not a valid header type:", hm, ":", hs); - // destroy(hm); - // destroy(hs); - // } - break; - default: - break; - } - } - } - } - } - } else { - writeln("not a valid header type:", hm); - } - } - auto t = tuple(dochead_meta, dochead_make); - static assert(!isTypeTuple!(t)); - return t; - } - // invariant() { - // } - } - class HeaderMetadataMakeHash { - // class HeaderMetadataMakeHash : AssertHeaderMetadataMakeHash { - auto rgx = Rgx(); - string header_main; - string[string] head; - string[string] header_topic_hash(string header) - in { } - body { - if (auto t = match(header, rgx.head_main)) { - char[][] obj_spl = split( - cast(char[]) header, - rgx.line_delimiter_ws_strip - ); - auto header_main = to!string(t.captures[1]); - head[header_main] = "{"; - foreach (line; obj_spl) { - if (auto m = match(line, rgx.head_main)) { - if (!empty(m.captures[2])) { - head[header_main] ~= - "\"" ~ header_main ~ - "\": \"" ~ - to!string(m.captures[2]) ~ - "\","; - } - } else if (auto s = match(line, rgx.head_sub)) { - head[header_main] ~= "\"" ~ s.captures[1] ~ "\":"; - if (!empty(s.captures[2])) { - head[header_main] ~= "\"" ~ s.captures[2] ~ "\","; - } - } - } - head[header_main] = replaceFirst( - head[header_main], - rgx.tailing_comma, - "" - ); - head[header_main] ~= "}"; - debug(headerjson) { - JSONValue j = parseJSON(head[header_main]); - assert( - (j.type == JSON_TYPE.OBJECT) - ); - } - } - return head; - } - invariant() { - } - } struct BookIndexNuggetHash { // class BookIndexNuggetHash : AssertBookIndexNuggetHash { string main_term, sub_term, sub_term_bits; diff --git a/src/sdp/ao_defaults.d b/src/sdp/ao_defaults.d index 314635c..087067c 100644 --- a/src/sdp/ao_defaults.d +++ b/src/sdp/ao_defaults.d @@ -2,7 +2,7 @@ defaults ao_defaults.d +/ -template SiSUheader() { +template SiSUheaderSkel() { auto header_make_jsonstr = `{ "make": { "cover_image" : "", @@ -20,7 +20,7 @@ template SiSUheader() { "css" : "" } }`; - auto header_metadata_jsonstr = `{ + auto header_meta_jsonstr = `{ "creator": { "author" : "", "translator" : "", @@ -171,7 +171,7 @@ template SiSUrgxInitFlags() { int[string] flags_type_init = [ "make_headings" : 0, "header_make" : 0, - "header_metadata" : 0, + "header_meta" : 0, "heading" : 0, "heading_biblio" : 0, "para" : 0, diff --git a/src/sdp/ao_header_extract.d b/src/sdp/ao_header_extract.d new file mode 100644 index 0000000..7858406 --- /dev/null +++ b/src/sdp/ao_header_extract.d @@ -0,0 +1,334 @@ +/+ + extract header return json ++/ +template SiSUheaderExtract() { + private import + std.exception, + std.regex, + std.utf, + std.conv : to; + private import + ao_rgx; // ao_defaults.d + struct HeaderDocMetadataMakeJson { + mixin SiSUrgxInitFlags; + mixin RgxInit; + auto rgx = Rgx(); + enum State { off, on } + string hm, hs; + auto header_metadata_and_make_jsonstr( + string header, + JSONValue[string] dochead_meta, + JSONValue[string] dochead_make + ) + in { } + body { + scope(exit) { + destroy(header); + destroy(dochead_meta); + destroy(dochead_make); + } + if (auto t = match(header, rgx.head_main)) { + char[][] obj_spl = split( + cast(char[]) header, + rgx.line_delimiter_ws_strip + ); + auto hm = to!string(t.captures[1]); + if (match(hm, rgx.main_headers)) { + foreach (line; obj_spl) { + if (auto m = match(line, rgx.head_main)) { + if (!empty(m.captures[2])) { + if (hm == "creator") { + dochead_meta[hm]["author"].str = + to!string(m.captures[2]); + } else if (hm == "title") { + dochead_meta[hm]["main"].str = + to!string(m.captures[2]); + } else if (hm == "publisher") { + dochead_meta[hm]["name"].str = + to!string(m.captures[2]); + } + } + } else if (auto s = match(line, rgx.head_sub)) { + if (!empty(s.captures[2])) { + auto hs = to!string(s.captures[1]); + if ((hm == "make" ) + && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) { + switch (hm) { + case "make": + if (match(hs, rgx.subhead_make)) { + if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) { + dochead_make[hm][hs].str = to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + default: + break; + } + } else if (dochead_meta[hm].type() == JSON_TYPE.OBJECT) { + switch (hm) { + case "creator": + if (match(hs, rgx.subhead_creator)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "title": + if (match(hs, rgx.subhead_title)) { + if ((hs == "subtitle") + && (dochead_meta[hm]["sub"].type() == JSON_TYPE.STRING)) { + dochead_meta[hm]["sub"].str = + to!string(s.captures[2]); + } else if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "rights": + if (match(hs, rgx.subhead_rights)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "date": + if (match(hs, rgx.subhead_date)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "original": + if (match(hs, rgx.subhead_original)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "classify": + if (match(hs, rgx.subhead_classify)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "identifier": + if (match(hs, rgx.subhead_identifier)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "notes": + if (match(hs, rgx.subhead_notes)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "publisher": + if (match(hs, rgx.subhead_publisher)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "links": + destroy(hm); + destroy(hs); + // if (match(hs, rgx.subhead_links)) { + // if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + // dochead_meta[hm][hs].str = to!string(s.captures[2]); + // } + // } else { + // writeln("not a valid header type:", hm, ":", hs); + // destroy(hm); + // destroy(hs); + // } + break; + default: + break; + } + } + } + } + } + } else { + writeln("not a valid header type:", hm); + } + } + auto t = tuple(dochead_meta, dochead_make); + static assert(!isTypeTuple!(t)); + return t; + } + private auto header_extract( + char[] line, + ref int[string] line_occur, + ref string[string] an_object, + ref int[string] type + ) { + if (matchFirst(line, rgx.header_make)) { + /+ matched header_make +/ + debug(header1) { // header + // tell_l("yellow", line); + } + type["header"] = State.on; + type["header_make"] = State.on; + type["header_meta"] = State.off; + ++line_occur["header_make"]; + an_object["obj"] ~= line ~= "\n"; + } else if (matchFirst(line, rgx.header_meta)) { + /+ matched header_metadata +/ + debug(header1) { // header + // tell_l("yellow", line); + } + type["header"] = State.on; + type["header_make"] = State.off; + type["header_meta"] = State.on; + ++line_occur["header_meta"]; + an_object["obj"] ~= line ~= "\n"; + } else if (type["header_make"] == State.on + && (line_occur["header_make"] > State.off)) { + /+ header_make flag set +/ + if (matchFirst(line, rgx.header_sub)) { + /+ sub-header +/ + debug(header1) { + // tell_l("yellow", line); + } + // type["header"] = State.on; + ++line_occur["header_make"]; + an_object["obj"] ~= line ~= "\n"; + } + } else if (type["header_meta"] == State.on + && (line_occur["header_meta"] > State.off)) { + /+ header_metadata flag set +/ + if (matchFirst(line, rgx.header_sub)) { + /+ sub-header +/ + debug(header1) { + // tell_l("yellow", line); + } + ++line_occur["header_meta"]; + an_object["obj"] ~= line ~= "\n"; + } + } + // return 0; + return an_object; + } + auto header_set_common( + ref int[string] line_occur, + ref string[string] an_object, + ref int[string] type + ) { + // line_occur["header"] = State.off; + line_occur["header_make"] = State.off; + line_occur["header_meta"] = State.off; + type["header"] = State.off; + // type["header_make"] = State.off; + // type["header_meta"] = State.off; + an_object.remove("obj"); + an_object.remove("is"); + an_object.remove("attrib"); + } + private auto headerContentJSON(in char[] src_header) { + auto type = flags_type_init; + type = [ + "header" : State.off, + "header_make" : State.off, + "header_meta" : State.off, + ]; + string[string] an_object; + int[string] line_occur; + auto dochead_make = parseJSON(header_make_jsonstr).object; + auto dochead_meta = parseJSON(header_meta_jsonstr).object; + auto set_header = HeaderDocMetadataMakeJson(); + char[][] source_header_arr = + split(cast(char[]) src_header, rgx.line_delimiter); + foreach(header_line; source_header_arr) { + if (auto m = matchFirst(header_line, rgx.comment)) { + /+ matched comment +/ + debug(comment) { + // tell_l("blue", header_line); + } + header_set_common(line_occur, an_object, type); + // type["header_make"] = State.off; + // type["header_meta"] = State.off; + } else if ((matchFirst(header_line, rgx.header)) + || (type["header_make"] == State.on + && (line_occur["header_make"] > State.off)) + || (type["header_meta"] == State.on + && (line_occur["header_meta"] > State.off))) { + if (header_line.length == 0) { + /+ header_make instructions (current line empty) +/ + auto dochead_metadata_and_make = + set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); + static assert(!isTypeTuple!(dochead_metadata_and_make)); + dochead_meta = dochead_metadata_and_make[0]; + dochead_make = dochead_metadata_and_make[1]; + header_set_common(line_occur, an_object, type); + type["header_make"] = State.off; + type["header_meta"] = State.off; + writeln(dochead_metadata_and_make); + } else { + an_object = header_extract(header_line, line_occur, an_object, type); + } + } else { + // writeln(__LINE__); + } + } + auto t = tuple( + dochead_make, + dochead_meta, + ); + return t; + } + } +} diff --git a/src/sdp/ao_read_source_files.d b/src/sdp/ao_read_source_files.d index 9bfaa05..e450bc8 100644 --- a/src/sdp/ao_read_source_files.d +++ b/src/sdp/ao_read_source_files.d @@ -16,22 +16,29 @@ template SiSUmarkupRaw() { mixin RgxInit; auto rgx = Rgx(); struct MarkupRaw { - final char[][] sourceContent(in string fn_src) { + auto sourceContent(in string fn_src) { auto raw = MarkupRawUnit(); - auto sourcefile_content = - raw.markupSourceContentRawLineArray(fn_src, rgx.src_pth); + auto t = + raw.markupSourceHeaderContentRawLineTupleArray(fn_src, rgx.src_pth); + auto header_content_raw = t[0]; + auto sourcefile_content = t[1]; if (match(fn_src, rgx.src_fn_master)) { auto ins = Inserts(); sourcefile_content = - ins.scan_master_doc_source_for_insert_filenames(sourcefile_content, fn_src); + ins.scan_master_src_for_insert_files_and_import_content(sourcefile_content, fn_src); // auto ins = SiSUdocInserts.Inserts(); } - return sourcefile_content; + t = tuple( + header_content_raw, + sourcefile_content + ); + return t; } } private struct MarkupRawUnit { private import std.file; + enum State { off, on } final private string readInMarkupSource(in string fn_src) { enforce( exists(fn_src)!=0, @@ -67,11 +74,35 @@ template SiSUmarkupRaw() { std.utf.validate(source_txt_str); return source_txt_str; } - final private char[][] markupSourceLineArray(in string src_text) { + final private char[][] header0Content1(in string src_text) { + /+ split string on first match of "^:?A~\s" into [header, content] tuple +/ + char[][] header_and_content = + split(cast(char[]) src_text, rgx.heading_a); + return header_and_content; + } + final private char[][] markupSourceLineArray(in char[] src_text) { char[][] source_line_arr = split(cast(char[]) src_text, rgx.line_delimiter); return source_line_arr; } + auto markupSourceHeaderContentRawLineTupleArray(in string fn_src, Regex!(char) rgx_file ) { + enforce( + match(fn_src, rgx_file), + "not a sisu markup filename" + ); + auto source_txt_str = readInMarkupSource(fn_src); + auto hc = header0Content1(source_txt_str); + auto header = hc[0]; + char[] la; + la ~= "A~ "; + char[] source_txt = la ~ hc[1]; + auto source_line_arr = markupSourceLineArray(source_txt); + auto t = tuple( + header, + source_line_arr + ); + return t; + } final char[][] markupSourceContentRawLineArray(in string fn_src, Regex!(char) rgx_file ) { enforce( match(fn_src, rgx_file), @@ -92,17 +123,12 @@ template SiSUmarkupRaw() { mixin SiSUrgxInitFlags; char[][] contents_insert; auto type1 = flags_type_init; - mixin ScreenTxtColors; - int tell_l(string color, in char[] line) { - writeln(scr_txt_marker[color], line); - return 0; - } auto fn_pth_full = match(fn_src, rgx.src_pth); auto markup_src_file_path = fn_pth_full.captures[1]; foreach (line; markup_sourcefile_insert_content) { if (type1["curly_code"] == 1) { type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; if (matchFirst(line, rgx.block_curly_code_close)) { type1["curly_code"] = 0; } @@ -110,11 +136,11 @@ template SiSUmarkupRaw() { } else if (matchFirst(line, rgx.block_curly_code_open)) { type1["curly_code"] = 1; type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; contents_insert ~= line; } else if (type1["tic_code"] == 1) { type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; if (matchFirst(line, rgx.block_tic_close)) { type1["tic_code"] = 0; } @@ -122,25 +148,25 @@ template SiSUmarkupRaw() { } else if (matchFirst(line, rgx.block_tic_code_open)) { type1["tic_code"] = 1; type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; contents_insert ~= line; } else if ( (type1["header_make"] == 1) && matchFirst(line, rgx.header_sub) ) { type1["header_make"] = 1; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; // cont_dynamic_array ~= "% " ~ line; } else if ( - (type1["header_metadata"] == 1) + (type1["header_meta"] == 1) && matchFirst(line, rgx.header_sub) ) { - type1["header_metadata"] = 1; + type1["header_meta"] = 1; type1["header_make"] = 0; // cont_dynamic_array ~= "% " ~ line; } else if (auto m = match(line, rgx.insert_src_fn_ssi_or_sst)) { type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; auto insert_fn = m.captures[2]; auto insert_sub_pth = m.captures[1]; auto fn_src_insert = @@ -169,72 +195,39 @@ template SiSUmarkupRaw() { +/ } else { type1["header_make"] = 0; - type1["header_metadata"] = 0; + type1["header_meta"] = 0; contents_insert ~= line; } } // end src subdoc (inserts) loop return contents_insert; } - auto scan_master_doc_source_for_insert_filenames( + auto scan_master_src_for_insert_files_and_import_content( char[][] sourcefile_content, string fn_src ) { mixin SiSUrgxInitFlags; char[][] contents; auto type = flags_type_init; - mixin ScreenTxtColors; - int tell_l(string color, in char[] line) { - writeln(scr_txt_marker[color], line); - return 0; - } auto fn_pth_full = match(fn_src, rgx.src_pth); auto markup_src_file_path = fn_pth_full.captures[1]; foreach (line; sourcefile_content) { if (type["curly_code"] == 1) { - type["header_make"] = 0; - type["header_metadata"] = 0; if (matchFirst(line, rgx.block_curly_code_close)) { type["curly_code"] = 0; } contents ~= line; } else if (matchFirst(line, rgx.block_curly_code_open)) { type["curly_code"] = 1; - type["header_make"] = 0; - type["header_metadata"] = 0; contents ~= line; } else if (type["tic_code"] == 1) { - type["header_make"] = 0; - type["header_metadata"] = 0; if (matchFirst(line, rgx.block_tic_close)) { type["tic_code"] = 0; } contents ~= line; } else if (matchFirst(line, rgx.block_tic_code_open)) { type["tic_code"] = 1; - type["header_make"] = 0; - type["header_metadata"] = 0; - contents ~= line; - } else if ( - (type["header_make"] == 1) - && matchFirst(line, rgx.header_sub) - ) { - contents ~= line; - } else if ( - (type["header_metadata"] == 1) - && matchFirst(line, rgx.header_sub) - ) { - contents ~= line; - } else if (matchFirst(line, rgx.header_make)) { - type["header_make"] = 1; - type["header_metadata"] = 0; - contents ~= line; - } else if (matchFirst(line, rgx.header_metadata)) { - type["header_make"] = 0; - type["header_metadata"] = 1; contents ~= line; } else if (auto m = match(line, rgx.insert_src_fn_ssi_or_sst)) { - type["header_make"] = 0; - type["header_metadata"] = 0; auto insert_fn = m.captures[2]; auto insert_sub_pth = m.captures[1]; auto fn_src_insert = @@ -266,8 +259,6 @@ template SiSUmarkupRaw() { 7. add line to new array; +/ } else { - type["header_make"] = 0; - type["header_metadata"] = 0; contents ~= line; } } // end src doc loop diff --git a/src/sdp/ao_rgx.d b/src/sdp/ao_rgx.d index 073148e..1245a71 100644 --- a/src/sdp/ao_rgx.d +++ b/src/sdp/ao_rgx.d @@ -7,13 +7,6 @@ template RgxInit() { /+ misc +/ static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); - static src_pth = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ss[tm])$`); - static src_fn = - ctRegex!(`^([a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); - static src_fn_master = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ssm)$`); - static src_fn_find_inserts = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ss[im])$`); - // static ssm_fn = ctRegex!(`^[a-zA-Z0-9._-]+[.]ssm$`); - static line_delimiter = ctRegex!("\n"); // static arr_delimiter = ctRegex!(`\s*[;]\s*`); static within_quotes = ctRegex!(`"(.+?)"`); static make_heading_delimiter = ctRegex!(`[;][ ]*`); @@ -23,28 +16,36 @@ template RgxInit() { static book_index_go = ctRegex!("([0-9]+)(?:-[0-9]+)?"); static trailing_comma = ctRegex!(",[ ]*$"); static trailing_linebreak = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m"); + static line_delimiter = ctRegex!("\n"); static line_delimiter_ws_strip = ctRegex!("[ ]*\n[ ]*"); static line_delimiter_only = ctRegex!("^\n"); static para_delimiter = ctRegex!("\n[ ]*\n+"); static levels_markup = ctRegex!(`^[A-D1-4]$`); static levels_numbered = ctRegex!(`^[0-9]$`); static levels_numbered_headings = ctRegex!(`^[0-7]$`); - /+ insert markup file +/ - // static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*([a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); + static src_pth = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); + static src_fn = + ctRegex!(`^([a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); + static src_fn_master = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssm)$`); + static src_fn_text = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]sst)$`); + static src_fn_insert = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssi)$`); + static src_fn_find_inserts = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); // static insert_ssi_or_sst_fn = ctRegex!(`^<<\s*[a-zA-Z0-9._-]+[.]ss[ti]`); /+ comments +/ static comment = ctRegex!(`^%+ `); + static comments = ctRegex!(`^%+ |^%+$`); /+ header +/ static header = ctRegex!(`^@([a-z_]+):(?:\s|$)`); static header_make = ctRegex!(`^@(make):(?:\s|$)`); - static header_metadata = ctRegex!(`^@([a-z_]+):(?:\s|$)`); + static header_meta = ctRegex!(`^@([a-z_]+):(?:\s|$)`); static header_sub = ctRegex!(`^[ ]+:([a-z_]+):\s`); - static head_main = ctRegex!(`^@([a-z_]+):\s*(.*)`, "m"); - static head_sub = ctRegex!(`^[ ]*:([a-z_]+):\s+(.+)`, "m"); + static head_main = ctRegex!(`^@(?P
[a-z_]+):\s*(?P.*)`, "m"); + static head_sub = ctRegex!(`^[ ]*:(?P[a-z_]+):\s+(?P.+)`, "m"); static head_value_title = ctRegex!(`@title`); static head_value_author = ctRegex!(`@author`); /+ heading & paragraph operators +/ + static heading_a = ctRegex!(`^:?[A][~] `, "m"); static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?) `); static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); @@ -147,7 +148,7 @@ template RgxInit() { // static auto_obj_cite_number_ignore = ctRegex!(`^[+~*$-]{3,}$`); // reminder static obj_cite_number_block_marks = ctRegex!(`^--[+~-]#$`); /+ ignore outside code blocks +/ - static regular_parse_skip = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info + static skip_code_block_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info /+ line & page breaks +/ static break_line_within_object = ctRegex!(`[\\]{2}( |$)`); // static break_line_break_within_object = ctRegex!(`( |^)[\\]{2}( |$)`); diff --git a/src/sdp/output_hub.d b/src/sdp/output_hub.d index 5912e7e..f81c118 100644 --- a/src/sdp/output_hub.d +++ b/src/sdp/output_hub.d @@ -1410,8 +1410,8 @@ template SiSUoutputHub() { auto ref const C contents, string[][string][string] bookindex_unordered_hashes, JSONValue[] biblio, - JSONValue[string] dochead_make, - JSONValue[string] dochead_meta, + // JSONValue[string] dochead_make, + // JSONValue[string] dochead_meta, string fn_src, bool[string] opt_action_bool ) { diff --git a/views/version.txt b/views/version.txt index e61c0ba..c181dba 100644 --- a/views/version.txt +++ b/views/version.txt @@ -4,4 +4,4 @@ struct Version { int minor; int patch; } -enum ver = Version(0, 4, 0); +enum ver = Version(0, 4, 1); -- cgit v1.2.3