From 45e96028ce7696381aca7f155c21b0b718b6a610 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 30 Sep 2015 13:07:43 -0400 Subject: sdp, abstract objects, a start --- lib/sdp/ao_abstract_doc_source.d | 1695 ++++++++++++++++++++++++++++++++++++++ lib/sdp/ao_assertions.d | 222 +++++ lib/sdp/ao_defaults.d | 257 ++++++ lib/sdp/ao_emitter.d | 1322 +++++++++++++++++++++++++++++ lib/sdp/ao_interface.d | 145 ++++ lib/sdp/ao_markup_source_raw.d | 58 ++ lib/sdp/ao_object_setter.d | 90 ++ lib/sdp/ao_output_debugs.d | 354 ++++++++ lib/sdp/ao_rgx.d | 195 +++++ lib/sdp/ao_scan_inserts.d | 229 +++++ lib/sdp/ao_utils.d | 74 ++ lib/sdp/sdp.d | 161 ++++ lib/sdp/sdp.org | 331 ++++++++ 13 files changed, 5133 insertions(+) create mode 100644 lib/sdp/ao_abstract_doc_source.d create mode 100644 lib/sdp/ao_assertions.d create mode 100644 lib/sdp/ao_defaults.d create mode 100644 lib/sdp/ao_emitter.d create mode 100644 lib/sdp/ao_interface.d create mode 100644 lib/sdp/ao_markup_source_raw.d create mode 100644 lib/sdp/ao_object_setter.d create mode 100644 lib/sdp/ao_output_debugs.d create mode 100644 lib/sdp/ao_rgx.d create mode 100644 lib/sdp/ao_scan_inserts.d create mode 100644 lib/sdp/ao_utils.d create mode 100644 lib/sdp/sdp.d create mode 100644 lib/sdp/sdp.org (limited to 'lib/sdp') diff --git a/lib/sdp/ao_abstract_doc_source.d b/lib/sdp/ao_abstract_doc_source.d new file mode 100644 index 0000000..2fec35d --- /dev/null +++ b/lib/sdp/ao_abstract_doc_source.d @@ -0,0 +1,1695 @@ +/* +#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil +* sisu_abstract_doc_source.d +*/ +mixin template SiSUdocAbstraction() { + class Abstraction { + auto abstract_doc_source(char[][] markup_sourcefile_content) { + mixin ObjectSetters; + mixin AssertionsOnMarkupDocumentStructure; + mixin AssertionsOnBlocks; + mixin ScreenTxtColors; + auto rgx = new Rgx(); + auto set_oa = new ObjectAbstractSet(); + auto set_header = new HeaderDocMetadataMakeJson(); + auto notesection = new NotesSection(); + string[string][131072] 
contents_arbitrary_max_length_set; // 2000 pg * 50 lines == 100000 + string[1024] notes; + string notes_str; + string[string] object, processing, head; + string biblio_tag_name, biblio_tag_entry, book_idx_tmp, st; + string[1024] biblio_arr_json = biblio_entry_tags_jsonstr; + JSONValue[1024] bib_arr_json; + uint[string] line_occur; + int counter, previous_count, count_biblio_entry, ocn, ocn_, verse_line, bib_entry, heading_pointer, notepoint; + string indent_first, indent_second; + string[][string][string] bookindex_unordered_hashes; + bool bullet = true; + uint[string] lv = [ + "lv" : 0, + "h0" : 0, + "h1" : 0, + "h2" : 0, + "h3" : 0, + "h4" : 0, + "h5" : 0, + "h6" : 0, + "h7" : 0, + "lcn" : 0, + ]; + int[string] collapsed_lev = [ + "h0" : 0, + "h1" : 0, + "h2" : 0, + "h3" : 0, + "h4" : 0, + "h5" : 0, + "h6" : 0, + "h7" : 0 + ]; + auto rgx_h_A = regex(r"^(none)"); + auto rgx_h_B = regex(r"^(none)"); + auto rgx_h_C = regex(r"^(none)"); + auto rgx_h_D = regex(r"^(none)"); + auto rgx_h_1 = regex(r"^(none)"); + auto rgx_h_2 = regex(r"^(none)"); + auto rgx_h_3 = regex(r"^(none)"); + auto rgx_h_4 = regex(r"^(none)"); + auto str_h_A = "^(none)"; + auto str_h_B = "^(none)"; + auto str_h_C = "^(none)"; + auto str_h_D = "^(none)"; + auto str_h_1 = "^(none)"; + auto str_h_2 = "^(none)"; + auto str_h_3 = "^(none)"; + auto str_h_4 = "^(none)"; + string content_non_header = "8"; + string node; + auto obj_im = new ObjInlineMarkup(); + auto obj_att = new ObjAttrib(); + auto object_citation_number = new OCNemitter(); + auto ft = flag_type.dup; + int ocn_emit(int ocn_status_flag) { + return object_citation_number.ocn_emitter(ocn_status_flag); + } + auto bookindex_extract_hash = new BookIndexNuggetHash(); + string[][string][string] bkidx_hash(string bookindex, int ocn) { + return bookindex_extract_hash.bookindex_nugget_hash(bookindex, ocn); + } + auto node_construct = new NodeStructureMetadata(); + string node_jstr( + string lvn, + int ocn_, + int counter, + int heading_pointer, + 
string is_ + ) { + return node_construct.node_emitter( + lvn, + ocn_, + counter, + heading_pointer, + is_ + ); + } + string node_jstr_heading( + string lvn, + string lcn, + int ocn_, + int counter, + int heading_pointer, + string is_ + ) { + return node_construct.node_emitter_heading( + lvn, + lcn, + ocn_, + counter, + heading_pointer, + is_ + ); + } + string[string] ocn_poem = [ + "start" : "", + "end" : "" + ]; + int tell_lo(string color, int ocn, in char[] line) { + writeln(scr_txt_marker[color], to!string(ocn), " ", to!string(line)); + return 0; + } + int tell_l(string color, in char[] line) { + writeln(scr_txt_marker[color], line); + return 0; + } + scope(success) { + } + scope(failure) { + } + scope(exit) { + destroy(contents_arbitrary_max_length_set); + destroy(object); + destroy(processing); + destroy(biblio_arr_json); + } + auto dochead_make = parseJSON(header_make_jsonstr).object; + auto dochead_metadata = parseJSON(header_metadata_jsonstr).object; + foreach (line; markup_sourcefile_content) { + scope(exit) { + } + scope(failure) { + writeln(__FILE__, ":", __LINE__, " failed here:"); + writeln(" line: ", line); + writeln(" is : ", object["is"]); + writeln(" node: ", node); + } + line = replaceAll(line, rgx.true_dollar, "$$$$"); + debug(source) { // source lines + writeln(line); + } + debug(srclines) { + if (!line.empty) { // source lines, not empty + writeln(scr_txt_marker["green"], line); + } + } + if ((!line.empty) && (ft["ocn_status_multi_obj"] == 0)) { + if (match(line, rgx.ocn_block_marks)) { + if (match(line, rgx.ocn_off_block)) { + ft["ocn_status_multi_obj"] = 1; + debug(ocnoff) { + tell_l("fuchsia", line); + } + } + if (match(line, rgx.ocn_off_block_dh)) { + ft["ocn_status_multi_obj"] = 2; + debug(ocnoff) { + tell_l("fuchsia", line); + } + } + } else { + if (ft["ocn_status_multi_obj"] == 0) { + if (match(line, rgx.ocn_off)) { + ft["ocn_status"] = 1; + } else if (match(line, rgx.ocn_off_dh)) { + ft["ocn_status"] = 2; + } else { + ft["ocn_status"] = 
2; + ft["ocn_status"] = 0; + } + } else { + ft["ocn_status"] = ft["ocn_status_multi_obj"]; + } + } + } else if ((!line.empty) && (ft["ocn_status_multi_obj"] > 0)) { + if (auto m = match(line, rgx.ocn_off_block_close)) { + ft["ocn_status_multi_obj"] = 0; + ft["ocn_status"] = 0; + debug(ocnoff) { + tell_l("green", line); + } + } + } + if (ft["code"] == 1) { + if (ft["curly_code"] == 1) { + if (auto m = match(line, rgx.block_curly_code_close)) { + debug(code) { // code (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["code"] = 2; + ft["curly_code"] = 0; + } else { + debug(code) { // code (curly) line + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // code (curly) line + } + } else if (ft["tic_code"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(code) { // code (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["code"] = 2; + ft["tic_code"] = 0; + } else { + debug(code) { // code (tic) line + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // code (tic) line + } + } + } else if (!match(line, rgx.regular_parse_skip)) { + if (((match(line, rgx.heading_biblio) + || (ft["heading_biblio"] == 1))) + && (!match(line, rgx.heading)) + && (!match(line, rgx.comment))) { + if (match(line, rgx.heading_biblio)) { + ft["heading_biblio"] = 1; + } + if (empty(line) && (bib_entry == 0)) { + count_biblio_entry++; + bib_entry = 1; + } + debug(biblio) { + writeln( + scr_txt_color["yellow"], + "* ", + scr_txt_color["off"], + to!string(count_biblio_entry), + " ", + line + ); + } + if (match(line, rgx.biblio_tags)) { + auto bt = match(line, rgx.biblio_tags); + bib_entry = 0; + st=to!string(bt.captures[1]); + biblio_tag_entry=to!string(bt.captures[2]); + JSONValue j = parseJSON(biblio_arr_json[count_biblio_entry]); + if (match(st, rgx.biblio_abbreviations)) { + biblio_tag_name=biblio_tag_map[st]; + } else { + biblio_tag_name=st; + } + j.object[biblio_tag_name] = biblio_tag_entry; + auto 
header_tag_value=to!string(bt.captures[2]); + switch (biblio_tag_name) { + case "author_raw": // author_arr author (fn sn) + j["author_arr"]=split(header_tag_value, rgx.arr_delimiter); + string tmp; + foreach (au; j["author_arr"].array) { + if (auto x = match(au.str, rgx.name_delimiter)) { + tmp ~= x.captures[2] ~ " " ~ x.captures[1] ~ ", "; + } else { + tmp ~= au.str; + } + } + tmp = replace(tmp, rgx.trailing_comma, ""); + j["author"].str = tmp; + break; + case "editor_raw": // editor_arr editor (fn sn) + j["editor_arr"]=split(header_tag_value, rgx.arr_delimiter); + string tmp; + foreach (ed; j["editor_arr"].array) { + if (auto x = match(ed.str, rgx.name_delimiter)) { + tmp ~= x.captures[2] ~ " " ~ x.captures[1] ~ ", "; + } else { + tmp ~= ed.str; + } + } + tmp = replace(tmp, rgx.trailing_comma, ""); + j["editor"].str = tmp; + break; + case "fulltitle": // title & subtitle + break; + default: + break; + } + auto s = to!string(j); + s = j.toString(); + debug(biblio) { + writeln( + scr_txt_color["red"], + "* ", + scr_txt_color["off"], + biblio_tag_name, + ": ", + biblio_tag_entry + ); + writeln(biblio_arr_json[count_biblio_entry]); + writeln(j[biblio_tag_name], ":", j[biblio_tag_name]); + } + biblio_arr_json[count_biblio_entry] = s; + biblio_tag_entry=""; + } + } else if (ft["poem"] == 1) { + if (ft["curly_poem"] == 1) { + if (auto m = match(line, rgx.block_curly_poem_close)) { + object["obj"]="verse"; // check that this is as you please + debug(poem) { // poem (curly) close + writeln( + scr_txt_color["red"], + "* [poem curly] ", + scr_txt_color["off"], + line + ); + } + if (processing.length > 0) { + object["obj"] = processing["verse"]; + } + debug(poem) { // poem (curly) close + writeln(__LINE__); + writeln( + scr_txt_marker["fuchsia"], + ocn, + " ", + line + ); + } + if (object.length > 0) { + debug(poem) { // poem (curly) close + writeln(__LINE__); + tell_lo( + "fuchsia", + ocn, + object["obj"] + ); + writeln(__LINE__); + } + object["is"] = "verse"; + 
object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } + ocn_poem["end"] = to!string(ocn); + ft["blocks"] = 2; + ft["poem"] = 2; + ft["curly_poem"] = 0; + } else { + processing["verse"] ~= line ~= "\n"; + if (ft["verse_new"] == 1) { + ocn = ocn_emit(ft["ocn_status"]); + ft["verse_new"] = 0; + } else if (match(line, rgx.line_delimiter_only)) { + verse_line = 0; + ft["verse_new"] = 1; + } + if (ft["verse_new"] == 1) { + verse_line=1; + object["obj"] = processing["verse"]; + debug(poem) { // poem verse + writeln(scr_txt_marker["green"], + ocn, + " curly\n", + object["obj"]); + } + processing.remove("verse"); + object["is"] = "verse"; + node = node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } + } + } else if (ft["tic_poem"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { // tic_poem_close + object["obj"]="verse"; // check that this is as you please + debug(poem) { // poem (curly) close + writeln( + scr_txt_color["red"], + "* [poem tic] ", + scr_txt_color["off"], + line + ); + } + if (processing.length > 0) { // needs looking at + 
object["obj"] = processing["verse"]; + } + if (object.length > 0) { + debug(poem) { // poem (tic) close + writeln(__LINE__); + tell_lo("fuchsia", ocn, line); + } + processing.remove("verse"); + object["is"] = "verse"; + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + ocn_poem["end"] = to!string(ocn); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } + ft["blocks"] = 2; + ft["poem"] = 2; + ft["tic_poem"] = 0; + } else { + processing["verse"] ~= line ~= "\n"; + if (ft["verse_new"] == 1) { + ocn = ocn_emit(ft["ocn_status"]); + ft["verse_new"] = 0; + } else if (match(line, rgx.line_delimiter_only)) { + ft["verse_new"] = 1; + verse_line = 0; + } + if (ft["verse_new"] == 1) { + verse_line=1; + object["obj"] = processing["verse"]; + debug(poem) { // poem (tic) close + writeln(scr_txt_marker["green"], + ocn, + " tic\n", + object["obj"]); + } + processing.remove("verse"); + object["is"] = "verse"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } + } + } + } else if (ft["group"] == 1) { + if (ft["curly_group"] == 1) { + if (auto m = match(line, rgx.block_curly_group_close)) { + 
debug(group) { // group (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["group"] = 2; + ft["curly_group"] = 0; + } else { + debug(group) { // group + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build group array (or string) + } + } else if (ft["tic_group"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(group) { // group (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["group"] = 2; + ft["tic_group"] = 0; + } else { + debug(group) { // group + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build group array (or string) + } + } + } else if (ft["block"] == 1) { + if (ft["curly_block"] == 1) { + if (auto m = match(line, rgx.block_curly_block_close)) { + debug(block) { // block (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["block"] = 2; + ft["curly_block"] = 0; + } else { + debug(block) { // block + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build block array (or string) + } + } else if (ft["tic_block"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(block) { // block (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["block"] = 2; + ft["tic_block"] = 0; + } else { + debug(block) { // block + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build block array (or string) + } + } + } else if (ft["quote"] == 1) { + if (ft["curly_quote"] == 1) { + if (auto m = match(line, rgx.block_curly_quote_close)) { + debug(quote) { // quote (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["quote"] = 2; + ft["curly_quote"] = 0; + } else { + debug(quote) { // quote + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build quote array (or string) + } + } else if (ft["tic_quote"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(quote) { // quote (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["quote"] = 2; + ft["tic_quote"] = 0; + } else { + debug(quote) 
{ // quote + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build quote array (or string) + } + } + } else if (ft["table"] == 1) { + if (ft["curly_table"] == 1) { + if (auto m = match(line, rgx.block_curly_table_close)) { + debug(table) { // table (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["table"] = 2; + ft["curly_table"] = 0; + } else { + debug(table) { // table + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build table array (or string) + } + } else if (ft["tic_table"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(table) { // table (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["table"] = 2; + ft["tic_table"] = 0; + } else { + debug(table) { // table + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build table array (or string) + } + } + } else { + assert( + (ft["blocks"] == 0) + || (ft["blocks"] == 2), + "block status: none or closed" + ); + assertions_flag_types_block_status_none_or_closed(ft); + if (auto m = match(line, rgx.block_curly_code_open)) { + debug(code) { // code (curly) open + writeln( + scr_txt_color["blue"], + "* [code curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["code"] = 1; + ft["curly_code"] = 1; + } else if (auto m = match(line, rgx.block_curly_poem_open)) { + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + debug(poem) { // poem (curly) open + writeln( + scr_txt_color["red"], + "* [poem curly] ", + scr_txt_color["off"], + line + ); + } + ocn_poem["start"] = to!string(ocn); + ft["blocks"] = 1; + ft["verse_new"] = 1; + ft["poem"] = 1; + ft["curly_poem"] = 1; + } else if (auto m = match(line, rgx.block_curly_group_open)) { + debug(group) { // group (curly) open + writeln( + scr_txt_color["blue"], + "* [group curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["group"] = 1; + 
ft["curly_group"] = 1; + } else if (auto m = match(line, rgx.block_curly_block_open)) { + debug(block) { // block (curly) open + writeln( + scr_txt_color["blue"], + "* [block curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["block"] = 1; + ft["curly_block"] = 1; + } else if (auto m = match(line, rgx.block_curly_quote_open)) { + debug(quote) { // quote (curly) open + writeln( + scr_txt_color["blue"], + "* [quote curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["quote"] = 1; + ft["curly_quote"] = 1; + } else if (auto m = match(line, rgx.block_curly_table_open)) { + debug(table) { // table (curly) open + writeln( + scr_txt_color["blue"], + "* [table curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["table"] = 1; + ft["curly_table"] = 1; + } else if (auto m = match(line, rgx.block_tic_code_open)) { + debug(code) { // code (tic) open + writeln( + scr_txt_color["blue"], + "* [code tic] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["code"] = 1; + ft["tic_code"] = 1; + } else if (auto m = match(line, rgx.block_tic_poem_open)) { + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + debug(poem) { // poem (tic) open + writeln( + scr_txt_color["red"], + "* [poem tic] ", + scr_txt_color["off"], + line + ); + } + ocn_poem["start"] = to!string(ocn); + ft["blocks"] = 1; + ft["verse_new"] = 1; + ft["poem"] = 1; + ft["tic_poem"] = 1; + } else if (auto m = match(line, rgx.block_tic_group_open)) { + debug(group) { // group (tic) open + writeln( + scr_txt_color["blue"], + "* [group tic] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["group"] = 1; + ft["tic_group"] = 1; + } else if (auto m = match(line, rgx.block_tic_block_open)) { + debug(block) { // block (tic) open + writeln( + scr_txt_color["blue"], + "* [block tic] ", + scr_txt_color["off"], + line + ); + } 
+ ft["blocks"] = 1; + ft["block"] = 1; + ft["tic_block"] = 1; + } else if (auto m = match(line, rgx.block_tic_quote_open)) { + debug(quote) { // quote (tic) open + writeln( + scr_txt_color["blue"], + "* [quote tic] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["quote"] = 1; + ft["tic_quote"] = 1; + } else if (auto m = match(line, rgx.block_tic_table_open)) { + debug(table) { // table (tic) open + writeln( + scr_txt_color["blue"], + "* [table tic] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["table"] = 1; + ft["tic_table"] = 1; + } else if (!line.empty) { + assert( + !line.empty, + "line tested, line not empty surely" + ); + assert( + (ft["blocks"] == 0) + || (ft["blocks"] == 2), + "code block status: none or closed" + ); + if (ft["blocks"] == 2) { + debug(check) { // block + writeln(__LINE__); + tell_l("red", line); + } + assert( + match(line, rgx.book_index) + || match(line, rgx.book_index_open) + || ft["book_index"] == 1 + ); + } + if (auto m = match(line, rgx.book_index)) { + debug(bookindexmatch) { // book index + writeln( + scr_txt_color["blue"], "* [bookindex] ", scr_txt_color["off"], + to!string(m.captures[1]), "\n" + ); + } + object["bookindex"] = to!string(m.captures[1]); + } else if (auto m = match(line, rgx.book_index_open)) { + ft["book_index"] = 1; + book_idx_tmp = to!string(m.captures[1]); + debug(bookindexmatch) { // book index + writeln( + scr_txt_color["blue"], + "* [bookindex] ", + scr_txt_color["off"], + book_idx_tmp, "\n" + ); + } + } else if (ft["book_index"] == 1 ) { + if (auto m = match(line, rgx.book_index_close)) { + ft["book_index"] = 0; + object["bookindex"] = book_idx_tmp ~ to!string(m.captures[1]); + debug(bookindexmatch) { // book index + writeln( + scr_txt_color["blue"], + "* [bookindex] ", + scr_txt_color["off"], + book_idx_tmp, "\n" + ); + } + book_idx_tmp = ""; + } else { + book_idx_tmp ~= line; + } + } else { + if (auto m = match(line, rgx.comment)) { + debug(comment) { + tell_l("blue", 
line); + } + object["obj"] ~= line ~= "\n"; + contents_arbitrary_max_length_set[counter] = + set_oa.contents_comment(strip(object["obj"])); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + line_occur["header_metadata"] = 0; + line_occur["header_make"] = 0; + line_occur["heading"] = 0; + line_occur["para"] = 0; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + counter++; + } else if (auto m = match(line, rgx.header_make)) { + debug(header1) { // header + tell_l("yellow", line); + } + ft["header"] = 1; + ft["header_make"] = 1; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + line_occur["header_make"]++; + object["obj"] ~= line ~= "\n"; + } else if (auto m = match(line, rgx.header_metadata)) { + debug(header1) { // header + tell_l("yellow", line); + } + ft["header"] = 1; + ft["header_make"] = 0; + ft["header_metadata"] = 1; + ft["heading"] = 0; + ft["para"] = 0; + line_occur["header_metadata"]++; + object["obj"] ~= line ~= "\n"; + } else if (ft["header_make"] == 1 + && (line_occur["header_make"] > 0)) { + if (auto m = match(line, rgx.header_sub)) { + debug(header1) { // header sub + tell_l("yellow", line); + } + line_occur["header_make"]++; + object["obj"] ~= line ~= "\n"; + } + } else if (ft["header_metadata"] == 1 + && (line_occur["header_metadata"] > 0)) { + if (auto m = match(line, rgx.header_sub)) { + debug(header1) { // header sub + tell_l("yellow", line); + } + line_occur["header_metadata"]++; + object["obj"] ~= line ~= "\n"; + } + } else if (((line_occur["para"] == 0) + && (line_occur["heading"] == 0)) + && ((ft["para"] == 0) + && (ft["heading"] == 0))) { + if ((to!string(dochead_make["make"]["headings"]).length > 2) + && (ft["make_headings"] == 0)) { + debug(headingsfound) { + writeln(dochead_make["make"]["headings"]); + } + auto make_headings_txt = + match( + 
to!string(dochead_make["make"]["headings"]), + rgx.within_quotes); + char[][] make_headings_spl = + split( + cast(char[]) make_headings_txt.captures[1], + rgx.make_heading_delimiter); + debug(headingsfound) { + writeln(make_headings_spl.length); + writeln(make_headings_spl); + } + switch (make_headings_spl.length) { + case 7 : + if (!empty(make_headings_spl[6])) { + str_h_4 = "^(" ~ to!string(make_headings_spl[6]) ~ ")"; + rgx_h_4 = regex(str_h_4); + } + goto case; + case 6 : + if (!empty(make_headings_spl[5])) { + str_h_3 = "^(" ~ to!string(make_headings_spl[5]) ~ ")"; + rgx_h_3 = regex(str_h_3); + } + goto case; + case 5 : + if (!empty(make_headings_spl[4])) { + str_h_2 = "^(" ~ to!string(make_headings_spl[4]) ~ ")"; + rgx_h_2 = regex(str_h_2); + } + goto case; + case 4 : + if (!empty(make_headings_spl[3])) { + str_h_1 = "^(" ~ to!string(make_headings_spl[3]) ~ ")"; + rgx_h_1 = regex(str_h_1); + } + goto case; + case 3 : + if (!empty(make_headings_spl[2])) { + str_h_D = "^(" ~ to!string(make_headings_spl[2]) ~ ")"; + rgx_h_D = regex(str_h_D); + } + goto case; + case 2 : + if (!empty(make_headings_spl[1])) { + str_h_C = "^(" ~ to!string(make_headings_spl[1]) ~ ")"; + rgx_h_C = regex(str_h_C); + } + goto case; + case 1 : + if (!empty(make_headings_spl[0])) { + str_h_B = "^(" ~ to!string(make_headings_spl[0]) ~ ")"; + rgx_h_B = regex(str_h_B); + } + break; + default: + break; + } + ft["make_headings"] = 1; + } + if ((ft["make_headings"] == 1) + && ((line_occur["para"] == 0) + && (line_occur["heading"] == 0)) + && ((ft["para"] == 0) + && (ft["heading"] == 0))) { + if (match(line, rgx_h_B)) { + line = "B~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_C)) { + line = "C~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_D)) { + line = "D~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_1)) { + line = "1~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } 
+ if (match(line, rgx_h_2)) { + line = "2~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_3)) { + line = "3~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_4)) { + line = "4~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + } + if (auto m = match(line, rgx.heading)) { + ft["heading"] = 1; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading_biblio"] = 0; + ft["para"] = 0; + line_occur["heading"]++; + object["obj"] ~= line ~= "\n"; + object["lev"] ~= m.captures[1]; + assertions_doc_structure(object, lv); // includes most of the logic for collapsed levels + switch (to!string(object["lev"])) { + case "A": + object["lvn"]="0"; + lv["lv"] = 0; + lv["h0"]++; + lv["h1"] = 0; + lv["h2"] = 0; + lv["h3"] = 0; + lv["h4"] = 0; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + collapsed_lev["h0"] = 1; + object["lcn"] = to!string(collapsed_lev["h0"]); + break; + case "B": + collapsed_lev["h1"] = collapsed_lev["h0"] + 1; + object["lcn"] = to!string(collapsed_lev["h1"]); + object["lvn"]="1"; + lv["lv"] = 1; + lv["h1"]++; + lv["h2"] = 0; + lv["h3"] = 0; + lv["h4"] = 0; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "C": + collapsed_lev["h2"] = collapsed_lev["h1"] + 1; + object["lcn"] = to!string(collapsed_lev["h2"]); + object["lvn"]="2"; + lv["lv"] = 2; + lv["h2"]++; + lv["h3"] = 0; + lv["h4"] = 0; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "D": + collapsed_lev["h3"] = collapsed_lev["h2"] + 1; + object["lcn"] = to!string(collapsed_lev["h3"]); + object["lvn"]="3"; + lv["lv"] = 3; + lv["h3"]++; + lv["h4"] = 0; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "1": + if (lv["h3"] > 0) { + collapsed_lev["h4"] = collapsed_lev["h3"] + 1; + } else if (lv["h2"] > 0) { + collapsed_lev["h4"] = collapsed_lev["h2"] + 1; + } else if (lv["h1"] > 0) { + collapsed_lev["h4"] = collapsed_lev["h1"] + 1; + } else if (lv["h0"] > 0) 
{ + collapsed_lev["h4"] = collapsed_lev["h0"] + 1; + } + object["lcn"] = to!string(collapsed_lev["h4"]); + object["lvn"]="4"; + lv["lv"] = 4; + lv["h4"]++; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "2": + if (lv["h5"] > 0) { + object["lcn"] = to!string(collapsed_lev["h5"]); + } else if (lv["h4"] > 0) { + collapsed_lev["h5"] = collapsed_lev["h4"] + 1; + object["lcn"] = to!string(collapsed_lev["h5"]); + } + object["lvn"]="5"; + lv["lv"] = 5; + lv["h5"]++; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "3": + if (lv["h6"] > 0) { + object["lcn"] = to!string(collapsed_lev["h6"]); + } else if (lv["h5"] > 0) { + collapsed_lev["h6"] = collapsed_lev["h5"] + 1; + object["lcn"] = to!string(collapsed_lev["h6"]); + } + object["lvn"]="6"; + lv["lv"] = 6; + lv["h6"]++; + lv["h7"] = 0; + break; + case "4": + if (lv["h7"] > 0) { + object["lcn"] = to!string(collapsed_lev["h7"]); + } else if (lv["h6"] > 0) { + collapsed_lev["h7"] = collapsed_lev["h6"] + 1; + object["lcn"] = to!string(collapsed_lev["h7"]); + } + object["lvn"]="7"; + lv["lv"] = 7; + lv["h7"]++; + break; + default: + break; + } + debug(heading) { // heading + tell_l("yellow", strip(line)); + } + } else if (line_occur["para"] == 0) { + if (auto m = match(line, rgx.para_indent)) { + debug(paraindent) { // para indent + tell_l("blue", line); + } + ft["para"] = 1; + object["obj"] ~= line ~= "\n"; + indent_first = to!string(m.captures[1]); + indent_second = "0"; + bullet = false; + } else if (auto m = match(line, rgx.para_bullet)) { + debug(parabullet) { // para bullet + tell_l("blue", line); + } + ft["para"] = 1; + object["obj"] ~= line; + indent_first = "0"; + indent_second = "0"; + bullet = true; + } else if (auto m = match(line, rgx.para_indent_hang)) { + debug(paraindenthang) { // para indent hang + tell_l("blue", line); + } + ft["para"] = 1; + object["obj"] ~= line; + indent_first = to!string(m.captures[1]); + indent_second = to!string(m.captures[2]); + bullet = false; + } else if (auto m = 
match(line, rgx.para_bullet_indent)) { + debug(parabulletindent) { // para bullet indent + tell_l("blue", line); + } + ft["para"] = 1; + object["obj"] ~= line; + indent_first = to!string(m.captures[1]); + indent_second = "0"; + bullet = true; + } else { + ft["para"] = 1; + object["obj"] ~= line; + indent_first = "0"; + indent_second = "0"; + bullet = false; + } + line_occur["para"]++; + } + } else if (line_occur["header_make"] > 0) { + debug(header) { // para + tell_l("red", line); + } + object["obj"] ~= line ~= "\n"; + line_occur["header_make"]++; + } else if (line_occur["header_metadata"] > 0) { + debug(header) { // para + tell_l("red", line); + } + object["obj"] ~= line ~= "\n"; + line_occur["header_metadata"]++; + } else if (line_occur["heading"] > 0) { + debug(heading) { // heading + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; + line_occur["heading"]++; + } else if (line_occur["para"] > 0) { + debug(para) { // para + tell_l("blue", line); + } + object["obj"] ~= line; + line_occur["para"]++; + } + } + } else if (ft["blocks"] == 2) { + assert( + line.empty, + "line should be empty" + ); + assert( + (ft["blocks"] == 2), + "code block status: closed" + ); + assertions_flag_types_block_status_none_or_closed(ft); + if (ft["code"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = + ("bookindex" in object) ? 
object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "code"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["code"] = 0; + } else if (ft["poem"] == 2) { + object["bookindex"] = + ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "verse"; // check also + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block_ocn_string( + "poem", + "", + (ocn_poem["start"], ocn_poem["end"]), + node + ); // bookindex + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + ft["blocks"] = 0; + ft["poem"] = 0; + } else if (ft["table"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = + ("bookindex" in object) ? 
object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "table"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["table"] = 0; + } else if (ft["group"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = bkidx_hash(object["bookindex"], ocn); + object["is"] = "group"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["group"] = 0; + } else if (ft["block"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = ("bookindex" in object) ? 
object["bookindex"] : ""; + bookindex_unordered_hashes = bkidx_hash(object["bookindex"], ocn); + object["is"] = "block"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["block"] = 0; + } else if (ft["quote"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "quote"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["quote"] = 0; + } + } else { + assert( + line.empty, + "line should be empty" + ); + assert( + (ft["blocks"] == 0), + "code block status: none" + ); + if ((ft["header_make"] == 1) + && (line_occur["header_make"] > 0)) { + auto dochead_metadata_and_make = + set_header.header_metadata_and_make_jsonstr(strip(object["obj"]), dochead_metadata, dochead_make); + static assert(!isTypeTuple!(dochead_metadata_and_make)); + 
dochead_metadata = dochead_metadata_and_make[0]; + dochead_make = dochead_metadata_and_make[1]; + line_occur["header_make"] = 0; + line_occur["header_metadata"] = 0; + line_occur["heading"] = 0; + line_occur["para"]= 0; + ft["header"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + } else if ((ft["header_metadata"] == 1) + && (line_occur["header_metadata"] > 0)) { + auto dochead_metadata_and_make = + set_header.header_metadata_and_make_jsonstr(strip(object["obj"]), dochead_metadata, dochead_make); + static assert(!isTypeTuple!(dochead_metadata_and_make)); + dochead_metadata = dochead_metadata_and_make[0]; + dochead_make = dochead_metadata_and_make[1]; + line_occur["header_make"] = 0; + line_occur["header_metadata"] = 0; + line_occur["heading"] = 0; + line_occur["para"]= 0; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + } else if ((ft["heading"] == 1) + && (line_occur["heading"] > 0)) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = + ("bookindex" in object) ? 
object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "heading"; + node = + node_jstr_heading( + object["lvn"], + object["lcn"], + ocn, + counter, + heading_pointer, + object["is"] + ); // heading + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + heading_pointer++; + contents_arbitrary_max_length_set[counter] = + set_oa.contents_heading( + ft["ocn_status"], + object["markup"], + object["attrib"], + ocn, object["lev"], + object["lvn"], + object["lcn"] + ); + debug(objectrelated1) { // check + tell_l("blue", line); + } + line_occur["header_make"] = 0; + line_occur["header_metadata"] = 0; + line_occur["heading"] = 0; + line_occur["para"] = 0; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("lev"); + object.remove("lvn"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } else if ((ft["para"] == 1) && (line_occur["para"] > 0)) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = + ("bookindex" in object) ? 
object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "para"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_para( + object["is"], + object["markup"], + object["attrib"], + ocn, + indent_first, + indent_second, + bullet + ); + line_occur["header_make"] = 0; + line_occur["header_metadata"] = 0; + line_occur["heading"] = 0; + line_occur["para"] = 0; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + indent_first = "0"; + indent_second = "0"; + bullet = false; + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } else { + assert( + line == null, + "line variable should be empty, should not occur" + ); + } + } + } + } + if (((contents_arbitrary_max_length_set[counter-1]["is"] == "para") + || (contents_arbitrary_max_length_set[counter-1]["is"] == "heading")) + && (counter-1 > previous_count)) { + if (match(contents_arbitrary_max_length_set[counter-1]["obj"], + rgx.inline_notes_delimiter_al_regular_number_note)) { + previous_count=counter-1; + notesection.gather_notes_for_endnote_section(contents_arbitrary_max_length_set, counter-1); + } + } + } + debug(objectrelated2) { // check + tell_l("blue", line); + } +/* + Backmatter: + * endnotes + * glossary + * references / bibliography + * book index +*/ + obj_im.obj_inline_markup("doc_end_reset", ""); + auto en_tuple = notesection.endnote_objects(ocn); + static assert(!isTypeTuple!(en_tuple)); + auto endnotes = en_tuple[0]; + ocn = en_tuple[1]; +debug(endnotes) { + writeln(__LINE__, " ", endnotes.length); + 
/*
#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil
* sisu_assertions.d
*/
mixin template AssertionsOnMarkupDocumentStructure() {
  /++
    Assert that the running heading counters describe a well-formed
    heading hierarchy and that the heading level about to be entered is
    allowed at this point.

    Params:
      object = object under construction; only object["lev"] is read, one of
               "A".."D" or "1".."4" (any other value skips the per-level check)
      lv     = occurrence counters keyed "h0".."h7"; "h0".."h3" are checked
               for levels "A".."D" and "h4".."h7" for levels "1".."4"

    Every check is an assert(), so nothing is verified in -release builds.
    A key missing from either associative array raises a RangeError.

    The original spelled this out as several hundred asserts, most of them
    restating their own guard, and the last guard was a copy-paste duplicate
    (`lv["h6"] == 0` tested twice, the second one meant for h7).  The
    conditions that can actually fail reduce to:
      - a used level needs its parent level to be in use
        (B<-A, C<-B, D<-C, 1<-A, 2<-1, 3<-2, 4<-3);
      - level A may be entered only once;
      - entering any other level needs its parent level to be in use.
  +/
  auto assertions_doc_structure(string[string] object, uint[string] lv) {
    // index order: A, B, C, D, 1, 2, 3, 4
    static immutable string[8] key    = ["h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7"];
    static immutable string[8] level  = ["A", "B", "C", "D", "1", "2", "3", "4"];
    static immutable size_t[8] parent = [0, 0, 1, 2, 0, 4, 5, 6];
    // read every counter up front so that a missing key fails the same
    // way it did when each counter was consulted unconditionally
    uint[8] count;
    foreach (i, k; key) {
      count[i] = lv[k];
    }
    foreach (i; 1 .. key.length) {
      if (count[i] > 0) {
        assert(
          count[parent[i]] > 0,
          "heading level " ~ level[i]
          ~ " in use without its parent level " ~ level[parent[i]]
        );
      }
    }
    auto lev = object["lev"];
    if (lev == level[0]) {
      assert(count[0] == 0, "error should not enter level A a second time");
      return;
    }
    foreach (i; 1 .. level.length) {
      if (lev == level[i]) {
        // ancestors further up are already covered by the invariant above
        assert(
          count[parent[i]] > 0,
          "heading level " ~ level[i]
          ~ " entered before its parent level " ~ level[parent[i]]
        );
        break;
      }
    }
  }
}
mixin template AssertionsOnBlocks() {
  /++
    Assert that each of the code, poem, table, group and block flags is
    either 0 or 2 (the messages call these "none" and "closed").

    Params:
      flag_type = flag table; the keys "code", "poem", "table", "group" and
                  "block" are read (a missing key raises a RangeError)
  +/
  auto assertions_flag_types_block_status_none_or_closed(int[string] flag_type) {
    // flag name paired with the message reported when it is neither 0 nor 2
    static immutable string[2][] checks = [
      ["code",  "code block status: none or closed"],
      ["poem",  "poem status: none or closed"],
      ["table", "table status: none or closed"],
      ["group", "group block status: none or closed"],
      ["block", "block status: none or closed"],
    ];
    foreach (check; checks) {
      immutable status = flag_type[check[0]];
      assert((status == 0) || (status == 2), check[1]);
    }
  }
}

/*
#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil
* sisu_defaults.d
*/
/++
  Default (empty) document header templates as JSON text, plus the lists
  of main header names and of the sub-header names under each of them.
+/
template SiSUheader() {
  // template for the "make" header: every recognised key, empty value
  string header_make_jsonstr = `{
    "make": {
      "cover_image" : "",
      "home_button_image" : "",
      "home_button_text" : "",
      "footer" : "",
      "headings" : "",
      "num_top" : "",
      "breaks" : "",
      "substitute" : "",
      "bold" : "",
      "italics" : "",
      "emphasis" : "",
      "texpdf_font" : "",
      "css" : ""
    }
  }`;
  // template for the metadata headers: every recognised key, empty value
  string header_metadata_jsonstr = `{
    "creator": {
      "author" : "",
      "translator" : "",
      "illustrator" : ""
    },
    "title": {
      "main" : "",
      "sub" : "",
      "full" : "",
      "language" : "",
      "edition" : "",
      "note" : ""
    },
    "rights": {
      "copyright" : "",
      "illustrations" : "",
      "license" : "",
      "cover" : ""
    },
    "date": {
      "published" : "",
      "created" : "",
      "issued" : "",
      "available" : "",
      "valid" : "",
      "modified" : "",
      "added_to_site" : ""
    },
    "original": {
      "title" : "",
      "language" : "",
      "source" : ""
    },
    "classify": {
      "topic_register" : "",
      "subject" : "",
      "keywords" : "",
      "loc" : "",
      "dewey" : ""
    },
    "identifier": {
      "oclc" : "",
      "pg" : "",
      "isbn" : ""
    },
    "notes": {
      "abstract" : "",
      "description" : ""
    },
    "publisher": {
      "name" : ""
    },
    "links": {
    }
  }`; // links
  // NOTE(review): the lists below do not line up with the JSON templates
  // above -- "publisher" has a sub-list but is absent from
  // pointer_head_main; "cover" is listed under creator although the
  // template keeps it under "rights"; "available" is in the date template
  // but not in pointer_head_sub_date.  Confirm which side is intended.
  string[] pointer_head_main = [
    "creator", "title", "rights", "date", "original",
    "classify", "identifier", "notes", "make", "links"
  ];
  string[] pointer_head_sub_creator = [
    "author", "translator", "illustrator", "cover"
  ];
  string[] pointer_head_sub_title = [
    "main", "sub", "full", "language", "edition", "note"
  ];
  string[] pointer_head_sub_rights = [
    "copyright", "illustrations", "license"
  ];
  string[] pointer_head_sub_date = [
    "published", "created", "issued", "valid", "modified", "added_to_site"
  ];
  string[] pointer_head_sub_original = [
    "title", "language", "source"
  ];
  string[] pointer_head_sub_classify = [
    "topic_register", "subject", "keywords", "loc", "dewey"
  ];
  string[] pointer_head_sub_identifier = [
    "oclc", "pg", "isbn"
  ];
  string[] pointer_head_sub_notes = [
    "abstract", "description"
  ];
  string[] pointer_head_sub_publisher = [
    "name"
  ];
  string[] pointer_head_sub_make = [
    "cover_image", "home_button_image", "home_button_text",
    "footer", "headings", "num_top", "breaks", "substitute",
    "bold", "italics", "emphasis", "texpdf_font", "css"
  ];
  string config_jsonstr = `{
  }`;
}
/*
* RgxInitFlags
*/
/++
  Initial state of the flag table: every flag starts at 0.
+/
template SiSUrgxInitFlags() {
// mixin template RgxInit() {
  int[string] flag_type = [
    // headers, headings, paragraphs
    "make_headings"        : 0,
    "header_make"          : 0,
    "header_metadata"      : 0,
    "heading"              : 0,
    "heading_biblio"       : 0,
    "para"                 : 0,
    // block state
    "blocks"               : 0, // 0..2 generic
    "code"                 : 0, // 0..2
    "poem"                 : 0, // 0..2
    "table"                : 0, // 0..2
    "group"                : 0, // 0..2
    "block"                : 0, // 0..2
    "quote"                : 0, // 0..2
    "verse_new"            : 0,
    // which delimiter style opened the block
    "curly_code"           : 0,
    "curly_poem"           : 0,
    "curly_table"          : 0,
    "curly_group"          : 0,
    "curly_block"          : 0,
    "curly_quote"          : 0,
    "tic_code"             : 0,
    "tic_poem"             : 0,
    "tic_table"            : 0,
    "tic_group"            : 0,
    "tic_block"            : 0,
    "tic_quote"            : 0,
    // object numbering
    "ocn_status"           : 0, // 0 ocn; 1 no ocn; 2 no ocn & dummy headings
    "ocn_status_multi_obj" : 0, // 0 ocn; 1 no ocn; 2 no ocn & dummy headings
    "book_index"           : 0,
  ];
}
/++
  Empty bibliography entry template (JSON text) and the map from the
  short tags to the template field each one fills.
+/
template SiSUbiblio() {
  string biblio_entry_tags_jsonstr = `{
    "is" : "",
    "sortby_deemed_author_year_title" : "",
    "deemed_author" : "",
    "author_raw" : "",
    "author" : "",
    "author_arr" : [ "" ],
    "editor_raw" : "",
    "editor" : "",
    "editor_arr" : [ "" ],
    "title" : "",
    "subtitle" : "",
    "fulltitle" : "",
    "language" : "",
    "trans" : "",
    "src" : "",
    "journal" : "",
    "in" : "",
    "volume" : "",
    "edition" : "",
    "year" : "",
    "place" : "",
    "publisher" : "",
    "url" : "",
    "pages" : "",
    "note" : "",
    "short_name" : "",
    "id" : ""
  }`; // is: book, article, magazine, newspaper, blog, other
  string[string] biblio_tag_map = [
    "au"  : "author_raw",
    "ed"  : "editor_raw",
    "ti"  : "fulltitle",
    "lng" : "language",
    "jo"  : "journal",
    "vol" : "volume",
    "edn" : "edition",
    "yr"  : "year",
    "pl"  : "place",
    "pb"  : "publisher",
    "pub" : "publisher",
    "pg"  : "pages",
    "pgs" : "pages",
    "sn"  : "short_name"
  ]; // is: book, article, magazine, newspaper, blog, other
}
/++
  Delimiters used internally to mark up notes: an "a" pair and a "b" pair,
  each with an opening and a closing bracket.
+/
template InternalMarkup() {
  class InternalMarkup {
    string en_a_o = "【";
    string en_a_c = "】";
    string en_b_o = "〖";
    string en_b_c = "〗";
  }
}
/*
#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil
* sisu_emitter.d
*/
// the Emitters template continues beyond the classes shown in this block
mixin template Emitters() {
  mixin InternalMarkup;
  /++ Command line switches. +/
  class CLI {
    /++
      Record the effect of one command line switch.

      Params:
        cmdlnins = the switch as given; only "--no-assert" is recognised
        actions  = table to update
      Returns: `actions`, with actions["assert"] set to "no" when the
               switch is "--no-assert", otherwise unchanged
    +/
    string[string] extract_actions(string cmdlnins, string[string] actions) {
      if (cmdlnins == "--no-assert") {
        actions["assert"] = "no";
      }
      return actions;
    }
  }
  /++ Hands out object citation numbers (ocn). +/
  class OCNemitter : AssertOCN {
    int ocn, ocn_;
    /++
      Params:
        ocn_status_flag = must not exceed 2; 0 requests the next number
      Returns: the next number in sequence when the flag is 0, otherwise 0
               (the running count in ocn_ is left untouched in that case)
    +/
    int ocn_emitter(int ocn_status_flag)
    in { assert(ocn_status_flag <= 2); }
    body {
      ocn = (ocn_status_flag == 0) ? ++ocn_ : 0;
      return ocn;
    }
  }
return ocn; + } + invariant() { + } + } + class ObjAttributes { + string[string] obj_txt; + string para_and_blocks(string obj_txt_in) + in { } + body { + auto rgx = new Rgx(); + obj_txt["munge"]=obj_txt_in; + if (match(obj_txt_in, rgx.para_bullet)) { + obj_txt["attrib"] =" \"bullet\": \"true\"," + ~ " \"indent_first\": 0," + ~ " \"indent_rest\": 0,"; + } else if (auto m = match(obj_txt_in, rgx.para_bullet_indent)) { + obj_txt["attrib"] =" \"bullet\": \"true\"," + ~ " \"indent_first\": " ~ to!string(m.captures[1]) ~ "," + ~ " \"indent_rest\": " ~ to!string(m.captures[1]) ~ ","; + } else if (auto m = match(obj_txt_in, rgx.para_indent_hang)) { + obj_txt["attrib"] =" \"bullet\": \"false\"," + ~ " \"indent_first\": " ~ to!string(m.captures[1]) ~ "," + ~ " \"indent_rest\": " ~ to!string(m.captures[2]) ~ ","; + } else if (auto m = match(obj_txt_in, rgx.para_indent)) { + obj_txt["attrib"] =" \"bullet\": \"false\"," + ~ " \"indent_first\": " ~ to!string(m.captures[1]) ~ "," + ~ " \"indent_rest\": " ~ to!string(m.captures[1]) ~ ","; + } else { + obj_txt["attrib"] =" \"bullet\": \"false\"," + ~ " \"indent_first\": 0," + ~ " \"indent_rest\": 0,"; + } + return obj_txt["attrib"]; + } + string para(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"content\"," + ~ " \"of\": \"para\"," + ~ " \"is\": \"para\""; + return obj_txt["attrib"]; + } + invariant() { + } + string heading(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"content\"," + ~ " \"of\": \"para\"," + ~ " \"is\": \"heading\""; + return obj_txt["attrib"]; + } + invariant() { + } + string header_make(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"head\"," + ~ " \"of\": \"header\"," + ~ " \"is\": \"header_make\""; + return obj_txt["attrib"]; + } + invariant() { + } + string header_metadata(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + 
obj_txt["attrib"] = " \"use\": \"head\"," + ~ " \"of\": \"header\"," + ~ " \"is\": \"header_metadata\""; + return obj_txt["attrib"]; + } + invariant() { + } + string code(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"content\"," + ~ " \"of\": \"block\"," + ~ " \"is\": \"code\""; + return obj_txt["attrib"]; + } + invariant() { + } + string group(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"content\"," + ~ " \"of\": \"block\"," + ~ " \"is\": \"group\""; + return obj_txt["attrib"]; + } + invariant() { + } + string block(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"content\"," + ~ " \"of\": \"block\"," + ~ " \"is\": \"block\""; + return obj_txt["attrib"]; + } + invariant() { + } + string verse(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"content\"," + ~ " \"of\": \"block\"," + ~ " \"is\": \"verse\""; + return obj_txt["attrib"]; + } + invariant() { + } + string quote(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"content\"," + ~ " \"of\": \"block\"," + ~ " \"is\": \"quote\""; + return obj_txt["attrib"]; + } + invariant() { + } + string table(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"content\"," + ~ " \"of\": \"block\"," + ~ " \"is\": \"table\""; + return obj_txt["attrib"]; + } + invariant() { + } + string comment(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["attrib"] = " \"use\": \"comment\"," + ~ " \"of\": \"comment\"," + ~ " \"is\": \"comment\""; + return obj_txt["attrib"]; + } + invariant() { + } + } + class ObjInlineMarkupMunge { + string[string] obj_txt; + int n_foot, n_foot_reg, n_foot_sp_asterisk, n_foot_sp_plus; + string obj_txt_out, tail, note; + private auto initialize_note_numbers() { 
+ n_foot = 0; + n_foot_reg = 0; + n_foot_sp_asterisk = 0; + n_foot_sp_plus = 0; + } + private auto object_notes_(string obj_txt_in) + in { } + body { + auto rgx = new Rgx(); + auto mkup = new InternalMarkup(); + obj_txt_out = ""; + tail = ""; + obj_txt_in = replaceAll( + obj_txt_in, + rgx.inline_notes_curly_sp_asterisk, + (mkup.en_a_o ~ "*" ~ " $1" ~ mkup.en_a_c) + ); + obj_txt_in = + replaceAll( + obj_txt_in, + rgx.inline_notes_curly_sp_plus, + (mkup.en_a_o ~ "+" ~ " $1" ~ mkup.en_a_c) + ); + obj_txt_in = + replaceAll( + obj_txt_in, + rgx.inline_notes_curly, + (mkup.en_a_o ~ " $1" ~ mkup.en_a_c) + ); + if (match(obj_txt_in, rgx.inline_notes_al_gen)) { + foreach(m; matchAll(obj_txt_in, rgx.inline_text_and_note_al)) { + if (match(obj_txt_in, rgx.inline_al_delimiter_open_asterisk)) { + n_foot_sp_asterisk++; + n_foot=n_foot_sp_asterisk; + } else if (match(obj_txt_in, rgx.inline_al_delimiter_open_plus)) { + n_foot_sp_plus++; + n_foot=n_foot_sp_plus; + } else { + n_foot_reg++; + n_foot=n_foot_reg; + } + obj_txt_out ~= replaceFirst( + m.hit, + rgx.inline_al_delimiter_open_regular, + (mkup.en_a_o ~ to!string(n_foot)) + ); + tail = m.post; + } + } else { + obj_txt_out = obj_txt_in; + } + debug(footnotes) { + writeln(obj_txt_out, tail); + } + obj_txt_out = obj_txt_out ~ tail; + debug(footnotesdone) { + foreach(m; matchAll(obj_txt_out, + (mkup.en_a_o ~ `\s*(.+?)` ~ mkup.en_a_c))) { + writeln(m.captures[1]); + writeln(m.hit); + } + } + return obj_txt_out; + } + string para(string obj_txt_in) + in { } + body { + auto rgx = new Rgx(); + obj_txt["munge"]=obj_txt_in; + obj_txt["munge"]=replaceFirst(obj_txt["munge"], rgx.para_attribs, ""); + obj_txt["munge"]=replaceFirst(obj_txt["munge"], rgx.ocn_off_all, ""); + obj_txt["munge"]=object_notes_(obj_txt["munge"]); + debug(munge) { + writeln(__LINE__); + writeln(obj_txt_in); + writeln(__LINE__); + writeln(to!string(obj_txt["munge"])); + } + return obj_txt["munge"]; + } + string heading(string obj_txt_in) + in { } + body { + auto rgx = 
new Rgx(); + obj_txt["munge"]=obj_txt_in; + obj_txt["munge"]=replaceFirst(obj_txt["munge"], rgx.heading, ""); + obj_txt["munge"]=replaceFirst(obj_txt["munge"], rgx.ocn_off_all, ""); + obj_txt["munge"]=object_notes_(obj_txt["munge"]); + debug(munge) { + writeln(__LINE__); + writeln(obj_txt_in); + writeln(__LINE__); + writeln(to!string(obj_txt["munge"])); + } + return obj_txt["munge"]; + } + invariant() { + } + string header_make(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + return obj_txt["munge"]; + } + invariant() { + } + string header_metadata(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + return obj_txt["munge"]; + } + invariant() { + } + string code(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + return obj_txt["munge"]; + } + invariant() { + } + string group(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["munge"]=object_notes_(obj_txt["munge"]); + return obj_txt["munge"]; + } + invariant() { + } + string block(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["munge"]=object_notes_(obj_txt["munge"]); + return obj_txt["munge"]; + } + invariant() { + } + string verse(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + obj_txt["munge"]=object_notes_(obj_txt["munge"]); + return obj_txt["munge"]; + } + invariant() { + } + string quote(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + return obj_txt["munge"]; + } + invariant() { + } + string table(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + return obj_txt["munge"]; + } + invariant() { + } + string comment(string obj_txt_in) + in { } + body { + obj_txt["munge"]=obj_txt_in; + return obj_txt["munge"]; + } + invariant() { + } + } + class ObjInlineMarkup : AssertObjInlineMarkup { + auto munge = new ObjInlineMarkupMunge(); + string[string] obj_txt; + string obj_inline_markup(string obj_is_, string obj_raw) + in { } + body { + 
obj_txt["munge"]=obj_raw.dup; + obj_txt["munge"]=(match(obj_is_, ctRegex!(`verse|code`))) + ? obj_txt["munge"] + : strip(obj_txt["munge"]); + switch (obj_is_) { + case "header_make": + obj_txt["munge"]=munge.header_make(obj_txt["munge"]); + break; + case "header_metadata": + obj_txt["munge"]=munge.header_metadata(obj_txt["munge"]); + break; + case "heading": + obj_txt["munge"]=munge.heading(obj_txt["munge"]); + break; + case "para": + obj_txt["munge"]=munge.para(obj_txt["munge"]); + break; + case "code": + obj_txt["munge"]=munge.code(obj_txt["munge"]); + break; + case "group": + obj_txt["munge"]=munge.group(obj_txt["munge"]); + break; + case "block": + obj_txt["munge"]=munge.block(obj_txt["munge"]); + break; + case "verse": + obj_txt["munge"]=munge.verse(obj_txt["munge"]); + break; + case "quote": + obj_txt["munge"]=munge.quote(obj_txt["munge"]); + break; + case "table": + obj_txt["munge"]=munge.table(obj_txt["munge"]); + break; + case "comment": + obj_txt["munge"]=munge.comment(obj_txt["munge"]); + break; + case "doc_end_reset": + munge.initialize_note_numbers(); + break; + default: + break; + } + return obj_txt["munge"]; + } + invariant() { + } + } + class ObjAttrib : AssertObjAttrib { +// auto sink = appender!(char[])(); + auto attrib = new ObjAttributes(); + string[string] obj_attrib; + string obj_attributes(string obj_is_, string obj_raw, string node) + in { } + body { + scope(exit) { + destroy(obj_raw); + destroy(node); + } + JSONValue node_j = parseJSON(node); + obj_attrib.remove("json"); + obj_attrib["json"] ="{"; + switch (obj_is_) { + case "header_make": + obj_attrib["json"] ~= attrib.header_make(obj_raw); + break; + case "header_metadata": + obj_attrib["json"] ~= attrib.header_metadata(obj_raw); + break; + case "heading": + obj_attrib["json"] ~= attrib.heading(obj_raw); // + break; + case "para": + obj_attrib["json"] ~= attrib.para_and_blocks(obj_raw) + ~ attrib.para(obj_raw); + break; + case "code": + obj_attrib["json"] ~= attrib.code(obj_raw); + break; 
+ case "group": + obj_attrib["json"] ~= attrib.para_and_blocks(obj_raw) + ~ attrib.group(obj_raw); + break; + case "block": + obj_attrib["json"] ~= attrib.para_and_blocks(obj_raw) + ~ attrib.block(obj_raw); + break; + case "verse": + obj_attrib["json"] ~= attrib.verse(obj_raw); + break; + case "quote": + obj_attrib["json"] ~= attrib.quote(obj_raw); + break; + case "table": + obj_attrib["json"] ~= attrib.table(obj_raw); + break; + case "comment": + obj_attrib["json"] ~= attrib.comment(obj_raw); + break; + default: + obj_attrib["json"] ~= attrib.para(obj_raw); + break; + } + obj_attrib["json"] ~=" }"; + JSONValue oa_j = parseJSON(obj_attrib["json"]); + assert( + (oa_j.type == JSON_TYPE.OBJECT) && + (node_j.type == JSON_TYPE.OBJECT) + ); + if (obj_is_ == "heading") { + oa_j.object["ocn"] = node_j["ocn"]; + oa_j.object["lvn"] = node_j["lvn"]; + oa_j.object["lcn"] = node_j["lcn"]; + oa_j.object["heading_pointer"] = + node_j["heading_pointer"]; // check + oa_j.object["doc_object_pointer"] = + node_j["doc_object_pointer"]; // check + } + oa_j.object["parent_ocn"] = node_j["parent_ocn"]; + oa_j.object["parent_lvn"] = node_j["parent_lvn"]; + obj_attrib["json"] = oa_j.toString(); + debug(structattrib) { + if (oa_j["is"].str() == "heading") { + writeln(obj_attrib["json"]); + writeln( + "is: ", oa_j["is"].str(), + "; ocn: ", oa_j["ocn"].integer() + ); + } + } + return obj_attrib["json"]; + } + invariant() { + } + } + class HeaderDocMetadataMakeJson { + auto rgx = new Rgx(); + string hm, hs; + auto header_metadata_and_make_jsonstr( + string header, + JSONValue[string] dochead_metadata, + JSONValue[string] dochead_make + ) + in { } + body { + scope(exit) { + destroy(header); + destroy(dochead_metadata); + destroy(dochead_make); + } + if (auto t = match(header, rgx.head_main)) { + char[][] obj_spl = split( + cast(char[]) header, + rgx.line_delimiter_ws_strip + ); + auto hm = to!string(t.captures[1]); + if (match(hm, rgx.main_headers)) { + foreach (line; obj_spl) { + if (auto m = 
match(line, rgx.head_main)) { + if (!empty(m.captures[2])) { + if (hm == "creator") { + dochead_metadata[hm]["author"].str = + to!string(m.captures[2]); + } else if (hm == "title") { + dochead_metadata[hm]["main"].str = + to!string(m.captures[2]); + } else if (hm == "publisher") { + dochead_metadata[hm]["name"].str = + to!string(m.captures[2]); + } + } + } else if (auto s = match(line, rgx.head_sub)) { + if (!empty(s.captures[2])) { + auto hs = to!string(s.captures[1]); + if ((hm == "make" ) + && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) { + switch (hm) { + case "make": + if (match(hs, rgx.subhead_make)) { + if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) { + dochead_make[hm][hs].str = to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + default: + break; + } + } else if (dochead_metadata[hm].type() == JSON_TYPE.OBJECT) { + switch (hm) { + case "creator": + if (match(hs, rgx.subhead_creator)) { + if (dochead_metadata[hm][hs].type() == JSON_TYPE.STRING) { + dochead_metadata[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "title": + if (match(hs, rgx.subhead_title)) { + if ((hs == "subtitle") + && (dochead_metadata[hm]["sub"].type() == JSON_TYPE.STRING)) { + dochead_metadata[hm]["sub"].str = + to!string(s.captures[2]); + } else if (dochead_metadata[hm][hs].type() == JSON_TYPE.STRING) { + dochead_metadata[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "rights": + if (match(hs, rgx.subhead_rights)) { + if (dochead_metadata[hm][hs].type() == JSON_TYPE.STRING) { + dochead_metadata[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "date": + if 
(match(hs, rgx.subhead_date)) { + if (dochead_metadata[hm][hs].type() == JSON_TYPE.STRING) { + dochead_metadata[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "original": + if (match(hs, rgx.subhead_original)) { + if (dochead_metadata[hm][hs].type() == JSON_TYPE.STRING) { + dochead_metadata[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "classify": + if (match(hs, rgx.subhead_classify)) { + if (dochead_metadata[hm][hs].type() == JSON_TYPE.STRING) { + dochead_metadata[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "identifier": + if (match(hs, rgx.subhead_identifier)) { + if (dochead_metadata[hm][hs].type() == JSON_TYPE.STRING) { + dochead_metadata[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "notes": + if (match(hs, rgx.subhead_notes)) { + if (dochead_metadata[hm][hs].type() == JSON_TYPE.STRING) { + dochead_metadata[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "publisher": + if (match(hs, rgx.subhead_publisher)) { + if (dochead_metadata[hm][hs].type() == JSON_TYPE.STRING) { + dochead_metadata[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "links": + destroy(hm); + destroy(hs); + break; + default: + break; + } + } + } + } + } + } else { + writeln("not a valid header type:", hm); + } + } + auto t = tuple(dochead_metadata, dochead_make); + static assert(!isTypeTuple!(t)); + return t; + } + } + class 
HeaderMetadataMakeHash {
+    // Converts raw header blocks to JSON object strings. Results are kept in
+    // `head`, keyed by main header name; nothing in this class clears `head`.
+    auto rgx = new Rgx();
+    string header_main;
+    string[string] head;
+    /// Build the JSON object string for one header block.
+    ///
+    /// The main header name is capture 1 of rgx.head_main on the whole block.
+    /// The block is then split with rgx.line_delimiter_ws_strip and each line
+    /// matching rgx.head_main or rgx.head_sub contributes one name/value pair.
+    /// The local `header_main` shadows the member of the same name, which this
+    /// method never assigns.
+    ///
+    /// Params:
+    ///   header = raw text of a single header block
+    /// Returns: the member `head`; it is returned as it was when `header`
+    ///   does not match rgx.head_main.
+    string[string] header_topic_hash(string header)
+    in { }
+    body {
+      if (auto t = match(header, rgx.head_main)) {
+        char[][] obj_spl = split(
+          cast(char[]) header,
+          rgx.line_delimiter_ws_strip
+        );
+        auto header_main = to!string(t.captures[1]);
+        head[header_main] = "{";
+        foreach (line; obj_spl) {
+          if (auto m = match(line, rgx.head_main)) {
+            if (!empty(m.captures[2])) {
+              head[header_main] ~=
+                "\"" ~ header_main ~
+                "\": \"" ~
+                to!string(m.captures[2]) ~
+                "\",";
+            }
+          } else if (auto s = match(line, rgx.head_sub)) {
+            // Key and value are written together. Writing the key alone when
+            // the value is empty left a dangling `"key":` and the result was
+            // not parseable JSON; an empty value is now written as "".
+            // NOTE(review): values are not JSON-escaped here or above, so a
+            // quote or backslash in a header value still breaks the object.
+            head[header_main] ~=
+              "\"" ~ to!string(s.captures[1]) ~ "\":" ~
+              "\"" ~ to!string(s.captures[2]) ~ "\",";
+          }
+        }
+        head[header_main] = replaceFirst(
+          head[header_main],
+          rgx.tailing_comma,
+          ""
+        );
+        head[header_main] ~= "}";
+        debug(headerjson) {
+          JSONValue j = parseJSON(head[header_main]);
+          assert(
+            (j.type == JSON_TYPE.OBJECT)
+          );
+        }
+      }
+      return head;
+    }
+    invariant() {
+    }
+  }
+  // Accumulates book-index terms: bi[main term][sub term] is a list of ocn
+  // reference strings; the key "_a" holds the references of the main term
+  // itself. `bi` is a member and is appended to on every call.
+  class BookIndexNuggetHash : AssertBookIndexNuggetHash {
+    string main_term, sub_term, sub_term_bits;
+    uint ocn_offset, ocn_endpoint;
+    string[] ocns;
+    string[][string][string] bi;
+    string[][string][string] hash_nugget;
+    string[] bi_main_terms_split_arr;
+    /// Add the terms of one book-index markup string to `bi`.
+    ///
+    /// Params:
+    ///   bookindex = book-index markup attached to one object; an empty
+    ///               string adds nothing
+    ///   ocn       = number of the object the markup belongs to
+    /// Returns: `hash_nugget`, which is assigned from `bi` before returning.
+    ///
+    /// A term matching rgx.bi_term_and_ocns_match is stored as a range from
+    /// ocn to ocn + offset, any other term as the single ocn.
+    /// NOTE(review): main terms join the range with "-" while sub terms use
+    /// " - "; confirm against rgx.book_index_go whether both are intended.
+    string[][string][string] bookindex_nugget_hash(string bookindex, int ocn)
+    in { }
+    body {
+      auto rgx = new Rgx();
+      if (!bookindex.empty) {
+        auto bi_main_terms_split_arr =
+          split(bookindex, rgx.bi_main_terms_split);
+        foreach (bi_main_terms_content; bi_main_terms_split_arr) {
+          auto bi_main_term_and_rest =
+            split(bi_main_terms_content, rgx.bi_main_term_plus_rest_split);
+          if (auto m = match(
+            bi_main_term_and_rest[0],
+            rgx.bi_term_and_ocns_match)
+          ) {
+            main_term = strip(m.captures[1]);
+            ocn_offset = to!uint(m.captures[2]);
+            ocn_endpoint=(ocn + ocn_offset);
+            ocns ~= (to!string(ocn) ~ "-" ~ to!string(ocn_endpoint));
+          } else {
+            main_term = strip(bi_main_term_and_rest[0]);
+            ocns ~= to!string(ocn);
+          }
+          bi[main_term]["_a"] ~= ocns;
+          ocns=null;
+          if (bi_main_term_and_rest.length > 1) {
+            auto bi_sub_terms_split_arr =
+              split(
+                bi_main_term_and_rest[1],
+                rgx.bi_sub_terms_plus_ocn_offset_split
+              );
+            foreach (sub_terms_bits; bi_sub_terms_split_arr) {
+              if (auto m = match(sub_terms_bits, rgx.bi_term_and_ocns_match)) {
+                sub_term = strip(m.captures[1]);
+                ocn_offset = to!uint(m.captures[2]);
+                ocn_endpoint=(ocn + ocn_offset);
+                ocns ~= (to!string(ocn) ~ " - " ~ to!string(ocn_endpoint));
+              } else {
+                sub_term = strip(sub_terms_bits);
+                ocns ~= to!string(ocn);
+              }
+              if (!empty(sub_term)) {
+                bi[main_term][sub_term] ~= ocns;
+              }
+              ocns=null;
+            }
+          }
+        }
+      }
+      hash_nugget = bi;
+      return hash_nugget;
+    }
+    invariant() {
+    }
+  }
+  class BookIndexReport {
+    int mkn, skn;
+    auto bookindex_report_sorted(
+      string[][string][string] bookindex_unordered_hashes
+    ) {
+      auto mainkeys=bookindex_unordered_hashes.byKey.array.
+        sort!("toLower(a) < toLower(b)", SwapStrategy.stable).release;
+      foreach (mainkey; mainkeys) {
+        auto subkeys=bookindex_unordered_hashes[mainkey].byKey.array.
+          sort!("toLower(a) < toLower(b)", SwapStrategy.stable).release;
+        foreach (subkey; subkeys) {
+          debug(bookindex) {
+            writeln(
+              mainkey, ": ",
+              subkey, ": ",
+              to!string(bookindex_unordered_hashes[mainkey][subkey])
+            );
+          }
+          skn++;
+        }
+        mkn++;
+      }
+    }
+  }
+  // Walks the book index with main and sub keys in default sort order and,
+  // under -debug=bookindex, prints it indented. mkn and skn count the main
+  // and sub keys visited.
+  class BookIndexReportIndent {
+    int mkn, skn;
+    auto bookindex_report_indented(
+      string[][string][string] bookindex_unordered_hashes
+    ) {
+      auto mainkeys=
+        bookindex_unordered_hashes.byKey.array.sort().release;
+      foreach (mainkey; mainkeys) {
+        debug(bookindex) {
+          writeln(mainkey);
+        }
+        auto subkeys=
+          bookindex_unordered_hashes[mainkey].byKey.array.sort().release;
+        foreach (subkey; subkeys) {
+          debug(bookindex) {
+            writeln(" ", subkey);
+            writeln(" ", to!string(
+              bookindex_unordered_hashes[mainkey][subkey]
+            ));
+          }
+          skn++;
+        }
+        mkn++;
+      }
+    }
+  }
+  // Renders the book index as markup text, either written to stdout or
+  // returned as document objects / strings. The counters mkn and skn are
+  // members shared by all three methods and are never reset here.
+  class BookIndexReportSection {
+    mixin ObjectSetters;
+    int mkn, skn;
+    auto rgx = new Rgx();
+    /// Write the book index to stdout, one main term per output line group.
+    ///
+    /// Removes the "_a" entry from bookindex_unordered_hashes[mainkey] after
+    /// printing it, so that the sub-key loop does not list it again.
+    /// NOTE(review): the removal presumably is visible to the caller, since
+    /// associative arrays are reference types; a second pass over the same
+    /// hash would then not find "_a" - confirm with the callers.
+    auto bookindex_write_section(
+      string[][string][string] bookindex_unordered_hashes
+    ) {
+      auto mainkeys=bookindex_unordered_hashes.byKey.array.sort().release;
+      foreach (mainkey; mainkeys) {
+        write("_0_1 !{", mainkey, "}! ");
+        foreach (ref_; bookindex_unordered_hashes[mainkey]["_a"]) {
+          auto go = replaceAll(ref_, rgx.book_index_go, "$1");
+          write(" {", ref_, "}#", go, ", ");
+        }
+        writeln(" \\\\");
+        bookindex_unordered_hashes[mainkey].remove("_a");
+        auto subkeys=
+          bookindex_unordered_hashes[mainkey].byKey.array.sort().release;
+        foreach (subkey; subkeys) {
+          write(" ", subkey, ", ");
+          foreach (ref_; bookindex_unordered_hashes[mainkey][subkey]) {
+            auto go = replaceAll(ref_, rgx.book_index_go, "$1");
+            write(" {", ref_, "}#", go, ", ");
+          }
+          writeln(" \\\\");
+          skn++;
+        }
+        mkn++;
+      }
+    }
+    /// Build the book index as document objects: a "Book Index" heading, an
+    /// "Index" heading, then one paragraph object per main term.
+    ///
+    /// Params:
+    ///   bookindex_unordered_hashes = index terms; the "_a" entry of every
+    ///                                main key is removed while building
+    ///   ocn = number given to the first heading; incremented after every
+    ///         object created here
+    /// Returns: tuple(objects, ocn after the last object)
+    auto bookindex_build_section(
+      string[][string][string] bookindex_unordered_hashes,
+      int ocn
+    ) {
+      string type;
+      int type_heading;
+      string lev, lvn, lcn;
+      string attrib;
+      string indent_first;
+      string indent_second;
+      auto set_oa = new ObjectAbstractSet();
+      auto mainkeys =
+        bookindex_unordered_hashes.byKey.array.sort().release;
+      string bi_tmp;
+      // Grows on demand. The fixed string[string][1024] buffer used before
+      // raised a RangeError once the two headings plus the main terms
+      // exceeded 1024 entries. Starting at length mkn keeps every object at
+      // the index the member counter mkn gave it in the fixed buffer.
+      string[string][] bookindex;
+      bookindex.length = mkn;
+      writeln(mainkeys.length);
+      type_heading=1;
+      bi_tmp = "Book Index";
+      attrib="";
+      lev="B";
+      lvn="1";
+      lcn="1";
+      bookindex ~=
+        set_oa.contents_heading(
+          type_heading,
+          bi_tmp,
+          attrib,
+          ocn,
+          lev,
+          lvn,
+          lcn
+        );
+      ocn++;
+      mkn++;
+      type_heading=1;
+      bi_tmp = "Index";
+      attrib="";
+      lev="1";
+      lvn="4";
+      lcn="2";
+      bookindex ~=
+        set_oa.contents_heading(
+          type_heading,
+          bi_tmp,
+          attrib,
+          ocn,
+          lev,
+          lvn,
+          lcn
+        );
+      ocn++;
+      mkn++;
+      foreach (mainkey; mainkeys) {
+        bi_tmp = "!{" ~ mainkey ~ "}! ";
+        foreach (ref_; bookindex_unordered_hashes[mainkey]["_a"]) {
+          auto go = replaceAll(ref_, rgx.book_index_go, "$1");
+          bi_tmp ~= " {" ~ ref_ ~ "}#" ~ go ~ ", ";
+        }
+        bi_tmp ~= " \\\\\n ";
+        bookindex_unordered_hashes[mainkey].remove("_a");
+        auto subkeys =
+          bookindex_unordered_hashes[mainkey].byKey.array.sort().release;
+        foreach (subkey; subkeys) {
+          bi_tmp ~= subkey ~ ", ";
+          foreach (ref_; bookindex_unordered_hashes[mainkey][subkey]) {
+            auto go = replaceAll(ref_, rgx.book_index_go, "$1");
+            bi_tmp ~= " {" ~ ref_ ~ "}#" ~ go ~ ", ";
+          }
+          bi_tmp ~= " \\\\\n ";
+          skn++;
+        }
+        bi_tmp = replaceFirst(bi_tmp, rgx.trailing_linebreak, "");
+        type="para";
+        attrib="";
+        indent_first = "0";
+        indent_second = "1";
+        attrib="";
+        bookindex ~=
+          set_oa.contents_para(
+            type,
+            bi_tmp,
+            attrib,
+            ocn,
+            indent_first,
+            indent_second,
+            false
+          );
+        ocn++;
+        mkn++;
+      }
+      auto t = tuple(bookindex, ocn);
+      return t;
+    }
+    /// Build the book index as plain markup strings, one per main term.
+    ///
+    /// Same text as bookindex_build_section, except that each entry starts
+    /// with "_0_1 " and no heading or paragraph objects are created.
+    /// Returns: the strings, at the indices given by the member counter mkn.
+    auto bookindex_build_section_(
+      string[][string][string] bookindex_unordered_hashes
+    ) {
+      auto mainkeys =
+        bookindex_unordered_hashes.byKey.array.sort().release;
+      string bi_tmp;
+      // Dynamic for the same reason as in bookindex_build_section: the fixed
+      // string[1024] buffer overflowed on indexes with more main terms.
+      string[] bookindex;
+      bookindex.length = mkn;
+      writeln(mainkeys.length);
+      foreach (mainkey; mainkeys) {
+        bi_tmp = "_0_1 !{" ~ mainkey ~ "}! ";
+        foreach (ref_; bookindex_unordered_hashes[mainkey]["_a"]) {
+          auto go = replaceAll(ref_, rgx.book_index_go, "$1");
+          bi_tmp ~= " {" ~ ref_ ~ "}#" ~ go ~ ", ";
+        }
+        bi_tmp ~= " \\\\\n ";
+        bookindex_unordered_hashes[mainkey].remove("_a");
+        auto subkeys =
+          bookindex_unordered_hashes[mainkey].byKey.array.sort().release;
+        foreach (subkey; subkeys) {
+          bi_tmp ~= subkey ~ ", ";
+          foreach (ref_; bookindex_unordered_hashes[mainkey][subkey]) {
+            auto go = replaceAll(ref_, rgx.book_index_go, "$1");
+            bi_tmp ~= " {" ~ ref_ ~ "}#" ~ go ~ ", ";
+          }
+          bi_tmp ~= " \\\\\n ";
+          skn++;
+        }
+        bi_tmp = replaceFirst(bi_tmp, rgx.trailing_linebreak, "");
+        bookindex ~= bi_tmp;
+        mkn++;
+      }
+      return bookindex;
+    }
+  }
+  // Collects inline notes into `object_notes` and later turns them into an
+  // endnotes section.
+  class NotesSection {
+    mixin ObjectSetters;
+    string object_notes;
+    int previous_count;
+    int mkn;
+    auto rgx = new Rgx();
+    /// Append every inline note found in object number `counter` to
+    /// `object_notes`, each followed by the marker "』".
+    ///
+    /// Returns: the accumulated `object_notes` string.
+    ///
+    /// NOTE(review): the in-contract also assigns previous_count, so that
+    /// bookkeeping only happens in builds that keep contracts enabled.
+    /// NOTE(review): the static array parameter is passed by value, which
+    /// presumably copies all 131072 slots on every call - consider a slice.
+    private auto gather_notes_for_endnote_section(
+      string[string][131072] contents_arbitrary_max_length_set,
+      int counter
+    )
+    in {
+      assert((contents_arbitrary_max_length_set[counter]["is"] == "para")
+        || (contents_arbitrary_max_length_set[counter]["is"] == "heading"));
+      assert(counter > previous_count);
+      previous_count=counter;
+      assert(
+        match(contents_arbitrary_max_length_set[counter]["obj"],
+        rgx.inline_notes_delimiter_al_regular_number_note)
+      );
+    }
+    body {
+      foreach(m;
+      matchAll(contents_arbitrary_max_length_set[counter]["obj"],
+      rgx.inline_notes_delimiter_al_regular_number_note)) {
+        debug(endnotes_build) {
+          writeln(
+            "{^{", m.captures[1], ".}^}#noteref_", m.captures[1], " ",
+            m.captures[2]); // sometimes need segment name (segmented html & epub)
+        }
+        object_notes ~=
+          "{^{" ~ m.captures[1] ~ ".}^}#noteref_" ~
+          m.captures[1] ~ " " ~ m.captures[2] ~ "』";
+      }
+      return object_notes;
+    }
+    private auto gathered_notes()
+    in {
+    }
+    body {
+      string[] endnotes_;
+      if (object_notes.length > 1) {
+        endnotes_ = (split(object_notes,
rgx.break_string))[0..$-1];
+      }
+      return endnotes_;
+    }
+    /// Build the endnotes section: two "Endnotes" headings followed by one
+    /// paragraph object per note returned by gathered_notes().
+    ///
+    /// Params:
+    ///   ocn = number given to the first heading; incremented after every
+    ///         object created here
+    /// Returns: tuple(endnote objects, ocn after the last object)
+    private auto endnote_objects(int ocn)
+    in {
+    }
+    body {
+      auto set_oa = new ObjectAbstractSet();
+      // Grows on demand. The fixed string[string][1024] buffer used before
+      // raised a RangeError once the two headings plus the notes exceeded
+      // 1024 entries. Starting at length mkn keeps every object at the index
+      // the member counter mkn gave it in the fixed buffer.
+      string[string][] endnotes;
+      endnotes.length = mkn;
+      auto endnotes_ = gathered_notes();
+      string type;
+      int type_heading;
+      string lev, lvn, lcn;
+      string attrib;
+      string indent_first;
+      string indent_second;
+      type_heading=1;
+      attrib="";
+      lev="B";
+      lvn="1";
+      lcn="1";
+      endnotes ~=
+        set_oa.contents_heading(
+          type_heading,
+          "Endnotes",
+          attrib,
+          ocn,
+          lev,
+          lvn,
+          lcn
+        );
+      ocn++;
+      mkn++;
+      type_heading=1;
+      attrib="";
+      lev="1";
+      lvn="4";
+      lcn="2";
+      endnotes ~=
+        set_oa.contents_heading(
+          type_heading,
+          "Endnotes",
+          attrib,
+          ocn,
+          lev,
+          lvn,
+          lcn
+        );
+      ocn++;
+      mkn++;
+      foreach (endnote; endnotes_) {
+        type="para";
+        attrib="";
+        indent_first = "0";
+        indent_second = "0";
+        attrib="";
+        endnotes ~=
+          set_oa.contents_para(
+            type,
+            endnote,
+            attrib,
+            ocn,
+            indent_first,
+            indent_second,
+            false
+          );
+        ocn++;
+        mkn++;
+      }
+      auto t = tuple(endnotes, ocn);
+      return t;
+    }
+  }
+  // Completes raw bibliography entries (JSON object strings) with a sort key
+  // and returns them sorted by that key.
+  class Bibliography {
+    /// Parse, complete and sort the bibliography entries.
+    ///
+    /// Params:
+    ///   biblio_unsorted_incomplete = one JSON object string per entry
+    /// Returns: the entries as JSONValue objects, ordered by the key
+    ///   "sortby_deemed_author_year_title".
+    public JSONValue[] bibliography(string[] biblio_unsorted_incomplete)
+    in { }
+    body {
+      JSONValue[] biblio_unsorted =
+        biblio_unsorted_complete(biblio_unsorted_incomplete);
+      JSONValue[] biblio_sorted = biblio_sort(biblio_unsorted);
+      biblio_debug(biblio_sorted);
+      return biblio_sorted;
+    }
+    /// Parse every entry and, for entries with a non-empty "fulltitle", set
+    /// "deemed_author" (first author, else first editor) and the sort key
+    /// "deemed author; year; fulltitle".
+    final private JSONValue[] biblio_unsorted_complete(
+      string[] biblio_unordered
+    ) {
+      // Dynamic array: the fixed JSONValue[1024] buffer used before raised a
+      // RangeError on bibliographies with more than 1024 entries.
+      JSONValue[] biblio_unsorted_array_of_json_objects;
+      foreach (bibent; biblio_unordered) {
+        JSONValue j = parseJSON(bibent);
+        if (!empty(j["fulltitle"].str)) {
+          if (!empty(j["author_raw"].str)) {
+            j["deemed_author"]=j["author_arr"][0];
+          } else if (!empty(j["editor_raw"].str)) {
+            j["deemed_author"]=j["editor_arr"][0];
+          } else if ("deemed_author" !in j.object) {
+            // Neither author nor editor: make sure the key read just below
+            // exists. A value already present in the entry is left as it is.
+            j["deemed_author"]=JSONValue("");
+          }
+          j["sortby_deemed_author_year_title"] = (
+            j["deemed_author"].str ~
+             "; " ~
+            j["year"].str ~
+            "; " ~
+            j["fulltitle"].str
+          );
+        }
+        biblio_unsorted_array_of_json_objects ~= j;
+      }
+      return biblio_unsorted_array_of_json_objects;
+    }
+    /// Sort the entries in place by "sortby_deemed_author_year_title" and
+    /// return a copy of the sorted range.
+    final private JSONValue[] biblio_sort(JSONValue[] biblio_unordered) {
+      // Entries without a "fulltitle" are not given a sort key by
+      // biblio_unsorted_complete; they compare as "" instead of making the
+      // comparator throw on the missing key.
+      static string sort_key(JSONValue entry) {
+        auto found = "sortby_deemed_author_year_title" in entry.object;
+        return (found is null) ? "" : found.str;
+      }
+      JSONValue[] biblio_sorted;
+      biblio_sorted =
+        sort!((a, b){
+          return (sort_key(a) < sort_key(b));
+        })(biblio_unordered).array;
+      debug(bibliosorted) {
+        foreach (j; biblio_sorted) {
+          if (!empty(j["fulltitle"].str)) {
+            writeln(j["sortby_deemed_author_year_title"]);
+          }
+        }
+      }
+      return biblio_sorted;
+    }
+    /// Under -debug=biblio, print the sort key of every titled entry.
+    auto biblio_debug(JSONValue[] biblio_sorted) {
+      debug(biblio) {
+        foreach (j; biblio_sorted) {
+          if (!empty(j["fulltitle"].str)) {
+            writeln(j["sortby_deemed_author_year_title"]);
+          }
+        }
+      }
+    }
+  }
+  // Emits the JSON "node" string of each document object. lv0..lv7 hold the
+  // ocn of the most recent heading at each level (0 when none is open).
+  class NodeStructureMetadata : AssertNodeJSON {
+    int lv, lv0, lv1, lv2, lv3, lv4, lv5, lv6, lv7;
+    uint ocn;
+    uint[string] p_; // p_ parent_
+    string node;
+    /// Node string for a non-heading object.
+    ///
+    /// The parent is the deepest of levels 7, 6, 5 that has a heading
+    /// recorded, otherwise level 4 (with whatever lv4 holds). `lvn` is not
+    /// used in this body.
+    /// Returns: a JSON object string with the keys is, heading_pointer,
+    ///   doc_object_pointer, ocn, parent_ocn and parent_lvn.
+    string node_emitter(
+      string lvn,
+      int ocn_,
+      int counter_,
+      int pointer_,
+      string is_
+    )
+    in {
+      auto rgx = new Rgx();
+    }
+    body {
+      assert(is_ != "heading"); // should not be necessary
+      assert(to!int(ocn_) >= 0); // should not be necessary
+      uint ocn=to!uint(ocn_);
+      if (lv7 > 0) {
+        p_["lvn"] = 7; p_["ocn"] = lv7;
+      } else if (lv6 > 0) {
+        p_["lvn"] = 6; p_["ocn"] = lv6;
+      } else if (lv5 > 0) {
+        p_["lvn"] = 5; p_["ocn"] = lv5;
+      } else {
+        p_["lvn"] = 4; p_["ocn"] = lv4;
+      }
+      node=("{ " ~
+        "\"is\": \"" ~ is_ ~ "\"" ~
+        ", \"heading_pointer\": " ~ to!string(pointer_) ~
+        ", \"doc_object_pointer\": " ~ to!string(counter_) ~
+        ", \"ocn\": " ~ to!string(ocn_) ~
+        ", \"parent_ocn\": " ~ to!string(p_["ocn"]) ~
+        ", \"parent_lvn\": " ~ to!string(p_["lvn"]) ~
+        " }"
+      );
+      return node;
+    }
+    invariant() {
+    }
+    
string node_emitter_heading(
+      string lvn,
+      string lcn,
+      int ocn_,
+      int counter_,
+      int pointer_,
+      string is_
+    )
+    in {
+      auto rgx = new Rgx();
+    }
+    body {
+      // Node string for a heading object. Records this heading's ocn for its
+      // level, clears the recorded ocn of every deeper level, and stores the
+      // parent level and parent ocn in p_ before building the JSON string.
+      uint ocn=to!uint(ocn_);
+      switch (lvn) { // switch (to!string(lv)) {
+      case "0":
+        lv=0;
+        lv0=ocn; lv1=0; lv2=0; lv3=0; lv4=0; lv5=0; lv6=0; lv7=0;
+        p_["lvn"] = 0; p_["ocn"] = 0;
+        break;
+      case "1":
+        lv=1;
+        lv1=ocn; lv2=0; lv3=0; lv4=0; lv5=0; lv6=0; lv7=0;
+        p_["lvn"] = 0; p_["ocn"] = lv0;
+        break;
+      case "2":
+        lv=2;
+        lv2=ocn; lv3=0; lv4=0; lv5=0; lv6=0; lv7=0;
+        p_["lvn"] = 1; p_["ocn"] = lv1;
+        break;
+      case "3":
+        lv=3;
+        lv3=ocn; lv4=0; lv5=0; lv6=0; lv7=0;
+        p_["lvn"] = 2; p_["ocn"] = lv2;
+        break;
+      case "4":
+        lv=4;
+        lv4=ocn; lv5=0; lv6=0; lv7=0;
+        // A level 4 heading takes the nearest recorded level among 3, 2, 1
+        // as parent and falls back to level 0.
+        if (lv3 > 0) {
+          p_["lvn"] = 3; p_["ocn"] = lv3;
+        } else if (lv2 > 0) {
+          p_["lvn"] = 2; p_["ocn"] = lv2;
+        } else if (lv1 > 0) {
+          p_["lvn"] = 1; p_["ocn"] = lv1;
+        } else {
+          p_["lvn"] = 0; p_["ocn"] = lv0;
+        }
+        break;
+      case "5":
+        lv=5;
+        lv5=ocn; lv6=0; lv7=0;
+        p_["lvn"] = 4; p_["ocn"] = lv4;
+        break;
+      case "6":
+        lv=6;
+        lv6=ocn; lv7=0;
+        p_["lvn"] = 5; p_["ocn"] = lv5;
+        break;
+      case "7":
+        lv=7;
+        lv7=ocn;
+        p_["lvn"] = 6; p_["ocn"] = lv6;
+        break;
+      default:
+        // Any other lvn leaves lv* and p_ as set by the previous call.
+        break;
+      }
+      // lvn and lcn are written without quotes, i.e. as JSON numbers.
+      node=("{ " ~
+        "\"is\": \"" ~ is_ ~ "\"" ~
+        ", \"heading_pointer\": " ~ to!string(pointer_) ~
+        ", \"doc_object_pointer\": " ~ to!string(counter_) ~
+        ", \"ocn\": " ~ to!string(ocn_) ~
+        ", \"lvn\": " ~ to!string(lvn) ~
+        ", \"lcn\": " ~ to!string(lcn) ~
+        ", \"parent_ocn\": " ~ to!string(p_["ocn"]) ~
+        ", \"parent_lvn\": " ~ to!string(p_["lvn"]) ~
+        " }"
+      );
+      return node;
+    }
+    invariant() {
+    }
+  }
+}
diff --git a/lib/sdp/ao_interface.d b/lib/sdp/ao_interface.d
new file mode 100644
index 0000000..224603d
--- /dev/null
+++ b/lib/sdp/ao_interface.d
@@ -0,0 +1,145 @@
+/*
+#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil
+* sisu_interface.d
+*/
+mixin template Interfaces() {
+  interface AssertOCN {
+    int ocn_emitter(int ocn_status_flag)
+    in {
assert(ocn_status_flag <= 2); }
+    out(ocn) { assert(ocn >= 0); }
+  }
+  // Contract-only interface; both contracts are empty.
+  interface AssertObjInlineMarkup {
+    string obj_inline_markup(string obj_raw, string obj_type_)
+    in {
+    }
+    out(inline_markup) {
+    }
+  }
+  // Contract-only interface; both contracts are empty.
+  interface AssertObjAttrib {
+    string obj_attributes(string obj_raw, string node, string obj_type_)
+    in {
+    }
+    out(obj_attrib_json) {
+    }
+  }
+  // The in-contract only traces: under -debug=bookindexraw it prints every
+  // non-empty book-index string together with its ocn.
+  interface AssertBookIndexNuggetHash {
+    string[][string][string] bookindex_nugget_hash(string bookindex, int ocn)
+    in {
+      debug(bookindexraw) {
+        mixin ScreenTxtColors;
+        if (!bookindex.empty) {
+          writeln(
+            scr_txt_color["blue"], "* [bookindex] ", scr_txt_color["off"],
+            "[", to!string(ocn), "] ", bookindex
+          );
+        }
+      }
+    }
+    out(hash_nugget) {
+    }
+  }
+  interface AssertBookIndexReport {
+    string[][string][][string][] bookindex_nugget_hash(string[][string][string] bookindex_unordered_hashes)
+    in {
+    }
+  }
+  // Contracts for the two node emitters. The out-contracts parse the emitted
+  // node string as JSON and check the parent level and parent ocn.
+  interface AssertNodeJSON {
+    /// Non-heading objects: `is_` must not be "heading", and the emitted
+    /// node must name a parent level from 4 to 7 with a non-negative ocn.
+    string node_emitter(
+      string lvn,
+      int ocn_,
+      int counter_,
+      int pointer_,
+      string is_
+    )
+    in {
+      auto rgx = new Rgx();
+      assert(is_ != "heading");
+      assert(to!int(ocn_) >= 0);
+    }
+    out(node) {
+      debug(node) {
+        mixin ScreenTxtColors;
+        // Declared here because the `rgx` of the in-contract is local to
+        // that block and not visible in this one; the out-contract of
+        // node_emitter_heading below creates its own in the same way.
+        auto rgx = new Rgx();
+        if (match(lvn, rgx.levels_numbered_headings)) {
+          writeln(scr_txt_marker["yellow"], to!string(node));
+        } else {
+          writeln(scr_txt_marker["white"], to!string(node));
+        }
+      }
+      JSONValue j = parseJSON(node);
+      assert(j["parent_lvn"].integer >= 4);
+      assert(j["parent_lvn"].integer <= 7);
+      assert(j["parent_ocn"].integer >= 0);
+    }
+    /// Heading objects: `is_` must be "heading", `lvn` must match
+    /// rgx.levels_numbered, and a level 0 heading must have ocn 1.
+    string node_emitter_heading(
+      string lvn,
+      string lcn,
+      int ocn_,
+      int counter_,
+      int pointer_,
+      string is_
+    )
+    in {
+      auto rgx = new Rgx();
+      assert(is_ == "heading");
+      assert(to!uint(ocn_) >= 0);
+      assert(
+        match(lvn, rgx.levels_numbered),
+        ("not a valid heading level: " ~ lvn ~ " at " ~ to!string(ocn_))
+      );
+      if (match(lvn, rgx.levels_numbered)) {
+        if (to!uint(lvn) == 0) {
+          assert(to!uint(ocn_) == 1);
+        }
+      }
+    }
+    out(node) {
+      auto rgx = new Rgx();
+      debug(heading) {
+        mixin ScreenTxtColors;
+        if
(match(lvn, rgx.levels_numbered_headings)) {
+          writeln(scr_txt_marker["yellow"], to!string(node));
+        }
+      }
+      debug(node) {
+        mixin ScreenTxtColors;
+        if (match(lvn, rgx.levels_numbered_headings)) {
+          writeln(scr_txt_marker["yellow"], to!string(node));
+        } else {
+          writeln(scr_txt_marker["white"], to!string(node));
+        }
+      }
+      // Parse the emitted node and check parent against own level.
+      JSONValue j = parseJSON(node);
+      assert(j["parent_lvn"].integer <= 7);
+      assert(j["parent_ocn"].integer >= 0);
+      if (match(lvn, rgx.levels_numbered_headings)) {
+        assert(j["lvn"].integer <= 7);
+        assert(j["ocn"].integer >= 0);
+        if (j["parent_lvn"].integer > 0) {
+          assert(j["parent_lvn"].integer < j["lvn"].integer);
+          if (j["ocn"].integer != 0) {
+            assert(j["parent_ocn"].integer < j["ocn"].integer);
+          }
+        }
+        // Expected parent level per heading level; level 4 may hang off
+        // any of the levels 0 to 3, level 8 is not checked.
+        if (j["lvn"].integer == 0) {
+          assert(j["parent_lvn"].integer == 0);
+        } else if (j["lvn"].integer == 1) {
+          assert(j["parent_lvn"].integer == 0);
+        } else if (j["lvn"].integer == 2) {
+          assert(j["parent_lvn"].integer == 1);
+        } else if (j["lvn"].integer == 3) {
+          assert(j["parent_lvn"].integer == 2);
+        } else if (j["lvn"].integer == 4) {
+          assert(j["parent_lvn"].integer <= 3);
+        } else if (j["lvn"].integer == 5) {
+          assert(j["parent_lvn"].integer == 4);
+        } else if (j["lvn"].integer == 6) {
+          assert(j["parent_lvn"].integer == 5);
+        } else if (j["lvn"].integer == 7) {
+          assert(j["parent_lvn"].integer == 6);
+        } else if (j["lvn"].integer == 8) {
+        }
+      }
+    }
+  }
+}
+
diff --git a/lib/sdp/ao_markup_source_raw.d b/lib/sdp/ao_markup_source_raw.d
new file mode 100644
index 0000000..3710ff6
--- /dev/null
+++ b/lib/sdp/ao_markup_source_raw.d
@@ -0,0 +1,58 @@
+/*
+#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil
+* sisu_markup_source_raw.d
+*/
+mixin template SiSUmarkupRaw() {
+  class MarkupRaw {
+    auto rgx = new Rgx();
+    // Reads the file fn_src as text. Throws (via enforce) when the name does
+    // not match rgx.src_pth or the file does not exist.
+    final private string markupSourceString(in char[] fn_src) {
+      enforce(
+        match(fn_src, rgx.src_pth),
+        "not a sisu markup filename"
+      );
+      enforce(
+        exists(fn_src)!=0,
+        "file not found"
+      );
+      string source_txt_str = cast(string)
read(fn_src);
+      std.utf.validate(source_txt_str);
+      return source_txt_str;
+    }
+    final private char[][] markupSourceLineArray(in string src_text) {
+      char[][] source_line_arr =
+        split(cast(char[]) src_text, rgx.line_delimiter);
+      return source_line_arr;
+    }
+    final char[][] markupSourceContentRawLineArray(in char[] fn_src) {
+      auto source_txt_str = markupSourceString(fn_src);
+      auto source_line_arr = markupSourceLineArray(source_txt_str);
+      return source_line_arr;
+    }
+  }
+  // Same reader as MarkupRaw, except that the file name is checked against
+  // rgx.src_fn_find_inserts.
+  class MarkupInsertRaw {
+    auto rgx = new Rgx();
+    /// Read the file `fn_src` and return its UTF-validated text. Throws (via
+    /// enforce) when the name does not match or the file does not exist.
+    final private string markupSourceString(in char[] fn_src) {
+      enforce(
+        match(fn_src, rgx.src_fn_find_inserts),
+        "not a sisu markup filename"
+      );
+      enforce(
+        exists(fn_src)!=0,
+        "file not found"
+      );
+      string source_txt_str = cast(string) read(fn_src);
+      std.utf.validate(source_txt_str);
+      return source_txt_str;
+    }
+    /// Split the text into lines with rgx.line_delimiter.
+    final private char[][] markupSourceLineArray(in string src_text) {
+      char[][] source_line_arr =
+        split(cast(char[]) src_text, rgx.line_delimiter);
+      return source_line_arr;
+    }
+    /// Read the file `fn_src` and return its content as an array of lines.
+    final char[][] markupSourceContentRawLineArray(in char[] fn_src) {
+      auto source_txt_str = markupSourceString(fn_src);
+      auto source_line_arr = markupSourceLineArray(source_txt_str);
+      return source_line_arr;
+    }
+  }
+}
diff --git a/lib/sdp/ao_object_setter.d b/lib/sdp/ao_object_setter.d
new file mode 100644
index 0000000..050b606
--- /dev/null
+++ b/lib/sdp/ao_object_setter.d
@@ -0,0 +1,90 @@
+/*
+#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil
+* sisu_setter.d
+*/
+mixin template ObjectSetters() {
+  // Builds the string[string] record of a document object. Every setter
+  // returns a fresh associative array; the keys "use", "of" and "is"
+  // classify the object and "obj" holds its text.
+  // The records are declared as `string[string] object_set;`: the C-style
+  // postfix form `string object_set[string];` is not accepted by current D
+  // compilers.
+  class ObjectAbstractSet {
+    import std.conv : to;
+    /// Record for a comment object.
+    string[string] contents_comment(in string object) {
+      string[string] object_set;
+      object_set["use"] = "comment";
+      object_set["of"] = "comment";
+      object_set["is"] = "comment";
+      object_set["obj"] = object;
+      return object_set;
+    }
+    /// Record for a heading. An `ocn` of 0 is stored as the empty string.
+    string[string] contents_heading(
+      in int type,
+      in string object,
+      in string attrib,
+      in int ocn,
+      in string lev,
+      in string lvn,
+      in string lcn,
+    ) {
+      string[string] object_set;
+      object_set["use"] = "content";
+      object_set["of"] = "para";
+      object_set["is"] = "heading";
+      object_set["type"] = to!string(type);
+      object_set["obj"] = object;
+      object_set["ocn"] = (ocn==0) ? "" : to!string(ocn);
+      object_set["lev"] = to!string(lev);
+      object_set["lvn"] = to!string(lvn);
+      object_set["lcn"] = to!string(lcn);
+      object_set["attrib"] = attrib;
+      return object_set;
+    }
+    /// Record for a paragraph-like object; `type` is stored under "is".
+    /// An `ocn` of 0 is stored as the empty string.
+    string[string] contents_para(
+      in string type,
+      in string object,
+      in string attrib,
+      in int ocn,
+      in string indent_first,
+      in string indent_second,
+      in bool bullet
+    ) {
+      string[string] object_set;
+      object_set["use"] = "content";
+      object_set["of"] = "para";
+      object_set["is"] = type;
+      object_set["obj"] = object;
+      object_set["ocn"] = (ocn==0) ? "" : to!string(ocn);
+      object_set["indent_first"] = indent_first;
+      object_set["indent_second"] = indent_second;
+      object_set["bullet"] = to!string(bullet);
+      object_set["attrib"] = attrib;
+      return object_set;
+    }
+    /// Record for a block object; `type` is stored under "is".
+    /// An `ocn` of 0 is stored as the empty string.
+    string[string] contents_block(
+      in string type,
+      in string object,
+      in string attrib,
+      in int ocn
+    ) {
+      string[string] object_set;
+      object_set["use"] = "content";
+      object_set["of"] = "block";
+      object_set["is"] = type;
+      object_set["obj"] = object;
+      object_set["ocn"] = (ocn==0) ? "" : to!string(ocn);
+      object_set["attrib"] = attrib;
+      return object_set;
+    }
+    /// Record for a block object whose ocn is already a string; stores the
+    /// node string under "node" and sets no "attrib".
+    string[string] contents_block_ocn_string(
+      in string type,
+      in string object,
+      in string ocn,
+      in string node
+    ) {
+      string[string] object_set;
+      object_set["use"] = "content";
+      object_set["of"] = "block";
+      object_set["is"] = type;
+      object_set["obj"] = object;
+      object_set["ocn"] = ocn;
+      object_set["node"] = node;
+      return object_set;
+    }
+  }
+}
diff --git a/lib/sdp/ao_output_debugs.d b/lib/sdp/ao_output_debugs.d
new file mode 100644
index 0000000..abc48b3
--- /dev/null
+++ b/lib/sdp/ao_output_debugs.d
@@ -0,0 +1,354 @@
+/*
+#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil
+* sdp_output_debugs.d
+*/
+template SiSUoutputDebugs() {
+  class SDPoutputDebugs {
+    auto abstract_doc_source_debugs(
+      string[string][] contents,
+      JSONValue[string] docmake,
+      JSONValue[string] dochead,
+      string[][string][string] bookindex_unordered_hashes,
+      JSONValue[] biblio,
+      string fn_src,
+      string[string] actions
+    ) {
+      mixin RgxInit;
+      mixin ScreenTxtColors;
+      auto rgx = new Rgx();
+      debug(parent) {
+        writeln(__FILE__, ":", __LINE__);
+        foreach (obj; contents) {
+          if (obj["use"] == "content") {
+            if (obj["is"] == "heading") {
+              writeln(scr_txt_marker["cyan"],
+                obj["ocn"],
+                " node: ", obj["node"],
+                " heading: ", obj["lvn"],
+                " ", obj["obj"],
+              );
+            } else {
+            }
+          }
+        }
+      }
+      debug(objects) {
+        writeln("-------------------------------");
+        writeln(__FILE__, ":", __LINE__);
+        foreach (obj; contents) {
+          if (obj["use"] == "content") {
+            writeln(
+              scr_txt_color["green"],
+              "* [", obj["ocn"], "]",
+              "[", obj["is"], "] ",
+              scr_txt_color["off"],
+              obj["obj"]
+            );
+          }
+        }
+      }
+      debug(headermakejson) {
+        writeln("document header, metadata & make instructions:");
+        writeln(dochead);
+        writeln(pointer_head_main);
+        foreach (main_header; pointer_head_main) {
+          switch (main_header) {
+          case "make":
+            foreach (sub_header; pointer_head_sub_make) {
+              if (to!string(dochead[main_header][sub_header]).length > 2) {
+ writeln( + main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + } + } + break; + default: + break; + } + } + } + debug(headermetadatajson) { + writeln("document header, metadata & make instructions:"); + writeln(dochead); + writeln(pointer_head_main); + foreach (main_header; pointer_head_main) { + switch (main_header) { + case "creator": + foreach (sub_header; pointer_head_sub_creator) { + if (to!string(dochead[main_header][sub_header]).length > 2) { + writeln(main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + } + } + break; + case "title": + foreach (sub_header; pointer_head_sub_title) { + if (to!string(dochead[main_header][sub_header]).length > 2) { + writeln(main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + } + } + break; + case "rights": + foreach (sub_header; pointer_head_sub_rights) { + if (to!string(dochead[main_header][sub_header]).length > 2) { + writeln(main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + } + } + break; + case "date": + foreach (sub_header; pointer_head_sub_date) { + if (to!string(dochead[main_header][sub_header]).length > 2) { + writeln(main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + } + } + break; + case "original": + foreach (sub_header; pointer_head_sub_original) { + if (to!string(dochead[main_header][sub_header]).length > 2) { + writeln(main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + } + } + break; + case "classify": + foreach (sub_header; pointer_head_sub_classify) { + if (to!string(dochead[main_header][sub_header]).length > 2) { + writeln(main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + } + } + break; + case "identifier": + foreach (sub_header; pointer_head_sub_identifier) { + if (to!string(dochead[main_header][sub_header]).length > 2) { + writeln(main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + 
} + } + break; + case "notes": + foreach (sub_header; pointer_head_sub_notes) { + if (to!string(dochead[main_header][sub_header]).length > 2) { + writeln(main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + } + } + break; + case "publisher": + foreach (sub_header; pointer_head_sub_publisher) { + if (to!string(dochead[main_header][sub_header]).length > 2) { + writeln(main_header, ":", + sub_header, ": ", + dochead[main_header][sub_header] + ); + } + } + break; + default: + break; + } + } + } + debug(bookindex) { + writeln("-------------------------------"); + writeln(__FILE__, ":", __LINE__); + auto bookindex = new BookIndexReport(); + bookindex.bookindex_report_sorted(bookindex_unordered_hashes); + } + debug(summary) { + string[string] check = [ + "last_ocn" : "NA [debug \"checkdoc\" not run]", + ]; + debug(checkdoc) { + foreach (obj; contents) { + if (obj["use"] == "content") { + if (!empty(obj["ocn"])) { + check["last_ocn"] = obj["ocn"]; + } + } + } + } + debug(headings) { + writeln("-------------------------------"); + writeln(__FILE__, ":", __LINE__); + foreach (obj; contents) { + if (obj["is"] == "heading") { + writeln( + scr_txt_marker["yellow"], + obj["lev"], "~ ", + "[", obj["ocn"], "] ", + obj["obj"] + ); + } + } + } + writeln( + scr_txt_color["green"], + "-------------------------------", + scr_txt_color["off"], + "\n", fn_src, + "\nlength contents array: ", contents.length, + "\nlast ocn: ", check["last_ocn"], + "\nlength bookindex: ", bookindex_unordered_hashes.length, + "\n", __FILE__, ":", __LINE__, + ); + debug(checkdoc) { + if (auto mfn=match(fn_src, rgx.src_fn)) { + if (actions["assert"] == "yes") { + switch (mfn.captures[2]) { + case "live-manual.ssm": + assert(check["last_ocn"] == + "1019","last ocn should be: 1019 (check test, document is frequently updated)"); // ok + break; + case "sisu_markup.sst": + assert(check["last_ocn"] == + "297","last ocn should be: 297"); // ok + // assert(check["last_ocn"] == "297","last 
ocn should be: 297"); + // notes for first divergance study sisu headings 247 250 + // sisu has issue with code that contains heading 1~ which results in no ocn! ?? + // sisu currently has incorrect last body ocn of 294! + // bug in sisu? attend + break; + // sisu-markup-samples: + case "accelerando.charles_stross.sst": + assert(check["last_ocn"] == + "2861","last ocn should be: 2861"); // ok + break; + case "alices_adventures_in_wonderland.lewis_carroll.sst": + assert(check["last_ocn"] == + "805","last ocn should be: 805"); // 808 + break; + case "autonomy_markup0.sst": + assert(check["last_ocn"] == + "77","last ocn should be: 77"); // ok endnotes + // assert(check["last_ocn"] == "78","last ocn should be: 78"); + break; + case "content.cory_doctorow.sst": + assert(check["last_ocn"] == + "953","last ocn should be: 953"); // 1007 way off, check ocn off switches + // assert(check["last_ocn"] == "953","last ocn should be: 953"); + break; + case "democratizing_innovation.eric_von_hippel.sst": + // fixed ERROR! range violation, broken check! endnotes, bookindex, biblio + // error in bookindex ... (ch1; ch6; ch8 ) + assert(check["last_ocn"] == + "905","last ocn should be: 905"); // 911 + break; + case "down_and_out_in_the_magic_kingdom.cory_doctorow.sst": + assert(check["last_ocn"] == + "1417","last ocn should be: 1417"); // 1455 check ocn off switches + break; + case "for_the_win.cory_doctorow.sst": + assert(check["last_ocn"] == + "3510","last ocn should be: 3510"); // 3569 check ocn off switches + break; + case "free_as_in_freedom_2.richard_stallman_and_the_free_software_revolution.sam_williams.richard_stallman.sst": + assert(check["last_ocn"] == + "1082","last ocn should be: 1082"); // check 1079 too few + break; + case "free_culture.lawrence_lessig.sst": + assert(check["last_ocn"] == + "1330","last ocn should be: 1330"); // 1312 + // fixed ERROR! range violation, broken check! + // error in bookindex ... 
sections piracy (ch1) & property (ch10 market concentration) fixed + break; + case "free_for_all.peter_wayner.sst": // endnotes, bookindex, biblio + assert(check["last_ocn"] == + "1559","last ocn should be: 1559"); // 1560, check ocn off switches, has endnotes so 2 too many + // assert(check["last_ocn"] == "1559","last ocn should be: 1559"); + break; + case "gpl2.fsf.sst": + assert(check["last_ocn"] == + "65","last ocn should be: 65"); // ok endnotes? check + // assert(check["last_ocn"] == "66","last ocn should be: 66"); + break; + case "gpl3.fsf.sst": + assert(check["last_ocn"] == + "123","last ocn should be: 123"); // ok + break; + case "gullivers_travels.jonathan_swift.sst": + assert(check["last_ocn"] == + "668","last ocn should be: 668"); // 674 + break; + case "little_brother.cory_doctorow.sst": + assert(check["last_ocn"] == + "3130","last ocn should be: 3130"); // 3204, check ocn off switches + break; + case "the_cathedral_and_the_bazaar.eric_s_raymond.sst": + assert(check["last_ocn"] == + "258","last ocn should be: 258"); // ok + break; + case "the_public_domain.james_boyle.sst": + assert(check["last_ocn"] == + "970","last ocn should be: 970"); // 978 + break; + case "the_wealth_of_networks.yochai_benkler.sst": // endnotes, bookindex + assert(check["last_ocn"] == + "829","last ocn should be: 829"); // ok + // assert(check["last_ocn"] == "832","last ocn should be: 832"); + // has endnotes and bookindex, issue with sisu.rb + break; + case "through_the_looking_glass.lewis_carroll.sst": + assert(check["last_ocn"] == + "949","last ocn should be: 949"); // 955 + break; + case "two_bits.christopher_kelty.sst": // endnotes, bookindex, biblio + assert(check["last_ocn"] == + "1190","last ocn should be: 1190"); // 1191 + // assert(check["last_ocn"] == "1193","last ocn should be: 1193"); // 1191 ok? + // has endnotes and bookindex, issue with sisu.rb + break; + // fixed ERROR! range violation! + // error in bookindex ... 
(ch3 the movement) + break; + case "un_contracts_international_sale_of_goods_convention_1980.sst": + assert(check["last_ocn"] == + "377","last ocn should be: 377"); // ok + break; + case "viral_spiral.david_bollier.sst": // endnotes, bookindex + assert(check["last_ocn"] == + "1078","last ocn should be: 1078"); // 1100 + // fixed ERROR! range violation! + // error in bookindex ... (ch7 ... building the cc machine, an extra semi colon) + break; + // case ".sst": + // assert(check["last_ocn"] == "0","last ocn should be: 0"); + // break; + // case ".sst": + // assert(check["last_ocn"] == "0","last ocn should be: 0"); + // break; + // case ".sst": + // assert(check["last_ocn"] == "0","last ocn should be: 0"); + // break; + default: + writeln(fn_src); + break; + } + } + } + } + } + } + } +} diff --git a/lib/sdp/ao_rgx.d b/lib/sdp/ao_rgx.d new file mode 100644 index 0000000..755ba11 --- /dev/null +++ b/lib/sdp/ao_rgx.d @@ -0,0 +1,195 @@ +/* +#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil +* sisu_rgx.d +*/ +mixin template RgxInit() { +/* +** misc +*/ + // Rgx: table of compile-time regexes (std.regex ctRegex) used to recognise markup constructs; + // each member is a static pattern, grouped below by the kind of markup it matches. + class Rgx { + static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); + static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); + static src_pth = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ss[tm])$`); + static src_fn = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ss[tm])$`); + static src_fn_master = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ssm)$`); + static src_fn_find_inserts = ctRegex!(`^([a-zA-Z0-9._-]+/)*([a-zA-Z0-9._-]+[.]ss[im])$`); + static line_delimiter = ctRegex!("\n"); + static within_quotes = ctRegex!(`"(.+?)"`); + static make_heading_delimiter = ctRegex!(`[;][ ]*`); + static arr_delimiter = ctRegex!(`[ ]*[;][ ]*`); + static name_delimiter = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`); + static book_index_go = ctRegex!("([0-9]+)(?:-[0-9]+)?"); + static trailing_comma = ctRegex!(",[ ]*$"); + static trailing_linebreak = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m"); + static line_delimiter_ws_strip =
ctRegex!("[ ]*\n[ ]*"); + static line_delimiter_only = ctRegex!("^\n"); + static para_delimiter = ctRegex!("\n[ ]*\n+"); + static levels_markup = ctRegex!(`^[A-D1-4]$`); + static levels_numbered = ctRegex!(`^[0-9]$`); + static levels_numbered_headings = ctRegex!(`^[0-7]$`); +/* +** insert markup file +*/ + // named groups: path = optional sub-directory prefix, filename = .sst/.ssi file name (also reachable as captures[1] and captures[2]) + static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`); +/* +** header & comments +*/ + static comment = ctRegex!(`^%+ `); + static header = ctRegex!(`^@([a-z_]+):(?:\s|$)`); + static header_make = ctRegex!(`^@(make):(?:\s|$)`); + static header_metadata = ctRegex!(`^@([a-z_]+):(?:\s|$)`); + static header_sub = ctRegex!(`^[ ]+:([a-z_]+):\s`); + static head_main = ctRegex!(`^@([a-z_]+):\s*(.*)`, "m"); + static head_sub = ctRegex!(`^[ ]*:([a-z_]+):\s+(.+)`, "m"); +/* +** heading & paragraph operators +*/ + static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?) `); + static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); + static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); + static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); // test, particularly [2] name/hashtag which may or may not be, does this affect title [3] + static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); + static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`); + static para_bullet = ctRegex!(`^_[*] `); + static para_bullet_indent = ctRegex!(`^_([1-9])[*] `); + static para_indent = ctRegex!(`^_([1-9]) `); + static para_indent_hang = ctRegex!(`^_([0-9])_([0-9]) `); + static para_attribs = ctRegex!(`^_(([0-9])(_([0-9]))?|_([1-9])?[*]) `); +/* +** blocked markup tics +*/ + static block_tic_code_open = ctRegex!("^`{3} (code)"); + static block_tic_poem_open = ctRegex!("^`{3} (poem)"); + static block_tic_group_open = ctRegex!("^`{3} (group)"); + static block_tic_block_open = ctRegex!("^`{3} (block)"); + static block_tic_quote_open = ctRegex!("^`{3} (quote)"); + static
block_tic_table_open = ctRegex!("^`{3} (table)"); + static block_tic_close = ctRegex!("^(`{3})$","m"); +/* +** blocked markup curly +*/ + static block_curly_code_open = ctRegex!(`^(code[{].*?$)`); + static block_curly_code_close = ctRegex!(`^([}]code)`); + static block_curly_poem_open = ctRegex!(`^(poem[{].*?$)`); + static block_curly_poem_close = ctRegex!(`^([}]poem)`); + static block_curly_group_open = ctRegex!(`^(group[{].*?$)`); + static block_curly_group_close = ctRegex!(`^([}]group)`); + static block_curly_block_open = ctRegex!(`^(block[{].*?$)`); + static block_curly_block_close = ctRegex!(`^([}]block)`); + static block_curly_quote_open = ctRegex!(`^(quote[{].*?$)`); + static block_curly_quote_close = ctRegex!(`^([}]quote)`); + static block_curly_table_open = ctRegex!(`^(table[{].*?$)`); + static block_curly_table_close = ctRegex!(`^([}]table)`); +/* +** inline markup font face mod +*/ + // each pattern captures the marked-up span as the named group "text" (also captures[1]) + static inline_emphasis = ctRegex!(`\*\{(?P<text>.+?)\}\*`); + static inline_bold = ctRegex!(`!\{(?P<text>.+?)\}!`); + static inline_italics = ctRegex!(`/\{(?P<text>.+?)\}/`); + static inline_superscript = ctRegex!(`\^\{(?P<text>.+?)\}\^`); + static inline_subscript = ctRegex!(`,\{(?P<text>.+?)\},`); + static inline_strike = ctRegex!(`-\{(?P<text>.+?)\}-`); + static inline_insert = ctRegex!(`\+\{(?P<text>.+?)\}\+`); + static inline_mono = ctRegex!(`#\{(?P<text>.+?)\}#`); +/* +** inline markup footnotes +*/ + static true_dollar = ctRegex!(`\$`, "gm"); + static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); + static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); + static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m"); + static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg"); + static inline_al_delimiter_open_regular = ctRegex!(`【`, "m"); + static inline_al_delimiter_close_regular = ctRegex!(`】`, "m"); + static inline_al_delimiter_open_and_close_regular = ctRegex!(`【|】`, "m"); + static inline_notes_delimiter_al_regular = ctRegex!(`【(.+?)】`, "m"); + static inline_notes_delimiter_al_regular_number_note =
ctRegex!(`【(\d+)\s+(.+?)】`, "m"); + + static inline_al_delimiter_open_asterisk = ctRegex!(`【\*`, "m"); + static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m"); + + static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m"); + static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m"); + static inline_curly_delimiter_open_and_close_regular = ctRegex!(`~\{\s*|\s*\}~`, "m"); + static inline_notes_delimiter_curly_regular = ctRegex!(`~\{[ ]*(.+?)\}~`, "m"); + static inline_notes_curly_sp = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m"); + static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m"); + static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m"); + // named groups: text = text preceding the note, note = note body + static inline_text_and_note_al = ctRegex!(`(?P<text>.+?)【(?:[*+ ]*)(?P<note>.+?)】`, "mg"); + static inline_text_and_note_curly = ctRegex!(`(?P<text>.+?)(?:(?:[~])[{][*+ ]*)(?P<note>.+?)(?:[}][~])`, "mg"); + static inline_note_curly_delimiters = ctRegex!(`(~\{[*+]?\s*)(.+?)(\}~)`, "mg"); + static inline_notes_square = ctRegex!(`~\[\s*(.+?)\]~`, "mg"); + static inline_text_and_note_square_sp = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg"); + static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg"); + static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg"); +/* +** inline markup book index +*/ + static book_index = ctRegex!(`^=\{\s*(.+?)\}$`, "m"); + static book_index_open = ctRegex!(`^=\{\s*([^}]+?)$`); + static book_index_close = ctRegex!(`^(.*?)\}$`, "m"); // strip +/* +** no ocn object +*/ + static ocn_off = ctRegex!(`~#$`, "m"); + static ocn_off_dh = ctRegex!(`-#$`, "m"); + static ocn_off_all = ctRegex!(`[~-]#$`, "m"); +/* +** no ocn block +*/ + static ocn_off_block = ctRegex!(`^--~#$`); + static ocn_off_block_dh = ctRegex!(`^---#$`); + static ocn_off_block_close = ctRegex!(`^--\+#$`); + static ocn_block_marks = ctRegex!(`^--[+~-]#$`); +/* +** ignore outside code blocks +*/ + static regular_parse_skip = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); //
not structural info +/* +** line& page breaks +*/ + static break_line_within_object = ctRegex!(`[\\]{2}( |$)`); +// static break_line_break_within_object = ctRegex!(`( |^)[\\]{2}( |$)`); + static break_page = ctRegex!(`^-[\\]{2}-$`); + static break_page_new = ctRegex!(`^=[\\]{2}=$`); + static break_page_line_across = ctRegex!(`^=[.]{2}=$`); + static break_string = ctRegex!(`』`); +// ancestry, parent + static parent = ctRegex!(`([0-7]):([0-9]+)`); +/* +** json +*/ + static tailing_comma = ctRegex!(`,$`, "m"); +/* +** head +*/ + static main_headers = ctRegex!(`^(?:creator|title|rights|date|original|classify|identifier|notes|publisher|make|links)$`, "m"); + static subhead_creator = ctRegex!(`^(?:author|translator|illustrator)$`, "m"); + static subhead_title = ctRegex!(`^(?:main|sub(?:title)?|full|language|edition|note)$`, "m"); + static subhead_rights = ctRegex!(`^(?:copyright|illustrations|license|cover)$`, "m"); + static subhead_date = ctRegex!(`^(?:published|created|issued|available|valid|modified|added_to_site)$`, "m"); + static subhead_original = ctRegex!(`^(?:title|language|source)$`, "m"); + static subhead_classify = ctRegex!(`^(?:topic_register|subject|keywords|loc|dewey)$`, "m"); + static subhead_identifier = ctRegex!(`^(?:oclc|pg|isbn)$`, "m"); + static subhead_notes = ctRegex!(`^(?:abstract|description)$`, "m"); + static subhead_publisher = ctRegex!(`^(?:name)$`, "m"); + static subhead_make = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m"); +/* +** biblio tags +*/ + static biblio_tags = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`); + static biblio_abbreviations = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`); +/* +** bookindex split +*/ + static
bi_main_terms_split = ctRegex!(`\s*;\s*`); + static bi_main_term_plus_rest_split = ctRegex!(`\s*:\s*`); + static bi_sub_terms_plus_ocn_offset_split = ctRegex!(`\s*\|\s*`); + static bi_term_and_ocns_match = ctRegex!(`^(.+?)\+(\d+)`); +/* +* url matching http://url.is/got and { text }http://url.is/got +* image matching +* header substitution & bold & italics lists +*/ + } +} diff --git a/lib/sdp/ao_scan_inserts.d b/lib/sdp/ao_scan_inserts.d new file mode 100644 index 0000000..e99222a --- /dev/null +++ b/lib/sdp/ao_scan_inserts.d @@ -0,0 +1,229 @@ +/* +#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil +* sisu_scan_inserts.d +*/ +mixin template SiSUdocInserts() { + // Inserts: expands "<< file" insert directives found in a master document into the lines of the referenced files. + class Inserts { + // scan_subdoc_source: scans the lines of one inserted file. + // Lines are copied to a local buffer; code blocks (curly or tic) are copied through without looking for insert directives. + // `add` is the caller's running line index; it is advanced once per stored line. + // Returns tuple(stored lines, advanced add). + auto scan_subdoc_source( + char[][] markup_sourcefile_insert_content, string fn_src, int add + ) { + char[][100000] cont_arbitrary_max_length_set_; + auto ft0 = flag_type.dup; + auto ft1 = flag_type.dup; + string[string] processing; + uint[string] line_occur; + auto obj_im = new ObjInlineMarkup(); + auto obj_att = new ObjAttrib(); + int[string] counter; counter["add"]=0; + mixin ScreenTxtColors; + auto rgx = new Rgx(); + int tell_l(string color, in char[] line) { + writeln(scr_txt_marker[color], line); + return 0; + } + auto fn_pth_full = match(fn_src, rgx.src_pth); + auto markup_src_file_path = fn_pth_full.captures[1]; + foreach (line; markup_sourcefile_insert_content) { + if (ft1["curly_code"] == 1) { + ft1["header_make"] = 0; + ft1["header_metadata"] = 0; + if (auto m = match(line, rgx.block_curly_code_close)) { + ft1["curly_code"] = 0; + } + cont_arbitrary_max_length_set_[counter["add"]] = line; + ++counter["add"]; ++add; + } else if (auto m = match(line, rgx.block_curly_code_open)) { + ft1["curly_code"] = 1; + ft1["header_make"] = 0; + ft1["header_metadata"] = 0; + cont_arbitrary_max_length_set_[counter["add"]] = line; + ++counter["add"]; ++add; + } else if (ft1["tic_code"] == 1) { + ft1["header_make"] = 0; + ft1["header_metadata"] = 0; + if (auto m = match(line, rgx.block_tic_close)) { +
ft1["tic_code"] = 0; + } + cont_arbitrary_max_length_set_[counter["add"]] = line; + ++counter["add"]; ++add; + } else if (auto m = match(line, rgx.block_tic_code_open)) { + ft1["tic_code"] = 1; + ft1["header_make"] = 0; + ft1["header_metadata"] = 0; + cont_arbitrary_max_length_set_[counter["add"]] = line; + ++counter["add"]; ++add; + } else if ( + (ft1["header_make"] == 1) && (line_occur["header_make"] > 0) + && match(line, rgx.header_sub) + ) { + ft1["header_make"] = 1; + ft1["header_metadata"] = 0; + ++line_occur["header_make"]; + ++counter["add"]; + } else if ( + (ft1["header_metadata"] == 1) && (line_occur["header_metadata"] > 0) + && match(line, rgx.header_sub) + ) { + ft1["header_metadata"] = 1; + ft1["header_make"] = 0; + ++line_occur["header_metadata"]; + ++counter["add"]; + } else if (auto m = match(line, rgx.insert_src_fn_ssi_or_sst)) { + ft1["header_make"] = 0; + ft1["header_metadata"] = 0; + auto insert_fn = m.captures[2]; + auto insert_sub_pth = m.captures[1]; + auto fn_src_insert = + (markup_src_file_path ~ insert_sub_pth ~ insert_fn); + auto raw = new MarkupInsertRaw(); + auto markup_sourcesubfile_insert_content = + raw.markupSourceContentRawLineArray(fn_src_insert); + debug(insert) { // insert file + tell_l("red", line); + tell_l("red", fn_src_insert); + tell_l("fuchsia", "ERROR"); + writeln( + " length contents insert array: ", + markup_sourcesubfile_insert_content.length + ); + } + // NOTE(review): the nested insert is loaded above but never scanned or merged into the output; the steps listed below are not implemented here + auto ins = new Inserts(); + /* + 1. load file, + 2. read lines; + 3. scan lines, + 4. if filename insert, and insert filename + 5. repeat 1 + 6. else + 7. add line to new array; + */ + } else { + ft1["header_make"] = 0; + ft1["header_metadata"] = 0; + cont_arbitrary_max_length_set_[counter["add"]] = line; + ++counter["add"]; ++add; + } + } + auto contents_ = cont_arbitrary_max_length_set_[0 ..
counter["add"]].dup; + auto t = tuple(contents_, add); + return t; + } + // scan_doc_source: scans the lines of a master document and returns a copy in which each + // "<< file" insert directive is replaced by the lines returned from scan_subdoc_source for that file. + // Lines inside code blocks (curly or tic) are copied through unchanged, so insert directives there are not expanded. + // fn_src is the master file name; its directory part (captures[1] of rgx.src_pth) prefixes the insert file names. + auto scan_doc_source(char[][] markup_sourcefile_content, string fn_src) { + char[][100000] cont_arbitrary_max_length_set; // 2000 pg * 50 lines + string[string] processing; + uint[string] line_occur; + auto obj_im = new ObjInlineMarkup(); + auto obj_att = new ObjAttrib(); + auto ft = flag_type.dup; + int add; + mixin ScreenTxtColors; + auto rgx = new Rgx(); + int tell_l(string color, in char[] line) { + writeln(scr_txt_marker[color], line); + return 0; + } + auto fn_pth_full = match(fn_src, rgx.src_pth); + auto markup_src_file_path = fn_pth_full.captures[1]; + foreach (line; markup_sourcefile_content) { + if (ft["curly_code"] == 1) { + ft["header_make"] = 0; + ft["header_metadata"] = 0; + if (auto m = match(line, rgx.block_curly_code_close)) { + ft["curly_code"] = 0; + } + cont_arbitrary_max_length_set[add] = line; + ++add; + } else if (auto m = match(line, rgx.block_curly_code_open)) { + ft["curly_code"] = 1; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + cont_arbitrary_max_length_set[add] = line; + ++add; + } else if (ft["tic_code"] == 1) { + ft["header_make"] = 0; + ft["header_metadata"] = 0; + if (auto m = match(line, rgx.block_tic_close)) { + ft["tic_code"] = 0; + } + cont_arbitrary_max_length_set[add] = line; + ++add; + } else if (auto m = match(line, rgx.block_tic_code_open)) { + ft["tic_code"] = 1; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + cont_arbitrary_max_length_set[add] = line; + ++add; // advance past the block opener so the next line does not overwrite it + } else if ((ft["header_make"] == 1) + && (line_occur["header_make"] > 0) + && match(line, rgx.header_sub)) { + ++line_occur["header_make"]; + cont_arbitrary_max_length_set[add] = line; + ++add; + } else if ((ft["header_metadata"] == 1) + && (line_occur["header_metadata"] > 0) + && match(line, rgx.header_sub)) { + ++line_occur["header_metadata"]; + cont_arbitrary_max_length_set[add] = line; + ++add; + } else if (auto m = match(line, rgx.header_make)) { + ft["header_make"] = 1; +
++line_occur["header_make"]; + cont_arbitrary_max_length_set[add] = line; + ++add; + } else if (auto m = match(line, rgx.header_metadata)) { + ft["header_metadata"] = 1; + ++line_occur["header_metadata"]; + cont_arbitrary_max_length_set[add] = line; + ++add; + } else if (auto m = match(line, rgx.insert_src_fn_ssi_or_sst)) { + ft["header_make"] = 0; + ft["header_metadata"] = 0; + auto insert_fn = m.captures[2]; + auto insert_sub_pth = m.captures[1]; + auto fn_src_insert = + (markup_src_file_path ~ insert_sub_pth ~ insert_fn); + auto raw = new MarkupInsertRaw(); + auto markup_sourcefile_insert_content = + raw.markupSourceContentRawLineArray(fn_src_insert); + debug(insert) { // insert file + tell_l("red", line); + tell_l("red", fn_src_insert); + writeln( + " length contents insert array: ", + markup_sourcefile_insert_content.length + ); + } + auto ins = new Inserts(); + auto t = ins.scan_subdoc_source( + markup_sourcefile_insert_content, to!string(fn_src_insert), add + ); + static assert(!isTypeTuple!(t)); + auto tmparr = t[0]; + auto addsub = t[1]; + // NOTE(review): copies all but the last line of the insert; slot addsub-1 is skipped and stays empty - confirm this is intended + cont_arbitrary_max_length_set[add .. addsub-1] = tmparr[0 .. $-1]; + add=addsub; + /* + 1. load file, + 2. read lines; + 3. scan lines, + 4. if filename insert, and insert filename + 5. repeat 1 + 6. else + 7.
add line to new array; + */ + } else { + ft["header_make"] = 0; + ft["header_metadata"] = 0; + cont_arbitrary_max_length_set[add] = line; + ++add; + } + } + auto contents = cont_arbitrary_max_length_set[0..add].dup; + debug(insert) { // insert file + writeln(__LINE__); + writeln(contents.length); + writeln(add); + } + return contents; + } + } +} diff --git a/lib/sdp/ao_utils.d b/lib/sdp/ao_utils.d new file mode 100644 index 0000000..70ad667 --- /dev/null +++ b/lib/sdp/ao_utils.d @@ -0,0 +1,74 @@ +/* +#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil +* sisu_utils.d +*/ +mixin template ScreenTxtColors() { + string[string] scr_txt_color = [ + "off" : "\033[0m", + "white" : "\033[37m", + "white_bold" : "\033[1m", + "marker" : "\033[42m", + "bold" : "\033[1m", + "underline" : "\033[4m", + "invert" : "\033[7m", + "darkgrey_hi" : "\033[100m", + "grey_hi" : "\033[47m", + "pink_hi" : "\033[105m", + "fuchsia_hi" : "\033[45m", + "red_hi" : "\033[41m", + "orange_hi" : "\033[101m", + "yellow_hi" : "\033[103m", + "brown_hi" : "\033[43m", + "lightgreen_hi" : "\033[102m", + "green_hi" : "\033[42m", + "cyan_hi" : "\033[106m", + "blue_hi" : "\033[104m", + "navy_hi" : "\033[44m", + "grey" : "\033[90m", + "pink" : "\033[95m", + "fuchsia" : "\033[35m", + "ruby" : "\033[31m", + "red" : "\033[91m", + "orange" : "\033[91m", + "yellow" : "\033[93m", + "brown" : "\033[33m", + "green" : "\033[92m", + "darkgreen" : "\033[32m", + "cyan" : "\033[36m", + "blue" : "\033[94m", + "navy" : "\033[34m", + "black" : "\033[30m" + ]; + string[string] scr_txt_marker = [ + "white" : "\033[37m*\033[0m ", + "bold" : "\033[1m*\033[0m ", + "invert" : "\033[7m*\033[0m ", + "darkgrey_hi" : "\033[100m*\033[0m ", + "grey_hi" : "\033[47m*\033[0m ", + "pink_hi" : "\033[105m*\033[0m ", + "fuchsia_hi" : "\033[45m*\033[0m ", + "red_hi" : "\033[41m*\033[0m ", + "orange_hi" : "\033[101m*\033[0m ", + "yellow_hi" : "\033[103m*\033[0m ", + "brown_hi" : "\033[43m*\033[0m ", + "lightgreen_hi" : "\033[102m*\033[0m ", + "green_hi"
: "\033[42m*\033[0m ", + "cyan_hi" : "\033[106m*\033[0m ", + "blue_hi" : "\033[104m*\033[0m ", + "navy_hi" : "\033[44m*\033[0m ", + "grey" : "\033[90m*\033[0m ", + "pink" : "\033[95m*\033[0m ", + "fuchsia" : "\033[35m*\033[0m ", + "ruby" : "\033[31m*\033[0m ", + "red" : "\033[91m*\033[0m ", + "orange" : "\033[91m*\033[0m ", + "yellow" : "\033[93m*\033[0m ", + "brown" : "\033[33m*\033[0m ", + "green" : "\033[92m*\033[0m ", + "darkgreen" : "\033[32m*\033[0m ", + "cyan" : "\033[36m*\033[0m ", + "blue" : "\033[94m*\033[0m ", + "navy" : "\033[34m*\033[0m ", + "black" : "\033[30m*\033[0m " + ]; +} diff --git a/lib/sdp/sdp.d b/lib/sdp/sdp.d new file mode 100644 index 0000000..3bfbc5b --- /dev/null +++ b/lib/sdp/sdp.d @@ -0,0 +1,161 @@ +#!/usr/bin/env rdmd +/* +#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil +* sdp.d :sdp: +*/ +import + std.stdio, + std.file, + std.regex, + std.utf, + std.string, + std.array, + std.json, + std.process, + std.exception, + std.typecons, + std.algorithm, + std.range, + std.container, + std.traits, + lib.sdp.ao_output_debugs, // ao_output_debugs.d + lib.sdp.ao_defaults, // ao_defaults.d + lib.sdp.ao_rgx, // ao_rgx.d + lib.sdp.ao_interface, // ao_interface.d + lib.sdp.ao_emitter, // ao_emitter.d + lib.sdp.ao_scan_inserts, // ao_scan_inserts.d + lib.sdp.ao_markup_source_raw, // ao_markup_source_raw.d + lib.sdp.ao_abstract_doc_source, // ao_abstract_doc_source.d + lib.sdp.ao_assertions, // ao_assertions.d + lib.sdp.ao_object_setter, // ao_object_setter.d + lib.sdp.ao_utils; // ao_utils.d +import std.conv : to; +mixin RgxInit; mixin Interfaces; mixin Emitters; +// main: command-line entry point. +// Arguments matching rgx.flag_action (--flag) are passed to cli.extract_actions to update `actions`; +// arguments matching rgx.src_pth (.sst/.ssm paths) are collected in fns_src. +// Each collected file is read as raw lines, master (.ssm) files are passed through Inserts.scan_doc_source, +// and the resulting lines are handed to abs.abstract_doc_source; debug(checkdoc) builds also run the debug output. +void main(string[] argv) { + mixin SiSUheader; + mixin SiSUbiblio; + mixin SiSUrgxInitFlags; + mixin SiSUmarkupRaw; + mixin SiSUdocInserts; + mixin SiSUdocAbstraction; + mixin SiSUoutputDebugs; + mixin ScreenTxtColors; + auto cli = new CLI(); + auto raw = new MarkupRaw(); + auto abs = new Abstraction(); + auto dbg = new SDPoutputDebugs(); + char[][] msc; + string[1000] fns_src; + string
flag_action; + string[string] actions; + int file_count; + actions = [ + "assert" : "yes", + ]; + auto rgx = new Rgx(); + scope(success) { + debug(checkdoc) { + writeln( + scr_txt_color["cyan"], + "~ run complete, ok ~ ", + scr_txt_color["off"], + ); + } + } + scope(failure) { + debug(checkdoc) { + writeln( + scr_txt_color["fuchsia"], // must be an existing scr_txt_color key; a missing key would throw here and mask the original failure + "~ run failure ~", + scr_txt_color["off"], + ); + } + } + foreach(cmdlnins; argv) { + if (match(cmdlnins, rgx.flag_action)) { + flag_action ~= " " ~ cmdlnins; + actions = cli.extract_actions(cmdlnins, actions); + } else if (match(cmdlnins, rgx.src_pth)) { + fns_src[file_count] = cmdlnins; + file_count++; + } + } + foreach(fn_src; fns_src) { + if (!empty(fn_src)) { + scope(success) { + debug(checkdoc) { + writeln( + scr_txt_color["green"], + "~ document complete, ok ~ ", + scr_txt_color["off"], + fn_src + ); + } + } + scope(failure) { + debug(checkdoc) { + writeln( + scr_txt_color["red"], + "~ document run failure ~", + scr_txt_color["off"], + fn_src + ); + } + } + auto markup_sourcefile_content = + raw.markupSourceContentRawLineArray(fn_src); // alternative call + debug(insert) { + string[string] sysenv; + sysenv["pwd"] = shell("pwd"); + writeln(sysenv["pwd"]); + auto m = match(fn_src, rgx.src_pth); + auto markup_src_file_path = m.captures[1]; + writeln("markup source file path: ", markup_src_file_path); // writeln(m.captures[1]); + writeln(m.captures[2]); + } + if (match(fn_src, rgx.src_fn_master)) { + auto ins = new Inserts(); + auto markup_master_sourcefile_content = + ins.scan_doc_source(markup_sourcefile_content, fn_src); + msc = markup_master_sourcefile_content; + } else { + msc = markup_sourcefile_content; + } + debug(raw) { + foreach (line; msc) { + writeln(line); + } + } + auto t = + abs.abstract_doc_source(msc); + static assert(!isTypeTuple!(t)); + auto contents = t[0]; + auto metadata_json = t[1]; + auto make_json = t[2]; + auto bookindex_unordered_hashes = t[3]; + auto biblio = t[4]; + debug(checkdoc) { +
dbg.abstract_doc_source_debugs( + contents, + make_json, + metadata_json, + bookindex_unordered_hashes, + biblio, + fn_src, + actions + ); + } + scope(exit) { + destroy(msc); + destroy(t); + destroy(contents); + destroy(make_json); + destroy(metadata_json); + destroy(bookindex_unordered_hashes); + destroy(fn_src); + destroy(biblio); + } + } else { // terminate, stop + } + } +} diff --git a/lib/sdp/sdp.org b/lib/sdp/sdp.org new file mode 100644 index 0000000..076b42b --- /dev/null +++ b/lib/sdp/sdp.org @@ -0,0 +1,331 @@ +#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil +#+PRIORITIES: A F E +* debug + objects + header + heading + poem verse ocn + +* TODO [#A] tasks +** sisu_loop.d +*** header extraction (make instructions & metadata) +**** metadata +title & author heading +**** make +***** header make: search and replace +***** auto-identify structure from make instructions +***** auto-number headings, with starting level as instructed in header make +*** markup +**** para markers: extract (hash store) & clean (remove from text) +place in hash + heading level, numeric + indent info, numeric: first, rest + bullet, bool +issue representing complex structures, consider using bits of json! +string h = "{ \"heading_level\": \"A\" }"; +string para_attrib = "{ \"indent_first\": 0, \"indent_rest\": 0, \"bullet\": false }"; +# string s = "{ +# \"indent_first\": 0, +# \"indent_rest\": 0, +# \"bullet\": false, +# \"name\": "", +# \"tags\": "", +# \"digest\": "", +# }"; +string para_attrib = "{ + \"indent_first\": 0, + \"indent_rest\": 0, + \"bullet\": false, +}"; +string obj_ids = "{ + \"name\": \"\", + \"tags\": \"\", + \"digest\": \"\", +}"; +string obj_lv = "{ + \"markup\": \"\", // [A-D1-4] lv + \"num_markup\": 0, // [0-7] ln + \"num_collapsed\": 0, // [0-7] lc +}"; +string obj_citation = "{ + \"ocn\": 0, // + \"on\": true, // +}"; + +**** structure as json? +odv +osp +node +parent + +you could keep ocn info (bool & number?)
this way, consider + +also clean + block markers + book index! +**** inline markup of text for subsequent processing +regex substitution +search and replace inline ascii markup with less ambiguous utf-8 markers +***** footnotes count/number +*** ocn (on & off) +*** headings +**** heading numbering? + +**** segment naming & id tags + +*** backmatter +**** book index +clean from object, store under separate key +consider json +**** bibliography +identify and store under separate hash +consider using json to structure +**** glossary +identify and store under separate hash? +*** composite documents +import files + +<< path_to/file.sst + +** cleanup + +** ranges & node structure info: structure information (levels, ranges & relationships) +*** heading pointers +**** headings_array heading pointer +**** data_abstraction_array heading pointer +*** ocn +*** heading +**** markup level [A-D1-4] +**** collapsed level [0-7] +*** parent +**** heading markup level [A-D1-4] +**** heading ocn +*** range, heading & children (till next heading of same level or higher (close level mark)) +**** array range (x..y) +includes sub headings and non-heading objects till next heading +debate whether to use relative or absolute values (i.e. array points) +**** ocn range (x..y) + +NodeStructureMetadata see node_jstr +abstract_doc: obj_att\|node_jstr\|node +emitter: ObjAttrib +heading_pointer +** misc +*** temporarily turn off ocn + +--~# ---# --+# + +~# & -# + +*** parent & children +heading parent & children +paragraph parent + +*** dir (read using dlang lib) +*** how to deal with complex data structures?
+try hashes with json + +*** read files +**** config +**** make +* compile + http://www.dprogramming.com/tutorial.php + http://www.dprogramming.com/tutorial.php#newusers +** rdmd +rdmd --build-only --chatty -d-debug sdp.d +rdmd -d-debug sisu_ref0.d ../markup/test0.sst + +VER='sdp2' && rdmd --build-only --chatty lib/${VER}/sdp.d + +** dmd +dmd -debug sdp.d + +VER='sdp1' && dmd -debug -of./bin/${VER} lib/${VER}/sdp.d +VER='sdp1' && dmd -debug=checkdoc -of./bin/${VER} lib/${VER}/sdp.d + +** ldc2 +ldc2 -d-debug sdp.d + +VER='1' && ldc2 -unittest -d-debug=summary -of=./bin/sdp${VER} lib/sdp${VER}/sdp.d +VER='1' && ldc2 -d-debug=checkdoc -d-debug=summary -of=./bin/sdp${VER} lib/sdp${VER}/sdp.d +VER='1' && ldc2 -d-debug=checkdoc -d-debug=summary -of=./bin/sdp${VER} lib/sdp${VER}/sdp.d + +VER='sdp1' && ldc2 -d-debug=objects -d-debug=summary -of=./bin/${VER} lib/${VER}/sdp.d + +VER='sdp0' +VER='sdp1' +VER='sdp2' +VER='sdp3' +VER='sdp' +ldc2 -d-debug -of=./bin/sisu_${VER} lib/${VER}/sdp.d +ldc2 -d-debug -of=./bin/sisu_${VER} lib/${VER}/sdp.d +ldc2 -d-debug=heading -of=./bin/sisu_${VER} lib/${VER}/sdp.d +ldc2 -d-debug=objects -of=./bin/sisu_${VER} lib/${VER}/sdp.d + +VER='sdp1' && ldc2 -d-debug=objects -d-debug=summary -of=./bin/sdp lib/${VER}/sdp.d + +*** remove later +binbuild="sdp1"; ldc2 -d-debug ./${binbuild}.d && time ./${binbuild} markup/test0.sst +binbuild="sdp1"; gdc -fdebug -o ./${binbuild} ./${binbuild}.d && time ./${binbuild} markup/test0.sst + +binbuild="sdp1" +ldc2 -release ./${binbuild}.d && time ./${binbuild} markup/test0.sst + +#cd lib +#ldc1 -d-debug -of=../bin/sdp0 sdp/sdp.d + +** gdc +gdc -o ./bin/sdp0 lib/sdp.d +VER='sdp2' && +gdc -o ./bin/${VER} lib/${VER}/sdp.d + +VER='sdp2' && gdc-5 -o ./bin/${VER} ./lib/${VER}/sdp.d + +#VER='sdp2' && gdc -o -d-debug=objects -d-debug=summary -of=./bin/${VER} lib/${VER}/sdp.d + +*** remove later +binbuild="sdp1" +ldc2 -d-debug ${binbuild}.d && time ${binbuild} markup/test0.sst +gdc -fdebug -o ${binbuild} ${binbuild}.d 
&& time ${binbuild} markup/test0.sst + +* run +sdp0 markup/test0.sst + +~utils/d/bin/sdp0 filename.sst +~utils/d/bin/sdp1 filename.sst + +cd markup +sdp0 test0.sst + +* compile notes +** ldc +import path[0] = /usr/include/d/ldc +import path[1] = /usr/include/d +** gdc +gdmd -help +import path[0] = /usr/include/d/4.9/x86_64-linux-gnu +import path[1] = /usr/include/d/4.9 +** dmd (non-free) install arch? +** issues + +* notes +*** read file +char[][] markup_sourcefile_content = split(cast(char[])read(fn_src), rgx_line_delimiter); +char[][] markup_sourcefile_content = markupSourceLineArray(markupSourceString(fn_src)); +** build + +** book index +// http://forum.dlang.org/post/k8of07$1bgu$1@digitalmars.com +// http://forum.dlang.org/post/dfyowpjhdaemhxhepfmk@forum.dlang.org + // recast --- + // ocns ; sub ; main + string[][string][string] bookindex; + // as --- + // ocns ; sub ; main + string[]string[][string][] bookindex_the; + // with sorted main & sub + +// gdc -release -o ./${binbuild} ./${binbuild}.d && time ./${binbuild} markup/test0.sst + +// vim ./lib/sdp1/sisu_*.d +// vim **/sdp1/sisu_*.d + +// emacs **/sdp1/sisu_*.d & +// emacs ./lib/sdp1/sisu_*.d & + +// VER='2' && ldc2 -unittest -d-debug=insert -d-debug=objects -d-debug=headings -d-debug=summary -d-debug=checkdoc -d-debug=subheader -of=./bin/sdp${VER} lib/sdp${VER}/sdp.d + +* bugs +ok +time ~dlang/bin/sdp1 --html --no-assert en/[a-eg-z]* +not ok +time ~dlang/bin/sdp1 --html --no-assert en/free_for_all.peter_wayner.sst en/gpl3.fsf.sst +works if: + poems removed from gpl3; + biblio removed from free for all +time ~dlang/bin/sdp0 --html --no-assert en/free_for_all.peter_wayner.sst en/gpl2.fsf.sst en/gpl3.fsf.sst +time ~dlang/bin/sdp0 --html --no-assert en/[fg]* +time ~dlang/bin/sdp0 --html --no-assert en/[a-z]* +leaving out free_for_all seems to make it ok +time ~dlang/bin/sdp0 --html --no-assert en/[a-eg-z]* +leaving out any two bibliography entries within free_for_all appears to fix the problem! 
+ +works in dmd not in ldc2 + +*** Error in `/home/ralph/grotto/repo/git.repo/utils/d/bin/sdp2': corrupted double-linked list: 0x00000008b905b310 *** +in free_for_all bibliography first instance FatBrain ref +gets stuck after: +en/free_for_all.peter_wayner.sst +* desc +** process files +.sst (text) regular +.ssm (master) contains either .sst or .ssi +.ssi (insert) processed within .ssm (or called into a .ssm by another .ssi) +** header +*** metadata +*** make (@make:) +cover_image +home_button_image +home_button_text +footer +headings +num_top +breaks +substitute +bold +italics +emphasis +texpdf_font +css + +** structure +document structure is determined by headings of different levels +headings must either +(a) be explicitly marked as such, or +(b) be given by a regex (in the appropriate make header) that allows the program to determine headings within text +types of heading: +*** document separators (A-D) +level A is the title +*** text headings (1-4) +*** header make heading regex +**** heading levels +***** markup level [A-D1-4] +***** markup level numbers [0-7] or [1-8] +***** collapsed level numbers [0-7] or [1-8] +***** nodes +***** json search segments? chapter equivalent, decide +**** switches, ocn on off (dummy header) +** object attributes +types of object: +*** headings (document structure objects) +**** level +**** segment name +**** numbering +*** paragraphs +*** blocks +types of block object: +**** group +**** block +**** poem (verse) +**** code +**** table +**** quote +**** TODO alt? 
+** paragraph attributes +types of paragraph attribute: +*** indent +**** paragraph (1 - 9) +**** first line level (1 - 9), & subsequent text level (1 - 9) +indent (first, rest), bullet +*** bullets +** inline text (paragraph) attributes +bold, italics, emphasis, superscript, subscript, strike, add, monospace, footnote (number them) +types of text (within paragraph) attribute: +*** bold +*** italics +*** emphasis +*** underscore +*** strike +*** superscript +*** subscript +*** ... -- cgit v1.2.3