From 45e96028ce7696381aca7f155c21b0b718b6a610 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 30 Sep 2015 13:07:43 -0400 Subject: sdp, abstract objects, a start --- lib/sdp/ao_abstract_doc_source.d | 1695 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 1695 insertions(+) create mode 100644 lib/sdp/ao_abstract_doc_source.d (limited to 'lib/sdp/ao_abstract_doc_source.d') diff --git a/lib/sdp/ao_abstract_doc_source.d b/lib/sdp/ao_abstract_doc_source.d new file mode 100644 index 0000000..2fec35d --- /dev/null +++ b/lib/sdp/ao_abstract_doc_source.d @@ -0,0 +1,1695 @@ +/* +#+OPTIONS: ^:nil _:nil#+OPTIONS: ^:nil _:nil +* sisu_abstract_doc_source.d +*/ +mixin template SiSUdocAbstraction() { + class Abstraction { + auto abstract_doc_source(char[][] markup_sourcefile_content) { + mixin ObjectSetters; + mixin AssertionsOnMarkupDocumentStructure; + mixin AssertionsOnBlocks; + mixin ScreenTxtColors; + auto rgx = new Rgx(); + auto set_oa = new ObjectAbstractSet(); + auto set_header = new HeaderDocMetadataMakeJson(); + auto notesection = new NotesSection(); + string[string][131072] contents_arbitrary_max_length_set; // 2000 pg * 50 lines == 100000 + string[1024] notes; + string notes_str; + string[string] object, processing, head; + string biblio_tag_name, biblio_tag_entry, book_idx_tmp, st; + string[1024] biblio_arr_json = biblio_entry_tags_jsonstr; + JSONValue[1024] bib_arr_json; + uint[string] line_occur; + int counter, previous_count, count_biblio_entry, ocn, ocn_, verse_line, bib_entry, heading_pointer, notepoint; + string indent_first, indent_second; + string[][string][string] bookindex_unordered_hashes; + bool bullet = true; + uint[string] lv = [ + "lv" : 0, + "h0" : 0, + "h1" : 0, + "h2" : 0, + "h3" : 0, + "h4" : 0, + "h5" : 0, + "h6" : 0, + "h7" : 0, + "lcn" : 0, + ]; + int[string] collapsed_lev = [ + "h0" : 0, + "h1" : 0, + "h2" : 0, + "h3" : 0, + "h4" : 0, + "h5" : 0, + "h6" : 0, + "h7" : 0 + ]; + auto rgx_h_A = regex(r"^(none)"); + auto rgx_h_B = regex(r"^(none)"); + auto rgx_h_C = regex(r"^(none)"); + auto rgx_h_D = regex(r"^(none)"); + auto rgx_h_1 = regex(r"^(none)"); + auto rgx_h_2 = regex(r"^(none)"); + auto rgx_h_3 = regex(r"^(none)"); + auto rgx_h_4 = regex(r"^(none)"); + auto str_h_A = "^(none)"; + auto str_h_B = "^(none)"; + auto str_h_C = "^(none)"; + auto str_h_D = "^(none)"; + auto str_h_1 = "^(none)"; + auto str_h_2 = "^(none)"; + auto str_h_3 = "^(none)"; + auto str_h_4 = "^(none)"; + string content_non_header = "8"; + string node; + auto obj_im = new ObjInlineMarkup(); + auto obj_att = new ObjAttrib(); + auto object_citation_number = new OCNemitter(); + auto ft = flag_type.dup; + int ocn_emit(int ocn_status_flag) { + return object_citation_number.ocn_emitter(ocn_status_flag); + } + auto bookindex_extract_hash = new BookIndexNuggetHash(); + string[][string][string] bkidx_hash(string bookindex, int ocn) { + return bookindex_extract_hash.bookindex_nugget_hash(bookindex, ocn); + } + auto node_construct = new NodeStructureMetadata(); + string node_jstr( + string lvn, + int ocn_, + int counter, + int heading_pointer, + string is_ + ) { + return node_construct.node_emitter( + lvn, + ocn_, + counter, + heading_pointer, + is_ + ); + } + string node_jstr_heading( + string lvn, + string lcn, + int ocn_, + int counter, + int heading_pointer, + string is_ + ) { + return node_construct.node_emitter_heading( + lvn, + lcn, + ocn_, + counter, + heading_pointer, + is_ + ); + } + string[string] ocn_poem = [ + "start" : "", + "end" : "" + ]; + int tell_lo(string color, int ocn, in char[] line) { + writeln(scr_txt_marker[color], to!string(ocn), " ", to!string(line)); + return 0; + } + int tell_l(string color, in char[] line) { + writeln(scr_txt_marker[color], line); + return 0; + } + scope(success) { + } + scope(failure) { + } + scope(exit) { + destroy(contents_arbitrary_max_length_set); + destroy(object); + destroy(processing); + destroy(biblio_arr_json); + } + auto dochead_make = parseJSON(header_make_jsonstr).object; + auto dochead_metadata = parseJSON(header_metadata_jsonstr).object; + foreach (line; markup_sourcefile_content) { + scope(exit) { + } + scope(failure) { + writeln(__FILE__, ":", __LINE__, " failed here:"); + writeln(" line: ", line); + writeln(" is : ", object["is"]); + writeln(" node: ", node); + } + line = replaceAll(line, rgx.true_dollar, "$$$$"); + debug(source) { // source lines + writeln(line); + } + debug(srclines) { + if (!line.empty) { // source lines, not empty + writeln(scr_txt_marker["green"], line); + } + } + if ((!line.empty) && (ft["ocn_status_multi_obj"] == 0)) { + if (match(line, rgx.ocn_block_marks)) { + if (match(line, rgx.ocn_off_block)) { + ft["ocn_status_multi_obj"] = 1; + debug(ocnoff) { + tell_l("fuchsia", line); + } + } + if (match(line, rgx.ocn_off_block_dh)) { + ft["ocn_status_multi_obj"] = 2; + debug(ocnoff) { + tell_l("fuchsia", line); + } + } + } else { + if (ft["ocn_status_multi_obj"] == 0) { + if (match(line, rgx.ocn_off)) { + ft["ocn_status"] = 1; + } else if (match(line, rgx.ocn_off_dh)) { + ft["ocn_status"] = 2; + } else { + ft["ocn_status"] = 2; + ft["ocn_status"] = 0; + } + } else { + ft["ocn_status"] = ft["ocn_status_multi_obj"]; + } + } + } else if ((!line.empty) && (ft["ocn_status_multi_obj"] > 0)) { + if (auto m = match(line, rgx.ocn_off_block_close)) { + ft["ocn_status_multi_obj"] = 0; + ft["ocn_status"] = 0; + debug(ocnoff) { + tell_l("green", line); + } + } + } + if (ft["code"] == 1) { + if (ft["curly_code"] == 1) { + if (auto m = match(line, rgx.block_curly_code_close)) { + debug(code) { // code (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["code"] = 2; + ft["curly_code"] = 0; + } else { + debug(code) { // code (curly) line + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // code (curly) line + } + } else if (ft["tic_code"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(code) { // code (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["code"] = 2; + ft["tic_code"] = 0; + } else { + debug(code) { // code (tic) line + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // code (tic) line + } + } + } else if (!match(line, rgx.regular_parse_skip)) { + if (((match(line, rgx.heading_biblio) + || (ft["heading_biblio"] == 1))) + && (!match(line, rgx.heading)) + && (!match(line, rgx.comment))) { + if (match(line, rgx.heading_biblio)) { + ft["heading_biblio"] = 1; + } + if (empty(line) && (bib_entry == 0)) { + count_biblio_entry++; + bib_entry = 1; + } + debug(biblio) { + writeln( + scr_txt_color["yellow"], + "* ", + scr_txt_color["off"], + to!string(count_biblio_entry), + " ", + line + ); + } + if (match(line, rgx.biblio_tags)) { + auto bt = match(line, rgx.biblio_tags); + bib_entry = 0; + st=to!string(bt.captures[1]); + biblio_tag_entry=to!string(bt.captures[2]); + JSONValue j = parseJSON(biblio_arr_json[count_biblio_entry]); + if (match(st, rgx.biblio_abbreviations)) { + biblio_tag_name=biblio_tag_map[st]; + } else { + biblio_tag_name=st; + } + j.object[biblio_tag_name] = biblio_tag_entry; + auto header_tag_value=to!string(bt.captures[2]); + switch (biblio_tag_name) { + case "author_raw": // author_arr author (fn sn) + j["author_arr"]=split(header_tag_value, rgx.arr_delimiter); + string tmp; + foreach (au; j["author_arr"].array) { + if (auto x = match(au.str, rgx.name_delimiter)) { + tmp ~= x.captures[2] ~ " " ~ x.captures[1] ~ ", "; + } else { + tmp ~= au.str; + } + } + tmp = replace(tmp, rgx.trailing_comma, ""); + j["author"].str = tmp; + break; + case "editor_raw": // editor_arr editor (fn sn) + j["editor_arr"]=split(header_tag_value, rgx.arr_delimiter); + string tmp; + foreach (ed; j["editor_arr"].array) { + if (auto x = match(ed.str, rgx.name_delimiter)) { + tmp ~= x.captures[2] ~ " " ~ x.captures[1] ~ ", "; + } else { + tmp ~= ed.str; + } + } + tmp = replace(tmp, rgx.trailing_comma, ""); + j["editor"].str = tmp; + break; + case "fulltitle": // title & subtitle + break; + default: + break; + } + auto s = to!string(j); + s = j.toString(); + debug(biblio) { + writeln( + scr_txt_color["red"], + "* ", + scr_txt_color["off"], + biblio_tag_name, + ": ", + biblio_tag_entry + ); + writeln(biblio_arr_json[count_biblio_entry]); + writeln(j[biblio_tag_name], ":", j[biblio_tag_name]); + } + biblio_arr_json[count_biblio_entry] = s; + biblio_tag_entry=""; + } + } else if (ft["poem"] == 1) { + if (ft["curly_poem"] == 1) { + if (auto m = match(line, rgx.block_curly_poem_close)) { + object["obj"]="verse"; // check that this is as you please + debug(poem) { // poem (curly) close + writeln( + scr_txt_color["red"], + "* [poem curly] ", + scr_txt_color["off"], + line + ); + } + if (processing.length > 0) { + object["obj"] = processing["verse"]; + } + debug(poem) { // poem (curly) close + writeln(__LINE__); + writeln( + scr_txt_marker["fuchsia"], + ocn, + " ", + line + ); + } + if (object.length > 0) { + debug(poem) { // poem (curly) close + writeln(__LINE__); + tell_lo( + "fuchsia", + ocn, + object["obj"] + ); + writeln(__LINE__); + } + object["is"] = "verse"; + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } + ocn_poem["end"] = to!string(ocn); + ft["blocks"] = 2; + ft["poem"] = 2; + ft["curly_poem"] = 0; + } else { + processing["verse"] ~= line ~= "\n"; + if (ft["verse_new"] == 1) { + ocn = ocn_emit(ft["ocn_status"]); + ft["verse_new"] = 0; + } else if (match(line, rgx.line_delimiter_only)) { + verse_line = 0; + ft["verse_new"] = 1; + } + if (ft["verse_new"] == 1) { + verse_line=1; + object["obj"] = processing["verse"]; + debug(poem) { // poem verse + writeln(scr_txt_marker["green"], + ocn, + " curly\n", + object["obj"]); + } + processing.remove("verse"); + object["is"] = "verse"; + node = node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } + } + } else if (ft["tic_poem"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { // tic_poem_close + object["obj"]="verse"; // check that this is as you please + debug(poem) { // poem (curly) close + writeln( + scr_txt_color["red"], + "* [poem tic] ", + scr_txt_color["off"], + line + ); + } + if (processing.length > 0) { // needs looking at + object["obj"] = processing["verse"]; + } + if (object.length > 0) { + debug(poem) { // poem (tic) close + writeln(__LINE__); + tell_lo("fuchsia", ocn, line); + } + processing.remove("verse"); + object["is"] = "verse"; + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + ocn_poem["end"] = to!string(ocn); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } + ft["blocks"] = 2; + ft["poem"] = 2; + ft["tic_poem"] = 0; + } else { + processing["verse"] ~= line ~= "\n"; + if (ft["verse_new"] == 1) { + ocn = ocn_emit(ft["ocn_status"]); + ft["verse_new"] = 0; + } else if (match(line, rgx.line_delimiter_only)) { + ft["verse_new"] = 1; + verse_line = 0; + } + if (ft["verse_new"] == 1) { + verse_line=1; + object["obj"] = processing["verse"]; + debug(poem) { // poem (tic) close + writeln(scr_txt_marker["green"], + ocn, + " tic\n", + object["obj"]); + } + processing.remove("verse"); + object["is"] = "verse"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } + } + } + } else if (ft["group"] == 1) { + if (ft["curly_group"] == 1) { + if (auto m = match(line, rgx.block_curly_group_close)) { + debug(group) { // group (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["group"] = 2; + ft["curly_group"] = 0; + } else { + debug(group) { // group + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build group array (or string) + } + } else if (ft["tic_group"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(group) { // group (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["group"] = 2; + ft["tic_group"] = 0; + } else { + debug(group) { // group + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build group array (or string) + } + } + } else if (ft["block"] == 1) { + if (ft["curly_block"] == 1) { + if (auto m = match(line, rgx.block_curly_block_close)) { + debug(block) { // block (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["block"] = 2; + ft["curly_block"] = 0; + } else { + debug(block) { // block + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build block array (or string) + } + } else if (ft["tic_block"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(block) { // block (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["block"] = 2; + ft["tic_block"] = 0; + } else { + debug(block) { // block + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build block array (or string) + } + } + } else if (ft["quote"] == 1) { + if (ft["curly_quote"] == 1) { + if (auto m = match(line, rgx.block_curly_quote_close)) { + debug(quote) { // quote (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["quote"] = 2; + ft["curly_quote"] = 0; + } else { + debug(quote) { // quote + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build quote array (or string) + } + } else if (ft["tic_quote"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(quote) { // quote (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["quote"] = 2; + ft["tic_quote"] = 0; + } else { + debug(quote) { // quote + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build quote array (or string) + } + } + } else if (ft["table"] == 1) { + if (ft["curly_table"] == 1) { + if (auto m = match(line, rgx.block_curly_table_close)) { + debug(table) { // table (curly) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["table"] = 2; + ft["curly_table"] = 0; + } else { + debug(table) { // table + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build table array (or string) + } + } else if (ft["tic_table"] == 1) { + if (auto m = match(line, rgx.block_tic_close)) { + debug(table) { // table (tic) close + tell_l("blue", line); + } + ft["blocks"] = 2; + ft["table"] = 2; + ft["tic_table"] = 0; + } else { + debug(table) { // table + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; // build table array (or string) + } + } + } else { + assert( + (ft["blocks"] == 0) + || (ft["blocks"] == 2), + "block status: none or closed" + ); + assertions_flag_types_block_status_none_or_closed(ft); + if (auto m = match(line, rgx.block_curly_code_open)) { + debug(code) { // code (curly) open + writeln( + scr_txt_color["blue"], + "* [code curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["code"] = 1; + ft["curly_code"] = 1; + } else if (auto m = match(line, rgx.block_curly_poem_open)) { + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + debug(poem) { // poem (curly) open + writeln( + scr_txt_color["red"], + "* [poem curly] ", + scr_txt_color["off"], + line + ); + } + ocn_poem["start"] = to!string(ocn); + ft["blocks"] = 1; + ft["verse_new"] = 1; + ft["poem"] = 1; + ft["curly_poem"] = 1; + } else if (auto m = match(line, rgx.block_curly_group_open)) { + debug(group) { // group (curly) open + writeln( + scr_txt_color["blue"], + "* [group curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["group"] = 1; + ft["curly_group"] = 1; + } else if (auto m = match(line, rgx.block_curly_block_open)) { + debug(block) { // block (curly) open + writeln( + scr_txt_color["blue"], + "* [block curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["block"] = 1; + ft["curly_block"] = 1; + } else if (auto m = match(line, rgx.block_curly_quote_open)) { + debug(quote) { // quote (curly) open + writeln( + scr_txt_color["blue"], + "* [quote curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["quote"] = 1; + ft["curly_quote"] = 1; + } else if (auto m = match(line, rgx.block_curly_table_open)) { + debug(table) { // table (curly) open + writeln( + scr_txt_color["blue"], + "* [table curly] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["table"] = 1; + ft["curly_table"] = 1; + } else if (auto m = match(line, rgx.block_tic_code_open)) { + debug(code) { // code (tic) open + writeln( + scr_txt_color["blue"], + "* [code tic] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["code"] = 1; + ft["tic_code"] = 1; + } else if (auto m = match(line, rgx.block_tic_poem_open)) { + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + debug(poem) { // poem (tic) open + writeln( + scr_txt_color["red"], + "* [poem tic] ", + scr_txt_color["off"], + line + ); + } + ocn_poem["start"] = to!string(ocn); + ft["blocks"] = 1; + ft["verse_new"] = 1; + ft["poem"] = 1; + ft["tic_poem"] = 1; + } else if (auto m = match(line, rgx.block_tic_group_open)) { + debug(group) { // group (tic) open + writeln( + scr_txt_color["blue"], + "* [group tic] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["group"] = 1; + ft["tic_group"] = 1; + } else if (auto m = match(line, rgx.block_tic_block_open)) { + debug(block) { // block (tic) open + writeln( + scr_txt_color["blue"], + "* [block tic] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["block"] = 1; + ft["tic_block"] = 1; + } else if (auto m = match(line, rgx.block_tic_quote_open)) { + debug(quote) { // quote (tic) open + writeln( + scr_txt_color["blue"], + "* [quote tic] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["quote"] = 1; + ft["tic_quote"] = 1; + } else if (auto m = match(line, rgx.block_tic_table_open)) { + debug(table) { // table (tic) open + writeln( + scr_txt_color["blue"], + "* [table tic] ", + scr_txt_color["off"], + line + ); + } + ft["blocks"] = 1; + ft["table"] = 1; + ft["tic_table"] = 1; + } else if (!line.empty) { + assert( + !line.empty, + "line tested, line not empty surely" + ); + assert( + (ft["blocks"] == 0) + || (ft["blocks"] == 2), + "code block status: none or closed" + ); + if (ft["blocks"] == 2) { + debug(check) { // block + writeln(__LINE__); + tell_l("red", line); + } + assert( + match(line, rgx.book_index) + || match(line, rgx.book_index_open) + || ft["book_index"] == 1 + ); + } + if (auto m = match(line, rgx.book_index)) { + debug(bookindexmatch) { // book index + writeln( + scr_txt_color["blue"], "* [bookindex] ", scr_txt_color["off"], + to!string(m.captures[1]), "\n" + ); + } + object["bookindex"] = to!string(m.captures[1]); + } else if (auto m = match(line, rgx.book_index_open)) { + ft["book_index"] = 1; + book_idx_tmp = to!string(m.captures[1]); + debug(bookindexmatch) { // book index + writeln( + scr_txt_color["blue"], + "* [bookindex] ", + scr_txt_color["off"], + book_idx_tmp, "\n" + ); + } + } else if (ft["book_index"] == 1 ) { + if (auto m = match(line, rgx.book_index_close)) { + ft["book_index"] = 0; + object["bookindex"] = book_idx_tmp ~ to!string(m.captures[1]); + debug(bookindexmatch) { // book index + writeln( + scr_txt_color["blue"], + "* [bookindex] ", + scr_txt_color["off"], + book_idx_tmp, "\n" + ); + } + book_idx_tmp = ""; + } else { + book_idx_tmp ~= line; + } + } else { + if (auto m = match(line, rgx.comment)) { + debug(comment) { + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; + contents_arbitrary_max_length_set[counter] = + set_oa.contents_comment(strip(object["obj"])); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + line_occur["header_metadata"] = 0; + line_occur["header_make"] = 0; + line_occur["heading"] = 0; + line_occur["para"] = 0; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + counter++; + } else if (auto m = match(line, rgx.header_make)) { + debug(header1) { // header + tell_l("yellow", line); + } + ft["header"] = 1; + ft["header_make"] = 1; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + line_occur["header_make"]++; + object["obj"] ~= line ~= "\n"; + } else if (auto m = match(line, rgx.header_metadata)) { + debug(header1) { // header + tell_l("yellow", line); + } + ft["header"] = 1; + ft["header_make"] = 0; + ft["header_metadata"] = 1; + ft["heading"] = 0; + ft["para"] = 0; + line_occur["header_metadata"]++; + object["obj"] ~= line ~= "\n"; + } else if (ft["header_make"] == 1 + && (line_occur["header_make"] > 0)) { + if (auto m = match(line, rgx.header_sub)) { + debug(header1) { // header sub + tell_l("yellow", line); + } + line_occur["header_make"]++; + object["obj"] ~= line ~= "\n"; + } + } else if (ft["header_metadata"] == 1 + && (line_occur["header_metadata"] > 0)) { + if (auto m = match(line, rgx.header_sub)) { + debug(header1) { // header sub + tell_l("yellow", line); + } + line_occur["header_metadata"]++; + object["obj"] ~= line ~= "\n"; + } + } else if (((line_occur["para"] == 0) + && (line_occur["heading"] == 0)) + && ((ft["para"] == 0) + && (ft["heading"] == 0))) { + if ((to!string(dochead_make["make"]["headings"]).length > 2) + && (ft["make_headings"] == 0)) { + debug(headingsfound) { + writeln(dochead_make["make"]["headings"]); + } + auto make_headings_txt = + match( + to!string(dochead_make["make"]["headings"]), + rgx.within_quotes); + char[][] make_headings_spl = + split( + cast(char[]) make_headings_txt.captures[1], + rgx.make_heading_delimiter); + debug(headingsfound) { + writeln(make_headings_spl.length); + writeln(make_headings_spl); + } + switch (make_headings_spl.length) { + case 7 : + if (!empty(make_headings_spl[6])) { + str_h_4 = "^(" ~ to!string(make_headings_spl[6]) ~ ")"; + rgx_h_4 = regex(str_h_4); + } + goto case; + case 6 : + if (!empty(make_headings_spl[5])) { + str_h_3 = "^(" ~ to!string(make_headings_spl[5]) ~ ")"; + rgx_h_3 = regex(str_h_3); + } + goto case; + case 5 : + if (!empty(make_headings_spl[4])) { + str_h_2 = "^(" ~ to!string(make_headings_spl[4]) ~ ")"; + rgx_h_2 = regex(str_h_2); + } + goto case; + case 4 : + if (!empty(make_headings_spl[3])) { + str_h_1 = "^(" ~ to!string(make_headings_spl[3]) ~ ")"; + rgx_h_1 = regex(str_h_1); + } + goto case; + case 3 : + if (!empty(make_headings_spl[2])) { + str_h_D = "^(" ~ to!string(make_headings_spl[2]) ~ ")"; + rgx_h_D = regex(str_h_D); + } + goto case; + case 2 : + if (!empty(make_headings_spl[1])) { + str_h_C = "^(" ~ to!string(make_headings_spl[1]) ~ ")"; + rgx_h_C = regex(str_h_C); + } + goto case; + case 1 : + if (!empty(make_headings_spl[0])) { + str_h_B = "^(" ~ to!string(make_headings_spl[0]) ~ ")"; + rgx_h_B = regex(str_h_B); + } + break; + default: + break; + } + ft["make_headings"] = 1; + } + if ((ft["make_headings"] == 1) + && ((line_occur["para"] == 0) + && (line_occur["heading"] == 0)) + && ((ft["para"] == 0) + && (ft["heading"] == 0))) { + if (match(line, rgx_h_B)) { + line = "B~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_C)) { + line = "C~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_D)) { + line = "D~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_1)) { + line = "1~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_2)) { + line = "2~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_3)) { + line = "3~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + if (match(line, rgx_h_4)) { + line = "4~ " ~ line; + debug(headingsfound) { + writeln(line); + } + } + } + if (auto m = match(line, rgx.heading)) { + ft["heading"] = 1; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading_biblio"] = 0; + ft["para"] = 0; + line_occur["heading"]++; + object["obj"] ~= line ~= "\n"; + object["lev"] ~= m.captures[1]; + assertions_doc_structure(object, lv); // includes most of the logic for collapsed levels + switch (to!string(object["lev"])) { + case "A": + object["lvn"]="0"; + lv["lv"] = 0; + lv["h0"]++; + lv["h1"] = 0; + lv["h2"] = 0; + lv["h3"] = 0; + lv["h4"] = 0; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + collapsed_lev["h0"] = 1; + object["lcn"] = to!string(collapsed_lev["h0"]); + break; + case "B": + collapsed_lev["h1"] = collapsed_lev["h0"] + 1; + object["lcn"] = to!string(collapsed_lev["h1"]); + object["lvn"]="1"; + lv["lv"] = 1; + lv["h1"]++; + lv["h2"] = 0; + lv["h3"] = 0; + lv["h4"] = 0; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "C": + collapsed_lev["h2"] = collapsed_lev["h1"] + 1; + object["lcn"] = to!string(collapsed_lev["h2"]); + object["lvn"]="2"; + lv["lv"] = 2; + lv["h2"]++; + lv["h3"] = 0; + lv["h4"] = 0; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "D": + collapsed_lev["h3"] = collapsed_lev["h2"] + 1; + object["lcn"] = to!string(collapsed_lev["h3"]); + object["lvn"]="3"; + lv["lv"] = 3; + lv["h3"]++; + lv["h4"] = 0; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "1": + if (lv["h3"] > 0) { + collapsed_lev["h4"] = collapsed_lev["h3"] + 1; + } else if (lv["h2"] > 0) { + collapsed_lev["h4"] = collapsed_lev["h2"] + 1; + } else if (lv["h1"] > 0) { + collapsed_lev["h4"] = collapsed_lev["h1"] + 1; + } else if (lv["h0"] > 0) { + collapsed_lev["h4"] = collapsed_lev["h0"] + 1; + } + object["lcn"] = to!string(collapsed_lev["h4"]); + object["lvn"]="4"; + lv["lv"] = 4; + lv["h4"]++; + lv["h5"] = 0; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "2": + if (lv["h5"] > 0) { + object["lcn"] = to!string(collapsed_lev["h5"]); + } else if (lv["h4"] > 0) { + collapsed_lev["h5"] = collapsed_lev["h4"] + 1; + object["lcn"] = to!string(collapsed_lev["h5"]); + } + object["lvn"]="5"; + lv["lv"] = 5; + lv["h5"]++; + lv["h6"] = 0; + lv["h7"] = 0; + break; + case "3": + if (lv["h6"] > 0) { + object["lcn"] = to!string(collapsed_lev["h6"]); + } else if (lv["h5"] > 0) { + collapsed_lev["h6"] = collapsed_lev["h5"] + 1; + object["lcn"] = to!string(collapsed_lev["h6"]); + } + object["lvn"]="6"; + lv["lv"] = 6; + lv["h6"]++; + lv["h7"] = 0; + break; + case "4": + if (lv["h7"] > 0) { + object["lcn"] = to!string(collapsed_lev["h7"]); + } else if (lv["h6"] > 0) { + collapsed_lev["h7"] = collapsed_lev["h6"] + 1; + object["lcn"] = to!string(collapsed_lev["h7"]); + } + object["lvn"]="7"; + lv["lv"] = 7; + lv["h7"]++; + break; + default: + break; + } + debug(heading) { // heading + tell_l("yellow", strip(line)); + } + } else if (line_occur["para"] == 0) { + if (auto m = match(line, rgx.para_indent)) { + debug(paraindent) { // para indent + tell_l("blue", line); + } + ft["para"] = 1; + object["obj"] ~= line ~= "\n"; + indent_first = to!string(m.captures[1]); + indent_second = "0"; + bullet = false; + } else if (auto m = match(line, rgx.para_bullet)) { + debug(parabullet) { // para bullet + tell_l("blue", line); + } + ft["para"] = 1; + object["obj"] ~= line; + indent_first = "0"; + indent_second = "0"; + bullet = true; + } else if (auto m = match(line, rgx.para_indent_hang)) { + debug(paraindenthang) { // para indent hang + tell_l("blue", line); + } + ft["para"] = 1; + object["obj"] ~= line; + indent_first = to!string(m.captures[1]); + indent_second = to!string(m.captures[2]); + bullet = false; + } else if (auto m = match(line, rgx.para_bullet_indent)) { + debug(parabulletindent) { // para bullet indent + tell_l("blue", line); + } + ft["para"] = 1; + object["obj"] ~= line; + indent_first = to!string(m.captures[1]); + indent_second = "0"; + bullet = true; + } else { + ft["para"] = 1; + object["obj"] ~= line; + indent_first = "0"; + indent_second = "0"; + bullet = false; + } + line_occur["para"]++; + } + } else if (line_occur["header_make"] > 0) { + debug(header) { // para + tell_l("red", line); + } + object["obj"] ~= line ~= "\n"; + line_occur["header_make"]++; + } else if (line_occur["header_metadata"] > 0) { + debug(header) { // para + tell_l("red", line); + } + object["obj"] ~= line ~= "\n"; + line_occur["header_metadata"]++; + } else if (line_occur["heading"] > 0) { + debug(heading) { // heading + tell_l("blue", line); + } + object["obj"] ~= line ~= "\n"; + line_occur["heading"]++; + } else if (line_occur["para"] > 0) { + debug(para) { // para + tell_l("blue", line); + } + object["obj"] ~= line; + line_occur["para"]++; + } + } + } else if (ft["blocks"] == 2) { + assert( + line.empty, + "line should be empty" + ); + assert( + (ft["blocks"] == 2), + "code block status: closed" + ); + assertions_flag_types_block_status_none_or_closed(ft); + if (ft["code"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = + ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "code"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["code"] = 0; + } else if (ft["poem"] == 2) { + object["bookindex"] = + ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "verse"; // check also + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block_ocn_string( + "poem", + "", + (ocn_poem["start"], ocn_poem["end"]), + node + ); // bookindex + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + ft["blocks"] = 0; + ft["poem"] = 0; + } else if (ft["table"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = + ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "table"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["table"] = 0; + } else if (ft["group"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = bkidx_hash(object["bookindex"], ocn); + object["is"] = "group"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["group"] = 0; + } else if (ft["block"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = bkidx_hash(object["bookindex"], ocn); + object["is"] = "block"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["block"] = 0; + } else if (ft["quote"] == 2) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "quote"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_block( + object["is"], + object["markup"], + object["attrib"], + ocn + ); + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + ft["blocks"] = 0; + ft["quote"] = 0; + } + } else { + assert( + line.empty, + "line should be empty" + ); + assert( + (ft["blocks"] == 0), + "code block status: none" + ); + if ((ft["header_make"] == 1) + && (line_occur["header_make"] > 0)) { + auto dochead_metadata_and_make = + set_header.header_metadata_and_make_jsonstr(strip(object["obj"]), dochead_metadata, dochead_make); + static assert(!isTypeTuple!(dochead_metadata_and_make)); + dochead_metadata = dochead_metadata_and_make[0]; + dochead_make = dochead_metadata_and_make[1]; + line_occur["header_make"] = 0; + line_occur["header_metadata"] = 0; + line_occur["heading"] = 0; + line_occur["para"]= 0; + ft["header"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + } else if ((ft["header_metadata"] == 1) + && (line_occur["header_metadata"] > 0)) { + auto dochead_metadata_and_make = + set_header.header_metadata_and_make_jsonstr(strip(object["obj"]), dochead_metadata, dochead_make); + static assert(!isTypeTuple!(dochead_metadata_and_make)); + dochead_metadata = dochead_metadata_and_make[0]; + dochead_make = dochead_metadata_and_make[1]; + line_occur["header_make"] = 0; + line_occur["header_metadata"] = 0; + line_occur["heading"] = 0; + line_occur["para"]= 0; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + } else if ((ft["heading"] == 1) + && (line_occur["heading"] > 0)) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = + ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "heading"; + node = + node_jstr_heading( + object["lvn"], + object["lcn"], + ocn, + counter, + heading_pointer, + object["is"] + ); // heading + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + heading_pointer++; + contents_arbitrary_max_length_set[counter] = + set_oa.contents_heading( + ft["ocn_status"], + object["markup"], + object["attrib"], + ocn, object["lev"], + object["lvn"], + object["lcn"] + ); + debug(objectrelated1) { // check + tell_l("blue", line); + } + line_occur["header_make"] = 0; + line_occur["header_metadata"] = 0; + line_occur["heading"] = 0; + line_occur["para"] = 0; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("lev"); + object.remove("lvn"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } else if ((ft["para"] == 1) && (line_occur["para"] > 0)) { + ocn = ocn_emit(ft["ocn_status"]); + object["bookindex"] = + ("bookindex" in object) ? object["bookindex"] : ""; + bookindex_unordered_hashes = + bkidx_hash(object["bookindex"], ocn); + object["is"] = "para"; + node = + node_jstr( + content_non_header, + ocn, + counter, + heading_pointer-1, + object["is"] + ); + object["markup"] = + obj_im.obj_inline_markup(object["is"], object["obj"]); + object["attrib"] = + obj_att.obj_attributes(object["is"], object["obj"], node); + contents_arbitrary_max_length_set[counter] = + set_oa.contents_para( + object["is"], + object["markup"], + object["attrib"], + ocn, + indent_first, + indent_second, + bullet + ); + line_occur["header_make"] = 0; + line_occur["header_metadata"] = 0; + line_occur["heading"] = 0; + line_occur["para"] = 0; + ft["header"] = 0; + ft["header_make"] = 0; + ft["header_metadata"] = 0; + ft["heading"] = 0; + ft["para"] = 0; + indent_first = "0"; + indent_second = "0"; + bullet = false; + object.remove("obj"); + object.remove("markup"); + object.remove("is"); + object.remove("attrib"); + object.remove("bookindex"); + processing.remove("verse"); + counter++; + } else { + assert( + line == null, + "line variable should be empty, should not occur" + ); + } + } + } + } + if (((contents_arbitrary_max_length_set[counter-1]["is"] == "para") + || (contents_arbitrary_max_length_set[counter-1]["is"] == "heading")) + && (counter-1 > previous_count)) { + if (match(contents_arbitrary_max_length_set[counter-1]["obj"], + rgx.inline_notes_delimiter_al_regular_number_note)) { + previous_count=counter-1; + notesection.gather_notes_for_endnote_section(contents_arbitrary_max_length_set, counter-1); + } + } + } + debug(objectrelated2) { // check + tell_l("blue", line); + } +/* + Backmatter: + * endnotes + * glossary + * references / bibliography + * book index +*/ + obj_im.obj_inline_markup("doc_end_reset", ""); + auto en_tuple = notesection.endnote_objects(ocn); + static assert(!isTypeTuple!(en_tuple)); + auto endnotes = en_tuple[0]; + ocn = en_tuple[1]; +debug(endnotes) { + writeln(__LINE__, " ", endnotes.length); + foreach (n; endnotes) { + writeln(n); + } +} + auto contents = contents_arbitrary_max_length_set[0..counter].dup; + auto biblio_unsorted_incomplete = biblio_arr_json[0..count_biblio_entry].dup; + auto biblio = new Bibliography(); + auto biblio_ordered = biblio.bibliography(biblio_unsorted_incomplete); + auto bi = new BookIndexReportSection(); + auto bi_tuple = + bi.bookindex_build_section(bookindex_unordered_hashes, ocn); + static assert(!isTypeTuple!(bi_tuple)); + auto bookindex = bi_tuple[0]; + auto document = contents ~ endnotes ~ bookindex; + ocn = bi_tuple[1]; +debug(bookindex) { // bookindex + foreach (bi_entry; bookindex) { + writeln(bi_entry["obj"]); + } +} +debug(heading) { // heading + string spc; + foreach (o; document) { + if (o["is"] == "heading") { + switch (o["lvn"]) { + case "0": + spc=""; + break; + case "1": + spc=" "; + break; + case "2": + spc=" "; + break; + case "3": + spc=" "; + break; + case "4": + spc=" "; + break; + case "5": + spc=" "; + break; + case "6": + spc=" "; + break; + case "7": + spc=" "; + break; + case "8": + spc=" "; + break; + default: + spc=""; + break; + } + writeln( + spc, "* ", " ", + strip(o["obj"]), + "\n ", + o["attrib"] + ); + } + } +} + destroy(contents); + destroy(endnotes); + destroy(bookindex); + auto t = + tuple( + document, + dochead_make, + dochead_metadata, + bookindex_unordered_hashes, + biblio_ordered + ); + return t; + } + } +} -- cgit v1.2.3