From aae005b24ce816d89bcda6e72de2cdeadcf7ded0 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Sep 2016 16:45:36 -0400 Subject: heading anchor_tags and cleaning --- src/sdp/ao_abstract_doc_source.d | 267 +++++++++++++++++++++++++++++++-------- src/sdp/ao_defaults.d | 3 +- src/sdp/ao_object_setter.d | 7 +- src/sdp/ao_output_debugs.d | 26 +++- src/sdp/ao_read_source_files.d | 8 +- src/sdp/ao_rgx.d | 9 +- 6 files changed, 258 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/sdp/ao_abstract_doc_source.d b/src/sdp/ao_abstract_doc_source.d index 9851759..db814ac 100644 --- a/src/sdp/ao_abstract_doc_source.d +++ b/src/sdp/ao_abstract_doc_source.d @@ -23,6 +23,7 @@ template SiSUdocAbstraction() { auto rgx = Rgx(); ObjComposite[] contents_the_objects; string[string] an_object, processing; + string[] anchor_tags; auto set_abstract_object = ObjectAbstractSet(); auto note_section = NotesSection(); /+ enum +/ @@ -129,13 +130,12 @@ template SiSUdocAbstraction() { ]; auto type = flags_type_init; mixin ScreenTxtColors; - int tell_lo(int obj_cite_number, in char[] line) { + void tell_lo(int obj_cite_number, in char[] line) { writefln( "* %s %s", to!string(obj_cite_number), to!string(line) ); - return 0; } string[string] obj_cite_number_poem = [ "start" : "", @@ -221,7 +221,7 @@ template SiSUdocAbstraction() { /+ block object: code +/ code_block(line, an_object, type); continue; - } else if (!matchFirst(line, rgx.skip_code_block_from_regular_parse)) { + } else if (!matchFirst(line, rgx.skip_from_regular_parse)) { /+ object other than "code block" object (includes regular text paragraph, headings & blocks other than code) +/ if ((matchFirst(line, rgx.heading_biblio) @@ -390,8 +390,10 @@ template SiSUdocAbstraction() { heading_pointer, an_object["is"] ); // heading - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + 
obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); // tuple this with anchor tags? + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); ++heading_pointer; @@ -400,8 +402,10 @@ template SiSUdocAbstraction() { an_object["substantive"], an_object["attrib"], obj_cite_number, + anchor_tags, + to!string(an_object["lev"]), to!int(an_object["lev_markup_number"]), - to!int(an_object["lev_collapsed_number"]) + to!int(an_object["lev_collapsed_number"]), ); // track previous heading and make assertions debug(objectrelated1) { // check @@ -432,8 +436,10 @@ template SiSUdocAbstraction() { heading_pointer-1, an_object["is"] ); - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -504,7 +510,8 @@ template SiSUdocAbstraction() { * references / bibliography * book index +/ - obj_im.obj_inline_markup("doc_end_reset", "", dochead_make_aa); + // TODO FIGURE OUT, you need this possibility + // obj_im.obj_inline_markup_and_anchor_tags("doc_end_reset", "", dochead_make_aa); auto en_tuple = note_section.endnote_objects(obj_cite_number); static assert(!isTypeTuple!(en_tuple)); @@ -566,7 +573,7 @@ template SiSUdocAbstraction() { type["para"] = State.off; object_reset(an_object); } - auto check_obj_cite_number_status(char[] line, ref int[string] type) { + void check_obj_cite_number_status(char[] line, ref int[string] type) { if ((!line.empty) && (type["obj_cite_number_status_multi_obj"] == 
TriState.off)) { /+ not multi-line object, check whether obj_cite_number is on or turned off +/ if (matchFirst(line, rgx.obj_cite_number_block_marks)) { @@ -607,9 +614,8 @@ template SiSUdocAbstraction() { } } } - return 0; } - auto start_block( + void start_block( char[] line, ref int[string] type, string[string] obj_cite_number_poem @@ -752,9 +758,8 @@ template SiSUdocAbstraction() { type["table"] = TriState.on; type["tic_table"] = TriState.on; } - return 0; } - auto code_block( + void code_block( char[] line, ref string[string] an_object, ref int[string] type @@ -788,7 +793,6 @@ template SiSUdocAbstraction() { an_object["obj"] ~= line ~= "\n"; // code (tic) line } } - return 0; } final string biblio_tag_map(string abr) { auto btm = [ @@ -930,7 +934,7 @@ template SiSUdocAbstraction() { header_tag_value=""; } } - auto poem_block( + void poem_block( char[] line, ref string[string] an_object, ref int[string] type, @@ -962,14 +966,16 @@ template SiSUdocAbstraction() { } if (an_object.length > 0) { debug(poem) { // poem (curly) close - tell_lo( + writeln( obj_cite_number, an_object["obj"] ); } an_object["is"] = "verse"; - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -1017,8 +1023,10 @@ template SiSUdocAbstraction() { heading_pointer-1, an_object["is"] ); - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); + an_object["substantive"] = 
substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -1048,12 +1056,14 @@ template SiSUdocAbstraction() { if (an_object.length > 0) { debug(poem) { // poem (tic) close writeln(__LINE__); - tell_lo(obj_cite_number, line); + writeln(obj_cite_number, line); } processing.remove("verse"); an_object["is"] = "verse"; - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -1101,8 +1111,10 @@ template SiSUdocAbstraction() { heading_pointer-1, an_object["is"] ); - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -1118,9 +1130,8 @@ template SiSUdocAbstraction() { } } } - return 0; } - auto group_block( + void group_block( char[] line, ref string[string] an_object, ref int[string] type @@ -1154,9 +1165,8 @@ template SiSUdocAbstraction() { an_object["obj"] ~= line ~= "\n"; // build group array (or string) } } - return 0; } - auto block_block( + void block_block( char[] line, ref string[string] an_object, ref int[string] type @@ -1190,9 +1200,8 @@ template SiSUdocAbstraction() 
{ an_object["obj"] ~= line ~= "\n"; // build block array (or string) } } - return 0; } - auto quote_block(char[] line, ref string[string] an_object, ref int[string] type) { + void quote_block(char[] line, ref string[string] an_object, ref int[string] type) { if (type["curly_quote"] == TriState.on) { if (matchFirst(line, rgx.block_curly_quote_close)) { debug(quote) { // quote (curly) close @@ -1222,9 +1231,8 @@ template SiSUdocAbstraction() { an_object["obj"] ~= line ~= "\n"; // build quote array (or string) } } - return 0; } - auto table_block(char[] line, ref string[string] an_object, ref int[string] type) { + void table_block(char[] line, ref string[string] an_object, ref int[string] type) { if (type["curly_table"] == TriState.on) { if (matchFirst(line, rgx.block_curly_table_close)) { debug(table) { // table (curly) close @@ -1254,9 +1262,8 @@ template SiSUdocAbstraction() { an_object["obj"] ~= line ~= "\n"; // build table array (or string) } } - return 0; } - auto block_flag_line_empty( + void block_flag_line_empty( char[] line, ref string[string] an_object, ref ObjComposite[] contents_the_objects, @@ -1294,8 +1301,10 @@ template SiSUdocAbstraction() { heading_pointer-1, an_object["is"] ); - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -1354,8 +1363,10 @@ template SiSUdocAbstraction() { heading_pointer-1, an_object["is"] ); - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, 
dochead_make_aa); + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -1386,8 +1397,10 @@ template SiSUdocAbstraction() { heading_pointer-1, an_object["is"] ); - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -1417,8 +1430,10 @@ template SiSUdocAbstraction() { heading_pointer-1, an_object["is"] ); - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -1449,8 +1464,10 @@ template SiSUdocAbstraction() { heading_pointer-1, an_object["is"] ); - an_object["substantive"] = - obj_im.obj_inline_markup(an_object["is"], an_object["obj"], dochead_make_aa); + auto substantive_object_and_anchor_tags_tuple = + obj_im.obj_inline_markup_and_anchor_tags(an_object, dochead_make_aa); + an_object["substantive"] = substantive_object_and_anchor_tags_tuple[0]; + anchor_tags = substantive_object_and_anchor_tags_tuple[1]; an_object["attrib"] = obj_att.obj_attributes(an_object["is"], an_object["obj"], node); contents_the_objects ~= @@ -1466,7 +1483,6 @@ 
template SiSUdocAbstraction() { type["blocks"] = TriState.off; type["quote"] = TriState.off; } - return 0; } auto book_index( char[] line, @@ -2215,15 +2231,149 @@ template SiSUdocAbstraction() { // struct ObjInlineMarkup : AssertObjInlineMarkup { auto munge = ObjInlineMarkupMunge(); string[string] obj_txt; - string obj_inline_markup(string obj_is_, string obj_raw, string[string][string] dochead_make_aa) + auto obj_inline_markup_and_anchor_tags(string[string] obj_, string[string][string] dochead_make_aa) in { } body { - obj_txt["munge"]=obj_raw.dup; - obj_txt["munge"]=(match(obj_is_, ctRegex!(`verse|code`))) + obj_txt["munge"]=obj_["obj"].dup; + obj_txt["munge"]=(match(obj_["is"], ctRegex!(`verse|code`))) ? obj_txt["munge"] : strip(obj_txt["munge"]); - switch (obj_is_) { + static __gshared string[] anchor_tags_ = []; + switch (obj_["is"]) { case "heading": + static __gshared string anchor_tag = ""; + if (dochead_make_aa["make"]["num_top"].length > 0) { + if (!(match(obj_txt["munge"], rgx.heading_anchor_tag))) { + static __gshared uint heading_num_top_level=9; + static __gshared uint heading_num_depth=2; + static __gshared uint heading_num_0 = 0; + static __gshared uint heading_num_1 = 0; + static __gshared uint heading_num_2 = 0; + static __gshared uint heading_num_3 = 0; + static __gshared string heading_number_auto_composite = ""; + if (heading_num_top_level==9) { + if (dochead_make_aa["make"]["num_depth"].length > 0) { + heading_num_depth = to!uint(dochead_make_aa["make"]["num_depth"]); + } + switch (dochead_make_aa["make"]["num_top"]) { + case "A": + break; + case "B": + heading_num_top_level=1; + break; + case "C": + heading_num_top_level=2; + break; + case "D": + heading_num_top_level=3; + break; + case "1": + heading_num_top_level=4; + break; + case "2": + heading_num_top_level=5; + break; + case "3": + heading_num_top_level=6; + break; + case "4": + heading_num_top_level=7; + break; + default: + break; + } + } + /+ num_depth minimum 0 (1.) 
default 2 (1.1.1) max 3 (1.1.1.1) implement +/ + if (heading_num_top_level > to!uint(obj_["lev_markup_number"])) { + heading_num_0 = 0; + heading_num_1 = 0; + heading_num_2 = 0; + heading_num_3 = 0; + } else if (heading_num_top_level == to!uint(obj_["lev_markup_number"])) { + heading_num_0 ++; + heading_num_1 = 0; + heading_num_2 = 0; + heading_num_3 = 0; + } else if (heading_num_top_level == (to!uint(obj_["lev_markup_number"]) - 1)) { + heading_num_1 ++; + heading_num_2 = 0; + heading_num_3 = 0; + } else if (heading_num_top_level == (to!uint(obj_["lev_markup_number"]) - 2)) { + heading_num_2 ++; + heading_num_3 = 0; + } else if (heading_num_top_level == (to!uint(obj_["lev_markup_number"]) - 3)) { + heading_num_3 ++; + } else { + } + if (heading_num_3 > 0) { + heading_number_auto_composite = + (heading_num_depth == 3) + ? ( to!string(heading_num_0) ~ "." ~ + to!string(heading_num_1) ~ "." ~ + to!string(heading_num_2) ~ "." ~ + to!string(heading_num_3) + ) + : ""; + } else if (heading_num_2 > 0) { + heading_number_auto_composite = + ((heading_num_depth >= 2) + && (heading_num_depth <= 3)) + ? ( to!string(heading_num_0) ~ "." ~ + to!string(heading_num_1) ~ "." ~ + to!string(heading_num_2) + ) + : ""; + } else if (heading_num_1 > 0) { + heading_number_auto_composite = + ((heading_num_depth >= 1) + && (heading_num_depth <= 3)) + ? ( to!string(heading_num_0) ~ "." ~ + to!string(heading_num_1) + ) + : ""; + } else if (heading_num_0 > 0) { + heading_number_auto_composite = + ((heading_num_depth >= 0) + && (heading_num_depth <= 3)) + ? (to!string(heading_num_0)) + : ""; + } else { + heading_number_auto_composite = ""; + } + debug(heading_number_auto) { + writeln(heading_number_auto_composite); + } + if (!empty(heading_number_auto_composite)) { + obj_txt["munge"]=replaceFirst(obj_txt["munge"], rgx.heading, + "$1~$2 " ~ heading_number_auto_composite ~ ". 
"); + obj_txt["munge"]=replaceFirst(obj_txt["munge"], rgx.heading_marker_missing_tag, + "$1~" ~ heading_number_auto_composite ~ " "); + } + } + } + // WORK ON, you still need to ensure that level 1 anchor_tags are unique + if (!(match(obj_txt["munge"], rgx.heading_anchor_tag))) { // if (anchor_tags_.length == 0) { + if (match(obj_txt["munge"], rgx.heading_identify_anchor_tag)) { + if (auto m = match(obj_txt["munge"], rgx.heading_extract_named_anchor_tag)) { + obj_txt["munge"]=replaceFirst(obj_txt["munge"], rgx.heading_marker_missing_tag, + "$1~" ~ toLower(m.captures[1]) ~ "_" ~ m.captures[2] ~ " "); + } else if (auto m = match(obj_txt["munge"], rgx.heading_extract_unnamed_anchor_tag)) { + obj_txt["munge"]=replaceFirst(obj_txt["munge"], rgx.heading_marker_missing_tag, + "$1~" ~ "s" ~ m.captures[1] ~ " "); + } + } else if (obj_["lev"] == "1") { // (if not successful) manufacture a unique anchor tag for lev=="1" + static __gshared uint heading_num_lev1 = 0; + heading_num_lev1 ++; + obj_txt["munge"]=replaceFirst(obj_txt["munge"], rgx.heading_marker_missing_tag, + "$1~" ~ "x" ~ to!string(heading_num_lev1) ~ " "); + } + } + if (auto m = match(obj_txt["munge"], rgx.heading_anchor_tag)) { + anchor_tag = m.captures[1]; + anchor_tags_ ~=anchor_tag; + } else if (obj_["lev"] == "1") { + writeln("heading anchor tag missing: ", obj_txt["munge"]); + } obj_txt["munge"]=munge.heading(obj_txt["munge"]); break; case "para": @@ -2256,7 +2406,12 @@ template SiSUdocAbstraction() { default: break; } - return obj_txt["munge"]; + auto t = tuple( + obj_txt["munge"], + anchor_tags_, + ); + anchor_tags_=[]; + return t; } invariant() { } @@ -2500,6 +2655,8 @@ template SiSUdocAbstraction() { "Book Index", attrib, obj_cite_number, + [], + to!string(lev), to!int(lev_markup_number), to!int(lev_collapsed_number) ); @@ -2515,6 +2672,8 @@ template SiSUdocAbstraction() { "Index", attrib, obj_cite_number, + ["book_index"], + to!string(lev), to!int(lev_markup_number), to!int(lev_collapsed_number) ); @@ 
-2674,6 +2833,8 @@ template SiSUdocAbstraction() { "Endnotes", attrib, obj_cite_number, + [], + to!string(lev), to!int(lev_markup_number), to!int(lev_collapsed_number) ); @@ -2689,6 +2850,8 @@ template SiSUdocAbstraction() { "Endnotes", attrib, obj_cite_number, + ["endnotes"], + to!string(lev), to!int(lev_markup_number), to!int(lev_collapsed_number) ); diff --git a/src/sdp/ao_defaults.d b/src/sdp/ao_defaults.d index f344fe7..ea5caae 100644 --- a/src/sdp/ao_defaults.d +++ b/src/sdp/ao_defaults.d @@ -76,6 +76,7 @@ template SiSUregisters() { "home_button_text" : "", "italics" : "", "num_top" : "", + "num_depth" : "", "substitute" : "", "texpdf_font" : "" ] @@ -195,7 +196,7 @@ template SiSUregisters() { "home_button_image", "home_button_text", "footer", "headings", - "num_top", + "num_top", "num_depth", "breaks", "substitute", "bold", diff --git a/src/sdp/ao_object_setter.d b/src/sdp/ao_object_setter.d index 0d8107c..cbb4edc 100644 --- a/src/sdp/ao_object_setter.d +++ b/src/sdp/ao_object_setter.d @@ -5,7 +5,7 @@ template ObjectSetter() { /+ structs +/ struct HeadingAttrib { - int lev = 9; // use of enum should make this redundant, remove + string lev = "9"; int lev_markup_number = 9; int lev_collapsed_number = 9; } @@ -33,6 +33,7 @@ template ObjectSetter() { string is_a = ""; string object = ""; string obj_cite_number = ""; // not used for calculations? output only? else int + string[] anchor_tags = []; HeadingAttrib heading_attrib; ParaAttrib para_attrib; BlockAttrib block_attrib; @@ -56,6 +57,8 @@ template ObjectSetter() { in string object, in string attrib, in int obj_cite_number, + in string[] tags, + in string lev, in int lev_markup_number, in int lev_collapsed_number, ) { @@ -65,6 +68,8 @@ template ObjectSetter() { object_set.is_a = "heading"; object_set.object = object; object_set.obj_cite_number = (obj_cite_number==0) ? 
"" : to!string(obj_cite_number); + object_set.anchor_tags ~= tags; + object_set.heading_attrib.lev = lev; object_set.heading_attrib.lev_markup_number = lev_markup_number; object_set.heading_attrib.lev_collapsed_number = lev_collapsed_number; // object_set.node_structure.node = node; diff --git a/src/sdp/ao_output_debugs.d b/src/sdp/ao_output_debugs.d index b80c53d..1bf359a 100644 --- a/src/sdp/ao_output_debugs.d +++ b/src/sdp/ao_output_debugs.d @@ -53,8 +53,7 @@ template SiSUoutputDebugs() { if (obj.use == "content") { if (obj.is_a == "heading") { writefln( - "%s%s node: %s heading: %s %s", - scr_txt_marker["cyan"], + "%s node: %s heading: %s %s", obj.obj_cite_number, obj.node, obj.lev_markup_number, @@ -272,6 +271,26 @@ template SiSUoutputDebugs() { } } + debug(anchor) { + writefln( + "%s\n%s:%s", + "-------------------------------", + __FILE__, + __LINE__, + ); + foreach (obj; contents) { + if (obj.is_a == "heading") { + writefln( + "%s~ [%s] %s %s", + obj.heading_attrib.lev, + obj.obj_cite_number, + obj.anchor_tags, + // "[", obj["is"], "] ", + obj.object + ); + } + } + } debug(heading) { // heading string spc; foreach (o; contents) { @@ -345,8 +364,7 @@ template SiSUoutputDebugs() { foreach (obj; contents) { if (obj.is_a == "heading") { writefln( - "%s%s~ [%s] %s", - scr_txt_marker["yellow"], + "%s~ [%s] %s", obj.heading_attrib.lev, obj.obj_cite_number, // "[", obj["is"], "] ", diff --git a/src/sdp/ao_read_source_files.d b/src/sdp/ao_read_source_files.d index 06c5f41..ce3c7bd 100644 --- a/src/sdp/ao_read_source_files.d +++ b/src/sdp/ao_read_source_files.d @@ -75,9 +75,13 @@ template SiSUmarkupRaw() { return source_txt_str; } final private char[][] header0Content1(in string src_text) { - /+ split string on first match of "^:?A~\s" into [header, content] tuple +/ + /+ split string on _first_ match of "^:?A~\s" into [header, content] tuple +/ char[][] header_and_content = - split(cast(char[]) src_text, rgx.heading_a); + split(cast(char[]) src_text, rgx.heading_a); 
// this splits on each occurrence of "^:?A~\s" (does not recognize when it appears in a code block) + assert(header_and_content.length == 2, + "document markup is broken, header body split == " ~ to!string(header_and_content.length) ~ + "; (header / body array split should == 2 (split is on level A~))" + ); return header_and_content; } final private char[][] markupSourceLineArray(in char[] src_text) { diff --git a/src/sdp/ao_rgx.d b/src/sdp/ao_rgx.d index cd97a02..ccaf1bd 100644 --- a/src/sdp/ao_rgx.d +++ b/src/sdp/ao_rgx.d @@ -61,11 +61,16 @@ template RgxInit() { static native_subhead_identifier = ctRegex!(`^(?:oclc|pg|isbn)$`, "m"); static native_subhead_notes = ctRegex!(`^(?:abstract|description)$`, "m"); static native_subhead_publisher = ctRegex!(`^(?:name)$`, "m"); - static native_subhead_make = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m"); + static native_subhead_make = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|num_depth|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m"); /+ heading & paragraph operators +/ static heading_a = ctRegex!(`^:?[A][~] `, "m"); static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?) 
`); static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); + static heading_anchor_tag = ctRegex!(`^:?[A-D1-4][~]([a-z0-9_.-]+) `,"i"); + static heading_identify_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9.]+))`,"i"); // unless dob.obj =~/^:?[A-D1-4]~\s+(?:|(?:chapter|article|section|clause)\s+)([0-9.]+)/i + static heading_extract_named_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+([0-9.]+)`,"i"); + static heading_extract_unnamed_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+([0-9.]+)`); + static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `); static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); // test, particularly [2] name/hashtag which may or may not be, does this affect title [3] static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); @@ -165,7 +170,7 @@ template RgxInit() { // static auto_obj_cite_number_ignore = ctRegex!(`^[+~*$-]{3,}$`); // reminder static obj_cite_number_block_marks = ctRegex!(`^--[+~-]#$`); /+ ignore outside code blocks +/ - static skip_code_block_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info + static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info /+ line & page breaks +/ static break_line_within_object = ctRegex!(`[\\]{2}( |$)`); // static break_line_break_within_object = ctRegex!(`( |^)[\\]{2}( |$)`); -- cgit v1.2.3