aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/doc_reform/meta/metadoc_from_src.d
diff options
context:
space:
mode:
author Ralph Amissah <ralph.amissah@gmail.com> 2024-04-10 22:24:34 -0400
committer Ralph Amissah <ralph.amissah@gmail.com> 2024-04-10 23:08:18 -0400
commit 90873fabd7451e1dd8c4b39303906e19bdc481f7 (patch)
tree 2dbb0e41f3e9c761645c8b37dafe979a01d38d32 /src/doc_reform/meta/metadoc_from_src.d
parent 0.15.0 (diff)
0.16.0 sisudoc (src/sisudoc sisudoc spine)
- src/sisudoc (replaces src/doc_reform) - sisudoc spine (used more)
Diffstat (limited to 'src/doc_reform/meta/metadoc_from_src.d')
-rw-r--r-- src/doc_reform/meta/metadoc_from_src.d 1509
1 files changed, 0 insertions, 1509 deletions
diff --git a/src/doc_reform/meta/metadoc_from_src.d b/src/doc_reform/meta/metadoc_from_src.d
deleted file mode 100644
index cf4a7cc..0000000
--- a/src/doc_reform/meta/metadoc_from_src.d
+++ /dev/null
@@ -1,1509 +0,0 @@
-/+
-- Name: Spine, Doc Reform [a part of]
- - Description: documents, structuring, processing, publishing, search
- - static content generator
-
- - Author: Ralph Amissah
- [ralph.amissah@gmail.com]
-
- - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved.
-
- - License: AGPL 3 or later:
-
- Spine (SiSU), a framework for document structuring, publishing and
- search
-
- Copyright (C) Ralph Amissah
-
- This program is free software: you can redistribute it and/or modify it
- under the terms of the GNU Affero General Public License as published by the
- Free Software Foundation, either version 3 of the License, or (at your
- option) any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
- more details.
-
- You should have received a copy of the GNU Affero General Public License along with
- this program. If not, see [https://www.gnu.org/licenses/].
-
- If you have an Internet connection, the latest version of the AGPL should be
- available at these locations:
- [https://www.fsf.org/licensing/licenses/agpl.html]
- [https://www.gnu.org/licenses/agpl.html]
-
- - Spine (by Doc Reform, related to SiSU) uses standard:
- - docReform markup syntax
- - standard SiSU markup syntax with modified headers and minor modifications
- - docReform object numbering
- - standard SiSU object citation numbering & system
-
- - Homepages:
- [https://www.doc_reform.org]
- [https://www.sisudoc.org]
-
- - Git
- [https://git.sisudoc.org/projects/?p=software/spine.git;a=summary]
-
-+/
-// document abstraction:
-// abstraction of sisu markup for downstream processing
-// metadoc_from_src.d
-module doc_reform.meta.metadoc_from_src;
-@safe:
-template docAbstraction() {
- // ↓ abstraction imports
- import
- std.algorithm,
- std.container,
- std.file,
- std.json,
- std.path;
- import
- doc_reform.meta,
- doc_reform.meta.defaults,
- doc_reform.meta.rgx,
- doc_reform.meta.metadoc_object_setter,
- doc_reform.meta.rgx;
- public import doc_reform.meta.metadoc_from_src_functions;
- mixin docAbstractionFunctions;
- @system auto docAbstraction(CMM,Opt,Mf) (
- char[][] markup_sourcefile_content,
- CMM conf_make_meta,
- Opt opt_action,
- Mf manifested,
- bool _new_doc
- ) {
- static auto rgx = RgxI();
- // ↓ abstraction init
- scope(success) {
- }
- scope(failure) {
- }
- scope(exit) {
- destroy(the_document_toc_section);
- destroy(the_document_head_section);
- destroy(the_document_body_section);
- destroy(the_document_bibliography_section);
- destroy(the_document_glossary_section);
- destroy(the_document_blurb_section);
- destroy(the_document_xml_dom_tail_section);
- destroy(an_object);
- destroy(processing);
- destroy(biblio_arr_json);
- previous_length = 0;
- reset_note_numbers = true;
- lev_anchor_tag = "";
- anchor_tag = "";
- }
- mixin spineNode;
- auto node_para_int_ = node_metadata_para_int;
- auto node_para_str_ = node_metadata_para_str;
- ObjGenericComposite comp_obj_;
- line_occur = [
- "heading" : 0,
- "para" : 0,
- "glossary" : 0,
- "blurb" : 0,
- ];
- uint[string] dochas = [
- "inline_links" : 0,
- "inline_notes" : 0,
- "inline_notes_star" : 0,
- "codeblock" : 0,
- "table" : 0,
- "block" : 0,
- "group" : 0,
- "poem" : 0,
- "quote" : 0,
- "images" : 0,
- ];
- uint[string] pith = [
- "ocn" : 1,
- "section" : 0,
- "txt_is" : 0,
- "block_is" : 0,
- "block_state" : 0,
- "block_delim" : 0,
- "make_headings" : 0,
- "dummy_heading_status" : 0,
- "dummy_heading_multiple_objects" : 0,
- "no_ocn_multiple_objects" : 0,
- "verse_new" : 0,
- ];
- string[string] object_number_poem = [
- "start" : "",
- "end" : ""
- ];
- string[] lv_ancestors_txt = [ "", "", "", "", "", "", "", "", ];
- int[string] lv = [
- "lv" : eN.bi.off,
- "h0" : eN.bi.off,
- "h1" : eN.bi.off,
- "h2" : eN.bi.off,
- "h3" : eN.bi.off,
- "h4" : eN.bi.off,
- "h5" : eN.bi.off,
- "h6" : eN.bi.off,
- "h7" : eN.bi.off,
- "lev_int_collapsed" : 0,
- ];
- int[string] collapsed_lev = [
- "h0" : eN.bi.off,
- "h1" : eN.bi.off,
- "h2" : eN.bi.off,
- "h3" : eN.bi.off,
- "h4" : eN.bi.off,
- "h5" : eN.bi.off,
- "h6" : eN.bi.off,
- "h7" : eN.bi.off
- ];
- string[string] heading_match_str = [
- "h_A": "^(none)",
- "h_B": "^(none)",
- "h_C": "^(none)",
- "h_D": "^(none)",
- "h_1": "^(none)",
- "h_2": "^(none)",
- "h_3": "^(none)",
- "h_4": "^(none)"
- ];
- Regex!char[string] heading_match_rgx = [
- "h_A": regex(r"^(none)"),
- "h_B": regex(r"^(none)"),
- "h_C": regex(r"^(none)"),
- "h_D": regex(r"^(none)"),
- "h_1": regex(r"^(none)"),
- "h_2": regex(r"^(none)"),
- "h_3": regex(r"^(none)"),
- "h_4": regex(r"^(none)")
- ];
- string _anchor_tag;
- string toc_txt_;
- an_object["glossary_nugget"] = "";
- an_object["blurb_nugget"] = "";
- comp_obj_ = set_object_heading("lev4", "frontmatter", "toc", "Table of Contents");
- comp_obj_.metainfo.identifier = "";
- comp_obj_.metainfo.dummy_heading = false;
- comp_obj_.metainfo.object_number_off = true;
- comp_obj_.metainfo.object_number_type = 0;
- comp_obj_.tags.segment_anchor_tag_epub = "toc";
- comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub;
- comp_obj_.tags.in_segment_html = comp_obj_.tags.anchor_tag_html;
- comp_obj_.ptr.html_segnames = html_segnames_ptr;
- comp_obj_.tags.anchor_tags = ["toc"];
- tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html;
- tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub;
- auto toc_head = comp_obj_;
- html_segnames_ptr_cntr++;
- the_document_toc_section = [toc_head];
- static auto mkup = InlineMarkup();
- static auto munge = ObjInlineMarkupMunge();
- auto note_section = NotesSection();
- auto bookindex_extract_hash = BookIndexNuggetHash();
- string[][string] lev4_subtoc;
- string[][string] segnames = ["html": ["toc"], "epub": ["toc"]];
- int cnt1 = 1; int cnt2 = 1; int cnt3 = 1;
- // abstraction init ↑
- debug (substitutions) {
- writeln(__LINE__, ":", __FILE__, ": DEBUG substitutions:");
- if (!(conf_make_meta.make.headings.empty)) {
- writeln(conf_make_meta.make.headings);
- }
- if (conf_make_meta.make.substitute) {
- foreach(substitution_pair; conf_make_meta.make.substitute) {
- writeln("regex to match: ", substitution_pair[Substitute.match]);
- writeln("substitution to make: ", substitution_pair[Substitute.markup]);
- }
- }
- if (conf_make_meta.make.bold) {
- writeln("regex to match: ", conf_make_meta.make.bold[Substitute.match]);
- writeln("substitution to make: ", conf_make_meta.make.bold[Substitute.markup]);
- }
- if (conf_make_meta.make.emphasis) {
- writeln("regex to match: ", conf_make_meta.make.emphasis[Substitute.match]);
- writeln("substitution to make: ", conf_make_meta.make.emphasis[Substitute.markup]);
- }
- if (conf_make_meta.make.italics) {
- writeln("regex to match: ", conf_make_meta.make.italics[Substitute.match]);
- writeln("substitution to make: ", conf_make_meta.make.italics[Substitute.markup]);
- }
- }
- auto loopMarkupSrcByLine(
- char[][] markup_sourcefile_content,
- string[string] an_object,
- uint[string] pith,
- ) {
- _loopMarkupSrcByLineStruct ret;
- srcDocLoopLineByLine_:
- foreach (line; markup_sourcefile_content) {
- // ↓ markup document/text line by line
- // "line" variable can be empty but should never be null
- // scope
- scope(exit) { }
- scope(failure) {
- stderr.writefln(
- "\n%s\n%s\n\n%s:%s\nFAILED while processing the file: ❮❮ %s ❯❯ on line with text:\n%s\n",
- __MODULE__, __FUNCTION__,
- __FILE__, __LINE__,
- manifested.src.filename, line,
- );
- }
- debug(source) { writeln(line); }
- debug(srclines) { if (!line.empty) { writefln("* %s", line); } }
- if (!line.empty) { pith = line._check_ocn_status_(pith); }
- if ( pith["block_is"] == eN.blk_is.code
- && pith["block_state"] == eN.blk_state.on
- ) {
- // block object: code
- {
- ST_txt_by_line_block_generic _get = line.txt_by_line_block_code(an_object, pith);
- {
- an_object = _get.this_object;
- pith = _get.pith;
- }
- }
- continue;
- } else if (!matchFirst(line, rgx.skip_from_regular_parse)) {
- // object other than "code block" object
- // (includes regular text paragraph, headings & blocks other than code)
- // heading, glossary, blurb, poem, group, block, quote, table
- line = line.inline_markup_faces; // by text line (rather than by text object), linebreaks in para problematic
- if (line.matchFirst(rgx.heading_biblio)
- || (pith["section"] == eN.sect.bibliography
- && ((!(line.matchFirst(rgx.heading_glossary)))
- && (!(line.matchFirst(rgx.heading_blurb)))
- && (!(line.matchFirst(rgx.heading)))
- && (!(line.matchFirst(rgx.comment)))))
- ) {
- pith["section"] = eN.sect.bibliography;
- if (opt_action.backmatter && opt_action.section_biblio) {
- {
- ST_txt_by_line_block_biblio _get = line.txt_by_line_block_biblio(pith, bib_entry, biblio_entry_str_json, biblio_arr_json);
- {
- pith = _get.pith;
- bib_entry = _get.bib_entry;
- biblio_entry_str_json = _get.biblio_entry_str_json;
- biblio_arr_json = _get.biblio_arr_json;
- }
- }
- debug(bibliobuild) {
- writeln("- ", biblio_entry_str_json);
- writeln("-> ", biblio_arr_json.length);
- }
- }
- continue;
- } else if (line.matchFirst(rgx.heading_glossary)
- || (pith["section"] == eN.sect.glossary
- && ((!(line.matchFirst(rgx.heading_biblio)))
- && (!(line.matchFirst(rgx.heading_blurb)))
- && (!(line.matchFirst(rgx.heading)))
- && (!(line.matchFirst(rgx.comment)))))
- ) {
- // within section (block object): glossary
- debug(glossary) { writeln(__LINE__); writeln(line); }
- pith["section"] = eN.sect.glossary;
- if (opt_action.backmatter && opt_action.section_glossary) {
- ST_the_section add_to_glossary_sect = line.build_the_glossary_section(pith, tag_assoc); // double check, should not be necessary to pass pith
- the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[0];
- if (add_to_glossary_sect.comp_section_obj.length > 1) { // heading
- the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[1];
- }
- pith = add_to_glossary_sect.pith;
- tag_assoc = add_to_glossary_sect.tag_assoc;
- }
- continue;
- } else if (line.matchFirst(rgx.heading_blurb)
- || (pith["section"] == eN.sect.blurb
- && ((!(line.matchFirst(rgx.heading_glossary)))
- && (!(line.matchFirst(rgx.heading_biblio)))
- && (!(line.matchFirst(rgx.heading)))
- && (!(line.matchFirst(rgx.comment)))))
- ) {
- pith["section"] = eN.sect.blurb;
- debug(blurb) { writeln(__LINE__); writeln(line); }
- if ((opt_action.backmatter && opt_action.section_blurb) && !(line.empty)) {
- ST_the_section add_to_blurb_sect = line.build_the_blurb_section(pith, tag_assoc, opt_action); // double check, should not be necessary to pass pith
- the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[0];
- if (add_to_blurb_sect.comp_section_obj.length > 1) { // heading
- the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[1];
- }
- pith = add_to_blurb_sect.pith;
- tag_assoc = add_to_blurb_sect.tag_assoc;
- }
- continue;
- } else if (pith["block_state"] == eN.blk_state.on) {
- if (pith["block_is"] == eN.blk_is.quote) {
- line = line
- ._doc_header_and_make_substitutions_(conf_make_meta)
- ._doc_header_and_make_substitutions_fontface_(conf_make_meta);
- {
- auto _get = line.txt_by_line_block_quote(an_object, pith);
- {
- an_object = _get.this_object;
- pith = _get.pith;
- }
- }
- continue;
- } else if (pith["block_is"] == eN.blk_is.group) {
- line = line
- ._doc_header_and_make_substitutions_(conf_make_meta)
- ._doc_header_and_make_substitutions_fontface_(conf_make_meta)
- .replaceAll(rgx.para_delimiter, mkup.br_line_spaced ~ "$1");
- {
- auto _get = line.txt_by_line_block_group(an_object, pith);
- {
- an_object = _get.this_object;
- pith = _get.pith;
- }
- }
- continue;
- } else if (pith["block_is"] == eN.blk_is.block) {
- line = line
- ._doc_header_and_make_substitutions_(conf_make_meta)
- ._doc_header_and_make_substitutions_fontface_(conf_make_meta);
- if (auto m = line.match(rgx.spaces_keep)) {
- line = line
- .replaceAll(rgx.spaces_keep, (m.captures[1]).translate([ ' ' : mkup.nbsp ]));
- }
- {
- auto _get = line.txt_by_line_block_block(an_object, pith);
- {
- an_object = _get.this_object;
- pith = _get.pith;
- }
- }
- continue;
- } else if (pith["block_is"] == eN.blk_is.poem) {
- {
- auto _get = line.txt_by_line_block_poem(an_object, pith, cntr, object_number_poem, conf_make_meta, tag_in_seg);
- {
- an_object = _get.this_object;
- pith = _get.pith;
- cntr = _get.cntr;
- }
- }
- continue;
- } else if (pith["block_is"] == eN.blk_is.table) {
- {
- auto _get = line.txt_by_line_block_table(an_object, pith, conf_make_meta);
- {
- an_object = _get.this_object;
- pith = _get.pith;
- conf_make_meta = _get.conf_make_meta;
- }
- }
- continue;
- }
- } else {
- // not within a block group
- assert(
- (pith["block_state"] == eN.blk_state.off)
- || (pith["block_state"] == eN.blk_state.closing),
- "block status: none or closed"
- );
- if (line.matchFirst(rgx.block_open)) {
- if (line.matchFirst(rgx.block_poem_open)) {
- // poem to verse exceptions!
- object_reset(an_object);
- processing.remove("verse");
- object_number_poem["start"] = obj_cite_digits.object_number.to!string;
- }
- {
- auto _get = line.txt_by_line_block_start(pith, dochas, object_number_poem);
- {
- pith = _get.pith;
- dochas = _get.dochas;
- object_number_poem = _get.object_number_poem;
- }
- }
- continue;
- } else if (!line.empty) {
- // line not empty - non blocks (headings, paragraphs) & closed blocks
- assert(!line.empty, "line tested, line not empty surely:\n \"" ~ line ~ "\"");
- assert(
- (pith["block_state"] == eN.blk_state.off)
- || (pith["block_state"] == eN.blk_state.closing),
- "code block status: none or closed"
- );
- if (pith["block_state"] == eN.blk_state.closing) {
- debug(check) { writeln(__LINE__); writeln(line); }
- assert(
- line.matchFirst(rgx.book_index_item)
- || line.matchFirst(rgx.book_index_item_open)
- || pith["section"] == eN.sect.book_index,
- "\nblocks closed, unless followed by book index, non-matching line:\n \""
- ~ line ~ "\""
- );
- }
- if (line.matchFirst(rgx.book_index_item)
- || line.matchFirst(rgx.book_index_item_open)
- || pith["section"] == eN.sect.book_index) {
- { // book_index
- auto _get = line.flow_book_index_(an_object, book_idx_tmp, pith, opt_action);
- {
- an_object = _get.this_object;
- pith = _get.pith;
- book_idx_tmp = _get.book_idx_tmp;
- }
- }
- } else {
- // not book_index
- an_object_key = "body_nugget";
- if (auto m = line.matchFirst(rgx.comment)) {
- // matched comment
- debug(comment) { writeln(line); }
- an_object[an_object_key] ~= line ~= "\n";
- comp_obj_comment = comp_obj_comment.init;
- comp_obj_comment.metainfo.is_of_part = "comment"; // breaks flow
- comp_obj_comment.metainfo.is_of_section = "comment"; // breaks flow
- comp_obj_comment.metainfo.is_of_type = "comment";
- comp_obj_comment.metainfo.is_a = "comment";
- comp_obj_comment.text = an_object[an_object_key].strip;
- the_document_body_section ~= comp_obj_comment;
- {
- auto _get = txt_by_line_common_reset_(line_occur, an_object, pith);
- {
- line_occur = _get.line_occur;
- an_object = _get.this_object;
- pith = _get.pith;
- }
- }
- processing.remove("verse");
- ++cntr;
- } else if ((line_occur["para"] == eN.bi.off
- && line_occur["heading"] == eN.bi.off)
- && pith["txt_is"] == eN.txt_is.off
- ) { // heading or para but neither flag nor line exists
- if ((conf_make_meta.make.headings.length > 2)
- && (pith["make_headings"] == eN.bi.off)) {
- // heading found
- {
- auto _get = line.flow_heading_found_(heading_match_str, conf_make_meta.make.headings, heading_match_rgx, pith);
- {
- heading_match_str = _get.heading_match_str;
- heading_match_rgx = _get.heading_match_rgx;
- pith = _get.pith;
- }
- }
- }
- if (pith["make_headings"] == eN.bi.on
- && (line_occur["para"] == eN.bi.off
- && line_occur["heading"] == eN.bi.off)
- && pith["txt_is"] == eN.txt_is.off
- ) {
- // heading make set
- {
- auto _get = line.flow_heading_make_set_(line_occur, heading_match_rgx, pith);
- {
- line = _get.line;
- an_object = _get.this_object;
- pith = _get.pith;
- }
- }
- }
- // TODO node info: all headings identified at this point,
- // - extract node info here??
- // - how long can it wait?
- // - should be incorporated in composite objects
- // - should happen before endnote links set (they need to be moved down?)
- if (line.matchFirst(rgx.headings)) {
- // heading match
- line = line._doc_header_and_make_substitutions_(conf_make_meta);
- {
- auto _get = line.flow_heading_matched_(
- an_object,
- line_occur,
- an_object_key,
- lv,
- collapsed_lev,
- pith,
- conf_make_meta,
- );
- {
- an_object = _get.this_object;
- pith = _get.pith;
- }
- }
- } else if (line_occur["para"] == eN.bi.off) {
- // para match
- an_object_key = "body_nugget";
- line = line
- ._doc_header_and_make_substitutions_(conf_make_meta)
- ._doc_header_and_make_substitutions_fontface_(conf_make_meta);
- {
- auto _get = line.flow_para_match_(an_object, an_object_key, indent, bullet, pith, line_occur);
- {
- an_object = _get.this_object;
- an_object_key = _get.this_object_key;
- pith = _get.pith;
- indent = _get.indent;
- bullet = _get.bullet;
- line_occur = _get.line_occur;
- }
- }
- }
- } else if (line_occur["heading"] > eN.bi.off) {
- // heading
- debug(heading) { writeln(line); }
- an_object[an_object_key] ~= line ~= "\n";
- ++line_occur["heading"];
- } else if (line_occur["para"] > eN.bi.off) {
- // paragraph
- debug(para) { writeln(an_object_key, "-> ", line); }
- line = line
- ._doc_header_and_make_substitutions_(conf_make_meta)
- ._doc_header_and_make_substitutions_fontface_(conf_make_meta);
- an_object[an_object_key] ~= " " ~ line;
- ++line_occur["para"];
- }
- }
- } else if (pith["block_state"] == eN.blk_state.closing) {
- // line empty, with blocks flag
- {
- auto _get = line.flow_block_flag_line_empty_(
- an_object,
- bookindex_extract_hash,
- the_document_body_section,
- bookindex_unordered_hashes,
- obj_cite_digits,
- comp_obj_,
- cntr,
- pith,
- object_number_poem,
- conf_make_meta,
- tag_in_seg,
- );
- {
- an_object = _get.this_object;
- the_document_body_section = _get.the_document_body_section;
- bookindex_unordered_hashes = _get.bookindex_unordered_hashes;
- obj_cite_digits = _get.obj_cite_digits;
- comp_obj_ = _get.comp_obj_;
- cntr = _get.cntr;
- pith = _get.pith;
- }
- }
- } else {
- // line.empty, post contents, empty variables:
- assert(
- line.empty,
- "\nline should be empty:\n \""
- ~ line ~ "\""
- );
- assert(
- (pith["block_state"] == eN.blk_state.off),
- "code block status: none"
- );
- if (_new_doc) {
- tag_assoc = tag_assoc.init;
- lv0to3_tags = lv0to3_tags.init;
- tag_in_seg = tag_in_seg.init;
- }
- if (pith["txt_is"] == eN.txt_is.heading
- && line_occur["heading"] > eN.bi.off
- ) {
- // heading object (current line empty)
- obj_cite_digits = (an_object["lev_markup_number"].to!int == 0)
- ? ocn_emit(eN.ocn.reset)
- : ocn_emit(pith["ocn"]);
- an_object["is"] = "heading";
- an_object_key = "body_nugget";
- ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_object_and_anchor_tags_struct
- = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, ((_new_doc) ? Yes._new_doc : No._new_doc));
- an_object["substantive"] = substantive_object_and_anchor_tags_struct.obj_txt;
- anchor_tag = substantive_object_and_anchor_tags_struct.anchor_tag;
- if (_new_doc) {
- cnt1 = 1;
- cnt2 = 1;
- cnt3 = 1;
- _new_doc = false;
- }
- if (
- an_object["lev_markup_number"].to!int == 4
- && (!(anchor_tag.empty)
- || (lv0to3_tags.length > 0))
- ) {
- tag_in_seg["seg_lv4"] = anchor_tag;
- tag_in_seg["seg_lv1to4"] = anchor_tag;
- lev_anchor_tag = anchor_tag;
- tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"];
- tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"];
- if (lv0to3_tags.length > 0) {
- // names used for html markup segments 1 to 4 (rather than epub which has separate segments for A to D)
- foreach (lv0_to_lv3_html_tag; lv0to3_tags) {
- tag_assoc[lv0_to_lv3_html_tag]["seg_lv4"] = anchor_tag;
- }
- }
- anchor_tag_ = anchor_tag;
- lv0to3_tags = lv0to3_tags.init;
- } else if (an_object["lev_markup_number"].to!int > 4) {
- tag_in_seg["seg_lv4"] = anchor_tag_;
- tag_in_seg["seg_lv1to4"] = anchor_tag_;
- lev_anchor_tag = anchor_tag;
- tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"];
- tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"];
- } else if (an_object["lev_markup_number"].to!int < 4) {
- string segn;
- switch (an_object["lev_markup_number"].to!int) {
- // names used for epub markup segments A to D
- case 0:
- segn = "_the_title";
- goto default;
- case 1:
- segn = "_part_" ~ cnt1.to!string;
- ++cnt1;
- goto default;
- case 2:
- segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string;
- ++cnt2;
- goto default;
- case 3:
- segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string ~ "_" ~ cnt3.to!string;
- ++cnt3;
- goto default;
- default:
- lv0to3_tags ~= obj_cite_digits.object_number.to!string;
- lv0to3_tags ~= segn;
- tag_in_seg["seg_lv4"] = segn; // for html segname need following lv4 not yet known
- tag_in_seg["seg_lv1to4"] = segn;
- break;
- }
- }
- an_object["bookindex_nugget"]
- = ("bookindex_nugget" in an_object) ? an_object["bookindex_nugget"] : "";
- bookindex_unordered_hashes
- = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg);
- _anchor_tag = obj_cite_digits.identifier;
- // (incrementally build toc) table of contents here!
- {
- auto _get = obj_im.flow_table_of_contents_gather_headings(
- an_object,
- conf_make_meta,
- tag_in_seg,
- _anchor_tag,
- lev4_subtoc,
- the_document_toc_section,
- );
- {
- the_document_toc_section = _get.the_document_toc_section;
- lev4_subtoc = _get.lev4_subtoc;
- }
- }
- if (an_object["lev_markup_number"] == "4") {
- segnames["html"] ~= tag_in_seg["seg_lv4"];
- html_segnames_ptr = html_segnames_ptr_cntr;
- html_segnames_ptr_cntr++;
- }
- if (an_object["lev_markup_number"].to!int <= 4) {
- segnames["epub"] ~= tag_in_seg["seg_lv1to4"];
- }
- auto comp_obj_ = node_construct.node_emitter_heading(
- an_object,
- tag_in_seg,
- lev_anchor_tag,
- tag_assoc,
- obj_cite_digits, // OCNset
- cntr, // int
- heading_ptr, // int
- lv_ancestors_txt, // string[]
- html_segnames_ptr, // int
- substantive_object_and_anchor_tags_struct,
- );
- ++heading_ptr;
- debug(segments) {
- writeln(an_object["lev_markup_number"]);
- writeln(tag_in_seg["seg_lv4"]);
- writeln(tag_in_seg["seg_lv1to4"]);
- }
- the_document_body_section ~= comp_obj_;
- debug(objectrelated1) { writeln(line); } // check
- {
- auto _get = txt_by_line_common_reset_(line_occur, an_object, pith);
- {
- line_occur = _get.line_occur;
- an_object = _get.this_object;
- pith = _get.pith;
- }
- }
- an_object.remove("lev");
- an_object.remove("lev_markup_number");
- processing.remove("verse");
- ++cntr;
- } else if (pith["txt_is"] == eN.txt_is.para
- && line_occur["para"] > eN.bi.off
- ) { // paragraph object (current line empty) - repeated character paragraph separator
- if ((an_object[an_object_key].to!string).matchFirst(rgx.repeated_character_line_separator)) {
- pith["ocn"] = eN.ocn.off;
- }
- obj_cite_digits = ocn_emit(pith["ocn"]);
- an_object["bookindex_nugget"] = ("bookindex_nugget" in an_object) ? an_object["bookindex_nugget"] : "";
- bookindex_unordered_hashes = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg);
- an_object["is"] = "para";
- auto comp_obj_ = node_construct.node_location_emitter(
- content_non_header,
- tag_in_seg,
- lev_anchor_tag,
- tag_assoc,
- obj_cite_digits,
- cntr,
- heading_ptr-1,
- an_object["is"],
- );
- ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_obj_misc_struct
- = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, No._new_doc);
- an_object["substantive"] = substantive_obj_misc_struct.obj_txt;
- anchor_tag = substantive_obj_misc_struct.anchor_tag;
- comp_obj_ = set_object_generic("body", "body", "para", "para", an_object["substantive"].to!string.strip, obj_cite_digits.object_number);
- comp_obj_.tags.html_segment_anchor_tag_is = tag_in_seg["seg_lv4"];
- comp_obj_.tags.epub_segment_anchor_tag_is = tag_in_seg["seg_lv1to4"];
- comp_obj_.metainfo.identifier = obj_cite_digits.identifier;
- comp_obj_.metainfo.object_number_off = (obj_cite_digits.off == 0) ? true : false; // TODO
- comp_obj_.metainfo.o_n_book_index = obj_cite_digits.bkidx;
- comp_obj_.metainfo.object_number_type = obj_cite_digits.type;
- comp_obj_.attrib.indent_hang = indent["hang_position"];
- comp_obj_.attrib.indent_base = indent["base_position"];
- comp_obj_.attrib.bullet = bullet;
- comp_obj_.tags.anchor_tags = [anchor_tag]; anchor_tag="";
- comp_obj_.has.inline_notes_reg = substantive_obj_misc_struct.has_notes_reg;
- comp_obj_.has.inline_notes_star = substantive_obj_misc_struct.has_notes_star;
- comp_obj_.has.inline_links = substantive_obj_misc_struct.has_links;
- comp_obj_.has.image_without_dimensions = substantive_obj_misc_struct.has_images_without_dimensions;
- the_document_body_section ~= comp_obj_;
- tag_assoc = an_object.inline_para_link_anchor(tag_in_seg, tag_assoc);
- {
- auto _get = txt_by_line_common_reset_(line_occur, an_object, pith);
- {
- line_occur = _get.line_occur;
- an_object = _get.this_object;
- pith = _get.pith;
- }
- }
- indent = [
- "hang_position" : 0,
- "base_position" : 0,
- ];
- bullet = false;
- processing.remove("verse");
- ++cntr;
- // } else { // could be useful to test line variable should be empty and never null
- }
- } // close else for line empty
- } // close else for not the above
- } // close after non code, other blocks or regular text
- // unless (the_document_body_section.length == 0) ?
- if (the_document_body_section.length > 0) {
- if (((the_document_body_section[$-1].metainfo.is_a == "para")
- || (the_document_body_section[$-1].metainfo.is_a == "heading")
- || (the_document_body_section[$-1].metainfo.is_a == "quote")
- || (the_document_body_section[$-1].metainfo.is_a == "group")
- || (the_document_body_section[$-1].metainfo.is_a == "block")
- || (the_document_body_section[$-1].metainfo.is_a == "verse"))
- && (the_document_body_section.length > previous_length)) {
- if ((the_document_body_section[$-1].metainfo.is_a == "heading")
- && (the_document_body_section[$-1].metainfo.heading_lev_markup < 5)) {
- pith["section"] = eN.sect.unset;
- }
- if (the_document_body_section[$-1].metainfo.is_a == "verse") {
- // scan for endnotes for whole poem (each verse in poem)
- foreach (i; previous_length .. the_document_body_section.length) {
- if (the_document_body_section[i].metainfo.is_a == "verse") {
- if ((the_document_body_section[i].text).match(
- rgx.inline_notes_al_all_note
- )) {
- object_notes = note_section.gather_notes_for_endnote_section(
- the_document_body_section,
- tag_in_seg,
- (i).to!int,
- );
- }
- }
- }
- } else {
- // scan object for endnotes
- previous_length = the_document_body_section.length.to!int;
- if ((the_document_body_section[$-1].text).match(
- rgx.inline_notes_al_all_note
- )) {
- previous_count = (the_document_body_section.length -1).to!int;
- object_notes = note_section.gather_notes_for_endnote_section(
- the_document_body_section,
- tag_in_seg,
- (the_document_body_section.length-1).to!int,
- );
- }
- }
- previous_length = the_document_body_section.length.to!int;
- }
- }
- }
- ret.toc = the_document_toc_section;
- ret.body = the_document_body_section;
- ret.glossary = the_document_glossary_section;
- ret.blurb = the_document_blurb_section;
- ret.object_notes = object_notes;
- ret.segnames = segnames;
- return ret;
- }
- { // loopMarkupSrcByLine
- auto _doc_by_line = loopMarkupSrcByLine(markup_sourcefile_content, an_object, pith);
- the_document_toc_section = _doc_by_line.toc;
- the_document_body_section = _doc_by_line.body;
- the_document_glossary_section = _doc_by_line.glossary;
- the_document_blurb_section = _doc_by_line.blurb;
- segnames = _doc_by_line.segnames;
- object_notes = _doc_by_line.object_notes; // endnotes, compare, not sure is used
- destroy(_doc_by_line);
- }
- { // EOF backMatter
- comp_obj_ = set_object_heading("lev1", "backmatter", "tail", "");
- comp_obj_.metainfo.identifier = "";
- comp_obj_.metainfo.dummy_heading = false;
- comp_obj_.metainfo.object_number_off = false;
- comp_obj_.metainfo.object_number_type = 0;
- comp_obj_.tags.segment_anchor_tag_epub = "_part_eof";
- comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub;
- comp_obj_.tags.in_segment_html = "tail";
- comp_obj_.tags.anchor_tags = ["section_eof"];
- comp_obj_.metainfo.dom_structure_markedup_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0];
- comp_obj_.metainfo.dom_structure_collapsed_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0];
- the_document_xml_dom_tail_section ~= comp_obj_;
- tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html;
- tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub;
- }
- // endNotes
- ST_endnotes en_st = note_section.backmatter_endnote_objects(obj_cite_digits, opt_action);
- { // endnotes
- the_document_endnotes_section = en_st.endnotes;
- obj_cite_digits = en_st.ocn;
- debug(endnotes) {
- writefln("%s %s", __LINE__, the_document_endnotes_section.length);
- foreach (o; the_document_endnotes_section) { writeln(o); }
- }
- }
- { // glossary
- if (an_object["glossary_nugget"].length == 0) {
- comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Glossary section");
- comp_obj_.metainfo.identifier = "";
- comp_obj_.metainfo.dummy_heading = true;
- comp_obj_.metainfo.object_number_off = true;
- comp_obj_.metainfo.object_number_type = 0;
- the_document_glossary_section ~= comp_obj_;
- }
- debug(glossary) { foreach (gloss; the_document_glossary_section) { writeln(gloss.text); } }
- }
- { // bibliography
- string[] biblio_unsorted_incomplete = biblio_arr_json.dup;
- ST_biblio_section biblio_section = backmatter_make_the_bibliography_section(biblio_unsorted_incomplete, bib_arr_json);
- the_document_bibliography_section = biblio_section.bibliography_section;
- tag_assoc = biblio_section.tag_assoc;
- }
- { // bookindex
- BookIndexReportSection bi = BookIndexReportSection();
- ST_bookindex bi_st
- = bi.backmatter_bookindex_build_abstraction_section(bookindex_unordered_hashes, obj_cite_digits, opt_action);
- destroy(bookindex_unordered_hashes);
- the_document_bookindex_section = bi_st.bookindex;
- obj_cite_digits = bi_st.ocn;
- debug(bookindex) { foreach (bi_entry; the_document_bookindex_section) { writeln(bi_entry); } }
- }
- { // blurb
- if (an_object["blurb_nugget"].length == 0) {
- comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Blurb section");
- comp_obj_.metainfo.identifier = "";
- comp_obj_.metainfo.object_number_off = true;
- comp_obj_.metainfo.object_number_type = 0;
- comp_obj_.tags.segment_anchor_tag_epub = "";
- comp_obj_.tags.anchor_tag_html = "";
- comp_obj_.tags.in_segment_html = "";
- the_document_blurb_section ~= comp_obj_;
- }
- debug(blurb) { foreach (blurb; the_document_blurb_section) { writeln(blurb.text); } }
- }
- { // toc gather backmatter
- the_document_toc_section ~= backmatter_gather_table_of_contents(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section); //
- }
- { // document head and body
- the_document_head_section ~= the_document_body_section[0];
- the_document_body_section = the_document_body_section[1..$];
- }
- { // document ancestors
- ST_ancestors get_ancestors;
- get_ancestors = the_document_body_section.after_doc_determine_ancestors(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section);
- the_document_body_section = get_ancestors.the_document_body_section;
- the_document_endnotes_section = get_ancestors.the_document_endnotes_section;
- the_document_glossary_section = get_ancestors.the_document_glossary_section;
- the_document_bibliography_section = get_ancestors.the_document_bibliography_section;
- the_document_bookindex_section = get_ancestors.the_document_bookindex_section;
- the_document_blurb_section = get_ancestors.the_document_blurb_section;
- }
- { // document segnames
- ST_segnames get_segnames;
- get_segnames = the_document_body_section.after_doc_determine_segnames(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section, segnames, html_segnames_ptr_cntr, html_segnames_ptr); //
- segnames = get_segnames.segnames;
- html_segnames_ptr_cntr = get_segnames.html_segnames_ptr_cntr;
- html_segnames_ptr = get_segnames.html_segnames_ptr;
- }
- // document head
- string[] segnames_0_to_4;
- foreach (ref obj; the_document_head_section) {
- if (obj.metainfo.is_a == "heading") {
- debug(dom) { writeln(obj.text); }
- if (obj.metainfo.heading_lev_markup <= 4) {
- segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
- }
- if (obj.metainfo.heading_lev_markup == 0) {
- // TODO second hit (of two) with same assertion failure, check, fix and reinstate
- // assert( obj.metainfo.ocn == 1,
- // "Title OCN should be 1 not: " ~ obj.metainfo.ocn.to!string); // bug introduced 0.18.1
- obj.metainfo.ocn = 1;
- obj.metainfo.identifier = "1";
- obj.metainfo.object_number_type = OCNtype.ocn;
- }
- // dom structure (marked up & collapsed)
- if (opt_action.meta_processing_xml_dom) {
- obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
- obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
- }
- obj = obj.obj_heading_ancestors(lv_ancestors_txt);
- }
- obj = _links(obj);
- }
- if (the_document_toc_section.length > 1) {
- // scroll
- dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup;
- dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup;
- foreach (ref obj; the_document_toc_section) {
- if (obj.metainfo.is_a == "heading") {
- if (obj.metainfo.heading_lev_markup <= 4) {
- segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
- if (obj.metainfo.heading_lev_markup == 4) {
- obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
- assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
- obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
- }
- }
- // dom structure (marked up & collapsed)
- if (opt_action.meta_processing_xml_dom) {
- obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
- obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
- }
- obj = obj.obj_heading_ancestors(lv_ancestors_txt);
- }
- obj = _links(obj);
- }
- }
- // images
- string[] _images;
- // multiple 1~ levels, loop through document body
- if (the_document_body_section.length > 1) {
- foreach (ref obj; the_document_body_section) {
- if (!(obj.metainfo.identifier.empty)) {
- if (!(((obj.metainfo.identifier) in tag_assoc)
- && ("seg_lv4" in tag_assoc[(obj.metainfo.identifier)]))
- ) {
- tag_assoc[(obj.metainfo.identifier)]["seg_lv4"]
- = obj.tags.html_segment_anchor_tag_is;
- }
- tag_assoc[(obj.metainfo.identifier)]["seg_lv1to4"]
- = obj.tags.epub_segment_anchor_tag_is;
- }
- if (obj.metainfo.is_a == "heading") {
- debug(dom) { writeln(obj.text); }
- if (obj.metainfo.heading_lev_markup <= 4) {
- segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
- if (obj.metainfo.heading_lev_markup == 4) {
- obj.tags.lev4_subtoc = lev4_subtoc[obj.tags.anchor_tag_html];
- obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
- if (segnames["html"].length > obj.ptr.html_segnames + 1) {
- obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
- }
- assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
- obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
- }
- }
- // dom structure (marked up & collapsed)
- if (opt_action.meta_processing_xml_dom) {
- obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
- obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
- }
- obj = obj.obj_heading_ancestors(lv_ancestors_txt);
- } else if (obj.metainfo.is_a == "para") {
- _images ~= extract_images(obj.text);
- obj = _image_dimensions(obj, manifested);
- }
- obj = _links(obj);
- }
- }
- auto image_list = (_images.sort()).uniq;
- // endnotes optional only one 1~ level
- if (the_document_endnotes_section.length > 1) {
- dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup;
- dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup;
- dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup;
- dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status_buffer.dup;
- foreach (ref obj; the_document_endnotes_section) {
- if (obj.metainfo.is_a == "heading") {
- debug(dom) { writeln(obj.text); }
- if (obj.metainfo.heading_lev_markup == 1) {
- obj_cite_digits = ocn_emit(eN.ocn.on);
- obj.metainfo.ocn = obj_cite_digits.object_number;
- obj.metainfo.identifier = obj_cite_digits.identifier;
- }
- if (obj.metainfo.heading_lev_markup <= 4) {
- segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
- if (obj.metainfo.heading_lev_markup == 4) {
- obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
- if (segnames["html"].length > obj.ptr.html_segnames + 1) {
- obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
- }
- assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
- obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
- }
- }
- // dom structure (marked up & collapsed)
- if (opt_action.meta_processing_xml_dom) {
- obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
- obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
- }
- obj = obj.obj_heading_ancestors(lv_ancestors_txt);
- }
- obj = _links(obj);
- }
- }
- // glossary optional only one 1~ level
- if (the_document_glossary_section.length > 1) {
- foreach (ref obj; the_document_glossary_section) {
- if (obj.metainfo.is_a == "heading") {
- debug(dom) { writeln(obj.text); }
- if (obj.metainfo.heading_lev_markup == 1) {
- obj_cite_digits = ocn_emit(eN.ocn.on);
- obj.metainfo.ocn = obj_cite_digits.object_number;
- obj.metainfo.identifier = obj_cite_digits.identifier;
- }
- if (obj.metainfo.heading_lev_markup <= 4) {
- segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
- if (obj.metainfo.heading_lev_markup == 4) {
- obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
- if (segnames["html"].length > obj.ptr.html_segnames + 1) {
- obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
- }
- assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
- obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
- }
- }
- // dom structure (marked up & collapsed)
- if (opt_action.meta_processing_xml_dom) {
- obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
- obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
- }
- obj = obj.obj_heading_ancestors(lv_ancestors_txt);
- } else if (obj.metainfo.is_a == "glossary" && !(obj.text.empty)) {
- obj_cite_digits = ocn_emit(eN.ocn.on);
- obj.metainfo.ocn = obj_cite_digits.object_number;
- obj.metainfo.identifier = obj_cite_digits.identifier;
- }
- obj = _links(obj);
- }
- }
- // bibliography optional only one 1~ level
- if (the_document_bibliography_section.length > 1) {
- foreach (ref obj; the_document_bibliography_section) {
- if (obj.metainfo.is_a == "heading") {
- debug(dom) { writeln(obj.text); }
- if (obj.metainfo.heading_lev_markup == 1) {
- obj_cite_digits = ocn_emit(eN.ocn.on);
- obj.metainfo.ocn = obj_cite_digits.object_number;
- obj.metainfo.identifier = obj_cite_digits.identifier;
- }
- if (obj.metainfo.heading_lev_markup <= 4) {
- segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
- if (obj.metainfo.heading_lev_markup == 4) {
- obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
- if (segnames["html"].length > obj.ptr.html_segnames + 1) {
- obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
- }
- assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
- obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
- }
- }
- // dom structure (marked up & collapsed)
- if (opt_action.meta_processing_xml_dom) {
- obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
- obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
- }
- obj = obj.obj_heading_ancestors(lv_ancestors_txt);
- } else if (obj.metainfo.is_a == "bibliography") {
- obj_cite_digits = ocn_emit(eN.ocn.on);
- obj.metainfo.ocn = obj_cite_digits.object_number;
- obj.metainfo.identifier = obj_cite_digits.identifier;
- }
- obj = _links(obj);
- }
- }
- // book index, optional only one 1~ level
- int ocn_ = obj_cite_digits.object_number;
- int ocn_bkidx_ = 0;
- int ocn_bidx_;
- if (the_document_bookindex_section.length > 1) { // scroll
- dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup;
- dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup;
- foreach (ref obj; the_document_bookindex_section) {
- if (obj.metainfo.is_a == "heading") {
- // debug(dom) { }
- if (obj.metainfo.heading_lev_markup <= 4) {
- segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
- }
- if (obj.metainfo.heading_lev_markup == 1) {
- obj_cite_digits = ocn_emit(eN.ocn.on);
- obj.metainfo.ocn = obj_cite_digits.object_number;
- obj.metainfo.identifier = obj_cite_digits.identifier;
- }
- if (obj.metainfo.heading_lev_markup <= 4) {
- if (obj.metainfo.heading_lev_markup == 4) {
- obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
- if (segnames["html"].length > obj.ptr.html_segnames + 1) {
- obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
- }
- assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
- obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
- }
- }
- // dom structure (marked up & collapsed)
- if (opt_action.meta_processing_xml_dom) {
- obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
- obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
- }
- obj = obj.obj_heading_ancestors(lv_ancestors_txt);
- } else if (obj.metainfo.is_a == "bookindex") {
- obj_cite_digits = ocn_emit(eN.ocn.bkidx);
- obj.metainfo.ocn = obj_cite_digits.object_number;
- obj.metainfo.identifier = obj_cite_digits.identifier;
- obj.metainfo.o_n_book_index = obj_cite_digits.bkidx;
- obj.metainfo.object_number_type = OCNtype.bkidx;
- }
- obj = _links(obj);
- }
- // TODO assert failure, reinstate
- // assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?");
- }
- // blurb optional only one 1~ level
- if (the_document_blurb_section.length > 1) {
- foreach (ref obj; the_document_blurb_section) {
- if (obj.metainfo.is_a == "heading") {
- debug(dom) { writeln(obj.text); }
- if (obj.metainfo.heading_lev_markup == 1) {
- obj_cite_digits = ocn_emit(eN.ocn.on);
- obj.metainfo.ocn = obj_cite_digits.object_number;
- obj.metainfo.identifier = obj_cite_digits.identifier;
- }
- if (obj.metainfo.heading_lev_markup <= 4) {
- segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
- if (obj.metainfo.heading_lev_markup == 4) {
- obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
- if (segnames["html"].length > obj.ptr.html_segnames + 1) {
- obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
- }
- assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
- obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
- }
- }
- // dom structure (marked up & collapsed)
- if (opt_action.meta_processing_xml_dom) {
- obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
- obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
- }
- obj = obj.obj_heading_ancestors(lv_ancestors_txt);
- } else if (obj.metainfo.is_a == "blurb") {
- obj_cite_digits = ocn_emit(eN.ocn.off);
- obj.metainfo.object_number_off = obj_cite_digits.off;
- obj.metainfo.object_number_type = OCNtype.non;
- }
- obj = _links(obj);
- }
- }
- // get descendants
- if (the_document_body_section.length > 1) {
- auto pairs = after_doc_get_descendants(
- the_document_head_section ~
- the_document_body_section ~
- the_document_endnotes_section ~
- the_document_glossary_section ~
- the_document_bibliography_section ~
- the_document_bookindex_section ~
- the_document_blurb_section ~
- the_document_xml_dom_tail_section
- );
- debug(descendants_tuple) {
- pairs = pairs.sort();
- foreach (pair; pairs) { // (pair; pairs.sort())
- writeln(pair[0], "..", pair[1]);
- }
- }
- foreach (ref obj; the_document_head_section) {
- if (obj.metainfo.is_a == "heading") {
- foreach (pair; pairs) {
- if (obj.metainfo.ocn == pair[0]) {
- obj.metainfo.last_descendant_ocn = pair[1];
- }
- }
- }
- }
- if (the_document_body_section.length > 1) {
- foreach (ref obj; the_document_body_section) {
- if (obj.metainfo.is_a == "heading") {
- foreach (pair; pairs) {
- if (obj.metainfo.ocn == pair[0]) {
- obj.metainfo.last_descendant_ocn = pair[1];
- }
- }
- }
- }
- }
- if (the_document_endnotes_section.length > 1) {
- foreach (ref obj; the_document_endnotes_section) {
- if (obj.metainfo.is_a == "heading") {
- foreach (pair; pairs) {
- if (obj.metainfo.ocn == pair[0]) {
- obj.metainfo.last_descendant_ocn = pair[1];
- }
- }
- }
- }
- }
- if (the_document_glossary_section.length > 1) {
- foreach (ref obj; the_document_glossary_section) {
- if (obj.metainfo.is_a == "heading") {
- foreach (pair; pairs) {
- if (obj.metainfo.ocn == pair[0]) {
- obj.metainfo.last_descendant_ocn = pair[1];
- }
- }
- }
- }
- }
- if (the_document_bibliography_section.length > 1) {
- foreach (ref obj; the_document_bibliography_section) {
- if (obj.metainfo.is_a == "heading") {
- foreach (pair; pairs) {
- if (obj.metainfo.ocn == pair[0]) {
- obj.metainfo.last_descendant_ocn = pair[1];
- }
- }
- }
- }
- }
- if (the_document_bookindex_section.length > 1) {
- foreach (ref obj; the_document_bookindex_section) {
- if (obj.metainfo.is_a == "heading") {
- foreach (pair; pairs) {
- if (obj.metainfo.ocn == pair[0]) {
- obj.metainfo.last_descendant_ocn = pair[1];
- }
- }
- }
- }
- }
- if (the_document_blurb_section.length > 1) {
- foreach (ref obj; the_document_blurb_section) {
- if (obj.metainfo.is_a == "heading") {
- foreach (pair; pairs) {
- if (obj.metainfo.ocn == pair[0]) {
- obj.metainfo.last_descendant_ocn = pair[1];
- }
- }
- }
- }
- }
- if (the_document_xml_dom_tail_section.length > 1) {
- foreach (ref obj; the_document_xml_dom_tail_section) {
- if (obj.metainfo.is_a == "heading") {
- foreach (pair; pairs) {
- if (obj.metainfo.ocn == pair[0]) {
- obj.metainfo.last_descendant_ocn = pair[1];
- }
- }
- }
- }
- }
- }
- // TODO
- // - note create/insert heading object sole purpose eof close all open tags
- // sort out:
- // - obj.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status;
- // - obj.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status;
- comp_obj_ = set_object_heading("lev1", "empty", "empty", "");
- comp_obj_.metainfo.identifier = "";
- comp_obj_.metainfo.dummy_heading = true;
- comp_obj_.metainfo.object_number_off = true;
- comp_obj_.metainfo.object_number_type = 0;
- comp_obj_.tags.segment_anchor_tag_epub = "";
- comp_obj_.tags.anchor_tag_html = "";
- comp_obj_.tags.in_segment_html = "";
- comp_obj_.tags.html_segment_anchor_tag_is = "";
- comp_obj_.tags.epub_segment_anchor_tag_is = "";
- comp_obj_.metainfo.heading_lev_markup = 9;
- comp_obj_.metainfo.heading_lev_collapsed = 9;
- comp_obj_.metainfo.parent_ocn = 0;
- comp_obj_.metainfo.parent_lev_markup = 0;
- comp_obj_.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status.dup;
- comp_obj_.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status.dup;
- comp_obj_ = comp_obj_.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, 0);
- comp_obj_ = comp_obj_.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, 0);
- comp_obj_ = comp_obj_.obj_heading_ancestors(lv_ancestors_txt);
- // the_dom_tail_section ~= comp_obj_; // remove tail for now, decide on later
- // the doc
- ObjGenericComposite[][string] document_the = [
- "head": the_document_head_section,
- "toc": the_document_toc_section,
- // substantive/body:
- "body": the_document_body_section,
- // backmatter:
- "endnotes": the_document_endnotes_section,
- "glossary": the_document_glossary_section,
- "bibliography": the_document_bibliography_section,
- "bookindex": the_document_bookindex_section,
- "blurb": the_document_blurb_section,
- // dom tail only
- "tail": the_document_xml_dom_tail_section,
- ];
- // document parts keys as needed
- string[][string] document_section_keys_sequenced = [
- "scroll": ["head", "toc", "body",],
- "seg": ["head", "toc", "body",],
- "sql": ["head", "body",],
- "latex": ["head", "toc", "body",]
- ];
- if (document_the["endnotes"].length > 1) {
- document_section_keys_sequenced["scroll"] ~= "endnotes";
- document_section_keys_sequenced["seg"] ~= "endnotes";
- document_section_keys_sequenced["latex"] ~= "endnotes";
- }
- if (document_the["glossary"].length > 1) {
- document_section_keys_sequenced["scroll"] ~= "glossary";
- document_section_keys_sequenced["seg"] ~= "glossary";
- document_section_keys_sequenced["sql"] ~= "glossary";
- document_section_keys_sequenced["latex"] ~= "glossary";
- }
- if (document_the["bibliography"].length > 1) {
- document_section_keys_sequenced["scroll"] ~= "bibliography";
- document_section_keys_sequenced["seg"] ~= "bibliography";
- document_section_keys_sequenced["sql"] ~= "bibliography";
- document_section_keys_sequenced["latex"] ~= "bibliography";
- }
- if (document_the["bookindex"].length > 1) {
- document_section_keys_sequenced["scroll"] ~= "bookindex";
- document_section_keys_sequenced["seg"] ~= "bookindex";
- document_section_keys_sequenced["sql"] ~= "bookindex";
- document_section_keys_sequenced["latex"] ~= "bookindex";
- }
- if (document_the["blurb"].length > 1) {
- document_section_keys_sequenced["scroll"] ~= "blurb";
- document_section_keys_sequenced["seg"] ~= "blurb";
- document_section_keys_sequenced["sql"] ~= "blurb";
- document_section_keys_sequenced["latex"] ~= "blurb";
- }
- if ((opt_action.html)
- || (opt_action.html_scroll)
- || (opt_action.html_seg)
- || (opt_action.epub)) {
- document_section_keys_sequenced["scroll"] ~= "tail";
- document_section_keys_sequenced["seg"] ~= "tail";
- }
- // segnames
- string[] segnames_4 = segnames["html"].dup;
- string[] segnames_lv1to4 = segnames["epub"].dup;
- debug(segnames) {
- writeln("segnames_lv4: ", segnames_4);
- writeln("segnames_lv1to4: ", segnames_lv1to4);
- }
- // restart
- destroy(the_document_head_section);
- destroy(the_document_toc_section);
- destroy(the_document_body_section);
- destroy(the_document_endnotes_section);
- destroy(the_document_glossary_section);
- destroy(the_document_bibliography_section);
- destroy(the_document_bookindex_section);
- destroy(the_document_blurb_section);
- destroy(the_document_xml_dom_tail_section);
- destroy(segnames);
- destroy(bookindex_unordered_hashes);
- destroy(an_object);
- obj_cite_digits = ocn_emit(eN.ocn.reset);
- biblio_arr_json = [];
- obj_cite_digit_ = 0;
- html_segnames_ptr = 0;
- html_segnames_ptr_cntr = 0;
- content_non_header = "8";
- dom_structure_markedup_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0,];
- dom_structure_markedup_tags_status_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0,];
- dom_structure_collapsed_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0,];
- dom_structure_collapsed_tags_status_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0,];
- lev_anchor_tag = "";
- anchor_tag = "";
- // identify parts
- /++
-   Read-only accessors over values collected by the enclosing function:
-   the dochas table, the image list, section key sequences, segment names
-   and tag associations. Every method simply reads a variable of the
-   enclosing scope; nothing is modified here.
- +/
- struct DocHas_ {
-   /// value stored under dochas["inline_links"]
-   uint inline_links() {
-     return dochas["inline_links"];
-   }
-   /// value stored under dochas["inline_notes"]
-   uint inline_notes_reg() {
-     return dochas["inline_notes"];
-   }
-   /// value stored under dochas["inline_notes_star"]
-   uint inline_notes_star() {
-     return dochas["inline_notes_star"];
-   }
-   /// value stored under dochas["codeblock"]
-   uint codeblocks() {
-     return dochas["codeblock"];
-   }
-   /// value stored under dochas["table"]
-   uint tables() {
-     return dochas["table"];
-   }
-   /// value stored under dochas["block"]
-   uint blocks() {
-     return dochas["block"];
-   }
-   /// value stored under dochas["group"]
-   uint groups() {
-     return dochas["group"];
-   }
-   /// value stored under dochas["poem"]
-   uint poems() {
-     return dochas["poem"];
-   }
-   /// value stored under dochas["quote"]
-   uint quotes() {
-     return dochas["quote"];
-   }
-   /// number of entries in image_list (the sorted, de-duplicated image names)
-   ulong images() {
-     // Count the range elements directly instead of converting the range to
-     // text and splitting on ",": the textual approach miscounts any image
-     // name that itself contains a comma. A saved copy is walked so that
-     // image_list itself is left untouched.
-     import std.range.primitives : walkLength;
-     return image_list.save.walkLength;
-   }
-   /// the sorted, de-duplicated range of image names
-   auto imagelist() {
-     return image_list;
-   }
-   /// document_section_keys_sequenced wrapped by docSectKeysSeq
-   auto keys_seq() {
-     return docSectKeysSeq!()(document_section_keys_sequenced);
-   }
-   /// copy of segnames["html"] taken before segnames was destroyed
-   string[] segnames_lv4() {
-     return segnames_4;
-   }
-   /// epub segment anchor tags gathered from headings with markup level <= 4
-   string[] segnames_lv_0_to_4() {
-     return segnames_0_to_4;
-   }
-   /// the tag_assoc table (identifier -> key -> tag)
-   string[string][string] tag_associations() {
-     return tag_assoc;
-   }
- }
- // hand out a default-constructed DocHas_ accessor struct
- auto doc_has() {
-   auto has_ = DocHas_();
-   return has_;
- }
- // the doc to be returned
- struct ST_docAbstraction { // result bundle: filled into `ret` below and returned
- ObjGenericComposite[][string] document_the; // document sections keyed by name ("head", "toc", "body", "endnotes", ...)
- DocHas_ doc_has; // accessor struct over dochas values, image list, segment names and tag associations
- }
- ST_docAbstraction ret;
- {
- ret.document_the = document_the;
- ret.doc_has = doc_has;
- }
- return ret;
- } // ← closed: abstract doc source
-}