aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorRalph Amissah <ralph.amissah@gmail.com>2025-10-08 19:14:19 -0400
committerRalph Amissah <ralph.amissah@gmail.com>2025-10-08 19:14:34 -0400
commit748c9e60ae65433f225f7ac49de7b596cc1148d3 (patch)
tree940f0882e4a05e7f4eecb7c35ff954cbf6c79211 /src
parenta text output (and skel an outline) (diff)
text output, endnotes, add caller ocn (& some cleaning)
Diffstat (limited to 'src')
-rw-r--r--src/sisudoc/io_out/rgx.d24
-rw-r--r--src/sisudoc/meta/metadoc_from_src.d2
-rw-r--r--src/sisudoc/meta/metadoc_from_src_functions.d20
-rw-r--r--src/sisudoc/meta/rgx.d24
4 files changed, 45 insertions, 25 deletions
diff --git a/src/sisudoc/io_out/rgx.d b/src/sisudoc/io_out/rgx.d
index 666e71f..b4bec5f 100644
--- a/src/sisudoc/io_out/rgx.d
+++ b/src/sisudoc/io_out/rgx.d
@@ -88,28 +88,28 @@ static template spineRgxOut() {
static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
static inline_notes_al_gen = ctRegex!(`【.+?】`, "m");
static inline_notes_al_gen_text = ctRegex!(`【(?P<text>.+?)】`, "m");
- static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg");
- static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg");
- // static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
- // static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
+ static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
+ static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
+ // static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg");
+ // static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg");
static inline_notes_al_special_char_note = ctRegex!(`【(?P<char>(?:[*]|[+])+)\s+(?P<note>.+?)】`, "mg");
static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m");
static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m");
static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m");
static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg");
+ static endnote_section_note = ctRegex!(`┥\s*⑆\^┨(?P<notenumber>\d+)\.┣\^┝┤(?P<link>¤?.+?)├.+`, "mg");
/+ inline markup links +/
static inline_image = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_without_dimensions = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_info = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg");
static inline_link_anchor = ctRegex!(`┃(?P<anchor>\S+?)┃`, "mg"); // TODO *~text_link_anchor
- // space cleaning should not be necessary
- static inline_link = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#?(\S+?))├`, "mg");
- static inline_link_empty = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤├`, "mg");
- static inline_link_number = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<num>[0-9]+)├`, "mg"); // not used
- static inline_link_number_only = ctRegex!(`\s*(?P<linked_text>\s*┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
- static inline_link_stow_uri = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
- static inline_link_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#(?P<hash>\S+?))├`, "mg");
- static inline_link_seg_and_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg");
+ static inline_link = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg");
+ static inline_link_empty = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
+ static inline_link_number = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
+ static inline_link_number_only = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
+ static inline_link_stow_uri = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
+ static inline_link_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<hash>\S+?))├`, "mg");
+ static inline_link_seg_and_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg");
static inline_link_clean = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
static inline_link_toc_to_backmatter = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
static url = ctRegex!(`https?://`, "mg");
diff --git a/src/sisudoc/meta/metadoc_from_src.d b/src/sisudoc/meta/metadoc_from_src.d
index 904444a..4240a3f 100644
--- a/src/sisudoc/meta/metadoc_from_src.d
+++ b/src/sisudoc/meta/metadoc_from_src.d
@@ -770,7 +770,7 @@ template docAbstraction() {
comp_obj_.has.inline_links = substantive_obj_misc_struct.has_links;
comp_obj_.has.image_without_dimensions = substantive_obj_misc_struct.has_images_without_dimensions;
the_document_body_section ~= comp_obj_;
- tag_assoc = an_object.inline_para_link_anchor(tag_in_seg, tag_assoc);
+ tag_assoc = an_object.inline_para_link_anchor(tag_in_seg, tag_assoc);
{
ST_txt_by_line_common_reset _get = txt_by_line_common_reset_(line_occur, an_object, pith);
{
diff --git a/src/sisudoc/meta/metadoc_from_src_functions.d b/src/sisudoc/meta/metadoc_from_src_functions.d
index 6718e82..53e494b 100644
--- a/src/sisudoc/meta/metadoc_from_src_functions.d
+++ b/src/sisudoc/meta/metadoc_from_src_functions.d
@@ -4233,11 +4233,31 @@ template docAbstractionFunctions() {
int html_segnames_ptr_cntr,
int html_segnames_ptr,
) {
+ string[string][string] notes_;
+ if (the_document_body_section.length > 1) {
+ string _notes;
+ foreach (ref obj; the_document_body_section) {
+ if (obj.has.inline_notes_reg) {
+ if ((obj.text).matchFirst(rgx.inline_notes_al_gen)) {
+ foreach (m; (obj.text).matchAll(rgx.inline_notes_al_regular_number_note)) {
+ _notes ~= "\n\n" ~ m["num"] ~ ". " ~ m["note"] ~ " ≫" ~ obj.metainfo.ocn.to!string;
+ notes_[(m["num"])]["ocn"] = obj.metainfo.ocn.to!string;
+ }
+ }
+ }
+ }
+ }
if (the_document_endnotes_section.length > 1) {
segnames["html"] ~= "endnotes";
segnames["epub"] ~= "endnotes";
html_segnames_ptr = html_segnames_ptr_cntr;
foreach (ref obj; the_document_endnotes_section) {
+ auto matches = (obj.text).matchAll(rgx.endnote_section_note);
+ foreach (m; matches) {
+ obj.text = m.hit ~ " ≫" ~ notes_[(m["notenumber"])]["ocn"];
+ }
+ }
+ foreach (ref obj; the_document_endnotes_section) {
if (obj.metainfo.is_a == "heading") {
obj.metainfo.parent_ocn = obj.metainfo.markedup_ancestors[obj.metainfo.parent_lev_markup];
}
diff --git a/src/sisudoc/meta/rgx.d b/src/sisudoc/meta/rgx.d
index 1a26f73..fcac959 100644
--- a/src/sisudoc/meta/rgx.d
+++ b/src/sisudoc/meta/rgx.d
@@ -229,28 +229,28 @@ static template spineRgxIn() {
static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
static inline_notes_al_gen = ctRegex!(`【.+?】`, "m");
static inline_notes_al_gen_text = ctRegex!(`【(?P<text>.+?)】`, "m");
- static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg");
- static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg");
- // static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
- // static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
+ static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
+ static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
+ // static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg");
+ // static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg");
static inline_notes_al_special_char_note = ctRegex!(`【(?P<char>(?:[*]|[+])+)\s+(?P<note>.+?)】`, "mg");
static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m");
static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m");
static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m");
static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg");
+ static endnote_section_note = ctRegex!(`┥\s*⑆\^┨(?P<notenumber>\d+)\.┣\^┝┤(?P<link>¤?.+?)├.+`, "mg");
/+ inline markup links +/
static inline_image = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_without_dimensions = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_info = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg");
static inline_link_anchor = ctRegex!(`┃(?P<anchor>\S+?)┃`, "mg"); // TODO *~text_link_anchor
- // space cleaning should not be necessary
- static inline_link = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#?(\S+?))├`, "mg");
- static inline_link_empty = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤├`, "mg");
- static inline_link_number = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<num>[0-9]+)├`, "mg"); // not used
- static inline_link_number_only = ctRegex!(`\s*(?P<linked_text>\s*┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
- static inline_link_stow_uri = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
- static inline_link_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#(?P<hash>\S+?))├`, "mg");
- static inline_link_seg_and_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg");
+ static inline_link = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg");
+ static inline_link_empty = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
+ static inline_link_number = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
+ static inline_link_number_only = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
+ static inline_link_stow_uri = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
+ static inline_link_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<hash>\S+?))├`, "mg");
+ static inline_link_seg_and_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg");
static inline_link_clean = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
static inline_link_toc_to_backmatter = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
static url = ctRegex!(`https?://`, "mg");