From 87d62f48d6c8a2ccf9807f56c23a6ca71d1102e6 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 13 Jan 2019 14:19:04 -0500 Subject: 0.4.3 stow (most) uri/links in array, separate from object text - munge independently - no need to consider special munging of uri with text - uri can easily be munged independently (encoded as need be) --- src/doc_reform/meta/metadoc_from_src.d | 27 +++++++++++++++++++++++++++ src/doc_reform/meta/object_setter.d | 4 ++++ src/doc_reform/meta/rgx.d | 4 +++- src/doc_reform/output/epub3.d | 1 + src/doc_reform/output/html.d | 1 + src/doc_reform/output/rgx.d | 4 +++- src/doc_reform/output/sqlite.d | 6 ++++++ src/doc_reform/output/xmls.d | 8 +++++++- 8 files changed, 52 insertions(+), 3 deletions(-) (limited to 'src/doc_reform') diff --git a/src/doc_reform/meta/metadoc_from_src.d b/src/doc_reform/meta/metadoc_from_src.d index 9d2935c..de2df2b 100644 --- a/src/doc_reform/meta/metadoc_from_src.d +++ b/src/doc_reform/meta/metadoc_from_src.d @@ -1731,6 +1731,25 @@ template DocReformDocAbstraction() { } return obj; } + auto _links(O)(O obj) { + if (auto m = obj.text.match(rgx.inline_link_stow_uri)) { + debug(links) { + writeln("number of link matches to stow: ", (obj.text.match(rgx.inline_link_stow_uri)).count); + writeln("links to stow: ", (obj.text.match(rgx.inline_link_stow_uri))); + } + int _n_matches = (obj.text.match(rgx.inline_link_stow_uri)).count.to!int; + for(int i=0; i < _n_matches; ++i) { + if (obj.text.match(rgx.inline_link_stow_uri)) { + obj.stow.link ~= obj.text.matchFirst(rgx.inline_link_stow_uri)[2]; + obj.text = obj.text.replaceFirst( + rgx.inline_link_stow_uri, + format(q"¶┥%s┝┤%s├¶", "$1", i) + ); + } + } + } + return obj; + } foreach (ref obj; the_document_head_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { @@ -1760,6 +1779,7 @@ template DocReformDocAbstraction() { } obj = obj_heading_ancestors(obj, lv_ancestors_txt); } + obj = _links(obj); } if (the_table_of_contents_section.length > 1) { /+ scroll +/ @@ 
-1787,6 +1807,7 @@ template DocReformDocAbstraction() { } obj = obj_heading_ancestors(obj, lv_ancestors_txt); } + obj = _links(obj); } } /+ multiple 1~ levels, loop through document body +/ @@ -1833,6 +1854,7 @@ template DocReformDocAbstraction() { _images ~= extract_images(obj.text); obj = _image_dimensions(manifest_matter, obj); } + obj = _links(obj); } } auto images=uniq(_images.sort()); @@ -1873,6 +1895,7 @@ template DocReformDocAbstraction() { } obj = obj_heading_ancestors(obj, lv_ancestors_txt); } + obj = _links(obj); } } /+ optional only one 1~ level +/ @@ -1912,6 +1935,7 @@ template DocReformDocAbstraction() { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj = _links(obj); } } /+ optional only one 1~ level +/ @@ -1951,6 +1975,7 @@ template DocReformDocAbstraction() { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj = _links(obj); } } /+ optional only one 1~ level +/ @@ -1999,6 +2024,7 @@ template DocReformDocAbstraction() { obj.metainfo.o_n_book_index = obj_cite_digits.bkidx; obj.metainfo.object_number_type = OCNtype.bkidx; } + obj = _links(obj); } /+ TODO assert failure, reinstate assert(obj_cite_digit_bkidx == ocn_bidx_ @@ -2042,6 +2068,7 @@ template DocReformDocAbstraction() { obj.metainfo.object_number_off = obj_cite_digits.off; obj.metainfo.object_number_type = OCNtype.non; } + obj = _links(obj); } } if (the_document_body_section.length > 1) { diff --git a/src/doc_reform/meta/object_setter.d b/src/doc_reform/meta/object_setter.d index a4eaaf6..7b94027 100644 --- a/src/doc_reform/meta/object_setter.d +++ b/src/doc_reform/meta/object_setter.d @@ -98,6 +98,9 @@ template ObjectSetter() { struct DocObj_CodeBlock_ { string syntax = ""; } + struct DocObj_Stow_ { + string[] link = []; + } struct DocObj_Pointer_ { int doc_object = 0; int html_segnames = 0; @@ -124,6 +127,7 @@ template ObjectSetter() { DocObj_Has_ has; DocObj_Table_ 
table; DocObj_CodeBlock_ code_block; + DocObj_Stow_ stow; DocObj_Pointer_ ptr; } struct TheObjects { diff --git a/src/doc_reform/meta/rgx.d b/src/doc_reform/meta/rgx.d index c43390d..dcfc245 100644 --- a/src/doc_reform/meta/rgx.d +++ b/src/doc_reform/meta/rgx.d @@ -257,9 +257,11 @@ static template DocReformRgxInit() { static inline_image_without_dimensions = ctRegex!(`(?P
<pre>┥)☼(?P<imginf>(?P<img>\S+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.+?├)`, "mg");
     static inline_link_anchor                             = ctRegex!(`┋(?P<anchor>\S+?)┋`, "mg");
     static inline_link                                    = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>\S+?)├`, "mg");
+    static inline_link_number_only                        = ctRegex!(`(┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
+    static inline_link_stow_uri                           = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
     static inline_link_hash                               = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<segname>\S+?))├`, "mg");
     static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
-    static inline_a_url                                   = ctRegex!(`(┤)(\S+?)(├)`, "mg");
+    static inline_a_url                                   = ctRegex!(`(┤)([^\s┥┝┤├]+)(├)`, "mg");
     static url                                            = ctRegex!(`https?://`, "mg");
     static inline_link_subtoc                             = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
     static fn_suffix                                      = ctRegex!(`\.fnSuffix`, "mg");
diff --git a/src/doc_reform/output/epub3.d b/src/doc_reform/output/epub3.d
index 026a2b5..1df1216 100644
--- a/src/doc_reform/output/epub3.d
+++ b/src/doc_reform/output/epub3.d
@@ -5,6 +5,7 @@ template outputEPub3() {
     std.digest.sha,
     std.file,
     std.outbuffer,
+    std.uri,
     std.zip,
     std.conv : to;
   import
diff --git a/src/doc_reform/output/html.d b/src/doc_reform/output/html.d
index 97ba0ce..dee5d53 100644
--- a/src/doc_reform/output/html.d
+++ b/src/doc_reform/output/html.d
@@ -5,6 +5,7 @@ template outputHTML() {
     std.digest.sha,
     std.file,
     std.outbuffer,
+    std.uri,
     std.zip,
     std.conv : to;
   import
diff --git a/src/doc_reform/output/rgx.d b/src/doc_reform/output/rgx.d
index 0bcb2b6..bfd2a4e 100644
--- a/src/doc_reform/output/rgx.d
+++ b/src/doc_reform/output/rgx.d
@@ -65,9 +65,11 @@ static template DocReformOutputRgxInit() {
     static inline_image_without_dimensions                = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>\S+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.+?├)`, "mg");
     static inline_link_anchor                             = ctRegex!(`┋(?P<anchor>\S+?)┋`, "mg");
     static inline_link                                    = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>\S+?)├`, "mg");
+    static inline_link_number_only                        = ctRegex!(`(┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
+    static inline_link_stow_uri                           = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
     static inline_link_hash                               = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<segname>\S+?))├`, "mg");
     static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
-    static inline_a_url                                   = ctRegex!(`(┤)(\S+?)(├)`, "mg");
+    static inline_a_url                                   = ctRegex!(`(┤)([^\s┥┝┤├]+)(├)`, "mg");
     static url                                            = ctRegex!(`https?://`, "mg");
     static inline_link_subtoc                             = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
     static fn_suffix                                      = ctRegex!(`\.fnSuffix`, "mg");
diff --git a/src/doc_reform/output/sqlite.d b/src/doc_reform/output/sqlite.d
index 0e424e6..e0c9390 100644
--- a/src/doc_reform/output/sqlite.d
+++ b/src/doc_reform/output/sqlite.d
@@ -2,6 +2,7 @@ module doc_reform.output.sqlite;
 import doc_reform.output;
 import
   std.file,
+  std.uri,
   std.conv : to;
 import d2sqlite3;
 import std.typecons : Nullable;
@@ -284,6 +285,11 @@ template SQLiteFormatAndLoadObject() {
         string         _xml_type = "seg",
       ) {
         if (obj.has.inline_links) {
+          if  (obj.metainfo.is_a != "code") {
+            _txt = replaceAll!(hit =>
+                hit[1] ~ "┤" ~ to!string((obj.stow.link[hit[2].to!ulong])).encode ~ "├"
+              )(_txt, rgx.inline_link_number_only);
+          }
           if ((_txt.match(rgx.mark_internal_site_lnk))
           && (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault
             _txt = _txt.replaceAll(
diff --git a/src/doc_reform/output/xmls.d b/src/doc_reform/output/xmls.d
index 58cfb4e..41787ae 100644
--- a/src/doc_reform/output/xmls.d
+++ b/src/doc_reform/output/xmls.d
@@ -5,6 +5,7 @@ template outputXHTMLs() {
     std.digest.sha,
     std.file,
     std.outbuffer,
+    std.uri,
     std.zip,
     std.conv : to;
   import
@@ -121,7 +122,7 @@ template outputXHTMLs() {
         doc_matters.conf_make_meta.meta.date_modified,
         doc_matters.src.language,
         doc_matters.conf_make_meta.meta.rights_copyright,
-        doc_matters.generator_program.name_and_version,
+        doc_matters.opt.action.debug_do ? "" : doc_matters.generator_program.name_and_version,
         doc_matters.generator_program.url_home,
       );
       return o;
@@ -350,6 +351,11 @@ template outputXHTMLs() {
     ) {
       string seg_lvs;
       if (obj.has.inline_links) {
+        if  (obj.metainfo.is_a != "code") {
+          _txt = replaceAll!(hit =>
+              hit[1] ~ "┤" ~ to!string((obj.stow.link[hit[2].to!ulong])).encode ~ "├"
+            )(_txt, rgx.inline_link_number_only);
+        }
         if ((_txt.match(rgx.mark_internal_site_lnk))
         && (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault
           _txt = _txt.replaceAll(
-- 
cgit v1.2.3