From 87d62f48d6c8a2ccf9807f56c23a6ca71d1102e6 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 13 Jan 2019 14:19:04 -0500 Subject: 0.4.3 stow (most) uri/links in array, separate from object text - munge independently - no need to consider special munging of uri with text - uri can easily be munged independently (encoded as need be) --- org/default_regex.org | 4 +- org/doc_reform.org | 112 +++++++++++++++++++++++------------------------ org/meta_abstraction.org | 49 +++++++++++++++++++++ org/output_sqlite.org | 6 +++ org/output_xmls.org | 8 +++- 5 files changed, 121 insertions(+), 58 deletions(-) (limited to 'org') diff --git a/org/default_regex.org b/org/default_regex.org index c237239..5705fb2 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -505,9 +505,11 @@ static inline_image = ctRegex!(`(?P
┥)
 static inline_image_without_dimensions                = ctRegex!(`(?P
┥)☼(?P(?P\S+?\.(?:jpg|gif|png)),w(?P0)h(?P0))\s*(?P.*?┝┤.+?├)`, "mg");
 static inline_link_anchor                             = ctRegex!(`┋(?P\S+?)┋`, "mg");
 static inline_link                                    = ctRegex!(`┥(?P.+?)┝┤(?P\S+?)├`, "mg");
+static inline_link_number_only                        = ctRegex!(`(┥.+?┝)┤(?P[0-9]+)├`, "mg");
+static inline_link_stow_uri                           = ctRegex!(`┥(?P.+?)┝┤(?P[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
 static inline_link_hash                               = ctRegex!(`┥(?P.+?)┝┤(?P#(?P\S+?))├`, "mg");
 static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
-static inline_a_url                                   = ctRegex!(`(┤)(\S+?)(├)`, "mg");
+static inline_a_url                                   = ctRegex!(`(┤)([^\s┥┝┤├]+)(├)`, "mg");
 static url                                            = ctRegex!(`https?://`, "mg");
 static inline_link_subtoc                             = ctRegex!(`^(?P[5-7])~ ┥(?P.+?)┝┤(?P.+?)├`, "mg");
 static fn_suffix                                      = ctRegex!(`\.fnSuffix`, "mg");
diff --git a/org/doc_reform.org b/org/doc_reform.org
index 55bd41d..fe66011 100644
--- a/org/doc_reform.org
+++ b/org/doc_reform.org
@@ -26,7 +26,7 @@ struct Version {
   int minor;
   int patch;
 }
-enum _ver = Version(0, 4, 2);
+enum _ver = Version(0, 4, 3);
 #+END_SRC
 
 ** compilation restrictions (supported compilers)
@@ -1369,61 +1369,61 @@ dev notes
 *** document objects (table)
 - check, keep up to date
 
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-| doc object   | doc object       | attributes              | inline         | appended            | structure            | delimiters                     |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-| is_of        | is_a             |                         |                |                     |                      |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-| para/heading | heading          | - level                 | - font face    | - object number off | - level              | - delimiter == two newlines    |
-|              |                  | - object number         | - endnotes     | - book index meta   | (document structure) |                                |
-|              |                  | - object number off     |                |                     |                      |                                |
-|              |                  | - dummy (toc & seg)     |                |                     |                      |                                |
-|              |                  | - tags (internal links) |                |                     |                      |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-|              | toc              | - level                 | - font face    |                     |                      | - auto generated from headings |
-|              |                  |                         | - links (auto) |                     |                      |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-| para         | para             | - bullet                | - font face    | - object number off |                      | - delimiter == two newlines    |
-|              |                  | - indent                | - links/urls * | - book index meta   |                      |                                |
-|              |                  | - object number         | - images*      |                     |                      |                                |
-|              |                  | - object number off     | - endnotes     |                     |                      |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-|              | toc              | - indent                | - font face    |                     |                      | - delimiter == two newlines    |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-|              | endnote          |                         | - font face    |                     |                      | (generated from                |
-|              |                  |                         |                |                     |                      | inline markup tags)            |
-|              |                  |                         |                |                     |                      | - delimiter == two newlines    |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-|              | bookindex        | - auto indent           | - font face    |                     |                      | - delimiter == two newlines    |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-|              | blurb            | - bullet                | - font face    | - object number off |                      | - delimiter == two newlines    |
-|              |                  | - indent                | - links/urls * | - book index meta   |                      |                                |
-|              |                  | - object number         | - images*      |                     |                      |                                |
-|              |                  | - object number off     | - endnotes     |                     |                      |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-| group        | group            | - object number         | - font face    | - book index meta   | - para break         | - delimiter tags (group)       |
-|              |                  | - object number off     | - links/urls * |                     |                      |                                |
-|              |                  |                         | - images*      |                     |                      |                                |
-|              |                  |                         | - endnotes     |                     |                      |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-|              | block            | - object number         | - font face    | - book index meta   | - new line           | - delimiter tags (block)       |
-|              |                  | - object number off     | - links/urls * |                     |                      |                                |
-|              |                  |                         | - images*      |                     |                      |                                |
-|              |                  |                         | - endnotes     |                     |                      |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-|              | quote            | - object number         | - font face    | - book index meta   |                      | - delimiter tags (quote)       |
-|              |                  |                         | - endnotes     |                     |                      |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-|              | poem (see verse) |                         |                | - book index meta   |                      | - delimiter tags (poem)        |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-|              | verse (of poem)  | - object number         | - font face    |                     | - new line           | - (see poem delimiter)         |
-|              |                  |                         | - endnotes     |                     | - preceeding spaces  |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-| group/code   | code             | - syntax                |                |                     | - new line           | - delimiter tags (code)        |
-|              |                  | - numbered              |                |                     | - preceeding spaces  |                                |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
-| group/table  | table            | - object number         |                |                     |                      | - delimiter tags (table)       |
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| doc object       | doc object | attributes              | inline         | appended            | structure            | delimiters                     |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| is_a             | is_of_type |                         |                |                     |                      |                                |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| heading          | para       | - level                 | - font face    | - object number off | - level              | - two newlines                 |
+|                  |            | - object number         | - endnotes     | - book index meta   | (document structure) |                                |
+|                  |            | - object number off     |                |                     |                      |                                |
+|                  |            | - dummy (toc & seg)     |                |                     |                      |                                |
+|                  |            | - tags (internal links) |                |                     |                      |                                |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| toc              | para       | - level                 | - font face    |                     |                      | - auto generated from headings |
+|                  |            | (auto-indent)           | - links (auto) |                     |                      |                                |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| para             | para       | - bullet                | - font face    | - object number off |                      | - two newlines                 |
+|                  |            | - indent                | - links/urls * | - book index meta   |                      |                                |
+|                  |            | - object number         | - images*      |                     |                      |                                |
+|                  |            | - object number off     | - endnotes     |                     |                      |                                |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| bookindex        | para       | - auto indent           | - font face    |                     |                      | - two newlines                 |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| blurb            | para       | - bullet                | - font face    | - object number off |                      | - two newlines                 |
+|                  |            | - indent                | - links/urls * | - book index meta   |                      |                                |
+|                  |            | - object number         | - images*      |                     |                      |                                |
+|                  |            | - object number off     | - endnotes     |                     |                      |                                |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| group            | block      | - object number         | - font face    | - book index meta   | - para break         | - block tags                   |
+|                  |            | - object number off     | - links/urls * |                     |                      | (group)                        |
+|                  |            |                         | - images*      |                     |                      |                                |
+|                  |            |                         | - endnotes     |                     |                      |                                |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| block            | block      | - object number         | - font face    | - book index meta   | - new line           | - block tags                   |
+|                  |            | - object number off     | - links/urls * |                     |                      | (block)                        |
+|                  |            |                         | - images*      |                     |                      |                                |
+|                  |            |                         | - endnotes     |                     |                      |                                |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| quote            | block      | - object number         | - font face    | - book index meta   |                      | - block tags                   |
+|                  |            |                         | - endnotes     |                     |                      | (quote)                        |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| poem (see verse) | block      |                         |                | - book index meta   |                      | - block tags                   |
+|                  |            |                         |                |                     |                      | (poem)                         |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| verse (of poem)  |            | - object number         | - font face    |                     | - new line           | - (see poem delimiter)         |
+|                  |            |                         | - endnotes     |                     | - preceding spaces   |                                |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| code             | block      | - syntax                |                |                     | - new line           | - block tags                   |
+|                  |            | - numbered              |                |                     | - preceding spaces   | (code)                         |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| table            | block      | - object number         |                |                     |                      | - block tags                   |
+|                  |            |                         |                |                     |                      | (table)                        |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| endnote          |            |                         | - font face    |                     |                      | (generated from                |
+|                  |            |                         |                |                     |                      | inline markup tags)            |
+|                  |            |                         |                |                     |                      | - two newlines                 |
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
 
 - consider special treatment for links/urls (& for images?) take them out of
   document munge (for various outputs), by storing in own array (within each
diff --git a/org/meta_abstraction.org b/org/meta_abstraction.org
index 4c98f66..5249df1 100644
--- a/org/meta_abstraction.org
+++ b/org/meta_abstraction.org
@@ -2294,6 +2294,37 @@ auto _image_dimensions(M,O)(M manifest_matter, O obj) {
 }
 #+END_SRC
 
+***** links: think about!!!
+- move actual links to an array in object struct so they cannot be regex munged within text block
+  - you may wish to exclude certain types of internal document link
+    - object number links
+      - toc
+      - book index
+    - footnotes and footnote numbers
+
+#+name: abs_post
+#+BEGIN_SRC d
+/++
+ + Stow the uri of each inline link found in obj.text.
+ +
+ + Every match of rgx.inline_link_stow_uri has its uri (capture group 2)
+ + appended to obj.stow.link, and the uri in the text is replaced by the
+ + index of that entry, i.e. ┥text┝┤uri├ becomes ┥text┝┤N├.
+ + Links the regex does not match are left untouched.
+ +
+ + Params:
+ +   obj = document object providing .text and .stow.link
+ + Returns: the object with uris stowed and obj.text rewritten
+ +/
+auto _links(O)(O obj) {
+  debug(links) {
+    if (obj.text.match(rgx.inline_link_stow_uri)) {
+      writeln("number of link matches to stow: ", (obj.text.match(rgx.inline_link_stow_uri)).count);
+      writeln("links to stow: ", (obj.text.match(rgx.inline_link_stow_uri)));
+    }
+  }
+  // one link per pass: a numeric placeholder no longer matches the stow
+  // regex (its uri part may not start with a digit), so the loop terminates
+  auto _m = obj.text.matchFirst(rgx.inline_link_stow_uri);
+  while (!_m.empty) {
+    // the placeholder must be the position of the uri within the stow array,
+    // (not a per-call counter) so it stays valid if links were stowed earlier
+    size_t _idx = obj.stow.link.length;
+    obj.stow.link ~= _m[2];
+    obj.text = obj.text.replaceFirst(
+      rgx.inline_link_stow_uri,
+      format(q"¶┥%s┝┤%s├¶", "$1", _idx)
+    );
+    _m = obj.text.matchFirst(rgx.inline_link_stow_uri);
+  }
+  return obj;
+}
+#+END_SRC
+
 ***** ↻ Loop section: head
 
 #+name: abs_post
@@ -2327,6 +2358,7 @@ foreach (ref obj; the_document_head_section) {
     }
     obj = obj_heading_ancestors(obj, lv_ancestors_txt);
   }
+  obj = _links(obj);
 }
 #+END_SRC
 
@@ -2360,6 +2392,7 @@ if (the_table_of_contents_section.length > 1) {
       }
       obj = obj_heading_ancestors(obj, lv_ancestors_txt);
     }
+    obj = _links(obj);
   }
 }
 #+END_SRC
@@ -2412,6 +2445,7 @@ if (the_document_body_section.length > 1) {
        _images ~= extract_images(obj.text);
        obj = _image_dimensions(manifest_matter, obj);
     }
+    obj = _links(obj);
   }
 }
 auto images=uniq(_images.sort());
@@ -2460,6 +2494,7 @@ if (the_endnotes_section.length > 1) {
       }
       obj = obj_heading_ancestors(obj, lv_ancestors_txt);
     }
+    obj = _links(obj);
   }
 }
 #+END_SRC
@@ -2507,6 +2542,7 @@ if (the_glossary_section.length > 1) {
       obj.metainfo.ocn        = obj_cite_digits.object_number;
       obj.metainfo.identifier = obj_cite_digits.identifier;
     }
+    obj = _links(obj);
   }
 }
 #+END_SRC
@@ -2554,6 +2590,7 @@ if (the_bibliography_section.length > 1) {
       obj.metainfo.ocn        = obj_cite_digits.object_number;
       obj.metainfo.identifier = obj_cite_digits.identifier;
     }
+    obj = _links(obj);
   }
 }
 #+END_SRC
@@ -2610,6 +2647,7 @@ if (the_bookindex_section.length > 1) {
       obj.metainfo.o_n_book_index           = obj_cite_digits.bkidx;
       obj.metainfo.object_number_type       = OCNtype.bkidx;
     }
+    obj = _links(obj);
   }
   /+ TODO assert failure, reinstate
   assert(obj_cite_digit_bkidx == ocn_bidx_
@@ -2659,6 +2697,7 @@ if (the_blurb_section.length > 1) {
       obj.metainfo.object_number_off  = obj_cite_digits.off;
       obj.metainfo.object_number_type = OCNtype.non;
     }
+    obj = _links(obj);
   }
 }
 #+END_SRC
@@ -7861,6 +7900,15 @@ struct DocObj_CodeBlock_ {
 }
 #+END_SRC
 
+**** stow (things to be protected from regular text transformations, so far links)
+
+#+name: meta_structs_init
+#+BEGIN_SRC d
+struct DocObj_Stow_ {
+  string[]               link                               = []; // uris taken out of the object text by _links(); the text keeps the array index in place of the uri
+}
+#+END_SRC
+
 **** pointers
 
 #+name: meta_structs_init
@@ -7903,6 +7951,7 @@ struct ObjGenericComposite {
   DocObj_Has_            has;
   DocObj_Table_          table;
   DocObj_CodeBlock_      code_block;
+  DocObj_Stow_           stow;
   DocObj_Pointer_        ptr;
 }
 #+END_SRC
diff --git a/org/output_sqlite.org b/org/output_sqlite.org
index b1c9cf4..bdb2ca7 100644
--- a/org/output_sqlite.org
+++ b/org/output_sqlite.org
@@ -322,6 +322,7 @@ template SQLiteDbDrop() {
 import doc_reform.output;
 import
   std.file,
+  std.uri,
   std.conv : to;
 #+END_SRC
 
@@ -534,6 +535,11 @@ auto inline_links(M,O)(
   string         _xml_type = "seg",
 ) {
   if (obj.has.inline_links) {
+    if  (obj.metainfo.is_a != "code") {
+      _txt = replaceAll!(hit =>
+          hit[1] ~ "┤" ~ to!string((obj.stow.link[hit[2].to!ulong])).encode ~ "├"
+        )(_txt, rgx.inline_link_number_only);
+    }
     if ((_txt.match(rgx.mark_internal_site_lnk))
     && (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault
       _txt = _txt.replaceAll(
diff --git a/org/output_xmls.org b/org/output_xmls.org
index b9302bd..45a1c3c 100644
--- a/org/output_xmls.org
+++ b/org/output_xmls.org
@@ -42,6 +42,7 @@ import
   std.digest.sha,
   std.file,
   std.outbuffer,
+  std.uri,
   std.zip,
   std.conv : to;
 import
@@ -193,7 +194,7 @@ auto header_metadata(M)(
     doc_matters.conf_make_meta.meta.date_modified,
     doc_matters.src.language,
     doc_matters.conf_make_meta.meta.rights_copyright,
-    doc_matters.generator_program.name_and_version,
+    doc_matters.opt.action.debug_do ? "" : doc_matters.generator_program.name_and_version,
     doc_matters.generator_program.url_home,
   );
   return o;
@@ -466,6 +467,11 @@ auto inline_links(M,O)(
 ) {
   string seg_lvs;
   if (obj.has.inline_links) {
+    if  (obj.metainfo.is_a != "code") {
+      _txt = replaceAll!(hit =>
+          hit[1] ~ "┤" ~ to!string((obj.stow.link[hit[2].to!ulong])).encode ~ "├"
+        )(_txt, rgx.inline_link_number_only);
+    }
     if ((_txt.match(rgx.mark_internal_site_lnk))
     && (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault
       _txt = _txt.replaceAll(
-- 
cgit v1.2.3