From a263d67a5dfb5fad70059f63c1029f9174bf7ff0 Mon Sep 17 00:00:00 2001
From: Ralph Amissah <ralph@amissah.com>
Date: Mon, 30 Apr 2018 21:05:13 -0400
Subject: link url matching

- smid: "sisu markup identify", prefix now used for the inline
  url/link regex names (e.g. smid_inline_url)
- smid_inline_link_markup_regular matches a series of
  marked-up urls, e.g.
  { link txt }http://url, { link txt }http://url
---
 org/default_regex.org       | 17 +++++++++--------
 org/meta_abstraction.org    | 24 ++++++++++++------------
 org/meta_conf_make_meta.org |  8 ++++----
 org/output_xmls.org         | 40 ++++++++++++++++++++--------------------
 4 files changed, 45 insertions(+), 44 deletions(-)

(limited to 'org')
diff --git a/org/default_regex.org b/org/default_regex.org
index e824577..5b759b0 100644
--- a/org/default_regex.org
+++ b/org/default_regex.org
@@ -225,7 +225,8 @@ static table_col_separator                            = ctRegex!(`┊`);
 static table_col_separator_nl                         = ctRegex!(`[┊]$`, "mg");
 #+END_SRC
 
-** inline markup footnotes endnotes                        :inline:footnote:
+** inline markup                                           :inline:footnote:
+*** footnotes & endnotes
 
 #+name: meta_rgx
 #+BEGIN_SRC d
@@ -255,12 +256,12 @@ static note_ref                                       = ctRegex!(`^\S+?noteref_(
 
 #+name: meta_rgx
 #+BEGIN_SRC d
-static inline_url_generic                              = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg");
-static inline_url                                      = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg");
-static inline_link_naked_url                           = ctRegex!(`(?P<before>^|[ ])(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?P<after>[.,;:?!'"]?(?:[ ]|$))`, "mg");
-static inline_link_markup_regular                      = ctRegex!(`(?P<before>^|[ ])\{\s*(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<after>(?:[,;:? ]|[!.]?(?:[ ]|$)))`, "mg");
-static inline_link_endnote_url_helper_punctuated       = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<after>[.,;:?!]?(?:[ ]|$))`, "mg");
-static inline_link_endnote_url_helper                  = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg");
+static smid_inline_url_generic                        = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg");
+static smid_inline_url                                = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg");
+static smid_inline_link_naked_url                     = ctRegex!(`(?P<pre>^|[ ])(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?P<post>[.,;:?!'"]?(?:[ ]|$))`, "mg"); // issue with #link #32
+static smid_inline_link_markup_regular                = ctRegex!(`(?P<pre>^|[ ]|[^\S]?)\{\s*(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<post>[;:!,?.]?(?:[ )\]]|$))`, "mg"); // NEXT
+static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<after>[.,;:?!]?(?:[ ]|$))`, "mg");
+static smid_inline_link_endnote_url_helper            = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg");
 #+END_SRC
 
 *** images                                                         :images:
@@ -392,7 +393,7 @@ static template SiSUoutputRgxInit() {
 #+name: sp_ch_xhtml_rgx
 #+BEGIN_SRC d
 static xhtml_ampersand                            = ctRegex!(`[&]`);      // &amp;
-static xhtml_quotation                            = ctRegex!(`[&]`);      // &quot;
+static xhtml_quotation                            = ctRegex!(`["]`);      // &quot;
 static xhtml_less_than                            = ctRegex!(`[<]`);      // &lt;
 static xhtml_greater_than                         = ctRegex!(`[>]`);      // &gt;
 static xhtml_line_break                           = ctRegex!(` [\\]{2}`); // <br />
diff --git a/org/meta_abstraction.org b/org/meta_abstraction.org
index 5f058b7..27f83be 100644
--- a/org/meta_abstraction.org
+++ b/org/meta_abstraction.org
@@ -4759,11 +4759,11 @@ static struct ObjInlineMarkupMunge {
     obj_txt_in = obj_txt_in.replaceAll(rgx.inline_mono, (mng.mono ~ "{$1}" ~ mng.mono));
     /+ url matched +/
     obj_txt_in = obj_txt_in.replaceAll(rgx.inline_notes_al_special, ""); // TODO reinstate when special footnotes are implemented
-    if (obj_txt_in.match(rgx.inline_url_generic)) {
+    if (obj_txt_in.match(rgx.smid_inline_url_generic)) {
       /+ link: naked url: http://url +/
-      if (obj_txt_in.match(rgx.inline_link_naked_url)) {
+      if (obj_txt_in.match(rgx.smid_inline_link_naked_url)) {
         obj_txt_in = (obj_txt_in).replaceAll(
-            rgx.inline_link_naked_url,
+            rgx.smid_inline_link_naked_url,
             ("$1"
               ~ mkup.lnk_o ~ "$2" ~ mkup.lnk_c
               ~  mkup.url_o ~ "$2" ~  mkup.url_c
@@ -4775,10 +4775,10 @@ static struct ObjInlineMarkupMunge {
          maps to:
            { link which includes url as footnote }http://url~{ { http://url }http://url }~
       +/
-      if (obj_txt_in.match(rgx.inline_link_endnote_url_helper)) {
+      if (obj_txt_in.match(rgx.smid_inline_link_endnote_url_helper)) {
         obj_txt_in = (obj_txt_in)
           .replaceAll(
-            rgx.inline_link_endnote_url_helper_punctuated,
+            rgx.smid_inline_link_endnote_url_helper_punctuated,
             (mkup.lnk_o ~ "$1" ~ mkup.lnk_c
               ~ mkup.url_o ~ "$2" ~ mkup.url_c
               ~ "~{ " ~ mkup.lnk_o ~ " $2 " ~ mkup.lnk_c
@@ -4786,7 +4786,7 @@ static struct ObjInlineMarkupMunge {
               ~  " }~$3") // ("{ $1 }$2~{ { $2 }$2 }~$3")
           )
           .replaceAll(
-            rgx.inline_link_endnote_url_helper,
+            rgx.smid_inline_link_endnote_url_helper,
             (mkup.lnk_o ~ "$1" ~ mkup.lnk_c
               ~ mkup.url_o ~ "$2" ~ mkup.url_c
               ~ "~{ " ~ mkup.lnk_o ~ " $2 " ~ mkup.lnk_c
@@ -4797,9 +4797,9 @@ static struct ObjInlineMarkupMunge {
       /+ link with regular markup:
          { linked text or image }http://url
       +/
-      if (obj_txt_in.match(rgx.inline_link_markup_regular)) {
+      if (obj_txt_in.match(rgx.smid_inline_link_markup_regular)) {
         obj_txt_in = (obj_txt_in).replaceAll(
-          rgx.inline_link_markup_regular,
+          rgx.smid_inline_link_markup_regular,
           ("$1"
             ~ mkup.lnk_o ~ "$2" ~ mkup.lnk_c
             ~  mkup.url_o ~ "$3" ~  mkup.url_c
@@ -4910,7 +4910,7 @@ static struct ObjInlineMarkupMunge {
         (mkup.en_a_o ~ "+" ~ " $1" ~ mkup.en_a_c)
       );
     /+ url matched +/
-    if (obj_txt_in.match(rgx.inline_url)) {
+    if (obj_txt_in.match(rgx.smid_inline_url)) {
       urls = true;
       obj_txt_in = url_links(obj_txt_in);
     }
@@ -5199,7 +5199,7 @@ static struct ObjInlineMarkup {
     obj_txt["munge"] = obj_[obj_key_].dup;
     obj_txt["munge"] = (obj_["is"].match(ctRegex!(`verse|code`)))
     ? obj_txt["munge"]
-    : strip(obj_txt["munge"]);
+    : obj_txt["munge"].strip;
     static __gshared string[] anchor_tags_ = [];
     auto x = munge.init;
     bool[string] obj_notes_and_links;
@@ -5217,7 +5217,7 @@ static struct ObjInlineMarkup {
       } else if (obj_["lev"] == "1") {
         writeln("heading anchor tag missing: ", obj_txt["munge"]);
       }
-      x =munge.munge_heading(obj_txt["munge"], reset_note_numbers);
+      x = munge.munge_heading(obj_txt["munge"], reset_note_numbers);
       reset_note_numbers=false;
       goto default;
     case "para":
@@ -5778,7 +5778,7 @@ struct ObjAttributes {
     } else if (auto m = obj_txt_in.matchFirst(rgx.para_indent_hang)) {
       _obj_attributes =" \"bullet\": \"false\","
       ~ " \"indent_hang\": " ~ m.captures[1].to!string ~ ","
-      ~ " \"indent_base\": " ~  m.captures[2].to!string ~ ",";
+      ~ " \"indent_base\": " ~ m.captures[2].to!string ~ ",";
     } else if (auto m = obj_txt_in.matchFirst(rgx.para_indent)) {
       _obj_attributes =" \"bullet\": \"false\","
       ~ " \"indent_hang\": " ~ m.captures[1].to!string ~ ","
diff --git a/org/meta_conf_make_meta.org b/org/meta_conf_make_meta.org
index 9438d25..7c27496 100644
--- a/org/meta_conf_make_meta.org
+++ b/org/meta_conf_make_meta.org
@@ -79,17 +79,17 @@ auto _mkup = InlineMarkup();
 auto url_markup(string line) {
   auto line_ = (line)
     .replaceAll(
-      _rgx.inline_link_markup_regular,
+      _rgx.smid_inline_link_markup_regular,
       ("$1"
         ~ _mkup.lnk_o ~ "$2" ~ _mkup.lnk_c
-        ~  _mkup.url_o ~ "$3" ~  _mkup.url_c
+        ~ _mkup.url_o ~ "$3" ~ _mkup.url_c
         ~ "$4")            // ("$1{ $2 }$3$4")
     )
     .replaceAll(
-        _rgx.inline_link_naked_url,
+        _rgx.smid_inline_link_naked_url,
         ("$1"
           ~ _mkup.lnk_o ~ "$2" ~ _mkup.lnk_c
-          ~  _mkup.url_o ~ "$2" ~  _mkup.url_c
+          ~ _mkup.url_o ~ "$2" ~ _mkup.url_c
           ~ "$3")            // ("$1{ $2 }$2$3")
     )
     .replaceAll(
diff --git a/org/output_xmls.org b/org/output_xmls.org
index 6ac5c76..0cbde63 100644
--- a/org/output_xmls.org
+++ b/org/output_xmls.org
@@ -584,10 +584,10 @@ string lev4_heading_subtoc(O)(
   lev4_subtoc ~= "  <div class=\"nav\">\n";
   foreach (subtoc; obj.lev4_subtoc) {
     if (auto m = subtoc.match(rgx.inline_link_subtoc)) {
-      auto indent = to!string(m.captures[1]);
-      auto text = to!string(m.captures[2]);
+      auto indent = m.captures[1].to!string;
+      auto text = m.captures[2].to!string;
       text = font_face(text);
-      auto link = to!string(m.captures[3]);
+      auto link = m.captures[3].to!string;
       lev4_subtoc ~= subtoc.replaceFirst(rgx.inline_link_subtoc,
         format(q"¶    <p class="minitoc" indent="h%si%s">
       <a href="%s">%s</a>
@@ -854,7 +854,7 @@ auto para_seg(O)(
   string                     _suffix = ".html",
 ) {
   auto t = inline_markup_seg(obj, _txt, _suffix);
-  _txt = to!string(t[0]);
+  _txt = t[0].to!string;
   string[] _endnotes = t[1];
   string o = para(obj, _txt);
   auto u = tuple(
@@ -929,7 +929,7 @@ auto quote_seg(O)(
   string                     _suffix = ".html",
 ) {
   auto t = inline_markup_seg(obj, _txt, _suffix);
-  _txt = to!string(t[0]);
+  _txt = t[0].to!string;
   string[] _endnotes = t[1];
   string o = quote(obj, _txt);
   auto u = tuple(
@@ -1004,7 +1004,7 @@ auto group_seg(O)(
   string                     _suffix = ".html",
 ) {
   auto t = inline_markup_seg(obj, _txt, _suffix);
-  _txt = to!string(t[0]);
+  _txt = t[0].to!string;
   string[] _endnotes = t[1];
   string o = group(obj, _txt);
   auto u = tuple(
@@ -1075,7 +1075,7 @@ auto block_seg(O)(
   string                     _suffix = ".html",
 ) {
   auto t = inline_markup_seg(obj, _txt, _suffix);
-  _txt = to!string(t[0]);
+  _txt = t[0].to!string;
   string[] _endnotes = t[1];
   string o = block(obj, _txt);
   auto u = tuple(
@@ -1146,7 +1146,7 @@ auto verse_seg(O)(
   string                     _suffix = ".html",
 ) {
   auto t = inline_markup_seg(obj, _txt, _suffix);
-  _txt = to!string(t[0]);
+  _txt = t[0].to!string;
   string[] _endnotes = t[1];
   string o = verse(obj, _txt);
   auto u = tuple(
@@ -1554,13 +1554,13 @@ void seg(D,I)(
             doc_html[segment_filename] ~= top_level_heading;
           }
           auto t = xhtml_format.heading_seg(obj, _txt, suffix);
-          doc_html[segment_filename] ~= to!string(t[0]);
+          doc_html[segment_filename] ~= t[0].to!string;
           doc_html[segment_filename] ~= xhtml_format.lev4_heading_subtoc(obj);
           doc_html_endnotes[segment_filename] ~= t[1];
           break;
         case 5: .. case 7:
           auto t = xhtml_format.heading_seg(obj, _txt, suffix);
-          doc_html[segment_filename] ~= to!string(t[0]);
+          doc_html[segment_filename] ~= t[0].to!string;
           doc_html_endnotes[segment_filename] ~= t[1];
           break;
         case 8: .. case 9:
@@ -1584,7 +1584,7 @@ void seg(D,I)(
             switch (obj.is_a) {
             case "toc":
               auto t = xhtml_format.para_seg(obj, _txt, suffix);
-              doc_html[segment_filename] ~= to!string(t[0]);
+              doc_html[segment_filename] ~= t[0].to!string;
               break;
             default:
               if ((doc_matters.opt.action.debug_do)) {
@@ -1606,7 +1606,7 @@ void seg(D,I)(
             switch (obj.is_a) {
             case "para":
               auto t = xhtml_format.para_seg(obj, _txt, suffix);
-              doc_html[segment_filename] ~= to!string(t[0]);
+              doc_html[segment_filename] ~= t[0].to!string;
               doc_html_endnotes[segment_filename] ~= t[1];
               break;
             default:
@@ -1620,24 +1620,24 @@ void seg(D,I)(
             switch (obj.is_a) {
             case "quote":
               auto t = xhtml_format.quote_seg(obj, _txt, suffix);
-              doc_html[segment_filename] ~= to!string(t[0]);
+              doc_html[segment_filename] ~= t[0].to!string;
               doc_html_endnotes[segment_filename] ~= t[1];
               break;
             case "group":
               auto t = xhtml_format.group_seg(obj, _txt, suffix);
-              doc_html[segment_filename] ~= to!string(t[0]);
+              doc_html[segment_filename] ~= t[0].to!string;
               doc_html_endnotes[segment_filename] ~= t[1];
               break;
             case "block":
               auto t = xhtml_format.block_seg(obj, _txt, suffix);
-              doc_html[segment_filename] ~= to!string(t[0]);
+              doc_html[segment_filename] ~= t[0].to!string;
               doc_html_endnotes[segment_filename] ~= t[1];
               break;
             case "poem":
               break;
             case "verse":
               auto t = xhtml_format.verse_seg(obj, _txt, suffix);
-              doc_html[segment_filename] ~= to!string(t[0]);
+              doc_html[segment_filename] ~= t[0].to!string;
               doc_html_endnotes[segment_filename] ~= t[1];
               break;
             case "code":
@@ -2266,24 +2266,24 @@ void outputEPub3(D,I)(
             switch (obj.is_a) {
             case "quote":
               auto t = xhtml_format.quote_seg(obj, _txt, suffix);
-              doc_epub3[segment_filename] ~= to!string(t[0]);
+              doc_epub3[segment_filename] ~= t[0].to!string;
               doc_epub3_endnotes[segment_filename] ~= t[1];
               break;
             case "group":
               auto t = xhtml_format.group_seg(obj, _txt, suffix);
-              doc_epub3[segment_filename] ~= to!string(t[0]);
+              doc_epub3[segment_filename] ~= t[0].to!string;
               doc_epub3_endnotes[segment_filename] ~= t[1];
               break;
             case "block":
               auto t = xhtml_format.block_seg(obj, _txt, suffix);
-              doc_epub3[segment_filename] ~= to!string(t[0]);
+              doc_epub3[segment_filename] ~= t[0].to!string;
               doc_epub3_endnotes[segment_filename] ~= t[1];
               break;
             case "poem":
               break;
             case "verse":
               auto t = xhtml_format.verse_seg(obj, _txt, suffix);
-              doc_epub3[segment_filename] ~= to!string(t[0]);
+              doc_epub3[segment_filename] ~= t[0].to!string;
               doc_epub3_endnotes[segment_filename] ~= t[1];
               break;
             case "code":
-- 
cgit v1.2.3