From a263d67a5dfb5fad70059f63c1029f9174bf7ff0 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 30 Apr 2018 21:05:13 -0400 Subject: link url matching - smid: sisu markup identify - inline_link_markup_regular matches series of marked up urls, e.g. { link txt }http://url, { link txt }http://url --- org/default_regex.org | 17 +++++++++-------- org/meta_abstraction.org | 24 ++++++++++++------------ org/meta_conf_make_meta.org | 8 ++++---- org/output_xmls.org | 40 ++++++++++++++++++++-------------------- 4 files changed, 45 insertions(+), 44 deletions(-) (limited to 'org') diff --git a/org/default_regex.org b/org/default_regex.org index e824577..5b759b0 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -225,7 +225,8 @@ static table_col_separator = ctRegex!(`┊`); static table_col_separator_nl = ctRegex!(`[┊]$`, "mg"); #+END_SRC -** inline markup footnotes endnotes :inline:footnote: +** inline markup :inline:footnote: +*** footnotes & endnotes #+name: meta_rgx #+BEGIN_SRC d @@ -255,12 +256,12 @@ static note_ref = ctRegex!(`^\S+?noteref_( #+name: meta_rgx #+BEGIN_SRC d -static inline_url_generic = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg"); -static inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg"); -static inline_link_naked_url = ctRegex!(`(?P^|[ ])(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?P[.,;:?!'"]?(?:[ ]|$))`, "mg"); -static inline_link_markup_regular = ctRegex!(`(?P^|[ ])\{\s*(?P.+?)\s*\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P(?:[,;:? 
]|[!.]?(?:[ ]|$)))`, "mg"); -static inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<post>[.,;:?!]?(?:[ ]|$))`, "mg"); -static inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg"); +static smid_inline_url_generic = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg"); +static smid_inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg"); +static smid_inline_link_naked_url = ctRegex!(`(?P<pre>^|[ ])(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?P<post>[.,;:?!'"]?(?:[ ]|$))`, "mg"); // issue with #link #32
+static smid_inline_link_markup_regular                = ctRegex!(`(?P<pre>^|[ ]|[^\S]?)\{\s*(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<post>[;:!,?.]?(?:[ )\]]|$))`, "mg"); // NEXT
+static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<post>[.,;:?!]?(?:[ ]|$))`, "mg");
+static smid_inline_link_endnote_url_helper            = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg");
 #+END_SRC
 
 *** images                                                         :images:
@@ -392,7 +393,7 @@ static template SiSUoutputRgxInit() {
 #+name: sp_ch_xhtml_rgx
 #+BEGIN_SRC d
 static xhtml_ampersand                            = ctRegex!(`[&]`);      // &
-static xhtml_quotation                            = ctRegex!(`[&]`);      // "
+static xhtml_quotation                            = ctRegex!(`["]`);      // "
 static xhtml_less_than                            = ctRegex!(`[<]`);      // <
 static xhtml_greater_than                         = ctRegex!(`[>]`);      // >
 static xhtml_line_break                           = ctRegex!(` [\\]{2}`); // <br />
diff --git a/org/meta_abstraction.org b/org/meta_abstraction.org index 5f058b7..27f83be 100644 --- a/org/meta_abstraction.org +++ b/org/meta_abstraction.org @@ -4759,11 +4759,11 @@ static struct ObjInlineMarkupMunge { obj_txt_in = obj_txt_in.replaceAll(rgx.inline_mono, (mng.mono ~ "{$1}" ~ mng.mono)); /+ url matched +/ obj_txt_in = obj_txt_in.replaceAll(rgx.inline_notes_al_special, ""); // TODO reinstate when special footnotes are implemented - if (obj_txt_in.match(rgx.inline_url_generic)) { + if (obj_txt_in.match(rgx.smid_inline_url_generic)) { /+ link: naked url: http://url +/ - if (obj_txt_in.match(rgx.inline_link_naked_url)) { + if (obj_txt_in.match(rgx.smid_inline_link_naked_url)) { obj_txt_in = (obj_txt_in).replaceAll( - rgx.inline_link_naked_url, + rgx.smid_inline_link_naked_url, ("$1" ~ mkup.lnk_o ~ "$2" ~ mkup.lnk_c ~ mkup.url_o ~ "$2" ~ mkup.url_c @@ -4775,10 +4775,10 @@ static struct ObjInlineMarkupMunge { maps to: { link which includes url as footnote }http://url~{ { http://url }http://url }~ +/ - if (obj_txt_in.match(rgx.inline_link_endnote_url_helper)) { + if (obj_txt_in.match(rgx.smid_inline_link_endnote_url_helper)) { obj_txt_in = (obj_txt_in) .replaceAll( - rgx.inline_link_endnote_url_helper_punctuated, + rgx.smid_inline_link_endnote_url_helper_punctuated, (mkup.lnk_o ~ "$1" ~ mkup.lnk_c ~ mkup.url_o ~ "$2" ~ mkup.url_c ~ "~{ " ~ mkup.lnk_o ~ " $2 " ~ mkup.lnk_c @@ -4786,7 +4786,7 @@ static struct ObjInlineMarkupMunge { ~ " }~$3") // ("{ $1 }$2~{ { $2 }$2 }~$3") ) .replaceAll( - rgx.inline_link_endnote_url_helper, + rgx.smid_inline_link_endnote_url_helper, (mkup.lnk_o ~ "$1" ~ mkup.lnk_c ~ mkup.url_o ~ "$2" ~ mkup.url_c ~ "~{ " ~ mkup.lnk_o ~ " $2 " ~ mkup.lnk_c @@ -4797,9 +4797,9 @@ static struct ObjInlineMarkupMunge { /+ link with regular markup: { linked text or image }http://url +/ - if (obj_txt_in.match(rgx.inline_link_markup_regular)) { + if (obj_txt_in.match(rgx.smid_inline_link_markup_regular)) { obj_txt_in = (obj_txt_in).replaceAll( - 
rgx.inline_link_markup_regular, + rgx.smid_inline_link_markup_regular, ("$1" ~ mkup.lnk_o ~ "$2" ~ mkup.lnk_c ~ mkup.url_o ~ "$3" ~ mkup.url_c @@ -4910,7 +4910,7 @@ static struct ObjInlineMarkupMunge { (mkup.en_a_o ~ "+" ~ " $1" ~ mkup.en_a_c) ); /+ url matched +/ - if (obj_txt_in.match(rgx.inline_url)) { + if (obj_txt_in.match(rgx.smid_inline_url)) { urls = true; obj_txt_in = url_links(obj_txt_in); } @@ -5199,7 +5199,7 @@ static struct ObjInlineMarkup { obj_txt["munge"] = obj_[obj_key_].dup; obj_txt["munge"] = (obj_["is"].match(ctRegex!(`verse|code`))) ? obj_txt["munge"] - : strip(obj_txt["munge"]); + : obj_txt["munge"].strip; static __gshared string[] anchor_tags_ = []; auto x = munge.init; bool[string] obj_notes_and_links; @@ -5217,7 +5217,7 @@ static struct ObjInlineMarkup { } else if (obj_["lev"] == "1") { writeln("heading anchor tag missing: ", obj_txt["munge"]); } - x =munge.munge_heading(obj_txt["munge"], reset_note_numbers); + x = munge.munge_heading(obj_txt["munge"], reset_note_numbers); reset_note_numbers=false; goto default; case "para": @@ -5778,7 +5778,7 @@ struct ObjAttributes { } else if (auto m = obj_txt_in.matchFirst(rgx.para_indent_hang)) { _obj_attributes =" \"bullet\": \"false\"," ~ " \"indent_hang\": " ~ m.captures[1].to!string ~ "," - ~ " \"indent_base\": " ~ m.captures[2].to!string ~ ","; + ~ " \"indent_base\": " ~ m.captures[2].to!string ~ ","; } else if (auto m = obj_txt_in.matchFirst(rgx.para_indent)) { _obj_attributes =" \"bullet\": \"false\"," ~ " \"indent_hang\": " ~ m.captures[1].to!string ~ "," diff --git a/org/meta_conf_make_meta.org b/org/meta_conf_make_meta.org index 9438d25..7c27496 100644 --- a/org/meta_conf_make_meta.org +++ b/org/meta_conf_make_meta.org @@ -79,17 +79,17 @@ auto _mkup = InlineMarkup(); auto url_markup(string line) { auto line_ = (line) .replaceAll( - _rgx.inline_link_markup_regular, + _rgx.smid_inline_link_markup_regular, ("$1" ~ _mkup.lnk_o ~ "$2" ~ _mkup.lnk_c - ~ _mkup.url_o ~ "$3" ~ _mkup.url_c + ~ 
_mkup.url_o ~ "$3" ~ _mkup.url_c ~ "$4") // ("$1{ $2 }$3$4") ) .replaceAll( - _rgx.inline_link_naked_url, + _rgx.smid_inline_link_naked_url, ("$1" ~ _mkup.lnk_o ~ "$2" ~ _mkup.lnk_c - ~ _mkup.url_o ~ "$2" ~ _mkup.url_c + ~ _mkup.url_o ~ "$2" ~ _mkup.url_c ~ "$3") // ("$1{ $2 }$2$3") ) .replaceAll( diff --git a/org/output_xmls.org b/org/output_xmls.org index 6ac5c76..0cbde63 100644 --- a/org/output_xmls.org +++ b/org/output_xmls.org @@ -584,10 +584,10 @@ string lev4_heading_subtoc(O)( lev4_subtoc ~= "
\n"; foreach (subtoc; obj.lev4_subtoc) { if (auto m = subtoc.match(rgx.inline_link_subtoc)) { - auto indent = to!string(m.captures[1]); - auto text = to!string(m.captures[2]); + auto indent = m.captures[1].to!string; + auto text = m.captures[2].to!string; text = font_face(text); - auto link = to!string(m.captures[3]); + auto link = m.captures[3].to!string; lev4_subtoc ~= subtoc.replaceFirst(rgx.inline_link_subtoc, format(q"¶

%s @@ -854,7 +854,7 @@ auto para_seg(O)( string _suffix = ".html", ) { auto t = inline_markup_seg(obj, _txt, _suffix); - _txt = to!string(t[0]); + _txt = t[0].to!string; string[] _endnotes = t[1]; string o = para(obj, _txt); auto u = tuple( @@ -929,7 +929,7 @@ auto quote_seg(O)( string _suffix = ".html", ) { auto t = inline_markup_seg(obj, _txt, _suffix); - _txt = to!string(t[0]); + _txt = t[0].to!string; string[] _endnotes = t[1]; string o = quote(obj, _txt); auto u = tuple( @@ -1004,7 +1004,7 @@ auto group_seg(O)( string _suffix = ".html", ) { auto t = inline_markup_seg(obj, _txt, _suffix); - _txt = to!string(t[0]); + _txt = t[0].to!string; string[] _endnotes = t[1]; string o = group(obj, _txt); auto u = tuple( @@ -1075,7 +1075,7 @@ auto block_seg(O)( string _suffix = ".html", ) { auto t = inline_markup_seg(obj, _txt, _suffix); - _txt = to!string(t[0]); + _txt = t[0].to!string; string[] _endnotes = t[1]; string o = block(obj, _txt); auto u = tuple( @@ -1146,7 +1146,7 @@ auto verse_seg(O)( string _suffix = ".html", ) { auto t = inline_markup_seg(obj, _txt, _suffix); - _txt = to!string(t[0]); + _txt = t[0].to!string; string[] _endnotes = t[1]; string o = verse(obj, _txt); auto u = tuple( @@ -1554,13 +1554,13 @@ void seg(D,I)( doc_html[segment_filename] ~= top_level_heading; } auto t = xhtml_format.heading_seg(obj, _txt, suffix); - doc_html[segment_filename] ~= to!string(t[0]); + doc_html[segment_filename] ~= t[0].to!string; doc_html[segment_filename] ~= xhtml_format.lev4_heading_subtoc(obj); doc_html_endnotes[segment_filename] ~= t[1]; break; case 5: .. case 7: auto t = xhtml_format.heading_seg(obj, _txt, suffix); - doc_html[segment_filename] ~= to!string(t[0]); + doc_html[segment_filename] ~= t[0].to!string; doc_html_endnotes[segment_filename] ~= t[1]; break; case 8: .. 
case 9: @@ -1584,7 +1584,7 @@ void seg(D,I)( switch (obj.is_a) { case "toc": auto t = xhtml_format.para_seg(obj, _txt, suffix); - doc_html[segment_filename] ~= to!string(t[0]); + doc_html[segment_filename] ~= t[0].to!string; break; default: if ((doc_matters.opt.action.debug_do)) { @@ -1606,7 +1606,7 @@ void seg(D,I)( switch (obj.is_a) { case "para": auto t = xhtml_format.para_seg(obj, _txt, suffix); - doc_html[segment_filename] ~= to!string(t[0]); + doc_html[segment_filename] ~= t[0].to!string; doc_html_endnotes[segment_filename] ~= t[1]; break; default: @@ -1620,24 +1620,24 @@ void seg(D,I)( switch (obj.is_a) { case "quote": auto t = xhtml_format.quote_seg(obj, _txt, suffix); - doc_html[segment_filename] ~= to!string(t[0]); + doc_html[segment_filename] ~= t[0].to!string; doc_html_endnotes[segment_filename] ~= t[1]; break; case "group": auto t = xhtml_format.group_seg(obj, _txt, suffix); - doc_html[segment_filename] ~= to!string(t[0]); + doc_html[segment_filename] ~= t[0].to!string; doc_html_endnotes[segment_filename] ~= t[1]; break; case "block": auto t = xhtml_format.block_seg(obj, _txt, suffix); - doc_html[segment_filename] ~= to!string(t[0]); + doc_html[segment_filename] ~= t[0].to!string; doc_html_endnotes[segment_filename] ~= t[1]; break; case "poem": break; case "verse": auto t = xhtml_format.verse_seg(obj, _txt, suffix); - doc_html[segment_filename] ~= to!string(t[0]); + doc_html[segment_filename] ~= t[0].to!string; doc_html_endnotes[segment_filename] ~= t[1]; break; case "code": @@ -2266,24 +2266,24 @@ void outputEPub3(D,I)( switch (obj.is_a) { case "quote": auto t = xhtml_format.quote_seg(obj, _txt, suffix); - doc_epub3[segment_filename] ~= to!string(t[0]); + doc_epub3[segment_filename] ~= t[0].to!string; doc_epub3_endnotes[segment_filename] ~= t[1]; break; case "group": auto t = xhtml_format.group_seg(obj, _txt, suffix); - doc_epub3[segment_filename] ~= to!string(t[0]); + doc_epub3[segment_filename] ~= t[0].to!string; doc_epub3_endnotes[segment_filename] 
~= t[1]; break; case "block": auto t = xhtml_format.block_seg(obj, _txt, suffix); - doc_epub3[segment_filename] ~= to!string(t[0]); + doc_epub3[segment_filename] ~= t[0].to!string; doc_epub3_endnotes[segment_filename] ~= t[1]; break; case "poem": break; case "verse": auto t = xhtml_format.verse_seg(obj, _txt, suffix); - doc_epub3[segment_filename] ~= to!string(t[0]); + doc_epub3[segment_filename] ~= t[0].to!string; doc_epub3_endnotes[segment_filename] ~= t[1]; break; case "code": -- cgit v1.2.3