diff options
-rw-r--r-- | org/default_regex.org | 79 | ||||
-rw-r--r-- | src/doc_reform/meta/rgx.d | 71 | ||||
-rw-r--r-- | src/doc_reform/output/rgx.d | 36 |
3 files changed, 2 insertions, 184 deletions
diff --git a/org/default_regex.org b/org/default_regex.org index 08af2ce..e14748f 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -51,10 +51,9 @@ static template DocReformRgxInit() { #+name: meta_rgx #+BEGIN_SRC d /+ misc +/ -static true_dollar = ctRegex!(`\$`, "gm"); +// static true_dollar = ctRegex!(`\$`, "gm"); static sep = ctRegex!(`ā£`, "gm"); static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); -static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); static within_quotes = ctRegex!(`"(.+?)"`, "m"); static yaml_tag_is_str = ctRegex!(`:str$`); static yaml_tag_is_int = ctRegex!(`:int$`); @@ -79,7 +78,6 @@ static table_col_delimiter = ctRegex!("[ ]*\n+", "mg" static table_row_delimiter = ctRegex!("\n[ ]*\n+", "mg"); static table_row_delimiter_special = ctRegex!("[ ]*\n", "mg"); static table_col_delimiter_special = ctRegex!("[ ]*[|][ ]*", "mg"); -static levels_markup = ctRegex!(`^[A-D1-4]$`); static levels_numbered = ctRegex!(`^[0-9]$`); static levels_numbered_headings = ctRegex!(`^[0-7]$`); static numeric = ctRegex!(`[ 0-9,.-]+`); @@ -92,7 +90,6 @@ static numeric_col = ctRegex!(`^[ 0-9,.%$Ā£ā #+BEGIN_SRC d /+ comments +/ static comment = ctRegex!(`^%+ `); -static comments = ctRegex!(`^%+ |^%+$`); #+END_SRC ** config @@ -100,8 +97,6 @@ static comments = ctRegex!(`^%+ |^%+$`); #+name: meta_rgx #+BEGIN_SRC d /+ header +/ -static make_simple_substitutions_rb = ctRegex!(`(?P<substitution>/(?P<match>.+?)/,[ ]*['"](?P<replace>.+?)['"])`); -static make_simple_substitutions_d = ctRegex!(`(?P<substitution>` ~ '`' ~ `(?P<match>.+?)` ~ '`' ~ `,[ ]*['"](?P<replace>.+?)['"])`); #+END_SRC ** native headers @@ -133,9 +128,6 @@ static heading_extract_unnamed_anchor_tag = ctRegex!(`^:?[A-D1-4][~] static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `); static heading_anchor_tag_plus_colon = ctRegex!(`^:?([A-D1-4][~])([a-z0-9_.:-]+) `,"i"); static heading_marker_tag_has_colon = ctRegex!(`([:])`); -static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); -static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); -static heading_backmatter = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i"); static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`); static heading_blurb = ctRegex!(`^:?(1)[~][!](blurb)`); @@ -165,7 +157,6 @@ static block_poem_open = ctRegex!("^((poem(?:[(][ #+name: meta_rgx #+BEGIN_SRC d /+ blocked markup tics +/ -static block_tic_open = ctRegex!("^`{3} (code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)"); static block_tic_code_open = ctRegex!("^`{3} code(?:[.](?P<syntax>[a-z][0-9a-z#+_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); static block_tic_poem_open = ctRegex!("^`{3} poem(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); static block_tic_group_open = ctRegex!("^`{3} group(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); @@ -180,7 +171,6 @@ static block_tic_close = ctRegex!("^(`{3})$","m") #+name: meta_rgx #+BEGIN_SRC d /+ blocked markup curly +/ -static block_curly_open = ctRegex!(`^((?:code([.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)`); static block_curly_code_open = ctRegex!(`^(?:code(?:[.](?P<syntax>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); static block_curly_code_close = ctRegex!(`^([}]code)`); static block_curly_poem_open = ctRegex!(`^(poem(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); @@ -225,8 +215,6 @@ static table_col_separator_nl = ctRegex!(`[ā]$`, "mg") /+ inline markup footnotes endnotes +/ static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m"); static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg"); -static inline_curly_delimiter_open_and_close_regular = ctRegex!(`~\{\s*|\s*\}~`, "m"); -static inline_notes_delimiter_curly_regular = ctRegex!(`~\{[ ]*(.+?)\}~`, "m"); static inline_notes_curly_sp = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m"); static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m"); static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m"); @@ -235,12 +223,6 @@ static inline_notes_square = ctRegex!(`~\[\s*(.+?)\]~ static inline_text_and_note_square_sp = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg"); static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg"); static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg"); -static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m"); -static inline_curly_delimiter_open_symbol_star = ctRegex!(`~\{[*]\s`, "m"); -static inline_curly_delimiter_open_symbol_plus = ctRegex!(`~\{[+]\s`, "m"); -static inline_curly_delimiter_open_star_or_plus = ctRegex!(`~\{[+*]`, "m"); -static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m"); -static inline_text_and_note_curly = ctRegex!(`(?P<text>.+?)(?:(?:[~])[{][*+ ]*)(?P<note>.+?)(?:[}][~])`, "mg"); static note_ref = ctRegex!(`^\S+?noteref_(?P<ref>[0-9]+)`, "mg"); // {^{73.}^}#noteref_73 #+END_SRC @@ -267,13 +249,6 @@ static smid_image_generic = ctRegex!(`(?:^|[ ]|[^\S static smid_image_with_dimensions = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{ā„](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[}ā](?:image|ā¤.*?ā|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); static smid_mod_image_without_dimensions = ctRegex!(`[{ā„](?:~\^\s+|\s*)ā¼\S+\.(?:png|gif|jpg),w0h0.*[}ā](?:image|ā¤.*?ā|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); static smid_a_image = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[}](?:image|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); -static smid_a_image_generic = ctRegex!(`(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[}](?:image|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); -static smid_a_image_with_dimensions = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[}](?:image|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); -static smid_a_mod_image_without_dimensions = ctRegex!(`[{](?:~\^\s+|\s*)ā¼\S+\.(?:png|gif|jpg),w0h0.*[}](?:image|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); -static smid_b_image = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[ā„](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[ā](?:ā¤.*?ā|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); -static smid_b_image_generic = ctRegex!(`(?:^|[ ]|[^\S]?)[ā„](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[ā](?:ā¤.*?ā|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); -static smid_b_image_with_dimensions = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[ā„](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[ā](?:ā¤.*?ā|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); -static smid_b_mod_image_without_dimensions = ctRegex!(`[ā„](?:~\^\s+|\s*)ā¼\S+\.(?:png|gif|jpg),w0h0.*[ā](?:ā¤.*?ā|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); static smid_image_delimit = ctRegex!(`(?P<pre>^|[ ]|[^\S]?)\{\s*(?P<text>.+?)\s*\}(?:image)(?=[;:!,?.]?([ )\]]|$))`, "mg"); #+END_SRC @@ -338,21 +313,7 @@ static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$` #+name: meta_rgx #+BEGIN_SRC d /+ line & page breaks +/ -static break_line_within_object = ctRegex!(`[\\]{2}( |$)`); -static break_page = ctRegex!(`^-[\\]{2}-$`); -static break_page_new = ctRegex!(`^=[\\]{2}=$`); -static break_page_line_across = ctRegex!(`^=[.]{2}=$`); static break_string = ctRegex!(`ć`); -static parent = ctRegex!(`([0-7]):([0-9]+)`); -static header_regex_content = ctRegex!(`([0-7]):([0-9]+)`); -#+END_SRC - -** json :json: - -#+name: meta_rgx -#+BEGIN_SRC d -/+ json +/ -static tailing_comma = ctRegex!(`,$`, "m"); #+END_SRC ** biblio tags :biblio:tags: @@ -390,8 +351,6 @@ static topic_register_multiple_sub_terms_split = ctRegex!(`ā£([^|ā£]+(? #+name: meta_rgx #+BEGIN_SRC d /+ language codes +/ -auto language_codes = - ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)"); auto language_code_and_filename = ctRegex!("(?:^|[/])(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)/[A-Za-z0-9._-].+?[.](?:sst|ssm)$"); #+END_SRC @@ -436,8 +395,6 @@ static xhtml_line_break = ctRegex!(` [\\]{2}`, "m"); / #+name: sp_ch_xhtml_rgx #+BEGIN_SRC d -static latex_special_char_shortlist = ctRegex!(`([%$_#&\\])`); -static latex_special_char_curlybraces = ctRegex!(`([{}])`); static latex_special_char = ctRegex!(`([%${}_#&\\])`); static latex_special_char_for_escape = ctRegex!(`([%${}_#\\])`); static latex_special_char_for_escape_and_braces = ctRegex!(`([&])`); @@ -456,17 +413,10 @@ static latex_clean_bookindex_linebreak = ctRegex!(`\s*\\\\\\\\\s*`, " #+name: prgmkup_rgx #+BEGIN_SRC d static newline = ctRegex!("\n", "mg"); -static strip_br = ctRegex!("^<br>\n|<br>\n*$"); static space = ctRegex!(`[ ]`, "mg"); static spaces_keep = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg"); -static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg"); -static two_spaces = ctRegex!(`[ ]{2}`, "mg"); static nbsp_char = ctRegex!(`ā`, "mg"); -static nbsp_chars_line_start = ctRegex!(`^ā+`, "mg"); -static nbsp_and_space = ctRegex!(` [ ]`, "mg"); -static nbsp_char_and_space = ctRegex!(`ā[ ]`, "mg"); -static special_markup_chars = ctRegex!(`[ććććā„āā¤āĀ¤āāāāā¼āæāāāāāā ]`, "mg"); #+END_SRC ** filename (& path) (including insert file) :insert:file:path:filename: @@ -477,20 +427,14 @@ static src_pth_sst_or_ssm = ctRegex!(`^(?P<path>[/]? static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`); static src_pth_contents = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`); static src_pth_zip = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`); -static src_pth_unzip_pod = ctRegex!(`^(?P<path>media/text/[a-z]{2}/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`); static src_pth_types = ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`); -static pod_content_location = - ctRegex!(`^(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])(?P<languages>(?:\s+[a-z]{2}(?:,|$))+)`, "mg"); static src_fn = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`); static src_fn_master = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`); -static src_fn_text = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`); -static src_fn_insert = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`); static src_fn_find_inserts = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`); static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`); static src_base_parent_dir_name = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure -static src_base_parent_path = ctRegex!(`(?P<dir>(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure static src_formalised_file_path_parts = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure #+END_SRC @@ -501,17 +445,10 @@ static src_formalised_file_path_parts = ctRegex!(`(?P<pth>(?:[/a #+name: prgmkup_rgx #+BEGIN_SRC d /+ line breaks +/ -static empty_line = ctRegex!(`^\s*$`); -static empty_block = ctRegex!(`^\s*$`, "mg"); -static br_line_natural = ctRegex!(`\n`, "mg"); static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg"); static br_newlines_linebreaks = ctRegex!(`[\nāā]`, "mg"); static br_line = ctRegex!(`ā`, "mg"); static br_nl = ctRegex!(`ā`, "mg"); -static br_paragraph = ctRegex!(`ā`, "mg"); -static br_page_line = ctRegex!(`ā¼`, "mg"); -static br_page = ctRegex!(`āæ`, "mg"); -static br_page_new = ctRegex!(`ā`, "mg"); #+END_SRC *** inline (internal program) markup footnotes endnotes :inline:footnote: @@ -531,11 +468,6 @@ static inline_notes_al_special_char_note = ctRegex!(`ć(?P<char>(? static inline_al_delimiter_open_regular = ctRegex!(`ć\s`, "m"); static inline_al_delimiter_open_symbol_star = ctRegex!(`ć[*]\s`, "m"); static inline_al_delimiter_open_symbol_plus = ctRegex!(`ć[+]\s`, "m"); -static inline_al_delimiter_close_regular = ctRegex!(`ć`, "m"); -static inline_al_delimiter_open_and_close_regular = ctRegex!(`ć|ć`, "m"); -static inline_al_delimiter_open_asterisk = ctRegex!(`ć\*`, "m"); -static inline_al_delimiter_open_plus = ctRegex!(`ć\+`, "m"); -static inline_text_and_note_al = ctRegex!(`(?P<text>.+?)ć(?:[*+ ]*)(?P<note>.+?)ć`, "mg"); static inline_text_and_note_al_ = ctRegex!(`(.+?(?:ć[*+]*\s+.+?ć|$))`, "mg"); #+END_SRC @@ -548,7 +480,6 @@ static inline_image = ctRegex!(`(?P<pre>ā„)ā static inline_image_without_dimensions = ctRegex!(`(?P<pre>ā„)ā¼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?āā¤.*?ā)`, "mg"); static inline_image_info = ctRegex!(`ā¼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg"); static inline_link_anchor = ctRegex!(`ā(?P<anchor>\S+?)ā`, "mg"); // TODO *~text_link_anchor -static inline_link_ = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<link>.+?)ā`, "mg"); static inline_link = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<link>#?(\S+?))ā`, "mg"); static inline_link_empty = ctRegex!(`ā„(?P<text>.+?)āā¤ā`, "mg"); static inline_link_number = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<num>[0-9]+)ā`, "mg"); // not used @@ -557,17 +488,14 @@ static inline_link_stow_uri = ctRegex!(`ā„(?P<text>.+ static inline_link_hash = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<link>#(?P<segname>\S+?))ā`, "mg"); static inline_link_clean = ctRegex!(`ā¤(?:.+?)ā|[ā„ā]`, "mg"); static inline_link_toc_to_backmatter = ctRegex!(`ā¤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)ā`, "mg"); -static inline_a_url = ctRegex!(`(ā¤)([^\sā„āā¤ā]+)(ā)`, "mg"); static url = ctRegex!(`https?://`, "mg"); static uri = ctRegex!(`(?:https?|git)://`, "mg"); static uri_identify_components = ctRegex!(`(?P<type>(?:https?|git)://)(?P<path>\S+?/)(?P<file>[^/]+)$`, "mg"); static inline_link_subtoc = ctRegex!(`^(?P<level>[5-7])~ ā„(?P<text>.+?)āā¤(?P<link>.+?)ā`, "mg"); -static fn_suffix = ctRegex!(`\.fnSuffix`, "mg"); static inline_link_fn_suffix = ctRegex!(`Ā¤(.+?)(\.fnSuffix)`, "mg"); static inline_seg_link = ctRegex!(`(Ā¤)(?:.+?)\.fnSuffix`, "mg"); static mark_internal_site_lnk = ctRegex!(`Ā¤`, "mg"); static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg"); -static quotation_mark_various = ctRegex!(q"ā['āāāā"`Ā“ĀØ]ā", "mg"); #+END_SRC *** inline markup font face mod :inline:font:face: @@ -575,8 +503,6 @@ static quotation_mark_various = ctRegex!(q"ā['āāā #+name: prgmkup_rgx #+BEGIN_SRC d /+ inline markup font face mod +/ -static inline_mark_faces = ctRegex!(`(?P<markup>(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}[*!/_^,+#"-])`, "mg"); -static inline_mark_faces_to_mod = ctRegex!(`(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}([*!/_^,+#"-])`, "mg"); static inline_mark_emphasis = ctRegex!(`(?P<mark>[*])\{(?P<text>.+?)\}[*]`, "mg"); static inline_mark_bold = ctRegex!(`(?P<mark>[!])\{(?P<text>.+?)\}[!]`, "mg"); static inline_mark_underscore = ctRegex!(`(?P<mark>[_])\{(?P<text>.+?)\}[_]`, "mg"); @@ -587,7 +513,6 @@ static inline_mark_strike = ctRegex!(`(?P<mark>[-])\{( static inline_mark_insert = ctRegex!(`(?P<mark>[+])\{(?P<text>.+?)\}[+]`, "mg"); static inline_mark_mono = ctRegex!(`(?P<mark>[#])\{(?P<text>.+?)\}[#]`, "mg"); static inline_mark_cite = ctRegex!(`(?P<mark>["])\{(?P<text>.+?)\}["]`, "mg"); -static inline_mark_fontface_clean = ctRegex!(`[*!_/^,+#ā "-]\{|\}[*!_/^,+#ā "-]`, "mg"); #+END_SRC #+name: prgmkup_rgx @@ -597,13 +522,11 @@ static inline_emphasis_line = ctRegex!(`^\*_ (?P<text> static inline_bold_line = ctRegex!(`^!_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); static inline_italics_line = ctRegex!(`^/_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); static inline_underscore_line = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); -static no_header_rgx = ctRegex!(`^=NULL$`); #+END_SRC #+name: prgmkup_rgx #+BEGIN_SRC d /+ inline markup font face mod +/ -static inline_faces = ctRegex!(`(?P<markup>(?P<mod>[*!_^,+ā ā-])āØ(?P<text>.+?)ā£[*!_^,+ā ā-])`, "mg"); static inline_emphasis = ctRegex!(`[*]āØ(?P<text>.+?)ā£[*]`, "mg"); static inline_bold = ctRegex!(`[!]āØ(?P<text>.+?)ā£[!]`, "mg"); static inline_underscore = ctRegex!(`[_]āØ(?P<text>.+?)ā£[_]`, "mg"); diff --git a/src/doc_reform/meta/rgx.d b/src/doc_reform/meta/rgx.d index 11f4aa5..a7826be 100644 --- a/src/doc_reform/meta/rgx.d +++ b/src/doc_reform/meta/rgx.d @@ -6,10 +6,9 @@ static template DocReformRgxInit() { import doc_reform.meta.defaults; static struct Rgx { /+ misc +/ - static true_dollar = ctRegex!(`\$`, "gm"); + // static true_dollar = ctRegex!(`\$`, "gm"); static sep = ctRegex!(`ā£`, "gm"); static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); - static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); static within_quotes = ctRegex!(`"(.+?)"`, "m"); static yaml_tag_is_str = ctRegex!(`:str$`); static yaml_tag_is_int = ctRegex!(`:int$`); @@ -34,17 +33,13 @@ static template DocReformRgxInit() { static table_row_delimiter = ctRegex!("\n[ ]*\n+", "mg"); static table_row_delimiter_special = ctRegex!("[ ]*\n", "mg"); static table_col_delimiter_special = ctRegex!("[ ]*[|][ ]*", "mg"); - static levels_markup = ctRegex!(`^[A-D1-4]$`); static levels_numbered = ctRegex!(`^[0-9]$`); static levels_numbered_headings = ctRegex!(`^[0-7]$`); static numeric = ctRegex!(`[ 0-9,.-]+`); static numeric_col = ctRegex!(`^[ 0-9,.%$Ā£ā¤Šā¬ā¬Ā„()-]+$`); /+ comments +/ static comment = ctRegex!(`^%+ `); - static comments = ctRegex!(`^%+ |^%+$`); /+ header +/ - static make_simple_substitutions_rb = ctRegex!(`(?P<substitution>/(?P<match>.+?)/,[ ]*['"](?P<replace>.+?)['"])`); - static make_simple_substitutions_d = ctRegex!(`(?P<substitution>` ~ '`' ~ `(?P<match>.+?)` ~ '`' ~ `,[ ]*['"](?P<replace>.+?)['"])`); /+ header +/ static variable_doc_title = ctRegex!(`@title`); static variable_doc_author = ctRegex!(`@author|@creator`); @@ -63,9 +58,6 @@ static template DocReformRgxInit() { static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `); static heading_anchor_tag_plus_colon = ctRegex!(`^:?([A-D1-4][~])([a-z0-9_.:-]+) `,"i"); static heading_marker_tag_has_colon = ctRegex!(`([:])`); - static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); - static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); - static heading_backmatter = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i"); static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`); static heading_blurb = ctRegex!(`^:?(1)[~][!](blurb)`); @@ -82,7 +74,6 @@ static template DocReformRgxInit() { static block_open = ctRegex!("^((code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)|^`{3} (code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?|^[{]table[(](?:h;)?(?P<columns>(?:[ ,]+[0-9]+)+)[)][}]"); static block_poem_open = ctRegex!("^((poem(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)|`{3} poem(?:[(][ a-zA-Z0-9;:,]*[)])?)"); /+ blocked markup tics +/ - static block_tic_open = ctRegex!("^`{3} (code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)"); static block_tic_code_open = ctRegex!("^`{3} code(?:[.](?P<syntax>[a-z][0-9a-z#+_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); static block_tic_poem_open = ctRegex!("^`{3} poem(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); static block_tic_group_open = ctRegex!("^`{3} group(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); @@ -91,7 +82,6 @@ static template DocReformRgxInit() { static block_tic_table_open = ctRegex!("^`{3} table(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); // ctRegex!("^`{3} table(?:\(.*?\))?"); static block_tic_close = ctRegex!("^(`{3})$","m"); /+ blocked markup curly +/ - static block_curly_open = ctRegex!(`^((?:code([.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)`); static block_curly_code_open = ctRegex!(`^(?:code(?:[.](?P<syntax>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); static block_curly_code_close = ctRegex!(`^([}]code)`); static block_curly_poem_open = ctRegex!(`^(poem(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); @@ -116,8 +106,6 @@ static template DocReformRgxInit() { /+ inline markup footnotes endnotes +/ static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m"); static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg"); - static inline_curly_delimiter_open_and_close_regular = ctRegex!(`~\{\s*|\s*\}~`, "m"); - static inline_notes_delimiter_curly_regular = ctRegex!(`~\{[ ]*(.+?)\}~`, "m"); static inline_notes_curly_sp = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m"); static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m"); static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m"); @@ -126,12 +114,6 @@ static template DocReformRgxInit() { static inline_text_and_note_square_sp = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg"); static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg"); static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg"); - static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m"); - static inline_curly_delimiter_open_symbol_star = ctRegex!(`~\{[*]\s`, "m"); - static inline_curly_delimiter_open_symbol_plus = ctRegex!(`~\{[+]\s`, "m"); - static inline_curly_delimiter_open_star_or_plus = ctRegex!(`~\{[+*]`, "m"); - static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m"); - static inline_text_and_note_curly = ctRegex!(`(?P<text>.+?)(?:(?:[~])[{][*+ ]*)(?P<note>.+?)(?:[}][~])`, "mg"); static note_ref = ctRegex!(`^\S+?noteref_(?P<ref>[0-9]+)`, "mg"); // {^{73.}^}#noteref_73 static webserv_url_doc_root = ctRegex!(`(?P<url>(?P<domain>https?:\/\/[^ /]+)\/(?P<path>\S*))`, "mg"); static smid_inline_url_generic = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)[a-zA-Z0-9_#]`, "mg"); @@ -146,13 +128,6 @@ static template DocReformRgxInit() { static smid_image_with_dimensions = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{ā„](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[}ā](?:image|ā¤.*?ā|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); static smid_mod_image_without_dimensions = ctRegex!(`[{ā„](?:~\^\s+|\s*)ā¼\S+\.(?:png|gif|jpg),w0h0.*[}ā](?:image|ā¤.*?ā|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); static smid_a_image = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[}](?:image|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); - static smid_a_image_generic = ctRegex!(`(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[}](?:image|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); - static smid_a_image_with_dimensions = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[}](?:image|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); - static smid_a_mod_image_without_dimensions = ctRegex!(`[{](?:~\^\s+|\s*)ā¼\S+\.(?:png|gif|jpg),w0h0.*[}](?:image|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); - static smid_b_image = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[ā„](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[ā](?:ā¤.*?ā|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); - static smid_b_image_generic = ctRegex!(`(?:^|[ ]|[^\S]?)[ā„](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[ā](?:ā¤.*?ā|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); - static smid_b_image_with_dimensions = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[ā„](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[ā](?:ā¤.*?ā|(?:(?:https?|git):\/\/|Ā¤?\.\.\/|Ā¤?\.\/|Ā¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); - static smid_b_mod_image_without_dimensions = ctRegex!(`[ā„](?:~\^\s+|\s*)ā¼\S+\.(?:png|gif|jpg),w0h0.*[ā](?:ā¤.*?ā|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); static smid_image_delimit = ctRegex!(`(?P<pre>^|[ ]|[^\S]?)\{\s*(?P<text>.+?)\s*\}(?:image)(?=[;:!,?.]?([ )\]]|$))`, "mg"); /+ inline markup book index +/ static book_index = ctRegex!(`^=\{\s*(?P<bookindex>.+?)\}$`, "m"); @@ -180,15 +155,7 @@ static template DocReformRgxInit() { /+ ignore outside code blocks +/ static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); /+ line & page breaks +/ - static break_line_within_object = ctRegex!(`[\\]{2}( |$)`); - static break_page = ctRegex!(`^-[\\]{2}-$`); - static break_page_new = ctRegex!(`^=[\\]{2}=$`); - static break_page_line_across = ctRegex!(`^=[.]{2}=$`); static break_string = ctRegex!(`ć`); - static parent = ctRegex!(`([0-7]):([0-9]+)`); - static header_regex_content = ctRegex!(`([0-7]):([0-9]+)`); - /+ json +/ - static tailing_comma = ctRegex!(`,$`, "m"); /+ biblio tags +/ static biblio_tags = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`); static biblio_abbreviations = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`); @@ -202,53 +169,31 @@ static template DocReformRgxInit() { static topic_register_sub_terms_split = ctRegex!(`\s*\|\s*`); static topic_register_multiple_sub_terms_split = ctRegex!(`ā£([^|ā£]+(?:\|[^|ā£]+)+)`); /+ language codes +/ - auto language_codes = - ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)"); auto language_code_and_filename = ctRegex!("(?:^|[/])(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)/[A-Za-z0-9._-].+?[.](?:sst|ssm)$"); static newline = ctRegex!("\n", "mg"); - static strip_br = ctRegex!("^<br>\n|<br>\n*$"); static space = ctRegex!(`[ ]`, "mg"); static spaces_keep = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg"); - static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg"); - static two_spaces = ctRegex!(`[ ]{2}`, "mg"); static nbsp_char = ctRegex!(`ā`, "mg"); - static nbsp_chars_line_start = ctRegex!(`^ā+`, "mg"); - static nbsp_and_space = ctRegex!(` [ ]`, "mg"); - static nbsp_char_and_space = ctRegex!(`ā[ ]`, "mg"); - static special_markup_chars = ctRegex!(`[ććććā„āā¤āĀ¤āāāāā¼āæāāāāāā ]`, "mg"); static src_pth_sst_or_ssm = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`); static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`); static src_pth_contents = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`); static src_pth_zip = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`); - static src_pth_unzip_pod = ctRegex!(`^(?P<path>media/text/[a-z]{2}/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`); static src_pth_types = ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`); - static pod_content_location = - ctRegex!(`^(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])(?P<languages>(?:\s+[a-z]{2}(?:,|$))+)`, "mg"); static src_fn = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`); static src_fn_master = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`); - static src_fn_text = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`); - static src_fn_insert = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`); static src_fn_find_inserts = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`); static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`); static src_base_parent_dir_name = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure - static src_base_parent_path = ctRegex!(`(?P<dir>(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure static src_formalised_file_path_parts = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure /+ line breaks +/ - static empty_line = ctRegex!(`^\s*$`); - static empty_block = ctRegex!(`^\s*$`, "mg"); - static br_line_natural = ctRegex!(`\n`, "mg"); static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg"); static br_newlines_linebreaks = ctRegex!(`[\nāā]`, "mg"); static br_line = ctRegex!(`ā`, "mg"); static br_nl = ctRegex!(`ā`, "mg"); - static br_paragraph = ctRegex!(`ā`, "mg"); - static br_page_line = ctRegex!(`ā¼`, "mg"); - static br_page = ctRegex!(`āæ`, "mg"); - static br_page_new = ctRegex!(`ā`, "mg"); /+ inline markup footnotes endnotes +/ static inline_notes_al = ctRegex!(`ć(?:[*+]\s+|\s*)(.+?)ć`, "mg"); static inline_notes_al_special = ctRegex!(`ć(?:[*+]\s+)(.+?)ć`, "mg"); // TODO remove match when special footnotes are implemented @@ -262,18 +207,12 @@ static template DocReformRgxInit() { static inline_al_delimiter_open_regular = ctRegex!(`ć\s`, "m"); static inline_al_delimiter_open_symbol_star = ctRegex!(`ć[*]\s`, "m"); static inline_al_delimiter_open_symbol_plus = ctRegex!(`ć[+]\s`, "m"); - static inline_al_delimiter_close_regular = ctRegex!(`ć`, "m"); - static inline_al_delimiter_open_and_close_regular = ctRegex!(`ć|ć`, "m"); - static inline_al_delimiter_open_asterisk = ctRegex!(`ć\*`, "m"); - static inline_al_delimiter_open_plus = ctRegex!(`ć\+`, "m"); - static inline_text_and_note_al = ctRegex!(`(?P<text>.+?)ć(?:[*+ ]*)(?P<note>.+?)ć`, "mg"); static inline_text_and_note_al_ = ctRegex!(`(.+?(?:ć[*+]*\s+.+?ć|$))`, "mg"); /+ inline markup links +/ static inline_image = ctRegex!(`(?P<pre>ā„)ā¼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?āā¤.*?ā)`, "mg"); static inline_image_without_dimensions = ctRegex!(`(?P<pre>ā„)ā¼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?āā¤.*?ā)`, "mg"); static inline_image_info = ctRegex!(`ā¼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg"); static inline_link_anchor = ctRegex!(`ā(?P<anchor>\S+?)ā`, "mg"); // TODO *~text_link_anchor - static inline_link_ = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<link>.+?)ā`, "mg"); static inline_link = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<link>#?(\S+?))ā`, "mg"); static inline_link_empty = ctRegex!(`ā„(?P<text>.+?)āā¤ā`, "mg"); static inline_link_number = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<num>[0-9]+)ā`, "mg"); // not used @@ -282,20 +221,15 @@ static template DocReformRgxInit() { static inline_link_hash = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<link>#(?P<segname>\S+?))ā`, "mg"); static inline_link_clean = ctRegex!(`ā¤(?:.+?)ā|[ā„ā]`, "mg"); static inline_link_toc_to_backmatter = ctRegex!(`ā¤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)ā`, "mg"); - static inline_a_url = ctRegex!(`(ā¤)([^\sā„āā¤ā]+)(ā)`, "mg"); static url = ctRegex!(`https?://`, "mg"); static uri = ctRegex!(`(?:https?|git)://`, "mg"); static uri_identify_components = ctRegex!(`(?P<type>(?:https?|git)://)(?P<path>\S+?/)(?P<file>[^/]+)$`, "mg"); static inline_link_subtoc = ctRegex!(`^(?P<level>[5-7])~ ā„(?P<text>.+?)āā¤(?P<link>.+?)ā`, "mg"); - static fn_suffix = ctRegex!(`\.fnSuffix`, "mg"); static inline_link_fn_suffix = ctRegex!(`Ā¤(.+?)(\.fnSuffix)`, "mg"); static inline_seg_link = ctRegex!(`(Ā¤)(?:.+?)\.fnSuffix`, "mg"); static mark_internal_site_lnk = ctRegex!(`Ā¤`, "mg"); static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg"); - static quotation_mark_various = ctRegex!(q"ā['āāāā"`Ā“ĀØ]ā", "mg"); /+ inline markup font face mod +/ - static inline_mark_faces = ctRegex!(`(?P<markup>(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}[*!/_^,+#"-])`, "mg"); - static inline_mark_faces_to_mod = ctRegex!(`(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}([*!/_^,+#"-])`, "mg"); static inline_mark_emphasis = ctRegex!(`(?P<mark>[*])\{(?P<text>.+?)\}[*]`, "mg"); static inline_mark_bold = ctRegex!(`(?P<mark>[!])\{(?P<text>.+?)\}[!]`, "mg"); static inline_mark_underscore = ctRegex!(`(?P<mark>[_])\{(?P<text>.+?)\}[_]`, "mg"); @@ -306,15 +240,12 @@ static template DocReformRgxInit() { static inline_mark_insert = ctRegex!(`(?P<mark>[+])\{(?P<text>.+?)\}[+]`, "mg"); static inline_mark_mono = ctRegex!(`(?P<mark>[#])\{(?P<text>.+?)\}[#]`, "mg"); static inline_mark_cite = ctRegex!(`(?P<mark>["])\{(?P<text>.+?)\}["]`, "mg"); - static inline_mark_fontface_clean = ctRegex!(`[*!_/^,+#ā "-]\{|\}[*!_/^,+#ā "-]`, "mg"); static inline_faces_line = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`); static inline_emphasis_line = ctRegex!(`^\*_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); static inline_bold_line = ctRegex!(`^!_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); static inline_italics_line = ctRegex!(`^/_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); static inline_underscore_line = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); - static no_header_rgx = ctRegex!(`^=NULL$`); /+ inline markup font face mod +/ - static inline_faces = ctRegex!(`(?P<markup>(?P<mod>[*!_^,+ā ā-])āØ(?P<text>.+?)ā£[*!_^,+ā ā-])`, "mg"); static inline_emphasis = ctRegex!(`[*]āØ(?P<text>.+?)ā£[*]`, "mg"); static inline_bold = ctRegex!(`[!]āØ(?P<text>.+?)ā£[!]`, "mg"); static inline_underscore = ctRegex!(`[_]āØ(?P<text>.+?)ā£[_]`, "mg"); diff --git a/src/doc_reform/output/rgx.d b/src/doc_reform/output/rgx.d index 89d5858..1c569f9 100644 --- a/src/doc_reform/output/rgx.d +++ b/src/doc_reform/output/rgx.d @@ -6,48 +6,28 @@ static template DocReformOutputRgxInit() { import doc_reform.output.defaults; static struct Rgx { static newline = ctRegex!("\n", "mg"); - static strip_br = ctRegex!("^<br>\n|<br>\n*$"); static space = ctRegex!(`[ ]`, "mg"); static spaces_keep = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg"); - static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg"); - static two_spaces = ctRegex!(`[ ]{2}`, "mg"); static nbsp_char = ctRegex!(`ā`, "mg"); - static nbsp_chars_line_start = ctRegex!(`^ā+`, "mg"); - static nbsp_and_space = ctRegex!(` [ ]`, "mg"); - static nbsp_char_and_space = ctRegex!(`ā[ ]`, "mg"); - static special_markup_chars = ctRegex!(`[ććććā„āā¤āĀ¤āāāāā¼āæāāāāāā ]`, "mg"); static src_pth_sst_or_ssm = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`); static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`); static src_pth_contents = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`); static src_pth_zip = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`); - static src_pth_unzip_pod = ctRegex!(`^(?P<path>media/text/[a-z]{2}/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`); static src_pth_types = ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`); - static pod_content_location = - ctRegex!(`^(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])(?P<languages>(?:\s+[a-z]{2}(?:,|$))+)`, "mg"); static src_fn = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`); static src_fn_master = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`); - static src_fn_text = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`); - static src_fn_insert = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`); static src_fn_find_inserts = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`); static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`); static src_base_parent_dir_name = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure - static src_base_parent_path = ctRegex!(`(?P<dir>(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure static src_formalised_file_path_parts = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure /+ line breaks +/ - static empty_line = ctRegex!(`^\s*$`); - static empty_block = ctRegex!(`^\s*$`, "mg"); - static br_line_natural = ctRegex!(`\n`, "mg"); static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg"); static br_newlines_linebreaks = ctRegex!(`[\nāā]`, "mg"); static br_line = ctRegex!(`ā`, "mg"); static br_nl = ctRegex!(`ā`, "mg"); - static br_paragraph = ctRegex!(`ā`, "mg"); - static br_page_line = ctRegex!(`ā¼`, "mg"); - static br_page = ctRegex!(`āæ`, "mg"); - static br_page_new = ctRegex!(`ā`, "mg"); /+ inline markup footnotes endnotes +/ static inline_notes_al = ctRegex!(`ć(?:[*+]\s+|\s*)(.+?)ć`, "mg"); static inline_notes_al_special = ctRegex!(`ć(?:[*+]\s+)(.+?)ć`, "mg"); // TODO remove match when special footnotes are implemented @@ -61,18 +41,12 @@ static template DocReformOutputRgxInit() { static inline_al_delimiter_open_regular = ctRegex!(`ć\s`, "m"); static inline_al_delimiter_open_symbol_star = ctRegex!(`ć[*]\s`, "m"); static inline_al_delimiter_open_symbol_plus = ctRegex!(`ć[+]\s`, "m"); - static inline_al_delimiter_close_regular = ctRegex!(`ć`, "m"); - static inline_al_delimiter_open_and_close_regular = ctRegex!(`ć|ć`, "m"); - static inline_al_delimiter_open_asterisk = ctRegex!(`ć\*`, "m"); - static inline_al_delimiter_open_plus = ctRegex!(`ć\+`, "m"); - static inline_text_and_note_al = ctRegex!(`(?P<text>.+?)ć(?:[*+ ]*)(?P<note>.+?)ć`, "mg"); static inline_text_and_note_al_ = ctRegex!(`(.+?(?:ć[*+]*\s+.+?ć|$))`, "mg"); /+ inline markup links +/ static inline_image = ctRegex!(`(?P<pre>ā„)ā¼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?āā¤.*?ā)`, "mg"); static inline_image_without_dimensions = ctRegex!(`(?P<pre>ā„)ā¼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?āā¤.*?ā)`, "mg"); static inline_image_info = ctRegex!(`ā¼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg"); static inline_link_anchor = ctRegex!(`ā(?P<anchor>\S+?)ā`, "mg"); // TODO *~text_link_anchor - static inline_link_ = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<link>.+?)ā`, "mg"); static inline_link = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<link>#?(\S+?))ā`, "mg"); static inline_link_empty = ctRegex!(`ā„(?P<text>.+?)āā¤ā`, "mg"); static inline_link_number = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<num>[0-9]+)ā`, "mg"); // not used @@ -81,20 +55,15 @@ static template DocReformOutputRgxInit() { static inline_link_hash = ctRegex!(`ā„(?P<text>.+?)āā¤(?P<link>#(?P<segname>\S+?))ā`, "mg"); static inline_link_clean = ctRegex!(`ā¤(?:.+?)ā|[ā„ā]`, "mg"); static inline_link_toc_to_backmatter = ctRegex!(`ā¤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)ā`, "mg"); - static inline_a_url = ctRegex!(`(ā¤)([^\sā„āā¤ā]+)(ā)`, "mg"); static url = ctRegex!(`https?://`, "mg"); static uri = ctRegex!(`(?:https?|git)://`, "mg"); static uri_identify_components = ctRegex!(`(?P<type>(?:https?|git)://)(?P<path>\S+?/)(?P<file>[^/]+)$`, "mg"); static inline_link_subtoc = ctRegex!(`^(?P<level>[5-7])~ ā„(?P<text>.+?)āā¤(?P<link>.+?)ā`, "mg"); - static fn_suffix = ctRegex!(`\.fnSuffix`, "mg"); static inline_link_fn_suffix = ctRegex!(`Ā¤(.+?)(\.fnSuffix)`, "mg"); static inline_seg_link = ctRegex!(`(Ā¤)(?:.+?)\.fnSuffix`, "mg"); static mark_internal_site_lnk = ctRegex!(`Ā¤`, "mg"); static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg"); - static quotation_mark_various = ctRegex!(q"ā['āāāā"`Ā“ĀØ]ā", "mg"); /+ inline markup font face mod +/ - static inline_mark_faces = ctRegex!(`(?P<markup>(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}[*!/_^,+#"-])`, "mg"); - static inline_mark_faces_to_mod = ctRegex!(`(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}([*!/_^,+#"-])`, "mg"); static inline_mark_emphasis = ctRegex!(`(?P<mark>[*])\{(?P<text>.+?)\}[*]`, "mg"); static inline_mark_bold = ctRegex!(`(?P<mark>[!])\{(?P<text>.+?)\}[!]`, "mg"); static inline_mark_underscore = ctRegex!(`(?P<mark>[_])\{(?P<text>.+?)\}[_]`, "mg"); @@ -105,15 +74,12 @@ static template DocReformOutputRgxInit() { static inline_mark_insert = ctRegex!(`(?P<mark>[+])\{(?P<text>.+?)\}[+]`, "mg"); static inline_mark_mono = ctRegex!(`(?P<mark>[#])\{(?P<text>.+?)\}[#]`, "mg"); static inline_mark_cite = ctRegex!(`(?P<mark>["])\{(?P<text>.+?)\}["]`, "mg"); - static inline_mark_fontface_clean = ctRegex!(`[*!_/^,+#ā "-]\{|\}[*!_/^,+#ā "-]`, "mg"); static inline_faces_line = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`); static inline_emphasis_line = ctRegex!(`^\*_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); static inline_bold_line = ctRegex!(`^!_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); static inline_italics_line = ctRegex!(`^/_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); static inline_underscore_line = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); - static no_header_rgx = ctRegex!(`^=NULL$`); /+ inline markup font face mod +/ - static inline_faces = ctRegex!(`(?P<markup>(?P<mod>[*!_^,+ā ā-])āØ(?P<text>.+?)ā£[*!_^,+ā ā-])`, "mg"); static inline_emphasis = ctRegex!(`[*]āØ(?P<text>.+?)ā£[*]`, "mg"); static inline_bold = ctRegex!(`[!]āØ(?P<text>.+?)ā£[!]`, "mg"); static inline_underscore = ctRegex!(`[_]āØ(?P<text>.+?)ā£[_]`, "mg"); @@ -134,8 +100,6 @@ static template DocReformOutputRgxInit() { static xhtml_less_than = ctRegex!(`[<]`, "m"); // < static xhtml_greater_than = ctRegex!(`[>]`, "m"); // > static xhtml_line_break = ctRegex!(` [\\]{2}`, "m"); // <br /> - static latex_special_char_shortlist = ctRegex!(`([%$_#&\\])`); - static latex_special_char_curlybraces = ctRegex!(`([{}])`); static latex_special_char = ctRegex!(`([%${}_#&\\])`); static latex_special_char_for_escape = ctRegex!(`([%${}_#\\])`); static latex_special_char_for_escape_and_braces = ctRegex!(`([&])`); |