diff options
author | Ralph Amissah <ralph@amissah.com> | 2017-05-06 11:33:10 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2019-04-10 15:14:14 -0400 |
commit | 7a73aa10276b0c1151500d26b194336b56af7aba (patch) | |
tree | e267901f73375393cc429c5c17ce661216de111a /org/default_regex.org | |
parent | xmls work particularly with epub output (diff) |
cleaning (tag: doc-reform_v0.0.14)
Diffstat (limited to 'org/default_regex.org')
-rw-r--r-- | org/default_regex.org | 29 |
1 files changed, 14 insertions, 15 deletions
diff --git a/org/default_regex.org b/org/default_regex.org index baeca25..01aa763 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -62,8 +62,8 @@ static line_delimiter_ws_strip = ctRegex!("[ ]*\n[ ]*"); static para_delimiter = ctRegex!("\n[ ]*\n+"); static table_col_delimiter = ctRegex!("[ ]*\n+", "mg"); static table_row_delimiter = ctRegex!("\n[ ]*\n+", "mg"); -static table_row_delimiter_special = ctRegex!("[ ]*\n", "mg"); // -static table_col_delimiter_special = ctRegex!("[ ]*[|][ ]*", "mg"); // +static table_row_delimiter_special = ctRegex!("[ ]*\n", "mg"); +static table_col_delimiter_special = ctRegex!("[ ]*[|][ ]*", "mg"); static levels_markup = ctRegex!(`^[A-D1-4]$`); static levels_numbered = ctRegex!(`^[0-9]$`); static levels_numbered_headings = ctRegex!(`^[0-7]$`); @@ -128,12 +128,11 @@ static heading_seg_and_above = ctRegex!(`^:?([A-D1])[~] static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); static heading_anchor_tag = ctRegex!(`^:?[A-D1-4][~]([a-z0-9_.-]+) `,"i"); static heading_identify_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i"); - // unless dob.obj =~/^:?[A-D1-4]~\s+(?:|(?:chapter|article|section|clause)\s+)([0-9.]+)/i static heading_extract_named_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+.)*[0-9]+)(?:[.:;, ]|$)`,"i"); static heading_extract_unnamed_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+.)*[0-9]+)(?:[.:;, ]|$)`); static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `); static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); -static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); // test, particularly [2] name/hashtag which may or may not be, does this affect title [3] +static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); static heading_backmatter = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i"); static heading_biblio = 
ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`); @@ -191,7 +190,7 @@ static block_curly_quote_open = ctRegex!(`^(quote[{].*?$ static block_curly_quote_close = ctRegex!(`^([}]quote)`); static block_curly_table_open = ctRegex!(`^table[{](.*)`); static block_curly_table_close = ctRegex!(`^([}]table)`); -static block_curly_table_special_markup = ctRegex!(`^[{]table((~h)?(?P<columns>(?:[ ]+[0-9]+;)+))[}]`, "mg"); // sepcial table block markup +static block_curly_table_special_markup = ctRegex!(`^[{]table((~h)?(?P<columns>(?:[ ]+[0-9]+;)+))[}]`, "mg"); #+END_SRC *** block sub-matches :block:curly: @@ -225,9 +224,9 @@ static inline_text_and_note_square_sp = ctRegex!(`(.+?)~\[[*+]+\ static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg"); static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg"); static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m"); -static inline_curly_delimiter_open_symbol_star = ctRegex!(`~\{[*]\s`, "m"); // -static inline_curly_delimiter_open_symbol_plus = ctRegex!(`~\{[+]\s`, "m"); // -static inline_curly_delimiter_open_star_or_plus = ctRegex!(`~\{[+*]`, "m"); // +static inline_curly_delimiter_open_symbol_star = ctRegex!(`~\{[*]\s`, "m"); +static inline_curly_delimiter_open_symbol_plus = ctRegex!(`~\{[+]\s`, "m"); +static inline_curly_delimiter_open_star_or_plus = ctRegex!(`~\{[+*]`, "m"); static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m"); static inline_text_and_note_curly = ctRegex!(`(?P<text>.+?)(?:(?:[~])[{][*+ ]*)(?P<note>.+?)(?:[}][~])`, "mg"); static note_ref = ctRegex!(`^\S+?noteref_([0-9]+)`, "mg"); // {^{73.}^}#noteref_73 @@ -259,7 +258,7 @@ static image = ctRegex!(`([a-zA-Z0-9._ /+ inline markup book index +/ static book_index = ctRegex!(`^=\{\s*(.+?)\}$`, "m"); static book_index_open = ctRegex!(`^=\{\s*([^}]+?)$`); -static book_index_close = ctRegex!(`^(.*?)\}$`, "m"); // strip +static 
book_index_close = ctRegex!(`^(.*?)\}$`, "m"); #+END_SRC ** no obj_cite_number object :ocn:off:object: @@ -288,7 +287,7 @@ static obj_cite_number_block_marks = ctRegex!(`^--[+~-]#$`); #+name: ao_rgx #+BEGIN_SRC d /+ ignore outside code blocks +/ -static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info +static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); #+END_SRC ** line & page breaks :break: @@ -387,7 +386,7 @@ static newline = ctRegex!("\n", "mg"); static strip_br = ctRegex!("^<br>\n|<br>\n*$"); static space = ctRegex!(`[ ]`, "mg"); static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg"); -static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg"); // could be issues for endnotes +static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg"); static two_spaces = ctRegex!(`[ ]{2}`, "mg"); static nbsp_char = ctRegex!(`░`, "mg"); static nbsp_chars_line_start = ctRegex!(`^░+`, "mg"); @@ -395,7 +394,7 @@ static nbsp_and_space = ctRegex!(` [ ]`, "m static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg"); #+END_SRC -** filename (and path) matching (including markup insert file) :insert:file:path:filename: +** filename (& path) (including insert file) :insert:file:path:filename: #+name: prgmkup_rgx #+BEGIN_SRC d @@ -443,7 +442,7 @@ static inline_seg_link = ctRegex!(`(¤)(?:.+?)\.f static mark_internal_site_lnk = ctRegex!(`¤`, "mg"); #+END_SRC -*** inline markup font face mod :inline:font:face: +*** inline markup font face mod :inline:font:face: #+name: prgmkup_rgx #+BEGIN_SRC d @@ -471,6 +470,6 @@ static inline_underscore_line = ctRegex!(`^__ (?P<text>. #+name: prgmkup_rgx #+BEGIN_SRC d /+ table delimiters +/ -static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg"); // -static table_delimiter_row = ctRegex!("[ ]*\n", "mg"); // +static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg"); +static table_delimiter_row = ctRegex!("[ ]*\n", "mg"); #+END_SRC |