From 47df905518ae9fb249f922c487cb7cd8c081f14b Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 15 Nov 2016 10:51:45 -0500 Subject: 0.9.1 munge links & urls * includes auto place url in endnote helper, needs to occur before endnotes extracted --- src/sdp/ao_rgx.d | 302 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 154 insertions(+), 148 deletions(-) (limited to 'src/sdp/ao_rgx.d') diff --git a/src/sdp/ao_rgx.d b/src/sdp/ao_rgx.d index c58aeca..68a16c1 100644 --- a/src/sdp/ao_rgx.d +++ b/src/sdp/ao_rgx.d @@ -4,174 +4,180 @@ template RgxInit() { struct Rgx { /+ misc +/ - static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); - static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); - static within_quotes = ctRegex!(`"(.+?)"`); - static make_heading_delimiter = ctRegex!(`[;][ ]*`); - static arr_delimiter = ctRegex!(`[ ]*[;][ ]*`); - static name_delimiter = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`); - static book_index_go = ctRegex!("([0-9]+)(?:-[0-9]+)?"); - static trailing_comma = ctRegex!(",[ ]*$"); - static trailing_linebreak = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m"); - static newline_eol_delimiter = ctRegex!("\n"); - static newline_eol_strip_preceeding = ctRegex!("[ ]*\n"); - static newline_eol_delimiter_only = ctRegex!("^\n"); - static line_delimiter_ws_strip = ctRegex!("[ ]*\n[ ]*"); - static para_delimiter = ctRegex!("\n[ ]*\n+"); - static levels_markup = ctRegex!(`^[A-D1-4]$`); - static levels_numbered = ctRegex!(`^[0-9]$`); - static levels_numbered_headings = ctRegex!(`^[0-7]$`); - static src_pth = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); - static src_fn = + static true_dollar = ctRegex!(`\$`, "gm"); + static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); + static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); + static within_quotes = ctRegex!(`"(.+?)"`); + static make_heading_delimiter = ctRegex!(`[;][ ]*`); + static arr_delimiter = ctRegex!(`[ ]*[;][ ]*`); + static name_delimiter = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`); + static book_index_go = ctRegex!("([0-9]+)(?:-[0-9]+)?"); + static trailing_comma = ctRegex!(",[ ]*$"); + static trailing_linebreak = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m"); + static newline_eol_delimiter = ctRegex!("\n"); + static newline_eol_strip_preceeding = ctRegex!("[ ]*\n"); + static newline_eol_delimiter_only = ctRegex!("^\n"); + static line_delimiter_ws_strip = ctRegex!("[ ]*\n[ ]*"); + static para_delimiter = ctRegex!("\n[ ]*\n+"); + static levels_markup = ctRegex!(`^[A-D1-4]$`); + static levels_numbered = ctRegex!(`^[0-9]$`); + static levels_numbered_headings = ctRegex!(`^[0-7]$`); + static src_pth = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); + static src_fn = ctRegex!(`^([a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); - static src_fn_master = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssm)$`); - static src_fn_text = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]sst)$`); - static src_fn_insert = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssi)$`); - static src_fn_find_inserts = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); - static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); - /+ insert markup file +/ + static src_fn_master = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssm)$`); + static src_fn_text = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]sst)$`); + static src_fn_insert = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssi)$`); + static src_fn_find_inserts = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); + static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); /+ comments +/ - static comment = ctRegex!(`^%+ `); - static comments = ctRegex!(`^%+ |^%+$`); + static comment = ctRegex!(`^%+ `); + static comments = ctRegex!(`^%+ |^%+$`); /+ header +/ - static main_headers = + static main_headers = ctRegex!(`^(?:creator|title|rights|date|original|classify|identifier|notes|publisher|make|links)$`, "m"); - static native_header = ctRegex!(`^@([a-z_]+):(?:\s|$)`); - static native_header_make = ctRegex!(`^@(make):(?:\s|$)`); - static native_header_meta = + static native_header = ctRegex!(`^@([a-z_]+):(?:\s|$)`); + static native_header_make = ctRegex!(`^@(make):(?:\s|$)`); + static native_header_meta = ctRegex!(`^@(?:creator|title|rights|date|original|classify|identifier|notes|publisher|links):(?:\s|$)`); - static native_header_main = ctRegex!(`^@(?P
[a-z_]+):\s*(?P.*)`, "m"); - static native_header_sub = ctRegex!(`^[ ]*:(?P[a-z_]+):\s+(?P.+)`, "m"); - static native_header_meta_title = ctRegex!(`^@title:\s`, "m"); - static variable_doc_title = ctRegex!(`@title`); - static variable_doc_author = ctRegex!(`@author|@creator`); - static raw_author_munge = ctRegex!(`(\S.+?),\s+(.+)`,"i"); + static native_header_main = ctRegex!(`^@(?P
[a-z_]+):\s*(?P.*)`, "m"); + static native_header_sub = ctRegex!(`^[ ]*:(?P[a-z_]+):\s+(?P.+)`, "m"); + static native_header_meta_title = ctRegex!(`^@title:\s`, "m"); + static variable_doc_title = ctRegex!(`@title`); + static variable_doc_author = ctRegex!(`@author|@creator`); + static raw_author_munge = ctRegex!(`(\S.+?),\s+(.+)`,"i"); /+ head +/ - static native_subhead_creator = ctRegex!(`^(?:author|translator|illustrator)$`, "m"); - static native_subhead_title = ctRegex!(`^(?:main|sub(?:title)?|full|language|edition|note)$`, "m"); - static native_subhead_rights = ctRegex!(`^(?:copyright|illustrations|license|cover)$`, "m"); - static native_subhead_date = ctRegex!(`^(?:published|created|issued|available|valid|modified|added_to_site)$`, "m"); - static native_subhead_original = ctRegex!(`^(?:title|language|source)$`, "m"); - static native_subhead_classify = ctRegex!(`^(?:topic_register|subject|keywords|loc|dewey)$`, "m"); - static native_subhead_identifier = ctRegex!(`^(?:oclc|pg|isbn)$`, "m"); - static native_subhead_notes = ctRegex!(`^(?:abstract|description)$`, "m"); - static native_subhead_publisher = ctRegex!(`^(?:name)$`, "m"); - static native_subhead_make = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|num_depth|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m"); + static native_subhead_creator = ctRegex!(`^(?:author|translator|illustrator)$`, "m"); + static native_subhead_title = ctRegex!(`^(?:main|sub(?:title)?|full|language|edition|note)$`, "m"); + static native_subhead_rights = ctRegex!(`^(?:copyright|illustrations|license|cover)$`, "m"); + static native_subhead_date = ctRegex!(`^(?:published|created|issued|available|valid|modified|added_to_site)$`, "m"); + static native_subhead_original = ctRegex!(`^(?:title|language|source)$`, "m"); + static native_subhead_classify = ctRegex!(`^(?:topic_register|subject|keywords|loc|dewey)$`, "m"); + static native_subhead_identifier = ctRegex!(`^(?:oclc|pg|isbn)$`, "m"); + static native_subhead_notes = ctRegex!(`^(?:abstract|description)$`, "m"); + static native_subhead_publisher = ctRegex!(`^(?:name)$`, "m"); + static native_subhead_make = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|num_depth|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m"); /+ heading & paragraph operators +/ - static heading_a = ctRegex!(`^:?[A][~] `, "m"); - static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); - static heading_seg_and_above = ctRegex!(`^:?([A-D1])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); - static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); - static heading_anchor_tag = ctRegex!(`^:?[A-D1-4][~]([a-z0-9_.-]+) `,"i"); - static heading_identify_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i"); // unless dob.obj =~/^:?[A-D1-4]~\s+(?:|(?:chapter|article|section|clause)\s+)([0-9.]+)/i - static heading_extract_named_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+.)*[0-9]+)(?:[.:;, ]|$)`,"i"); - static heading_extract_unnamed_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+.)*[0-9]+)(?:[.:;, ]|$)`); - static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `); - static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); - static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); // test, particularly [2] name/hashtag which may or may not be, does this affect title [3] - static heading_backmatter = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i"); - static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); - static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`); - static heading_blurb = ctRegex!(`^:?(1)[~][!](blurb)`); - static para_bullet = ctRegex!(`^_[*] `); - static para_bullet_indent = ctRegex!(`^_([1-9])[*] `); - static para_indent = ctRegex!(`^_([1-9]) `); - static para_indent_hang = ctRegex!(`^_([0-9])_([0-9]) `); - static para_attribs = ctRegex!(`^_(([0-9])(_([0-9]))?|_([1-9])?[*]) `); + static heading_a = ctRegex!(`^:?[A][~] `, "m"); + static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); + static heading_seg_and_above = ctRegex!(`^:?([A-D1])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); + static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); + static heading_anchor_tag = ctRegex!(`^:?[A-D1-4][~]([a-z0-9_.-]+) `,"i"); + static heading_identify_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i"); + // unless dob.obj =~/^:?[A-D1-4]~\s+(?:|(?:chapter|article|section|clause)\s+)([0-9.]+)/i + static heading_extract_named_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+.)*[0-9]+)(?:[.:;, ]|$)`,"i"); + static heading_extract_unnamed_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+.)*[0-9]+)(?:[.:;, ]|$)`); + static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `); + static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); + static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); // test, particularly [2] name/hashtag which may or may not be, does this affect title [3] + static heading_backmatter = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i"); + static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); + static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`); + static heading_blurb = ctRegex!(`^:?(1)[~][!](blurb)`); + static para_bullet = ctRegex!(`^_[*] `); + static para_bullet_indent = ctRegex!(`^_([1-9])[*] `); + static para_indent = ctRegex!(`^_([1-9]) `); + static para_indent_hang = ctRegex!(`^_([0-9])_([0-9]) `); + static para_attribs = ctRegex!(`^_(([0-9])(_([0-9]))?|_([1-9])?[*]) `); /+ blocked markup +/ - static block_open = ctRegex!("^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)|^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)"); - static block_poem_open = ctRegex!("^((poem[{].*?$)|`{3} poem)"); + static block_open = ctRegex!("^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)|^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)"); + static block_poem_open = ctRegex!("^((poem[{].*?$)|`{3} poem)"); /+ blocked markup tics +/ - static block_tic_open = ctRegex!("^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)"); - static block_tic_code_open = ctRegex!("^`{3} (code)([.][a-z][0-9a-z_]+)?"); - static block_tic_poem_open = ctRegex!("^`{3} (poem)"); - static block_tic_group_open = ctRegex!("^`{3} (group)"); - static block_tic_block_open = ctRegex!("^`{3} (block)"); - static block_tic_quote_open = ctRegex!("^`{3} (quote)"); - static block_tic_table_open = ctRegex!("^`{3} (table)"); - static block_tic_close = ctRegex!("^(`{3})$","m"); + static block_tic_open = ctRegex!("^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)"); + static block_tic_code_open = ctRegex!("^`{3} (code)([.][a-z][0-9a-z_]+)?"); + static block_tic_poem_open = ctRegex!("^`{3} (poem)"); + static block_tic_group_open = ctRegex!("^`{3} (group)"); + static block_tic_block_open = ctRegex!("^`{3} (block)"); + static block_tic_quote_open = ctRegex!("^`{3} (quote)"); + static block_tic_table_open = ctRegex!("^`{3} (table)"); + static block_tic_close = ctRegex!("^(`{3})$","m"); /+ blocked markup curly +/ - static block_curly_open = ctRegex!(`^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)`); - static block_curly_code_open = ctRegex!(`^(code([.][a-z][0-9a-z_]+)?[{].*?$)`); - static block_curly_code_close = ctRegex!(`^([}]code)`); - static block_curly_poem_open = ctRegex!(`^(poem[{].*?$)`); - static block_curly_poem_close = ctRegex!(`^([}]poem)`); - static block_curly_group_open = ctRegex!(`^(group[{].*?$)`); - static block_curly_group_close = ctRegex!(`^([}]group)`); - static block_curly_block_open = ctRegex!(`^(block[{].*?$)`); - static block_curly_block_close = ctRegex!(`^([}]block)`); - static block_curly_quote_open = ctRegex!(`^(quote[{].*?$)`); - static block_curly_quote_close = ctRegex!(`^([}]quote)`); - static block_curly_table_open = ctRegex!(`^(table[{].*?$)`); - static block_curly_table_close = ctRegex!(`^([}]table)`); + static block_curly_open = ctRegex!(`^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)`); + static block_curly_code_open = ctRegex!(`^(code([.][a-z][0-9a-z_]+)?[{].*?$)`); + static block_curly_code_close = ctRegex!(`^([}]code)`); + static block_curly_poem_open = ctRegex!(`^(poem[{].*?$)`); + static block_curly_poem_close = ctRegex!(`^([}]poem)`); + static block_curly_group_open = ctRegex!(`^(group[{].*?$)`); + static block_curly_group_close = ctRegex!(`^([}]group)`); + static block_curly_block_open = ctRegex!(`^(block[{].*?$)`); + static block_curly_block_close = ctRegex!(`^([}]block)`); + static block_curly_quote_open = ctRegex!(`^(quote[{].*?$)`); + static block_curly_quote_close = ctRegex!(`^([}]quote)`); + static block_curly_table_open = ctRegex!(`^(table[{].*?$)`); + static block_curly_table_close = ctRegex!(`^([}]table)`); /+ inline markup font face mod +/ - static inline_emphasis = ctRegex!(`\*\{(?P.+?)\}\*`); - static inline_bold = ctRegex!(`!\{(?P.+?)\}!`); - static inline_italics = ctRegex!(`/\{(?P.+?)\}/`); - static inline_superscript = ctRegex!(`\^\{(?P.+?)\}\^`); - static inline_subscript = ctRegex!(`,\{(?P.+?)\},`); - static inline_strike = ctRegex!(`-\{(?P.+?)\}-`); - static inline_insert = ctRegex!(`\+\{(?P.+?)\}\+`); - static inline_mono = ctRegex!(`#\{(?P.+?)\}#`); - /+ inline markup footnotes +/ - static true_dollar = ctRegex!(`\$`, "gm"); - static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); - static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); - static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m"); - static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg"); - static inline_al_delimiter_open_regular = ctRegex!(`【`, "m"); - static inline_al_delimiter_close_regular = ctRegex!(`】`, "m"); - static inline_al_delimiter_open_and_close_regular = ctRegex!(`【|】`, "m"); - static inline_notes_delimiter_al_regular = ctRegex!(`【(.+?)】`, "m"); - static inline_notes_delimiter_al_regular_number_note = ctRegex!(`【(\d+)\s+(.+?)】`, "m"); - static inline_al_delimiter_open_asterisk = ctRegex!(`【\*`, "m"); - static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m"); - static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m"); - static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m"); - static inline_curly_delimiter_open_and_close_regular = ctRegex!(`~\{\s*|\s*\}~`, "m"); - static inline_notes_delimiter_curly_regular = ctRegex!(`~\{[ ]*(.+?)\}~`, "m"); - static inline_notes_curly_sp = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m"); - static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m"); - static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m"); - static inline_text_and_note_al = ctRegex!(`(?P.+?)【(?:[*+ ]*)(?P.+?)】`, "mg"); - static inline_text_and_note_curly = ctRegex!(`(?P.+?)(?:(?:[~])[{][*+ ]*)(?P.+?)(?:[}][~])`, "mg"); - static inline_note_curly_delimiters = ctRegex!(`(~\{[*+]?\s*)(.+?)(\}~)`, "mg"); - static inline_notes_square = ctRegex!(`~\[\s*(.+?)\]~`, "mg"); - static inline_text_and_note_square_sp = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg"); - static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg"); - static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg"); - static note_ref = ctRegex!(`^\S+?noteref_([0-9]+)`, "mg"); // {^{73.}^}#noteref_73 + static inline_emphasis = ctRegex!(`\*\{(?P.+?)\}\*`); + static inline_bold = ctRegex!(`!\{(?P.+?)\}!`); + static inline_italics = ctRegex!(`/\{(?P.+?)\}/`); + static inline_superscript = ctRegex!(`\^\{(?P.+?)\}\^`); + static inline_subscript = ctRegex!(`,\{(?P.+?)\},`); + static inline_strike = ctRegex!(`-\{(?P.+?)\}-`); + static inline_insert = ctRegex!(`\+\{(?P.+?)\}\+`); + static inline_mono = ctRegex!(`#\{(?P.+?)\}#`); + /+ inline markup footnotes endnotes +/ + static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m"); + static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg"); + static inline_curly_delimiter_open_and_close_regular = ctRegex!(`~\{\s*|\s*\}~`, "m"); + static inline_notes_delimiter_curly_regular = ctRegex!(`~\{[ ]*(.+?)\}~`, "m"); + static inline_notes_curly_sp = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m"); + static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m"); + static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m"); + static inline_note_curly_delimiters = ctRegex!(`(~\{[*+]?\s*)(.+?)(\}~)`, "mg"); + static inline_notes_square = ctRegex!(`~\[\s*(.+?)\]~`, "mg"); + static inline_text_and_note_square_sp = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg"); + static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg"); + static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg"); + static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m"); + static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m"); + static inline_text_and_note_curly = ctRegex!(`(?P.+?)(?:(?:[~])[{][*+ ]*)(?P.+?)(?:[}][~])`, "mg"); + static note_ref = ctRegex!(`^\S+?noteref_([0-9]+)`, "mg"); // {^{73.}^}#noteref_73 + /+ inline markup footnotes endnotes +/ + static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); + static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); + static inline_al_delimiter_open_regular = ctRegex!(`【`, "m"); + static inline_al_delimiter_close_regular = ctRegex!(`】`, "m"); + static inline_al_delimiter_open_and_close_regular = ctRegex!(`【|】`, "m"); + static inline_notes_delimiter_al_regular = ctRegex!(`【(.+?)】`, "m"); + static inline_notes_delimiter_al_regular_number_note = ctRegex!(`【(\d+)\s+(.+?)】`, "m"); + static inline_al_delimiter_open_asterisk = ctRegex!(`【\*`, "m"); + static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m"); + static inline_text_and_note_al = ctRegex!(`(?P.+?)【(?:[*+ ]*)(?P.+?)】`, "mg"); + static inline_url = ctRegex!(`((?:https?|git):\/\/\S+)`, "mg"); + static inline_link_naked_url = ctRegex!(`(^|[ ])((?:https?|git):\/\/\S+?)([.,;:?!]?(?:[ ]|$))`, "mg"); + static inline_link_markup_regular = ctRegex!(`(^|[ ])\{\s*(.+?)\s*\}((?:https?|git):\/\/\S+?)([.,;:?!]?(?:[ ]|$))`, "mg"); + static inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(.+?)\}((?:https?|git):\/\/\S+?)([.,;:?!]?(?:[ ]|$))`, "mg"); + static inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(.+?)\}((?:https?|git):\/\/\S+)`, "mg"); /+ inline markup book index +/ - static book_index = ctRegex!(`^=\{\s*(.+?)\}$`, "m"); - static book_index_open = ctRegex!(`^=\{\s*([^}]+?)$`); - static book_index_close = ctRegex!(`^(.*?)\}$`, "m"); // strip + static book_index = ctRegex!(`^=\{\s*(.+?)\}$`, "m"); + static book_index_open = ctRegex!(`^=\{\s*([^}]+?)$`); + static book_index_close = ctRegex!(`^(.*?)\}$`, "m"); // strip /+ no obj_cite_number object +/ - static obj_cite_number_off = ctRegex!(`~#$`, "m"); - static obj_cite_number_off_dh = ctRegex!(`-#$`, "m"); - static obj_cite_number_off_all = ctRegex!(`[~-]#$`, "m"); + static obj_cite_number_off = ctRegex!(`~#$`, "m"); + static obj_cite_number_off_dh = ctRegex!(`-#$`, "m"); + static obj_cite_number_off_all = ctRegex!(`[~-]#$`, "m"); /+ no obj_cite_number block +/ - static obj_cite_number_off_block = ctRegex!(`^--~#$`); - static obj_cite_number_off_block_dh = ctRegex!(`^---#$`); - static obj_cite_number_off_block_close = ctRegex!(`^--\+#$`); - static obj_cite_number_block_marks = ctRegex!(`^--[+~-]#$`); + static obj_cite_number_off_block = ctRegex!(`^--~#$`); + static obj_cite_number_off_block_dh = ctRegex!(`^---#$`); + static obj_cite_number_off_block_close = ctRegex!(`^--\+#$`); + static obj_cite_number_block_marks = ctRegex!(`^--[+~-]#$`); /+ ignore outside code blocks +/ static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info /+ line & page breaks +/ - static break_line_within_object = ctRegex!(`[\\]{2}( |$)`); - static break_page = ctRegex!(`^-[\\]{2}-$`); - static break_page_new = ctRegex!(`^=[\\]{2}=$`); - static break_page_line_across = ctRegex!(`^=[.]{2}=$`); - static break_string = ctRegex!(`』`); - static parent = ctRegex!(`([0-7]):([0-9]+)`); + static break_line_within_object = ctRegex!(`[\\]{2}( |$)`); + static break_page = ctRegex!(`^-[\\]{2}-$`); + static break_page_new = ctRegex!(`^=[\\]{2}=$`); + static break_page_line_across = ctRegex!(`^=[.]{2}=$`); + static break_string = ctRegex!(`』`); + static parent = ctRegex!(`([0-7]):([0-9]+)`); /+ json +/ - static tailing_comma = ctRegex!(`,$`, "m"); + static tailing_comma = ctRegex!(`,$`, "m"); /+ biblio tags +/ - static biblio_tags = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`); - static biblio_abbreviations = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`); + static biblio_tags = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`); + static biblio_abbreviations = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`); /+ bookindex split +/ - static bi_main_terms_split = ctRegex!(`\s*;\s*`); - static bi_main_term_plus_rest_split = ctRegex!(`\s*:\s*`); + static bi_main_terms_split = ctRegex!(`\s*;\s*`); + static bi_main_term_plus_rest_split = ctRegex!(`\s*:\s*`); static bi_sub_terms_plus_obj_cite_number_offset_split = ctRegex!(`\s*\|\s*`); - static bi_term_and_obj_cite_numbers_match = ctRegex!(`^(.+?)\+(\d+)`); + static bi_term_and_obj_cite_numbers_match = ctRegex!(`^(.+?)\+(\d+)`); } } -- cgit v1.2.3