/+
- Name: SisuDoc Spine, Doc Reform [a part of]
  - Description: documents, structuring, processing, publishing, search
    - static content generator

  - Author: Ralph Amissah
    [ralph.amissah@gmail.com]

  - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved.

  - License: AGPL 3 or later:

    Spine (SiSU), a framework for document structuring, publishing and
    search

    Copyright (C) Ralph Amissah

    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or (at your
    option) any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.

    You should have received a copy of the GNU General Public License along with
    this program. If not, see [https://www.gnu.org/licenses/].

    If you have Internet connection, the latest version of the AGPL should be
    available at these locations:
    [https://www.fsf.org/licensing/licenses/agpl.html]
    [https://www.gnu.org/licenses/agpl.html]

  - Spine (by Doc Reform, related to SiSU) uses standard:
    - docReform markup syntax
      - standard SiSU markup syntax with modified headers and minor modifications
    - docReform object numbering
      - standard SiSU object citation numbering & system

  - Homepages:
    [https://www.sisudoc.org]
    [https://www.doc-reform.org]

  - Git
    [https://git.sisudoc.org/]

+/
/++
  regex: regular expressions used in sisu document parser
+/
module sisudoc.io_out.rgx;
@safe:
/++
  Template wrapping the struct RgxO, whose members are all static
  compile-time regular expressions built with ctRegex.

  Flags passed as the second ctRegex argument: "m" is multi-line matching
  (^ and $ also match at line boundaries) and "g" is global matching.

  NOTE(review): in this copy every named-group opener reads `(?P` with no
  `<name>` after it. std.regex writes named groups as `(?P<name>...)`, so
  the group identifiers appear to have been lost (presumably stripped as
  if they were markup). The pattern text below is kept exactly as found;
  the names must be restored from the canonical source before RgxO can be
  instantiated - TODO confirm against upstream.
+/
static template spineRgxOut() {
  static struct RgxO {
    /+ make settings: `new=...` and `break=...` values, each ended by `;` or end of input +/
    static make_breakpage                           = ctRegex!(`new=(?P.+?)(?:;|$)`);
    static make_breakcolumn                         = ctRegex!(`break=(?P.+?)(?:;|$)`);
    /+ newlines, spaces and spacing placeholder characters +/
    static newline                                  = ctRegex!("\n", "mg");
    static space                                    = ctRegex!(`[ ]`, "mg");
    static spaces_keep                              = ctRegex!(`(?P^[ ]+|[ ]{2,})`, "mg"); // code, verse, block
    static spaces_line_start                        = ctRegex!(`^(?P[ ]+)`, "mg");
    static nbsp_char                                = ctRegex!(`░`, "mg");
    static nbsp_chars                               = ctRegex!(`[░]+`, "mg");
    static middle_dot                               = ctRegex!(`·`, "mg");
    /+ source paths and filenames: .sst .ssm .ssi files, pod.manifest, .zip +/
    static src_pth_sst_or_ssm                       = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.](?Pss[tm]))$`);
    static src_pth_pod_sst_or_ssm                   = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`);
    static src_pth_contents                         = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`);
    static src_pth_zip                              = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]zip)$`);
    static src_pth_types                            = ctRegex!(`^(?P[/]?[a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+[.]ss[tm])|(?P[a-zA-Z0-9._-]+/pod[.]manifest)|(?P[a-zA-Z0-9._-]+[.]zip))$`);
    static src_fn                                   = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`);
    static src_fn_master                            = ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ssm)$`);
    static src_fn_find_inserts                      = ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ss[im])$`);
    static insert_src_fn_ssi_or_sst                 = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`);
    static src_base_parent_dir_name                 = ctRegex!(`[/](?P(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    static src_formalised_file_path_parts           = ctRegex!(`(?P(?:[/a-zA-Z0-9._-]+?)(?P[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    /+ line breaks +/
    static br_empty_line                            = ctRegex!(`\n[ ]*\n`, "mg");
    static br_linebreaks_newlines                   = ctRegex!(`[\n┘┙]`, "mg");
    static br_linebreaks                            = ctRegex!(`[┘┙]`, "mg");
    static br_line                                  = ctRegex!(`┘`, "mg");
    static br_line_inline                           = ctRegex!(`┙`, "mg");
    static br_line_spaced                           = ctRegex!(`┚`, "mg");
    /+ quotation marks +/
    static quotes_open_and_close                    = ctRegex!(`[“”]`, "mg");
    /+ inline markup footnotes endnotes +/
    static inline_notes_al                          = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
    static inline_notes_al_special                  = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
    static inline_notes_al_gen                      = ctRegex!(`【.+?】`, "m");
    static inline_notes_al_gen_text                 = ctRegex!(`【(?P.+?)】`, "m");
    static inline_notes_al_all_note                 = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*】`, "mg");
    static inline_notes_al_regular_number_note      = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*】`, "mg");
    static inline_notes_al_special_char_note        = ctRegex!(`【(?P(?:[*]|[+])+)\s+(?P.+?)】`, "mg");
    static inline_al_delimiter_open_regular         = ctRegex!(`【\s`, "m");
    static inline_al_delimiter_open_symbol_star     = ctRegex!(`【[*]\s`, "m");
    static inline_al_delimiter_open_symbol_plus     = ctRegex!(`【[+]\s`, "m");
    static inline_text_and_note_al_                 = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg");
    /+ inline markup links +/
    static inline_image                             = ctRegex!(`(?P┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+))\s*(?P.*?┝┤.*?├)`, "mg");
    static inline_image_without_dimensions          = ctRegex!(`(?P┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P0)h(?P0))\s*(?P.*?┝┤.*?├)`, "mg");
    static inline_image_info                        = ctRegex!(`☼?(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+)`, "mg");
    static inline_link_anchor                       = ctRegex!(`┃(?P\S+?)┃`, "mg"); // TODO *~text_link_anchor
    static inline_link                              = ctRegex!(`┥(?P.+?)┝┤(?P#?(\S+?))├`, "mg");
    static inline_link_empty                        = ctRegex!(`┥(?P.+?)┝┤├`, "mg");
    static inline_link_number                       = ctRegex!(`┥(?P.+?)┝┤(?P[0-9]+)├`, "mg"); // not used
    static inline_link_number_only                  = ctRegex!(`(?P┥.+?┝)┤(?P[0-9]+)├`, "mg");
    static inline_link_stow_uri                     = ctRegex!(`┥(?P.+?)┝┤(?P[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
    static inline_link_hash                         = ctRegex!(`┥(?P.+?)┝┤(?P#(?P\S+?))├`, "mg");
    static inline_link_seg_and_hash                 = ctRegex!(`┥(?P.+?)┝┤(?P(?P[^/#├]*)#(?P.+?))├`, "mg");
    static inline_link_clean                        = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
    static inline_link_toc_to_backmatter            = ctRegex!(`┤#(?Pendnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
    static url                                      = ctRegex!(`https?://`, "mg");
    static uri                                      = ctRegex!(`(?:https?|git)://`, "mg");
    static uri_identify_components                  = ctRegex!(`(?P(?:https?|git)://)(?P\S+?/)(?P[^/]+)$`, "mg");
    static inline_link_subtoc                       = ctRegex!(`^(?P[5-7])~ ┥(?P.+?)┝┤(?P.+?)├`, "mg");
    static inline_link_fn_suffix                    = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
    static inline_seg_link                          = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
    static mark_internal_site_lnk                   = ctRegex!(`¤`, "mg");
    static quotation_mark_sql_insert_delimiter      = ctRegex!("[']", "mg");
    /+ inline markup font face mod +/
    static inline_emphasis                          = ctRegex!(`⑆[*]┨(?P.+?)┣[*]`, "mg");
    static inline_bold                              = ctRegex!(`⑆[!]┨(?P.+?)┣[!]`, "mg");
    static inline_underscore                        = ctRegex!(`⑆[_]┨(?P.+?)┣[_]`, "mg");
    static inline_italics                           = ctRegex!(`⑆[/]┨(?P.+?)┣[/]`, "mg");
    static inline_superscript                       = ctRegex!(`⑆\^┨(?P.+?)┣\^`, "mg");
    static inline_subscript                         = ctRegex!(`⑆[,]┨(?P.+?)┣[,]`, "mg");
    static inline_strike                            = ctRegex!(`⑆[-]┨(?P.+?)┣[-]`, "mg");
    static inline_insert                            = ctRegex!(`⑆[+]┨(?P.+?)┣[+]`, "mg");
    static inline_mono                              = ctRegex!(`⑆[■]┨(?P.+?)┣[■]`, "mg");
    static inline_cite                              = ctRegex!(`⑆[‖]┨(?P.+?)┣[‖]`, "mg");
    /+ table delimiters +/
    static table_delimiter_col                      = ctRegex!("[ ]*[┊][ ]*", "mg");
    static table_delimiter_row                      = ctRegex!("[ ]*\n", "mg");
    /+ paragraph operators +/
    static grouped_para_indent_1                    = ctRegex!(`^_1[ ]`, "m");
    static grouped_para_indent_2                    = ctRegex!(`^_2[ ]`, "m");
    static grouped_para_indent_3                    = ctRegex!(`^_3[ ]`, "m");
    static grouped_para_indent_4                    = ctRegex!(`^_4[ ]`, "m");
    static grouped_para_indent_5                    = ctRegex!(`^_5[ ]`, "m");
    static grouped_para_indent_6                    = ctRegex!(`^_6[ ]`, "m");
    static grouped_para_indent_7                    = ctRegex!(`^_7[ ]`, "m");
    static grouped_para_indent_8                    = ctRegex!(`^_8[ ]`, "m");
    static grouped_para_indent_9                    = ctRegex!(`^_9[ ]`, "m");
    static grouped_para_bullet                      = ctRegex!(`^_[*] `, "m");
    static grouped_para_bullet_indent_1             = ctRegex!(`^_1[*] `, "m");
    static grouped_para_bullet_indent_2             = ctRegex!(`^_2[*] `, "m");
    static grouped_para_bullet_indent_3             = ctRegex!(`^_3[*] `, "m");
    static grouped_para_bullet_indent_4             = ctRegex!(`^_4[*] `, "m");
    static grouped_para_bullet_indent_5             = ctRegex!(`^_5[*] `, "m");
    static grouped_para_bullet_indent_6             = ctRegex!(`^_6[*] `, "m");
    static grouped_para_bullet_indent_7             = ctRegex!(`^_7[*] `, "m");
    static grouped_para_bullet_indent_8             = ctRegex!(`^_8[*] `, "m");
    static grouped_para_bullet_indent_9             = ctRegex!(`^_9[*] `, "m");
    static grouped_para_bullet_indent               = ctRegex!(`^_(?P[1-9])[*] `, "m");
    static grouped_para_indent_hang                 = ctRegex!(`^_(?P[0-9])_(?P[0-9])[ ]`, "m");
  }
}