aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sisudoc/io_out/rgx.d
diff options
context:
space:
mode:
authorRalph Amissah <ralph.amissah@gmail.com>2024-04-10 22:24:34 -0400
committerRalph Amissah <ralph.amissah@gmail.com>2024-04-10 23:08:18 -0400
commit90873fabd7451e1dd8c4b39303906e19bdc481f7 (patch)
tree2dbb0e41f3e9c761645c8b37dafe979a01d38d32 /src/sisudoc/io_out/rgx.d
parent0.15.0 (diff)
0.16.0 sisudoc (src/sisudoc sisudoc spine)
- src/sisudoc (replaces src/doc_reform) - sisudoc spine (used more)
Diffstat (limited to 'src/sisudoc/io_out/rgx.d')
-rw-r--r--src/sisudoc/io_out/rgx.d157
1 files changed, 157 insertions, 0 deletions
diff --git a/src/sisudoc/io_out/rgx.d b/src/sisudoc/io_out/rgx.d
new file mode 100644
index 0000000..474a120
--- /dev/null
+++ b/src/sisudoc/io_out/rgx.d
@@ -0,0 +1,157 @@
+/+
+- Name: SisuDoc Spine, Doc Reform [a part of]
+ - Description: documents, structuring, processing, publishing, search
+ - static content generator
+
+ - Author: Ralph Amissah
+ [ralph.amissah@gmail.com]
+
+ - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved.
+
+ - License: AGPL 3 or later:
+
+ Spine (SiSU), a framework for document structuring, publishing and
+ search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Affero General Public License as published by the
+ Free Software Foundation, either version 3 of the License, or (at your
+ option) any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
+ more details.
+
+ You should have received a copy of the GNU Affero General Public License along with
+ this program. If not, see [https://www.gnu.org/licenses/].
+
+ If you have Internet connection, the latest version of the AGPL should be
+ available at these locations:
+ [https://www.fsf.org/licensing/licenses/agpl.html]
+ [https://www.gnu.org/licenses/agpl.html]
+
+ - Spine (by Doc Reform, related to SiSU) uses standard:
+ - docReform markup syntax
+ - standard SiSU markup syntax with modified headers and minor modifications
+ - docReform object numbering
+ - standard SiSU object citation numbering & system
+
+ - Homepages:
+ [https://www.sisudoc.org]
+ [https://www.doc-reform.org]
+
+ - Git
+ [https://git.sisudoc.org/]
+
++/
+/++
+ regex: regular expressions used in sisu document parser
++/
+module sisudoc.io_out.rgx;
+@safe:
+static template spineRgxOut() { // wrapper template whose only content is struct RgxO, a collection of compile-time regexes; NOTE(review): no import providing ctRegex is visible in this module - presumably supplied where the template is mixed in, confirm
+ static struct RgxO { // every member is a static ctRegex; where present, the second argument is the flag string ("m" multi-line, "g" global)
+ static make_breakpage = ctRegex!(`new=(?P<breakpage>.+?)(?:;|$)`); // group breakpage: text after "new=" up to the next ';' or the end
+ static make_breakcolumn = ctRegex!(`break=(?P<breakcolumn>.+?)(?:;|$)`,); // group breakcolumn: text after "break=" up to the next ';' or the end (note the trailing comma in the argument list)
+ static newline = ctRegex!("\n", "mg");
+ static space = ctRegex!(`[ ]`, "mg");
+ static spaces_keep = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block: spaces at the start of a line, or any run of two or more spaces
+ static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg"); // one or more spaces at the start of a line
+ static nbsp_char = ctRegex!(`░`, "mg"); // a single literal ░ (the nbsp stand-in, going by the member name)
+ static nbsp_chars = ctRegex!(`[░]+`, "mg"); // a run of one or more ░
+ static middle_dot = ctRegex!(`·`, "mg");
+ static src_pth_sst_or_ssm = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`); // groups path, filename and extension for a path ending in .sst or .ssm
+ static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`); // .sst/.ssm file that sits under media/text/<two lowercase letters>/; podpath is everything before that segment
+ static src_pth_contents = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`); // path ending in <filename>/pod.manifest
+ static src_pth_zip = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`); // path ending in a .zip file
+ static src_pth_types = ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`); // accepts any of the three kinds above (.sst/.ssm, <dir>/pod.manifest, .zip); NOTE(review): the path group is itself repeated here, unlike src_pth_sst_or_ssm, so it would hold only the last directory matched - confirm intended
+ static src_fn = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`); // fn_src = fn_base + "." + fn_src_suffix (sst or ssm); the leading path is an unnamed group
+ static src_fn_master = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`); // .ssm files only
+ static src_fn_find_inserts = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`); // .ssi or .ssm files
+ static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`); // line starting with "<<" then an .sst or .ssi filename, optionally preceded by directories; NOTE(review): path is a repeated group, see src_pth_types
+ static src_base_parent_dir_name = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure: dir is the directory name directly before a trailing /media/text/<two lowercase letters>
+ static src_formalised_file_path_parts = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
+ /+ line breaks +/
+ static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg"); // two newlines separated by nothing but spaces
+ static br_linebreaks_newlines = ctRegex!(`[\n┘┙]`, "mg"); // a newline or either of ┘ ┙
+ static br_linebreaks = ctRegex!(`[┘┙]`, "mg");
+ static br_line = ctRegex!(`┘`, "mg");
+ static br_line_inline = ctRegex!(`┙`, "mg");
+ static br_line_spaced = ctRegex!(`┚`, "mg");
+ /+ quotation marks +/
+ static quotes_open_and_close = ctRegex!(`[“”]`, "mg");
+ /+ inline markup footnotes endnotes +/
+ static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); // text enclosed in 【 】; group 1 excludes a leading * or + marker and leading whitespace
+ static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
+ static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); // any 【…】 span, no capture
+ static inline_notes_al_gen_text = ctRegex!(`【(?P<text>.+?)】`, "m");
+ static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg"); // num is either digits or a run of * / + characters; note is the text that follows
+ static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg"); // digits-only variant of the above
+ static inline_notes_al_special_char_note = ctRegex!(`【(?P<char>(?:[*]|[+])+)\s+(?P<note>.+?)】`, "mg"); // * / + variant of the above
+ static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); // opening 【 followed directly by whitespace
+ static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m");
+ static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m");
+ static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg");
+ /+ inline markup links +/
+ static inline_image = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg"); // ┥☼<file>.jpg|gif|png,w<width>h<height> followed by the rest of a ┝┤…├ link
+ static inline_image_without_dimensions = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg"); // same shape as inline_image, but matches only width 0 and height 0
+ static inline_image_info = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg"); // just the file,w<width>h<height> part; the leading ☼ is optional
+ static inline_link_anchor = ctRegex!(`┃(?P<anchor>\S+?)┃`, "mg"); // TODO *~text_link_anchor
+ static inline_link = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg"); // ┥text┝┤link├ where link is non-space characters, optionally starting with #
+ static inline_link_empty = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg"); // link with nothing between ┤ and ├
+ static inline_link_number = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
+ static inline_link_number_only = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg"); // digits-only target; linked_text keeps its ┥ ┝ delimiters
+ static inline_link_stow_uri = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
+ static inline_link_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<hash>\S+?))├`, "mg"); // target must start with #; hash is the part after it
+ static inline_link_seg_and_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg"); // target split at # into seg (no '/', '#' or ├; may be empty) and hash
+ static inline_link_clean = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg"); // matches a whole ┤…├ target, or a lone ┥ / ┝ delimiter
+ static inline_link_toc_to_backmatter = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg"); // # target restricted to the five listed names
+ static url = ctRegex!(`https?://`, "mg"); // http or https scheme prefix
+ static uri = ctRegex!(`(?:https?|git)://`, "mg"); // http, https or git scheme prefix
+ static uri_identify_components = ctRegex!(`(?P<type>(?:https?|git)://)(?P<path>\S+?/)(?P<file>[^/]+)$`, "mg"); // type is the scheme prefix, path ends in '/', file contains no '/' and runs to the end of the line
+ static inline_link_subtoc = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg"); // line starting with 5~, 6~ or 7~ followed by a link
+ static inline_link_fn_suffix = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg"); // ¤, a captured text, then the literal ".fnSuffix" (captured as group 2)
+ static inline_seg_link = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg"); // same span, but only the ¤ is captured
+ static mark_internal_site_lnk = ctRegex!(`¤`, "mg");
+ static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg"); // a single-quote character
+ /+ inline markup font face mod +/
+ static inline_emphasis = ctRegex!(`⑆[*]┨(?P<text>.+?)┣[*]`, "mg"); // all members of this section share one shape, ⑆<mark>┨text┣<mark>, and differ only in the mark character
+ static inline_bold = ctRegex!(`⑆[!]┨(?P<text>.+?)┣[!]`, "mg");
+ static inline_underscore = ctRegex!(`⑆[_]┨(?P<text>.+?)┣[_]`, "mg");
+ static inline_italics = ctRegex!(`⑆[/]┨(?P<text>.+?)┣[/]`, "mg");
+ static inline_superscript = ctRegex!(`⑆\^┨(?P<text>.+?)┣\^`, "mg");
+ static inline_subscript = ctRegex!(`⑆[,]┨(?P<text>.+?)┣[,]`, "mg");
+ static inline_strike = ctRegex!(`⑆[-]┨(?P<text>.+?)┣[-]`, "mg");
+ static inline_insert = ctRegex!(`⑆[+]┨(?P<text>.+?)┣[+]`, "mg");
+ static inline_mono = ctRegex!(`⑆[■]┨(?P<text>.+?)┣[■]`, "mg");
+ static inline_cite = ctRegex!(`⑆[‖]┨(?P<text>.+?)┣[‖]`, "mg");
+ /+ table delimiters +/
+ static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg"); // ┊ together with any spaces on either side
+ static table_delimiter_row = ctRegex!("[ ]*\n", "mg"); // a newline together with any spaces before it
+ /+ paragraph operators +/
+ static grouped_para_indent_1 = ctRegex!(`^_1[ ]`, "m"); // line starting with "_1 "; the _2 to _9 members below differ only in the digit
+ static grouped_para_indent_2 = ctRegex!(`^_2[ ]`, "m");
+ static grouped_para_indent_3 = ctRegex!(`^_3[ ]`, "m");
+ static grouped_para_indent_4 = ctRegex!(`^_4[ ]`, "m");
+ static grouped_para_indent_5 = ctRegex!(`^_5[ ]`, "m");
+ static grouped_para_indent_6 = ctRegex!(`^_6[ ]`, "m");
+ static grouped_para_indent_7 = ctRegex!(`^_7[ ]`, "m");
+ static grouped_para_indent_8 = ctRegex!(`^_8[ ]`, "m");
+ static grouped_para_indent_9 = ctRegex!(`^_9[ ]`, "m");
+ static grouped_para_bullet = ctRegex!(`^_[*] `, "m"); // line starting with "_* "
+ static grouped_para_bullet_indent_1 = ctRegex!(`^_1[*] `, "m"); // line starting with "_1* "; the _2* to _9* members below differ only in the digit
+ static grouped_para_bullet_indent_2 = ctRegex!(`^_2[*] `, "m");
+ static grouped_para_bullet_indent_3 = ctRegex!(`^_3[*] `, "m");
+ static grouped_para_bullet_indent_4 = ctRegex!(`^_4[*] `, "m");
+ static grouped_para_bullet_indent_5 = ctRegex!(`^_5[*] `, "m");
+ static grouped_para_bullet_indent_6 = ctRegex!(`^_6[*] `, "m");
+ static grouped_para_bullet_indent_7 = ctRegex!(`^_7[*] `, "m");
+ static grouped_para_bullet_indent_8 = ctRegex!(`^_8[*] `, "m");
+ static grouped_para_bullet_indent_9 = ctRegex!(`^_9[*] `, "m");
+ static grouped_para_bullet_indent = ctRegex!(`^_(?P<indent>[1-9])[*] `, "m"); // general form of the nine members above, capturing the digit as indent
+ static grouped_para_indent_hang = ctRegex!(`^_(?P<hang>[0-9])_(?P<indent>[0-9])[ ]`, "m"); // line starting with "_<hang>_<indent> ", each a single digit 0-9
+ }
+}