From e95c49b76f4ac7bf72c383ee43a0567dfcbf1603 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 10 Sep 2018 18:15:02 -0400 Subject: 0.1.0 renamed doc-reform, doc_reform (& rad) - from sdp --- src/doc_reform/output/rgx.d | 106 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 src/doc_reform/output/rgx.d (limited to 'src/doc_reform/output/rgx.d') diff --git a/src/doc_reform/output/rgx.d b/src/doc_reform/output/rgx.d new file mode 100644 index 0000000..0d23f11 --- /dev/null +++ b/src/doc_reform/output/rgx.d @@ -0,0 +1,106 @@ +/++ + regex: regular expressions used in sisu document parser ++/ +module doc_reform.output.rgx; +static template SiSUoutputRgxInit() { + import doc_reform.output.defaults; + static struct Rgx { + static newline = ctRegex!("\n", "mg"); + static strip_br = ctRegex!("^
\n|
\n*$"); + static space = ctRegex!(`[ ]`, "mg"); + static spaces_line_start = ctRegex!(`^(?P[ ]+)`, "mg"); + static spaces_multiple = ctRegex!(`(?P[ ]{2,})`, "mg"); + static two_spaces = ctRegex!(`[ ]{2}`, "mg"); + static nbsp_char = ctRegex!(`░`, "mg"); + static nbsp_chars_line_start = ctRegex!(`^░+`, "mg"); + static nbsp_and_space = ctRegex!(` [ ]`, "mg"); + static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg"); + static special_markup_chars = ctRegex!(`[【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■]`, "mg"); + static src_pth_sst_or_ssm = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.](?Pss[tm]))$`); + static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); + static src_pth_contents = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+)/sisupod[.]manifest$`); + static src_pth_pod_root = ctRegex!(`^(?P(?:[/]?(?:[a-zA-Z0-9._-]+/)*)(sisupod))$`); + static src_pth_zip = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]zip)$`); + static src_pth_unzip_pod = ctRegex!(`^(?Pmedia/text/[a-z]{2}/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); + static src_pth_types = + ctRegex!(`^(?P[/]?[a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+[.]ss[tm])|(?P[a-zA-Z0-9._-]+/sisupod[.]manifest)|(?P[a-zA-Z0-9._-]+[.]zip))$`); + static pod_content_location = + ctRegex!(`^(?P[a-zA-Z0-9._-]+[.]ss[tm])(?P(?:\s+[a-z]{2}(?:,|$))+)`, "mg"); + static src_fn = + ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); + static src_fn_master = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssm)$`); + static src_fn_text = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]sst)$`); + static src_fn_insert = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ssi)$`); + static src_fn_find_inserts = ctRegex!(`^(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); + static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); + static src_base_parent_dir_name = 
ctRegex!(`[/](?P(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure + static src_base_parent_path = ctRegex!(`(?P(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure + static src_formalised_file_path_parts = ctRegex!(`(?P(?:[/a-zA-Z0-9._-]+?)(?P[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure + /+ line breaks +/ + static br_line = ctRegex!(`┘`, "mg"); + static br_nl = ctRegex!(`┙`, "mg"); + static br_paragraph = ctRegex!(`┚`, "mg"); + static br_page_line = ctRegex!(`┼`, "mg"); + static br_page = ctRegex!(`┿`, "mg"); + static br_page_new = ctRegex!(`╂`, "mg"); + /+ inline markup footnotes endnotes +/ + static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); + static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented + static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); + static inline_notes_al_gen_text = ctRegex!(`【(?P.+?)】`, "m"); + static inline_notes_al_gen_ref = ctRegex!(`【(?P[*+]\s+)\s*(?P.+?)】`, "mg"); + static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); + static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m"); + static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m"); + static inline_al_delimiter_close_regular = ctRegex!(`】`, "m"); + static inline_al_delimiter_open_and_close_regular = ctRegex!(`【|】`, "m"); + static inline_notes_delimiter_al_regular = ctRegex!(`【(.+?)】`, "mg"); + static inline_notes_delimiter_al_regular_number_note = ctRegex!(`【(\d+)\s+(.+?)】`, "mg"); + static inline_al_delimiter_open_asterisk = ctRegex!(`【\*`, "m"); + static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m"); + static inline_text_and_note_al = ctRegex!(`(?P.+?)【(?:[*+ ]*)(?P.+?)】`, "mg"); + static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg"); + /+ inline markup footnotes endnotes +/ + static inline_image = ctRegex!(`(?P
┥)☼(?P(?P\S+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+))\s*(?P.*?┝┤.+?├)`, "mg");
+    static inline_image_without_dimensions                = ctRegex!(`(?P
┥)☼(?P(?P\S+?\.(?:jpg|gif|png)),w(?P0)h(?P0))\s*(?P.*?┝┤.+?├)`, "mg");
+    static inline_link                                    = ctRegex!(`┥(?P.+?)┝┤(?P.+?)├`, "mg"); // ┥ first span ┝┤ second span ├, both spans matched lazily. NOTE(review): every (?P group in this run has no group name, while std.regex named groups need one - names look lost in transit, confirm against upstream
+    static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg"); // either a whole ┤...├ span, or a lone ┥ or ┝ character
+    static inline_a_url                                   = ctRegex!(`(┤)(\S+?)(├)`, "mg"); // three captures: the ┤, a lazy run of non-whitespace, the ├
+    static url                                            = ctRegex!(`https?://`, "mg"); // the scheme prefix only: http:// or https://
+    static inline_link_subtoc                             = ctRegex!(`^(?P[5-7])~ ┥(?P.+?)┝┤(?P.+?)├`, "mg"); // line start, one digit 5-7, a tilde and a space, then the same ┥...┝┤...├ shape as inline_link
+    static fn_suffix                                      = ctRegex!(`\.fnSuffix`, "mg"); // the literal text .fnSuffix (dot escaped); presumably a placeholder swapped for a real file suffix - confirm at call site
+    static inline_link_fn_suffix                          = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg"); // ¤, then capture 1 = lazy text, capture 2 = the literal .fnSuffix
+    static inline_seg_link                                = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg"); // same shape as inline_link_fn_suffix but only the ¤ itself is captured
+    static mark_internal_site_lnk                         = ctRegex!(`¤`, "mg"); // the bare ¤ marker character
+    static quotation_mark_sql_insert_delimiter            = ctRegex!("[']", "mg"); // one straight single quote; the declaration after this one uses a q-delimited string (¶ as delimiter) so that its character class can hold both a double quote and a backtick alongside the curly and accent quote characters
+    static quotation_mark_various                         = ctRegex!(q"¶['‘’“”"`´¨]¶", "mg");
+    /+ inline markup font face mod +/ // patterns below match markup of the shape X{ text }X, where X is a one-character marker, plus line-level forms starting with a marker and an underscore
+    static inline_faces                                   = ctRegex!(`(?P(?P[*!_^,+#-])\{(?P.+?)\}[*!_^,+#-])`, "mg"); // marker from the class, {, lazy text, }, marker from the same class; the closing marker is not tied to the opening one (no backreference). NOTE(review): the class has no / although inline_italics below uses it - confirm intent. NOTE(review): the (?P groups in this run have no group name, std.regex needs one - confirm against upstream
+    static inline_emphasis                                = ctRegex!(`\*\{(?P.+?)\}\*`, "mg"); // *{ text }*
+    static inline_bold                                    = ctRegex!(`!\{(?P.+?)\}!`, "mg"); // !{ text }!
+    static inline_underscore                              = ctRegex!(`_\{(?P.+?)\}_`, "mg"); // _{ text }_
+    static inline_italics                                 = ctRegex!(`/\{(?P.+?)\}/`, "mg"); // /{ text }/
+    static inline_superscript                             = ctRegex!(`\^\{(?P.+?)\}\^`, "mg"); // ^{ text }^
+    static inline_subscript                               = ctRegex!(`,\{(?P.+?)\},`, "mg"); // ,{ text },
+    static inline_strike                                  = ctRegex!(`-\{(?P.+?)\}-`, "mg"); // -{ text }-
+    static inline_insert                                  = ctRegex!(`\+\{(?P.+?)\}\+`, "mg"); // +{ text }+
+    static inline_mono                                    = ctRegex!(`#\{(?P.+?)\}#`, "mg"); // #{ text }#
+    static inline_mono_box                                = ctRegex!(`■\{(?P.+?)\}■`, "mg"); // ■{ text }■
+    static inline_cite                                    = ctRegex!(`"\{(?P.+?)\}"`, "mg"); // double-quote{ text }double-quote
+    static inline_faces_line                              = ctRegex!(`^[*!/_]_ (?P.+?)((?: [\\]{2}|[~]#){0,2}$)`); // start, one of * ! / _, then underscore and space, lazy text, then zero to two trailing tokens (space plus two backslashes, or ~#) up to the end; no flags are passed here, unlike the patterns above
+    static inline_emphasis_line                           = ctRegex!(`^\*_ (?P.+?)((?: [\\]{2}|[~]#){0,2}$)`); // as inline_faces_line, marker fixed to *
+    static inline_bold_line                               = ctRegex!(`^!_ (?P.+?)((?: [\\]{2}|[~]#){0,2}$)`); // as inline_faces_line, marker fixed to !
+    static inline_italics_line                            = ctRegex!(`^/_ (?P.+?)((?: [\\]{2}|[~]#){0,2}$)`); // as inline_faces_line, marker fixed to /
+    static inline_underscore_line                         = ctRegex!(`^__ (?P.+?)((?: [\\]{2}|[~]#){0,2}$)`); // as inline_faces_line, marker fixed to _
+    static inline_fontface_clean                          = ctRegex!(`[*!_/^,+#■"-]\{|\}[*!_/^,+#■"-]`, "mg"); // just the delimiters: a marker followed by {, or } followed by a marker; this class does include /, ■ and the double quote
+    static no_header_rgx                                  = ctRegex!(`^=NULL$`); // exactly the text =NULL between start and end anchors; no flags passed
+    /+ table delimiters +/ // heads the two table_delimiter_* patterns; the xhtml_* patterns after them are a separate group with no heading of their own
+    static table_delimiter_col                           = ctRegex!("[ ]*[┊][ ]*", "mg"); // a ┊ together with any spaces on either side
+    static table_delimiter_row                           = ctRegex!("[ ]*\n", "mg"); // any trailing spaces plus a newline; the literal is double-quoted, so the newline escape is resolved by the D string before the regex engine sees it
+    static xhtml_ampersand                            = ctRegex!(`[&]`);      // & - one ampersand. The xhtml_* patterns take no flags and each match a single literal character, except xhtml_line_break; presumably each is replaced by the corresponding XHTML entity or tag at the call site - confirm there
+    static xhtml_quotation                            = ctRegex!(`["]`);      // "
+    static xhtml_less_than                            = ctRegex!(`[<]`);      // < - one less-than sign
+    static xhtml_greater_than                         = ctRegex!(`[>]`);      // > - one greater-than sign
+    static xhtml_line_break                           = ctRegex!(` [\\]{2}`); // a space followed by two backslashes; NOTE(review): the original trailing comment here was empty, its text looks lost in transit - confirm against upstream
+ } +} -- cgit v1.2.3