aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/doc_reform/output/rgx.d
blob: 8cb4581e4b833b9fedf7f02d5eab3a47f4f83d38 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/++
  regex: regular expressions used in sisu document parser
+/
module doc_reform.output.rgx;
/++
  Template that declares `Rgx`, a struct whose static members are regular
  expressions built with `ctRegex!(pattern [, flags])`.

  Flag strings seen below (meanings per the std.regex documentation):
  "m" is multi-line mode (^ and $ also match at line boundaries) and
  "g" is global (repeat over all matches).

  String literal forms matter when reading the patterns:
  - backtick (wysiwyg) strings pass backslashes through to the regex engine;
  - double-quoted strings have escapes such as \n processed by D first, so
    the pattern contains a real newline character.

  NOTE(review): presumably instantiated with `mixin DocReformOutputRgxInit;`
  by callers - confirm. `ctRegex` is not imported inside this block, so it has
  to be reachable through `doc_reform.output.defaults` or through the scope
  the template is mixed into - confirm.
+/
static template DocReformOutputRgxInit() {
  import doc_reform.output.defaults;
  static struct Rgx {
    // whitespace: newlines, <br> trimming, runs of spaces, and the `░`
    // character (named nbsp_char here) alone or followed by a space
    static newline                                        = ctRegex!("\n", "mg");
    // a "<br>\n" at the start, or "<br>" plus any newlines at the end (no flags)
    static strip_br                                       = ctRegex!("^<br>\n|<br>\n*$");
    static space                                          = ctRegex!(`[ ]`, "mg");
    static spaces_keep                                    = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block
    static spaces_line_start                              = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg");
    static spaces_multiple                                = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg");
    static two_spaces                                     = ctRegex!(`[ ]{2}`, "mg");
    static nbsp_char                                      = ctRegex!(`░`, "mg");
    static nbsp_chars_line_start                          = ctRegex!(`^░+`, "mg");
    static nbsp_and_space                                 = ctRegex!(`&nbsp;[ ]`, "mg");
    static nbsp_char_and_space                            = ctRegex!(`░[ ]`, "mg");
    // any single marker character from the set below
    // (`┚` is listed twice in the class; the duplicate has no effect)
    static special_markup_chars                           = ctRegex!(`[【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■]`, "mg");
    // source paths: each pattern is anchored with ^...$ and splits a path into
    // named groups; file names end in .sst / .ssm / .ssi, "pod.manifest" or .zip
    static src_pth_sst_or_ssm                             = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`);
    // as above, but requires a "media/text/<two lowercase letters>/" directory
    // directly before the file name
    static src_pth_pod_sst_or_ssm                         = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])$`);
    static src_pth_contents                               = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+)/pod[.]manifest$`);
    static src_pth_pod_root                               = ctRegex!(`^(?P<podroot>(?:[/]?(?:[a-zA-Z0-9._-]+/)*)(pod))$`);
    static src_pth_zip                                    = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`);
    // NOTE(review): the suffix class here is ss[im] (.ssi/.ssm) whereas
    // src_pth_pod_sst_or_ssm above uses ss[tm] (.sst/.ssm) - confirm intended
    static src_pth_unzip_pod                              = ctRegex!(`^(?P<path>media/text/[a-z]{2}/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
    // one pattern for three alternatives; exactly one of the named groups
    // filename / filelist / filezip participates in a match
    static src_pth_types                                  =
      ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`);
    // a .sst/.ssm file name at line start followed by one or more
    // whitespace-led two-letter codes, each ending in a comma or end of line
    static pod_content_location                           =
      ctRegex!(`^(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])(?P<languages>(?:\s+[a-z]{2}(?:,|$))+)`, "mg");
    // splits a .sst/.ssm path into fn_src (full name), fn_base and fn_src_suffix
    static src_fn                                         =
      ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`);
    static src_fn_master                                  = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`);
    static src_fn_text                                    = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`);
    static src_fn_insert                                  = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`);
    static src_fn_find_inserts                            = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
    // a line of the form "<< [dir/...]file.sst" or "...file.ssi"
    static insert_src_fn_ssi_or_sst                       = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`);
    // the next three match a path that ends in "/media/text/<two lowercase
    // letters>" and capture what precedes that suffix
    static src_base_parent_dir_name                       = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    static src_base_parent_path                           = ctRegex!(`(?P<dir>(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    static src_formalised_file_path_parts                 = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    /+ line breaks +/
    // empty_line and empty_block share one pattern and differ only in flags
    static empty_line                                     = ctRegex!(`^\s*$`);
    static empty_block                                    = ctRegex!(`^\s*$`, "mg");
    static br_line_natural                                = ctRegex!(`\n`, "mg");
    static br_empty_line                                  = ctRegex!(`\n[ ]*\n`, "mg");
    static br_newlines_linebreaks                         = ctRegex!(`[\n┘┙]`, "mg");
    // each of the following matches one single marker character
    static br_line                                        = ctRegex!(`┘`, "mg");
    static br_nl                                          = ctRegex!(`┙`, "mg");
    static br_paragraph                                   = ctRegex!(`┚`, "mg");
    static br_page_line                                   = ctRegex!(`┼`, "mg");
    static br_page                                        = ctRegex!(`┿`, "mg");
    static br_page_new                                    = ctRegex!(`╂`, "mg");
    /+ inline markup footnotes endnotes +/
    // notes are delimited by 【 and 】; some patterns additionally require or
    // allow a leading `*` or `+` followed by whitespace after the opening 【
    static inline_notes_al                                = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
    static inline_notes_al_special                        = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
    // note: the *_gen and *_gen_text patterns use flag "m" only (no "g")
    static inline_notes_al_gen                            = ctRegex!(`【.+?】`, "m");
    static inline_notes_al_regular                        = ctRegex!(`【(.+?)】`, "mg");
    static inline_notes_al_gen_text                       = ctRegex!(`【(?P<text>.+?)】`, "m");
    static inline_notes_al_gen_ref                        = ctRegex!(`【(?P<ref>[*+]\s+)\s*(?P<text>.+?)】`, "mg");
    // 【<digits> <note text>】 with the number and text in named groups
    static inline_notes_al_regular_number_note            = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg");
    // individual opening / closing delimiters (flag "m" only)
    static inline_al_delimiter_open_regular               = ctRegex!(`【\s`, "m");
    static inline_al_delimiter_open_symbol_star           = ctRegex!(`【[*]\s`, "m");
    static inline_al_delimiter_open_symbol_plus           = ctRegex!(`【[+]\s`, "m");
    static inline_al_delimiter_close_regular              = ctRegex!(`】`, "m");
    static inline_al_delimiter_open_and_close_regular     = ctRegex!(`【|】`, "m");
    static inline_al_delimiter_open_asterisk              = ctRegex!(`【\*`, "m");
    static inline_al_delimiter_open_plus                  = ctRegex!(`【\+`, "m");
    // text immediately followed by a note; text and note are captured separately
    static inline_text_and_note_al                        = ctRegex!(`(?P<text>.+?)【(?:[*+ ]*)(?P<note>.+?)】`, "mg");
    // a chunk of text that ends either with a complete note or at end of line
    static inline_text_and_note_al_                       = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg");
    /+ inline markup links +/
    // links have the shape ┥text┝┤target├; an image is ┥☼name.(jpg|gif|png),
    // w<digits>h<digits> ... ┝┤...├
    static inline_image                                   = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg");
    // same shape as inline_image but width and height must both be the literal 0
    static inline_image_without_dimensions                = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg");
    // image name and dimensions only; the leading ☼ is optional
    static inline_image_info                              = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg");
    static inline_link_anchor                             = ctRegex!(`┋(?P<anchor>\S+?)┋`, "mg"); // TODO *~text_link_anchor
    // inline_link_ accepts any characters in the target; inline_link only
    // non-whitespace characters
    static inline_link_                                   = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
    static inline_link                                    = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>\S+?)├`, "mg");
    static inline_link_empty                              = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
    static inline_link_number                             = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
    static inline_link_number_only                        = ctRegex!(`(┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
    static inline_link_stow_uri                           = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
    // target begins with '#'; the part after '#' is captured as segname
    static inline_link_hash                               = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<segname>\S+?))├`, "mg");
    // a whole ┤...├ target, or a lone ┥ / ┝ marker
    static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
    static inline_a_url                                   = ctRegex!(`(┤)([^\s┥┝┤├]+)(├)`, "mg");
    static url                                            = ctRegex!(`https?://`, "mg");
    static uri                                            = ctRegex!(`(?:https?|git)://`, "mg");
    // a line starting "5~ ", "6~ " or "7~ " followed by a link
    static inline_link_subtoc                             = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
    // the literal text ".fnSuffix", alone or preceded by ¤ and some text
    static fn_suffix                                      = ctRegex!(`\.fnSuffix`, "mg");
    static inline_link_fn_suffix                          = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
    static inline_seg_link                                = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
    static mark_internal_site_lnk                         = ctRegex!(`¤`, "mg");
    static quotation_mark_sql_insert_delimiter            = ctRegex!("[']", "mg");
    // written as a D delimited string (q"┋ ... ┋") so that the class can
    // contain both a double quote and a backtick without escaping
    static quotation_mark_various                         = ctRegex!(q"┋['‘’“”"`´¨]┋", "mg");
    /+ inline markup font face mod +/
    // brace form: a modifier character, then {text}, then a modifier character
    // (the closing modifier is not checked against the opening one in
    // inline_mark_faces / inline_mark_faces_to_mod)
    static inline_mark_faces                            = ctRegex!(`(?P<markup>(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}[*!/_^,+#"-])`, "mg");
    static inline_mark_faces_to_mod                     = ctRegex!(`(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}([*!/_^,+#"-])`, "mg");
    static inline_mark_emphasis                         = ctRegex!(`([*])\{(?P<text>.+?)\}[*]`, "mg");
    static inline_mark_bold                             = ctRegex!(`([!])\{(?P<text>.+?)\}[!]`, "mg");
    static inline_mark_underscore                       = ctRegex!(`([_])\{(?P<text>.+?)\}[_]`, "mg");
    static inline_mark_italics                          = ctRegex!(`([/])\{(?P<text>.+?)\}[/]`, "mg");
    static inline_mark_superscript                      = ctRegex!(`(\^)\{(?P<text>.+?)\}\^`, "mg");
    static inline_mark_subscript                        = ctRegex!(`([,])\{(?P<text>.+?)\}[,]`, "mg");
    static inline_mark_strike                           = ctRegex!(`([-])\{(?P<text>.+?)\}[-]`, "mg");
    static inline_mark_insert                           = ctRegex!(`([+])\{(?P<text>.+?)\}[+]`, "mg");
    static inline_mark_mono                             = ctRegex!(`([#])\{(?P<text>.+?)\}[#]`, "mg");
    static inline_mark_cite                             = ctRegex!(`(["])\{(?P<text>.+?)\}["]`, "mg");
    // just the opening "X{" or closing "}X" delimiter pair
    static inline_mark_fontface_clean                   = ctRegex!(`[*!_/^,+#■"-]\{|\}[*!_/^,+#■"-]`, "mg");
    // whole-line form: the line starts with "*_ ", "!_ ", "/_ " or "__ " and may
    // end with up to two trailing markers (` \\` and/or `~#`); no flags given
    static inline_faces_line                              = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
    static inline_emphasis_line                           = ctRegex!(`^\*_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
    static inline_bold_line                               = ctRegex!(`^!_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
    static inline_italics_line                            = ctRegex!(`^/_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
    static inline_underscore_line                         = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
    // matches only the exact string "=NULL"
    static no_header_rgx                                  = ctRegex!(`^=NULL$`);
    /+ inline markup font face mod +/
    // ┨ ┣ form: a modifier character, then ┨text┣, then a modifier character;
    // here mono uses ■ and cite uses ‖ where the brace form above uses # and "
    // NOTE(review): the modifier class of inline_faces has no `/`, although
    // inline_italics and inline_fontface_clean below do handle `/` - confirm
    // whether italics are meant to be excluded from inline_faces
    static inline_faces                                   = ctRegex!(`(?P<markup>(?P<mod>[*!_^,+■‖-])┨(?P<text>.+?)┣[*!_^,+■‖-])`, "mg");
    static inline_emphasis                                = ctRegex!(`[*]┨(?P<text>.+?)┣[*]`, "mg");
    static inline_bold                                    = ctRegex!(`[!]┨(?P<text>.+?)┣[!]`, "mg");
    static inline_underscore                              = ctRegex!(`[_]┨(?P<text>.+?)┣[_]`, "mg");
    static inline_italics                                 = ctRegex!(`[/]┨(?P<text>.+?)┣[/]`, "mg");
    static inline_superscript                             = ctRegex!(`\^┨(?P<text>.+?)┣\^`, "mg");
    // static inline_superscript                             = ctRegex!(`[\^]┨(?P<text>.+?)┣[\^]`, "mg");
    static inline_subscript                               = ctRegex!(`[,]┨(?P<text>.+?)┣[,]`, "mg");
    static inline_strike                                  = ctRegex!(`[-]┨(?P<text>.+?)┣[-]`, "mg");
    static inline_insert                                  = ctRegex!(`[+]┨(?P<text>.+?)┣[+]`, "mg");
    static inline_mono                                    = ctRegex!(`[■]┨(?P<text>.+?)┣[■]`, "mg");
    static inline_cite                                    = ctRegex!(`[‖]┨(?P<text>.+?)┣[‖]`, "mg");
    // just the opening "X┨" or closing "┣X" delimiter pair
    static inline_fontface_clean                          = ctRegex!(`[*!_/^,+■‖-]┨|┣[*!_/^,+■‖-]`, "mg");
    /+ table delimiters +/
    // column: ┊ with any surrounding spaces; row: optional spaces then a newline
    static table_delimiter_col                           = ctRegex!("[ ]*[┊][ ]*", "mg");
    static table_delimiter_row                           = ctRegex!("[ ]*\n", "mg");
    // single characters with special meaning in (X)HTML; the trailing comment
    // on each line names the replacement it corresponds to (flag "m" only)
    static xhtml_ampersand                            = ctRegex!(`[&]`, "m");      // &amp;
    static xhtml_quotation                            = ctRegex!(`["]`, "m");      // &quot;
    static xhtml_less_than                            = ctRegex!(`[<]`, "m");      // &lt;
    static xhtml_greater_than                         = ctRegex!(`[>]`, "m");      // &gt;
    // a space followed by two backslashes
    static xhtml_line_break                           = ctRegex!(` [\\]{2}`, "m"); // <br />
  }
}