// path: root/src/doc_reform/output/rgx.d
// blob: bf1d766fd78379f3177c9693ddb153653f917e54
/++
  regex: regular expressions used in sisu document parser
+/
module doc_reform.output.rgx;
/++
  Template wrapping struct Rgx, a collection of static, compile-time
  compiled regular expressions (ctRegex) referenced by name.

  Flag strings follow std.regex: "m" is multi-line mode (^ and $ also match
  at line boundaries), "g" is global (find every match, not only the first).
  Patterns declared without a flag string use the std.regex defaults.

  Named groups (?P<name>...) are the capture names the patterns expose.

  NOTE(review): ctRegex is not imported inside this block; presumably
  std.regex is supplied by the instantiation scope or via
  doc_reform.output.defaults - confirm.
+/
static template DocReformOutputRgxInit() {
  import doc_reform.output.defaults;
  static struct Rgx {
    /+ newlines, spaces, non-breaking-space placeholder (░) +/
    static newline                                        = ctRegex!("\n", "mg");
    static strip_br                                       = ctRegex!("^<br>\n|<br>\n*$");
    static space                                          = ctRegex!(`[ ]`, "mg");
    static spaces_keep                                    = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block
    static spaces_line_start                              = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg");
    static spaces_multiple                                = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg");
    static two_spaces                                     = ctRegex!(`[ ]{2}`, "mg");
    static nbsp_char                                      = ctRegex!(`░`, "mg");
    static nbsp_chars_line_start                          = ctRegex!(`^░+`, "mg");
    static nbsp_and_space                                 = ctRegex!(`&nbsp;[ ]`, "mg");
    static nbsp_char_and_space                            = ctRegex!(`░[ ]`, "mg");
    // any single character from the set of special markup characters
    // NOTE(review): ┚ is listed twice in this character class; the duplicate is redundant
    static special_markup_chars                           = ctRegex!(`[【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■]`, "mg");
    /+ source file paths: .sst / .ssm / .ssi files, pod directories, pod.manifest, zip +/
    static src_pth_sst_or_ssm                             = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`);
    static src_pth_pod_sst_or_ssm                         = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])$`);
    static src_pth_contents                               = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+)/pod[.]manifest$`);
    static src_pth_pod_root                               = ctRegex!(`^(?P<podroot>(?:[/]?(?:[a-zA-Z0-9._-]+/)*)(pod))$`);
    static src_pth_zip                                    = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`);
    // NOTE(review): accepts .ssi/.ssm (ss[im]) whereas the sibling pod patterns accept ss[tm] - confirm intended
    static src_pth_unzip_pod                              = ctRegex!(`^(?P<path>media/text/[a-z]{2}/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
    // one pattern distinguishing the three accepted inputs: markup file, <dir>/pod.manifest, or zip
    static src_pth_types                                  =
      ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`);
    // a filename followed by one or more two-letter codes, each ended by a comma or end of line
    static pod_content_location                           =
      ctRegex!(`^(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])(?P<languages>(?:\s+[a-z]{2}(?:,|$))+)`, "mg");
    static src_fn                                         =
      ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`);
    static src_fn_master                                  = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`);
    static src_fn_text                                    = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`);
    static src_fn_insert                                  = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`);
    static src_fn_find_inserts                            = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
    // a line starting with "<<" followed by an optional relative path and a .sst/.ssi filename
    static insert_src_fn_ssi_or_sst                       = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`);
    // the next three match paths ending in /media/text/<two-letter code>
    static src_base_parent_dir_name                       = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    static src_base_parent_path                           = ctRegex!(`(?P<dir>(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    static src_formalised_file_path_parts                 = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    /+ line breaks +/
    static br_line                                        = ctRegex!(`┘`, "mg");
    static br_nl                                          = ctRegex!(`┙`, "mg");
    static br_paragraph                                   = ctRegex!(`┚`, "mg");
    static br_page_line                                   = ctRegex!(`┼`, "mg");
    static br_page                                        = ctRegex!(`┿`, "mg");
    static br_page_new                                    = ctRegex!(`╂`, "mg");
    /+ inline markup footnotes endnotes: notes are delimited by 【 ... 】 +/
    static inline_notes_al                                = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
    static inline_notes_al_special                        = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
    static inline_notes_al_gen                            = ctRegex!(`【.+?】`, "m");
    static inline_notes_al_gen_text                       = ctRegex!(`【(?P<text>.+?)】`, "m");
    static inline_notes_al_gen_ref                        = ctRegex!(`【(?P<ref>[*+]\s+)\s*(?P<text>.+?)】`, "mg");
    static inline_al_delimiter_open_regular               = ctRegex!(`【\s`, "m");
    static inline_al_delimiter_open_symbol_star           = ctRegex!(`【[*]\s`, "m");
    static inline_al_delimiter_open_symbol_plus           = ctRegex!(`【[+]\s`, "m");
    static inline_al_delimiter_close_regular              = ctRegex!(`】`, "m");
    static inline_al_delimiter_open_and_close_regular     = ctRegex!(`【|】`, "m");
    static inline_notes_delimiter_al_regular              = ctRegex!(`【(.+?)】`, "mg");
    static inline_notes_delimiter_al_regular_number_note  = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)】`, "mg");
    static inline_al_delimiter_open_asterisk              = ctRegex!(`【\*`, "m");
    static inline_al_delimiter_open_plus                  = ctRegex!(`【\+`, "m");
    static inline_text_and_note_al                        = ctRegex!(`(?P<text>.+?)【(?:[*+ ]*)(?P<note>.+?)】`, "mg");
    static inline_text_and_note_al_                       = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg");
    /+ inline markup images, links: ┥text┝┤target├ ; images are prefixed by ☼ +/
    static inline_image                                   = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>\S+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg");
    // same as inline_image except that width and height must both be the literal 0
    static inline_image_without_dimensions                = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>\S+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg");
    static inline_link_anchor                             = ctRegex!(`┋(?P<anchor>\S+?)┋`, "mg"); // TODO *~text_link_anchor
    // inline_link_ lets the link contain any characters; inline_link requires it to be free of whitespace
    static inline_link_                                   = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
    static inline_link                                    = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>\S+?)├`, "mg");
    static inline_link_empty                              = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
    static inline_link_number                             = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
    static inline_link_number_only                        = ctRegex!(`(┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
    static inline_link_stow_uri                           = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
    static inline_link_hash                               = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<segname>\S+?))├`, "mg");
    // matches a whole ┤...├ target, or a lone ┥ or ┝ delimiter
    static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
    static inline_a_url                                   = ctRegex!(`(┤)([^\s┥┝┤├]+)(├)`, "mg");
    static url                                            = ctRegex!(`https?://`, "mg");
    static inline_link_subtoc                             = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
    // ".fnSuffix" is matched as a literal placeholder string; ¤ marks an internal site link (see mark_internal_site_lnk)
    static fn_suffix                                      = ctRegex!(`\.fnSuffix`, "mg");
    static inline_link_fn_suffix                          = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
    static inline_seg_link                                = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
    static mark_internal_site_lnk                         = ctRegex!(`¤`, "mg");
    /+ quotation marks +/
    static quotation_mark_sql_insert_delimiter            = ctRegex!("[']", "mg");
    // q"┋...┋" is a D delimited string, used here so the pattern can contain both " and `
    static quotation_mark_various                         = ctRegex!(q"┋['‘’“”"`´¨]┋", "mg");
    /+ inline markup font face mod: source form, modifier character around { } +/
    // the closing modifier is matched by character class only (no backreference),
    // so it is not required to equal the opening modifier
    static inline_mark_faces                            = ctRegex!(`(?P<markup>(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}[*!/_^,+#"-])`, "mg");
    static inline_mark_faces_to_mod                     = ctRegex!(`(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}([*!/_^,+#"-])`, "mg");
    static inline_mark_emphasis                         = ctRegex!(`([*])\{(?P<text>.+?)\}[*]`, "mg");
    static inline_mark_bold                             = ctRegex!(`([!])\{(?P<text>.+?)\}[!]`, "mg");
    static inline_mark_underscore                       = ctRegex!(`([_])\{(?P<text>.+?)\}[_]`, "mg");
    static inline_mark_italics                          = ctRegex!(`([/])\{(?P<text>.+?)\}[/]`, "mg");
    static inline_mark_superscript                      = ctRegex!(`(\^)\{(?P<text>.+?)\}\^`, "mg");
    static inline_mark_subscript                        = ctRegex!(`([,])\{(?P<text>.+?)\}[,]`, "mg");
    static inline_mark_strike                           = ctRegex!(`([-])\{(?P<text>.+?)\}[-]`, "mg");
    static inline_mark_insert                           = ctRegex!(`([+])\{(?P<text>.+?)\}[+]`, "mg");
    static inline_mark_mono                             = ctRegex!(`([#])\{(?P<text>.+?)\}[#]`, "mg");
    static inline_mark_cite                             = ctRegex!(`(["])\{(?P<text>.+?)\}["]`, "mg");
    static inline_mark_fontface_clean                   = ctRegex!(`[*!_/^,+#■"-]\{|\}[*!_/^,+#■"-]`, "mg");
    /+ whole-line font face markup: line starts with modifier, underscore, space; +/
    /+ the tail allows up to two trailing " \\" or "~#" markers before end of line +/
    static inline_faces_line                              = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
    static inline_emphasis_line                           = ctRegex!(`^\*_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
    static inline_bold_line                               = ctRegex!(`^!_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
    static inline_italics_line                            = ctRegex!(`^/_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
    static inline_underscore_line                         = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
    // matches the exact string "=NULL"
    static no_header_rgx                                  = ctRegex!(`^=NULL$`);
    /+ inline markup font face mod: modifier character around ┨ ┣ delimiters +/
    /+ (presumably the parser's internal form of the { } markup above - confirm) +/
    // NOTE(review): this character class omits '/', although inline_italics and
    // inline_fontface_clean below handle /┨ ... ┣/ - confirm whether italics are meant to be excluded
    static inline_faces                                   = ctRegex!(`(?P<markup>(?P<mod>[*!_^,+■‖-])┨(?P<text>.+?)┣[*!_^,+■‖-])`, "mg");
    static inline_emphasis                                = ctRegex!(`[*]┨(?P<text>.+?)┣[*]`, "mg");
    static inline_bold                                    = ctRegex!(`[!]┨(?P<text>.+?)┣[!]`, "mg");
    static inline_underscore                              = ctRegex!(`[_]┨(?P<text>.+?)┣[_]`, "mg");
    static inline_italics                                 = ctRegex!(`[/]┨(?P<text>.+?)┣[/]`, "mg");
    static inline_superscript                             = ctRegex!(`\^┨(?P<text>.+?)┣\^`, "mg");
    // static inline_superscript                             = ctRegex!(`[\^]┨(?P<text>.+?)┣[\^]`, "mg");
    static inline_subscript                               = ctRegex!(`[,]┨(?P<text>.+?)┣[,]`, "mg");
    static inline_strike                                  = ctRegex!(`[-]┨(?P<text>.+?)┣[-]`, "mg");
    static inline_insert                                  = ctRegex!(`[+]┨(?P<text>.+?)┣[+]`, "mg");
    static inline_mono                                    = ctRegex!(`[■]┨(?P<text>.+?)┣[■]`, "mg");
    static inline_cite                                    = ctRegex!(`[‖]┨(?P<text>.+?)┣[‖]`, "mg");
    static inline_fontface_clean                          = ctRegex!(`[*!_/^,+■‖-]┨|┣[*!_/^,+■‖-]`, "mg");
    /+ table delimiters: column separator ┊ with surrounding spaces; row ends at newline +/
    static table_delimiter_col                           = ctRegex!("[ ]*[┊][ ]*", "mg");
    static table_delimiter_row                           = ctRegex!("[ ]*\n", "mg");
    /+ xhtml special characters; each trailing comment names the markup the match +/
    /+ is presumably replaced with by the caller - confirm; no "g" flag is set here +/
    static xhtml_ampersand                            = ctRegex!(`[&]`, "m");      // &amp;
    static xhtml_quotation                            = ctRegex!(`["]`, "m");      // &quot;
    static xhtml_less_than                            = ctRegex!(`[<]`, "m");      // &lt;
    static xhtml_greater_than                         = ctRegex!(`[>]`, "m");      // &gt;
    static xhtml_line_break                           = ctRegex!(` [\\]{2}`, "m"); // <br />
  }
}