about | summary | refs | log | tree | commit | diff | homepage
path: root/org/default_regex.org
diff options
context:
space:
mode:
author: Ralph Amissah <ralph.amissah@gmail.com> 2019-05-22 10:50:33 -0400
committer: Ralph Amissah <ralph.amissah@gmail.com> 2019-10-17 19:07:20 -0400
commit: e973365c4b74be2b2cff9be970ccba5928dbe368 (patch)
tree: f5af8c28ba939095b9c1310c5ea7b91816c12ddf /org/default_regex.org
parent: 0.7.2 latex (for pdf) (initial stub) (diff)
0.7.3 start to look at document harvest (initial stub)
Diffstat (limited to 'org/default_regex.org')
-rw-r--r-- org/default_regex.org 13
1 files changed, 12 insertions, 1 deletions
diff --git a/org/default_regex.org b/org/default_regex.org
index 2958027..6d17f0c 100644
--- a/org/default_regex.org
+++ b/org/default_regex.org
@@ -51,6 +51,7 @@ static template DocReformRgxInit() {
#+BEGIN_SRC d
/+ misc +/
static true_dollar = ctRegex!(`\$`, "gm");
+static sep = ctRegex!(`␣`, "gm");
static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`);
static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`);
static within_quotes = ctRegex!(`"(.+?)"`, "m");
@@ -106,7 +107,7 @@ static make_simple_substitutions_d = ctRegex!(`(?P<substituti
/+ header +/
static variable_doc_title = ctRegex!(`@title`);
static variable_doc_author = ctRegex!(`@author|@creator`);
-static raw_author_munge = ctRegex!(`(\S.+?),\s+(.+)`,"i");
+static raw_author_munge = ctRegex!(`(?P<last>\S.+?),\s+(?P<first>.+)`,"i");
static toml_header_meta_title = ctRegex!(`^\s*(title\s*=\s*"|\[title\])`, "m");
#+END_SRC
@@ -368,6 +369,16 @@ static bi_sub_terms_plus_object_number_offset_split = ctRegex!(`\s*\|\s*`);
static bi_term_and_object_numbers_match = ctRegex!(`^(.+?)\+(\d+)`);
#+END_SRC
+** topic register split (document classify)
+
+#+name: meta_rgx
+#+BEGIN_SRC d
+static topic_register_main_terms_split = ctRegex!(`\s*;\s*`);
+static topic_register_main_term_plus_rest_split = ctRegex!(`\s*:\s*`);
+static topic_register_sub_terms_split = ctRegex!(`\s*\|\s*`);
+static topic_register_multiple_sub_terms_split = ctRegex!(`␣([^|␣]+(?:\|[^|␣]+)+)`);
+#+END_SRC
+
** language codes :language:codes:
#+name: meta_rgx