diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2019-05-22 10:50:33 -0400 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2019-10-17 19:07:20 -0400 |
commit | e973365c4b74be2b2cff9be970ccba5928dbe368 (patch) | |
tree | f5af8c28ba939095b9c1310c5ea7b91816c12ddf /org/default_regex.org | |
parent | 0.7.2 latex (for pdf) (initial stub) (diff) |
0.7.3 start to look at document harvest (initial stub)
Diffstat (limited to 'org/default_regex.org')
-rw-r--r-- | org/default_regex.org | 13 |
1 files changed, 12 insertions, 1 deletions
diff --git a/org/default_regex.org b/org/default_regex.org index 2958027..6d17f0c 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -51,6 +51,7 @@ static template DocReformRgxInit() { #+BEGIN_SRC d /+ misc +/ static true_dollar = ctRegex!(`\$`, "gm"); +static sep = ctRegex!(`␣`, "gm"); static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); static within_quotes = ctRegex!(`"(.+?)"`, "m"); @@ -106,7 +107,7 @@ static make_simple_substitutions_d = ctRegex!(`(?P<substituti /+ header +/ static variable_doc_title = ctRegex!(`@title`); static variable_doc_author = ctRegex!(`@author|@creator`); -static raw_author_munge = ctRegex!(`(\S.+?),\s+(.+)`,"i"); +static raw_author_munge = ctRegex!(`(?P<last>\S.+?),\s+(?P<first>.+)`,"i"); static toml_header_meta_title = ctRegex!(`^\s*(title\s*=\s*"|\[title\])`, "m"); #+END_SRC @@ -368,6 +369,16 @@ static bi_sub_terms_plus_object_number_offset_split = ctRegex!(`\s*\|\s*`); static bi_term_and_object_numbers_match = ctRegex!(`^(.+?)\+(\d+)`); #+END_SRC +** topic register split (document classify) + +#+name: meta_rgx +#+BEGIN_SRC d +static topic_register_main_terms_split = ctRegex!(`\s*;\s*`); +static topic_register_main_term_plus_rest_split = ctRegex!(`\s*:\s*`); +static topic_register_sub_terms_split = ctRegex!(`\s*\|\s*`); +static topic_register_multiple_sub_terms_split = ctRegex!(`␣([^|␣]+(?:\|[^|␣]+)+)`); +#+END_SRC + ** language codes :language:codes: #+name: meta_rgx |