about summary refs log tree commit diff homepage
path: root/src/doc_reform/meta
diff options
context:
space:
mode:
author Ralph Amissah <ralph.amissah@gmail.com> 2019-05-22 10:50:33 -0400
committer Ralph Amissah <ralph.amissah@gmail.com> 2019-10-17 19:07:20 -0400
commit e973365c4b74be2b2cff9be970ccba5928dbe368 (patch)
tree f5af8c28ba939095b9c1310c5ea7b91816c12ddf /src/doc_reform/meta
parent 0.7.2 latex (for pdf) (initial stub) (diff)
0.7.3 start to look at document harvest (initial stub)
Diffstat (limited to 'src/doc_reform/meta')
-rw-r--r-- src/doc_reform/meta/conf_make_meta_json.d 215
-rw-r--r-- src/doc_reform/meta/conf_make_meta_structs.d 3
-rw-r--r-- src/doc_reform/meta/metadoc.d 1
-rw-r--r-- src/doc_reform/meta/metadoc_harvest.d 30
-rw-r--r-- src/doc_reform/meta/metadoc_summary.d 4
-rw-r--r-- src/doc_reform/meta/metadochead.d 84
-rw-r--r-- src/doc_reform/meta/rgx.d 7
7 files changed, 170 insertions, 174 deletions
diff --git a/src/doc_reform/meta/conf_make_meta_json.d b/src/doc_reform/meta/conf_make_meta_json.d
index fcd52c1..5fd4499 100644
--- a/src/doc_reform/meta/conf_make_meta_json.d
+++ b/src/doc_reform/meta/conf_make_meta_json.d
@@ -5,6 +5,8 @@
module doc_reform.meta.conf_make_meta_json;
static template contentJSONtoDocReformStruct() {
import
+ std.algorithm,
+ std.array,
std.exception,
std.regex,
std.stdio,
@@ -16,6 +18,7 @@ static template contentJSONtoDocReformStruct() {
import
doc_reform.meta.conf_make_meta_structs,
doc_reform.meta.conf_make_meta_json,
+ doc_reform.meta.defaults,
doc_reform.meta.rgx;
ConfCompositePlus _struct_composite;
auto contentJSONtoDocReformStruct(C,J)(C _struct_composite, J _json, string _identifier) {
@@ -361,6 +364,108 @@ static template contentJSONtoDocReformStruct() {
}
}
/+ meta ------------------------------------------------------------------- +/
+ if (_struct_composite.meta.creator_author.empty) {
+ if ("creator" in _json.object) {
+ if ("author" in _json.object["creator"]
+ && (_json.object["creator"]["author"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.creator_author = _json.object["creator"]["author"].str;
+ }
+ if ("email" in _json.object["creator"]
+ && (_json.object["creator"]["email"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.creator_author_email = _json.object["creator"]["email"].str;
+ }
+ if ("illustrator" in _json.object["creator"]
+ && (_json.object["creator"]["illustrator"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.creator_illustrator = _json.object["creator"]["illustrator"].str;
+ }
+ if ("translator" in _json.object["creator"]
+ && (_json.object["creator"]["translator"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.creator_translator = _json.object["creator"]["translator"].str;
+ }
+ }
+ string[] authors_arr;
+ string[][string] authors_hash_arr = [ "first" : [], "last" : [], "full" : [], "last_first" : [], "as_input" : [] ];
+ string[] authors_raw_arr
+ = _struct_composite.meta.creator_author.split(rgx.arr_delimiter);
+ auto _lastname = appender!(char[])();
+ foreach (author_raw; authors_raw_arr) {
+ authors_arr ~= author_raw.replace(rgx.raw_author_munge, "$2 $1");
+ authors_hash_arr["first"] ~= author_raw.replace(rgx.raw_author_munge, "$2");
+ authors_hash_arr["last"] ~= author_raw.replace(rgx.raw_author_munge, "$1");
+ authors_hash_arr["full"] ~= author_raw.replace(rgx.raw_author_munge, "$2 $1");
+ authors_hash_arr["as_input"] ~= author_raw;
+ if (auto m = author_raw.match(rgx.raw_author_munge)) {
+ (m.captures[1]).map!toUpper.copy(_lastname);
+ authors_hash_arr["last_first"] ~= _lastname.data.to!string ~ ", " ~ m.captures[2];
+ _lastname = appender!(char[])();
+ }
+ }
+ _struct_composite.meta.creator_author = authors_arr.join(", ").chomp.chomp;
+ string _author_name_last_first = authors_hash_arr["last_first"].join("; ").chomp.chomp;
+ _struct_composite.meta.creator_author_surname_fn = (_author_name_last_first.length > 0)
+ ? _author_name_last_first
+ : authors_hash_arr["as_input"].join("; ").chomp.chomp;
+ }
+ if (_struct_composite.meta.title_main.empty) {
+ if ("title" in _json.object) {
+ if ((_json.object["title"].type().to!string) == "string") {
+ _struct_composite.meta.title_main = _json.object["title"].str;
+ } else {
+ if ("edition" in _json.object["title"]
+ && (_json.object["title"]["edition"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.title_edition = _json.object["title"]["edition"].str;
+ }
+ if ("full" in _json.object["title"]
+ && (_json.object["title"]["full"].type().to!string == "string")
+ ) {}
+ if ("language" in _json.object["title"]
+ && (_json.object["title"]["language"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.title_language = _json.object["title"]["language"].str;
+ }
+ if ("main" in _json.object["title"]
+ && (_json.object["title"]["main"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.title_main = _json.object["title"]["main"].str;
+ } else if ("title" in _json.object["title"]
+ && (_json.object["title"]["title"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.title_main = _json.object["title"]["title"].str;
+ }
+ if ("note" in _json.object["title"]
+ && (_json.object["title"]["note"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.title_note = _json.object["title"]["note"].str;
+ }
+ if ("sub" in _json.object["title"]
+ && (_json.object["title"]["sub"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.title_sub = _json.object["title"]["sub"].str;
+ }
+ if ("subtitle" in _json.object["title"]
+ && (_json.object["title"]["subtitle"].type().to!string == "string")
+ ) {
+ _struct_composite.meta.title_subtitle = _json.object["title"]["subtitle"].str;
+ }
+ }
+ }
+ if ((!(_struct_composite.meta.title_subtitle.empty))
+ && (_struct_composite.meta.title_sub.empty)) {
+ _struct_composite.meta.title_sub = _struct_composite.meta.title_subtitle;
+ }
+ _struct_composite.meta.title_full = (_struct_composite.meta.title_sub.empty)
+ ? _struct_composite.meta.title_main
+ : format(
+ "%s - %s",
+ _struct_composite.meta.title_main,
+ _struct_composite.meta.title_sub,
+ );
+ }
if ("classify" in _json.object) {
if ("dewey" in _json.object["classify"]
&& (_json.object["classify"]["dewey"].type().to!string == "string")
@@ -386,6 +491,22 @@ static template contentJSONtoDocReformStruct() {
&& (_json.object["classify"]["topic_register"].type().to!string == "string")
) {
_struct_composite.meta.classify_topic_register = _json.object["classify"]["topic_register"].str;
+ string[] main_topics_ = _struct_composite.meta.classify_topic_register.split(rgx.topic_register_main_terms_split);
+ string[] topics;
+ string topics_tmp;
+ string[] multiple_sub_terms;
+ foreach (mt; main_topics_) {
+ topics_tmp = mt.replaceAll(rgx.topic_register_main_term_plus_rest_split, mkup.sep);
+ if (auto m = topics_tmp.match(rgx.topic_register_multiple_sub_terms_split)) {
+ multiple_sub_terms = m.captures[1].split(rgx.topic_register_sub_terms_split);
+ foreach (subterm; multiple_sub_terms) {
+ topics ~= m.captures.pre ~ mkup.sep ~ subterm;
+ }
+ } else {
+ topics ~= topics_tmp;
+ }
+ }
+ _struct_composite.meta.classify_topic_register_arr = topics;
}
}
if ("date" in _json.object) {
@@ -424,6 +545,13 @@ static template contentJSONtoDocReformStruct() {
) {
_struct_composite.meta.date_valid = _json.object["date"]["valid"].str;
}
+ _struct_composite.meta.author_date_title = format(
+ "%s %s \"%s\"",
+ _struct_composite.meta.creator_author_surname_fn,
+ (_struct_composite.meta.date_published.length > 0)
+ ? "(" ~ _struct_composite.meta.date_published ~ ")" : "",
+ _struct_composite.meta.title_full,
+ );
}
if ("links" in _json.object) {}
if ("notes" in _json.object) {
@@ -508,93 +636,6 @@ static template contentJSONtoDocReformStruct() {
_struct_composite.meta.rights_license = _json.object["rights"]["license"].str;
}
}
- if (_struct_composite.meta.creator_author.empty) {
- if ("creator" in _json.object) {
- if ("author" in _json.object["creator"]
- && (_json.object["creator"]["author"].type().to!string == "string")
- ) {
- _struct_composite.meta.creator_author = _json.object["creator"]["author"].str;
- }
- if ("email" in _json.object["creator"]
- && (_json.object["creator"]["email"].type().to!string == "string")
- ) {
- _struct_composite.meta.creator_author_email = _json.object["creator"]["email"].str;
- }
- if ("illustrator" in _json.object["creator"]
- && (_json.object["creator"]["illustrator"].type().to!string == "string")
- ) {
- _struct_composite.meta.creator_illustrator = _json.object["creator"]["illustrator"].str;
- }
- if ("translator" in _json.object["creator"]
- && (_json.object["creator"]["translator"].type().to!string == "string")
- ) {
- _struct_composite.meta.creator_translator = _json.object["creator"]["translator"].str;
- }
- }
- string[] authors_arr;
- string[] authors_raw_arr
- = _struct_composite.meta.creator_author.split(rgx.arr_delimiter);
- foreach (author_raw; authors_raw_arr) {
- authors_arr ~= author_raw.replace(rgx.raw_author_munge, "$2 $1");
- }
- _struct_composite.meta.creator_author = join(authors_arr, ", ").chomp.chomp;
- }
- if (_struct_composite.meta.title_main.empty) {
- if ("title" in _json.object) {
- if ((_json.object["title"].type().to!string) == "string") {
- _struct_composite.meta.title_main = _json.object["title"].str;
- } else {
- if ("edition" in _json.object["title"]
- && (_json.object["title"]["edition"].type().to!string == "string")
- ) {
- _struct_composite.meta.title_edition = _json.object["title"]["edition"].str;
- }
- if ("full" in _json.object["title"]
- && (_json.object["title"]["full"].type().to!string == "string")
- ) {}
- if ("language" in _json.object["title"]
- && (_json.object["title"]["language"].type().to!string == "string")
- ) {
- _struct_composite.meta.title_language = _json.object["title"]["language"].str;
- }
- if ("main" in _json.object["title"]
- && (_json.object["title"]["main"].type().to!string == "string")
- ) {
- _struct_composite.meta.title_main = _json.object["title"]["main"].str;
- } else if ("title" in _json.object["title"]
- && (_json.object["title"]["title"].type().to!string == "string")
- ) {
- _struct_composite.meta.title_main = _json.object["title"]["title"].str;
- }
- if ("note" in _json.object["title"]
- && (_json.object["title"]["note"].type().to!string == "string")
- ) {
- _struct_composite.meta.title_note = _json.object["title"]["note"].str;
- }
- if ("sub" in _json.object["title"]
- && (_json.object["title"]["sub"].type().to!string == "string")
- ) {
- _struct_composite.meta.title_sub = _json.object["title"]["sub"].str;
- }
- if ("subtitle" in _json.object["title"]
- && (_json.object["title"]["subtitle"].type().to!string == "string")
- ) {
- _struct_composite.meta.title_subtitle = _json.object["title"]["subtitle"].str;
- }
- }
- }
- if ((!(_struct_composite.meta.title_subtitle.empty))
- && (_struct_composite.meta.title_sub.empty)) {
- _struct_composite.meta.title_sub = _struct_composite.meta.title_subtitle;
- }
- _struct_composite.meta.title_full = (_struct_composite.meta.title_sub.empty)
- ? _struct_composite.meta.title_main
- : format(
- "%s - %s",
- _struct_composite.meta.title_main,
- _struct_composite.meta.title_sub,
- );
- }
return _struct_composite;
}
}
diff --git a/src/doc_reform/meta/conf_make_meta_structs.d b/src/doc_reform/meta/conf_make_meta_structs.d
index 874e509..ff1ec76 100644
--- a/src/doc_reform/meta/conf_make_meta_structs.d
+++ b/src/doc_reform/meta/conf_make_meta_structs.d
@@ -181,7 +181,9 @@ struct MetaComposite {
string classify_loc;
string classify_subject;
string classify_topic_register;
+ string[] classify_topic_register_arr;
string creator_author;
+ string creator_author_surname_fn;
string creator_author_email;
string creator_illustrator;
string creator_translator;
@@ -223,6 +225,7 @@ struct MetaComposite {
string title_short;
string title_sub;
string title_subtitle;
+ string author_date_title;
}
struct ConfComposite {
MetaComposite meta;
diff --git a/src/doc_reform/meta/metadoc.d b/src/doc_reform/meta/metadoc.d
index d8cc19f..a4b920b 100644
--- a/src/doc_reform/meta/metadoc.d
+++ b/src/doc_reform/meta/metadoc.d
@@ -9,6 +9,7 @@ template DocReformAbstraction() {
import
doc_reform.meta,
doc_reform.meta.metadoc_summary,
+ doc_reform.meta.metadoc_harvest,
doc_reform.meta.metadoc_from_src,
doc_reform.meta.conf_make_meta_structs,
doc_reform.meta.conf_make_meta_toml,
diff --git a/src/doc_reform/meta/metadoc_harvest.d b/src/doc_reform/meta/metadoc_harvest.d
new file mode 100644
index 0000000..c3534f9
--- /dev/null
+++ b/src/doc_reform/meta/metadoc_harvest.d
@@ -0,0 +1,30 @@
+module doc_reform.meta.metadoc_harvest; // document "harvest" step: copies selected document metadata into a harvest record
+template DocReformMetaDocHarvest() { // eponymous template: its single member is the function of the same name
+ auto DocReformMetaDocHarvest(T,H)( // fills `harvest` from `doc_matters` and returns it
+ T doc_matters, // source; only doc_matters.conf_make_meta.meta.* is read in this function
+ H harvest, // destination; must expose the five fields assigned below
+ ) {
+ import
+ doc_reform.meta.defaults,
+ doc_reform.meta.rgx;
+ import
+ std.array,
+ std.exception,
+ std.regex,
+ std.stdio,
+ std.string,
+ std.traits,
+ std.typecons,
+ std.uni,
+ std.utf,
+ std.conv : to;
+ mixin InternalMarkup; // NOTE(review): presumably provides InlineMarkup used on the next line; it is defined outside this file - confirm
+ auto markup = InlineMarkup(); // not referenced again within this function
+ harvest.title = doc_matters.conf_make_meta.meta.title_full; // full title (main title, plus sub-title when one is set)
+ harvest.author = doc_matters.conf_make_meta.meta.creator_author; // author string as stored in the document metadata
+ harvest.author_date_title = doc_matters.conf_make_meta.meta.author_date_title; // pre-formatted author / date / title string
+ harvest.date_published = doc_matters.conf_make_meta.meta.date_published; // publication date string
+ harvest.topic_register_arr = doc_matters.conf_make_meta.meta.classify_topic_register_arr; // topic register entries as a string array
+ return harvest; // NOTE(review): `harvest` is taken by value; whether the caller's original is also updated depends on H being a value or reference type - confirm
+ }
+}
diff --git a/src/doc_reform/meta/metadoc_summary.d b/src/doc_reform/meta/metadoc_summary.d
index 768cebd..4beada8 100644
--- a/src/doc_reform/meta/metadoc_summary.d
+++ b/src/doc_reform/meta/metadoc_summary.d
@@ -1,6 +1,6 @@
module doc_reform.meta.metadoc_summary;
-template DocReformAbstractionSummary() {
- void DocReformAbstractionSummary(S,T)(
+template DocReformMetaDocSummary() {
+ void DocReformMetaDocSummary(S,T)(
const S doc_abstraction,
T doc_matters,
) {
diff --git a/src/doc_reform/meta/metadochead.d b/src/doc_reform/meta/metadochead.d
deleted file mode 100644
index 05be0a8..0000000
--- a/src/doc_reform/meta/metadochead.d
+++ /dev/null
@@ -1,84 +0,0 @@
-module doc_reform.meta.metadochead;
-template DocReformHarvestGetFromHead() { // TODO
- import
- std.datetime,
- std.getopt,
- std.file,
- std.path,
- std.process;
- import
- doc_reform.meta,
- doc_reform.meta.metadoc_summary,
- doc_reform.meta.metadoc_from_src,
- doc_reform.meta.conf_make_meta_structs,
- doc_reform.meta.conf_make_meta_toml,
- doc_reform.meta.conf_make_meta_json,
- doc_reform.meta.defaults,
- doc_reform.meta.doc_debugs,
- doc_reform.meta.rgx,
- doc_reform.source.paths_source,
- doc_reform.source.read_config_files,
- doc_reform.source.read_source_files,
- doc_reform.output.hub;
- mixin DocReformRgxInit;
- mixin contentJSONtoDocReformStruct;
- mixin DocReformBiblio;
- mixin DocReformRgxInitFlags;
- mixin outputHub;
- enum headBody { header, body_content, insert_file_list, image_list }
- enum makeMeta { make, meta }
- static auto rgx = Rgx();
- auto DocReformHarvestGetFromHead(E,O,M)( // TODO
- E _env,
- O _opt_action,
- M _manifest
- ){
- auto _config_document_struct = readConfigDoc!()(_manifest, _env); // document config file
- auto _config_local_site_struct = readConfigSite!()(_manifest, _env); // local site config
- ConfCompositePlus _make_and_meta_struct;
- _make_and_meta_struct = configParseTOMLreturnDocReformStruct!()(_make_and_meta_struct, _config_document_struct);
- _make_and_meta_struct = configParseTOMLreturnDocReformStruct!()(_make_and_meta_struct, _config_local_site_struct);
- /+ ↓ read file (filename with path) +/
- /+ ↓ file tuple of header and content +/
- if ((_opt_action.debug_do)
- || (_opt_action.very_verbose)
- ) {
- writeln("step1 commence → (get document header & body & insert file list & if needed image list)"
- );
- }
- auto _header_body_insertfilelist_imagelist
- = DocReformRawMarkupContent!()(_opt_action, _manifest.src.path_and_fn);
- static assert(!isTypeTuple!(_header_body_insertfilelist_imagelist));
- static assert(_header_body_insertfilelist_imagelist.length==4);
- if ((_opt_action.debug_do)
- || (_opt_action.very_verbose)
- ) {
- writeln("- step1 complete");
- }
- debug(header_and_body) {
- writeln(header);
- writeln(_header_body_insertfilelist_imagelist.length);
- writeln(_header_body_insertfilelist_imagelist.length[headBody.body_content][0]);
- }
- /+ ↓ split header into make and meta +/
- if ((_opt_action.debug_do)
- || (_opt_action.very_verbose)
- ) {
- writeln("step2 commence → (read document header - toml, return struct)");
- }
- _make_and_meta_struct =
- docHeaderMakeAndMetaTupTomlExtractAndConvertToStruct!()(
- _make_and_meta_struct,
- _header_body_insertfilelist_imagelist[headBody.header]
- );
- if ((_opt_action.debug_do)
- || (_opt_action.very_verbose)
- ) {
- writeln("- step2 complete");
- }
-
- auto t = tuple(doc_matters_shared, doc_matters_abridged_collected);
- static assert(t.length==2);
- return t;
- }
-}
diff --git a/src/doc_reform/meta/rgx.d b/src/doc_reform/meta/rgx.d
index 373400f..544b432 100644
--- a/src/doc_reform/meta/rgx.d
+++ b/src/doc_reform/meta/rgx.d
@@ -7,6 +7,7 @@ static template DocReformRgxInit() {
static struct Rgx {
/+ misc +/
static true_dollar = ctRegex!(`\$`, "gm");
+ static sep = ctRegex!(`␣`, "gm");
static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`);
static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`);
static within_quotes = ctRegex!(`"(.+?)"`, "m");
@@ -43,7 +44,7 @@ static template DocReformRgxInit() {
/+ header +/
static variable_doc_title = ctRegex!(`@title`);
static variable_doc_author = ctRegex!(`@author|@creator`);
- static raw_author_munge = ctRegex!(`(\S.+?),\s+(.+)`,"i");
+ static raw_author_munge = ctRegex!(`(?P<last>\S.+?),\s+(?P<first>.+)`,"i");
static toml_header_meta_title = ctRegex!(`^\s*(title\s*=\s*"|\[title\])`, "m");
/+ heading & paragraph operators +/
static heading_a = ctRegex!(`^:?[A][~] `, "m");
@@ -191,6 +192,10 @@ static template DocReformRgxInit() {
static bi_main_term_plus_rest_split = ctRegex!(`\s*:\s*`);
static bi_sub_terms_plus_object_number_offset_split = ctRegex!(`\s*\|\s*`);
static bi_term_and_object_numbers_match = ctRegex!(`^(.+?)\+(\d+)`);
+ static topic_register_main_terms_split = ctRegex!(`\s*;\s*`);
+ static topic_register_main_term_plus_rest_split = ctRegex!(`\s*:\s*`);
+ static topic_register_sub_terms_split = ctRegex!(`\s*\|\s*`);
+ static topic_register_multiple_sub_terms_split = ctRegex!(`␣([^|␣]+(?:\|[^|␣]+)+)`);
/+ language codes +/
auto language_codes =
ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)");