From e973365c4b74be2b2cff9be970ccba5928dbe368 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 22 May 2019 10:50:33 -0400 Subject: 0.7.3 start to look at document harvest (initial stub) --- src/doc_reform/meta/conf_make_meta_json.d | 215 ++++++++++++++++++------------ 1 file changed, 128 insertions(+), 87 deletions(-) (limited to 'src/doc_reform/meta/conf_make_meta_json.d') diff --git a/src/doc_reform/meta/conf_make_meta_json.d b/src/doc_reform/meta/conf_make_meta_json.d index fcd52c1..5fd4499 100644 --- a/src/doc_reform/meta/conf_make_meta_json.d +++ b/src/doc_reform/meta/conf_make_meta_json.d @@ -5,6 +5,8 @@ module doc_reform.meta.conf_make_meta_json; static template contentJSONtoDocReformStruct() { import + std.algorithm, + std.array, std.exception, std.regex, std.stdio, @@ -16,6 +18,7 @@ static template contentJSONtoDocReformStruct() { import doc_reform.meta.conf_make_meta_structs, doc_reform.meta.conf_make_meta_json, + doc_reform.meta.defaults, doc_reform.meta.rgx; ConfCompositePlus _struct_composite; auto contentJSONtoDocReformStruct(C,J)(C _struct_composite, J _json, string _identifier) { @@ -361,6 +364,108 @@ static template contentJSONtoDocReformStruct() { } } /+ meta ------------------------------------------------------------------- +/ + if (_struct_composite.meta.creator_author.empty) { + if ("creator" in _json.object) { + if ("author" in _json.object["creator"] + && (_json.object["creator"]["author"].type().to!string == "string") + ) { + _struct_composite.meta.creator_author = _json.object["creator"]["author"].str; + } + if ("email" in _json.object["creator"] + && (_json.object["creator"]["email"].type().to!string == "string") + ) { + _struct_composite.meta.creator_author_email = _json.object["creator"]["email"].str; + } + if ("illustrator" in _json.object["creator"] + && (_json.object["creator"]["illustrator"].type().to!string == "string") + ) { + _struct_composite.meta.creator_illustrator = _json.object["creator"]["illustrator"].str; + } + if ("translator" in _json.object["creator"] + && (_json.object["creator"]["translator"].type().to!string == "string") + ) { + _struct_composite.meta.creator_translator = _json.object["creator"]["translator"].str; + } + } + string[] authors_arr; + string[][string] authors_hash_arr = [ "first" : [], "last" : [], "full" : [], "last_first" : [], "as_input" : [] ]; + string[] authors_raw_arr + = _struct_composite.meta.creator_author.split(rgx.arr_delimiter); + auto _lastname = appender!(char[])(); + foreach (author_raw; authors_raw_arr) { + authors_arr ~= author_raw.replace(rgx.raw_author_munge, "$2 $1"); + authors_hash_arr["first"] ~= author_raw.replace(rgx.raw_author_munge, "$2"); + authors_hash_arr["last"] ~= author_raw.replace(rgx.raw_author_munge, "$1"); + authors_hash_arr["full"] ~= author_raw.replace(rgx.raw_author_munge, "$2 $1"); + authors_hash_arr["as_input"] ~= author_raw; + if (auto m = author_raw.match(rgx.raw_author_munge)) { + (m.captures[1]).map!toUpper.copy(_lastname); + authors_hash_arr["last_first"] ~= _lastname.data.to!string ~ ", " ~ m.captures[2]; + _lastname = appender!(char[])(); + } + } + _struct_composite.meta.creator_author = authors_arr.join(", ").chomp.chomp; + string _author_name_last_first = authors_hash_arr["last_first"].join("; ").chomp.chomp; + _struct_composite.meta.creator_author_surname_fn = (_author_name_last_first.length > 0) + ? _author_name_last_first + : authors_hash_arr["as_input"].join("; ").chomp.chomp; + } + if (_struct_composite.meta.title_main.empty) { + if ("title" in _json.object) { + if ((_json.object["title"].type().to!string) == "string") { + _struct_composite.meta.title_main = _json.object["title"].str; + } else { + if ("edition" in _json.object["title"] + && (_json.object["title"]["edition"].type().to!string == "string") + ) { + _struct_composite.meta.title_edition = _json.object["title"]["edition"].str; + } + if ("full" in _json.object["title"] + && (_json.object["title"]["full"].type().to!string == "string") + ) {} + if ("language" in _json.object["title"] + && (_json.object["title"]["language"].type().to!string == "string") + ) { + _struct_composite.meta.title_language = _json.object["title"]["language"].str; + } + if ("main" in _json.object["title"] + && (_json.object["title"]["main"].type().to!string == "string") + ) { + _struct_composite.meta.title_main = _json.object["title"]["main"].str; + } else if ("title" in _json.object["title"] + && (_json.object["title"]["title"].type().to!string == "string") + ) { + _struct_composite.meta.title_main = _json.object["title"]["title"].str; + } + if ("note" in _json.object["title"] + && (_json.object["title"]["note"].type().to!string == "string") + ) { + _struct_composite.meta.title_note = _json.object["title"]["note"].str; + } + if ("sub" in _json.object["title"] + && (_json.object["title"]["sub"].type().to!string == "string") + ) { + _struct_composite.meta.title_sub = _json.object["title"]["sub"].str; + } + if ("subtitle" in _json.object["title"] + && (_json.object["title"]["subtitle"].type().to!string == "string") + ) { + _struct_composite.meta.title_subtitle = _json.object["title"]["subtitle"].str; + } + } + } + if ((!(_struct_composite.meta.title_subtitle.empty)) + && (_struct_composite.meta.title_sub.empty)) { + _struct_composite.meta.title_sub = _struct_composite.meta.title_subtitle; + } + _struct_composite.meta.title_full = (_struct_composite.meta.title_sub.empty) + ? _struct_composite.meta.title_main + : format( + "%s - %s", + _struct_composite.meta.title_main, + _struct_composite.meta.title_sub, + ); + } if ("classify" in _json.object) { if ("dewey" in _json.object["classify"] && (_json.object["classify"]["dewey"].type().to!string == "string") @@ -386,6 +491,22 @@ static template contentJSONtoDocReformStruct() { && (_json.object["classify"]["topic_register"].type().to!string == "string") ) { _struct_composite.meta.classify_topic_register = _json.object["classify"]["topic_register"].str; + string[] main_topics_ = _struct_composite.meta.classify_topic_register.split(rgx.topic_register_main_terms_split); + string[] topics; + string topics_tmp; + string[] multiple_sub_terms; + foreach (mt; main_topics_) { + topics_tmp = mt.replaceAll(rgx.topic_register_main_term_plus_rest_split, mkup.sep); + if (auto m = topics_tmp.match(rgx.topic_register_multiple_sub_terms_split)) { + multiple_sub_terms = m.captures[1].split(rgx.topic_register_sub_terms_split); + foreach (subterm; multiple_sub_terms) { + topics ~= m.captures.pre ~ mkup.sep ~ subterm; + } + } else { + topics ~= topics_tmp; + } + } + _struct_composite.meta.classify_topic_register_arr = topics; } } if ("date" in _json.object) { @@ -424,6 +545,13 @@ static template contentJSONtoDocReformStruct() { ) { _struct_composite.meta.date_valid = _json.object["date"]["valid"].str; } + _struct_composite.meta.author_date_title = format( + "%s %s \"%s\"", + _struct_composite.meta.creator_author_surname_fn, + (_struct_composite.meta.date_published.length > 0) + ? "(" ~ _struct_composite.meta.date_published ~ ")" : "", + _struct_composite.meta.title_full, + ); } if ("links" in _json.object) {} if ("notes" in _json.object) { @@ -508,93 +636,6 @@ static template contentJSONtoDocReformStruct() { _struct_composite.meta.rights_license = _json.object["rights"]["license"].str; } } - if (_struct_composite.meta.creator_author.empty) { - if ("creator" in _json.object) { - if ("author" in _json.object["creator"] - && (_json.object["creator"]["author"].type().to!string == "string") - ) { - _struct_composite.meta.creator_author = _json.object["creator"]["author"].str; - } - if ("email" in _json.object["creator"] - && (_json.object["creator"]["email"].type().to!string == "string") - ) { - _struct_composite.meta.creator_author_email = _json.object["creator"]["email"].str; - } - if ("illustrator" in _json.object["creator"] - && (_json.object["creator"]["illustrator"].type().to!string == "string") - ) { - _struct_composite.meta.creator_illustrator = _json.object["creator"]["illustrator"].str; - } - if ("translator" in _json.object["creator"] - && (_json.object["creator"]["translator"].type().to!string == "string") - ) { - _struct_composite.meta.creator_translator = _json.object["creator"]["translator"].str; - } - } - string[] authors_arr; - string[] authors_raw_arr - = _struct_composite.meta.creator_author.split(rgx.arr_delimiter); - foreach (author_raw; authors_raw_arr) { - authors_arr ~= author_raw.replace(rgx.raw_author_munge, "$2 $1"); - } - _struct_composite.meta.creator_author = join(authors_arr, ", ").chomp.chomp; - } - if (_struct_composite.meta.title_main.empty) { - if ("title" in _json.object) { - if ((_json.object["title"].type().to!string) == "string") { - _struct_composite.meta.title_main = _json.object["title"].str; - } else { - if ("edition" in _json.object["title"] - && (_json.object["title"]["edition"].type().to!string == "string") - ) { - _struct_composite.meta.title_edition = _json.object["title"]["edition"].str; - } - if ("full" in _json.object["title"] - && (_json.object["title"]["full"].type().to!string == "string") - ) {} - if ("language" in _json.object["title"] - && (_json.object["title"]["language"].type().to!string == "string") - ) { - _struct_composite.meta.title_language = _json.object["title"]["language"].str; - } - if ("main" in _json.object["title"] - && (_json.object["title"]["main"].type().to!string == "string") - ) { - _struct_composite.meta.title_main = _json.object["title"]["main"].str; - } else if ("title" in _json.object["title"] - && (_json.object["title"]["title"].type().to!string == "string") - ) { - _struct_composite.meta.title_main = _json.object["title"]["title"].str; - } - if ("note" in _json.object["title"] - && (_json.object["title"]["note"].type().to!string == "string") - ) { - _struct_composite.meta.title_note = _json.object["title"]["note"].str; - } - if ("sub" in _json.object["title"] - && (_json.object["title"]["sub"].type().to!string == "string") - ) { - _struct_composite.meta.title_sub = _json.object["title"]["sub"].str; - } - if ("subtitle" in _json.object["title"] - && (_json.object["title"]["subtitle"].type().to!string == "string") - ) { - _struct_composite.meta.title_subtitle = _json.object["title"]["subtitle"].str; - } - } - } - if ((!(_struct_composite.meta.title_subtitle.empty)) - && (_struct_composite.meta.title_sub.empty)) { - _struct_composite.meta.title_sub = _struct_composite.meta.title_subtitle; - } - _struct_composite.meta.title_full = (_struct_composite.meta.title_sub.empty) - ? _struct_composite.meta.title_main - : format( - "%s - %s", - _struct_composite.meta.title_main, - _struct_composite.meta.title_sub, - ); - } return _struct_composite; } } -- cgit v1.2.3