From b7347229818ecc8727242ae7275acbf326805cc1 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Thu, 20 Jun 2019 13:40:57 -0400 Subject: harvest html output --- src/doc_reform/doc_reform.d | 598 ++++++++++++++++++++++++--- src/doc_reform/meta/conf_make_meta_json.d | 12 +- src/doc_reform/meta/conf_make_meta_structs.d | 2 + src/doc_reform/meta/metadoc_harvest.d | 6 + 4 files changed, 551 insertions(+), 67 deletions(-) (limited to 'src/doc_reform') diff --git a/src/doc_reform/doc_reform.d b/src/doc_reform/doc_reform.d index dcceef4..550ee28 100755 --- a/src/doc_reform/doc_reform.d +++ b/src/doc_reform/doc_reform.d @@ -93,16 +93,21 @@ void main(string[] args) { mixin outputHub; struct Harvest { string title = ""; + string[] author_arr = []; string author = ""; + string author_surname = ""; string author_surname_fn = ""; string language = ""; string language_original = ""; string uid = ""; string date_published = ""; - string[] topic_register_arr = [""]; - string html_seg_toc = ""; - string html_scroll = ""; - string epub = ""; + string[] topic_register_arr = []; + string path_html_seg = ""; + string path_html_scroll = ""; + string path_epub = ""; + string url_html_seg = ""; + string url_html_scroll = ""; + string url_epub = ""; } Harvest harvested; Harvest[] harvests; @@ -829,7 +834,9 @@ void main(string[] args) { string[] _document_topic_register; string[] _topic_register; string[] _sub_topic_register; - string[][string][string][string][string] subject_trees; + Harvest[][string][string][string][string] subject_trees; + string[] topics = []; + string _auth = ""; foreach(k, doc_harvest; harvests) { _topic_register = []; foreach(topic; doc_harvest.topic_register_arr.sort) { @@ -838,38 +845,16 @@ void main(string[] args) { string[] subject_tree = topic.split(mkup.sep); switch (subject_tree.length) { case 1: - if (subject_tree[0] in subject_trees) { - subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] ~= doc_harvest.uid; - } else { - subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] = [doc_harvest.uid]; - } + subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] ~= doc_harvest; break; case 2: - if (subject_tree[0] in subject_trees - && subject_tree[1] in subject_trees[subject_tree[0]]) { - subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] ~= doc_harvest.uid; - } else { - subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] = [doc_harvest.uid]; - } + subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] ~= doc_harvest; break; case 3: - if (subject_tree[0] in subject_trees - && subject_tree[1] in subject_trees[subject_tree[0]] - && subject_tree[2] in subject_trees[subject_tree[0]][subject_tree[1]]) { - subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] ~= doc_harvest.uid; - } else { - subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] = [doc_harvest.uid]; - } + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] ~= doc_harvest; break; case 4: - if (subject_tree[0] in subject_trees - && subject_tree[1] in subject_trees[subject_tree[0]] - && subject_tree[2] in subject_trees[subject_tree[0]][subject_tree[1]] - && subject_tree[3] in subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]) { - subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] ~= doc_harvest.uid; - } else { - subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] = [doc_harvest.uid]; - } + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] ~= doc_harvest; break; default: break; @@ -889,71 +874,560 @@ void main(string[] args) { _topic_register.sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable).release.join("\n"), ); } + topics ~= format(q"┃ + + + +Metadata Harvest - Topics + + + + + + + + + + + + + +

Metadata Harvest - Topics (output organised by language & filetype)

+

[ HOME ] also see Metadata Harvest - Authors

+

ABCDEFGHIJKLMNOPQRSTUVWXYZ,  +

+
+

English   

+
+┃") ~ "\n"; + char _prev_k = "_".to!char; + int _kn; foreach(k0; - (subject_trees.keys) + subject_trees.keys .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) ) { + if (k0.toUpper.to!(char[])[0] != _prev_k) { + topics ~= format(q"┃

%s

┃", + k0.toUpper.to!(char[])[0], + k0.toUpper.to!(char[])[0], + ); + _prev_k = k0.toUpper.to!(char[])[0]; + } if (k0 != "_a") { + topics ~= format(q"┃

%s

┃", + k0, k0,) ~ "\n"; writeln("", k0); if ("_a" in subject_trees[k0]) { - foreach (uid; subject_trees[k0]["_a"]["_a"]["_a"].sort) { - writeln("- ", uid); - } - } - } - foreach(k1; - (subject_trees[k0].keys) - .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) - ) { - if (k1 != "_a") { - writeln(" ", k1); - if ("_a" in subject_trees[k0][k1]) { - foreach (uid; subject_trees[k0][k1]["_a"]["_a"].sort) { - writeln(" - ", uid); + foreach (t_a_; + subject_trees[k0]["_a"]["_a"]["_a"] + .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) + ) { + _auth = []; + if (t_a_.author_arr.length < 2) { + _auth = format(q"┃ %s┃", + t_a_.author_surname, + t_a_.author, + ); + } else { + foreach (a; t_a_.author_arr) { + _auth ~= format(q"┃ %s,┃", + t_a_.author_surname, + a, + ); + } } + topics ~= format(q"┃

"%s" -%s┃", + "url", + t_a_.title, + _auth, + ) ~ "\n"; + writeln("- ", t_a_.title, " - ", t_a_.author); } } - foreach(k2; - (subject_trees[k0][k1].keys) + foreach(k1; + subject_trees[k0].keys .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) ) { - if (k2 != "_a") { - writeln(" ", k2); - if ("_a" in subject_trees[k0][k1][k2]) { - foreach (uid; subject_trees[k0][k1][k2]["_a"].sort) { - writeln(" - ", uid); + if (k1 != "_a") { + topics ~= format(q"┃

%s

┃", + k1, k1,) ~ "\n"; + writeln(" ", k1); + if ("_a" in subject_trees[k0][k1]) { + foreach (t_a_; + subject_trees[k0][k1]["_a"]["_a"] + .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) + ) { + _auth = []; + if (t_a_.author_arr.length < 2) { + _auth = format(q"┃ %s┃", + t_a_.author_surname, + t_a_.author, + ); + } else { + foreach (a; t_a_.author_arr) { + _auth ~= format(q"┃ %s,┃", + t_a_.author_surname, + a, + ); + } + } + topics ~= format(q"┃

%s -%s┃", + "url", + t_a_.title, + _auth, + ) ~ "\n"; + writeln(" - ", t_a_.title, " - ", t_a_.author); } } } - foreach(k3; - (subject_trees[k0][k1][k2].keys) + foreach(k2; + subject_trees[k0][k1].keys .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) ) { - if (k3 != "_a") { - writeln(" ", k3); - foreach (uid; subject_trees[k0][k1][k2][k3]) { - writeln(" - ", uid); + if (k2 != "_a") { + topics ~= format(q"┃

%s

┃", + k2, k2,) ~ "\n"; + writeln(" ", k2); + if ("_a" in subject_trees[k0][k1][k2]) { + foreach (t_a_; + subject_trees[k0][k1][k2]["_a"] + .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) + ) { + _auth = []; + if (t_a_.author_arr.length < 2) { + _auth = format(q"┃ %s┃", + t_a_.author_surname, + t_a_.author, + ); + } else { + foreach (a; t_a_.author_arr) { + _auth ~= format(q"┃ %s,┃", + t_a_.author_surname, + a, + ); + } + } + topics ~= format(q"┃

%s -%s┃", + "url", + t_a_.title, + _auth, + ) ~ "\n"; + writeln(" - ", t_a_.title, " - ", t_a_.author); + } + } + } + foreach(k3; + subject_trees[k0][k1][k2].keys + .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) + ) { + if (k3 != "_a") { + topics ~= format(q"┃

%s

┃", + k3, k3,) ~ "\n"; + writeln(" ", k3); + { + foreach (t_a_; + subject_trees[k0][k1][k2][k3] + .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) + ) { + _auth = []; + if (t_a_.author_arr.length < 2) { + _auth = format(q"┃%s┃", + t_a_.author_surname, + t_a_.author, + ); + } else { + foreach (a; t_a_.author_arr) { + _auth ~= format(q"┃ %s,┃", + t_a_.author_surname, + a, + ); + } + } + topics ~= format(q"┃

%s -%s┃", + "url", + t_a_.title, + _auth, + ) ~ "\n"; + writeln(" - ", t_a_.title, " - ", t_a_.author); + } + } } } } } } } + topics ~= format(q"┃ +


+ + + + + + + + +┃") ~ "\n"; + try { + auto f = File("topics.html", "w"); + foreach (o; topics) { + f.writeln(o); + } + } catch (ErrnoException ex) { + // Handle error + } } if ((_opt_action.verbose || _opt_action.very_verbose) && harvests.length > 0 ) { + string[] authors = []; + authors ~= format(q"┃ + + + + +Metadata Harvest - Authors + + + + + + + + + + + + + +

Metadata Harvest - Authors (output organised by language & filetype)

+

[ HOME ] also see Metadata Harvest - Topics

+

+
+

English   

+
+

ABCDEFGHIJKLMNOPQRSTUVWXYZ,  +┃") ~ "\n"; + string[string] _au; + string[] _auth_date_title; string[] _author_date_title; - foreach(doc_harvest; harvests) { - _author_date_title ~= format( - "%s %s \"%s\" [%s]", + string _prev_auth = ""; + char _prev_k = "_".to!char; + foreach(doc_harvest; + harvests + .multiSort!( + "toUpper(a.author_surname_fn) < toUpper(b.author_surname_fn)", + "a.date_published < b.date_published", + "a.title < b.title", + SwapStrategy.unstable + ) + ) { + if (doc_harvest.author_surname_fn != _prev_auth) { + _au[doc_harvest.author_surname_fn] + = format(q"┃

%s

%s "%s" [%s]

┃", + doc_harvest.author_surname, + doc_harvest.author_surname_fn, + (doc_harvest.date_published.length > 0) + ? doc_harvest.date_published : "", + "url", + doc_harvest.title, + doc_harvest.language, + ); + _prev_auth = doc_harvest.author_surname_fn; + } else { + _au[doc_harvest.author_surname_fn] + ~= format(q"┃

%s "%s" [%s]

┃", + (doc_harvest.date_published.length > 0) + ? doc_harvest.date_published : "", + "url", + doc_harvest.title, + doc_harvest.language, + ); + } + _author_date_title ~= format(q"┃%s %s "%s" [%s]┃", doc_harvest.author_surname_fn, (doc_harvest.date_published.length > 0) ? "(" ~ doc_harvest.date_published ~ ")" : "", doc_harvest.title, doc_harvest.language, ); + // writeln(doc_harvest.author_date_title); + } + foreach (k; _au.keys.sort) { + if (k.toUpper.to!(char[])[0] != _prev_k) { + authors ~= format(q"┃

%s

┃", + k.toUpper.to!(char[])[0], + k.toUpper.to!(char[])[0], + ); + _prev_k = k.toUpper.to!(char[])[0]; + } + authors ~= _au[k]; + } + authors ~= format(q"┃ +
+ + + + + + + + +┃") ~ "\n"; + try { + auto f = File("authors.html", "w"); + foreach (o; authors) { + f.writeln(o); + } + } catch (ErrnoException ex) { + // Handle error } foreach(_adt; _author_date_title.sort) { writeln(_adt); diff --git a/src/doc_reform/meta/conf_make_meta_json.d b/src/doc_reform/meta/conf_make_meta_json.d index 45d3a5a..7ad4744 100644 --- a/src/doc_reform/meta/conf_make_meta_json.d +++ b/src/doc_reform/meta/conf_make_meta_json.d @@ -387,13 +387,13 @@ static template contentJSONtoDocReformStruct() { _struct_composite.meta.creator_translator = _json.object["creator"]["translator"].str; } } - string[] authors_arr; + string[] author_arr; string[][string] authors_hash_arr = [ "first" : [], "last" : [], "full" : [], "last_first" : [], "as_input" : [] ]; string[] authors_raw_arr = _struct_composite.meta.creator_author.split(rgx.arr_delimiter); auto _lastname = appender!(char[])(); foreach (author_raw; authors_raw_arr) { - authors_arr ~= author_raw.replace(rgx.raw_author_munge, "$2 $1"); + author_arr ~= author_raw.replace(rgx.raw_author_munge, "$2 $1"); authors_hash_arr["first"] ~= author_raw.replace(rgx.raw_author_munge, "$2"); authors_hash_arr["last"] ~= author_raw.replace(rgx.raw_author_munge, "$1"); authors_hash_arr["full"] ~= author_raw.replace(rgx.raw_author_munge, "$2 $1"); @@ -404,7 +404,9 @@ static template contentJSONtoDocReformStruct() { _lastname = appender!(char[])(); } } - _struct_composite.meta.creator_author = authors_arr.join(", ").chomp.chomp; + _struct_composite.meta.creator_author_arr = author_arr; + _struct_composite.meta.creator_author = author_arr.join(", ").chomp.chomp; + _struct_composite.meta.creator_author_surname = authors_hash_arr["last"][0]; string _author_name_last_first = authors_hash_arr["last_first"].join("; ").chomp.chomp; _struct_composite.meta.creator_author_surname_fn = (_author_name_last_first.length > 0) ? _author_name_last_first @@ -490,8 +492,8 @@ static template contentJSONtoDocReformStruct() { if ("topic_register" in _json.object["classify"] && (_json.object["classify"]["topic_register"].type().to!string == "string") ) { - _struct_composite.meta.classify_topic_register = _json.object["classify"]["topic_register"].str; - string[] main_topics_ = _struct_composite.meta.classify_topic_register.split(rgx.topic_register_main_terms_split); + _struct_composite.meta.classify_topic_register = _json.object["classify"]["topic_register"].str.strip; + string[] main_topics_ = _struct_composite.meta.classify_topic_register.strip.split(rgx.topic_register_main_terms_split); string[] topics; string topics_tmp; string[] multiple_sub_terms; diff --git a/src/doc_reform/meta/conf_make_meta_structs.d b/src/doc_reform/meta/conf_make_meta_structs.d index 3bd29e2..6c230df 100644 --- a/src/doc_reform/meta/conf_make_meta_structs.d +++ b/src/doc_reform/meta/conf_make_meta_structs.d @@ -182,8 +182,10 @@ struct MetaComposite { string classify_subject; string classify_topic_register; string[] classify_topic_register_arr; + string[] creator_author_arr; string creator_author; string creator_author_surname_fn; + string creator_author_surname; string creator_author_email; string creator_illustrator; string creator_translator; diff --git a/src/doc_reform/meta/metadoc_harvest.d b/src/doc_reform/meta/metadoc_harvest.d index da1cae8..26f2b38 100644 --- a/src/doc_reform/meta/metadoc_harvest.d +++ b/src/doc_reform/meta/metadoc_harvest.d @@ -20,14 +20,20 @@ template DocReformMetaDocHarvest() { std.conv : to; mixin InternalMarkup; auto markup = InlineMarkup(); + import doc_reform.output.paths_output; + auto pth_html = DocReformPathsHTML!()(doc_matters.output_path, doc_matters.src.language); harvest.title = doc_matters.conf_make_meta.meta.title_full; harvest.author = doc_matters.conf_make_meta.meta.creator_author; + harvest.author_surname = doc_matters.conf_make_meta.meta.creator_author_surname; harvest.author_surname_fn = doc_matters.conf_make_meta.meta.creator_author_surname_fn; + harvest.author_arr = doc_matters.conf_make_meta.meta.creator_author_arr; harvest.language_original = doc_matters.conf_make_meta.meta.original_language; harvest.language = doc_matters.src.language; harvest.uid = doc_matters.src.doc_uid; harvest.date_published = doc_matters.conf_make_meta.meta.date_published; harvest.topic_register_arr = doc_matters.conf_make_meta.meta.classify_topic_register_arr; + harvest.path_html_scroll = pth_html.fn_scroll(doc_matters.src.filename); + harvest.path_html_seg = pth_html.fn_seg(doc_matters.src.filename, "toc"); return harvest; } } -- cgit v1.2.3