#+TITLE: doc_reform hub
#+DESCRIPTION: documents - structuring, various output representations & search
#+FILETAGS: :doc_reform:hub:
#+AUTHOR: Ralph Amissah
#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+COPYRIGHT: Copyright (C) 2015 - 2019 Ralph Amissah
#+LANGUAGE: en
#+STARTUP: indent content hideblocks hidestars
#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
#+OPTIONS: author:nil email:nil creator:nil timestamp:nil
#+PROPERTY: header-args :results silent :padline no :exports code :cache no :noweb yes
#+EXPORT_SELECT_TAGS: export
#+EXPORT_EXCLUDE_TAGS: noexport
#+TAGS: assert(a) class(c) debug(d) mixin(m) doc_reform(s) tangle(T) template(t) WEB(W) noexport(n)

[[../maker.org][maker.org makefile]] [[./][org/]]
* 0. version.txt (set version) :version:
** set program version

- set program version

#+BEGIN_SRC d :tangle "../views/version.txt"
/+ obt - org generated file +/
/+ program version triple; this file is pulled into the program with
   mixin(import("version.txt")) and read as _ver.major/.minor/.patch +/
struct Version {
  int major;
  int minor;
  int patch;
}
enum _ver = Version(0, 7, 3);
#+END_SRC

** compilation restrictions (supported compilers)

- set compilation restrictions

http://dlang.org/spec/version.html#predefined-versions

#+BEGIN_SRC d :tangle "../views/version.txt"
/+ compile-time gate: build only on Posix, and only with one of the
   DigitalMars, LDC or GNU compilers; anything else stops the build +/
version (Posix) {
  version (DigitalMars) {
  } else version (LDC) {
  } else version (GNU) {
  } else {
    static assert (0, "Unsupported D compiler");
  }
} else {
  /+ this branch is reached because of the platform, not the compiler +/
  static assert (0, "Unsupported platform: a Posix system is required");
}
#+END_SRC

* 1. doc_reform (sisu document parser) :doc_reform:
** notes

- deal with imports
- get options
- get command line instructions
- read config instructions
- process files as instructed by options
- read in file
- process file
- output

** 0. 
doc_reform src/doc_reform :template: - process files (act according to requirements of each type) - by sourcefilename - by sourcefiles contents identifier - by zip filename #+BEGIN_SRC d :tangle "../src/doc_reform/doc_reform.d" :shebang #!/usr/bin/env rdmd <> module doc_reform.sisu_document_parser; import doc_reform.conf.compile_time_info, doc_reform.meta.metadoc; <> import std.algorithm; import std.parallelism; <> <> string program_name = "doc-reform"; /++ name "doc_reform" description "A SiSU inspired document parser writen in D." homepage "http://sisudoc.org" +/ void main(string[] args) { <> <> <> <> if (_manifests.length > 1 // _manifests[0] initialized dummy element && _opt_action.abstraction) { if (_opt_action.parallelise) { // note cannot parallelise sqlite shared db foreach(manifest; parallel(_manifests[1..$])) { if (!empty(manifest.src.filename)) { <> <> <> <> <> } else { <> } } } else { foreach(manifest; _manifests[1..$]) { writeln("parallelisation off: actions include sqlite shared db"); if (!empty(manifest.src.filename)) { <> <> <> <> <> } else { <> } } } } if (_opt_action.very_verbose && harvests.length > 0 ) { auto min_repeat_number = 42; string[] _document_topic_register; string[] _topic_register; string[] _sub_topic_register; Harvest[][string][string][string][string] subject_trees; string[] topics = []; string _auth = ""; foreach(k, doc_harvest; harvests) { _topic_register = []; foreach(topic; doc_harvest.topic_register_arr.sort) { _sub_topic_register = []; string _spaces; string[] subject_tree = topic.split(mkup.sep); switch (subject_tree.length) { case 1: subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] ~= doc_harvest; break; case 2: subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] ~= doc_harvest; break; case 3: subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] ~= doc_harvest; break; case 4: subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] ~= doc_harvest; break; default: break; } 
_topic_register ~= _sub_topic_register.join("\n"); } auto char_repeat_number = (doc_harvest.title.length + doc_harvest.author.length + 16); char_repeat_number = (char_repeat_number > min_repeat_number) ? char_repeat_number : min_repeat_number; _document_topic_register ~= format( "\"%s\", %s%s\n%s", doc_harvest.title, doc_harvest.author, (doc_harvest.date_published.length > 0) ? " (" ~ doc_harvest.date_published ~ ")" : "", _topic_register.sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable).release.join("\n"), ); } topics ~= format(q"┃ Metadata Harvest - Topics

Metadata Harvest - Topics (output organised by language & filetype)

[ HOME ] also see Metadata Harvest - Authors

ABCDEFGHIJKLMNOPQRSTUVWXYZ


English   


┃") ~ "\n"; char _prev_k = "_".to!char; int _kn; foreach(k0; subject_trees.keys .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) ) { if (k0.toUpper.to!(char[])[0] != _prev_k) { topics ~= format(q"┃

%s

┃", k0.toUpper.to!(char[])[0], k0.toUpper.to!(char[])[0], ); _prev_k = k0.toUpper.to!(char[])[0]; } if (k0 != "_a") { topics ~= format(q"┃

%s

┃", k0, k0,) ~ "\n"; writeln("", k0); if ("_a" in subject_trees[k0]) { foreach (t_a_; subject_trees[k0]["_a"]["_a"]["_a"] .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) ) { _auth = []; if (t_a_.author_arr.length < 2) { _auth = format(q"┃ %s┃", t_a_.author_surname, t_a_.author, ); } else { foreach (a; t_a_.author_arr) { _auth ~= format(q"┃ %s,┃", t_a_.author_surname, a, ); } } topics ~= format(q"┃

"%s" -%s┃", "url", t_a_.title, _auth, ) ~ "\n"; writeln("- ", t_a_.title, " - ", t_a_.author); } } foreach(k1; subject_trees[k0].keys .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) ) { if (k1 != "_a") { topics ~= format(q"┃

%s

┃", k1, k1,) ~ "\n"; writeln(" ", k1); if ("_a" in subject_trees[k0][k1]) { foreach (t_a_; subject_trees[k0][k1]["_a"]["_a"] .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) ) { _auth = []; if (t_a_.author_arr.length < 2) { _auth = format(q"┃ %s┃", t_a_.author_surname, t_a_.author, ); } else { foreach (a; t_a_.author_arr) { _auth ~= format(q"┃ %s,┃", t_a_.author_surname, a, ); } } topics ~= format(q"┃

%s -%s┃", "url", t_a_.title, _auth, ) ~ "\n"; writeln(" - ", t_a_.title, " - ", t_a_.author); } } } foreach(k2; subject_trees[k0][k1].keys .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) ) { if (k2 != "_a") { topics ~= format(q"┃

%s

┃", k2, k2,) ~ "\n"; writeln(" ", k2); if ("_a" in subject_trees[k0][k1][k2]) { foreach (t_a_; subject_trees[k0][k1][k2]["_a"] .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) ) { _auth = []; if (t_a_.author_arr.length < 2) { _auth = format(q"┃ %s┃", t_a_.author_surname, t_a_.author, ); } else { foreach (a; t_a_.author_arr) { _auth ~= format(q"┃ %s,┃", t_a_.author_surname, a, ); } } topics ~= format(q"┃

%s -%s┃", "url", t_a_.title, _auth, ) ~ "\n"; writeln(" - ", t_a_.title, " - ", t_a_.author); } } } foreach(k3; subject_trees[k0][k1][k2].keys .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) ) { if (k3 != "_a") { topics ~= format(q"┃

%s

┃", k3, k3,) ~ "\n"; writeln(" ", k3); { foreach (t_a_; subject_trees[k0][k1][k2][k3] .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) ) { _auth = []; if (t_a_.author_arr.length < 2) { _auth = format(q"┃%s┃", t_a_.author_surname, t_a_.author, ); } else { foreach (a; t_a_.author_arr) { _auth ~= format(q"┃ %s,┃", t_a_.author_surname, a, ); } } topics ~= format(q"┃

%s -%s┃", "url", t_a_.title, _auth, ) ~ "\n"; writeln(" - ", t_a_.title, " - ", t_a_.author); } } } } } } } } topics ~= format(q"┃


┃") ~ "\n"; try { auto f = File("topics.html", "w"); foreach (o; topics) { f.writeln(o); } } catch (ErrnoException ex) { // Handle error } } if ((_opt_action.verbose || _opt_action.very_verbose) && harvests.length > 0 ) { string[] authors = []; authors ~= format(q"┃ Metadata Harvest - Authors

Metadata Harvest - Authors (output organised by language & filetype)

[ HOME ] also see Metadata Harvest - Topics


English   


ABCDEFGHIJKLMNOPQRSTUVWXYZ,  ┃") ~ "\n"; string[string] _au; string[] _auth_date_title; string[] _author_date_title; string _prev_auth = ""; char _prev_k = "_".to!char; foreach(doc_harvest; harvests .multiSort!( "toUpper(a.author_surname_fn) < toUpper(b.author_surname_fn)", "a.date_published < b.date_published", "a.title < b.title", SwapStrategy.unstable ) ) { if (doc_harvest.author_surname_fn != _prev_auth) { _au[doc_harvest.author_surname_fn] = format(q"┃

%s

%s "%s" [%s]

┃", doc_harvest.author_surname, doc_harvest.author_surname_fn, (doc_harvest.date_published.length > 0) ? doc_harvest.date_published : "", "url", doc_harvest.title, doc_harvest.language, ); _prev_auth = doc_harvest.author_surname_fn; } else { _au[doc_harvest.author_surname_fn] ~= format(q"┃

%s "%s" [%s]

┃", (doc_harvest.date_published.length > 0) ? doc_harvest.date_published : "", "url", doc_harvest.title, doc_harvest.language, ); } _author_date_title ~= format(q"┃%s %s "%s" [%s]┃", doc_harvest.author_surname_fn, (doc_harvest.date_published.length > 0) ? "(" ~ doc_harvest.date_published ~ ")" : "", doc_harvest.title, doc_harvest.language, ); // writeln(doc_harvest.author_date_title); } foreach (k; _au.keys.sort) { if (k.toUpper.to!(char[])[0] != _prev_k) { authors ~= format(q"┃

%s

┃", k.toUpper.to!(char[])[0], k.toUpper.to!(char[])[0], ); _prev_k = k.toUpper.to!(char[])[0]; } authors ~= _au[k]; } authors ~= format(q"┃
┃") ~ "\n"; try { auto f = File("authors.html", "w"); foreach (o; authors) { f.writeln(o); } } catch (ErrnoException ex) { // Handle error } foreach(_adt; _author_date_title.sort) { writeln(_adt); } } } #+END_SRC *** document header including copyright & license #+NAME: doc_header_including_copyright_and_license #+BEGIN_SRC text /+ - Name: Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator - Author: Ralph Amissah [ralph.amissah@gmail.com] - Copyright: (C) 2015 - 2019 Ralph Amissah, All Rights Reserved. - License: AGPL 3 or later: Doc Reform (SiSU), a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU AFFERO General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see [http://www.gnu.org/licenses/]. If you have Internet connection, the latest version of the AGPL should be available at these locations: [http://www.fsf.org/licensing/licenses/agpl.html] [http://www.gnu.org/licenses/agpl.html] - Doc Reform (related to SiSU) uses standard: - docReform markup syntax - standard SiSU markup syntax with modified headers and minor modifications - docReform object numbering - standard SiSU object citation numbering & system - Homepages: [http://www.doc_reform.org] [http://www.sisudoc.org] - Git [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] +/ #+END_SRC ** 1. 
pre-loop init :init: *** init **** imports :import: ***** doc_reform :doc_reform: #+NAME: imports_doc_reform #+BEGIN_SRC d import std.datetime, std.getopt, std.file, std.path, std.process; import doc_reform.meta, doc_reform.meta.metadoc_summary, doc_reform.meta.metadoc_harvest, doc_reform.meta.metadoc_from_src, doc_reform.meta.conf_make_meta_structs, doc_reform.meta.conf_make_meta_toml, doc_reform.meta.conf_make_meta_json, doc_reform.meta.defaults, doc_reform.meta.doc_debugs, doc_reform.meta.rgx, doc_reform.source.paths_source, doc_reform.source.read_config_files, doc_reform.source.read_source_files, doc_reform.output.hub; #+END_SRC ****** notes ├── src │   ├── doc_reform.d │   └── doc_reform │ ├── conf │ ├── meta │ └── output └── views    └── version.txt [[./meta_abstraction.org][meta_abstraction]] [[./meta_conf_make_meta.org][meta_conf_make_meta]] [[./meta_defaults.org][meta_defaults]] [[./meta_output_debugs.org][meta_output_debugs]] [[./source_read_files.org][source_read_files]] [[./compile_time_info.org][compile time info]] [[./output.org][output]] [[./doc_reform.org][doc_reform]] keep up to date, configuration in ../maker.org check: - http://github.com/Abscissa/SDLang-D - https://github.com/abscissa/libInputVisitor std.conv, std.variant, - https://code.dlang.org/packages/toml - https://github.com/toml-lang/toml - https://github.com/toml-lang/toml/blob/master/README.md **** mixins :mixin: ***** version.txt :version: #+NAME: mixin_doc_reform_version #+BEGIN_SRC d mixin(import("version.txt")); #+END_SRC ***** pre main mixins #+NAME: mixin_pre_main #+BEGIN_SRC d mixin CompileTimeInfo; #+END_SRC ***** doc_reform "main" mixins :doc_reform: #+NAME: doc_reform_mixin #+BEGIN_SRC d mixin DocReformRgxInit; mixin contentJSONtoDocReformStruct; mixin DocReformBiblio; mixin DocReformRgxInitFlags; mixin outputHub; #+END_SRC **** init :init: #+NAME: doc_reform_init #+BEGIN_SRC d struct Harvest { string title = ""; string[] author_arr = []; string author = ""; string 
author_surname = "";
  string author_surname_fn = "";
  string language = "";
  string language_original = "";
  string uid = "";
  string date_published = "";
  string[] topic_register_arr = [];
  string path_html_seg = "";
  string path_html_scroll = "";
  string path_epub = "";
  string url_html_seg = "";
  string url_html_scroll = "";
  string url_epub = "";
}
Harvest harvested;
Harvest[] harvests;
#+END_SRC

**** args :args:

#+NAME: doc_reform_init
#+BEGIN_SRC d
string flag_action;       // accumulates flag-like args not consumed by getopt
string arg_unrecognized;  // accumulates args matching no known pattern
enum dAM { abstraction, matters } // indices into the abstraction result tuple
static auto rgx = Rgx();
#+END_SRC

*** scope (run complete) :scope:

#+NAME: doc_reform_init
#+BEGIN_SRC d
// report program name/version and compiler details once the run succeeds
scope(success) {
  writefln(
    "~ run complete, ok ~ (%s-%s.%s.%s, %s D:%s, %s %s)",
    program_name, _ver.major, _ver.minor, _ver.patch,
    __VENDOR__, __VERSION__,
    bits, os,
  );
}
// failure notice is only emitted in debug(checkdoc) builds
scope(failure) {
  debug(checkdoc) {
    stderr.writefln(
      "run failure",
    );
  }
}
#+END_SRC

*** getopt args for loop :args:getopt:
**** set getopt options

- set getopt options

#+NAME: doc_reform_args
#+BEGIN_SRC d
// boolean switches with their defaults; every key addressed by the getopt
// call below must already exist here, since getopt is given &opts[key]
bool[string] opts = [
  "abstraction" : false,
  "assertions" : false,
  "concordance" : false,
  "dark" : false,
  "debug" : false,
  "digest" : false,
  "epub" : false,
  "harvest" : false,
  "harvest-authors" : false,
  "harvest-topics" : false,
  "html" : false,
  "html-seg" : false,
  "html-scroll" : false,
  "latex" : false,
  "light" : false,
  "manifest" : false,
  "ocn" : true,
  "odf" : false,
  "odt" : false,
  "parallel" : false,
  "parallel-subprocesses" : false,
  "pdf" : false,
  "quiet" : false,
  "pod" : false,
  "serial" : false,
  "source" : false,
  "sqlite-discrete" : false,
  "sqlite-db-create" : false,
  "sqlite-db-drop" : false,
  "sqlite-db-recreate" : false,
  "sqlite-delete" : false,
  "sqlite-insert" : false,
  "sqlite-update" : false,
  "text" : false,
  "verbose" : false,
  "very-verbose" : false,
  "xhtml" : false,
  "section_toc" : true,
  "section_body" : true,
  "section_endnotes" : true,
  "section_glossary" : true,
  "section_biblio" : true,
  "section_bookindex" : true,
  "section_blurb" : true,
  "backmatter" : true,
  "skip-output" : false,
  "theme-dark" : false,
  "theme-light" : false,
  "workon" : false,
];
// string-valued settings with their defaults
string[string] settings = [
  "output-dir" : "",
  "site-config-dir" : "",
  "lang" : "all",
  "sqlite-filename" : "documents",
];
// passThrough leaves unrecognised arguments in args for the later file loop;
// each entry is: option name, help text, target
auto helpInfo = getopt(args,
  std.getopt.config.passThrough,
  "abstraction", "--abstraction document abstraction ", &opts["abstraction"],
  "assert", "--assert set optional assertions on", &opts["assertions"],
  "concordance", "--concordance file for document", &opts["concordance"],
  "dark", "--dark alternative dark theme", &opts["dark"],
  "debug", "--debug", &opts["debug"],
  "digest", "--digest hash digest for each object", &opts["digest"],
  "epub", "--epub process epub output", &opts["epub"],
  "harvest", "--harvest extract info on authors & topics from document header metadata", &opts["harvest"],
  "harvest-authors", "--harvest-authors extract info on authors from document header metadata", &opts["harvest-authors"],
  "harvest-topics", "--harvest-topics extract info on topics from document header metadata", &opts["harvest-topics"],
  "html", "--html process html output", &opts["html"],
  "html-seg", "--html-seg process html output", &opts["html-seg"],
  "html-scroll", "--html-scroll process html output", &opts["html-scroll"],
  "latex", "--latex output for pdfs", &opts["latex"],
  "light", "--light default light theme", &opts["light"],
  "manifest", "--manifest process manifest output", &opts["manifest"],
  "ocn", "--ocn object cite numbers (default)", &opts["ocn"],
  "odf", "--odf open document format text (--odt)", &opts["odf"],
  "odt", "--odt open document format text", &opts["odt"],
  "parallel", "--parallel parallelisation", &opts["parallel"],
  "parallel-subprocesses", "--parallel-subprocesses nested parallelisation", &opts["parallel-subprocesses"],
  "quiet|q", "--quiet output to terminal", &opts["quiet"],
  "pdf", "--pdf latex output for pdfs", &opts["pdf"],
  "pod", "--pod doc reform pod source content bundled", &opts["pod"],
  "serial", "--serial serial processing", &opts["serial"],
  "source", "--source markup source text content", &opts["source"],
  "sqlite-discrete", "--sqlite-discrete process discrete sqlite output", &opts["sqlite-discrete"],
  "sqlite-db-create", "--sqlite-db-create create db, create tables", &opts["sqlite-db-create"],
  "sqlite-db-drop", "--sqlite-db-drop drop tables & db", &opts["sqlite-db-drop"],
  "sqlite-db-recreate", "--sqlite-db-recreate create db, create tables", &opts["sqlite-db-recreate"],
  "sqlite-delete", "--sqlite-delete process sqlite output", &opts["sqlite-delete"],
  "sqlite-insert", "--sqlite-insert process sqlite output", &opts["sqlite-insert"],
  "sqlite-update", "--sqlite-update process sqlite output", &opts["sqlite-update"],
  "text", "--text process text output", &opts["text"],
  "txt", "--txt process text output", &opts["text"], // alias: writes the same "text" switch
  "verbose|v", "--verbose output to terminal", &opts["verbose"],
  "very-verbose", "--very-verbose output to terminal", &opts["very-verbose"],
  "xhtml", "--xhtml process xhtml output", &opts["xhtml"],
  "section-toc", "--section-toc process table of contents (default)", &opts["section_toc"],
  "section-body", "--section-body process document body (default)", &opts["section_body"],
  "section-endnotes", "--section-endnotes process document endnotes (default)", &opts["section_endnotes"],
  "section-glossary", "--section-glossary process document glossary (default)", &opts["section_glossary"],
  "section-biblio", "--section-biblio process document biblio (default)", &opts["section_biblio"],
  "section-bookindex", "--section-bookindex process document bookindex (default)", &opts["section_bookindex"],
  "section-blurb", "--section-blurb process document blurb (default)", &opts["section_blurb"],
  "backmatter", "--backmatter process document backmatter (default)", &opts["backmatter"],
  "skip-output", "--skip-output", &opts["skip-output"],
  "theme-dark", "--theme-dark alternative dark theme", &opts["theme-dark"],
  "theme-light", "--theme-light default light theme", &opts["theme-light"],
  "workon", "--workon (reserved for some matters under development & testing)", &opts["workon"],
  "output-dir", "--output-dir=[dir path]", &settings["output-dir"],
  "site-config-dir", "--site-config-dir=[dir path]", &settings["site-config-dir"],
  "sqlite-filename", "--sqlite-filename=[filename].sqlite", &settings["sqlite-filename"],
  "lang", "--lang=[lang code e.g. =en or =en,es]", &settings["lang"],
);
// print the option summary on -h/--help; execution continues afterwards
if (helpInfo.helpWanted) {
  defaultGetoptPrinter("Some information about the program.", helpInfo.options);
}
#+END_SRC

**** getopt hash to struct

- move getopt options to struct

#+NAME: doc_reform_args
#+BEGIN_SRC d
// output task identifiers, used as schedule entries by output_task_scheduler
enum outTask { pod, source, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, html_stuff }
// read-only accessor view over the opts / settings tables above
struct OptActions {
  bool assertions() {
    return opts["assertions"];
  }
  bool concordance() {
    return opts["concordance"];
  }
  // true selects the light theme; light wins when both are requested,
  // and light is also the fallback when neither is requested
  bool css_theme_default() {
    bool _is_light;
    if (opts["light"] || opts["theme-light"]) {
      _is_light = true;
    } else if (opts["dark"] || opts["theme-dark"]) {
      _is_light = false;
    } else {
      _is_light = true;
    }
    return _is_light;
  }
  bool debug_do() {
    return opts["debug"];
  }
  bool digest() {
    return opts["digest"];
  }
  bool epub() {
    return opts["epub"];
  }
  // true when any of the three harvest switches is set
  bool harvest() {
    bool _is = (
      opts["harvest"]
      || opts["harvest-authors"]
      || opts["harvest-topics"]
    ) ?
true : false; return _is; } bool harvest_authors() { return opts["harvest-authors"]; } bool harvest_topics() { return opts["harvest-topics"]; } bool html() { bool _is; if ( opts["html"] || opts["html-seg"] || opts["html-scroll"]) { _is = true; } else { _is = false; } return _is; } bool html_seg() { bool _is; if ( opts["html"] || opts["html-seg"]) { _is = true; } else { _is = false; } return _is; } bool html_scroll() { bool _is; if ( opts["html"] || opts["html-scroll"]) { _is = true; } else { _is = false; } return _is; } bool html_stuff() { bool _is; if (opts["html"] || opts["html-scroll"] || opts["html-seg"]) { _is = true; } else { _is = false; } return _is; } bool latex() { bool _is; if ( opts["latex"] || opts["pdf"]) { _is = true; } else { _is = false; } return _is; } bool odt() { bool _is; if ( opts["odf"] || opts["odt"]) { _is = true; } else { _is = false; } return _is; } bool manifest() { return opts["manifest"]; } bool ocn() { return opts["ocn"]; } bool quiet() { return opts["quiet"]; } bool pod() { return opts["pod"]; } bool source() { return opts["source"]; } bool sqlite_discrete() { return opts["sqlite-discrete"]; } bool sqlite_db_drop() { bool _is; if ( opts["sqlite-db-recreate"] || opts["sqlite-db-drop"]) { _is = true; } else { _is = false; } return _is; } bool sqlite_db_create() { bool _is; if ( opts["sqlite-db-recreate"] || opts["sqlite-db-create"]) { _is = true; } else { _is = false; } return _is; } bool sqlite_delete() { return opts["sqlite-delete"]; } bool sqlite_update() { bool _is; if (opts["sqlite-update"] || opts["sqlite-insert"]) { _is = true; } else { _is = false; } return _is; } bool sqlite_shared_db_action() { bool _is; if (opts["sqlite-db-recreate"] || opts["sqlite-db-create"] || opts["sqlite-delete"] || opts["sqlite-insert"] || opts["sqlite-update"] ) { _is = true; } else { _is = false; } return _is; } bool text() { return opts["text"]; } bool verbose() { bool _is; if (opts["verbose"] || opts["very-verbose"]) { _is = true; } else { _is = 
false; } return _is; } bool very_verbose() { return opts["very-verbose"]; } bool xhtml() { return opts["xhtml"]; } bool section_toc() { return opts["section_toc"]; } bool section_body() { return opts["section_body"]; } bool section_endnotes() { return opts["section_endnotes"]; } bool section_glossary() { return opts["section_glossary"]; } bool section_biblio() { return opts["section_biblio"]; } bool section_bookindex() { return opts["section_bookindex"]; } bool section_blurb() { return opts["section_blurb"]; } bool backmatter() { return opts["backmatter"]; } bool skip_output() { return opts["skip-output"]; } bool workon() { return opts["workon"]; } auto languages_set() { return settings["lang"].split(","); } auto output_dir_set() { return settings["output-dir"]; } auto sqlite_filename() { return settings["sqlite-filename"]; } bool parallelise() { bool _is; if (opts["parallel"] == true) { _is = true; if (sqlite_shared_db_action) { _is = false; } } else if (opts["parallel"] == false && opts["serial"] == true) { _is = false; } else if (opts["abstraction"] || concordance || html || epub || odt || latex || manifest || pod || source || sqlite_discrete ) { _is = true; } else { _is = false; } return _is; } bool parallelise_subprocesses() { return opts["parallel-subprocesses"]; } auto output_task_scheduler() { int[] schedule; if (pod) { schedule ~= outTask.pod; } if (source) { schedule ~= outTask.source; } if (sqlite_discrete) { schedule ~= outTask.sqlite; } if (epub) { schedule ~= outTask.epub; } if (html_scroll) { schedule ~= outTask.html_scroll; } if (html_seg) { schedule ~= outTask.html_seg; } if (html_stuff) { schedule ~= outTask.html_stuff; } if (odt) { schedule ~= outTask.odt; } if (latex) { schedule ~= outTask.latex; } return schedule.sort().uniq; } bool abstraction() { bool _is; if (opts["abstraction"] || concordance || source || pod || html || epub || odt || latex || manifest || sqlite_discrete || sqlite_delete || sqlite_update ) { _is = true; } else { _is = 
false; } return _is; } bool meta_processing_general() { bool _is; if (opts["abstraction"] || html || epub || odt || latex || sqlite_discrete || sqlite_update ) { _is = true; } else { _is = false; } return _is; } bool meta_processing_xml_dom() { bool _is; if (opts["abstraction"] || html || epub || odt || sqlite_discrete || sqlite_update ) { _is = true; } else { _is = false; } return _is; } } auto _opt_action = OptActions(); #+END_SRC **** env - environmental info #+NAME: doc_reform_args #+BEGIN_SRC d auto program_info() { struct ProgramInfo { string name() { return program_name; } string ver() { string ver_ = format( "%s.%s.%s", _ver.major, _ver.minor, _ver.patch, ); return ver_; } } return ProgramInfo(); } auto _env = [ "pwd" : environment["PWD"], "home" : environment["HOME"], ]; #+END_SRC *** opt actions on processing files & file paths (pods, src etc.) #+NAME: doc_reform_args #+BEGIN_SRC d auto _manifest_start = PodManifest!()(""); auto _manifest_matter = PathMatters!()(_opt_action, _env, ""); auto _manifests = [ _manifest_matter ]; foreach(arg; args[1..$]) { _manifest_start = PodManifest!()(arg); if (arg.match(rgx.flag_action)) { flag_action ~= " " ~ arg; // flags not taken by getopt } else if ( !(arg.match(rgx.src_pth_sst_or_ssm)) && _manifest_start.pod_manifest_file_with_path && _opt_action.abstraction ) { string contents_location_raw_; string contents_location_; string sisudoc_txt_ = _manifest_start.pod_manifest_file_with_path; enforce( exists(sisudoc_txt_)!=0, "file not found: «" ~ sisudoc_txt_ ~ "»" ); if (exists(sisudoc_txt_)) { try { if (exists(sisudoc_txt_)) { contents_location_raw_ = sisudoc_txt_.readText; } } catch (ErrnoException ex) { } catch (FileException ex) { // Handle errors } if (contents_location_raw_.match(rgx.pod_content_location)) { // (file name followed by language codes \n)+ foreach (m; contents_location_raw_.matchAll(rgx.pod_content_location)) { foreach (n; m.captures[2].matchAll(rgx.language_codes)) { contents_location_ ~= 
"media/text/" ~ n.captures[1].to!string ~ "/" ~ m.captures[1].to!string ~ "\n"; } } } else { contents_location_ = contents_location_raw_; } } else { writeln("manifest not found: ", sisudoc_txt_); } auto contents_locations_arr = (cast(char[]) contents_location_).split; auto tmp_dir_ = (sisudoc_txt_).dirName.array; foreach (contents_location; contents_locations_arr) { assert(contents_location.match(rgx.src_pth_sst_or_ssm), "not a recognised file: «" ~ contents_location ~ "»" ); auto contents_location_pth_ = (contents_location).to!string; Regex!(char) lang_rgx_ = regex(r"/(" ~ _opt_action.languages_set.join("|") ~ ")/"); if (_opt_action.languages_set[0] == "all" || (contents_location_pth_).match(lang_rgx_) ) { auto _fns = (((tmp_dir_).chainPath(contents_location_pth_)).array).to!string; _manifest_matter = PathMatters!()(_opt_action, _env, arg, _fns, contents_locations_arr); _manifests ~= _manifest_matter; } } } else if (arg.match(rgx.src_pth_sst_or_ssm)) { if (exists(arg)==0) { writeln("ERROR >> Processing Skipped! 
File not found: ", arg); } else { _manifest_matter = PathMatters!()(_opt_action, _env, arg, arg); _manifests ~= _manifest_matter; } } else if (arg.match(rgx.src_pth_zip)) { // fns_src ~= arg; // gather input markup source file names for processing } else { // anything remaining, unused arg_unrecognized ~= " " ~ arg; } } #+END_SRC *** config files load & read #+NAME: doc_reform_conf_files_in_toml #+BEGIN_SRC d ConfCompositePlus _make_and_meta_struct; { /+ document config file +/ auto _config_document_struct = readConfigDoc!()(_manifest, _env); _make_and_meta_struct = _config_document_struct.configParseTOMLreturnDocReformStruct!()(_make_and_meta_struct, _manifest); } { /+ local site config +/ auto _config_local_site_struct = readConfigSite!()(_manifest, _env); _make_and_meta_struct = _config_local_site_struct.configParseTOMLreturnDocReformStruct!()(_make_and_meta_struct, _manifest); } #+END_SRC *** opt actions independent of processing files (no files no processing loop) #+NAME: doc_reform_do_selected #+BEGIN_SRC d if (!(_opt_action.skip_output)) { if ((_opt_action.debug_do) || (_opt_action.very_verbose) ) { writeln("step0 commence → (without processing files)"); } outputHubOp!()(_env, _opt_action); if ((_opt_action.debug_do) || (_opt_action.very_verbose) ) { writeln("- step0 complete"); } } #+END_SRC ** _2. processing: (loop each file)_ [+2] :loop:files: *** scope (loop) :scope: #+NAME: doc_reform_each_file_do_scope #+BEGIN_SRC d scope(success) { if (!(_opt_action.quiet)) { writefln( "%s", "-- ~ document complete, ok ~ ------------------------------------", ); } } scope(failure) { debug(checkdoc) { stderr.writefln( "~ document run failure ~ (%s v%s)\n\t%s\n%s", __VENDOR__, __VERSION__, manifest.src.filename, "------------------------------------------------------------------", ); } } enforce( manifest.src.filename.match(rgx.src_pth_types), "not a sisu markup filename: «" ~ manifest.src.filename ~ "»" ); #+END_SRC *** 1. 
_document abstraction_ [#A]

- return tuple of:
- doc_abstraction (the document)
- doc_matters

#+NAME: doc_reform_each_file_do_abstraction
#+BEGIN_SRC d
/+ build the abstraction for the current manifest and unpack the
   two-element result into doc_abstraction and doc_matters +/
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("--->\nstepX commence → (document abstraction)");
}
auto t = DocReformAbstraction!()(_env, program_info, _opt_action, manifest);
/+ compile-time checks on the shape of the result: exactly two elements +/
static assert(!isTypeTuple!(t));
static assert(t.length==2);
/+ dAM supplies the element indices (abstraction, matters) +/
auto doc_abstraction = t[dAM.abstraction];
auto doc_matters = t[dAM.matters];
if ((doc_matters.opt.action.debug_do)
|| (doc_matters.opt.action.very_verbose)
) {
  writeln("- stepX complete");
}
#+END_SRC

*** 2. _output processing_ (post abstraction processing)
**** 0. abstraction _print summary_ :abstraction:summary:

- abstraction summary

#+NAME: doc_reform_each_file_do_debugs_checkdoc
#+BEGIN_SRC d
/+ ↓ debugs +/
/+ summary is produced only when the verbose action is set +/
if (doc_matters.opt.action.verbose) {
  DocReformMetaDocSummary!()(doc_abstraction, doc_matters);
}
#+END_SRC

**** 0. abstraction harvest :abstraction:harvest:

- abstraction harvest

#+NAME: doc_reform_each_file_do_debugs_checkdoc
#+BEGIN_SRC d
/+ collect this document's harvest record and append it to harvests +/
if (doc_matters.opt.action.harvest) {
  /+ the two branches below are empty: no author- or topic-specific
     work is done here +/
  if (doc_matters.opt.action.harvest_authors) {
  }
  if (doc_matters.opt.action.harvest_topics) {
  }
  /+ local helper: appends one record and returns the resulting array +/
  Harvest[] DocReformMetaDocHarvests()(
    Harvest harvested,
    Harvest[] harvests,
  ) {
    harvests ~= harvested;
    return harvests;
  }
  /+ NOTE(review): harvested and harvests are declared outside this snippet;
     if this snippet is expanded inside the parallel foreach in main, these
     assignments are unsynchronised - confirm +/
  harvested = DocReformMetaDocHarvest!()(doc_matters, harvested);
  harvests = DocReformMetaDocHarvests!()(harvested, harvests);
}
#+END_SRC

**** 1. _debug_ (document parts, checkdoc) :debug:checkdoc:

- [[./meta_output_debugs.org][meta_output_debugs]]

#+NAME: doc_reform_each_file_do_debugs_checkdoc
#+BEGIN_SRC d
/+ ↓ debugs +/
/+ debug output is produced only when the debug action is set +/
if (doc_matters.opt.action.debug_do) {
  DocReformDebugs!()(doc_abstraction, doc_matters);
}
#+END_SRC

**** 2. 
_process outputs_ :outputs: - [[./output_hub.org][output_hub]] #+NAME: doc_reform_each_file_do_selected_output #+BEGIN_SRC d /+ ↓ output hub +/ if (!(doc_matters.opt.action.skip_output)) { if ((_opt_action.debug_do) || (_opt_action.very_verbose) ) { writeln("step5 commence → (process outputs)"); } doc_abstraction.outputHub!()(doc_matters); if ((_opt_action.debug_do) || (_opt_action.very_verbose) ) { writeln("- step5 complete"); } } #+END_SRC *** scope (on loop exit) :scope:exit: #+NAME: doc_reform_each_file_do_scope_exit #+BEGIN_SRC d scope(exit) { if (!(_opt_action.quiet)) { writefln( "processed file: %s (%s)", manifest.src.filename, manifest.src.language ); } destroy(manifest); } #+END_SRC ** +2c. no valid filename provided+ #+NAME: doc_reform_no_filename_provided #+BEGIN_SRC d /+ no recognized filename provided +/ writeln("no recognized filename"); break; // terminate, stop #+END_SRC * 2. pre-processing ** Output _document abstraction functions_ :module:doc_reform:abstraction: *** 0 module template - abstraction template #+BEGIN_SRC d :tangle "../src/doc_reform/meta/metadoc.d" module doc_reform.meta.metadoc; template DocReformAbstraction() { <> <> enum headBody { header, body_content, insert_file_list, image_list } enum makeMeta { make, meta } enum docAbst { doc_abstract_obj, doc_has } static auto rgx = Rgx(); auto DocReformAbstraction(E,P,O,M)( E _env, P program_info, O _opt_action, M _manifest ){ <> <> <> <> <> auto t = tuple(doc_abstraction, doc_matters); static assert(t.length==2); return t; } } #+END_SRC ** Output & Harvest pre-processing *** 1. raw file content split, doc: _header_, _content_ +(lists: subdocs? images?) 
- [[./source_read_files.org][source_read_files]]
  return tuple: header; body; insert file list; image list

- read in the _marked up source document_ and
- split the document into:
  - document header
  - document body
    - from markup source
    - if master document from sub documents content
- if a master document
  - make a list of insert files
- if build source pod requested
  - scan for list of images (action avoided if not needed at this stage)
- _return a tuple of_:
  - header
  - body
  - insert file list
  - image list (if build source pod requested)

if build source pod requested all information needed to build it available at this point
- manifest related information _manifest
- insert file list _header_body_insertfilelist_imagelist[headBody.insert_file_list]
- image list _header_body_insertfilelist_imagelist[headBody.image_list]

#+NAME: doc_reform_each_file_do_read_and_split_sisu_markup_file_content_into_header_and_body
#+BEGIN_SRC d
/+ ↓ read file (filename with path) +/
/+ ↓ file tuple of header and content +/
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("step1 commence → (get document header & body & insert file list & if needed image list)"
  );
}
/+ 4-tuple, indexed with headBody: header, body_content, insert_file_list, image_list +/
auto _header_body_insertfilelist_imagelist
  = DocReformRawMarkupContent!()(_opt_action, _manifest.src.path_and_fn);
static assert(!isTypeTuple!(_header_body_insertfilelist_imagelist));
static assert(_header_body_insertfilelist_imagelist.length==4);
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("- step1 complete");
}
debug(header_and_body) {
  /+ index the tuple itself: the header element, the tuple length,
     then the first element of the body content +/
  writeln(_header_body_insertfilelist_imagelist[headBody.header]);
  writeln(_header_body_insertfilelist_imagelist.length);
  writeln(_header_body_insertfilelist_imagelist[headBody.body_content][0]);
}
#+END_SRC

*** 2.
_document metadata_ & _make instructions_ (struct from toml)

- [[./meta_conf_make_meta.org][meta_conf_make_meta]]
  return tuple: document metadata; make instructions

- read _document header_, split into:
  - metadata
  - make instructions
- read config files
  - consolidate make instructions
- _return tuple of_:
  - document metadata
  - make instructions (from configuration files & document header make instructions)

#+NAME: doc_reform_each_file_do_split_sisu_markup_file_header_into_make_and_meta_structs
#+BEGIN_SRC d
/+ ↓ split header into make and meta +/
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("step2 commence → (read document header - toml, return struct)");
}
/+ the existing _make_and_meta_struct is passed in and replaced by the result,
   together with the header element of the raw-content tuple +/
_make_and_meta_struct =
  docHeaderMakeAndMetaTupTomlExtractAndConvertToStruct!()(
    _make_and_meta_struct,
    _header_body_insertfilelist_imagelist[headBody.header],
    _manifest,
  );
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("- step2 complete");
}
#+END_SRC

*** 3. _document abstraction, tuple_ (output-pre-processing) [#A]

- [[./meta_abstraction.org][meta_abstraction]]
  return tuple: document abstraction; abstraction keys; segnames; image list

- prepare the document abstraction used in downstream processing
- _return tuple of_:
  - document abstraction (_the_document_ or doc_abstraction)
  - document abstraction keys
    - (head, toc, body, endnotes, glossary, bibliography, bookindex, blurb, tail)
    - (transfer to _doc_matters_)
  - segnames for html epub (transfer to _doc_matters_)
  - image list (transfer to _doc_matters_)

#+NAME: doc_reform_each_file_do_document_abstraction
#+BEGIN_SRC d
/+ ↓ document abstraction: process document, return abstraction as tuple +/
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("step3 commence → (document abstraction (da); da keys; segnames; doc_matters)");
}
auto da = DocReformDocAbstraction!()(
  _header_body_insertfilelist_imagelist[headBody.body_content],
  _make_and_meta_struct,
  _opt_action,
  _manifest,
  true,
);
/+ da is a 2-tuple, indexed with docAbst: doc_abstract_obj, doc_has +/
static assert(!isTypeTuple!(da));
static assert(da.length==2);
auto doc_abstraction = da[docAbst.doc_abstract_obj]; /+ head ~ toc ~ body ~ endnotes_seg ~ glossary ~ bibliography ~ bookindex ~ blurb; +/
auto _doc_has_struct = da[docAbst.doc_has];
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("- step3 complete");
}
#+END_SRC

*** 4. _document matters_ (doc info gathered, various sources) [#A]

- gather doc matters
- prepare document_matters, miscellany about processing and the document of use
  in downstream processing

**** verbose message

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("step4 commence → (doc_matters)");
}
/+ struct body is continued by the following fragments of the same name
   and closed in the "} close" fragment +/
struct DocumentMatters {
#+END_SRC

**** generator related

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
  /+ name, version and project urls of the generating program;
     name and version are read from program_info +/
  auto generator_program() {
    struct Prog_ {
      auto name() {
        return program_info.name;
      }
      auto ver() {
        return program_info.ver;
      }
      /+ "name-version" +/
      auto name_and_version() {
        return format(
          "%s-%s",
          program_info.name,
          program_info.ver,
        );
      }
      auto url_home() {
        return "http://sisudoc.org";
      }
      auto url_git() {
        return "https://git.sisudoc.org/software/sisu";
      }
    }
    return Prog_();
  }
  /+ current UTC time as "year-month-day [isoWeek/dayOfWeek] hour:minute:second",
     each component converted with to!string (no zero padding),
     month and day of week given as numbers +/
  auto generated_time() {
    auto _st = Clock.currTime(UTC());
    auto _time = _st.year.to!string
      ~ "-" ~ _st.month.to!int.to!string // prefer as month number
      ~ "-" ~ _st.day.to!string
      ~ " [" ~ _st.isoWeek.to!string
      ~ "/" ~ _st.dayOfWeek.to!int.to!string ~ "]"
      ~ " " ~ _st.hour.to!string
      ~ ":" ~ _st.minute.to!string
      ~ ":" ~ _st.second.to!string;
    return _time;
  }
#+END_SRC

**** config make & meta

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
  /+ the make & meta struct obtained from the document header step +/
  auto conf_make_meta() { // TODO meld with all make instructions
    return _make_and_meta_struct;
  }
  /+ the doc_has element of the abstraction tuple (da[docAbst.doc_has]) +/
  auto has() {
    return _doc_has_struct;
  }
#+END_SRC

**** env related

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
  /+ pwd and home as recorded in the manifest's env +/
  auto env() {
    struct Env_ {
      auto pwd() {
        return _manifest.env.pwd;
      }
      auto home() {
        return _manifest.env.home;
      }
    }
    return Env_();
  }
#+END_SRC

**** opt

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
  /+ opt.action gives back _opt_action as received +/
  auto opt() {
    struct Opt_ {
      auto action() {
        /+ getopt options, commandline instructions, raw
         - processing instructions --epub --html etc.
         - command line config instructions --output-path
        +/
        return _opt_action;
      }
    }
    return Opt_();
  }
#+END_SRC

**** output related

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
  /+ source, pod and output path are passed through from the manifest +/
  auto src() {
    return _manifest.src;
  }
  /+ source paths, built from the manifest's pwd and the file's absolute path +/
  auto src_path_info() {
    return DocReformPathsSRC!()(_manifest.env.pwd, _manifest.src.file_with_absolute_path); // would like (to have and use) relative path
  }
  auto pod() {
    return _manifest.pod;
  }
  /+ sqlite.filename is taken from _opt_action.sqlite_filename +/
  auto sqlite() {
    struct SQLite_ {
      string filename() {
        return _opt_action.sqlite_filename;
      }
    }
    return SQLite_();
  }
  auto output_path() {
    return _manifest.output.path;
  }
  /+ insert file list comes from the raw-content tuple,
     image list from the abstraction's doc_has struct +/
  auto srcs() {
    struct SRC_ {
      auto file_insert_list() {
        return _header_body_insertfilelist_imagelist[headBody.insert_file_list];
      }
      auto image_list() {
        return _doc_has_struct.imagelist;
      }
    }
    return SRC_();
  }
#+END_SRC

**** } close

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
}
/+ instance returned to the caller alongside doc_abstraction +/
auto doc_matters = DocumentMatters();
#+END_SRC

**** step complete message

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("- step4 complete");
}
#+END_SRC

* 3. document abstraction _summary_  :module:doc_reform:metadoc_summary:
** 0.
module template metadoc summary

- document summary from abstraction

#+BEGIN_SRC d :tangle "../src/doc_reform/meta/metadoc_summary.d"
module doc_reform.meta.metadoc_summary;
/+ prints a summary of the document abstraction when the verbose option is set +/
template DocReformMetaDocSummary() {
  void DocReformMetaDocSummary(S,T)(
    const S  doc_abstraction,
          T  doc_matters,
  ) {
    <>
    mixin InternalMarkup;
    <>
    if (doc_matters.opt.action.verbose) {
      <>
    }
  }
}
#+END_SRC

** init
*** imports

#+name: metadoc_summary_imports
#+BEGIN_SRC d
import
  doc_reform.meta.defaults,
  doc_reform.meta.rgx;
import
  std.array,
  std.exception,
  std.regex,
  std.stdio,
  std.string,
  std.traits,
  std.typecons,
  std.uni,
  std.utf,
  std.conv : to;
#+END_SRC

*** initialize  :report:

#+name: metadoc_summary_initialize
#+BEGIN_SRC d
auto markup = InlineMarkup();
#+END_SRC

** (last ocn)

#+name: meta_metadoc_summary_document
#+BEGIN_SRC d
/+ last object numbers seen, kept as strings;
   "last_object_number" keeps its placeholder text if no numbered object is found +/
string[string] check = [
  "last_object_number"            : "NA [debug \"checkdoc\" not run]",
  "last_object_number_body"       : "0",
  "last_object_number_book_index" : "0",
];
foreach (k; doc_matters.has.keys_seq.seg) {
  foreach (obj; doc_abstraction[k]) {
    if (obj.metainfo.is_of_part != "empty") {
      if (!empty(obj.metainfo.object_number)) {
        if (k == "body") {
          check["last_object_number_body"] = obj.metainfo.object_number;
        }
        /+ already known to be non-empty here, no second test needed +/
        check["last_object_number"] = obj.metainfo.object_number;
      }
      if (k == "bookindex") {
        if (obj.metainfo.object_number_type == 2) {
          check["last_object_number_book_index"] = obj.metainfo.object_number_book_index;
        }
      }
    }
  }
}
#+END_SRC

** document summary

#+name: meta_metadoc_summary_document
#+BEGIN_SRC d
auto min_repeat_number = 66;
auto char_repeat_number = (doc_matters.conf_make_meta.meta.title_full.length
  + doc_matters.conf_make_meta.meta.creator_author.length + 4);
char_repeat_number = (char_repeat_number > min_repeat_number)
? char_repeat_number
: min_repeat_number;
/+ one writefln call for the whole report;
   5 heading fields, 17 label/value rows, closing rule.
   "* last obj on.#:" is printed as a string (%10-s) rather than converted with
   to!int, so the "NA [...]" placeholder no longer raises a ConvException;
   a numeric string is printed exactly as before +/
writefln(
  "%s\n\"%s\", %s\n%s\n%s\n"                // rule, title, author, filename, rule
  ~ "%30-s%10-d\n%30-s%10-d\n%30-s%10-d\n"  // toc, abstraction, body last obj
  ~ "%30-s%10-d\n%30-s%10-d\n%30-s%10-d\n"  // tables, codeblocks, poems
  ~ "%30-s%10-d\n%30-s%10-d\n%30-s%10-d\n"  // blocks, groups, images
  ~ "%30-s%10-d\n%30-s%10-d\n%30-s%10-d\n"  // endnotes, glossary, biblio
  ~ "%30-s%10-d\n%30-s%10-d\n%30-s%10-d\n"  // bookindex, book idx last obj, blurb
  ~ "%30-s%10-s\n%30-s%10-d\n"              // last obj (string), segments
  ~ "%s",                                   // rule
  markup.repeat_character_by_number_provided("-", char_repeat_number),
  doc_matters.conf_make_meta.meta.title_full,
  doc_matters.conf_make_meta.meta.creator_author,
  doc_matters.src.filename,
  markup.repeat_character_by_number_provided("-", char_repeat_number),
  "- toc arr length:",
  to!int(doc_abstraction["toc"].length),
  "- doc_abstraction arr length:",
  to!int(doc_abstraction["body"].length),
  "  doc body last obj on.#:",
  to!int(check["last_object_number_body"]),
  "  - number of tables:",
  doc_matters.has.tables,
  "  - number of codeblocks:",
  doc_matters.has.codeblocks,
  "  - number of poems:",
  doc_matters.has.poems,
  "  - number of blocks:",
  doc_matters.has.blocks,
  "  - number of groups:",
  doc_matters.has.groups,
  "  - number of images:",
  doc_matters.has.images,
  "- endnotes length:",                              // subtract headings
  (doc_abstraction["endnotes"].length > 2)
  ? (to!int(doc_abstraction["endnotes"].length - 2))
  : 0,
  "- glossary length:",
  (doc_abstraction["glossary"].length > 1)
  ? (to!int(doc_abstraction["glossary"].length))
  : 0,
  "- biblio length:",
  (doc_abstraction["bibliography"].length > 1)
  ? (to!int(doc_abstraction["bibliography"].length))
  : 0,
  "- bookindex length:",
  (doc_abstraction["bookindex"].length > 1)
  ? (to!int(doc_abstraction["bookindex"].length))
  : 0,
  "  book idx last obj on.#:",
  to!int(check["last_object_number_book_index"]),
  "- blurb length:",
  (doc_abstraction["blurb"].length > 1)
  ? (to!int(doc_abstraction["blurb"].length))
  : 0,
  "* last obj on.#:",
  check["last_object_number"],
  "number of segments:",
  (doc_matters.has.segnames_lv4.length > 1)
  ? (to!int(doc_matters.has.segnames_lv4.length))
  : 0,
  markup.repeat_character_by_number_provided("-", min_repeat_number),
);
#+END_SRC

** 0.
module template metadoc harvest

#+BEGIN_SRC d :tangle "../src/doc_reform/meta/metadoc_harvest.d"
module doc_reform.meta.metadoc_harvest;
/+ fills the given harvest from doc_matters and returns it +/
template DocReformMetaDocHarvest() {
  auto DocReformMetaDocHarvest(T,H)(
    T  doc_matters,
    H  harvest,
  ) {
    <>
    mixin InternalMarkup;
    <>
    <>
  }
}
#+END_SRC

** init
*** imports

#+name: metadoc_harvest_imports
#+BEGIN_SRC d
import
  doc_reform.meta.defaults,
  doc_reform.meta.rgx;
import
  std.array,
  std.exception,
  std.regex,
  std.stdio,
  std.string,
  std.traits,
  std.typecons,
  std.uni,
  std.utf,
  std.conv : to;
#+END_SRC

*** initialize  :report:

#+name: metadoc_harvest_initialize
#+BEGIN_SRC d
auto markup = InlineMarkup();
#+END_SRC

** harvest summary

#+name: meta_metadoc_harvest_summary
#+BEGIN_SRC d
/+ rule width: title + author + 4, but never less than min_repeat_number +/
auto min_repeat_number = 66;
auto char_repeat_number = (doc_matters.conf_make_meta.meta.title_full.length
  + doc_matters.conf_make_meta.meta.creator_author.length + 4);
if (!(char_repeat_number > min_repeat_number)) {
  char_repeat_number = min_repeat_number;
}
writefln(
  "%s\n\"%s\", %s\n%s\n%s\n%s",
  markup.repeat_character_by_number_provided("-", char_repeat_number),
  doc_matters.conf_make_meta.meta.title_full,
  doc_matters.conf_make_meta.meta.creator_author,
  doc_matters.src.filename,
  doc_matters.conf_make_meta.meta.classify_topic_register_arr,
  markup.repeat_character_by_number_provided("-", char_repeat_number),
);
#+END_SRC

** return harvest

#+name: meta_metadoc_harvest
#+BEGIN_SRC d
import doc_reform.output.paths_output;
auto pth_html = DocReformPathsHTML!()(doc_matters.output_path, doc_matters.src.language);
{
  /+ document metadata, fetched once for the assignments below +/
  auto _doc_meta = doc_matters.conf_make_meta.meta;
  harvest.title              = _doc_meta.title_full;
  harvest.author             = _doc_meta.creator_author;
  harvest.author_surname     = _doc_meta.creator_author_surname;
  harvest.author_surname_fn  = _doc_meta.creator_author_surname_fn;
  harvest.author_arr         = _doc_meta.creator_author_arr;
  harvest.language_original  = _doc_meta.original_language;
  harvest.date_published     = _doc_meta.date_published;
  harvest.topic_register_arr = _doc_meta.classify_topic_register_arr;
}
/+ source identity and html output locations +/
harvest.language             = doc_matters.src.language;
harvest.uid                  = doc_matters.src.doc_uid;
harvest.path_html_scroll     = pth_html.fn_scroll(doc_matters.src.filename);
harvest.path_html_seg        = pth_html.fn_seg(doc_matters.src.filename, "toc");
return harvest;
#+END_SRC

* __END__ dev notes
** the document notes
*** document sections (table)

|--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---|
| section | part | opt. | | objects | ocn | | |
|--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---|
| front matter | head | * | | | no | | |
|--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---|
| toc | toc | | | generated from headings | no | | |
|--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---|
| body | body | * | | default section | yes | | |
| | | | | - headings | | | |
| | | | | - paras | | | |
| | | | | - code | | | |
| | | | | - poem | | | |
| | | | | - group | | | |
| | | | | - block | | | |
| | | | | - quote | | | |
| | | | | - table | | | |
|--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---|
| back matter | endnote | | | generated from inline note markup | no (each endnote belongs to | | |
| | | | | | a (body) object) | | |
|--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---|
| | glossary | | | identified section, limited markup | possibly, to make searchable | | |
| | | | | - heading | hidden | | |
| | | | | - paras | | | |
|--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | | bibliography | | | generated from inline special markup | possibly, to make searchable | | | | | | | | appended to paragraphs contained in body section | hidden | | | | | | | | - heading | | | | | | | | | - paras | | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | | book index | | | generated from inline special markup | possibly, special numbering or | | | | | | | | - heading | could use term as anchor? | | | | | | | | - paras | to make searchable | | | | | | | | | hidden | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | | blurb | | | identified section, limited markup | no (unless non-substantive | | | | | | | | - heading | given special numbering) | | | | | | | | - paras | | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| *** document objects (table) - check, keep up to date |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | doc object | doc object | attributes | inline | appended | structure | delimiters | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | is_a | is_of_type | | | | | | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | heading | para | - level | - font face | - object number off | - level | - two newlines | | | | - object number | - endnotes | - book index meta | (document structure) | | | | | - object number off | | | | | | 
| | - dummy (toc & seg) | | | | | | | | - tags (internal links) | | | | | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | toc | para | - level | - font face | | | - auto generated from headings | | | | (auto-indent) | - links (auto) | | | | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | para | para | - bullet | - font face | - object number off | | - two newlines | | | | - indent | - links/urls * | - book index meta | | | | | | - object number | - images* | | | | | | | - object number off | - endnotes | | | | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | bookindex | para | - auto indent | - font face | | | - two newlines | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | blurb | para | - bullet | - font face | - object number off | | - two newlines | | | | - indent | - links/urls * | - book index meta | | | | | | - object number | - images* | | | | | | | - object number off | - endnotes | | | | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | group | block | - object number | - font face | - book index meta | - para break | - block tags | | | | - object number off | - links/urls * | | | (group) | | | | | - images* | | | | | | | | - endnotes | | | | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | block | block | - object number | - font face | - book index meta | - new line | - block tags | | | | - object number off | - 
links/urls * | | | (block) | | | | | - images* | | | | | | | | - endnotes | | | | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | quote | block | - object number | - font face | - book index meta | | - block tags | | | | | - endnotes | | | (quote) | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | poem (see verse) | block | | | - book index meta | | - block tags | | | | | | | | (poem) | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | verse (of poem) | | - object number | - font face | | - new line | - (see poem delimiter) | | | | | - endnotes | | - preceding spaces | | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | code | block | - syntax | | | - new line | - block tags | | | | - numbered | | | - preceding spaces | (code) | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | table | block | - object number | | | | - block tags (table) | | | | | | | | (table) | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| | endnote | | | - font face | | | (generated from | | | | | | | | inline markup tags) | | | | | | | | - two newlines | |------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| - consider special treatment for links/urls (& for images?)
take them out of document munge (for various outputs), by storing in own array (within each object struct), and providing info on where in array to extract them from, debating whether necessary or even worthwhile as is extra work **** check |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | | identified by | object notes | attributes | inline | embedded | special | | | | | | | | appended | characters | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | para | heading | level markers | | | - italics | - endnotes | | | | | at start of line | | | | - bibliography | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | paragraph | delimited by two new lines | default object | - indent | - bold | - endnotes | | | | | | [discard leading & | - bullet | - italics | - bibliography | | | | | | newline whitespace] | | - underscore | | | | | | | | | - strikethrough | | | | | | | | | - superscript | | | | | | | | | - subscript | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | block | | open and close tags | | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | TODO | quote | | | - language? | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | TODO | group | | - inline markup applied | - language? 
| as paragraph | - endnotes | | | | | | - [discard leading & | | | - bibliography | | | | | | newline whitespace] | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | TODO | block | | - inline markup applied | | as paragraph | - endnotes | | | | | | - whitespace indentation | | | - bibliography | | | | | | & newlines | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | poem / verse | open and close tags | verse is the object | | | - endnotes | | | | | | - inline markup applied | | | - bibliography | | | | | (for poem) | - whitespace indentation | | | | | | | | | & newlines | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | code | | - contents untouched | - syntax | | | | | | | | - whitespace indentation | - numbered | | | | | | | | & newlines | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | table | | | - column width | | | | | | | | | - heading row | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| *** on abstraction - abstract for downstream processing - identify document structure and objects - identify document structure (headings/levels/sections) - identify objects (headings, paragraphs, tables, code blocks, verse ...) 
- set document, generate common abstraction for downstream parsing - set different _document sections_: - _head_, toc, _body_, endnotes, glossary, bibliography, book index, blurb - _object numbers_, heading/ chapter numbering etc, endnote numbers - _regular ocn_ - body objects - glossary objects - bibliography objects - _special ocn_ - non substantive text (provide special numbers) - blurb objects - book index - special (_exceptions_) - endnotes - unify object representations - multiple markups for same object type given single representation - extract object attributes - unify inline markup on objects - inline markup made easier to identify - simplify downstream parsing *** ocn |-------------+-----------------------+-----------------------+----------------+------| | objects | section / part | ocn described | how used | type | |-------------+-----------------------+-----------------------+----------------+------| | regular ocn | | | | | |-------------+-----------------------+-----------------------+----------------+------| | | body objects | seq. digit | anchor | ocn | | | | [0-9]+ | visible | | |-------------+-----------------------+-----------------------+----------------+------| | | glossary objects | seq. digit | anchor | ocn | | | | [0-9]+ | not-visible | | | | | | (for search) | | |-------------+-----------------------+-----------------------+----------------+------| | | bibliography objects | seq. digit | anchor | ocn | | | | [0-9]+ | not-visible | | | | | | (for search) | | |-------------+-----------------------+-----------------------+----------------+------| | special ocn | | | | | |-------------+-----------------------+-----------------------+----------------+------| | | non-substantive text | x char + seq. digit | anchor | non | | | (within body & blurb) | x[0-9]+ | not-visible | | | | | | (for search) | | |-------------+-----------------------+-----------------------+----------------+------| | | book index | i char + seq. 
digit | anchor | idx | | | | i[0-9]+ | not-visible | | | | | | (for search) | | |-------------+-----------------------+-----------------------+----------------+------| | without ocn | | | | | |-------------+-----------------------+-----------------------+----------------+------| | | endnotes | ocn of parent object | no ocn | fn | | | | + footnote seq. digit | anchor visible | | |-------------+-----------------------+-----------------------+----------------+------| ** doc_reform glossary / terms |------------+-------------------------------------| | doc_reform | sisu document parser | |------------+-------------------------------------| | dmso | document markup, structure, objects | |------------+-------------------------------------| | meta | meta document, document abstraction | | mda | meta, meta document abstraction | | adr | abstract document representation | | dar | document abstract representation | | (da) | (document abstraction) | | | (code representation of document) | |------------+-------------------------------------| | ao | abstract objects | | | (code representation of objects) | |------------+-------------------------------------| consider |-------+----------------------------------------------| | dao | document abstraction, objects | |-------+----------------------------------------------| | daso | document abstraction, structure, objects | |-------+----------------------------------------------| | drso | document representation, structure, objects | |-------+----------------------------------------------| | daows | document abstraction, objects with structure | |-------+----------------------------------------------| ** make config - _composite make_ work on composite make a unification of make instructions for each document run extract instructions from all config files, unify the make instructions and provide the result as a single set of make instructions for each document parsed - 1. 
general, document_make config file (to be applied to all documents unless overridden by document or command line instruction) - 2. local, site specific (site local instructions such as the site's url, cgi location etc.) - 3. each document header, make (the document header contains metadata and may include make instructions for that document) - make - meta - 4. command line instruction, make (some make instructions may be passed through the command line) *** instruction sources |----+---------------------------------+----------------------------------------+---------------------+---| | | make instruction source | | varies (applies to) | | |----+---------------------------------+----------------------------------------+---------------------+---| | 0. | unify the following as a single | take into account all the instructions | | | | | set of make instructions | provided below, provide interface | | | |----+---------------------------------+----------------------------------------+---------------------+---| | 1. | document_make file | to be applied to all documents | per directory | | | | "config_document" | (unless subsequently overridden) | (all docs within) | | |----+---------------------------------+----------------------------------------+---------------------+---| | 2. | config file | local site specific | per directory | | | | "config_local_site" | | (all docs within) | | |----+---------------------------------+----------------------------------------+---------------------+---| | 3. | document header make | make instructions contained | per document | | | | | in document header | (single doc) | | |----+---------------------------------+----------------------------------------+---------------------+---| | 4. 
| command line instruction | make instruction passed | each command | |
| | | | (all docs within) | |
|----+---------------------------------+----------------------------------------+---------------------+---|

*** config & metadata (from instruction sources)

|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | 1. document make file | 2. config file | 3. document header | 4. command line instruction |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| comment, fixed: | per dir (pod) | per dir | per document (pod) | per command instruction |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | sdl_root_config_document | sdl_root_config_local_site | | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| local site specific | | * | | *? |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | webserv | | |
| | | - url_root | | |
| | | - path | | |
| | | - images | | |
| | | - cgi | | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | webserv_cgi | | |
| | | - host | | |
| | | - base_path | | |
| | | - port | | |
| | | - user | | |
| | | - file_links | | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | processing | | |
| | | - path | | |
| | | - dir | | |
| | | - concord_max | | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | flag (configure) | | (call) |
| | | - act0 | | act0 |
| | | - act1 | | act1 |
| | | - act2 | | act2 |
| | | - act3 | | act3 |
| | | - act4 | | act4 |
| | | - act5 | | act5 |
| | | - act6 | | act6 |
| | | - act7 | | act7 |
| | | - act8 | | act8 |
| | | - act9 | | act9 |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | default | | |
| | | - papersize | | |
| | | - text_wrap | | |
| | | - emphasis | | |
| | | - language | | |
| | | - digest | | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | permission | | |
| | | - share_source | | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | program_select | | |
| | | - editor | | |
| | | - epub_viewer | | |
| | | - html_viewer | | |
| | | - odf_viewer | | |
| | | - pdf_viewer | | |
| | | - xml_viewer | | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | search | | |
| | | - flag | | |
| | | - action | | |
| | | - db | | |
| | | - title | | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| make instruction | ** | omit or override share? | ** | *? |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | make | make | make | |
| | - bold | - bold | - bold | |
| | - breaks | - breaks | - breaks | |
| | - cover_image | - cover_image | - cover_image | |
| | - css | - css | - css | |
| | - emphasis | - emphasis | - emphasis | |
| | - footer | - footer | - footer | |
| | - headings | - headings | - headings | |
| | - home_button_image | - home_button_image | - home_button_image | |
| | - home_button_text | - home_button_text | - home_button_text | |
| | - italics | - italics | - italics | |
| | - num_top | - num_top | - num_top | |
| | - auto_num_depth | - auto_num_depth | - auto_num_depth | |
| | - substitute | - substitute | - substitute | |
| | - texpdf_font | - texpdf_font | - texpdf_font | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| actions | | | | * |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | | assertions |
| | | | | concordance |
| | | | | debug |
| | | | | digest |
| | | | | docbook |
| | | | | epub |
| | | | | html |
| | | | | html-seg |
| | | | | html-scroll |
| | | | | manifest |
| | | | | ocn |
| | | | | odt |
| | | | | pdf |
| | | | | postgresql |
| | | | | qrcode |
| | | | | pod |
| | | | | source |
| | | | | sqlite |
| | | | | sqlite-db-create |
| | | | | sqlite-db-drop |
| | | | | text |
| | | | | verbose |
| | | | | xhtml |
| | | | | xml-dom |
| | | | | xml-sax |
| | | | | section_toc |
| | | | | section_body |
| | | | | section_endnotes |
| | | | | section_glossary |
| | | | | section_biblio |
| | | | | section_bookindex |
| | | | | section_blurb |
| | | | | backmatter |
| | | | | skip-output |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| metadata | | | * | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | classify | |
| | | | - dewey | |
| | | | - keywords | |
| | | | - loc | |
| | | | - subject | |
| | | | - topic_register | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | creator | |
| | | | - author | |
| | | | - author_email | |
| | | | - illustrator | |
| | | | - translator | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | date | |
| | | | - added_to_site | |
| | | | - available | |
| | | | - created | |
| | | | - issued | |
| | | | - modified | |
| | | | - published | |
| | | | - valid | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | identifier | |
| | | | - isbn | |
| | | | - oclc | |
| | | | - pg | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | links | |
| | | | - link | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | notes | |
| | | | - abstract | |
| | | | - description | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | original | |
| | | | - language | |
| | | | - source | |
| | | | - title | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | publisher | |
| | | | - name | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | rights | |
| | | | - copyright | |
| | | | - cover | |
| | | | - illustrations | |
| | | | - license | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|
| | | | title | |
| | | | - edition | |
| | | | - full | |
| | | | - language | |
| | | | - main | |
| | | | - note | |
| | | | - sub | |
| | | | - subtitle | |
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------|