#+TITLE: doc_reform hub
#+DESCRIPTION: documents - structuring, various output representations & search
#+FILETAGS: :doc_reform:hub:
#+AUTHOR: Ralph Amissah
#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+LANGUAGE: en
#+STARTUP: indent content hideblocks hidestars
#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
#+OPTIONS: author:nil email:nil creator:nil timestamp:nil
#+PROPERTY: header-args :padline no :exports code :cache no :noweb yes
#+EXPORT_SELECT_TAGS: export
#+EXPORT_EXCLUDE_TAGS: noexport
#+TAGS: assert(a) class(c) debug(d) mixin(m) doc_reform(s) tangle(T) template(t) WEB(W) noexport(n)

[[../maker.org][maker.org makefile]] [[./][org/]]

* 0. version.txt (set version) :version:
** set program version

- set program version

#+BEGIN_SRC d :tangle "../views/version.txt"
/+ obt - org generated file +/
/++
  Program version triple (major.minor.patch).
+/
struct Version {
  int major;
  int minor;
  int patch;
}
/// current program version, available at compile time
enum ver = Version(0, 3, 3);
#+END_SRC

** compilation restrictions (supported compilers)

- set compilation restrictions
http://dlang.org/spec/version.html#predefined-versions

#+BEGIN_SRC d :tangle "../views/version.txt"
/+
  compile-time guard:
  - the platform must be Posix
  - the compiler must be one of DigitalMars (dmd), LDC or GNU (gdc)
  anything else stops the build with a static assert
+/
version (Posix) {
  version (DigitalMars) {
  } else version (LDC) {
  } else version (GNU) {
  } else {
    static assert (0, "Unsupported D compiler");
  }
} else {
  // reached when the platform is not Posix, whatever the compiler
  static assert (0, "Unsupported operating system (Posix required)");
}
#+END_SRC

* 1. doc_reform (sisu document parser) :doc_reform:
** notes

- deal with imports
- get options
- get command line instructions
- read config instructions
- process files as instructed by options
- read in file
- process file
- output

** 0. 
sdp src/sdp :template:

- process files (act according to requirements of each type)
- by sourcefilename
- by sourcefiles contents identifier
- by zip filename

#+BEGIN_SRC d :tangle "../src/doc_reform/doc_reform.d" :shebang #!/usr/bin/env rdmd
module doc_reform.sisu_document_parser;
import
  doc_reform.conf.compile_time_info,
  doc_reform.meta.metadoc;
<>
import std.algorithm;
import std.parallelism;
<>
<>
string program_name = "doc-reform";
/++
  name        "doc_reform"
  description "A SiSU inspired document parser written in D."
  homepage    "http://sisudoc.org"
+/
/++
  Program entry point.

  The noweb chunks expanded at the top of the body set up the names used
  below (_opt_action, _manifests). Each manifest after the first (a dummy
  element) is then processed, in parallel when _opt_action.parallelise is
  set, otherwise one after the other.

  Params:
    args = command line arguments (args[0] is the program name)
+/
void main(string[] args) {
  <>
  <>
  <>
  if (_manifests.length > 1 // _manifests[0] initialized dummy element
  && _opt_action.abstraction) {
    if (_opt_action.parallelise) { // note cannot parallelise sqlite shared db
      foreach(manifest; parallel(_manifests[1..$])) {
        if (!empty(manifest.src.filename)) {
          <>
          <>
          <>
          <>
          <>
        } else {
          <>
        }
      }
    } else {
      // report once per run, not once per document
      writeln("parallelisation off: actions include sqlite shared db");
      foreach(manifest; _manifests[1..$]) {
        if (!empty(manifest.src.filename)) {
          <>
          <>
          <>
          <>
          <>
        } else {
          <>
        }
      }
    }
  }
}
#+END_SRC

** 1. 
pre-loop init :init:
*** init
**** imports :import:
***** doc_reform :doc_reform:

#+NAME: imports_doc_reform
#+BEGIN_SRC d
import
  std.getopt,
  std.file,
  std.path,
  std.process;
import
  doc_reform.meta,
  doc_reform.meta.metadoc_summary,
  doc_reform.meta.metadoc_from_src,
  doc_reform.meta.conf_make_meta_structs,
  doc_reform.meta.conf_make_meta_toml,
  doc_reform.meta.conf_make_meta_json,
  doc_reform.meta.defaults,
  doc_reform.meta.doc_debugs,
  doc_reform.meta.rgx,
  doc_reform.source.paths_source,
  doc_reform.source.read_config_files,
  doc_reform.source.read_source_files,
  doc_reform.output.hub;
#+END_SRC

****** notes

├── src
│   ├── doc_reform.d
│   └── doc_reform
│       ├── conf
│       ├── meta
│       └── output
└── views
    └── version.txt

[[./meta_abstraction.org][meta_abstraction]]
[[./meta_conf_make_meta.org][meta_conf_make_meta]]
[[./meta_defaults.org][meta_defaults]]
[[./meta_output_debugs.org][meta_output_debugs]]
[[./source_read_files.org][source_read_files]]
[[./compile_time_info.org][compile time info]]
[[./output.org][output]]
[[./doc_reform.org][doc_reform]]

keep up to date, configuration in ../maker.org

check:
- http://github.com/Abscissa/SDLang-D
- https://github.com/abscissa/libInputVisitor

std.conv, std.variant,

- https://code.dlang.org/packages/toml
- https://github.com/toml-lang/toml
- https://github.com/toml-lang/toml/blob/master/README.md

**** mixins :mixin:
***** version.txt :version:

#+NAME: mixin_doc_reform_version
#+BEGIN_SRC d
// string-import of version.txt (tangled from this file): struct Version,
// enum ver and the compile-time platform/compiler checks
mixin(import("version.txt"));
#+END_SRC

***** pre main mixins

#+NAME: mixin_pre_main
#+BEGIN_SRC d
mixin CompileTimeInfo;
#+END_SRC

***** doc_reform "main" mixins :doc_reform:

#+NAME: doc_reform_mixin
#+BEGIN_SRC d
mixin DocReformRgxInit;
mixin contentJSONtoDocReformStruct;
mixin DocReformBiblio;
mixin DocReformRgxInitFlags;
mixin outputHub;
#+END_SRC

**** init :init:

#+NAME: doc_reform_args
#+BEGIN_SRC d
// collects command line arguments matching rgx.flag_action
// (flags not taken by getopt)
string flag_action;
// collects command line arguments that match none of the recognised patterns
string arg_unrecognized;
// index names for the two-element tuple returned by DocReformAbstraction:
// (doc_abstraction, doc_matters)
enum dAM { abstraction, matters }
// regular expressions used when classifying command line arguments
static auto rgx = Rgx();
#+END_SRC

*** scope (run complete) :scope: 
#+NAME: doc_reform_args
#+BEGIN_SRC d
/+ run-level exit reports, only compiled in with -debug=checkdoc +/
scope(success) {
  debug(checkdoc) {
    writefln(
      "~ run complete, ok ~ (%s-%s.%s.%s, %s D:%s, %s %s)",
      program_name,
      ver.major, // ver is the version enum declared in version.txt
      ver.minor,
      ver.patch,
      __VENDOR__,
      __VERSION__,
      bits,
      os,
    );
  }
}
scope(failure) {
  debug(checkdoc) {
    stderr.writefln(
      "run failure",
    );
  }
}
#+END_SRC

*** getopt args for loop :args:getopt:
**** set getopt options

- set getopt options

#+NAME: doc_reform_args
#+BEGIN_SRC d
/+ boolean command line switches and their defaults, filled in by getopt +/
bool[string] opts = [
  "abstraction" : false,
  "assertions" : false,
  "concordance" : false,
  "dark" : false,
  "debug" : false,
  "digest" : false,
  "epub" : false,
  "html" : false,
  "html-seg" : false,
  "html-scroll" : false,
  "light" : false,
  "manifest" : false,
  "ocn" : true,
  "parallelise" : true,
  "parallelise-subprocesses" : false,
  "quiet" : false,
  "pod" : false,
  "source" : false,
  "sqlite-discrete" : false,
  "sqlite-db-create" : false,
  "sqlite-db-drop" : false,
  "sqlite-db-recreate" : false,
  "sqlite-delete" : false,
  "sqlite-insert" : false,
  "sqlite-update" : false,
  "text" : false,
  "verbose" : false,
  "very-verbose" : false,
  "xhtml" : false,
  "section_toc" : true,
  "section_body" : true,
  "section_endnotes" : true,
  "section_glossary" : true,
  "section_biblio" : true,
  "section_bookindex" : true,
  "section_blurb" : true,
  "backmatter" : true,
  "skip-output" : false,
  "theme-dark" : false,
  "theme-light" : false,
  "workon" : false,
];
/+ command line settings that take a string value, with their defaults +/
string[string] settings = [
  "output-dir" : "",
  "site-config-dir" : "",
  "lang" : "all",
  "sqlite-filename" : "documents",
];
/+
  parse the command line; passThrough leaves arguments that getopt does not
  recognise in args, they are classified in the argument loop further on
+/
auto helpInfo = getopt(args,
  std.getopt.config.passThrough,
  "abstraction", "--abstraction document abstraction ", &opts["abstraction"],
  "assert", "--assert set optional assertions on", &opts["assertions"],
  "concordance", "--concordance file for document", &opts["concordance"],
  "dark", "--dark alternative dark theme", &opts["dark"],
  "debug", "--debug", &opts["debug"],
  "digest", "--digest hash digest for each object", &opts["digest"],
  "epub", "--epub process epub output", &opts["epub"],
  "html", "--html process html output", &opts["html"],
  "html-seg", "--html-seg process html output", &opts["html-seg"],
  "html-scroll", "--html-scroll process html output", &opts["html-scroll"],
  "light", "--light default light theme", &opts["light"],
  "manifest", "--manifest process manifest output", &opts["manifest"],
  "ocn", "--ocn object cite numbers (default)", &opts["ocn"],
  "parallelise", "--parallelise parallelisation", &opts["parallelise"],
  "parallelise-subprocesses", "--parallelise-subprocesses nested parallelisation", &opts["parallelise-subprocesses"],
  "quiet", "--quiet output to terminal", &opts["quiet"],
  "pod", "--pod doc reform pod source content bundled", &opts["pod"],
  "source", "--source markup source text content", &opts["source"],
  "sqlite-discrete", "--sqlite-discrete process discrete sqlite output", &opts["sqlite-discrete"],
  "sqlite-db-create", "--sqlite-db-create create db, create tables", &opts["sqlite-db-create"],
  "sqlite-db-drop", "--sqlite-db-drop drop tables & db", &opts["sqlite-db-drop"],
  "sqlite-db-recreate", "--sqlite-db-recreate create db, create tables", &opts["sqlite-db-recreate"],
  "sqlite-delete", "--sqlite-delete process sqlite output", &opts["sqlite-delete"],
  "sqlite-insert", "--sqlite-insert process sqlite output", &opts["sqlite-insert"],
  "sqlite-update", "--sqlite-update process sqlite output", &opts["sqlite-update"],
  "text", "--text process text output", &opts["text"],
  "txt", "--txt process text output", &opts["text"],
  "verbose|v", "--verbose output to terminal", &opts["verbose"],
  "very-verbose", "--very-verbose output to terminal", &opts["very-verbose"],
  "xhtml", "--xhtml process xhtml output", &opts["xhtml"],
  "section-toc", "--section-toc process table of contents (default)", &opts["section_toc"],
  "section-body", "--section-body process document body (default)", &opts["section_body"],
  "section-endnotes", "--section-endnotes process document endnotes (default)", &opts["section_endnotes"],
  "section-glossary", "--section-glossary process document glossary (default)", &opts["section_glossary"],
  "section-biblio", "--section-biblio process document biblio (default)", &opts["section_biblio"],
  "section-bookindex", "--section-bookindex process document bookindex (default)", &opts["section_bookindex"],
  "section-blurb", "--section-blurb process document blurb (default)", &opts["section_blurb"],
  "backmatter", "--backmatter process document backmatter (default)", &opts["backmatter"],
  "skip-output", "--skip-output", &opts["skip-output"],
  "theme-dark", "--theme-dark alternative dark theme", &opts["theme-dark"],
  "theme-light", "--theme-light default light theme", &opts["theme-light"],
  "workon", "--workon (reserved for some matters under development & testing)", &opts["workon"],
  "output-dir", "--output-dir=[dir path]", &settings["output-dir"],
  "site-config-dir", "--site-config-dir=[dir path]", &settings["site-config-dir"],
  "sqlite-filename", "--sqlite-filename=[filename].sqlite", &settings["sqlite-filename"],
  "lang", "--lang=[lang code e.g. =en or =en,es]", &settings["lang"],
);
if (helpInfo.helpWanted) {
  defaultGetoptPrinter("Some information about the program.", helpInfo.options);
}
#+END_SRC

**** getopt hash to struct

- move getopt options to struct

#+NAME: doc_reform_args
#+BEGIN_SRC d
/+ output tasks that can be scheduled, see OptActions.output_task_scheduler +/
enum outTask { pod, source, sqlite, sqlite_multi, epub, html_scroll, html_seg, html_stuff }
/++
  Read-only view of the parsed command line.

  Every member reads the opts / settings associative arrays filled by getopt
  above; several members combine more than one switch into the question the
  rest of the program asks (e.g. html_seg is set by --html or --html-seg).
+/
struct OptActions {
  auto assertions() {
    return opts["assertions"];
  }
  auto concordance() {
    return opts["concordance"];
  }
  /// true for the light theme (the default); a light switch wins over a dark one
  auto css_theme_default() {
    if (opts["light"] || opts["theme-light"]) {
      return true;
    }
    return !(opts["dark"] || opts["theme-dark"]);
  }
  auto debug_do() {
    return opts["debug"];
  }
  auto digest() {
    return opts["digest"];
  }
  auto epub() {
    return opts["epub"];
  }
  /// any html output requested
  auto html() {
    return opts["html"] || opts["html-seg"] || opts["html-scroll"];
  }
  /// segmented html requested (--html implies it)
  auto html_seg() {
    return opts["html"] || opts["html-seg"];
  }
  /// scroll html requested (--html implies it)
  auto html_scroll() {
    return opts["html"] || opts["html-scroll"];
  }
  /// same condition as html()
  auto html_stuff() {
    return opts["html"] || opts["html-scroll"] || opts["html-seg"];
  }
  auto manifest() {
    return opts["manifest"];
  }
  auto ocn() {
    return opts["ocn"];
  }
  auto quiet() {
    return opts["quiet"];
  }
  auto pod() {
    return opts["pod"];
  }
  auto source() {
    return opts["source"];
  }
  auto sqlite_discrete() {
    return opts["sqlite-discrete"];
  }
  /// drop requested directly or as part of a recreate
  auto sqlite_db_drop() {
    return opts["sqlite-db-recreate"] || opts["sqlite-db-drop"];
  }
  /// create requested directly or as part of a recreate
  auto sqlite_db_create() {
    return opts["sqlite-db-recreate"] || opts["sqlite-db-create"];
  }
  auto sqlite_delete() {
    return opts["sqlite-delete"];
  }
  /// insert and update are treated alike
  auto sqlite_update() {
    return opts["sqlite-update"] || opts["sqlite-insert"];
  }
  /// any action that touches the shared sqlite db
  auto sqlite_shared_db_action() {
    return opts["sqlite-db-recreate"]
      || opts["sqlite-db-create"]
      || opts["sqlite-delete"]
      || opts["sqlite-insert"]
      || opts["sqlite-update"];
  }
  auto text() {
    return opts["text"];
  }
  /// --very-verbose implies verbose
  auto verbose() {
    return opts["verbose"] || opts["very-verbose"];
  }
  auto very_verbose() {
    return opts["very-verbose"];
  }
  auto xhtml() {
    return opts["xhtml"];
  }
  auto section_toc() {
    return opts["section_toc"];
  }
  auto section_body() {
    return opts["section_body"];
  }
  auto section_endnotes() {
    return opts["section_endnotes"];
  }
  auto section_glossary() {
    return opts["section_glossary"];
  }
  auto section_biblio() {
    return opts["section_biblio"];
  }
  auto section_bookindex() {
    return opts["section_bookindex"];
  }
  auto section_blurb() {
    return opts["section_blurb"];
  }
  auto backmatter() {
    return opts["backmatter"];
  }
  auto skip_output() {
    return opts["skip-output"];
  }
  auto workon() {
    return opts["workon"];
  }
  /// value of --lang split on commas
  auto languages_set() {
    return settings["lang"].split(",");
  }
  auto output_dir_set() {
    return settings["output-dir"];
  }
  auto sqlite_filename() {
    return settings["sqlite-filename"];
  }
  /++
    Whether documents may be processed in parallel: requires the
    --parallelise switch (on by default) and no action on the shared
    sqlite db. Previously the switch value was overwritten and so ignored.
  +/
  auto parallelise() {
    return opts["parallelise"] && !sqlite_shared_db_action;
  }
  auto parallelise_subprocesses() {
    return opts["parallelise-subprocesses"];
  }
  /// requested output tasks as outTask values, sorted, duplicates removed
  auto output_task_scheduler() {
    int[] schedule;
    if (pod) {
      schedule ~= outTask.pod;
    }
    if (source) {
      schedule ~= outTask.source;
    }
    if (sqlite_discrete) {
      schedule ~= outTask.sqlite;
    }
    if (epub) {
      schedule ~= outTask.epub;
    }
    if (html_scroll) {
      schedule ~= outTask.html_scroll;
    }
    if (html_seg) {
      schedule ~= outTask.html_seg;
    }
    if (html_stuff) {
      schedule ~= outTask.html_stuff;
    }
    return schedule.sort().uniq;
  }
  /// true when --abstraction or any listed output/db option asks for it
  auto abstraction() {
    return opts["abstraction"]
      || concordance
      || epub
      || html
      || manifest
      || pod
      || source
      || sqlite_discrete
      || sqlite_delete
      || sqlite_update;
  }
}
auto _opt_action = OptActions();
#+END_SRC

**** env

- environmental info

#+NAME: doc_reform_args
#+BEGIN_SRC d
/+
  environment["NAME"] throws when the variable is not set; fall back to the
  current working directory for PWD and to an empty string for HOME
+/
auto _env = [
  "pwd" : environment.get("PWD", getcwd()),
  "home" : environment.get("HOME", ""),
];
#+END_SRC

*** opt actions on processing files & file paths (pods, src etc.) 
#+NAME: doc_reform_args
#+BEGIN_SRC d
/+
  classify each command line argument and build the list of documents to
  process (_manifests); element 0 is a dummy so the array has a known type
+/
auto _manifest_start = PodManifest!()("");
auto _manifest_matter = PathMatters!()(_opt_action, _env, "");
auto _manifests = [ _manifest_matter ];
foreach(arg; args[1..$]) {
  _manifest_start = PodManifest!()(arg);
  if (arg.match(rgx.flag_action)) {
    flag_action ~= " " ~ arg; // flags not taken by getopt
  } else if (
    !(arg.match(rgx.src_pth_sst_or_ssm))
    && _manifest_start.pod_manifest_file_with_path
    && _opt_action.abstraction
  ) {
    /+ arg has a pod manifest file: read it, add one entry per listed source +/
    string contents_location_raw_;
    string contents_location_;
    string sisudoc_txt_ = _manifest_start.pod_manifest_file_with_path;
    enforce(
      exists(sisudoc_txt_)!=0,
      "file not found: «" ~
      sisudoc_txt_ ~ "»"
    );
    /+ the file is known to exist from here on (enforce above throws otherwise) +/
    try {
      contents_location_raw_ = sisudoc_txt_.readText;
    } catch (ErrnoException ex) {
      // report and carry on: an unreadable manifest contributes no documents
      stderr.writeln("ERROR >> unable to read manifest: ", sisudoc_txt_, " (", ex.msg, ")");
    } catch (FileException ex) {
      stderr.writeln("ERROR >> unable to read manifest: ", sisudoc_txt_, " (", ex.msg, ")");
    }
    if (contents_location_raw_.match(rgx.pod_content_location)) { // (file name followed by language codes \n)+
      foreach (m; contents_location_raw_.matchAll(rgx.pod_content_location)) {
        foreach (n; m.captures[2].matchAll(rgx.language_codes)) {
          contents_location_ ~= "media/text/" ~ n.captures[1].to!string ~ "/" ~ m.captures[1].to!string ~ "\n";
        }
      }
    } else {
      contents_location_ = contents_location_raw_;
    }
    auto contents_locations_arr = (cast(char[]) contents_location_).split;
    auto tmp_dir_ = (sisudoc_txt_).dirName.array;
    /+ language filter does not change per entry, build it once +/
    auto lang_rgx_ = regex(r"/(" ~ _opt_action.languages_set.join("|") ~ ")/");
    foreach (contents_location; contents_locations_arr) {
      /+ manifest content is input data: validate in release builds too +/
      enforce(contents_location.match(rgx.src_pth_sst_or_ssm),
        "not a recognised file: «" ~
        contents_location ~ "»"
      );
      auto contents_location_pth_ = (contents_location).to!string;
      if (_opt_action.languages_set[0] == "all"
        || (contents_location_pth_).match(lang_rgx_)
      ) {
        auto _fns = (((tmp_dir_).chainPath(contents_location_pth_)).array).to!string;
        _manifest_matter = PathMatters!()(_opt_action, _env, arg, _fns, contents_locations_arr);
        _manifests ~= _manifest_matter;
      }
    }
  } else if (arg.match(rgx.src_pth_sst_or_ssm)) {
    /+ arg names a single markup source file +/
    if (exists(arg)==0) {
      writeln("ERROR >> Processing Skipped! File not found: ", arg);
    } else {
      _manifest_matter = PathMatters!()(_opt_action, _env, arg, arg);
      _manifests ~= _manifest_matter;
    }
  } else if (arg.match(rgx.src_pth_zip)) {
    // fns_src ~= arg; // gather input markup source file names for processing
  } else { // anything remaining, unused
    arg_unrecognized ~= " " ~ arg;
  }
}
#+END_SRC

*** config files load & read

#+NAME: doc_reform_conf_files_in_toml
#+BEGIN_SRC d
auto _config_document_struct = readConfigDoc!()(_manifest, _env);    // document config file
auto _config_local_site_struct = readConfigSite!()(_manifest, _env); // local site config
ConfCompositePlus _make_and_meta_struct;
_make_and_meta_struct = configParseTOMLreturnDocReformStruct!()(_make_and_meta_struct, _config_document_struct);
_make_and_meta_struct = configParseTOMLreturnDocReformStruct!()(_make_and_meta_struct, _config_local_site_struct);
#+END_SRC

*** opt actions independent of processing files (no files no processing loop)

#+NAME: doc_reform_do_selected
#+BEGIN_SRC d
if (!(_opt_action.skip_output)) {
  if ((_opt_action.debug_do)
  || (_opt_action.very_verbose)
  ) {
    writeln("step0 commence → (without processing files)");
  }
  outputHubOp!()(_env, _opt_action);
  if ((_opt_action.debug_do)
  || (_opt_action.very_verbose)
  ) {
    writeln("- step0 complete");
  }
}
#+END_SRC

** _2. 
processing: (loop each file)_ [+2] :loop:files:
*** scope (loop) :scope:

#+NAME: doc_reform_each_file_do_scope
#+BEGIN_SRC d
/+ per-document exit reports; manifest is the loop variable of the document loop in main +/
scope(success) {
  if (!(_opt_action.quiet)) {
    writefln(
      "%s",
      "-- ~ document complete, ok ~ ------------------------------------",
    );
  }
}
scope(failure) {
  debug(checkdoc) {
    stderr.writefln(
      "~ document run failure ~ (%s v%s)\n\t%s\n%s",
      __VENDOR__, __VERSION__,
      manifest.src.filename,
      "------------------------------------------------------------------",
    );
  }
}
/+ refuse anything whose name does not match rgx.src_pth_types +/
enforce(
  manifest.src.filename.match(rgx.src_pth_types),
  "not a sisu markup filename: «" ~
  manifest.src.filename ~ "»"
);
#+END_SRC

*** 1. _document abstraction_ [#A]

- return tuple of:
- doc_abstraction (the document)
- doc_matters

#+NAME: doc_reform_each_file_do_abstraction
#+BEGIN_SRC d
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("--->\nstepX commence → (document abstraction)");
}
/+ t is the two-element tuple (doc_abstraction, doc_matters), indexed with dAM +/
auto t = DocReformAbstraction!()(_env, _opt_action, manifest);
static assert(!isTypeTuple!(t));
static assert(t.length==2);
auto doc_abstraction = t[dAM.abstraction];
auto doc_matters = t[dAM.matters];
if ((doc_matters.opt.action.debug_do)
|| (doc_matters.opt.action.very_verbose)
) {
  writeln("- stepX complete");
}
#+END_SRC

*** 2. _output processing_ (post abstraction processing)
**** 0. abstraction _print summary_ :abstraction:summary:

- abstraction summary

#+NAME: doc_reform_each_file_do_debugs_checkdoc
#+BEGIN_SRC d
/+ ↓ debugs +/
/+ summary is printed only when verbose (or very-verbose) is set +/
if (doc_matters.opt.action.verbose) {
  DocReformAbstractionSummary!()(doc_abstraction, doc_matters);
}
#+END_SRC

**** 1. _debug_ (document parts, checkdoc) :debug:checkdoc:

- [[./meta_output_debugs.org][meta_output_debugs]]

#+NAME: doc_reform_each_file_do_debugs_checkdoc
#+BEGIN_SRC d
/+ ↓ debugs +/
/+ runs only when --debug was given +/
if (doc_matters.opt.action.debug_do) {
  DocReformDebugs!()(doc_abstraction, doc_matters);
}
#+END_SRC

**** 2. 
_process outputs_ :outputs:

- [[./output_hub.org][output_hub]]

#+NAME: doc_reform_each_file_do_selected_output
#+BEGIN_SRC d
/+ ↓ output hub +/
/+ hand the abstraction to the output hub unless --skip-output was given +/
if (!(doc_matters.opt.action.skip_output)) {
  if ((_opt_action.debug_do)
  || (_opt_action.very_verbose)
  ) {
    writeln("step5 commence → (process outputs)");
  }
  outputHub!()(doc_abstraction, doc_matters);
  if ((_opt_action.debug_do)
  || (_opt_action.very_verbose)
  ) {
    writeln("- step5 complete");
  }
}
#+END_SRC

*** scope (on loop exit) :scope:exit:

#+NAME: doc_reform_each_file_do_scope_exit
#+BEGIN_SRC d
/+ runs when a document's processing ends, whether it succeeded or failed +/
scope(exit) {
  if (!(_opt_action.quiet)) {
    writefln(
      "processed file: %s (%s)",
      manifest.src.filename,
      manifest.src.language
    );
  }
  destroy(manifest);
}
#+END_SRC

** +2c. no valid filename provided+

#+NAME: doc_reform_no_filename_provided
#+BEGIN_SRC d
/+ no recognized filename provided +/
writeln("no recognized filename");
/+
  this chunk is expanded inside both the parallel and the serial document
  loop of main; break is not allowed in a std.parallelism parallel foreach
  (it throws ParallelForeachError), so skip this entry instead
+/
continue;
#+END_SRC

* 2. _document abstraction functions_ :module:doc_reform:abstraction:
** 0. module template

#+BEGIN_SRC d :tangle "../src/doc_reform/meta/metadoc.d"
module doc_reform.meta.metadoc;
template DocReformAbstraction() {
  <>
  <>
  /+ index names for the tuples passed between the processing steps +/
  enum headBody { header, body_content, insert_file_list, image_list }
  enum makeMeta { make, meta }
  enum docAbst { doc_abstraction, section_keys, segnames, segnames_0_4, tag_assoc, images }
  static auto rgx = Rgx();
  /++
    Build the document abstraction for one source document.

    Params:
      _env        = environment info passed in from main
      _opt_action = parsed command line options
      _manifest   = paths and source info for the document

    Returns: tuple(doc_abstraction, doc_matters)
  +/
  auto DocReformAbstraction(E,O,M)(
    E _env,
    O _opt_action,
    M _manifest
  ){
    <>
    <>
    <>
    <>
    <>
    auto t = tuple(doc_abstraction, doc_matters);
    static assert(t.length==2);
    return t;
  }
}
#+END_SRC

** Output & Harvest pre-processing
*** 1. raw file content split, doc: _header_, _content_ +(lists: subdocs? images?) 
- [[./source_read_files.org][source_read_files]]
  return tuple: header; body; insert file list; image list

- read in the _marked up source document_ and
- split the document into:
- document header
- document body
- from markup source
- if master document from sub documents content
- if a master document
- make a list of insert files
- if build source pod requested
- scan for list of images (action avoided if not needed at this stage)
- _return a tuple of_:
- header
- body
- insert file list
- image list (if build source pod requested)

if build source pod requested all information needed to build it available at this point

- manifest related information _manifest
- insert file list _header_body_insertfilelist_imagelist[headBody.insert_file_list]
- image list _header_body_insertfilelist_imagelist[headBody.image_list]

#+NAME: doc_reform_each_file_do_read_and_split_sisu_markup_file_content_into_header_and_body
#+BEGIN_SRC d
/+ ↓ read file (filename with path) +/
/+ ↓ file tuple of header and content +/
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("step1 commence → (get document header & body & insert file list & if needed image list)"
  );
}
/+ four-element tuple, indexed with the headBody enum +/
auto _header_body_insertfilelist_imagelist =
  DocReformRawMarkupContent!()(_opt_action, _manifest.src.path_and_fn);
static assert(!isTypeTuple!(_header_body_insertfilelist_imagelist));
static assert(_header_body_insertfilelist_imagelist.length==4);
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("- step1 complete");
}
debug(header_and_body) {
  /+ index the tuple itself: there is no local named header, and .length is a number +/
  writeln(_header_body_insertfilelist_imagelist[headBody.header]);
  writeln(_header_body_insertfilelist_imagelist.length);
  writeln(_header_body_insertfilelist_imagelist[headBody.body_content][0]);
}
#+END_SRC

*** 2. 
_document metadata_ & _make instructions_ (struct from toml)

- [[./meta_conf_make_meta.org][meta_conf_make_meta]]
  return tuple: document metadata; make instructions

- read _document header_, split into:
- metadata
- make instructions
- read config files
- consolidate make instructions
- _return tuple of_:
- document metadata
- make instructions (from configuration files & document header make instructions)

#+NAME: doc_reform_each_file_do_split_sisu_markup_file_header_into_make_and_meta_structs
#+BEGIN_SRC d
/+ ↓ split header into make and meta +/
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("step2 commence → (read document header - toml, return struct)");
}
/+ the struct passed in is returned updated from the document header +/
_make_and_meta_struct =
  docHeaderMakeAndMetaTupTomlExtractAndConvertToStruct!()(
    _make_and_meta_struct,
    _header_body_insertfilelist_imagelist[headBody.header]
  );
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("- step2 complete");
}
#+END_SRC

*** 3. _document abstraction, tuple_ (output-pre-processing) [#A]

- [[./meta_abstraction.org][meta_abstraction]]
  return tuple: document abstraction; abstraction keys; segnames; image list

- prepare the document abstraction used in downstream processing
- _return tuple of_:
- document abstraction (_the_document_ or doc_abstraction)
- document abstraction keys
- (head, toc, body, endnotes, glossary, bibliography, bookindex, blurb, tail)
- (transfer to _doc_matters_)
- segnames for html epub (transfer to _doc_matters_)
- image list (transfer to _doc_matters_)

#+NAME: doc_reform_each_file_do_document_abstraction
#+BEGIN_SRC d
/+ ↓ document abstraction: process document, return abstraction as tuple +/
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("step3 commence → (document abstraction (da); da keys; segnames; doc_matters)");
}
/+ da is a six-element tuple, indexed with the docAbst enum +/
auto da = DocReformDocAbstraction!()(
  _header_body_insertfilelist_imagelist[headBody.body_content],
  _make_and_meta_struct,
  _opt_action,
  _manifest,
  true,
);
static assert(!isTypeTuple!(da));
static assert(da.length==6);
auto doc_abstraction = da[docAbst.doc_abstraction]; /+ head ~ toc ~ body ~ endnotes_seg ~ glossary ~ bibliography ~ bookindex ~ blurb; +/
auto _document_section_keys_sequenced = da[docAbst.section_keys];
string[] _doc_html_segnames = da[docAbst.segnames];
string[] _doc_epub_segnames_0_4 = da[docAbst.segnames_0_4];
debug(segnames) {
  writeln("segnames lv4: ", _doc_html_segnames);
  writeln("segnames lv0 to 4: ", _doc_epub_segnames_0_4);
}
auto _doc_tag_assoc = da[docAbst.tag_assoc];
auto _images = da[docAbst.images];
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("- step3 complete");
}
#+END_SRC

*** 4. _document matters_ (doc info gathered, various sources) [#A]

- gather doc matters
- prepare document_matters, miscellany about processing and the document of use in downstream processing

**** verbose message

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("step4 commence → (doc_matters)");
}
/+
  DocumentMatters is assembled from the chunks that follow; its members
  return values gathered in the earlier steps (_make_and_meta_struct,
  _manifest, _opt_action and the parts of da)
+/
struct DocumentMatters {
#+END_SRC

**** config make & meta

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
  auto conf_make_meta() { // TODO meld with all make instructions
    return _make_and_meta_struct;
  }
#+END_SRC

**** env related

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
  /+ pwd and home as held by the manifest +/
  auto env() {
    struct Env_ {
      auto pwd() {
        return _manifest.env.pwd;
      }
      auto home() {
        return _manifest.env.home;
      }
    }
    return Env_();
  }
#+END_SRC

**** opt

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
  auto opt() {
    struct Opt_ {
      auto action() {
        /+
          getopt options, commandline instructions, raw
          - processing instructions --epub --html etc.
          - command line config instructions --output-path
        +/
        return _opt_action;
      }
    }
    return Opt_();
  }
#+END_SRC

**** output related

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
  auto src() {
    return _manifest.src;
  }
  auto src_path_info() {
    return DocReformPathsSRC!()(_manifest.env.pwd, _manifest.src.file_with_absolute_path); // would like (to have and use) relative path
  }
  /+ insert file list from the raw read step, image list from the abstraction step +/
  auto srcs() {
    struct SRC_ {
      auto file_insert_list() {
        return _header_body_insertfilelist_imagelist[headBody.insert_file_list];
      }
      auto image_list() {
        return _images;
      }
    }
    return SRC_();
  }
  auto pod() {
    return _manifest.pod;
  }
  auto sqlite() {
    struct SQLite_ {
      string filename() {
        return _opt_action.sqlite_filename;
      }
    }
    return SQLite_();
  }
  /+ section keys, segment names and tag associations taken from da +/
  auto xml() {
    struct XML_ {
      auto keys_seq() { /+ contains .seg & .scroll sequences +/
        return _document_section_keys_sequenced;
      }
      string[] segnames() {
        return _doc_html_segnames;
      }
      string[] segnames_lv_0_to_4() {
        return _doc_epub_segnames_0_4;
      }
      auto tag_associations() {
        return _doc_tag_assoc;
      }
    }
    return XML_();
  }
  auto output_path() {
    return _manifest.output.path;
  }
#+END_SRC

**** } close

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
}
auto doc_matters = DocumentMatters();
#+END_SRC

**** step complete message

#+NAME: doc_reform_each_file_do_document_matters
#+BEGIN_SRC d
if ((_opt_action.debug_do)
|| (_opt_action.very_verbose)
) {
  writeln("- step4 complete");
}
#+END_SRC

* 3. document abstraction _summary_ :module:doc_reform:metadoc_summary:
** 0. 
module template

- document summary from abstraction

#+BEGIN_SRC d :tangle "../src/doc_reform/meta/metadoc_summary.d"
module doc_reform.meta.metadoc_summary;
/++
  Print a short processing summary for one document.

  Params:
    doc_abstraction = document abstraction, indexed here by section key
                      ("toc", "body", "endnotes", "glossary", "bibliography",
                      "bookindex", "blurb")
    doc_matters     = per-document accessor struct; read here for
                      opt.action.verbose, xml.keys_seq.seg, xml.segnames,
                      conf_make_meta.meta and src.filename

  Nothing is computed or printed unless doc_matters.opt.action.verbose is set.
  The function returns no value.
+/
template DocReformAbstractionSummary() {
  auto DocReformAbstractionSummary(S,T)(
    auto ref const S  doc_abstraction,
    auto ref       T  doc_matters,
  ) {
    <<metadoc_summary_imports>>
    mixin InternalMarkup;
    <<metadoc_summary_initialize>>
    if (doc_matters.opt.action.verbose) {
      <<meta_metadoc_summary>>
    }
  }
}
#+END_SRC

** init
*** imports

#+name: metadoc_summary_imports
#+BEGIN_SRC d
import
  doc_reform.meta.defaults,
  doc_reform.meta.rgx;
import
  std.array,
  std.exception,
  std.regex,
  std.stdio,
  std.string,
  std.traits,
  std.typecons,
  std.uni,
  std.utf,
  std.conv : to;
#+END_SRC

*** initialize :report:

#+name: metadoc_summary_initialize
#+BEGIN_SRC d
auto markup = InlineMarkup();
#+END_SRC

** (last ocn)

#+name: meta_metadoc_summary
#+BEGIN_SRC d
/+
  Last object numbers seen, kept as strings.
  "last_object_number" starts as a non-numeric placeholder and is only
  replaced once an object with a non-empty object_number is met, so it must
  not be passed through to!int (see the summary below).
+/
string[string] check = [
  "last_object_number" : "NA [debug \"checkdoc\" not run]",
  "last_object_number_body" : "0",
  "last_object_number_book_index" : "0",
];
foreach (k; doc_matters.xml.keys_seq.seg) {
  foreach (obj; doc_abstraction[k]) {
    if (obj.metainfo.is_of_part != "empty") {
      if (!empty(obj.metainfo.object_number)) {
        if (k == "body") {
          check["last_object_number_body"] = obj.metainfo.object_number;
        }
        /+ a second emptiness test here was redundant: already guarded above +/
        check["last_object_number"] = obj.metainfo.object_number;
      }
      if (k == "bookindex") {
        /+ NOTE(review): type 2 presumably marks book index numbering - confirm +/
        if (obj.metainfo.object_number_type == 2) {
          check["last_object_number_book_index"] = obj.metainfo.object_number_book_index;
        }
      }
    }
  }
}
#+END_SRC

** summary

#+name: meta_metadoc_summary
#+BEGIN_SRC d
/+ rule width: title + author + 4, but never narrower than min_repeat_number +/
auto min_repeat_number = 66;
auto char_repeat_number = (doc_matters.conf_make_meta.meta.title_full.length
  + doc_matters.conf_make_meta.meta.creator_author.length + 4);
char_repeat_number = (char_repeat_number > min_repeat_number)
? char_repeat_number
: min_repeat_number;
/+
  One "%30-s%10-d" pair per label/count row, in the order of the arguments.
  "last object_number" is printed with %10-s: its value may still be the "NA ..."
  placeholder, on which to!int would throw a ConvException; numeric values
  print exactly as before.
+/
writefln(
  "%s\n\"%s\", %s\n%s\n%s\n"
  ~ "%30-s%10-d\n" // length toc arr
  ~ "%30-s%10-d\n" // length doc_abstraction arr
  ~ "%30-s%10-d\n" // last doc body ocn
  ~ "%30-s%10-s\n" // last object_number (string, see above)
  ~ "%30-s%10-d\n" // length endnotes
  ~ "%30-s%10-d\n" // length glossary
  ~ "%30-s%10-d\n" // length biblio
  ~ "%30-s%10-d\n" // length bookindex
  ~ "%30-s%10-d\n" // last book idx ocn
  ~ "%30-s%10-d\n" // length blurb
  ~ "%30-s%10-d\n" // number of segments
  ~ "(%s: %s)\n%s",
  markup.repeat_character_by_number_provided("-", char_repeat_number),
  doc_matters.conf_make_meta.meta.title_full,
  doc_matters.conf_make_meta.meta.creator_author,
  doc_matters.src.filename,
  markup.repeat_character_by_number_provided("-", char_repeat_number),
  "length toc arr:",
  to!int(doc_abstraction["toc"].length),
  "length doc_abstraction arr:",
  to!int(doc_abstraction["body"].length),
  "last doc body ocn:",
  to!int(check["last_object_number_body"]),
  "last object_number:",
  check["last_object_number"],
  "length endnotes:", // subtract headings
  (doc_abstraction["endnotes"].length > 2)
  ? (to!int(doc_abstraction["endnotes"].length - 2))
  : 0,
  "length glossary:",
  (doc_abstraction["glossary"].length > 1)
  ? (to!int(doc_abstraction["glossary"].length))
  : 0,
  "length biblio:",
  (doc_abstraction["bibliography"].length > 1)
  ? (to!int(doc_abstraction["bibliography"].length))
  : 0,
  "length bookindex:",
  (doc_abstraction["bookindex"].length > 1)
  ? (to!int(doc_abstraction["bookindex"].length))
  : 0,
  " last book idx ocn:",
  to!int(check["last_object_number_book_index"]),
  "length blurb:",
  (doc_abstraction["blurb"].length > 1)
  ? (to!int(doc_abstraction["blurb"].length))
  : 0,
  "number of segments:",
  (doc_matters.xml.segnames.length > 1)
  ? (to!int(doc_matters.xml.segnames.length))
  : 0,
  __FILE__,
  __LINE__,
  markup.repeat_character_by_number_provided("-", min_repeat_number),
);
#+END_SRC

* __END__ dev notes
** doc_reform glossary / terms

|------------+-------------------------------------|
| doc_reform | sisu document parser                |
|------------+-------------------------------------|
| dmso       | document markup, structure, objects |
|------------+-------------------------------------|
| meta       | meta document, document abstraction |
| mda        | meta, meta document abstraction     |
| adr        | abstract document representation    |
| dar        | document abstract representation    |
| (da)       | (document abstraction)              |
|            | (code representation of document)   |
|------------+-------------------------------------|
| ao         | abstract objects                    |
|            | (code representation of objects)    |
|------------+-------------------------------------|

consider
|-------+----------------------------------------------|
| dao   | document abstraction, objects                |
|-------+----------------------------------------------|
| daso  | document abstraction, structure, objects     |
|-------+----------------------------------------------|
| drso  | document representation, structure, objects  |
|-------+----------------------------------------------|
| daows | document abstraction, objects with structure |
|-------+----------------------------------------------|

** the document notes
*** document sections
**** summary

|--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---|
| section      | part         | opt.
| | objects | ocn | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | front matter | head | * | | | no | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | toc | toc | | | generated from headings | no | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | body | body | * | | default section | yes | | | | | | | | - headings | | | | | | | | | - paras | | | | | | | | | - code | | | | | | | | | - poem | | | | | | | | | - group | | | | | | | | | - block | | | | | | | | | - quote | | | | | | | | | - table | | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | back matter | endnote | | | generated from inline note markup | no (each endnote belongs to | | | | | | | | | a (body) object) | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | | glossary | | | identified section, limited markup | possibly, to make searchable | | | | | | | | - heading | hidden | | | | | | | | - paras | | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | | bibliography | | | generated from inline special markup | possibly, to make searchable | | | | | | | | appended to paragraphs contained in body section | hidden | | | | | | | | - heading | | | | | | | | | - paras | | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | | book index | | | generated from inline special markup | possibly, special numbering or | | | | | | | | - heading | could use term as anchor? 
| | | | | | | | - paras | to make searchable | | | | | | | | | hidden | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| | | blurb | | | identified section, limited markup | no (unless non-substantive | | | | | | | | - heading | given special numbering) | | | | | | | | - paras | | | | |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---| **** on abstraction - abstract for downstream processing - identify document structure and objects - identify document structure (headings/levels/sections) - identify objects (headings, paragraphs, tables, code blocks, verse ...) - set document, generate common abstraction for downstream parsing - set different _document sections_: - _head_, toc, _body_, endnotes, glossary, bibliography, book index, blurb - _object numbers_, heading/ chapter numbering etc, endnote numbers - _regular ocn_ - body objects - glossary objects - bibliography objects - _special ocn_ - non substantive text (provide special numbers) - blurb objects - book index - special (_exceptions_) - endnotes - unify object representations - multiple markups for same object type given single representation - extract object attributes - unify inline markup on objects - inline markup made easier to identify - simplify downstream parsing *** objects **** summary |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | | identified by | object notes | attributes | inline | embedded | special | | | | | | | | appended | characters | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | para | heading | level markers | | | - italics | - endnotes | | | | | at start of line | | | | - bibliography | | 
|-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | paragraph | delimited by two new lines | default object | - indent | - bold | - endnotes | | | | | | [discard leading & | - bullet | - italics | - bibliography | | | | | | newline whitespace] | | - underscore | | | | | | | | | - strikethrough | | | | | | | | | - superscript | | | | | | | | | - subscript | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | block | | open and close tags | | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | TODO | quote | | | - language? | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | TODO | group | | - inline markup applied | - language? 
| as paragraph | - endnotes | | | | | | - [discard leading & | | | - bibliography | | | | | | newline whitespace] | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | TODO | block | | - inline markup applied | | as paragraph | - endnotes | | | | | | - whitespace indentation | | | - bibliography | | | | | | & newlines | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | poem / verse | open and close tags | verse is the object | | | - endnotes | | | | | | - inline markup applied | | | - bibliography | | | | | (for poem) | - whitespace indentation | | | | | | | | | & newlines | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | code | | - contents untouched | - syntax | | | | | | | | - whitespace indentation | - numbered | | | | | | | | & newlines | | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| | | table | | | - column width | | | | | | | | | - heading row | | | | |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------| **** ocn |-------------+-----------------------+-----------------------+----------------+------| | objects | section / part | ocn described | how used | type | |-------------+-----------------------+-----------------------+----------------+------| | regular ocn | | | | | |-------------+-----------------------+-----------------------+----------------+------| | | body objects | seq. digit | anchor | ocn | | | | [0-9]+ | visible | | |-------------+-----------------------+-----------------------+----------------+------| | | glossary objects | seq. 
digit | anchor | ocn | | | | [0-9]+ | not-visible | | | | | | (for search) | | |-------------+-----------------------+-----------------------+----------------+------| | | bibliography objects | seq. digit | anchor | ocn | | | | [0-9]+ | not-visible | | | | | | (for search) | | |-------------+-----------------------+-----------------------+----------------+------| | special ocn | | | | | |-------------+-----------------------+-----------------------+----------------+------| | | non-substantive text | x char + seq. digit | anchor | non | | | (within body & blurb) | x[0-9]+ | not-visible | | | | | | (for search) | | |-------------+-----------------------+-----------------------+----------------+------| | | book index | i char + seq. digit | anchor | idx | | | | i[0-9]+ | not-visible | | | | | | (for search) | | |-------------+-----------------------+-----------------------+----------------+------| | without ocn | | | | | |-------------+-----------------------+-----------------------+----------------+------| | | endnotes | ocn of parent object | no ocn | fn | | | | + footnote seq. digit | anchor visible | | |-------------+-----------------------+-----------------------+----------------+------| ** make config - _composite make_ work on composite make a unification of make instructions for each document run extract instructions from all config files, unify the make instructions and provide the result as a single set of make instructions for each document parsed - 1. general, document_make config file (to be applied to all documents unless overridden by document or command line instruction) - 2. local, site specific (site local instructions such as the site's url, cgi location etc.) - 3. each document header, make (the document header contains metadata and may include make instructions for that document) - make - meta - 4. 
command line instruction, make (some make instructions may be passed through the command line) *** instruction sources |----+---------------------------------+----------------------------------------+---------------------+---| | | make instruction source | | varies (applies to) | | |----+---------------------------------+----------------------------------------+---------------------+---| | 0. | unify the following as a single | take into account all the instructions | | | | | set of make instructions | provided below, provide interface | | | |----+---------------------------------+----------------------------------------+---------------------+---| | 1. | document_make file | to be applied to all documents | per directory | | | | "config_document" | (unless subsequently overridden) | (all docs within) | | |----+---------------------------------+----------------------------------------+---------------------+---| | 2. | config file | local site specific | per directory | | | | "config_local_site" | | (all docs within) | | |----+---------------------------------+----------------------------------------+---------------------+---| | 3. | document header make | make instructions contained | per document | | | | | in document header | (single doc) | | |----+---------------------------------+----------------------------------------+---------------------+---| | 4. | command line instruction | make instruction passed | each command | | | | | | (all docs within) | | |----+---------------------------------+----------------------------------------+---------------------+---| *** config & metadata (from instruction sources) |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | 1. document make file | 2. config file | 3. document header | 4. 
command line instruction | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | comment, fixed: | per dir (pod) | per dir | per document (pod) | per command instruction | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | sdl_root_config_document | sdl_root_config_local_site | | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | local site specific | | * | | *? | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | webserv | | | | | | - url_root | | | | | | - path | | | | | | - images | | | | | | - cgi | | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | webserv_cgi | | | | | | - host | | | | | | - base_path | | | | | | - port | | | | | | - user | | | | | | - file_links | | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | processing | | | | | | - path | | | | | | - dir | | | | | | - concord_max | | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | flag (configure) | | (call) | | | | - act0 | | act0 | | | | - act1 | | act1 | | | | - act2 | | act2 | | | | - act3 | | act3 | | | | - act4 | | act4 | | | | - act5 | | act5 | | | | - act6 | | act6 | | | | - act7 | | act7 | | | | - act8 | | act8 | | | | - act9 | | act9 | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | default | | | | | | - papersize | | | | | | - text_wrap | | | | | | - emphasis | | | | | | - language | | | | | | - digest 
| | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | permission | | | | | | - share_source | | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | program_select | | | | | | - editor | | | | | | - epub_viewer | | | | | | - html_viewer | | | | | | - odf_viewer | | | | | | - pdf_viewer | | | | | | - xml_viewer | | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | search | | | | | | - flag | | | | | | - action | | | | | | - db | | | | | | - title | | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | make instruction | ** | omit or override share? | ** | *? | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | make | make | make | | | | - bold | - bold | - bold | | | | - breaks | - breaks | - breaks | | | | - cover_image | - cover_image | - cover_image | | | | - css | - css | - css | | | | - emphasis | - emphasis | - emphasis | | | | - footer | - footer | - footer | | | | - headings | - headings | - headings | | | | - home_button_image | - home_button_image | - home_button_image | | | | - home_button_text | - home_button_text | - home_button_text | | | | - italics | - italics | - italics | | | | - num_top | - num_top | - num_top | | | | - auto_num_depth | - auto_num_depth | - auto_num_depth | | | | - substitute | - substitute | - substitute | | | | - texpdf_font | - texpdf_font | - texpdf_font | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | actions | | | | * | 
|---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | | assertions | | | | | | concordance | | | | | | debug | | | | | | digest | | | | | | docbook | | | | | | epub | | | | | | html | | | | | | html-seg | | | | | | html-scroll | | | | | | manifest | | | | | | ocn | | | | | | odt | | | | | | pdf | | | | | | postgresql | | | | | | qrcode | | | | | | pod | | | | | | source | | | | | | sqlite | | | | | | sqlite-db-create | | | | | | sqlite-db-drop | | | | | | text | | | | | | verbose | | | | | | xhtml | | | | | | xml-dom | | | | | | xml-sax | | | | | | section_toc | | | | | | section_body | | | | | | section_endnotes | | | | | | section_glossary | | | | | | section_biblio | | | | | | section_bookindex | | | | | | section_blurb | | | | | | backmatter | | | | | | skip-output | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | metadata | | | * | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | classify | | | | | | - dewey | | | | | | - keywords | | | | | | - loc | | | | | | - subject | | | | | | - topic_register | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | creator | | | | | | - author | | | | | | - author_email | | | | | | - illustrator | | | | | | - translator | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | date | | | | | | - added_to_site | | | | | | - available | | | | | | - created | | | | | | - issued | | | | | | - modified | | | | | | - published | | | | | | - valid | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | 
identifier | | | | | | - isbn | | | | | | - oclc | | | | | | - pg | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | links | | | | | | - link | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | notes | | | | | | - abstract | | | | | | - description | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | original | | | | | | - language | | | | | | - source | | | | | | - title | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | publisher | | | | | | - name | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | rights | | | | | | - copyright | | | | | | - cover | | | | | | - illustrations | | | | | | - license | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------| | | | | title | | | | | | - edition | | | | | | - full | | | | | | - language | | | | | | - main | | | | | | - note | | | | | | - sub | | | | | | - subtitle | | |---------------------+--------------------------+----------------------------+---------------------+-----------------------------|