From e95c49b76f4ac7bf72c383ee43a0567dfcbf1603 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 10 Sep 2018 18:15:02 -0400 Subject: 0.1.0 renamed doc-reform, doc_reform (& rad) - from sdp --- src/doc_reform/doc_reform.d | 512 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 512 insertions(+) create mode 100755 src/doc_reform/doc_reform.d (limited to 'src/doc_reform/doc_reform.d') diff --git a/src/doc_reform/doc_reform.d b/src/doc_reform/doc_reform.d new file mode 100755 index 0000000..f7c9751 --- /dev/null +++ b/src/doc_reform/doc_reform.d @@ -0,0 +1,512 @@ +#!/usr/bin/env rdmd +module doc_reform.sisu_document_parser; +import + doc_reform.conf.compile_time_info, + doc_reform.meta.metadoc; +import + std.getopt, + std.file, + std.path, + std.process; +import + doc_reform.meta, + doc_reform.meta.metadoc_summary, + doc_reform.meta.metadoc_from_src, + doc_reform.meta.conf_make_meta_structs, + doc_reform.meta.conf_make_meta_toml, + doc_reform.meta.conf_make_meta_json, + doc_reform.meta.defaults, + doc_reform.meta.doc_debugs, + doc_reform.meta.rgx, + doc_reform.source.paths_source, + doc_reform.source.read_config_files, + doc_reform.source.read_source_files, + doc_reform.output.hub; +import std.algorithm; +import std.parallelism; +mixin(import("version.txt")); +mixin CompileTimeInfo; +/++ +name "doc_reform" +description "A SiSU inspired document parser writen in D." +homepage "http://sisudoc.org" ++/ +void main(string[] args) { + mixin SiSUrgxInit; + mixin contentJSONtoSiSUstruct; + mixin SiSUbiblio; + mixin SiSUrgxInitFlags; + mixin outputHub; + string flag_action; + string arg_unrecognized; + enum dAM { abstraction, matters } + static auto rgx = Rgx(); + scope(success) { + debug(checkdoc) { + writefln( + "~ run complete, ok ~ (doc_reform-%s.%s.%s, %s D:%s, %s %s)", + ver.major, ver.minor, ver.patch, + __VENDOR__, __VERSION__, + bits, os, + ); + } + } + scope(failure) { + debug(checkdoc) { + stderr.writefln( + "run failure", + ); + } + } + bool[string] opts = [ + "abstraction" : false, + "assertions" : false, + "concordance" : false, + "debug" : false, + "digest" : false, + "epub" : false, + "html" : false, + "html-seg" : false, + "html-scroll" : false, + "manifest" : false, + "ocn" : true, + "pp2" : false, + "quiet" : false, + "sisupod" : false, + "source" : false, + "sqlite-discrete" : false, + "sqlite-db-create" : false, + "sqlite-db-drop" : false, + "sqlite-db-recreate" : false, + "sqlite-delete" : false, + "sqlite-insert" : false, + "sqlite-update" : false, + "text" : false, + "verbose" : false, + "very-verbose" : false, + "xhtml" : false, + "section_toc" : true, + "section_body" : true, + "section_endnotes" : true, + "section_glossary" : true, + "section_biblio" : true, + "section_bookindex" : true, + "section_blurb" : true, + "backmatter" : true, + "skip-output" : false, + ]; + string[string] settings = [ + "output-dir" : "", + "site-config-dir" : "", + "lang" : "all", + "sqlite-filename" : "documents", + ]; + auto helpInfo = getopt(args, + std.getopt.config.passThrough, + "abstraction", "--abstraction document abstraction ", &opts["abstraction"], + "assert", "--assert set optional assertions on", &opts["assertions"], + "concordance", "--concordance file for document", &opts["concordance"], + "debug", "--debug", &opts["debug"], + "digest", "--digest hash digest for each object", &opts["digest"], + "epub", "--epub process epub output", &opts["epub"], + "html", "--html process html output", &opts["html"], + "html-seg", "--html-seg process html output", &opts["html-seg"], + "html-scroll", "--html-seg process html output", &opts["html-scroll"], + "manifest", "--manifest process manifest output", &opts["manifest"], + "ocn", "--ocn object cite numbers (default)", &opts["ocn"], + "pp2", "--pp2 nested parallelisation", &opts["pp2"], + "quiet", "--quiet output to terminal", &opts["quiet"], + "sisupod", "--sisupod sisupod source content bundled", &opts["sisupod"], + "source", "--source markup source text content", &opts["source"], + "sqlite-discrete", "--sqlite process discrete sqlite output", &opts["sqlite-discrete"], + "sqlite-db-create", "--sqlite-db-create create db, create tables", &opts["sqlite-db-create"], + "sqlite-db-drop", "--sqlite-db-drop drop tables & db", &opts["sqlite-db-drop"], + "sqlite-db-recreate", "--sqlite-db-recreate create db, create tables", &opts["sqlite-db-recreate"], + // "sqlite-db-populate", "--sqlite-db-populate create db & tables, insert specified", &opts["sqlite-db-populate"], + "sqlite-delete", "--sqlite process sqlite output", &opts["sqlite-delete"], + "sqlite-insert", "--sqlite process sqlite output", &opts["sqlite-insert"], + "sqlite-update", "--sqlite process sqlite output", &opts["sqlite-update"], + "text", "--text process text output", &opts["text"], + "txt", "--txt process text output", &opts["text"], + "verbose|v", "--verbose output to terminal", &opts["verbose"], + "very-verbose", "--very-verbose output to terminal", &opts["very-verbose"], + "xhtml", "--xhtml process xhtml output", &opts["xhtml"], + "section-toc", "--section-toc process table of contents (default)", &opts["section_toc"], + "section-body", "--section-body process document body (default)", &opts["section_body"], + "section-endnotes", "--section-endnotes process document endnotes (default)", &opts["section_endnotes"], + "section-glossary", "--section-glossary process document glossary (default)", &opts["section_glossary"], + "section-biblio", "--section-biblio process document biblio (default)", &opts["section_biblio"], + "section-bookindex", "--section-bookindex process document bookindex (default)", &opts["section_bookindex"], + "section-blurb", "--section-blurb process document blurb (default)", &opts["section_blurb"], + "backmatter", "--section-backmatter process document backmatter (default)", &opts["backmatter"], + "skip-output", "--skip-output", &opts["skip-output"], + "output-dir", "--output-dir=[dir path]", &settings["output-dir"], + "site-config-dir", "--site-config-dir=[dir path]", &settings["site-config-dir"], + "sqlite-filename", "--sqlite-filename=[filename].sqlite", &settings["sqlite-filename"], + "lang", "--lang=[lang code e.g. =en or =en,es]", &settings["lang"], + ); + if (helpInfo.helpWanted) { + defaultGetoptPrinter("Some information about the program.", helpInfo.options); + } + enum outTask { sisupod, source, sqlite, sqlite_multi, epub, html_scroll, html_seg, html_stuff } + struct OptActions { + auto assertions() { + return opts["assertions"]; + } + auto concordance() { + return opts["concordance"]; + } + auto debug_do() { + return opts["debug"]; + } + auto digest() { + return opts["digest"]; + } + auto epub() { + return opts["epub"]; + } + auto html() { + bool _is = ( + opts["html"] + || opts["html-seg"] + || opts["html-scroll"] + ) + ? true + : false; + return _is; + } + auto html_seg() { + bool _is = ( + opts["html"] + || opts["html-seg"] + ) + ? true + : false; + return _is; + } + auto html_scroll() { + bool _is = ( + opts["html"] + || opts["html-scroll"] + ) + ? true + : false; + return _is; + } + auto html_stuff() { + bool _is = ( + opts["html"] + || opts["html-scroll"] + || opts["html-seg"] + ) + ? true + : false; + return _is; + } + auto manifest() { + return opts["manifest"]; + } + auto ocn() { + return opts["ocn"]; + } + auto quiet() { + return opts["quiet"]; + } + auto sisupod() { + return opts["sisupod"]; + } + auto source() { + return opts["source"]; + } + auto sqlite_discrete() { + return opts["sqlite-discrete"]; + } + auto sqlite_db_drop() { + bool _is = ( + opts["sqlite-db-recreate"] + || opts["sqlite-db-drop"] + ) + ? true + : false; + return _is; + } + auto sqlite_db_create() { + bool _is = ( + opts["sqlite-db-recreate"] + || opts["sqlite-db-create"] + ) + ? true + : false; + return _is; + } + auto sqlite_insert() { + return opts["sqlite-insert"]; + } + auto sqlite_delete() { + return opts["sqlite-delete"]; + } + auto sqlite_update() { + return opts["sqlite-update"]; + } + auto text() { + return opts["text"]; + } + auto verbose() { + bool _is = ( + opts["verbose"] + || opts["very-verbose"] + ) + ? true + : false; + return _is; + } + auto very_verbose() { + return opts["very-verbose"]; + } + auto xhtml() { + return opts["xhtml"]; + } + auto section_toc() { + return opts["section_toc"]; + } + auto section_body() { + return opts["section_body"]; + } + auto section_endnotes() { + return opts["section_endnotes"]; + } + auto section_glossary() { + return opts["section_glossary"]; + } + auto section_biblio() { + return opts["section_biblio"]; + } + auto section_bookindex() { + return opts["section_bookindex"]; + } + auto section_blurb() { + return opts["section_blurb"]; + } + auto backmatter() { + return opts["backmatter"]; + } + auto skip_output() { + return opts["skip-output"]; + } + auto languages_set() { + return settings["lang"].split(","); + } + auto output_dir_set() { + return settings["output-dir"]; + } + auto sqlite_filename() { + return settings["sqlite-filename"]; + } + auto pp2() { + return opts["pp2"]; + } + auto output_task_scheduler() { + int[] schedule; + if (sisupod) { + schedule ~= outTask.sisupod; + } + if (source) { + schedule ~= outTask.source; + } + if (sqlite_discrete) { + schedule ~= outTask.sqlite; + } + if (epub) { + schedule ~= outTask.epub; + } + if (html_scroll) { + schedule ~= outTask.html_scroll; + } + if (html_seg) { + schedule ~= outTask.html_seg; + } + if (html_stuff) { + schedule ~= outTask.html_stuff; + } + return schedule.sort().uniq; + } + auto abstraction() { + bool _is = ( + opts["abstraction"] + || concordance + || epub + || html + || manifest + || sisupod + || source + || sqlite_discrete + || sqlite_delete + || sqlite_insert + || sqlite_update + ) + ? true + : false; + return _is; + } + } + auto _opt_action = OptActions(); + auto _env = [ + "pwd" : environment["PWD"], + "home" : environment["HOME"], + ]; + auto _manifest_start = PodManifest!()(""); + auto _manifest_matter = PathMatters!()(_opt_action, _env, ""); + auto _manifests = [ _manifest_matter ]; + foreach(arg; args[1..$]) { + _manifest_start = PodManifest!()(arg); + if (arg.match(rgx.flag_action)) { + flag_action ~= " " ~ arg; // flags not taken by getopt + } else if ( + !(arg.match(rgx.src_pth_sst_or_ssm)) + && _manifest_start.pod_manifest_file_with_path + && _opt_action.abstraction + ) { + string contents_location_raw_; + string contents_location_; + string sisudoc_txt_ = _manifest_start.pod_manifest_file_with_path; + enforce( + exists(sisudoc_txt_)!=0, + "file not found: «" ~ + sisudoc_txt_ ~ "»" + ); + if (exists(sisudoc_txt_)) { + try { + if (exists(sisudoc_txt_)) { + contents_location_raw_ = sisudoc_txt_.readText; + } + } catch (ErrnoException ex) { + } catch (FileException ex) { + // Handle errors + } + if (contents_location_raw_.match(rgx.pod_content_location)) { // (file name followed by language codes \n)+ + foreach (m; contents_location_raw_.matchAll(rgx.pod_content_location)) { + foreach (n; m.captures[2].matchAll(rgx.language_codes)) { + contents_location_ ~= "media/text/" ~ n.captures[1].to!string ~ "/" ~ m.captures[1].to!string ~ "\n"; + } + } + } else { + contents_location_ = contents_location_raw_; + } + } else { + writeln("manifest not found: ", sisudoc_txt_); + } + auto contents_locations_arr + = (cast(char[]) contents_location_).split; + auto tmp_dir_ = (sisudoc_txt_).dirName.array; + foreach (contents_location; contents_locations_arr) { + assert(contents_location.match(rgx.src_pth_sst_or_ssm), + "not a recognised file: «" ~ + contents_location ~ "»" + ); + auto contents_location_pth_ = (contents_location).to!string; + auto lang_rgx_ = regex(r"/(" ~ _opt_action.languages_set.join("|") ~ ")/"); + if (_opt_action.languages_set[0] == "all" + || (contents_location_pth_).match(lang_rgx_) + ) { + auto _fns = (((tmp_dir_).chainPath(contents_location_pth_)).array).to!string; + _manifest_matter = PathMatters!()(_opt_action, _env, arg, _fns, contents_locations_arr); + _manifests ~= _manifest_matter; + } + } + } else if (arg.match(rgx.src_pth_sst_or_ssm)) { + if (exists(arg)==0) { + writeln("ERROR >> Processing Skipped! File not found: ", arg); + } else { + _manifest_matter = PathMatters!()(_opt_action, _env, arg, arg); + _manifests ~= _manifest_matter; + } + } else if (arg.match(rgx.src_pth_zip)) { + // fns_src ~= arg; // gather input markup source file names for processing + } else { // anything remaining, unused + arg_unrecognized ~= " " ~ arg; + } + } + if (!(_opt_action.skip_output)) { + debug(steps) { + writeln("step0 commence → (without processing files)"); + } + outputHubOp!()(_env, _opt_action); + debug(steps) { + writeln("- step0 complete"); + } + } + if (_manifests.length > 1) { // _manifests[0] initialized dummy element + foreach(manifest; parallel(_manifests[1..$])) { + if (!empty(manifest.src.filename)) { + scope(success) { + if (!(_opt_action.quiet)) { + writefln( + "%s\n%s", + "~ document complete, ok ~", + "------------------------------------------------------------------", + ); + } + } + scope(failure) { + debug(checkdoc) { + stderr.writefln( + "~ document run failure ~ (%s v%s)\n\t%s\n%s", + __VENDOR__, __VERSION__, + manifest.src.filename, + "------------------------------------------------------------------", + ); + } + } + enforce( + manifest.src.filename.match(rgx.src_pth_types), + "not a sisu markup filename: «" ~ + manifest.src.filename ~ "»" + ); + debug(steps) { + writeln("--->\nstepX commence → (document abstraction)"); + } + auto t = SiSUabstraction!()(_env, _opt_action, manifest); + static assert(!isTypeTuple!(t)); + static assert(t.length==2); + auto doc_abstraction = t[dAM.abstraction]; + auto doc_matters = t[dAM.matters]; + debug(steps) { + writeln("- stepX complete"); + } + /+ ↓ debugs +/ + if (doc_matters.opt.action.verbose) { + SiSUabstractionSummary!()(doc_abstraction, doc_matters); + } + /+ ↓ debugs +/ + if ((doc_matters.opt.action.debug_do) + || (doc_matters.opt.action.verbose) + ) { + SiSUdebugs!()(doc_abstraction, doc_matters); + } + /+ ↓ output hub +/ + if (!(doc_matters.opt.action.skip_output)) { + debug(steps) { + writeln("step5 commence → (process outputs)"); + } + outputHub!()(doc_abstraction, doc_matters); + debug(steps) { + writeln("- step5 complete"); + } + } + scope(exit) { + if (!(_opt_action.quiet)) { + writefln( + "processed file: %s", + manifest.src.filename + ); + } + destroy(manifest); + } + } else { + /+ no recognized filename provided +/ + writeln("no recognized filename"); + break; // terminate, stop + } + } + } +} -- cgit v1.2.3