From 096d12cb15e191dbd83f3399ba9bfef57bc9d826 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 11 Apr 2018 21:37:45 -0400 Subject: 0.26.0 sqlite single statement insertion of objects - d2sqlite3 db.run, begin commit used with insert statement - can be used after upstream fix that should follow d2sqlite3 0.16.0 --- org/default_misc.org | 6 ++ org/default_paths.org | 66 +++++++++++++-- org/default_regex.org | 11 ++- org/meta_conf_make_meta.org | 2 +- org/output_sqlite.org | 52 ++++++------ org/output_sqlite_discrete.org | 182 +++++++++++++++-------------------------- org/output_xmls.org | 2 +- org/sdp.org | 8 +- 8 files changed, 176 insertions(+), 153 deletions(-) (limited to 'org') diff --git a/org/default_misc.org b/org/default_misc.org index a2a6a72..d021565 100644 --- a/org/default_misc.org +++ b/org/default_misc.org @@ -211,6 +211,12 @@ import ** defaults shared by meta & output (generic) *** template: internal markup :markup: +21 special characters used: + +#+BEGIN_SRC text +【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■ +#+END_SRC + #+name: defaults_template_markup #+BEGIN_SRC d template InternalMarkup() { diff --git a/org/default_paths.org b/org/default_paths.org index ff14d50..cc78c46 100644 --- a/org/default_paths.org +++ b/org/default_paths.org @@ -46,7 +46,7 @@ template PodManifest() { mixin SiSUrgxInit; static auto rgx = Rgx(); auto PodManifest(P)( - P _pth = "", + P _pth ) { struct ManifestFile_ { auto pod_manifest_filename() { @@ -68,8 +68,8 @@ template PodManifest() { _manifest_path = m.captures["podpath"]; } } else { - writeln("WARNING, issue with manifest_path: ", _pth); - _manifest_path = _pth; // _manifest_path = null; + writeln("WARNING, issue with manifest_path: ", _pth); // remove? + _manifest_path = null; // _manifest_path = ""; } return _manifest_path; } @@ -118,7 +118,7 @@ template PathMatters() { auto PathMatters(O,E,P,F)( O _opt_actions, E _env, - P _pth = "", + P _pth, F _fns = "", char[][] _manifest_fn_list = [[]], ) { @@ -168,6 +168,9 @@ template PathMatters() { auto manifest_path() { return _manifest.pod_manifest_path; } + auto pod_name() { // TODO decide what returned if src_is_pod == false + return _manifest.pod_manifest_path.baseName; + } auto manifest_file_with_path() { return _manifest.pod_manifest_file_with_path; } @@ -243,7 +246,8 @@ template PathMatters() { assert(_dir == m.captures["dir"]); } else { _dir = asNormalizedPath(path_and_fn.chainPath("../../../")).array; - assert(_dir == absolute_path_to_src.match(rgx.src_base_parent_dir_name).captures["dir"]); + assert(_dir == absolute_path_to_src + .match(rgx.src_base_parent_dir_name).captures["dir"]); } if ((_opt_actions.debug_do)) { writeln("--> (base_dir) ", _dir); @@ -843,6 +847,14 @@ import sdp.meta.rgx; #+END_SRC ** shared out path, base directory :out: +- output paths for content, minimize chance of filename/directory collisions + in particular pods can contain markup that is published under a name that may + be used elsewere + - if the pod directory has a different name from the file append the pod + directory name to the output file name: pod_directory.markup_filename e.g. + sisu-manual.sisu_markup + - if pod and file have same name, keep name (makes no sense to repeat pod + name) NO sisu_markup.sisu_markup should be sisu_markup #+name: template_paths_out #+BEGIN_SRC d @@ -870,6 +882,50 @@ template SiSUoutPaths() { } #+END_SRC +** set + +#+name: template_paths_out +#+BEGIN_SRC d +template SiSUoutPathsFnPd() { + /+ TODO stuff to work out here +/ + auto SiSUoutPathsFnPd(Fn,Pn)( + Fn fn_src_pth, + Pn pod_name + // Pn pod_name = "", + ) { + struct _PathsStruct { + string base_filename() { + return fn_src_pth.baseName.stripExtension; + } + string base_pod_and_filename() { // TODO + /+ + - if pod, + - pod_name + - file_name + - if pod_name == file_name + - file_name + - else if pod_name != file_name + - pod_name.file_name + +/ + auto _fn_src = fn_src_pth.baseName.stripExtension; + string _output_base_name; + if (!(pod_name.empty)) { + if (pod_name == _fn_src) { + _output_base_name = _fn_src; + } else { + _output_base_name = pod_name ~ "." ~ _fn_src; + } + } else { + _output_base_name = _fn_src; + } + return _output_base_name; + } + } + return _PathsStruct(); + } +} +#+END_SRC + ** _html_ :html: #+name: template_paths_html diff --git a/org/default_regex.org b/org/default_regex.org index 9dbab82..c848109 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -22,6 +22,12 @@ http://dlang.org/phobos/std_regex.html - Regex!char (wchar/dchar) that contains a pattern in the form of compiled bytecode. - StaticRegex!char (wchar/dchar) that contains a pattern in the form of compiled native machine code. +21 special characters used: + +#+BEGIN_SRC text +【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■ +#+END_SRC + ** 0. module template #+name: tangle_meta_rgx @@ -251,7 +257,7 @@ static note_ref = ctRegex!(`^\S+?noteref_( #+BEGIN_SRC d static inline_url_generic = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg"); static inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg"); -static inline_link_naked_url = ctRegex!(`(?P^|[ ])(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P[.,;:?!'"]?(?:[ ]|$))`, "mg"); +static inline_link_naked_url = ctRegex!(`(?P^|[ ])(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?P[.,;:?!'"]?(?:[ ]|$))`, "mg"); static inline_link_markup_regular = ctRegex!(`(?P^|[ ])\{\s*(?P.+?)\s*\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P(?:[,;:? ]|[!.]?(?:[ ]|$)))`, "mg"); static inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P.+?)\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P[.,;:?!]?(?:[ ]|$))`, "mg"); static inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(?P.+?)\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg"); @@ -407,6 +413,7 @@ static nbsp_char = ctRegex!(`░`, "mg"); static nbsp_chars_line_start = ctRegex!(`^░+`, "mg"); static nbsp_and_space = ctRegex!(` [ ]`, "mg"); static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg"); +static special_markup_chars = ctRegex!(`[【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■]`, "mg"); #+END_SRC ** filename (& path) (including insert file) :insert:file:path:filename: @@ -486,6 +493,8 @@ static fn_suffix = ctRegex!(`\.fnSuffix`, " static inline_link_fn_suffix = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg"); static inline_seg_link = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg"); static mark_internal_site_lnk = ctRegex!(`¤`, "mg"); +static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg"); +static quotation_mark_various = ctRegex!(q"¶['‘’“”"`´¨]¶", "mg"); #+END_SRC *** inline markup font face mod :inline:font:face: diff --git a/org/meta_conf_make_meta.org b/org/meta_conf_make_meta.org index fba8663..9438d25 100644 --- a/org/meta_conf_make_meta.org +++ b/org/meta_conf_make_meta.org @@ -501,7 +501,7 @@ if ("make" in _json.object) { if ("footer" in _json.object["make"] && (_json.object["make"]["footer"].type().to!string == "STRING") ) { - char[][] __match_footer_array + char[][] __match_footer_array = (cast(char[]) _json.object["make"]["footer"].str) .split(_rgx.make_heading_delimiter); _struct_composite.make_str.footer = __match_footer_array.to!(string[]); diff --git a/org/output_sqlite.org b/org/output_sqlite.org index 82b4310..0e7f125 100644 --- a/org/output_sqlite.org +++ b/org/output_sqlite.org @@ -158,7 +158,7 @@ auto munge_html(O)( auto return ref const O obj, ) { string _html_special_characters(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -168,17 +168,17 @@ auto munge_html(O)( return _txt; } string _html_font_face(string _txt){ - _txt = (_txt) - .replaceAll(rgx.inline_emphasis, ("$1")) - .replaceAll(rgx.inline_bold, ("$1")) - .replaceAll(rgx.inline_underscore, ("$1")) - .replaceAll(rgx.inline_italics, ("$1")) - .replaceAll(rgx.inline_superscript, ("$1")) - .replaceAll(rgx.inline_subscript, ("$1")) - .replaceAll(rgx.inline_strike, ("$1")) - .replaceAll(rgx.inline_insert, ("$1")) - .replaceAll(rgx.inline_mono, ("$1")) - .replaceAll(rgx.inline_cite, ("$1")); + _txt = _txt + .replaceAll(rgx.inline_emphasis, "$1") + .replaceAll(rgx.inline_bold, "$1") + .replaceAll(rgx.inline_underscore, "$1") + .replaceAll(rgx.inline_italics, "$1") + .replaceAll(rgx.inline_superscript, "$1") + .replaceAll(rgx.inline_subscript, "$1") + .replaceAll(rgx.inline_strike, "$1") + .replaceAll(rgx.inline_insert, "$1") + .replaceAll(rgx.inline_mono, "$1") + .replaceAll(rgx.inline_cite, "$1"); return _txt; } string _notes; @@ -210,7 +210,7 @@ auto munge_html(O)( #+name: sanitize_and_munge_inline_html #+BEGIN_SRC d string html_special_characters(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -226,7 +226,7 @@ string html_special_characters(string _txt){ #+name: sanitize_and_munge_inline_html #+BEGIN_SRC d string html_special_characters_code(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -241,17 +241,17 @@ string html_special_characters_code(string _txt){ #+name: sanitize_and_munge_inline_html #+BEGIN_SRC d string html_font_face(string _txt){ - _txt = (_txt) - .replaceAll(rgx.inline_emphasis, ("$1")) - .replaceAll(rgx.inline_bold, ("$1")) - .replaceAll(rgx.inline_underscore, ("$1")) - .replaceAll(rgx.inline_italics, ("$1")) - .replaceAll(rgx.inline_superscript, ("$1")) - .replaceAll(rgx.inline_subscript, ("$1")) - .replaceAll(rgx.inline_strike, ("$1")) - .replaceAll(rgx.inline_insert, ("$1")) - .replaceAll(rgx.inline_mono, ("$1")) - .replaceAll(rgx.inline_cite, ("$1")); + _txt = _txt + .replaceAll(rgx.inline_emphasis, "$1") + .replaceAll(rgx.inline_bold, "$1") + .replaceAll(rgx.inline_underscore, "$1") + .replaceAll(rgx.inline_italics, "$1") + .replaceAll(rgx.inline_superscript, "$1") + .replaceAll(rgx.inline_subscript, "$1") + .replaceAll(rgx.inline_strike, "$1") + .replaceAll(rgx.inline_insert, "$1") + .replaceAll(rgx.inline_mono, "$1") + .replaceAll(rgx.inline_cite, "$1"); return _txt; } #+END_SRC @@ -413,7 +413,7 @@ auto html_table(O)( auto return ref const O obj, string _txt, ) { - string[] _table_rows = (_txt).split(rgx.table_delimiter_row); + string[] _table_rows = _txt.split(rgx.table_delimiter_row); string[] _table_cols; string _table; string _tablenote; diff --git a/org/output_sqlite_discrete.org b/org/output_sqlite_discrete.org index c659441..fced21d 100644 --- a/org/output_sqlite_discrete.org +++ b/org/output_sqlite_discrete.org @@ -45,7 +45,8 @@ template SQLiteDiscreteBuildTablesAndPopulate() { } } template SQLiteInstruct() { - Statement SQLiteInstruct(I)( + void SQLiteInstruct(Db,I)( + Db db, auto ref I doc_matters, ) { <> @@ -56,13 +57,13 @@ template SQLiteDiscreteBuildTablesAndPopulate() { <> <> //<> - <> } } template SQLiteObjectsLoop() { void SQLiteObjectsLoop(P)( auto ref P doc_parts, ) { + <> <> } } @@ -112,7 +113,6 @@ auto generic_munge_sanitize_text_for_search( ) { string _notes; string _urls; - _txt = _txt.replaceAll(rgx.inline_fontface_clean, ""); if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; @@ -210,7 +210,7 @@ auto munge_html(O)( #+name: sanitize_and_munge_inline_html #+BEGIN_SRC d string html_special_characters(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -226,7 +226,7 @@ string html_special_characters(string _txt){ #+name: sanitize_and_munge_inline_html #+BEGIN_SRC d string html_special_characters_code(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -241,7 +241,7 @@ string html_special_characters_code(string _txt){ #+name: sanitize_and_munge_inline_html #+BEGIN_SRC d string html_font_face(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.inline_emphasis, "$1") .replaceAll(rgx.inline_bold, "$1") .replaceAll(rgx.inline_underscore, "$1") @@ -474,10 +474,6 @@ auto html_table(O)( #+name: sqlite_db_initialize #+BEGIN_SRC d -auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); -pth_sqlite.base.mkdirRecurse; -auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); -// auto db = Database(":memory:"); // open database in memory db.run(" #+END_SRC @@ -896,7 +892,6 @@ insert_metadata.bind(":rights_license", doc_matters.conf_make // insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links); insert_metadata.execute(); insert_metadata.reset(); /+ watch +/ -writeln(" ", pth_sqlite.sqlite_file(doc_matters.src.filename)); if ((doc_matters.opt.action.verbose)) { writeln("sql statement executed"); } @@ -920,15 +915,22 @@ insert_metadata.inject( #+END_SRC ****** 2. _doc objects_ (used with doc_objects in document loop) -******* prepare sql statement +******* prepare sql statement, sql utf8 ******** d { sql statement #+name: sqlite_insert_doc_objects #+BEGIN_SRC d -Statement _insert_doc_objects = db.prepare(" +string insertDocObjectsRow(O)(O obj) { + auto sql_insert_delimiter(string _txt) { + _txt = _txt + .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); + return _txt; + } + string _insert_doc_objects_row; + _insert_doc_objects_row = format(q"¶ #+END_SRC -********* sql insert into +********* sql statement: formatted lid unique, increment by 1 per object, not ocn @@ -937,90 +939,54 @@ either: - increment by adding 1 for each document, - make hash of document filename or url and use? +********** INSERT INTO + #+name: sqlite_insert_doc_objects #+BEGIN_SRC sql - INSERT INTO doc_objects ( - lid, - metadata_tid, - ocn, - ocnd, - ocns, - clean, - body, - book_idx, - seg, - lev_an, - lev, - lev0, - lev1, - lev2, - lev3, - lev4, - lev5, - lev6, - lev7, - en_a, - en_z, - en_a_asterisk, - en_z_asterisk, - en_a_plus, - en_z_plus, - t_of, - t_is, - node, - parent, - digest_clean, - digest_all, - types - ) + INSERT INTO doc_objects ( + ocn, + clean, + body, + lev, + t_of, + t_is + ) #+END_SRC -********* sql values +********** VALUES #+name: sqlite_insert_doc_objects #+BEGIN_SRC sql - VALUES ( - :lid, - :metadata_tid, - :ocn, - :ocnd, - :ocns, - :clean, - :body, - :book_idx, - :seg, - :lev_an, - :lev, - :lev0, - :lev1, - :lev2, - :lev3, - :lev4, - :lev5, - :lev6, - :lev7, - :en_a, - :en_z, - :en_a_asterisk, - :en_z_asterisk, - :en_a_plus, - :en_z_plus, - :t_of, - :t_is, - :node, - :parent, - :digest_clean, - :digest_all, - :types - ) + VALUES ( + %s, + '%s', + '%s', + %s, + '%s', + '%s' + ); +#+END_SRC + +********* sql statement: values for formatting + +#+name: sqlite_insert_doc_objects +#+BEGIN_SRC sql + ¶", + obj.ocn, + sql_insert_delimiter(obj_txt["text"]), + sql_insert_delimiter(obj_txt["html"]), + obj.heading_lev_markup, + obj.is_of, + obj.is_a, + ); #+END_SRC ******** d } #+name: sqlite_insert_doc_objects #+BEGIN_SRC d -"); -return _insert_doc_objects; + return _insert_doc_objects_row; +} #+END_SRC ******* TODO (within loop not here - insert doc objects @@ -1273,10 +1239,15 @@ auto table(O)( #+name: sqlite_objects_loop #+BEGIN_SRC d -Statement insert_doc_objects = SQLiteInstruct!()(doc_matters); +auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); +pth_sqlite.base.mkdirRecurse; +auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); +SQLiteInstruct!()(db, doc_matters); // consider best location, need to feed individual objects for sqlite table: doc_objects, possibly a separate template? auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); string[string] obj_txt; string doc_text; +string[] _insert_doc_objects; +_insert_doc_objects ~= "BEGIN;\n"; foreach (part; doc_parts) { foreach (obj; doc_abstraction[part]) { switch (obj.of_part) { @@ -1421,36 +1392,17 @@ foreach (part; doc_parts) { ); } } - insert_doc_objects.bind(":t_of", obj.is_of); - insert_doc_objects.bind(":t_is", obj.is_a); - insert_doc_objects.bind(":ocn", obj.ocn); - insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search - insert_doc_objects.bind(":body", obj_txt["html"]); - // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search - insert_doc_objects.bind(":lev", obj.heading_lev_markup); - // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete - // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info - // insert_doc_objects.bind(":lev0", ""); - // insert_doc_objects.bind(":lev1", ""); - // insert_doc_objects.bind(":lev2", ""); - // insert_doc_objects.bind(":lev3", ""); - // insert_doc_objects.bind(":lev4", ""); - // insert_doc_objects.bind(":lev5", ""); - // insert_doc_objects.bind(":lev6", ""); - // insert_doc_objects.bind(":lev7", ""); - // insert_doc_objects.bind(":node", ""); - // insert_doc_objects.bind(":type", ""); - // insert_doc_objects.bind(":parent_ocn", ""); - // insert_doc_objects.bind(":ancestors", ""); - // insert_doc_objects.bind(":heading_lev_markup", ""); - // insert_doc_objects.bind(":heading_lev_collapsed", ""); - // insert_doc_objects.bind(":parent_lev_markup", ""); - // insert_doc_objects.bind(":heading_ancestors", ""); - // insert_doc_objects.bind(":node", ""); - insert_doc_objects.execute(); insert_doc_objects.reset(); - } + if (!(obj.is_a == "comment")) { + _insert_doc_objects ~= insertDocObjectsRow(obj); + } + } // loop closes +} +_insert_doc_objects ~= "COMMIT"; +debug(sql_statement) { + writeln("#+BEGIN_SRC sql\n", _insert_doc_objects.join, "\n#+END_SRC"); } -insert_doc_objects.finalize(); +std.utf.validate(_insert_doc_objects.join); // TODO +db.run(_insert_doc_objects.join.to!(char[]).toUTF8); #+END_SRC * __END__ diff --git a/org/output_xmls.org b/org/output_xmls.org index 0c79bf9..5855d65 100644 --- a/org/output_xmls.org +++ b/org/output_xmls.org @@ -241,7 +241,7 @@ auto site_info_button(Dm)( .replaceAll( rgx.br_nl, ""); } else { - writeln("WARNING home button text expected"); + _locations = "

SiSU

\n

www.sisudoc.org

\n

sources / git

"; } string o; o = format(q"¶
diff --git a/org/sdp.org b/org/sdp.org index 202bf9b..4057d96 100644 --- a/org/sdp.org +++ b/org/sdp.org @@ -26,7 +26,7 @@ struct Version { int minor; int patch; } -enum ver = Version(0, 25, 0); +enum ver = Version(0, 26, 0); #+END_SRC ** compilation restrictions (supported compilers) @@ -449,8 +449,8 @@ auto _env = [ "pwd" : environment["PWD"], "home" : environment["HOME"], ]; -auto _manifest_start = PodManifest!()(); -auto _manifest_matter = PathMatters!()(_opt_action, _env); +auto _manifest_start = PodManifest!()(""); +auto _manifest_matter = PathMatters!()(_opt_action, _env, ""); auto _manifests = [ _manifest_matter ]; foreach(arg; args[1..$]) { _manifest_start = PodManifest!()(arg); @@ -504,7 +504,7 @@ foreach(arg; args[1..$]) { if (_opt_action.languages_set[0] == "all" || (contents_location_pth_).match(lang_rgx_) ) { - auto _fns = (((tmp_dir_).chainPath(contents_location_pth_)).array).to!(string); + auto _fns = (((tmp_dir_).chainPath(contents_location_pth_)).array).to!string; _manifest_matter = PathMatters!()(_opt_action, _env, arg, _fns, contents_locations_arr); _manifests ~= _manifest_matter; // TODO how to capture? } -- cgit v1.2.3