From 2a359a57ce021ba6ddf7232b3f3d79726d4738ba Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 17 Oct 2017 13:36:41 -0400 Subject: sqlite document db discrete & collection, started - discrete document, drop & recreate document --sqlite-discrete - document collection - create tables --sqlite-create - populate db with specified document --sqlite-update --- docs/ao_abstract_doc_source.html | 17 - docs/ao_conf_make_meta.html | 22 - docs/ao_conf_make_meta_native.html | 16 - docs/ao_conf_make_meta_sdlang.html | 16 - docs/ao_defaults.html | 15 - docs/ao_object_setter.html | 17 - docs/ao_output_debugs.html | 15 - docs/ao_read_config_files.html | 17 - docs/ao_read_source_files.html | 17 - docs/ao_rgx.html | 16 - docs/compile_time_info.html | 15 - docs/output_html.html | 10 - docs/output_hub.html | 16 - docs/sdp.html | 16 - org/default_misc.org | 204 ++--- org/default_paths.org | 33 + org/meta_abstraction.org | 27 +- org/meta_conf_make_meta.org | 14 +- org/output_hub.org | 15 +- org/output_sqlite.org | 187 +++-- org/output_sqlite_discrete.org | 1437 ++++++++++++++++++++++++++++++++++ org/output_xmls.org | 1 + org/sdp.org | 6 +- src/sdp/meta/conf_make_meta_sdlang.d | 14 +- src/sdp/meta/defaults.d | 204 ++--- src/sdp/meta/metadoc_from_src.d | 27 +- src/sdp/output/epub3.d | 1 + src/sdp/output/hub.d | 8 +- src/sdp/output/paths_output.d | 26 + src/sdp/output/sqlite.d | 372 +++++---- src/sdp/output/sqlite_discrete.d | 1085 +++++++++++++++++++++++++ src/sdp/sdp.d | 6 +- 32 files changed, 3172 insertions(+), 720 deletions(-) delete mode 100644 docs/ao_abstract_doc_source.html delete mode 100644 docs/ao_conf_make_meta.html delete mode 100644 docs/ao_conf_make_meta_native.html delete mode 100644 docs/ao_conf_make_meta_sdlang.html delete mode 100644 docs/ao_defaults.html delete mode 100644 docs/ao_object_setter.html delete mode 100644 docs/ao_output_debugs.html delete mode 100644 docs/ao_read_config_files.html delete mode 100644 docs/ao_read_source_files.html delete mode 
100644 docs/ao_rgx.html delete mode 100644 docs/compile_time_info.html delete mode 100644 docs/output_html.html delete mode 100644 docs/output_hub.html delete mode 100644 docs/sdp.html create mode 100644 org/output_sqlite_discrete.org create mode 100644 src/sdp/output/sqlite_discrete.d diff --git a/docs/ao_abstract_doc_source.html b/docs/ao_abstract_doc_source.html deleted file mode 100644 index bbd7f34..0000000 --- a/docs/ao_abstract_doc_source.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - ao_abstract_doc_source - -

ao_abstract_doc_source

-

-
template SiSUdocAbstraction()
-
document abstraction: - abstraction of sisu markup for downstream processing - ao_abstract_doc_source.d

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/ao_conf_make_meta.html b/docs/ao_conf_make_meta.html deleted file mode 100644 index a807764..0000000 --- a/docs/ao_conf_make_meta.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - ao_conf_make_meta - -

ao_conf_make_meta

-

-
template SiSUheaderExtractHub()
-
extract native/orig header return associative array
-

-the header is passed as text (lopped off top of a sisu markup file until the - required first heading ^A~), determine whether is a native header or sdlang one - with a regex check if whether it contains the "native header" required tag/field - @title: then process accordingly as a "native header" or "sdlang header" - converting the metadata and make instructions to a common json format used by - program internally. Moved to associative array.

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/ao_conf_make_meta_native.html b/docs/ao_conf_make_meta_native.html deleted file mode 100644 index 387e379..0000000 --- a/docs/ao_conf_make_meta_native.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - ao_conf_make_meta_native - -

ao_conf_make_meta_native

-

-
template SiSUheaderExtractNative()
-
native headers using
@title:
:subtitle:
type tags
- extract native/orig header return associative array

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/ao_conf_make_meta_sdlang.html b/docs/ao_conf_make_meta_sdlang.html deleted file mode 100644 index cfb6f24..0000000 --- a/docs/ao_conf_make_meta_sdlang.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - ao_conf_make_meta_sdlang - -

ao_conf_make_meta_sdlang

-

-
template SiSUheaderExtractSDLang()
-
sdlang headers
- extract sdlang header return sdlang

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/ao_defaults.html b/docs/ao_defaults.html deleted file mode 100644 index 280220e..0000000 --- a/docs/ao_defaults.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - ao_defaults - -

ao_defaults

-

-
template SiSUregisters()
-
default settings

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/ao_object_setter.html b/docs/ao_object_setter.html deleted file mode 100644 index 05e4e67..0000000 --- a/docs/ao_object_setter.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - ao_object_setter - -

ao_object_setter

-

-
template ObjectSetter()
-
object setter: - setting of sisu objects for downstream processing - ao_object_setter.d

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/ao_output_debugs.html b/docs/ao_output_debugs.html deleted file mode 100644 index d57e800..0000000 --- a/docs/ao_output_debugs.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - ao_output_debugs - -

ao_output_debugs

-

-
template SiSUoutputDebugs()
-
output debugs

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/ao_read_config_files.html b/docs/ao_read_config_files.html deleted file mode 100644 index d5db7c1..0000000 --- a/docs/ao_read_config_files.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - ao_read_config_files - -

ao_read_config_files

-

-
template SiSUconfigIn()
-
read configuration files
- - read config files
- ao_config_files.d

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/ao_read_source_files.html b/docs/ao_read_source_files.html deleted file mode 100644 index a77a5b2..0000000 --- a/docs/ao_read_source_files.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - ao_read_source_files - -

ao_read_source_files

-

-
template SiSUmarkupRaw()
-
module ao_read_source_files;
- - open markup files
- - if master file scan for addional files to import/insert

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/ao_rgx.html b/docs/ao_rgx.html deleted file mode 100644 index eedd524..0000000 --- a/docs/ao_rgx.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - ao_rgx - -

ao_rgx

-

-
template RgxInit()
-
regex:
-regular expressions used in sisu document parser

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/compile_time_info.html b/docs/compile_time_info.html deleted file mode 100644 index 5e992f8..0000000 --- a/docs/compile_time_info.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - compile_time_info - -

compile_time_info

-

-
template CompileTimeInfo()
-
compile_time_info

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/output_html.html b/docs/output_html.html deleted file mode 100644 index 041521b..0000000 --- a/docs/output_html.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - output_html - -

output_html

-

- -
Page generated by Ddoc. - diff --git a/docs/output_hub.html b/docs/output_hub.html deleted file mode 100644 index fd1c3b3..0000000 --- a/docs/output_hub.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - output_hub - -

output_hub

-

-
template SiSUoutputHub()
-
output hub
- check & generate output types requested

- -
-
- -
Page generated by Ddoc. - diff --git a/docs/sdp.html b/docs/sdp.html deleted file mode 100644 index 60af1b6..0000000 --- a/docs/sdp.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - sdp - -

sdp

-

-
void main(string[] args); -
-
A SiSU document parser writen in D.

- -
-
- -
Page generated by Ddoc. - diff --git a/org/default_misc.org b/org/default_misc.org index 920ed95..b6845b7 100644 --- a/org/default_misc.org +++ b/org/default_misc.org @@ -65,109 +65,119 @@ import #+name: meta_defaults_template_registers #+BEGIN_SRC d struct ConfCompositeMake { - string bold = ""; - string breaks = ""; - string cover_image = ""; - string css = ""; - string emphasis = ""; - string footer = ""; - string headings = ""; - string home_button_image = ""; - string home_button_text = ""; - string italics = ""; - string num_top = ""; - string num_depth = ""; - string substitute = ""; - string texpdf_font = ""; + string bold = ""; + string breaks = ""; + string cover_image = ""; + string css = ""; + string emphasis = ""; + string footer = ""; + string headings = ""; + string home_button_image = ""; + string home_button_text = ""; + string italics = ""; + string num_top = ""; + string num_depth = ""; + string substitute = ""; + string texpdf_font = ""; } struct ConfCompositeMakeStr { - string bold = ""; - string breaks = ""; - string cover_image = ""; - string css = ""; - string emphasis = ""; - string footer = ""; - string headings = ""; - string home_button_image = ""; - string home_button_text = ""; - string italics = ""; - string num_top = ""; - string num_depth = ""; - string substitute = ""; - string texpdf_font = ""; + string bold = ""; + string breaks = ""; + string cover_image = ""; + string css = ""; + string emphasis = ""; + string footer = ""; + string headings = ""; + string home_button_image = ""; + string home_button_text = ""; + string italics = ""; + string num_top = ""; + string num_depth = ""; + string substitute = ""; + string texpdf_font = ""; } struct ConfCompositeSiteLocal { - string webserv_url_root = ""; - string webserv_path = ""; - string webserv_images = ""; - string webserv_cgi = ""; - string webserv_cgi_host = ""; - string webserv_cgi_host_path = ""; - string webserv_cgi_port = ""; - string webserv_cgi_user = ""; - string 
webserv_cgi_file_links = ""; - string processing_path = ""; - string processing_dir = ""; - string processing_concord_max = ""; - string flag_act0 = ""; - string flag_act1 = ""; - string flag_act2 = ""; - string flag_act3 = ""; - string flag_act4 = ""; - string flag_act5 = ""; - string flag_act6 = ""; - string flag_act7 = ""; - string flag_act8 = ""; - string flag_act9 = ""; - string default_papersize = ""; - string default_text_wrap = ""; - string default_emphasis = ""; - string default_language = ""; - string default_digest = ""; - string permission_share_source = ""; - string search_flag = ""; - string search_action = ""; - string search_db = ""; - string search_title = ""; + string webserv_url_root = ""; + string webserv_path = ""; + string webserv_images = ""; + string webserv_cgi = ""; + string webserv_cgi_host = ""; + string webserv_cgi_host_path = ""; + string webserv_cgi_port = ""; + string webserv_cgi_user = ""; + string webserv_cgi_file_links = ""; + string processing_path = ""; + string processing_dir = ""; + string processing_concord_max = ""; + string flag_act0 = ""; + string flag_act1 = ""; + string flag_act2 = ""; + string flag_act3 = ""; + string flag_act4 = ""; + string flag_act5 = ""; + string flag_act6 = ""; + string flag_act7 = ""; + string flag_act8 = ""; + string flag_act9 = ""; + string default_papersize = ""; + string default_text_wrap = ""; + string default_emphasis = ""; + string default_language = ""; + string default_digest = ""; + string permission_share_source = ""; + string search_flag = ""; + string search_action = ""; + string search_db = ""; + string search_title = ""; } struct MetaComposite { - string classify_dewey = ""; - string classify_keywords = ""; - string classify_loc = ""; - string classify_subject = ""; - string classify_topic_register = ""; - string creator_author = ""; - string creator_author_email = ""; - string creator_illustrator = ""; - string creator_translator = ""; - string date_added_to_site = ""; - string 
date_available = ""; - string date_created = ""; - string date_issued = ""; - string date_modified = ""; - string date_published = ""; - string date_valid = ""; - string identifier_isbn = ""; - string identifier_oclc = ""; - string identifier_pg = ""; - string links = ""; - string notes_abstract = ""; - string notes_description = ""; - string original_language = ""; - string original_source = ""; - string original_title = ""; - string publisher = ""; - string rights_copyright = ""; - string rights_cover = ""; - string rights_illustrations = ""; - string rights_license = ""; - string title_edition = ""; - string title_full = ""; - string title_language = ""; - string title_main = ""; - string title_note = ""; - string title_sub = ""; - string title_subtitle = ""; + string classify_dewey = ""; + string classify_keywords = ""; + string classify_loc = ""; + string classify_subject = ""; + string classify_topic_register = ""; + string creator_author = ""; + string creator_author_email = ""; + string creator_illustrator = ""; + string creator_translator = ""; + string date_added_to_site = ""; + string date_available = ""; + string date_created = ""; + string date_issued = ""; + string date_modified = ""; + string date_published = ""; + string date_valid = ""; + string identifier_isbn = ""; + string identifier_oclc = ""; + string identifier_pg = ""; + string language_document = ""; + string language_document_char = ""; + string links = ""; + string notes_abstract = ""; + string notes_description = ""; + string original_language = ""; + string original_language_char = ""; + string original_publisher = ""; + string original_source = ""; + string original_title = ""; + string publisher = ""; + string rights_copyright = ""; + string rights_copyright_audio = ""; + string rights_copyright_cover = ""; + string rights_copyright_illustrations = ""; + string rights_copyright_photographs = ""; + string rights_copyright_text = ""; + string rights_copyright_translation = ""; + string 
rights_copyright_video = ""; + string rights_license = ""; + string title_edition = ""; + string title_full = ""; + string title_language = ""; + string title_main = ""; + string title_note = ""; + string title_short = ""; + string title_sub = ""; + string title_subtitle = ""; } struct ConfComposite { MetaComposite meta; diff --git a/org/default_paths.org b/org/default_paths.org index 49a43b7..ebb50bc 100644 --- a/org/default_paths.org +++ b/org/default_paths.org @@ -451,6 +451,39 @@ template SiSUpathsEPUB() { #+END_SRC ** _sqlite_ :sqlite: +*** discrete + +#+name: template_paths_sqlite +#+BEGIN_SRC d +template SiSUpathsSQLiteDiscrete() { + mixin SiSUrgxInit; + static auto rgx = Rgx(); + auto SiSUpathsSQLiteDiscrete(Ps,Lng)( + Ps src_pth_info, + Lng lng, + ) { + auto out_pth = SiSUoutPaths!()(src_pth_info, lng); + string base_dir = "sqlite"; + struct _PathsStruct { + string base_filename(string fn_src) { + return fn_src.baseName.stripExtension; + } + string base() { + return (out_pth.output_base).chainPath(base_dir).array; + } + string seg(string fn_src) { + return base.chainPath(base_filename(fn_src)).array; + } + string sqlite_file(string fn_src) { + return base.chainPath(base_filename(fn_src) ~ ".sqlite").array; + } + } + return _PathsStruct(); + } +} +#+END_SRC + +*** TODO collection #+name: template_paths_sqlite #+BEGIN_SRC d diff --git a/org/meta_abstraction.org b/org/meta_abstraction.org index 299d70c..4a6b859 100644 --- a/org/meta_abstraction.org +++ b/org/meta_abstraction.org @@ -1880,7 +1880,8 @@ foreach (ref obj; the_document_head_section) { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -1912,7 +1913,8 @@ if 
(the_table_of_contents_section["scroll"].length > 1) { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -1975,7 +1977,8 @@ if (the_document_body_section.length > 1) { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -2022,7 +2025,8 @@ if (the_endnotes_section.length > 1) { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -2062,7 +2066,8 @@ if (the_glossary_section.length > 1) { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -2106,7 +2111,8 @@ if (the_bibliography_section.length > 1) { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || 
(opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -2155,7 +2161,8 @@ if (the_bookindex_section["scroll"].length > 1) { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -2193,7 +2200,8 @@ if (the_bookindex_section["scroll"].length > 1) { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -2238,7 +2246,8 @@ if (the_blurb_section.length > 1) { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); diff --git a/org/meta_conf_make_meta.org b/org/meta_conf_make_meta.org index f441a00..ce9dc7c 100644 --- a/org/meta_conf_make_meta.org +++ b/org/meta_conf_make_meta.org @@ -241,6 +241,7 @@ private auto docSDLtoStruct(C,Tag)(C _conf_composite, Tag header_sdlang) { } if ("original" in header_sdlang.maybe.tags) { _conf_composite.meta.original_language = extractSDLangTabOrAttrib(header_sdlang, "original", "language"); + 
_conf_composite.meta.original_language_char = extractSDLangTabOrAttrib(header_sdlang, "original", "language_char"); _conf_composite.meta.original_source = extractSDLangTabOrAttrib(header_sdlang, "original", "source"); _conf_composite.meta.original_title = extractSDLangTabOrAttrib(header_sdlang, "original", "title"); } @@ -248,10 +249,15 @@ private auto docSDLtoStruct(C,Tag)(C _conf_composite, Tag header_sdlang) { // _conf_composite.meta.publisher = extractSDLangTabOrAttrib(header_sdlang, "publisher", ""); } if ("rights" in header_sdlang.maybe.tags) { - _conf_composite.meta.rights_copyright = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright"); - _conf_composite.meta.rights_cover = extractSDLangTabOrAttrib(header_sdlang, "rights", "cover"); - _conf_composite.meta.rights_illustrations = extractSDLangTabOrAttrib(header_sdlang, "rights", "illustrations"); - _conf_composite.meta.rights_license = extractSDLangTabOrAttrib(header_sdlang, "rights", "license"); + _conf_composite.meta.rights_copyright = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright"); + _conf_composite.meta.rights_copyright_text = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_text"); + _conf_composite.meta.rights_copyright_audio = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_audio"); + _conf_composite.meta.rights_copyright_cover = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_cover"); + _conf_composite.meta.rights_copyright_illustrations = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_illustrations"); + _conf_composite.meta.rights_copyright_photographs = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_photographs"); + _conf_composite.meta.rights_copyright_translation = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_translation"); + _conf_composite.meta.rights_copyright_video = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_video"); + _conf_composite.meta.rights_license = 
extractSDLangTabOrAttrib(header_sdlang, "rights", "license"); } if (_conf_composite.meta.creator_author.empty) { if ("creator" in header_sdlang.maybe.tags) { diff --git a/org/output_hub.org b/org/output_hub.org index f159b7f..699e1e6 100644 --- a/org/output_hub.org +++ b/org/output_hub.org @@ -50,6 +50,7 @@ import sdp.output, sdp.output.epub3, sdp.output.html, sdp.output.sqlite, + sdp.output.sqlite_discrete, sdp.output.xmls, sdp.output.source_sisupod, sdp.output.create_zip_file, @@ -162,10 +163,21 @@ if (doc_matters.opt_action["odt"]) { #+END_SRC *** sqlite +**** discrete #+name: output_options #+BEGIN_SRC d -if (doc_matters.opt_action["sqlite"]) { +if (doc_matters.opt_action["sqlite-discrete"]) { + if ((doc_matters.opt_action["verbose"])) { writeln("sqlite processing... "); } + SQLiteDiscreteBuildTablesAndPopulate!()(doc_abstraction, doc_matters); +} +#+END_SRC + +**** collection + +#+name: output_options +#+BEGIN_SRC d +if (doc_matters.opt_action["sqlite-update"]) { if ((doc_matters.opt_action["verbose"])) { writeln("sqlite processing... 
"); } SQLiteBuildTablesAndPopulate!()(doc_abstraction, doc_matters); } @@ -173,6 +185,7 @@ if (doc_matters.opt_action["sqlite"]) { *** sqlite (create, drop) +**** collection #+name: output_options_op #+BEGIN_SRC d if ((config["sqlite-create"])) { diff --git a/org/output_sqlite.org b/org/output_sqlite.org index bb679e2..4f5029e 100644 --- a/org/output_sqlite.org +++ b/org/output_sqlite.org @@ -1,4 +1,4 @@ -#+TITLE: sdp output html +#+TITLE: sdp output sqlite #+AUTHOR: Ralph Amissah #+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] #+DESCRIPTION: documents - structuring, publishing in multiple formats & search @@ -27,7 +27,6 @@ template SQLiteBuildTablesAndPopulate() { ) { import d2sqlite3; import std.typecons : Nullable; - import sdp.output.sqlite; mixin SiSUoutputRgxInit; static auto rgx = Rgx(); template SQLiteFormatAndLoadObject() { @@ -50,13 +49,13 @@ template SQLiteBuildTablesAndPopulate() { auto ref I doc_matters, ) { <> - <> - <> - <> - <> + <> + <> + /* */<> + <> <> <> - <> + //<> <> } } @@ -90,7 +89,6 @@ import sdp.output; import std.file, std.conv : to; -import sdp.output.sqlite; #+END_SRC ** 2. format and load template @@ -469,9 +467,11 @@ auto html_table(O)( #+name: sqlite_db_initialize #+BEGIN_SRC d auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.src_path_info, doc_matters.language); -auto db = Database(pth_sqlite.sqlite_file(doc_matters.source_filename)); +auto db = Database(pth_sqlite.sqlite_file(doc_matters.environment["pwd"].baseName)); // auto db = Database(":memory:"); // open database in memory -db.run(" +if (doc_matters.opt_action["sqlite-create"]) { + if ((doc_matters.opt_action["verbose"])) { writeln("sqlite create tables... "); } + db.run(" #+END_SRC ******* 0. 
drop table if exists @@ -499,6 +499,7 @@ CREATE TABLE metadata_and_text ( title_language VARCHAR(100) NULL, title_language_char VARCHAR(6) NULL, creator_author VARCHAR(600) NULL, + creator_author_email VARCHAR(100) NULL, creator_author_hon VARCHAR(100) NULL, creator_author_nationality VARCHAR(100) NULL, creator_editor VARCHAR(600) NULL, @@ -531,14 +532,14 @@ CREATE TABLE metadata_and_text ( original_source VARCHAR(600) NULL, original_institution VARCHAR(600) NULL, original_nationality VARCHAR(100) NULL, - rights VARCHAR(2500) NULL, - rights_copyright_text VARCHAR(2500) NULL, - rights_copyright_translation VARCHAR(2500) NULL, + original_title VARCHAR(800) NULL, + rights_copyright VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_cover VARCHAR(2500) NULL, rights_copyright_illustrations VARCHAR(2500) NULL, rights_copyright_photographs VARCHAR(2500) NULL, - rights_copyright_preparation VARCHAR(2500) NULL, - rights_copyright_digitization VARCHAR(2500) NULL, - rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, rights_copyright_video VARCHAR(2500) NULL, rights_license VARCHAR(2500) NULL, identifier_oclc VARCHAR(30) NULL, @@ -614,11 +615,41 @@ CREATE TABLE doc_objects ( #+name: sqlite_close_initialize #+BEGIN_SRC d -"); + "); +} #+END_SRC ***** prepare insert statements and do pre-loop inserts ****** 1. 
_metadata & src_ text +******* TODO sql remove selected +******** d { sql statement + +##+name: sqlite_remove_selected +#+BEGIN_SRC d +Statement insert_metadata = db.prepare(" +#+END_SRC + +********* remove selected + +identify and remove, identify by +- filename & language code +- (in ruby get_first_value and remove by fn & ln) + +##+name: sqlite_remove_selected +#+BEGIN_SRC sql + SELECT tid + FROM metadata_and_text + WHERE src_filename = '#{doc_matters.conf_make_meta.meta.src_filename}' + AND metadata_and_text.language_document_char = '#{doc_matters.conf_make_meta.meta.language_document_char}' +#+END_SRC + +******** d } + +##+name: sqlite_remove_selected +#+BEGIN_SRC d +"); +#+END_SRC + ******* sql statement ******** d { sql statement @@ -647,6 +678,7 @@ either: title_language, title_language_char, creator_author, + creator_author_email, creator_author_hon, creator_author_nationality, creator_editor, @@ -679,14 +711,14 @@ either: original_source, original_institution, original_nationality, - rights, - rights_copyright_text, - rights_copyright_translation, + original_title, + rights_copyright, + rights_copyright_audio, + rights_copyright_cover, rights_copyright_illustrations, rights_copyright_photographs, - rights_copyright_preparation, - rights_copyright_digitization, - rights_copyright_audio, + rights_copyright_text, + rights_copyright_translation, rights_copyright_video, rights_license, identifier_oclc, @@ -733,6 +765,7 @@ either: :title_language, :title_language_char, :creator_author, + :creator_author_email, :creator_author_hon, :creator_author_nationality, :creator_editor, @@ -765,14 +798,14 @@ either: :original_source, :original_institution, :original_nationality, - :rights, - :rights_copyright_text, - :rights_copyright_translation, + :original_title, + :rights_copyright, + :rights_copyright_audio, + :rights_copyright_cover, :rights_copyright_illustrations, :rights_copyright_photographs, - :rights_copyright_preparation, - :rights_copyright_digitization, - 
:rights_copyright_audio, + :rights_copyright_text, + :rights_copyright_translation, :rights_copyright_video, :rights_license, :identifier_oclc, @@ -815,74 +848,66 @@ either: #+name: sqlite_insert_metadata_and_src_text #+BEGIN_SRC d +// insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment?? insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full); -// insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); -// insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); -// insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // -// insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); -// insert_metadata.bind(":title_note", doc_matters.conf_make_meta.meta.title_note); -// insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); +insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); +insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); +insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // +insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); +insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); // insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char); +insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey); +insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords); +insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); +insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); +insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); 
insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author); -// insert_metadata.bind(":creator_author_hon", doc_matters.conf_make_meta.meta.creator_author_hon); -// insert_metadata.bind(":creator_author_nationality", doc_matters.conf_make_meta.meta.creator_author_nationality); +insert_metadata.bind(":creator_author_email", doc_matters.conf_make_meta.meta.creator_author_email); // insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor); // insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor); -// insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); +insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); // insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer); -// insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); -// insert_metadata.bind(":creator_prepared_by", doc_matters.conf_make_meta.meta.creator_prepared_by); -// insert_metadata.bind(":creator_digitized_by", doc_matters.conf_make_meta.meta.creator_digitized_by); +insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); // insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio); // insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video); -// insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); -// insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); -// insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); -// insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); -// insert_metadata.bind(":date_added_to_site", 
doc_matters.conf_make_meta.meta.date_added_to_site); -// insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); -// insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); -// insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); -// insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); -// insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); -// insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); +insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); +insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); +insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); +insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); +insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); +insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); +insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); // insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated); // insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication); // insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated); -// insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher)); -// insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); -// insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); -// insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char); -// insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); 
-// insert_metadata.bind(":original_institution", doc_matters.conf_make_meta.meta.original_institution); -// insert_metadata.bind(":original_nationality", doc_matters.conf_make_meta.meta.original_nationality); +insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); +insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); +insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); +insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); +// insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); +// insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); +insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); +insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); +insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); +insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); +insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char); +insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); +insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title); +insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher); // insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights); -// insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); -// insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); -// insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_illustrations); -// 
insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_photographs); +insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright); +insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio); +insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover); +insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations); +insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs); // insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation); +insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); +insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); +insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video); // insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization); // insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio); // insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video); -// insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); -// insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); -// insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); -// insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); -// insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); -// insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); -// 
insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); -// insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); -// insert_metadata.bind(":notes_comment", doc_matters.conf_make_meta.meta.notes_comment); -// insert_metadata.bind(":notes_coverage", doc_matters.conf_make_meta.meta.notes_coverage); -// insert_metadata.bind(":notes_relation", doc_matters.conf_make_meta.meta.notes_relation); -// insert_metadata.bind(":notes_history", doc_matters.conf_make_meta.meta.notes_history); -// insert_metadata.bind(":notes_type", doc_matters.conf_make_meta.meta.notes_type); -// insert_metadata.bind(":notes_format", doc_matters.conf_make_meta.meta.notes_format); -// insert_metadata.bind(":notes_prefix", doc_matters.conf_make_meta.meta.notes_prefix); -// insert_metadata.bind(":notes_prefix_a", doc_matters.conf_make_meta.meta.notes_prefix_a); -// insert_metadata.bind(":notes_prefix_b", doc_matters.conf_make_meta.meta.notes_prefix_b); -// insert_metadata.bind(":notes_suffix", doc_matters.conf_make_meta.meta.notes_suffix); +insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); // insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename); // insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint); // insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize); diff --git a/org/output_sqlite_discrete.org b/org/output_sqlite_discrete.org new file mode 100644 index 0000000..41096b5 --- /dev/null +++ b/org/output_sqlite_discrete.org @@ -0,0 +1,1437 @@ +#+TITLE: sdp output sqlite discrete +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+DESCRIPTION: documents - structuring, publishing in multiple formats & search +#+KEYWORDS +#+LANGUAGE: en +#+STARTUP: indent content +#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t 
+#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc +#+OPTIONS: author:nil email:nil creator:nil timestamp:nil +#+PROPERTY: header-args :padline no :exports code :noweb yes +#+EXPORT_SELECT_TAGS: export +#+EXPORT_EXCLUDE_TAGS: noexport +#+FILETAGS: :sdp:niu:output: +#+TAGS: assert(a) class(c) debug(d) mixin(m) sdp(s) tangle(T) template(t) WEB(W) noexport(n) + +* sql +** 0. module, templates + +#+BEGIN_SRC d :tangle ../src/sdp/output/sqlite_discrete.d +module sdp.output.sqlite_discrete; +<> +template SQLiteDiscreteBuildTablesAndPopulate() { + void SQLiteDiscreteBuildTablesAndPopulate(D,I)( + auto ref const D doc_abstraction, + auto ref I doc_matters, + ) { + import d2sqlite3; + import std.typecons : Nullable; + mixin SiSUoutputRgxInit; + static auto rgx = Rgx(); + template SQLiteFormatAndLoadObject() { + auto SQLiteFormatAndLoadObject(I)( + auto ref I doc_matters, + ) { + mixin SiSUoutputRgxInit; + struct sqlite_format_and_load_objects { + <> + <> + <> + <> + <> + } + return sqlite_format_and_load_objects(); + } + } + template SQLiteInstruct() { + Statement SQLiteInstruct(I)( + auto ref I doc_matters, + ) { + <> + <> + <> + /* */<> + <> + <> + <> + //<> + <> + } + } + template SQLiteObjectsLoop() { + void SQLiteObjectsLoop(P)( + auto ref P doc_parts, + ) { + <> + } + } + SQLiteObjectsLoop!()(doc_matters.keys_seq.sql); + } +} +template SQLiteDiscreteTablesCreate() { + void SQLiteDiscreteTablesCreate()() { + writeln("table create"); + } +} +template SQLiteDiscreteTablesDrop() { + void SQLiteDiscreteTablesDrop()() { + writeln("table drop"); + } +} +#+END_SRC + +** 1. imports + +#+name: output_imports +#+BEGIN_SRC d +import sdp.output; +import + std.file, + std.conv : to; +#+END_SRC + +** 2. format and load template +*** 1. prepare objects (munge, sanitize, markup) +**** 1. 
_text_ generic munge (sanitize text for search) + +- [3/4] (search text, applies to all but code blocks) + - [ ] remove whitespace, paragraph on single line (formatting kept so far) + - [X] remove font face attributes + - [X] move embedded endnotes + - [X] place after text object + - [X] remove embedded endnote numbers (rely on html output to represent) + - [X] urls + - [X] clean url markers, leave plain link text + - [X] place urls after text object and its endnotes + +#+name: sanitize_text_for_search +#+BEGIN_SRC d +auto generic_munge_sanitize_text_for_search( + string _txt, +) { + string _notes; + string _urls; + _txt = _txt.replaceAll(rgx.inline_fontface_clean, ""); + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { + _notes ~= "\n" ~ m["text"]; + } + _txt = _txt.replaceAll(rgx.inline_notes_al_gen, ""); + } + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + if (m["link"].match(rgx.url)) { + _urls ~= "\n" ~ m["link"]; + } + } + _txt = _txt.replaceAll(rgx.inline_link_clean, ""); + } + if (_notes.length > 0) { + _txt ~= _notes; + } + if (_urls.length > 0) { + _txt ~= _urls; + } + if (doc_matters.opt_action["debug"]) { + writeln(_txt, "\n"); + } + debug(sql_text_clean) { + writeln(_txt); + } + return _txt; +} +#+END_SRC + +**** 2. _html_ + +light html objects +- place endnotes after text object +- light inline html markup + +***** munge +****** general munge (special characters, inline markup, move notes) + +#+name: sanitize_and_mmunge_inline_html +#+BEGIN_SRC d +auto munge_html(O)( + auto return ref const O obj, +) { + string _html_special_characters(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.xhtml_line_break, "
"); + return _txt; + } + string _html_font_face(string _txt){ + _txt = (_txt) + .replaceAll(rgx.inline_emphasis, ("$1")) + .replaceAll(rgx.inline_bold, ("$1")) + .replaceAll(rgx.inline_underscore, ("$1")) + .replaceAll(rgx.inline_italics, ("$1")) + .replaceAll(rgx.inline_superscript, ("$1")) + .replaceAll(rgx.inline_subscript, ("$1")) + .replaceAll(rgx.inline_strike, ("$1")) + .replaceAll(rgx.inline_insert, ("$1")) + .replaceAll(rgx.inline_mono, ("$1")) + .replaceAll(rgx.inline_cite, ("$1")); + return _txt; + } + string _notes; + string _urls; + string _txt = _html_font_face(_html_special_characters(obj.text)); + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { + _notes ~= "\n" ~ m["text"]; + } + _txt = _txt.replaceAll(rgx.inline_notes_al_gen_ref, "$1 "); + } + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + } + _txt = _txt.replaceAll(rgx.inline_link_clean, ""); + } + if (_notes.length > 0) { + _txt ~= _notes; + } + if (doc_matters.opt_action["debug"]) { + writeln(_txt, "\n"); + } + return _txt; +} +#+END_SRC + +****** special characters + +#+name: sanitize_and_mmunge_inline_html +#+BEGIN_SRC d +string html_special_characters(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.xhtml_line_break, "
"); + return _txt; +} +#+END_SRC + +****** special characters for code + +#+name: sanitize_and_mmunge_inline_html +#+BEGIN_SRC d +string html_special_characters_code(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " "); + return _txt; +} +#+END_SRC + +****** font_face + +#+name: sanitize_and_mmunge_inline_html +#+BEGIN_SRC d +string html_font_face(string _txt){ + _txt = (_txt) + .replaceAll(rgx.inline_emphasis, ("$1")) + .replaceAll(rgx.inline_bold, ("$1")) + .replaceAll(rgx.inline_underscore, ("$1")) + .replaceAll(rgx.inline_italics, ("$1")) + .replaceAll(rgx.inline_superscript, ("$1")) + .replaceAll(rgx.inline_subscript, ("$1")) + .replaceAll(rgx.inline_strike, ("$1")) + .replaceAll(rgx.inline_insert, ("$1")) + .replaceAll(rgx.inline_mono, ("$1")) + .replaceAll(rgx.inline_cite, ("$1")); + return _txt; +} +#+END_SRC + +***** objects +****** heading + +#+name: html_objects +#+BEGIN_SRC d +auto html_heading(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶

+ %s +

¶", + obj.is_a, + _txt, + ); + return o; +} +#+END_SRC + +******* +fancy+ + +##+name: prepare_objects_html +#+BEGIN_SRC d +auto html_heading(O)( + auto return ref const O obj, +) { + string o; + string _txt = munge_html(obj); + o = format(q"¶ + %s + ¶", + obj.heading_lev_markup, + obj.is_a, + _txt, + obj.heading_lev_markup, + ); + return o; +} +#+END_SRC + +****** para + +#+name: html_objects +#+BEGIN_SRC d +auto html_para(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + _txt = (obj.bullet) ? ("●  " ~ _txt) : _txt; + string o = format(q"¶

+ %s +

¶", + obj.is_a, + obj.indent_hang, + obj.indent_base, + _txt + ); + return o; +} +#+END_SRC + +****** quote + +#+name: html_objects +#+BEGIN_SRC d +auto html_quote(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶

+ %s +

¶", + obj.is_a, + _txt + ); + return o; +} +#+END_SRC + +****** group + +#+name: html_objects +#+BEGIN_SRC d +auto html_group(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶

+ %s +

¶", + obj.is_a, + _txt + ); + return o; +} +#+END_SRC + +****** block + +#+name: html_objects +#+BEGIN_SRC d +auto html_block(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶ +

%s

¶", + obj.is_a, + _txt.stripRight + ); + return o; +} +#+END_SRC + +****** verse + +#+name: html_objects +#+BEGIN_SRC d +auto html_verse(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶

%s

¶", + obj.is_a, + _txt + ); + return o; +} +#+END_SRC + +****** code + +#+name: html_objects +#+BEGIN_SRC d +auto html_code(O)( + auto return ref const O obj, +) { + string _txt = html_special_characters_code(obj.text); + string o = format(q"¶

%s

¶", + obj.is_a, + _txt + ); + return o; +} +#+END_SRC + +****** table + +#+name: html_objects +#+BEGIN_SRC d +auto html_table(O)( + auto return ref const O obj, +) { + auto _tablarize(O)( + auto return ref const O obj, + string _txt, + ) { + string[] _table_rows = (_txt).split(rgx.table_delimiter_row); + string[] _table_cols; + string _table; + string _tablenote; + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + _table ~= ""; + foreach(col_idx, cell; _table_cols) { + if ((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) + _tablenote ~= cell; + } else { + string _col_is = (row_idx == 0 && obj.table_heading) ? "th" : "td"; + string _align = ("style=\"text-align:" + ~ ((obj.table_column_aligns[col_idx] == "l") + ? "left\"" : "right\"")); + _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table_column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">"; + _table ~= cell; + _table ~= ""; + } + } + _table ~= ""; + } + auto t = tuple( + _table, + _tablenote, + ); + return t; + } + string _txt = munge_html(obj); + auto t = _tablarize(obj, _txt); + _txt = t[0]; + string _note = t[1]; + string o = format(q"¶

+ + %s +
+ %s +

¶", + obj.is_a, + _txt, + _note + ); + return o; +} +#+END_SRC + +*** 2. _sqlite_ (loop preparation & pre-loop action) +***** initialize: prepare statements drop & create db & tables anew +****** d sqlite statements (initialize database) { + +#+name: sqlite_db_initialize +#+BEGIN_SRC d +auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.src_path_info, doc_matters.language); +auto db = Database(pth_sqlite.sqlite_file(doc_matters.source_filename)); +// auto db = Database(":memory:"); // open database in memory +db.run(" +#+END_SRC + +******* 0. drop table if exists + +#+name: sqlite_table_initialize +#+BEGIN_SRC sql +DROP TABLE IF EXISTS metadata_and_text; +DROP TABLE IF EXISTS doc_objects; +DROP TABLE IF EXISTS urls; +#+END_SRC + +******* 1. create tables +******** metadata & src text + +#+name: sqlite_create_table_metadata_and_src_txt +#+BEGIN_SRC sql +CREATE TABLE metadata_and_text ( + tid BIGINT PRIMARY KEY, + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NULL, /*NOT*/ + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_note VARCHAR(2500) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NULL, + creator_author_email VARCHAR(100) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NULL, /*NOT*/ + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) 
NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + publisher VARCHAR(600) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) NULL, + original_title VARCHAR(800) NULL, + rights_copyright VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_cover VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + src_filename VARCHAR(256) NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + src_text TEXT NULL, + fulltext TEXT NULL, + links TEXT NULL +); +#+END_SRC + +******** doc_objects + +#+name: sqlite_create_table_objects +#+BEGIN_SRC sql +CREATE TABLE doc_objects ( + lid BIGINT PRIMARY KEY, + metadata_tid BIGINT REFERENCES 
metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + book_idx TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev0 SMALLINT, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + lev7 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(256), + digest_all CHAR(256), + types CHAR(1) NULL +) +#+END_SRC + +****** d } + +#+name: sqlite_close_initialize +#+BEGIN_SRC d +"); +#+END_SRC + +***** prepare insert statements and do pre-loop inserts +****** 1. _metadata & src_ text +******* sql statement +******** d { sql statement + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC d +Statement insert_metadata = db.prepare(" +#+END_SRC + +tid document number unique +either: +- increment by adding 1 for each document, +- make hash of document filename or url and use? 
+ +********* sql insert into + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC sql + INSERT INTO metadata_and_text ( + tid, + title, + title_main, + title_sub, + title_short, + title_edition, + title_note, + title_language, + title_language_char, + creator_author, + creator_author_email, + creator_author_hon, + creator_author_nationality, + creator_editor, + creator_contributor, + creator_illustrator, + creator_photographer, + creator_translator, + creator_prepared_by, + creator_digitized_by, + creator_audio, + creator_video, + language_document, + language_document_char, + language_original, + language_original_char, + date_added_to_site, + date_available, + date_created, + date_issued, + date_modified, + date_published, + date_valid, + date_translated, + date_original_publication, + date_generated, + publisher, + original_publisher, + original_language, + original_language_char, + original_source, + original_institution, + original_nationality, + original_title, + rights_copyright, + rights_copyright_audio, + rights_copyright_cover, + rights_copyright_illustrations, + rights_copyright_photographs, + rights_copyright_text, + rights_copyright_translation, + rights_copyright_video, + rights_license, + identifier_oclc, + identifier_isbn, + classify_topic_register, + classify_subject, + classify_loc, + classify_dewey, + classify_keywords, + notes_abstract, + notes_description, + notes_comment, + notes_coverage, + notes_relation, + notes_history, + notes_type, + notes_format, + notes_prefix, + notes_prefix_a, + notes_prefix_b, + notes_suffix, + src_filename, + src_fingerprint, + src_filesize, + src_wordcount, + src_text, + fulltext, + links + ) +#+END_SRC + +********* sql values + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC sql + VALUES ( + :tid, + :title, + :title_main, + :title_sub, + :title_short, + :title_edition, + :title_note, + :title_language, + :title_language_char, + :creator_author, + :creator_author_email, + :creator_author_hon, + 
:creator_author_nationality, + :creator_editor, + :creator_contributor, + :creator_illustrator, + :creator_photographer, + :creator_translator, + :creator_prepared_by, + :creator_digitized_by, + :creator_audio, + :creator_video, + :language_document, + :language_document_char, + :language_original, + :language_original_char, + :date_added_to_site, + :date_available, + :date_created, + :date_issued, + :date_modified, + :date_published, + :date_valid, + :date_translated, + :date_original_publication, + :date_generated, + :publisher, + :original_publisher, + :original_language, + :original_language_char, + :original_source, + :original_institution, + :original_nationality, + :original_title, + :rights_copyright, + :rights_copyright_audio, + :rights_copyright_cover, + :rights_copyright_illustrations, + :rights_copyright_photographs, + :rights_copyright_text, + :rights_copyright_translation, + :rights_copyright_video, + :rights_license, + :identifier_oclc, + :identifier_isbn, + :classify_topic_register, + :classify_subject, + :classify_loc, + :classify_dewey, + :classify_keywords, + :notes_abstract, + :notes_description, + :notes_comment, + :notes_coverage, + :notes_relation, + :notes_history, + :notes_type, + :notes_format, + :notes_prefix, + :notes_prefix_a, + :notes_prefix_b, + :notes_suffix, + :src_filename, + :src_fingerprint, + :src_filesize, + :src_wordcount, + :src_text, + :fulltext, + :links + ) +#+END_SRC + +******** d } + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC d +"); +#+END_SRC + +******* TODO insert document_metadata + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC d +// insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment?? 
+insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full); +insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); +insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); +insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // +insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); +insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); +// insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char); +insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey); +insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords); +insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); +insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); +insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); +insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author); +insert_metadata.bind(":creator_author_email", doc_matters.conf_make_meta.meta.creator_author_email); +// insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor); +// insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor); +insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); +// insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer); +insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); +// insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio); +// insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video); 
+insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); +insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); +insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); +insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); +insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); +insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); +insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); +// insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated); +// insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication); +// insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated); +insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); +insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); +insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); +insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); +// insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); +// insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); +insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); +insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); +insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); +insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); +insert_metadata.bind(":original_language_char", 
doc_matters.conf_make_meta.meta.original_language_char); +insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); +insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title); +insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher); +// insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights); +insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright); +insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio); +insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover); +insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations); +insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs); +// insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation); +insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); +insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); +insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video); +// insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization); +// insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio); +// insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video); +insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); +// insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename); +// insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint); +// 
insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize); +// insert_metadata.bind(":src_wordcount", doc_matters.conf_make_meta.meta.src_wordcount); +// insert_metadata.bind(":src_text", doc_matters.conf_make_meta.meta.src_text); +// insert_metadata.bind(":fulltext", doc_matters.conf_make_meta.meta.fulltext); +// insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links); +insert_metadata.execute(); insert_metadata.reset(); +/+ watch +/ +writeln("sql statement executed"); +assert(db.totalChanges == 1); +#+END_SRC + +###+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC d +insert_metadata.inject( + 1, + doc_matters.dochead_meta["title"]["full"], + "", + "", + "", + "", + "", + "", + "", + doc_matters.dochead_meta["creator"]["author"] +); +#+END_SRC + +****** 2. _doc objects_ (used with doc_objects in document loop) +******* prepare sql statement +******** d { sql statement + +#+name: sqlite_insert_doc_objects +#+BEGIN_SRC d +Statement insert_doc_objects = db.prepare(" +#+END_SRC + +********* sql insert into + +lid unique, increment by 1 per object, not ocn + +metadata tid document number unique +either: +- increment by adding 1 for each document, +- make hash of document filename or url and use? 
+ +#+name: sqlite_insert_doc_objects +#+BEGIN_SRC sql + INSERT INTO doc_objects ( + lid, + metadata_tid, + ocn, + ocnd, + ocns, + clean, + body, + book_idx, + seg, + lev_an, + lev, + lev0, + lev1, + lev2, + lev3, + lev4, + lev5, + lev6, + lev7, + en_a, + en_z, + en_a_asterisk, + en_z_asterisk, + en_a_plus, + en_z_plus, + t_of, + t_is, + node, + parent, + digest_clean, + digest_all, + types + ) +#+END_SRC + +********* sql values + +#+name: sqlite_insert_doc_objects +#+BEGIN_SRC sql + VALUES ( + :lid, + :metadata_tid, + :ocn, + :ocnd, + :ocns, + :clean, + :body, + :book_idx, + :seg, + :lev_an, + :lev, + :lev0, + :lev1, + :lev2, + :lev3, + :lev4, + :lev5, + :lev6, + :lev7, + :en_a, + :en_z, + :en_a_asterisk, + :en_z_asterisk, + :en_a_plus, + :en_z_plus, + :t_of, + :t_is, + :node, + :parent, + :digest_clean, + :digest_all, + :types + ) +#+END_SRC + +******** d } + +#+name: sqlite_insert_doc_objects +#+BEGIN_SRC d +"); +return insert_doc_objects; +#+END_SRC + +******* TODO (within loop not here - insert doc objects + +work out + +*** 3. 
hub (sqlite_format_and_load_objects) +***** sql related + +#+name: sqlite_load_object +#+BEGIN_SRC d +auto sqlite_load_string(O,Dm)( + auto return ref const O obj, + auto return ref Dm doc_matters, +) { + string o; + return o; +} +#+END_SRC + +#+name: sqlite_load_object +#+BEGIN_SRC d +auto postgresql_load_string(O,Dm)( + auto return ref const O obj, + auto return ref Dm doc_matters, +) { + string o; + return o; +} +#+END_SRC + +#+name: sqlite_load_object +#+BEGIN_SRC d +string sqlite_statement(O)( + auto return ref const O obj, + string _txt, + string _html, +) { + void _sql_exe(O)( + string _sql, + ) { + writeln(_html); + writeln(_sql); + } + string _sql; + return _sql; +} +#+END_SRC + +***** heading + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto heading(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_heading(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** para + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto para(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_para(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** quote + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto quote(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_quote(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } 
else { + // load sql + } + return obj_txt; +} +#+END_SRC +***** group + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto group(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_group(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** block + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto block(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_block(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** verse + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto verse(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_verse(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** code + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto code(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_code(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** table + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto table(O)( + auto return ref const O obj, +) { + 
string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_table(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +** 3. loop, identify, load - loop template + +#+name: sqlite_objects_loop +#+BEGIN_SRC d +Statement insert_doc_objects = SQLiteInstruct!()(doc_matters); +auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); +string[string] obj_txt; +string doc_text; +foreach (part; doc_parts) { + foreach (obj; doc_abstraction[part]) { + switch (obj.of_part) { + case "frontmatter": assert(part == "head"); + switch (obj.is_of) { + case "para": + switch (obj.is_a) { + case "heading": + obj_txt = format_and_sqlite_load.heading(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); + } + break; + } + break; + case "body": // assert(part == "body"); // TODO broken + switch (obj.is_of) { + case "para": + switch (obj.is_a) { + case "heading": + debug (asserts) { // TODO consider and fix or remove + if (part != "body") { + writeln(__LINE__, ": ", obj.text); + } + } + obj_txt = format_and_sqlite_load.heading(obj); + break; + case "para": + obj_txt = format_and_sqlite_load.para(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + case "block": + switch (obj.is_a) { + case "quote": + obj_txt = format_and_sqlite_load.quote(obj); + break; + case "group": + obj_txt = format_and_sqlite_load.group(obj); + break; + case "block": + obj_txt = format_and_sqlite_load.block(obj); + break; + case "poem": // double check on keeping both poem & verse + break; + case "verse": + 
obj_txt = format_and_sqlite_load.verse(obj); + break; + case "code": + obj_txt = format_and_sqlite_load.code(obj); + break; + case "table": + obj_txt = format_and_sqlite_load.table(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); + } + break; + } + break; + case "backmatter": + assert(part == "endnotes" || "glossary" || "bibliography" || "bookindex_scroll" || "blurb" || "tail"); + switch (obj.is_of) { + case "para": + switch (obj.is_a) { + case "heading": + obj_txt = format_and_sqlite_load.heading(obj); + break; + case "endnote": assert(part == "endnotes"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "glossary": assert(part == "glossary"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "bibliography": assert(part == "bibliography"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "bookindex": assert(part == "bookindex_scroll"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "blurb": assert(part == "blurb"); + obj_txt = format_and_sqlite_load.para(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); + } + break; + } + break; + case "comment": + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.of_part); // check where empty value could come from + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from + } + break; + } + if (obj.is_a == "heading") { + writeln( + "markup: ", obj.heading_lev_markup, + "> ", obj.dom_markedup, + "; collapsed: ", 
obj.heading_lev_collapsed, + "> ", obj.dom_collapsed, + "; ocn: ", obj.ocn, + " node: ", obj.node, + "; parent: ", obj.parent_lev_markup, + "; ocn: ", obj.parent_ocn, + "; ", + ); + } + insert_doc_objects.bind(":t_of", obj.is_of); + insert_doc_objects.bind(":t_is", obj.is_a); + insert_doc_objects.bind(":ocn", obj.ocn); + insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search + insert_doc_objects.bind(":body", obj_txt["html"]); + // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search + insert_doc_objects.bind(":lev", obj.heading_lev_markup); + // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete + // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info + // insert_doc_objects.bind(":lev0", ""); + // insert_doc_objects.bind(":lev1", ""); + // insert_doc_objects.bind(":lev2", ""); + // insert_doc_objects.bind(":lev3", ""); + // insert_doc_objects.bind(":lev4", ""); + // insert_doc_objects.bind(":lev5", ""); + // insert_doc_objects.bind(":lev6", ""); + // insert_doc_objects.bind(":lev7", ""); + // insert_doc_objects.bind(":node", ""); + // insert_doc_objects.bind(":type", ""); + // insert_doc_objects.bind(":parent_ocn", ""); + // insert_doc_objects.bind(":ancestors", ""); + // insert_doc_objects.bind(":heading_lev_markup", ""); + // insert_doc_objects.bind(":heading_lev_collapsed", ""); + // insert_doc_objects.bind(":parent_lev_markup", ""); + // insert_doc_objects.bind(":heading_ancestors", ""); + // insert_doc_objects.bind(":node", ""); + insert_doc_objects.execute(); insert_doc_objects.reset(); + } +} +#+END_SRC diff --git a/org/output_xmls.org b/org/output_xmls.org index 793827e..a24e1e2 100644 --- a/org/output_xmls.org +++ b/org/output_xmls.org @@ -2369,6 +2369,7 @@ void outputEPub3(D,I)( } } if (obj.is_a == "heading") { + 
assert(obj.text.length > 0); if (obj.heading_lev_markup <= 4) { oepbs_content_parts["manifest_documents"] ~= format(q"¶ diff --git a/org/sdp.org b/org/sdp.org index a37104e..74deb39 100644 --- a/org/sdp.org +++ b/org/sdp.org @@ -248,7 +248,8 @@ bool[string] opts = [ "qrcode" : false, "sisupod" : false, "source" : false, - "sqlite" : false, + "sqlite-discrete" : false, + "sqlite-update" : false, "sqlite-create" : false, "sqlite-drop" : false, "text" : false, @@ -287,9 +288,10 @@ auto helpInfo = getopt(args, "qrcode", "--qrcode with document metadata", &opts["qrcode"], "sisupod", "--sisupod sisupod source content bundled", &opts["sisupod"], "source", "--source markup source text content", &opts["source"], + "sqlite-discrete", "--sqlite process discrete sqlite output", &opts["sqlite-discrete"], "sqlite-create", "--sqlite-create create db, create tables", &opts["sqlite-create"], "sqlite-drop", "--sqlite-drop drop tables & db", &opts["sqlite-drop"], - "sqlite", "--sqlite process sqlite output", &opts["sqlite"], + "sqlite-update", "--sqlite process sqlite output", &opts["sqlite-update"], "text", "--text process text output", &opts["text"], "txt", "--txt process text output", &opts["text"], "verbose|v", "--verbose output to terminal", &opts["verbose"], diff --git a/src/sdp/meta/conf_make_meta_sdlang.d b/src/sdp/meta/conf_make_meta_sdlang.d index 2bded69..478cea4 100644 --- a/src/sdp/meta/conf_make_meta_sdlang.d +++ b/src/sdp/meta/conf_make_meta_sdlang.d @@ -161,6 +161,7 @@ static template SiSUextractSDLang() { } if ("original" in header_sdlang.maybe.tags) { _conf_composite.meta.original_language = extractSDLangTabOrAttrib(header_sdlang, "original", "language"); + _conf_composite.meta.original_language_char = extractSDLangTabOrAttrib(header_sdlang, "original", "language_char"); _conf_composite.meta.original_source = extractSDLangTabOrAttrib(header_sdlang, "original", "source"); _conf_composite.meta.original_title = extractSDLangTabOrAttrib(header_sdlang, "original", 
"title"); } @@ -168,10 +169,15 @@ static template SiSUextractSDLang() { // _conf_composite.meta.publisher = extractSDLangTabOrAttrib(header_sdlang, "publisher", ""); } if ("rights" in header_sdlang.maybe.tags) { - _conf_composite.meta.rights_copyright = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright"); - _conf_composite.meta.rights_cover = extractSDLangTabOrAttrib(header_sdlang, "rights", "cover"); - _conf_composite.meta.rights_illustrations = extractSDLangTabOrAttrib(header_sdlang, "rights", "illustrations"); - _conf_composite.meta.rights_license = extractSDLangTabOrAttrib(header_sdlang, "rights", "license"); + _conf_composite.meta.rights_copyright = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright"); + _conf_composite.meta.rights_copyright_text = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_text"); + _conf_composite.meta.rights_copyright_audio = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_audio"); + _conf_composite.meta.rights_copyright_cover = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_cover"); + _conf_composite.meta.rights_copyright_illustrations = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_illustrations"); + _conf_composite.meta.rights_copyright_photographs = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_photographs"); + _conf_composite.meta.rights_copyright_translation = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_translation"); + _conf_composite.meta.rights_copyright_video = extractSDLangTabOrAttrib(header_sdlang, "rights", "copyright_video"); + _conf_composite.meta.rights_license = extractSDLangTabOrAttrib(header_sdlang, "rights", "license"); } if (_conf_composite.meta.creator_author.empty) { if ("creator" in header_sdlang.maybe.tags) { diff --git a/src/sdp/meta/defaults.d b/src/sdp/meta/defaults.d index ede1523..e598aa8 100644 --- a/src/sdp/meta/defaults.d +++ b/src/sdp/meta/defaults.d @@ -23,109 +23,119 @@ template SiSUregisters() 
{ std.utf, std.conv : to; struct ConfCompositeMake { - string bold = ""; - string breaks = ""; - string cover_image = ""; - string css = ""; - string emphasis = ""; - string footer = ""; - string headings = ""; - string home_button_image = ""; - string home_button_text = ""; - string italics = ""; - string num_top = ""; - string num_depth = ""; - string substitute = ""; - string texpdf_font = ""; + string bold = ""; + string breaks = ""; + string cover_image = ""; + string css = ""; + string emphasis = ""; + string footer = ""; + string headings = ""; + string home_button_image = ""; + string home_button_text = ""; + string italics = ""; + string num_top = ""; + string num_depth = ""; + string substitute = ""; + string texpdf_font = ""; } struct ConfCompositeMakeStr { - string bold = ""; - string breaks = ""; - string cover_image = ""; - string css = ""; - string emphasis = ""; - string footer = ""; - string headings = ""; - string home_button_image = ""; - string home_button_text = ""; - string italics = ""; - string num_top = ""; - string num_depth = ""; - string substitute = ""; - string texpdf_font = ""; + string bold = ""; + string breaks = ""; + string cover_image = ""; + string css = ""; + string emphasis = ""; + string footer = ""; + string headings = ""; + string home_button_image = ""; + string home_button_text = ""; + string italics = ""; + string num_top = ""; + string num_depth = ""; + string substitute = ""; + string texpdf_font = ""; } struct ConfCompositeSiteLocal { - string webserv_url_root = ""; - string webserv_path = ""; - string webserv_images = ""; - string webserv_cgi = ""; - string webserv_cgi_host = ""; - string webserv_cgi_host_path = ""; - string webserv_cgi_port = ""; - string webserv_cgi_user = ""; - string webserv_cgi_file_links = ""; - string processing_path = ""; - string processing_dir = ""; - string processing_concord_max = ""; - string flag_act0 = ""; - string flag_act1 = ""; - string flag_act2 = ""; - string flag_act3 = ""; - 
string flag_act4 = ""; - string flag_act5 = ""; - string flag_act6 = ""; - string flag_act7 = ""; - string flag_act8 = ""; - string flag_act9 = ""; - string default_papersize = ""; - string default_text_wrap = ""; - string default_emphasis = ""; - string default_language = ""; - string default_digest = ""; - string permission_share_source = ""; - string search_flag = ""; - string search_action = ""; - string search_db = ""; - string search_title = ""; + string webserv_url_root = ""; + string webserv_path = ""; + string webserv_images = ""; + string webserv_cgi = ""; + string webserv_cgi_host = ""; + string webserv_cgi_host_path = ""; + string webserv_cgi_port = ""; + string webserv_cgi_user = ""; + string webserv_cgi_file_links = ""; + string processing_path = ""; + string processing_dir = ""; + string processing_concord_max = ""; + string flag_act0 = ""; + string flag_act1 = ""; + string flag_act2 = ""; + string flag_act3 = ""; + string flag_act4 = ""; + string flag_act5 = ""; + string flag_act6 = ""; + string flag_act7 = ""; + string flag_act8 = ""; + string flag_act9 = ""; + string default_papersize = ""; + string default_text_wrap = ""; + string default_emphasis = ""; + string default_language = ""; + string default_digest = ""; + string permission_share_source = ""; + string search_flag = ""; + string search_action = ""; + string search_db = ""; + string search_title = ""; } struct MetaComposite { - string classify_dewey = ""; - string classify_keywords = ""; - string classify_loc = ""; - string classify_subject = ""; - string classify_topic_register = ""; - string creator_author = ""; - string creator_author_email = ""; - string creator_illustrator = ""; - string creator_translator = ""; - string date_added_to_site = ""; - string date_available = ""; - string date_created = ""; - string date_issued = ""; - string date_modified = ""; - string date_published = ""; - string date_valid = ""; - string identifier_isbn = ""; - string identifier_oclc = ""; - string 
identifier_pg = ""; - string links = ""; - string notes_abstract = ""; - string notes_description = ""; - string original_language = ""; - string original_source = ""; - string original_title = ""; - string publisher = ""; - string rights_copyright = ""; - string rights_cover = ""; - string rights_illustrations = ""; - string rights_license = ""; - string title_edition = ""; - string title_full = ""; - string title_language = ""; - string title_main = ""; - string title_note = ""; - string title_sub = ""; - string title_subtitle = ""; + string classify_dewey = ""; + string classify_keywords = ""; + string classify_loc = ""; + string classify_subject = ""; + string classify_topic_register = ""; + string creator_author = ""; + string creator_author_email = ""; + string creator_illustrator = ""; + string creator_translator = ""; + string date_added_to_site = ""; + string date_available = ""; + string date_created = ""; + string date_issued = ""; + string date_modified = ""; + string date_published = ""; + string date_valid = ""; + string identifier_isbn = ""; + string identifier_oclc = ""; + string identifier_pg = ""; + string language_document = ""; + string language_document_char = ""; + string links = ""; + string notes_abstract = ""; + string notes_description = ""; + string original_language = ""; + string original_language_char = ""; + string original_publisher = ""; + string original_source = ""; + string original_title = ""; + string publisher = ""; + string rights_copyright = ""; + string rights_copyright_audio = ""; + string rights_copyright_cover = ""; + string rights_copyright_illustrations = ""; + string rights_copyright_photographs = ""; + string rights_copyright_text = ""; + string rights_copyright_translation = ""; + string rights_copyright_video = ""; + string rights_license = ""; + string title_edition = ""; + string title_full = ""; + string title_language = ""; + string title_main = ""; + string title_note = ""; + string title_short = ""; + string 
title_sub = ""; + string title_subtitle = ""; } struct ConfComposite { MetaComposite meta; diff --git a/src/sdp/meta/metadoc_from_src.d b/src/sdp/meta/metadoc_from_src.d index b4bee7a..80de1f2 100644 --- a/src/sdp/meta/metadoc_from_src.d +++ b/src/sdp/meta/metadoc_from_src.d @@ -1375,7 +1375,8 @@ template SiSUdocAbstraction() { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -1401,7 +1402,8 @@ template SiSUdocAbstraction() { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -1458,7 +1460,8 @@ template SiSUdocAbstraction() { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -1499,7 +1502,8 @@ template SiSUdocAbstraction() { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, 
obj.heading_lev_collapsed); @@ -1533,7 +1537,8 @@ template SiSUdocAbstraction() { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -1571,7 +1576,8 @@ template SiSUdocAbstraction() { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -1614,7 +1620,8 @@ template SiSUdocAbstraction() { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -1652,7 +1659,8 @@ template SiSUdocAbstraction() { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || (opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); @@ -1691,7 +1699,8 @@ template SiSUdocAbstraction() { || (opt_action["html-scroll"]) || (opt_action["html-seg"]) || (opt_action["epub"]) - || (opt_action["sqlite"]) + || (opt_action["sqlite-discrete"]) + || 
(opt_action["sqlite-update"]) || (opt_action["postgresql"])) { obj = obj_dom_structure_set_markup_tags(obj, dom_markedup, obj.heading_lev_markup); obj = obj_dom_set_collapsed_tags(obj, dom_collapsed, obj.heading_lev_collapsed); diff --git a/src/sdp/output/epub3.d b/src/sdp/output/epub3.d index 7851c09..b6987ae 100644 --- a/src/sdp/output/epub3.d +++ b/src/sdp/output/epub3.d @@ -508,6 +508,7 @@ template outputEPub3() { } } if (obj.is_a == "heading") { + assert(obj.text.length > 0); if (obj.heading_lev_markup <= 4) { oepbs_content_parts["manifest_documents"] ~= format(q"¶ diff --git a/src/sdp/output/hub.d b/src/sdp/output/hub.d index d25905f..4f5a363 100644 --- a/src/sdp/output/hub.d +++ b/src/sdp/output/hub.d @@ -8,6 +8,7 @@ template outputHub() { sdp.output.epub3, sdp.output.html, sdp.output.sqlite, + sdp.output.sqlite_discrete, sdp.output.xmls, sdp.output.source_sisupod, sdp.output.create_zip_file, @@ -73,7 +74,11 @@ template outputHub() { /+ mixin outputODT; +/ writeln("odt processing"); } - if (doc_matters.opt_action["sqlite"]) { + if (doc_matters.opt_action["sqlite-discrete"]) { + if ((doc_matters.opt_action["verbose"])) { writeln("sqlite processing... "); } + SQLiteDiscreteBuildTablesAndPopulate!()(doc_abstraction, doc_matters); + } + if (doc_matters.opt_action["sqlite-update"]) { if ((doc_matters.opt_action["verbose"])) { writeln("sqlite processing... 
"); } SQLiteBuildTablesAndPopulate!()(doc_abstraction, doc_matters); } @@ -88,6 +93,7 @@ template outputHubOp() { sdp.output.epub3, sdp.output.html, sdp.output.sqlite, + sdp.output.sqlite_discrete, sdp.output.xmls, sdp.output.source_sisupod, sdp.output.create_zip_file, diff --git a/src/sdp/output/paths_output.d b/src/sdp/output/paths_output.d index ae4d4ac..d66ada8 100644 --- a/src/sdp/output/paths_output.d +++ b/src/sdp/output/paths_output.d @@ -295,6 +295,32 @@ template SiSUpathsEPUB() { return _PathsStruct(); } } +template SiSUpathsSQLiteDiscrete() { + mixin SiSUrgxInit; + static auto rgx = Rgx(); + auto SiSUpathsSQLiteDiscrete(Ps,Lng)( + Ps src_pth_info, + Lng lng, + ) { + auto out_pth = SiSUoutPaths!()(src_pth_info, lng); + string base_dir = "sqlite"; + struct _PathsStruct { + string base_filename(string fn_src) { + return fn_src.baseName.stripExtension; + } + string base() { + return (out_pth.output_base).chainPath(base_dir).array; + } + string seg(string fn_src) { + return base.chainPath(base_filename(fn_src)).array; + } + string sqlite_file(string fn_src) { + return base.chainPath(base_filename(fn_src) ~ ".sqlite").array; + } + } + return _PathsStruct(); + } +} template SiSUpathsSQLite() { mixin SiSUrgxInit; static auto rgx = Rgx(); diff --git a/src/sdp/output/sqlite.d b/src/sdp/output/sqlite.d index e7ef6bf..69c7d51 100644 --- a/src/sdp/output/sqlite.d +++ b/src/sdp/output/sqlite.d @@ -3,7 +3,6 @@ import sdp.output; import std.file, std.conv : to; -import sdp.output.sqlite; template SQLiteBuildTablesAndPopulate() { void SQLiteBuildTablesAndPopulate(D,I)( auto ref const D doc_abstraction, @@ -11,7 +10,6 @@ template SQLiteBuildTablesAndPopulate() { ) { import d2sqlite3; import std.typecons : Nullable; - import sdp.output.sqlite; mixin SiSUoutputRgxInit; static auto rgx = Rgx(); template SQLiteFormatAndLoadObject() { @@ -459,128 +457,132 @@ template SQLiteBuildTablesAndPopulate() { auto ref I doc_matters, ) { auto pth_sqlite = 
SiSUpathsSQLite!()(doc_matters.src_path_info, doc_matters.language); - auto db = Database(pth_sqlite.sqlite_file(doc_matters.source_filename)); + auto db = Database(pth_sqlite.sqlite_file(doc_matters.environment["pwd"].baseName)); // auto db = Database(":memory:"); // open database in memory - db.run(" - DROP TABLE IF EXISTS metadata_and_text; - DROP TABLE IF EXISTS doc_objects; - DROP TABLE IF EXISTS urls; - CREATE TABLE metadata_and_text ( - tid BIGINT PRIMARY KEY, - title VARCHAR(800) NOT NULL, - title_main VARCHAR(400) NULL, /*NOT*/ - title_sub VARCHAR(400) NULL, - title_short VARCHAR(400) NULL, - title_edition VARCHAR(10) NULL, - title_note VARCHAR(2500) NULL, - title_language VARCHAR(100) NULL, - title_language_char VARCHAR(6) NULL, - creator_author VARCHAR(600) NULL, - creator_author_hon VARCHAR(100) NULL, - creator_author_nationality VARCHAR(100) NULL, - creator_editor VARCHAR(600) NULL, - creator_contributor VARCHAR(600) NULL, - creator_illustrator VARCHAR(600) NULL, - creator_photographer VARCHAR(600) NULL, - creator_translator VARCHAR(600) NULL, - creator_prepared_by VARCHAR(600) NULL, - creator_digitized_by VARCHAR(600) NULL, - creator_audio VARCHAR(600) NULL, - creator_video VARCHAR(600) NULL, - language_document VARCHAR(100) NULL, - language_document_char VARCHAR(6) NULL, /*NOT*/ - language_original VARCHAR(100) NULL, - language_original_char VARCHAR(6) NULL, - date_added_to_site VARCHAR(10) NULL, - date_available VARCHAR(10) NULL, - date_created VARCHAR(10) NULL, - date_issued VARCHAR(10) NULL, - date_modified VARCHAR(10) NULL, - date_published VARCHAR(10) NULL, - date_valid VARCHAR(10) NULL, - date_translated VARCHAR(10) NULL, - date_original_publication VARCHAR(10) NULL, - date_generated VARCHAR(10) NULL, - publisher VARCHAR(600) NULL, - original_publisher VARCHAR(600) NULL, - original_language VARCHAR(100) NULL, - original_language_char VARCHAR(6) NULL, - original_source VARCHAR(600) NULL, - original_institution VARCHAR(600) NULL, - 
original_nationality VARCHAR(100) NULL, - rights VARCHAR(2500) NULL, - rights_copyright_text VARCHAR(2500) NULL, - rights_copyright_translation VARCHAR(2500) NULL, - rights_copyright_illustrations VARCHAR(2500) NULL, - rights_copyright_photographs VARCHAR(2500) NULL, - rights_copyright_preparation VARCHAR(2500) NULL, - rights_copyright_digitization VARCHAR(2500) NULL, - rights_copyright_audio VARCHAR(2500) NULL, - rights_copyright_video VARCHAR(2500) NULL, - rights_license VARCHAR(2500) NULL, - identifier_oclc VARCHAR(30) NULL, - identifier_isbn VARCHAR(16) NULL, - classify_topic_register VARCHAR(2500) NULL, - classify_subject VARCHAR(600) NULL, - classify_loc VARCHAR(30) NULL, - classify_dewey VARCHAR(30) NULL, - classify_keywords VARCHAR(600) NULL, - notes_abstract TEXT NULL, - notes_description TEXT NULL, - notes_comment TEXT NULL, - notes_coverage VARCHAR(200) NULL, - notes_relation VARCHAR(200) NULL, - notes_history VARCHAR(600) NULL, - notes_type VARCHAR(600) NULL, - notes_format VARCHAR(600) NULL, - notes_prefix TEXT NULL, - notes_prefix_a TEXT NULL, - notes_prefix_b TEXT NULL, - notes_suffix TEXT NULL, - src_filename VARCHAR(256) NULL, - src_fingerprint VARCHAR(256) NULL, - src_filesize VARCHAR(10) NULL, - src_wordcount VARCHAR(10) NULL, - src_text TEXT NULL, - fulltext TEXT NULL, - links TEXT NULL - ); - - CREATE TABLE doc_objects ( - lid BIGINT PRIMARY KEY, - metadata_tid BIGINT REFERENCES metadata_and_text, - ocn SMALLINT, - ocnd VARCHAR(6), - ocns VARCHAR(6), - clean TEXT NULL, - body TEXT NULL, - book_idx TEXT NULL, - seg VARCHAR(256) NULL, - lev_an VARCHAR(1), - lev SMALLINT NULL, - lev0 SMALLINT, - lev1 SMALLINT, - lev2 SMALLINT, - lev3 SMALLINT, - lev4 SMALLINT, - lev5 SMALLINT, - lev6 SMALLINT, - lev7 SMALLINT, - en_a SMALLINT NULL, - en_z SMALLINT NULL, - en_a_asterisk SMALLINT NULL, - en_z_asterisk SMALLINT NULL, - en_a_plus SMALLINT NULL, - en_z_plus SMALLINT NULL, - t_of VARCHAR(16), - t_is VARCHAR(16), - node VARCHAR(16) NULL, - parent 
VARCHAR(16) NULL, - digest_clean CHAR(256), - digest_all CHAR(256), - types CHAR(1) NULL - ) - "); + if (doc_matters.opt_action["sqlite-create"]) { + if ((doc_matters.opt_action["verbose"])) { writeln("sqlite create tables... "); } + db.run(" + DROP TABLE IF EXISTS metadata_and_text; + DROP TABLE IF EXISTS doc_objects; + DROP TABLE IF EXISTS urls; + CREATE TABLE metadata_and_text ( + tid BIGINT PRIMARY KEY, + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NULL, /*NOT*/ + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_note VARCHAR(2500) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NULL, + creator_author_email VARCHAR(100) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NULL, /*NOT*/ + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + publisher VARCHAR(600) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) 
NULL, + original_title VARCHAR(800) NULL, + rights_copyright VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_cover VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + src_filename VARCHAR(256) NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + src_text TEXT NULL, + fulltext TEXT NULL, + links TEXT NULL + ); + /* */ + CREATE TABLE doc_objects ( + lid BIGINT PRIMARY KEY, + metadata_tid BIGINT REFERENCES metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + book_idx TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev0 SMALLINT, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + lev7 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(256), + 
digest_all CHAR(256), + types CHAR(1) NULL + ) + "); + } Statement insert_metadata = db.prepare(" INSERT INTO metadata_and_text ( tid, @@ -593,6 +595,7 @@ template SQLiteBuildTablesAndPopulate() { title_language, title_language_char, creator_author, + creator_author_email, creator_author_hon, creator_author_nationality, creator_editor, @@ -625,14 +628,14 @@ template SQLiteBuildTablesAndPopulate() { original_source, original_institution, original_nationality, - rights, - rights_copyright_text, - rights_copyright_translation, + original_title, + rights_copyright, + rights_copyright_audio, + rights_copyright_cover, rights_copyright_illustrations, rights_copyright_photographs, - rights_copyright_preparation, - rights_copyright_digitization, - rights_copyright_audio, + rights_copyright_text, + rights_copyright_translation, rights_copyright_video, rights_license, identifier_oclc, @@ -673,6 +676,7 @@ template SQLiteBuildTablesAndPopulate() { :title_language, :title_language_char, :creator_author, + :creator_author_email, :creator_author_hon, :creator_author_nationality, :creator_editor, @@ -705,14 +709,14 @@ template SQLiteBuildTablesAndPopulate() { :original_source, :original_institution, :original_nationality, - :rights, - :rights_copyright_text, - :rights_copyright_translation, + :original_title, + :rights_copyright, + :rights_copyright_audio, + :rights_copyright_cover, :rights_copyright_illustrations, :rights_copyright_photographs, - :rights_copyright_preparation, - :rights_copyright_digitization, - :rights_copyright_audio, + :rights_copyright_text, + :rights_copyright_translation, :rights_copyright_video, :rights_license, :identifier_oclc, @@ -743,74 +747,66 @@ template SQLiteBuildTablesAndPopulate() { :links ) "); + // insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment?? 
insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full); - // insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); - // insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); - // insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // - // insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); - // insert_metadata.bind(":title_note", doc_matters.conf_make_meta.meta.title_note); - // insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); + insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); + insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); + insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // + insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); + insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); // insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char); + insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey); + insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords); + insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); + insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); + insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author); - // insert_metadata.bind(":creator_author_hon", doc_matters.conf_make_meta.meta.creator_author_hon); - // insert_metadata.bind(":creator_author_nationality", doc_matters.conf_make_meta.meta.creator_author_nationality); + insert_metadata.bind(":creator_author_email", 
doc_matters.conf_make_meta.meta.creator_author_email); // insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor); // insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor); - // insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); + insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); // insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer); - // insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); - // insert_metadata.bind(":creator_prepared_by", doc_matters.conf_make_meta.meta.creator_prepared_by); - // insert_metadata.bind(":creator_digitized_by", doc_matters.conf_make_meta.meta.creator_digitized_by); + insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); // insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio); // insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video); - // insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); - // insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); - // insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); - // insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); - // insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); - // insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); - // insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); - // insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); - // insert_metadata.bind(":date_modified", 
doc_matters.conf_make_meta.meta.date_modified); - // insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); - // insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); + insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); + insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); + insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); + insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); + insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); + insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); + insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); // insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated); // insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication); // insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated); - // insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher)); - // insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); - // insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); - // insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char); - // insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); - // insert_metadata.bind(":original_institution", doc_matters.conf_make_meta.meta.original_institution); - // insert_metadata.bind(":original_nationality", doc_matters.conf_make_meta.meta.original_nationality); + insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); + insert_metadata.bind(":identifier_oclc", 
doc_matters.conf_make_meta.meta.identifier_oclc); + insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); + insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); + // insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); + // insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); + insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); + insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); + insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); + insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); + insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char); + insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); + insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title); + insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher); // insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights); - // insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); - // insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); - // insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_illustrations); - // insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_photographs); + insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright); + insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio); + 
insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover); + insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations); + insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs); // insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation); + insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); + insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); + insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video); // insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization); // insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio); // insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video); - // insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); - // insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); - // insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); - // insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); - // insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); - // insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); - // insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); - // insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); - // insert_metadata.bind(":notes_comment", doc_matters.conf_make_meta.meta.notes_comment); - // 
insert_metadata.bind(":notes_coverage", doc_matters.conf_make_meta.meta.notes_coverage); - // insert_metadata.bind(":notes_relation", doc_matters.conf_make_meta.meta.notes_relation); - // insert_metadata.bind(":notes_history", doc_matters.conf_make_meta.meta.notes_history); - // insert_metadata.bind(":notes_type", doc_matters.conf_make_meta.meta.notes_type); - // insert_metadata.bind(":notes_format", doc_matters.conf_make_meta.meta.notes_format); - // insert_metadata.bind(":notes_prefix", doc_matters.conf_make_meta.meta.notes_prefix); - // insert_metadata.bind(":notes_prefix_a", doc_matters.conf_make_meta.meta.notes_prefix_a); - // insert_metadata.bind(":notes_prefix_b", doc_matters.conf_make_meta.meta.notes_prefix_b); - // insert_metadata.bind(":notes_suffix", doc_matters.conf_make_meta.meta.notes_suffix); + insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); // insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename); // insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint); // insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize); @@ -822,7 +818,7 @@ template SQLiteBuildTablesAndPopulate() { /+ watch +/ writeln("sql statement executed"); assert(db.totalChanges == 1); - + // Statement insert_doc_objects = db.prepare(" INSERT INTO doc_objects ( lid, diff --git a/src/sdp/output/sqlite_discrete.d b/src/sdp/output/sqlite_discrete.d new file mode 100644 index 0000000..c8238f4 --- /dev/null +++ b/src/sdp/output/sqlite_discrete.d @@ -0,0 +1,1085 @@ +module sdp.output.sqlite_discrete; +import sdp.output; +import + std.file, + std.conv : to; +template SQLiteDiscreteBuildTablesAndPopulate() { + void SQLiteDiscreteBuildTablesAndPopulate(D,I)( + auto ref const D doc_abstraction, + auto ref I doc_matters, + ) { + import d2sqlite3; + import std.typecons : Nullable; + mixin SiSUoutputRgxInit; + static auto rgx = Rgx(); + template 
SQLiteFormatAndLoadObject() { + auto SQLiteFormatAndLoadObject(I)( + auto ref I doc_matters, + ) { + mixin SiSUoutputRgxInit; + struct sqlite_format_and_load_objects { + auto generic_munge_sanitize_text_for_search( + string _txt, + ) { + string _notes; + string _urls; + _txt = _txt.replaceAll(rgx.inline_fontface_clean, ""); + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { + _notes ~= "\n" ~ m["text"]; + } + _txt = _txt.replaceAll(rgx.inline_notes_al_gen, ""); + } + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + if (m["link"].match(rgx.url)) { + _urls ~= "\n" ~ m["link"]; + } + } + _txt = _txt.replaceAll(rgx.inline_link_clean, ""); + } + if (_notes.length > 0) { + _txt ~= _notes; + } + if (_urls.length > 0) { + _txt ~= _urls; + } + if (doc_matters.opt_action["debug"]) { + writeln(_txt, "\n"); + } + debug(sql_text_clean) { + writeln(_txt); + } + return _txt; + } + auto munge_html(O)( + auto return ref const O obj, + ) { + string _html_special_characters(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.xhtml_line_break, "
"); + return _txt; + } + string _html_font_face(string _txt){ + _txt = (_txt) + .replaceAll(rgx.inline_emphasis, ("$1")) + .replaceAll(rgx.inline_bold, ("$1")) + .replaceAll(rgx.inline_underscore, ("$1")) + .replaceAll(rgx.inline_italics, ("$1")) + .replaceAll(rgx.inline_superscript, ("$1")) + .replaceAll(rgx.inline_subscript, ("$1")) + .replaceAll(rgx.inline_strike, ("$1")) + .replaceAll(rgx.inline_insert, ("$1")) + .replaceAll(rgx.inline_mono, ("$1")) + .replaceAll(rgx.inline_cite, ("$1")); + return _txt; + } + string _notes; + string _urls; + string _txt = _html_font_face(_html_special_characters(obj.text)); + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { + _notes ~= "\n" ~ m["text"]; + } + _txt = _txt.replaceAll(rgx.inline_notes_al_gen_ref, "$1 "); + } + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + } + _txt = _txt.replaceAll(rgx.inline_link_clean, ""); + } + if (_notes.length > 0) { + _txt ~= _notes; + } + if (doc_matters.opt_action["debug"]) { + writeln(_txt, "\n"); + } + return _txt; + } + string html_special_characters(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.xhtml_line_break, "
"); + return _txt; + } + string html_special_characters_code(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " "); + return _txt; + } + string html_font_face(string _txt){ + _txt = (_txt) + .replaceAll(rgx.inline_emphasis, ("$1")) + .replaceAll(rgx.inline_bold, ("$1")) + .replaceAll(rgx.inline_underscore, ("$1")) + .replaceAll(rgx.inline_italics, ("$1")) + .replaceAll(rgx.inline_superscript, ("$1")) + .replaceAll(rgx.inline_subscript, ("$1")) + .replaceAll(rgx.inline_strike, ("$1")) + .replaceAll(rgx.inline_insert, ("$1")) + .replaceAll(rgx.inline_mono, ("$1")) + .replaceAll(rgx.inline_cite, ("$1")); + return _txt; + } + auto html_heading(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶

+ %s +

¶", + obj.is_a, + _txt, + ); + return o; + } + auto html_para(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + _txt = (obj.bullet) ? ("●  " ~ _txt) : _txt; + string o = format(q"¶

+ %s +

¶", + obj.is_a, + obj.indent_hang, + obj.indent_base, + _txt + ); + return o; + } + auto html_quote(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶

+ %s +

¶", + obj.is_a, + _txt + ); + return o; + } + auto html_group(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶

+ %s +

¶", + obj.is_a, + _txt + ); + return o; + } + auto html_block(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶ +

%s

¶", + obj.is_a, + _txt.stripRight + ); + return o; + } + auto html_verse(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶

%s

¶", + obj.is_a, + _txt + ); + return o; + } + auto html_code(O)( + auto return ref const O obj, + ) { + string _txt = html_special_characters_code(obj.text); + string o = format(q"¶

%s

¶", + obj.is_a, + _txt + ); + return o; + } + auto html_table(O)( + auto return ref const O obj, + ) { + auto _tablarize(O)( + auto return ref const O obj, + string _txt, + ) { + string[] _table_rows = (_txt).split(rgx.table_delimiter_row); + string[] _table_cols; + string _table; + string _tablenote; + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + _table ~= ""; + foreach(col_idx, cell; _table_cols) { + if ((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) + _tablenote ~= cell; + } else { + string _col_is = (row_idx == 0 && obj.table_heading) ? "th" : "td"; + string _align = ("style=\"text-align:" + ~ ((obj.table_column_aligns[col_idx] == "l") + ? "left\"" : "right\"")); + _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table_column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">"; + _table ~= cell; + _table ~= ""; + } + } + _table ~= ""; + } + auto t = tuple( + _table, + _tablenote, + ); + return t; + } + string _txt = munge_html(obj); + auto t = _tablarize(obj, _txt); + _txt = t[0]; + string _note = t[1]; + string o = format(q"¶

+ + %s +
+ %s +

¶", + obj.is_a, + _txt, + _note + ); + return o; + } + auto sqlite_load_string(O,Dm)( + auto return ref const O obj, + auto return ref Dm doc_matters, + ) { + string o; + return o; + } + auto postgresql_load_string(O,Dm)( + auto return ref const O obj, + auto return ref Dm doc_matters, + ) { + string o; + return o; + } + string sqlite_statement(O)( + auto return ref const O obj, + string _txt, + string _html, + ) { + void _sql_exe(O)( + string _sql, + ) { + writeln(_html); + writeln(_sql); + } + string _sql; + return _sql; + } + auto heading(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_heading(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto para(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_para(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto quote(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_quote(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto group(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_group(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return 
obj_txt; + } + auto block(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_block(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto verse(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_verse(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto code(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_code(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto table(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_table(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + } + return sqlite_format_and_load_objects(); + } + } + template SQLiteInstruct() { + Statement SQLiteInstruct(I)( + auto ref I doc_matters, + ) { + auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.src_path_info, doc_matters.language); + auto db = Database(pth_sqlite.sqlite_file(doc_matters.source_filename)); + // auto db = Database(":memory:"); // open database in memory + db.run(" + DROP TABLE IF EXISTS metadata_and_text; + DROP TABLE IF EXISTS doc_objects; + DROP TABLE IF EXISTS urls; + CREATE 
TABLE metadata_and_text ( + tid BIGINT PRIMARY KEY, + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NULL, /*NOT*/ + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_note VARCHAR(2500) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NULL, + creator_author_email VARCHAR(100) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NULL, /*NOT*/ + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + publisher VARCHAR(600) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) NULL, + original_title VARCHAR(800) NULL, + rights_copyright VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_cover VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation 
VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + src_filename VARCHAR(256) NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + src_text TEXT NULL, + fulltext TEXT NULL, + links TEXT NULL + ); + /* */ + CREATE TABLE doc_objects ( + lid BIGINT PRIMARY KEY, + metadata_tid BIGINT REFERENCES metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + book_idx TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev0 SMALLINT, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + lev7 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(256), + digest_all CHAR(256), + types CHAR(1) NULL + ) + "); + Statement insert_metadata = db.prepare(" + INSERT INTO metadata_and_text ( + tid, + title, + title_main, + title_sub, + title_short, + title_edition, + title_note, + title_language, + title_language_char, + creator_author, + creator_author_email, + creator_author_hon, + 
creator_author_nationality, + creator_editor, + creator_contributor, + creator_illustrator, + creator_photographer, + creator_translator, + creator_prepared_by, + creator_digitized_by, + creator_audio, + creator_video, + language_document, + language_document_char, + language_original, + language_original_char, + date_added_to_site, + date_available, + date_created, + date_issued, + date_modified, + date_published, + date_valid, + date_translated, + date_original_publication, + date_generated, + publisher, + original_publisher, + original_language, + original_language_char, + original_source, + original_institution, + original_nationality, + original_title, + rights_copyright, + rights_copyright_audio, + rights_copyright_cover, + rights_copyright_illustrations, + rights_copyright_photographs, + rights_copyright_text, + rights_copyright_translation, + rights_copyright_video, + rights_license, + identifier_oclc, + identifier_isbn, + classify_topic_register, + classify_subject, + classify_loc, + classify_dewey, + classify_keywords, + notes_abstract, + notes_description, + notes_comment, + notes_coverage, + notes_relation, + notes_history, + notes_type, + notes_format, + notes_prefix, + notes_prefix_a, + notes_prefix_b, + notes_suffix, + src_filename, + src_fingerprint, + src_filesize, + src_wordcount, + src_text, + fulltext, + links + ) + VALUES ( + :tid, + :title, + :title_main, + :title_sub, + :title_short, + :title_edition, + :title_note, + :title_language, + :title_language_char, + :creator_author, + :creator_author_email, + :creator_author_hon, + :creator_author_nationality, + :creator_editor, + :creator_contributor, + :creator_illustrator, + :creator_photographer, + :creator_translator, + :creator_prepared_by, + :creator_digitized_by, + :creator_audio, + :creator_video, + :language_document, + :language_document_char, + :language_original, + :language_original_char, + :date_added_to_site, + :date_available, + :date_created, + :date_issued, + :date_modified, + 
:date_published, + :date_valid, + :date_translated, + :date_original_publication, + :date_generated, + :publisher, + :original_publisher, + :original_language, + :original_language_char, + :original_source, + :original_institution, + :original_nationality, + :original_title, + :rights_copyright, + :rights_copyright_audio, + :rights_copyright_cover, + :rights_copyright_illustrations, + :rights_copyright_photographs, + :rights_copyright_text, + :rights_copyright_translation, + :rights_copyright_video, + :rights_license, + :identifier_oclc, + :identifier_isbn, + :classify_topic_register, + :classify_subject, + :classify_loc, + :classify_dewey, + :classify_keywords, + :notes_abstract, + :notes_description, + :notes_comment, + :notes_coverage, + :notes_relation, + :notes_history, + :notes_type, + :notes_format, + :notes_prefix, + :notes_prefix_a, + :notes_prefix_b, + :notes_suffix, + :src_filename, + :src_fingerprint, + :src_filesize, + :src_wordcount, + :src_text, + :fulltext, + :links + ) + "); + // insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment?? 
+ insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full); + insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); + insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); + insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // + insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); + insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); + // insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char); + insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey); + insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords); + insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); + insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); + insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); + insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author); + insert_metadata.bind(":creator_author_email", doc_matters.conf_make_meta.meta.creator_author_email); + // insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor); + // insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor); + insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); + // insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer); + insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); + // insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio); + // insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video); + 
insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); + insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); + insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); + insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); + insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); + insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); + insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); + // insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated); + // insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication); + // insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated); + insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); + insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); + insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); + insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); + // insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); + // insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); + insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); + insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); + insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); + insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); + insert_metadata.bind(":original_language_char", 
doc_matters.conf_make_meta.meta.original_language_char); + insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); + insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title); + insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher); + // insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights); + insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright); + insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio); + insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover); + insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations); + insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs); + // insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation); + insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); + insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); + insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video); + // insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization); + // insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio); + // insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video); + insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); + // insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename); + // insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint); + // 
insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize); + // insert_metadata.bind(":src_wordcount", doc_matters.conf_make_meta.meta.src_wordcount); + // insert_metadata.bind(":src_text", doc_matters.conf_make_meta.meta.src_text); + // insert_metadata.bind(":fulltext", doc_matters.conf_make_meta.meta.fulltext); + // insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links); + insert_metadata.execute(); insert_metadata.reset(); + /+ watch +/ + writeln("sql statement executed"); + assert(db.totalChanges == 1); + // + Statement insert_doc_objects = db.prepare(" + INSERT INTO doc_objects ( + lid, + metadata_tid, + ocn, + ocnd, + ocns, + clean, + body, + book_idx, + seg, + lev_an, + lev, + lev0, + lev1, + lev2, + lev3, + lev4, + lev5, + lev6, + lev7, + en_a, + en_z, + en_a_asterisk, + en_z_asterisk, + en_a_plus, + en_z_plus, + t_of, + t_is, + node, + parent, + digest_clean, + digest_all, + types + ) + VALUES ( + :lid, + :metadata_tid, + :ocn, + :ocnd, + :ocns, + :clean, + :body, + :book_idx, + :seg, + :lev_an, + :lev, + :lev0, + :lev1, + :lev2, + :lev3, + :lev4, + :lev5, + :lev6, + :lev7, + :en_a, + :en_z, + :en_a_asterisk, + :en_z_asterisk, + :en_a_plus, + :en_z_plus, + :t_of, + :t_is, + :node, + :parent, + :digest_clean, + :digest_all, + :types + ) + "); + return insert_doc_objects; + } + }
+    /+
+      SQLiteObjectsLoop: walk the document abstraction part by part and
+      load every object that has a formatter into the doc_objects table.
+
+      doc_parts: the sequence of part names used to index doc_abstraction
+      (invoked below with doc_matters.keys_seq.sql).
+
+      For each object the nested switches (of_part -> is_of -> is_a) pick
+      the matching format_and_sqlite_load handler; the handler's result is
+      an associative array from which the "text" and "html" entries are
+      bound to the :clean and :body columns of the prepared INSERT
+      returned by SQLiteInstruct.
+
+      Objects for which no handler runs (comments, "poem" containers,
+      unrecognised kinds) are not inserted.
+    +/
+    template SQLiteObjectsLoop() {
+      void SQLiteObjectsLoop(P)(
+        auto ref P doc_parts,
+      ) {
+        Statement insert_doc_objects = SQLiteInstruct!()(doc_matters);
+        auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters);
+        foreach (part; doc_parts) {
+          foreach (obj; doc_abstraction[part]) {
+            // Declared per object (not per document) so that an object with
+            // no handler can never be stored with the previous object's text.
+            string[string] obj_txt;
+            switch (obj.of_part) {
+            case "frontmatter":              assert(part == "head");
+              switch (obj.is_of) {
+              case "para":
+                switch (obj.is_a) {
+                case "heading":
+                  obj_txt = format_and_sqlite_load.heading(obj);
+                  break;
+                default:
+                  if ((doc_matters.opt_action["debug"])) {
+                    writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+                  }
+                  break;
+                }
+                break;
+              default:
+                if ((doc_matters.opt_action["debug"])) {
+                  writeln(__FILE__, ":", __LINE__, ": ", obj.is_of);
+                }
+                break;
+              }
+              break;
+            case "body": //                    assert(part == "body"); // TODO broken
+              switch (obj.is_of) {
+              case "para":
+                switch (obj.is_a) {
+                case "heading":
+                  debug (asserts) { // TODO consider and fix or remove
+                    if (part != "body") {
+                      writeln(__LINE__, ": ", obj.text);
+                    }
+                  }
+                  obj_txt = format_and_sqlite_load.heading(obj);
+                  break;
+                case "para":
+                  obj_txt = format_and_sqlite_load.para(obj);
+                  break;
+                default:
+                  if ((doc_matters.opt_action["debug"])) {
+                    writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+                  }
+                  break;
+                }
+                break;
+              case "block":
+                switch (obj.is_a) {
+                case "quote":
+                  obj_txt = format_and_sqlite_load.quote(obj);
+                  break;
+                case "group":
+                  obj_txt = format_and_sqlite_load.group(obj);
+                  break;
+                case "block":
+                  obj_txt = format_and_sqlite_load.block(obj);
+                  break;
+                case "poem":                        // double check on keeping both poem & verse
+                  break;
+                case "verse":
+                  obj_txt = format_and_sqlite_load.verse(obj);
+                  break;
+                case "code":
+                  obj_txt = format_and_sqlite_load.code(obj);
+                  break;
+                case "table":
+                  obj_txt = format_and_sqlite_load.table(obj);
+                  break;
+                default:
+                  if ((doc_matters.opt_action["debug"])) {
+                    writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+                  }
+                  break;
+                }
+                break;
+              default:
+                if ((doc_matters.opt_action["debug"])) {
+                  writeln(__FILE__, ":", __LINE__, ": ", obj.is_of);
+                }
+                break;
+              }
+              break;
+            case "backmatter":
+              // Each alternative must be compared against part explicitly:
+              // a bare string literal operand of || is always true, which
+              // made the previous form of this assert impossible to fail.
+              assert(
+                part == "endnotes"
+                || part == "glossary"
+                || part == "bibliography"
+                || part == "bookindex_scroll"
+                || part == "blurb"
+                || part == "tail"
+              );
+              switch (obj.is_of) {
+              case "para":
+                switch (obj.is_a) {
+                case "heading":
+                  obj_txt = format_and_sqlite_load.heading(obj);
+                  break;
+                case "endnote":              assert(part == "endnotes");
+                  obj_txt = format_and_sqlite_load.para(obj);
+                  break;
+                case "glossary":             assert(part == "glossary");
+                  obj_txt = format_and_sqlite_load.para(obj);
+                  break;
+                case "bibliography":         assert(part == "bibliography");
+                  obj_txt = format_and_sqlite_load.para(obj);
+                  break;
+                case "bookindex":            assert(part == "bookindex_scroll");
+                  obj_txt = format_and_sqlite_load.para(obj);
+                  break;
+                case "blurb":                assert(part == "blurb");
+                  obj_txt = format_and_sqlite_load.para(obj);
+                  break;
+                default:
+                  if ((doc_matters.opt_action["debug"])) {
+                    writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+                  }
+                  break;
+                }
+                break;
+              default:
+                if ((doc_matters.opt_action["debug"])) {
+                  writeln(__FILE__, ":", __LINE__, ": ", obj.is_of);
+                }
+                break;
+              }
+              break;
+            case "comment":
+              break;
+            default:
+              if ((doc_matters.opt_action["debug"])) {
+                writeln(__FILE__, ":", __LINE__, ": ", obj.of_part); // check where empty value could come from
+                writeln(__FILE__, ":", __LINE__, ": ", obj.is_a);
+                writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from
+              }
+              break;
+            }
+            /+ watch: heading structure trace +/
+            if (obj.is_a == "heading") {
+              writeln(
+                "markup: ", obj.heading_lev_markup,
+                "> ", obj.dom_markedup,
+                "; collapsed: ", obj.heading_lev_collapsed,
+                "> ", obj.dom_collapsed,
+                "; ocn: ", obj.ocn,
+                " node: ", obj.node,
+                "; parent: ", obj.parent_lev_markup,
+                "; ocn: ", obj.parent_ocn,
+                "; ",
+              );
+            }
+            // Nothing was formatted for this object: skip the insert rather
+            // than index a missing key (RangeError) or store an empty row.
+            if ("text" !in obj_txt || "html" !in obj_txt) {
+              continue;
+            }
+            insert_doc_objects.bind(":t_of", obj.is_of);
+            insert_doc_objects.bind(":t_is", obj.is_a);
+            insert_doc_objects.bind(":ocn", obj.ocn);
+            insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search
+            insert_doc_objects.bind(":body", obj_txt["html"]);
+            // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search
+            insert_doc_objects.bind(":lev", obj.heading_lev_markup);
+            // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete
+            // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info
+            // insert_doc_objects.bind(":lev0", "");
+            // insert_doc_objects.bind(":lev1", "");
+            // insert_doc_objects.bind(":lev2", "");
+            // insert_doc_objects.bind(":lev3", "");
+            // insert_doc_objects.bind(":lev4", "");
+            // insert_doc_objects.bind(":lev5", "");
+            // insert_doc_objects.bind(":lev6", "");
+            // insert_doc_objects.bind(":lev7", "");
+            // insert_doc_objects.bind(":node", "");
+            // insert_doc_objects.bind(":type", "");
+            // insert_doc_objects.bind(":parent_ocn", "");
+            // insert_doc_objects.bind(":ancestors", "");
+            // insert_doc_objects.bind(":heading_lev_markup", "");
+            // insert_doc_objects.bind(":heading_lev_collapsed", "");
+            // insert_doc_objects.bind(":parent_lev_markup", "");
+            // insert_doc_objects.bind(":heading_ancestors", "");
+            // insert_doc_objects.bind(":node", "");
+            insert_doc_objects.execute(); insert_doc_objects.reset();
+          }
+        }
+      }
+    }
+    SQLiteObjectsLoop!()(doc_matters.keys_seq.sql);
+  }
+}
+/+ placeholder: table creation is not implemented here yet, only reports the step +/
+template SQLiteDiscreteTablesCreate() {
+  void SQLiteDiscreteTablesCreate()() {
+    writeln("table create");
+  }
+}
+/+ placeholder: table drop is not implemented here yet, only reports the step +/
+template SQLiteDiscreteTablesDrop() {
+  void SQLiteDiscreteTablesDrop()() {
+    writeln("table drop");
+  }
+}
diff --git a/src/sdp/sdp.d b/src/sdp/sdp.d index c3b307c..00afb54 100755 --- a/src/sdp/sdp.d +++ b/src/sdp/sdp.d @@ -77,7 +77,8 @@ void main(string[] args) { "qrcode" : false, "sisupod" : false, "source" : false, - "sqlite" : false, + "sqlite-discrete" : false, + "sqlite-update" : false, "sqlite-create" : false, "sqlite-drop" : false, "text" : false, @@ -116,9 +117,10 @@ void main(string[] args) { "qrcode", "--qrcode with document metadata", &opts["qrcode"], "sisupod", "--sisupod sisupod source content bundled", &opts["sisupod"], "source", "--source markup source text content", &opts["source"], + "sqlite-discrete", "--sqlite process discrete sqlite output", &opts["sqlite-discrete"], "sqlite-create", "--sqlite-create create db, create tables", &opts["sqlite-create"], "sqlite-drop", "--sqlite-drop drop tables & db", &opts["sqlite-drop"], - "sqlite", "--sqlite process sqlite output", &opts["sqlite"], + "sqlite-update",
"--sqlite process sqlite output", &opts["sqlite-update"], "text", "--text process text output", &opts["text"], "txt", "--txt process text output", &opts["text"], "verbose|v", "--verbose output to terminal", &opts["verbose"], -- cgit v1.2.3