From 1319336d6f8ce2de9a1a6319917bee6be278ad5a Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 8 Jul 2019 17:41:06 -0400 Subject: 0.7.4 sqlite related, also some xml paths --- org/output_sqlite.org | 379 +++++++++++++++++++++++++++++--------------------- 1 file changed, 218 insertions(+), 161 deletions(-) (limited to 'org/output_sqlite.org') diff --git a/org/output_sqlite.org b/org/output_sqlite.org index 2dc29f7..fc2603d 100644 --- a/org/output_sqlite.org +++ b/org/output_sqlite.org @@ -29,7 +29,9 @@ module doc_reform.output.sqlite; import d2sqlite3; import std.typecons : Nullable; mixin DocReformOutputRgxInit; +mixin InternalMarkup; static auto rgx = Rgx(); +static auto mkup = InlineMarkup(); long _metadata_tid_lastrowid; template SQLiteHubBuildTablesAndPopulate() { void SQLiteHubBuildTablesAndPopulate(D,M)( @@ -64,6 +66,7 @@ template SQLiteHubDiscreteBuildTablesAndPopulate() { const D doc_abstraction, M doc_matters, ) { + auto url_html = DocReformUrlsHTML!()(doc_matters.conf_make_meta.conf.webserv_url_doc_root, doc_matters.src.language); auto pth_sqlite = DocReformPathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); pth_sqlite.base.mkdirRecurse; auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); @@ -163,6 +166,7 @@ template SQLiteTablesReCreate() { <> <> <> + <> <> <> ┃",); @@ -192,6 +196,7 @@ template SQLiteInsertMetadata() { M doc_matters, ) { <> + <> return _insert_metadata; } } @@ -206,7 +211,7 @@ template SQLiteInsertDocObjectsLoop() { M doc_matters, ) { string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid); - auto site_url = DocReformPathsUrl!()(doc_matters); + auto url_html = DocReformUrlsHTML!()(doc_matters.conf_make_meta.conf.webserv_url_doc_root, doc_matters.src.language); string insertDocObjectsRow(O)(O obj) { <> return _insert_doc_objects_row; @@ -229,6 +234,7 @@ template SQLiteTablesCreate() { <> <> <> + <> <> <> ┃",); @@ -1457,6 +1463,7 @@ DROP INDEX IF EXISTS idx_classify_topic_register; 
#+name: sqlite_statement_drop_existing_tables #+BEGIN_SRC sql DROP TABLE IF EXISTS metadata_and_text; +DROP TABLE IF EXISTS topic_register; DROP TABLE IF EXISTS doc_objects; DROP TABLE IF EXISTS urls; #+END_SRC @@ -1467,15 +1474,14 @@ DROP TABLE IF EXISTS urls; #+name: sqlite_statement_create_table_metadata_and_src_txt #+BEGIN_SRC sql CREATE TABLE metadata_and_text ( - uid VARCHAR(256) UNIQUE, /* filename, language char, pod/txt (decide on delimiter [,;:/]) */ - src_composite_id_per_txt VARCHAR(256) NOT NULL, /* UNIQUE, /* z pod name if any + src filename + language code */ - src_composite_id_per_pod VARCHAR(256) NOT NULL, /* z pod name if any + src filename */ + uid VARCHAR(256) UNIQUE, -- filename, language char, pod/txt (decide on delimiter [,;:/]) + src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, z pod name if any + src filename + language code + src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename title VARCHAR(800) NOT NULL, title_main VARCHAR(400) NOT NULL, title_sub VARCHAR(400) NULL, title_short VARCHAR(400) NULL, title_edition VARCHAR(10) NULL, - title_note VARCHAR(2500) NULL, title_language VARCHAR(100) NULL, title_language_char VARCHAR(6) NULL, creator_author VARCHAR(600) NOT NULL, @@ -1505,14 +1511,13 @@ CREATE TABLE metadata_and_text ( date_translated VARCHAR(10) NULL, date_original_publication VARCHAR(10) NULL, date_generated VARCHAR(10) NULL, - publisher VARCHAR(600) NULL, + original_title VARCHAR(800) NULL, original_publisher VARCHAR(600) NULL, original_language VARCHAR(100) NULL, original_language_char VARCHAR(6) NULL, original_source VARCHAR(600) NULL, original_institution VARCHAR(600) NULL, original_nationality VARCHAR(100) NULL, - original_title VARCHAR(800) NULL, rights_copyright VARCHAR(2500) NULL, rights_copyright_audio VARCHAR(2500) NULL, rights_copyright_cover VARCHAR(2500) NULL, @@ -1541,18 +1546,58 @@ CREATE TABLE metadata_and_text ( notes_prefix_a TEXT NULL, notes_prefix_b TEXT NULL, 
notes_suffix TEXT NULL, + publisher VARCHAR(600) NULL, src_filename VARCHAR(256) NOT NULL, src_fingerprint VARCHAR(256) NULL, src_filesize VARCHAR(10) NULL, src_wordcount VARCHAR(10) NULL, - pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */ - pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */ - pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */ - site_url_doc_root VARCHAR(256) NULL, /* url path to doc root */ + pod_name VARCHAR(256) NULL, -- zipped pod, work to be done here + pod_fingerprint VARCHAR(256) NULL, -- zipped pod, work to be done here + pod_size VARCHAR(10) NULL, -- zipped pod, work to be done here + site_url_doc_root VARCHAR(256) NULL, -- url path to doc root + site_url_html_toc VARCHAR(256) NULL, + site_url_html_scroll VARCHAR(256) NULL, + site_url_epub VARCHAR(256) NULL, links TEXT NULL ); #+END_SRC +**** CREATE TABLE topic_register + +#+name: sqlite_statement_create_table_topic_register +#+BEGIN_SRC sql +CREATE TABLE topic_register ( + -- tid BIGINT PRIMARY KEY, + uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE, + -- src_composite_id_per_txt VARCHAR(256) NOT NULL, - UNIQUE, - z pod name if any + src filename + language code + -- src_composite_id_per_pod VARCHAR(256) NOT NULL, - z pod name if any + src filename + topic_register_lv0 VARCHAR(250) NOT NULL, + topic_register_lv1 VARCHAR(250) NULL, + topic_register_lv2 VARCHAR(250) NULL, + topic_register_lv3 VARCHAR(250) NULL, + topic_register_lv4 VARCHAR(250) NULL, + site_url_doc_root VARCHAR(256) NULL, -- url path to doc root + site_url_html_toc VARCHAR(256) NULL, + site_url_html_scroll VARCHAR(256) NULL +); +#+END_SRC + + +**** CREATE TABLE site_urls ? 
+ +#+name: sqlite_statement_create_table_site_urls +#+BEGIN_SRC sql +CREATE TABLE site_urls ( + -- tid BIGINT PRIMARY KEY, + uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE, + src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, - z pod name if any + src filename + language code + src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename + site_url_doc_root VARCHAR(256) NULL, -- url path to doc root + site_url_html_toc VARCHAR(256) NULL, + site_url_html_scroll VARCHAR(256) NULL +); +#+END_SRC + **** CREATE TABLE doc_objects #+name: sqlite_statement_create_table_objects @@ -1571,7 +1616,7 @@ CREATE TABLE doc_objects ( lev SMALLINT NULL, node VARCHAR(16) NULL, parent VARCHAR(16) NULL, - last_decendant VARCHAR(16) NULL, /* headings only */ + last_decendant VARCHAR(16) NULL, -- headings only digest_clean CHAR(256), digest_all CHAR(256), html_seg_url CHAR(256), @@ -1628,73 +1673,6 @@ WHERE uid_metadata_and_text = '%s'; #+END_SRC *** inserts -**** INSERT doc objects - -lid unique, increment by 1 per object, not ocn - -metadata tid document number unique -either: -- increment by adding 1 for each document, -- make hash of document filename or url and use? 
- -***** sql statement: dlang format - -#+name: sqlite_formatted_insertions_doc_objects -#+BEGIN_SRC d -string _insert_doc_objects_row = format(q"┃ -#+END_SRC - -***** INSERT INTO - -#+name: sqlite_formatted_insertions_doc_objects -#+BEGIN_SRC sql - INSERT INTO doc_objects ( - uid_metadata_and_text, - ocn, - obj_id, - clean, - body, - lev, - is_of_type, - is_a, - html_seg_url - ) -#+END_SRC - -***** VALUES - -#+name: sqlite_formatted_insertions_doc_objects -#+BEGIN_SRC sql - VALUES ( - '%s', - %s, - '%s', - '%s', - '%s', - %s, - '%s', - '%s', - '%s' - ); -#+END_SRC - -***** dlang values for formatting - -#+name: sqlite_formatted_insertions_doc_objects -#+BEGIN_SRC d -┃", - _uid, - obj.metainfo.ocn, - obj.metainfo.identifier, - SQLinsertDelimiter!()(obj_txt["text"]), - SQLinsertDelimiter!()(obj_txt["html"]), - obj.metainfo.heading_lev_markup, - obj.metainfo.is_of_type, - obj.metainfo.is_a, - site_url.html_seg(obj.tags.html_segment_anchor_tag_is, obj.metainfo.identifier), -); -#+END_SRC - **** INSERT doc matters & metadata ***** sql statement: dlang format @@ -1719,15 +1697,12 @@ string _insert_metadata = format(q"┃ title_short, title_edition, title_language, - classify_dewey, - classify_keywords, - classify_loc, - classify_subject, - classify_topic_register, creator_author, creator_author_email, creator_illustrator, creator_translator, + language_document, + language_document_char, date_added_to_site, date_available, date_created, @@ -1735,18 +1710,6 @@ string _insert_metadata = format(q"┃ date_modified, date_published, date_valid, - identifier_isbn, - identifier_oclc, - language_document, - language_document_char, - notes_abstract, - notes_description, - original_publisher, - original_language, - original_language_char, - original_source, - original_title, - publisher, rights_copyright, rights_copyright_audio, rights_copyright_cover, @@ -1756,6 +1719,21 @@ string _insert_metadata = format(q"┃ rights_copyright_translation, rights_copyright_video, rights_license, + 
identifier_oclc, + identifier_isbn, + classify_dewey, + classify_keywords, + classify_loc, + classify_subject, + classify_topic_register, + original_title, + original_publisher, + original_language, + original_language_char, + original_source, + notes_abstract, + notes_description, + publisher, site_url_doc_root ) #+END_SRC @@ -1765,54 +1743,7 @@ string _insert_metadata = format(q"┃ #+name: sqlite_formatted_insertions_doc_matters_metadata #+BEGIN_SRC sql VALUES ( - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s', - '%s' + '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s' ); #+END_SRC @@ -1831,15 +1762,12 @@ string _insert_metadata = format(q"┃ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email), 
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created), @@ -1847,18 +1775,6 @@ string _insert_metadata = format(q"┃ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title), - SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover), @@ -1868,8 +1784,149 @@ string _insert_metadata = format(q"┃ 
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title), /* from here on the values mirror the INSERT column list: original_*, then notes_*, then publisher */ + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher), SQLinsertDelimiter!()(doc_matters.conf_make_meta.conf.webserv_url_doc_root) ); #+END_SRC +**** INSERT topic register + +writeln(doc_matters.conf_make_meta.meta.classify_topic_register_arr); + +***** { if topic register then loop topic register array + +#+name: sqlite_formatted_insertions_topic_register +#+BEGIN_SRC d +if (doc_matters.conf_make_meta.meta.classify_topic_register_arr.length > 0) { + + string _insert_topics; + foreach (topic; doc_matters.conf_make_meta.meta.classify_topic_register_arr) { + string[] subject_tree = topic.split(mkup.sep); +#+END_SRC + +***** sql statement: dlang format + +#+name: 
sqlite_formatted_insertions_topic_register +#+BEGIN_SRC d +_insert_topics ~= format(q"┃ +#+END_SRC + +***** INSERT INTO + +#+name: sqlite_formatted_insertions_topic_register +#+BEGIN_SRC sql + INSERT INTO topic_register ( + uid_metadata_and_text, + topic_register_lv0, + topic_register_lv1, + topic_register_lv2, + topic_register_lv3, + topic_register_lv4 + ) +#+END_SRC + +***** VALUES + +#+name: sqlite_formatted_insertions_topic_register +#+BEGIN_SRC sql + VALUES ( + '%s', '%s', '%s', '%s', '%s', '%s' + ); +#+END_SRC + +***** dlang values for formatting + +#+name: sqlite_formatted_insertions_topic_register +#+BEGIN_SRC d +┃", + _uid, /* each topic level is passed through SQLinsertDelimiter, like the other quoted text values, so an apostrophe in a topic cannot break the statement */ + (subject_tree.length > 0) ? SQLinsertDelimiter!()(subject_tree[0]) : "", + (subject_tree.length > 1) ? SQLinsertDelimiter!()(subject_tree[1]) : "", + (subject_tree.length > 2) ? SQLinsertDelimiter!()(subject_tree[2]) : "", + (subject_tree.length > 3) ? SQLinsertDelimiter!()(subject_tree[3]) : "", + (subject_tree.length > 4) ? SQLinsertDelimiter!()(subject_tree[4]) : "" +); +#+END_SRC + +***** } close topic register & loop topic register array + +#+name: sqlite_formatted_insertions_topic_register +#+BEGIN_SRC d + } +} +#+END_SRC + +**** INSERT doc objects + +lid unique, increment by 1 per object, not ocn + +metadata tid document number unique +either: +- increment by adding 1 for each document, +- make hash of document filename or url and use? 
+ +***** sql statement: dlang format + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC d +string _insert_doc_objects_row = format(q"┃ +#+END_SRC + +***** INSERT INTO + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC sql + INSERT INTO doc_objects ( + uid_metadata_and_text, + ocn, + obj_id, + clean, + body, + lev, + is_of_type, + is_a, + html_seg_url + ) +#+END_SRC + +***** VALUES + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC sql + VALUES ( + '%s', %s, '%s', '%s', '%s', %s, '%s', '%s', '%s' + ); +#+END_SRC + +***** dlang values for formatting + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC d +┃", + _uid, + obj.metainfo.ocn, + obj.metainfo.identifier, + SQLinsertDelimiter!()(obj_txt["text"]), + SQLinsertDelimiter!()(obj_txt["html"]), + obj.metainfo.heading_lev_markup, + obj.metainfo.is_of_type, + obj.metainfo.is_a, + url_html.fn_seg_obj_num(doc_matters.src.filename, obj.tags.html_segment_anchor_tag_is, obj.metainfo.identifier), +); +#+END_SRC + * __END__ -- cgit v1.2.3