aboutsummaryrefslogtreecommitdiffhomepage
path: root/org/output_sqlite.org
diff options
context:
space:
mode:
authorRalph Amissah <ralph.amissah@gmail.com>2019-07-08 17:41:06 -0400
committerRalph Amissah <ralph.amissah@gmail.com>2019-10-17 19:07:20 -0400
commit1319336d6f8ce2de9a1a6319917bee6be278ad5a (patch)
tree132773a7a8bb44e9d456347f4d7de0c4ef2611d7 /org/output_sqlite.org
parentharvests reorganized; adjustments: output, paths (diff)
0.7.4 sqlite related, also some xml paths
Diffstat (limited to 'org/output_sqlite.org')
-rw-r--r--org/output_sqlite.org379
1 files changed, 218 insertions, 161 deletions
diff --git a/org/output_sqlite.org b/org/output_sqlite.org
index 2dc29f7..fc2603d 100644
--- a/org/output_sqlite.org
+++ b/org/output_sqlite.org
@@ -29,7 +29,9 @@ module doc_reform.output.sqlite;
import d2sqlite3;
import std.typecons : Nullable;
mixin DocReformOutputRgxInit;
+mixin InternalMarkup;
static auto rgx = Rgx();
+static auto mkup = InlineMarkup();
long _metadata_tid_lastrowid;
template SQLiteHubBuildTablesAndPopulate() {
void SQLiteHubBuildTablesAndPopulate(D,M)(
@@ -64,6 +66,7 @@ template SQLiteHubDiscreteBuildTablesAndPopulate() {
const D doc_abstraction,
M doc_matters,
) {
+ auto url_html = DocReformUrlsHTML!()(doc_matters.conf_make_meta.conf.webserv_url_doc_root, doc_matters.src.language);
auto pth_sqlite = DocReformPathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language);
pth_sqlite.base.mkdirRecurse;
auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename));
@@ -163,6 +166,7 @@ template SQLiteTablesReCreate() {
<<sqlite_statement_drop_existing_index>>
<<sqlite_statement_drop_existing_tables>>
<<sqlite_statement_create_table_metadata_and_src_txt>>
+ <<sqlite_statement_create_table_topic_register>>
<<sqlite_statement_create_table_objects>>
<<sqlite_statement_create_index>>
┃",);
@@ -192,6 +196,7 @@ template SQLiteInsertMetadata() {
M doc_matters,
) {
<<sqlite_formatted_insertions_doc_matters_metadata>>
+ <<sqlite_formatted_insertions_topic_register>>
return _insert_metadata;
}
}
@@ -206,7 +211,7 @@ template SQLiteInsertDocObjectsLoop() {
M doc_matters,
) {
string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid);
- auto site_url = DocReformPathsUrl!()(doc_matters);
+ auto url_html = DocReformUrlsHTML!()(doc_matters.conf_make_meta.conf.webserv_url_doc_root, doc_matters.src.language);
string insertDocObjectsRow(O)(O obj) {
<<sqlite_formatted_insertions_doc_objects>>
return _insert_doc_objects_row;
@@ -229,6 +234,7 @@ template SQLiteTablesCreate() {
<<sqlite_statement_drop_existing_index>>
<<sqlite_statement_drop_existing_tables>>
<<sqlite_statement_create_table_metadata_and_src_txt>>
+ <<sqlite_statement_create_table_topic_register>>
<<sqlite_statement_create_table_objects>>
<<sqlite_statement_create_index>>
┃",);
@@ -1457,6 +1463,7 @@ DROP INDEX IF EXISTS idx_classify_topic_register;
#+name: sqlite_statement_drop_existing_tables
#+BEGIN_SRC sql
DROP TABLE IF EXISTS metadata_and_text;
+DROP TABLE IF EXISTS topic_register;
DROP TABLE IF EXISTS doc_objects;
DROP TABLE IF EXISTS urls;
#+END_SRC
@@ -1467,15 +1474,14 @@ DROP TABLE IF EXISTS urls;
#+name: sqlite_statement_create_table_metadata_and_src_txt
#+BEGIN_SRC sql
CREATE TABLE metadata_and_text (
- uid VARCHAR(256) UNIQUE, /* filename, language char, pod/txt (decide on delimiter [,;:/]) */
- src_composite_id_per_txt VARCHAR(256) NOT NULL, /* UNIQUE, /* z pod name if any + src filename + language code */
- src_composite_id_per_pod VARCHAR(256) NOT NULL, /* z pod name if any + src filename */
+ uid VARCHAR(256) UNIQUE, -- filename, language char, pod/txt (decide on delimiter [,;:/])
+ src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, z pod name if any + src filename + language code
+ src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename
title VARCHAR(800) NOT NULL,
title_main VARCHAR(400) NOT NULL,
title_sub VARCHAR(400) NULL,
title_short VARCHAR(400) NULL,
title_edition VARCHAR(10) NULL,
- title_note VARCHAR(2500) NULL,
title_language VARCHAR(100) NULL,
title_language_char VARCHAR(6) NULL,
creator_author VARCHAR(600) NOT NULL,
@@ -1505,14 +1511,13 @@ CREATE TABLE metadata_and_text (
date_translated VARCHAR(10) NULL,
date_original_publication VARCHAR(10) NULL,
date_generated VARCHAR(10) NULL,
- publisher VARCHAR(600) NULL,
+ original_title VARCHAR(800) NULL,
original_publisher VARCHAR(600) NULL,
original_language VARCHAR(100) NULL,
original_language_char VARCHAR(6) NULL,
original_source VARCHAR(600) NULL,
original_institution VARCHAR(600) NULL,
original_nationality VARCHAR(100) NULL,
- original_title VARCHAR(800) NULL,
rights_copyright VARCHAR(2500) NULL,
rights_copyright_audio VARCHAR(2500) NULL,
rights_copyright_cover VARCHAR(2500) NULL,
@@ -1541,18 +1546,58 @@ CREATE TABLE metadata_and_text (
notes_prefix_a TEXT NULL,
notes_prefix_b TEXT NULL,
notes_suffix TEXT NULL,
+ publisher VARCHAR(600) NULL,
src_filename VARCHAR(256) NOT NULL,
src_fingerprint VARCHAR(256) NULL,
src_filesize VARCHAR(10) NULL,
src_wordcount VARCHAR(10) NULL,
- pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */
- pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */
- pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */
- site_url_doc_root VARCHAR(256) NULL, /* url path to doc root */
+ pod_name VARCHAR(256) NULL, -- zipped pod, work to be done here
+ pod_fingerprint VARCHAR(256) NULL, -- zipped pod, work to be done here
+ pod_size VARCHAR(10) NULL, -- zipped pod, work to be done here
+ site_url_doc_root VARCHAR(256) NULL, -- url path to doc root
+ site_url_html_toc VARCHAR(256) NULL,
+ site_url_html_scroll VARCHAR(256) NULL,
+ site_url_epub VARCHAR(256) NULL,
links TEXT NULL
);
#+END_SRC
+**** CREATE TABLE topic_register
+
+#+name: sqlite_statement_create_table_topic_register
+#+BEGIN_SRC sql
+CREATE TABLE topic_register (
+ -- tid BIGINT PRIMARY KEY,
+ uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE,
+ -- src_composite_id_per_txt VARCHAR(256) NOT NULL, - UNIQUE, - z pod name if any + src filename + language code
+ -- src_composite_id_per_pod VARCHAR(256) NOT NULL, - z pod name if any + src filename
+ topic_register_lv0 VARCHAR(250) NOT NULL,
+ topic_register_lv1 VARCHAR(250) NULL,
+ topic_register_lv2 VARCHAR(250) NULL,
+ topic_register_lv3 VARCHAR(250) NULL,
+ topic_register_lv4 VARCHAR(250) NULL,
+ site_url_doc_root VARCHAR(256) NULL, -- url path to doc root
+ site_url_html_toc VARCHAR(256) NULL,
+ site_url_html_scroll VARCHAR(256) NULL
+);
+#+END_SRC
+
+
+**** CREATE TABLE site_urls ?
+
+#+name: sqlite_statement_create_table_site_urls
+#+BEGIN_SRC sql
+CREATE TABLE site_urls (
+ -- tid BIGINT PRIMARY KEY,
+ uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE,
+ src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, - z pod name if any + src filename + language code
+ src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename
+ site_url_doc_root VARCHAR(256) NULL, -- url path to doc root
+ site_url_html_toc VARCHAR(256) NULL,
+ site_url_html_scroll VARCHAR(256) NULL
+);
+#+END_SRC
+
**** CREATE TABLE doc_objects
#+name: sqlite_statement_create_table_objects
@@ -1571,7 +1616,7 @@ CREATE TABLE doc_objects (
lev SMALLINT NULL,
node VARCHAR(16) NULL,
parent VARCHAR(16) NULL,
- last_decendant VARCHAR(16) NULL, /* headings only */
+ last_decendant VARCHAR(16) NULL, -- headings only
digest_clean CHAR(256),
digest_all CHAR(256),
html_seg_url CHAR(256),
@@ -1628,73 +1673,6 @@ WHERE uid_metadata_and_text = '%s';
#+END_SRC
*** inserts
-**** INSERT doc objects
-
-lid unique, increment by 1 per object, not ocn
-
-metadata tid document number unique
-either:
-- increment by adding 1 for each document,
-- make hash of document filename or url and use?
-
-***** sql statement: dlang format
-
-#+name: sqlite_formatted_insertions_doc_objects
-#+BEGIN_SRC d
-string _insert_doc_objects_row = format(q"┃
-#+END_SRC
-
-***** INSERT INTO
-
-#+name: sqlite_formatted_insertions_doc_objects
-#+BEGIN_SRC sql
- INSERT INTO doc_objects (
- uid_metadata_and_text,
- ocn,
- obj_id,
- clean,
- body,
- lev,
- is_of_type,
- is_a,
- html_seg_url
- )
-#+END_SRC
-
-***** VALUES
-
-#+name: sqlite_formatted_insertions_doc_objects
-#+BEGIN_SRC sql
- VALUES (
- '%s',
- %s,
- '%s',
- '%s',
- '%s',
- %s,
- '%s',
- '%s',
- '%s'
- );
-#+END_SRC
-
-***** dlang values for formatting
-
-#+name: sqlite_formatted_insertions_doc_objects
-#+BEGIN_SRC d
-┃",
- _uid,
- obj.metainfo.ocn,
- obj.metainfo.identifier,
- SQLinsertDelimiter!()(obj_txt["text"]),
- SQLinsertDelimiter!()(obj_txt["html"]),
- obj.metainfo.heading_lev_markup,
- obj.metainfo.is_of_type,
- obj.metainfo.is_a,
- site_url.html_seg(obj.tags.html_segment_anchor_tag_is, obj.metainfo.identifier),
-);
-#+END_SRC
-
**** INSERT doc matters & metadata
***** sql statement: dlang format
@@ -1719,15 +1697,12 @@ string _insert_metadata = format(q"┃
title_short,
title_edition,
title_language,
- classify_dewey,
- classify_keywords,
- classify_loc,
- classify_subject,
- classify_topic_register,
creator_author,
creator_author_email,
creator_illustrator,
creator_translator,
+ language_document,
+ language_document_char,
date_added_to_site,
date_available,
date_created,
@@ -1735,18 +1710,6 @@ string _insert_metadata = format(q"┃
date_modified,
date_published,
date_valid,
- identifier_isbn,
- identifier_oclc,
- language_document,
- language_document_char,
- notes_abstract,
- notes_description,
- original_publisher,
- original_language,
- original_language_char,
- original_source,
- original_title,
- publisher,
rights_copyright,
rights_copyright_audio,
rights_copyright_cover,
@@ -1756,6 +1719,21 @@ string _insert_metadata = format(q"┃
rights_copyright_translation,
rights_copyright_video,
rights_license,
+ identifier_oclc,
+ identifier_isbn,
+ classify_dewey,
+ classify_keywords,
+ classify_loc,
+ classify_subject,
+ classify_topic_register,
+ original_title,
+ original_publisher,
+ original_language,
+ original_language_char,
+ original_source,
+ notes_abstract,
+ notes_description,
+ publisher,
site_url_doc_root
)
#+END_SRC
@@ -1765,54 +1743,7 @@ string _insert_metadata = format(q"┃
#+name: sqlite_formatted_insertions_doc_matters_metadata
#+BEGIN_SRC sql
VALUES (
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s',
- '%s'
+ '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s'
);
#+END_SRC
@@ -1831,15 +1762,12 @@ string _insert_metadata = format(q"┃
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created),
@@ -1847,18 +1775,6 @@ string _insert_metadata = format(q"┃
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title),
- SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover),
@@ -1868,8 +1784,149 @@ string _insert_metadata = format(q"┃
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.conf.webserv_url_doc_root)
);
#+END_SRC
+**** INSERT topic register
+
+writeln(doc_matters.conf_make_meta.meta.classify_topic_register_arr);
+
+***** { if topic register then loop topic register array
+
+#+name: sqlite_formatted_insertions_topic_register
+#+BEGIN_SRC d
+if (doc_matters.conf_make_meta.meta.classify_topic_register_arr.length > 0) {
+
+ string _insert_topics;
+ foreach (topic; doc_matters.conf_make_meta.meta.classify_topic_register_arr) {
+ string[] subject_tree = topic.split(mkup.sep);
+#+END_SRC
+
+***** sql statement: dlang format
+
+#+name: sqlite_formatted_insertions_topic_register
+#+BEGIN_SRC d
+_insert_topics ~= format(q"┃
+#+END_SRC
+
+***** INSERT INTO
+
+#+name: sqlite_formatted_insertions_topic_register
+#+BEGIN_SRC sql
+ INSERT INTO topic_register (
+ uid_metadata_and_text,
+ topic_register_lv0,
+ topic_register_lv1,
+ topic_register_lv2,
+ topic_register_lv3,
+ topic_register_lv4
+ )
+#+END_SRC
+
+***** VALUES
+
+#+name: sqlite_formatted_insertions_topic_register
+#+BEGIN_SRC sql
+ VALUES (
+ '%s', '%s', '%s', '%s', '%s', '%s'
+ );
+#+END_SRC
+
+***** dlang values for formatting
+
+#+name: sqlite_formatted_insertions_topic_register
+#+BEGIN_SRC d
+┃",
+ _uid,
+ (subject_tree.length > 0) ? subject_tree[0] : "",
+ (subject_tree.length > 1) ? subject_tree[1] : "",
+ (subject_tree.length > 2) ? subject_tree[2] : "",
+ (subject_tree.length > 3) ? subject_tree[3] : "",
+ (subject_tree.length > 4) ? subject_tree[4] : ""
+);
+#+END_SRC
+
+***** } close topic register & loop topic register array
+
+#+name: sqlite_formatted_insertions_topic_register
+#+BEGIN_SRC d
+ }
+}
+#+END_SRC
+
+**** INSERT doc objects
+
+lid unique, increment by 1 per object, not ocn
+
+metadata tid document number unique
+either:
+- increment by adding 1 for each document,
+- make hash of document filename or url and use?
+
+***** sql statement: dlang format
+
+#+name: sqlite_formatted_insertions_doc_objects
+#+BEGIN_SRC d
+string _insert_doc_objects_row = format(q"┃
+#+END_SRC
+
+***** INSERT INTO
+
+#+name: sqlite_formatted_insertions_doc_objects
+#+BEGIN_SRC sql
+ INSERT INTO doc_objects (
+ uid_metadata_and_text,
+ ocn,
+ obj_id,
+ clean,
+ body,
+ lev,
+ is_of_type,
+ is_a,
+ html_seg_url
+ )
+#+END_SRC
+
+***** VALUES
+
+#+name: sqlite_formatted_insertions_doc_objects
+#+BEGIN_SRC sql
+ VALUES (
+ '%s', %s, '%s', '%s', '%s', %s, '%s', '%s', '%s'
+ );
+#+END_SRC
+
+***** dlang values for formatting
+
+#+name: sqlite_formatted_insertions_doc_objects
+#+BEGIN_SRC d
+┃",
+ _uid,
+ obj.metainfo.ocn,
+ obj.metainfo.identifier,
+ SQLinsertDelimiter!()(obj_txt["text"]),
+ SQLinsertDelimiter!()(obj_txt["html"]),
+ obj.metainfo.heading_lev_markup,
+ obj.metainfo.is_of_type,
+ obj.metainfo.is_a,
+ url_html.fn_seg_obj_num(doc_matters.src.filename, obj.tags.html_segment_anchor_tag_is, obj.metainfo.identifier),
+);
+#+END_SRC
+
* __END__