-*- mode: org -*-
#+TITLE: sisudoc spine (doc_reform) output sqlite
#+DESCRIPTION: documents - structuring, publishing in multiple formats & search
#+FILETAGS: :spine:output:db:sql:sqlite:
#+AUTHOR: Ralph Amissah
#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+COPYRIGHT: Copyright (C) 2015 - 2024 Ralph Amissah
#+LANGUAGE: en
#+STARTUP: content hideblocks hidestars noindent entitiespretty
#+PROPERTY: header-args :exports code
#+PROPERTY: header-args+ :noweb yes
#+PROPERTY: header-args+ :results no
#+PROPERTY: header-args+ :cache no
#+PROPERTY: header-args+ :padline no
#+PROPERTY: header-args+ :mkdirp yes
#+OPTIONS: H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t
- [[./doc-reform.org][doc-reform.org]] [[./][org/]]
- [[./output_hub.org][output_hub]]
sudo chown ralph:ralph /var/www
- create db
~dr/bin/spine-ldc -v \
--sqlite-db-create --sqlite-db-filename="spine.search.db" \
--output=/var/www/html \
~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/*
- update db
~dr/bin/spine-ldc -v \
--sqlite-update --sqlite-db-filename="spine.search.db" \
--output=/var/www/html \
~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/*
- produce html (and some other) output files
~dr/bin/spine-ldc -v --html --epub --latex --odt \
--output=/var/www \
~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/*
* sql
** _module, templates_ :module:
*** template
#+HEADER: :tangle "../src/sisudoc/io_out/sqlite.d"
#+HEADER: :noweb yes
#+BEGIN_SRC d
<>
module sisudoc.io_out.sqlite;
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
#+END_SRC
*** hub
**** common
#+NAME: sqlite_common
#+BEGIN_SRC d
/+ module-level declarations shared by the sqlite output templates below +/
mixin spineRgxOut;
mixin spineRgxXHTML;
mixin InternalMarkup;
// single static instances, referred to below as rgx, rgx_xhtml and mkup
static auto rgx = RgxO();
static auto rgx_xhtml = RgxXHTML();
static auto mkup = InlineMarkup();
// NOTE(review): not referenced in the visible part of this file; presumably holds the metadata row id of the document being loaded - confirm
long _metadata_tid_lastrowid;
#+END_SRC
**** collection
#+NAME: sqlite_collection
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+
  Collection database entry point: make sure the database directory exists,
  open the sqlite file and run the composite statement for one document.
  - doc_abstraction: document object abstraction, passed through unchanged
  - doc_matters:     document settings; doc_matters.sqlite.filename and
                     doc_matters.sqlite.path locate the database file
  Anything thrown while opening or populating the database is reported and
  the D runtime is then terminated.
+/
template SQLiteHubBuildTablesAndPopulate() {
  void SQLiteHubBuildTablesAndPopulate(D,M)(
    const D doc_abstraction,
    M       doc_matters,
  ) {
    auto pth_sqlite = spinePathsSQLite!()(doc_matters.sqlite.filename, doc_matters.sqlite.path);
    if (!(isValidPath(pth_sqlite.base) && exists(pth_sqlite.base) != 0 && pth_sqlite.base.isDir)) {
      try {
        pth_sqlite.base.mkdirRecurse;
      } catch (FileException ex) { } // best effort: a missing directory surfaces when the database is opened below
    }
    /+ runs the noweb-included statement sequence against the open database +/
    template SQLiteDbStatementComposite() {
      void SQLiteDbStatementComposite(Db,D,M)(
        Db      db,
        const D doc_abstraction,
        M       doc_matters,
      ) {
        <>
        if (doc_matters.opt.action.vox_gt0) {
          writeln(" ", pth_sqlite.sqlite_file);
        }
      }
    }
    try {
      auto db = Database(pth_sqlite.sqlite_file);
      SQLiteDbStatementComposite!()(db, doc_abstraction, doc_matters);
    }
    catch (FileException e) {
      writeln("Failed (FileException): ", e.msg, " ", pth_sqlite.sqlite_file);
      writeln(e.file, " line: ", e.line);
      import core.runtime;
      core.runtime.Runtime.terminate();
    }
    catch (ErrnoException e) {
      writeln("Failed (ErrnoException): ", e.msg, " ", pth_sqlite.sqlite_file);
      writeln(e.file, " line: ", e.line);
      import core.runtime;
      core.runtime.Runtime.terminate();
    }
    catch (Exception e) {
      writeln("Failed (Exception): ", e.msg, " ", pth_sqlite.sqlite_file);
      writeln(e.file, " line: ", e.line);
      import core.runtime;
      core.runtime.Runtime.terminate();
    }
    catch (Throwable) {
      // message previously misspelt as "Trowable"
      writeln("Failed (Throwable): ", pth_sqlite.sqlite_file);
      import core.runtime;
      core.runtime.Runtime.terminate();
    }
  }
}
#+END_SRC
**** discrete
#+NAME: sqlite_discrete
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+
  Discrete (one database per document) entry point: make sure the database
  directory exists, open the sqlite file named after the source document and
  run the composite statement for it.
  Exceptions raised by the composite statement are reported and the D runtime
  is then terminated.
+/
template SQLiteHubDiscreteBuildTablesAndPopulate() {
void SQLiteHubDiscreteBuildTablesAndPopulate(D,M)(
const D doc_abstraction,
M doc_matters,
) {
// NOTE(review): url_html is not used in the visible part of this function - confirm whether the included fragment needs it
auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html, doc_matters.src.language);
auto pth_sqlite = spinePathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); // doc_matters.db_path
if ((isValidPath(pth_sqlite.base) && exists(pth_sqlite.base) != 0 && pth_sqlite.base.isDir)) {
} else {
try {
pth_sqlite.base.mkdirRecurse;
} catch (FileException ex) { }
}
// NOTE(review): the database is opened outside any try/catch here, unlike SQLiteHubBuildTablesAndPopulate - a failure to open is not reported by this function
auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename));
template SQLiteDiscreteDbStatementComposite() {
void SQLiteDiscreteDbStatementComposite(Db,D,M)(
Db db,
const D doc_abstraction,
M doc_matters,
) {
try {
<>
}
catch (FileException e) {
writeln("Failed (FileException): ", e.msg);
writeln(e.file, " line: ", e.line);
import core.runtime;
core.runtime.Runtime.terminate();
}
catch (ErrnoException e) {
writeln("Failed (ErrnoException): ", e.msg);
writeln(e.file, " line: ", e.line);
import core.runtime;
core.runtime.Runtime.terminate();
}
catch (Exception e) {
writeln("Failed (Exception): ", e.msg);
writeln(e.file, " line: ", e.line);
import core.runtime;
core.runtime.Runtime.terminate();
}
// anything else thrown terminates the runtime without printing a message
catch (Throwable) {
import core.runtime;
core.runtime.Runtime.terminate();
}
// at verbosity above 0, print the path of the database file written
if (doc_matters.opt.action.vox_gt0) {
writeln(" ", pth_sqlite.sqlite_file(doc_matters.src.filename));
}
}
}
SQLiteDiscreteDbStatementComposite!()(db, doc_abstraction, doc_matters);
}
}
#+END_SRC
*** db run
#+NAME: sqlite_run
#+BEGIN_SRC d
/+
  Run db_statement against db inside a single explicit transaction.
  - db:           any object providing run(string) (a d2sqlite3 Database in this file)
  - db_statement: sql text, wrapped here in BEGIN TRANSACTION / COMMIT TRANSACTION
  - opt_action:   options; debug_do_sqlite and vox_gt2 control the debug echo
  - note:         label printed when debug_do_sqlite is set
  Errors are printed, not propagated. On failure a ROLLBACK is attempted so
  that the connection is not left inside the half-finished transaction, which
  would make the BEGIN of every later call on the same connection fail.
+/
template SQLiteDbRun() {
  void SQLiteDbRun(Db,St,O)(
    Db     db,
    St     db_statement,
    O      opt_action,
    string note,
  ) {
    debug(sql_statement) {
      writeln(db_statement);
    }
    try {
      db.run(
        "\nBEGIN TRANSACTION;\n" ~
        db_statement ~
        "\nCOMMIT TRANSACTION;\n"
      );
    } catch (Exception ex) { // ErrnoException is an Exception, one handler covers both
      writeln("ERROR SQLite : ", ex);
      try {
        db.run("ROLLBACK;");
      } catch (Exception) { } // no transaction was open (e.g. BEGIN itself failed): nothing to undo
    }
    { /+ debug +/
      if (opt_action.debug_do_sqlite) {
        writeln(note);
        if (opt_action.vox_gt2) {
          writeln(db_statement);
        }
      }
    }
  }
}
#+END_SRC
*** munge
#+NAME: sqlite_munge
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+ returns _txt with every match of rgx.quotation_mark_sql_insert_delimiter
   written twice ("$0$0" is the whole match, repeated) +/
template SQLinsertDelimiter() {
  string SQLinsertDelimiter(string _txt) {
    return _txt.replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0");
  }
}
/+
  Returns an instance of the local struct sqlite_format_and_load_objects,
  whose members are supplied by the noweb-included fragments listed below.
  doc_matters is in scope for those members.
+/
template SQLiteFormatAndLoadObject() {
auto SQLiteFormatAndLoadObject(M)(
M doc_matters,
) {
mixin spineRgxOut;
mixin spineRgxXHTML;
struct sqlite_format_and_load_objects {
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
<>
}
return sqlite_format_and_load_objects();
}
}
#+END_SRC
*** sqlite instructions
**** create
#+NAME: sqlite_create
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+
  Returns the sql text assembled from the noweb-included fragments inside
  the delimited string below.
  NOTE(review): the text is passed through format() with no arguments, so any
  format specifier in the included sql would be interpreted - confirm the
  fragments escape '%' where needed.
+/
template SQLiteTablesReCreate() {
string SQLiteTablesReCreate()() {
string _sql_instruct;
_sql_instruct = format(q"┃
<>
<>
<>
<>
<>
<>
┃",);
return _sql_instruct;
}
}
#+END_SRC
**** delete
#+NAME: sqlite_delete
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+ returns _delete_uid, a string built by the noweb-included fragments below from doc_matters +/
template SQLiteDeleteDocument() {
string SQLiteDeleteDocument(M)(
M doc_matters,
) {
<>
<>
<>
return _delete_uid;
}
}
#+END_SRC
**** insert metadata
#+NAME: sqlite_insert_metadata
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+ returns _insert_metadata, a string built by the noweb-included fragments below from doc_matters +/
template SQLiteInsertMetadata() {
string SQLiteInsertMetadata(M)(
M doc_matters,
) {
<>
<>
<>
<>
return _insert_metadata;
}
}
#+END_SRC
**** insert metadata topics
#+NAME: sqlite_insert_metadata_topics
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+ returns the elements of _insert_topics (filled by the noweb-included fragments below) joined into one UTF-8 string +/
template SQLiteInsertMetadataTopics() {
string SQLiteInsertMetadataTopics(M)(
M doc_matters,
) {
<>
<>
<>
<>
<>
// closes a scope opened inside the included fragments above
}
return _insert_topics.join.to!(char[]).toUTF8;
}
}
#+END_SRC
**** insert doc objects loop
#+NAME: sqlite_insert_doc_objects_loop
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+
  Builds the sql text for the document's objects; the row builder and the
  loop over doc_abstraction are supplied by the noweb-included fragments.
+/
template SQLiteInsertDocObjectsLoop() {
string SQLiteInsertDocObjectsLoop(D,M)(
const D doc_abstraction,
M doc_matters,
) {
// document uid passed through SQLinsertDelimiter
string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid);
auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html, doc_matters.src.language);
/+ returns _insert_doc_objects_row for a single object, as built by the included fragments +/
string insertDocObjectsRow(O)(O obj) {
<>
<>
<>
<>
return _insert_doc_objects_row;
}
// the included fragment below is expected to contain the loop and the return statement
<>
}
}
#+END_SRC
**** tables create
#+NAME: sqlite_tables_create
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+
  Runs the noweb-included database creation fragment inside a try block;
  exceptions are reported and the D runtime is then terminated.
  The nested SQLiteTablesReCreate repeats the template of the same name
  defined earlier in this file.
+/
template SQLiteTablesCreate() {
void SQLiteTablesCreate(E,O,C)(E env, O opt_action, C config) {
import d2sqlite3;
template SQLiteTablesReCreate() {
string SQLiteTablesReCreate()() {
string _sql_instruct;
_sql_instruct = format(q"┃
<>
<>
<>
<>
<>
<>
┃",);
return _sql_instruct;
}
}
try {
<>
}
catch (FileException e) {
writeln("Failed (FileException): ", e.msg);
writeln(e.file, " line: ", e.line);
import core.runtime;
core.runtime.Runtime.terminate();
}
catch (ErrnoException e) {
writeln("Failed (ErrnoException): ", e.msg);
writeln(e.file, " line: ", e.line);
import core.runtime;
core.runtime.Runtime.terminate();
}
catch (Exception e) {
writeln("Failed (Exception): ", e.msg);
writeln(e.file, " line: ", e.line);
import core.runtime;
core.runtime.Runtime.terminate();
}
// anything else thrown terminates the runtime without printing a message
catch (Throwable) {
import core.runtime;
core.runtime.Runtime.terminate();
}
}
}
#+END_SRC
**** db create
#+NAME: sqlite_db_create
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+ when --sqlite-db-create is set: resolve database name and path (command
   line first, then configuration file), make sure the directory exists and
   run the table re-create statement +/
if (opt_action.sqlite_db_create) {
  string db_filename = "";
  if (opt_action.sqliteDB_filename.length > 0) {
    db_filename = opt_action.sqliteDB_filename;
  } else if (config.conf.w_srv_db_sqlite_filename.length > 0) {
    db_filename = config.conf.w_srv_db_sqlite_filename;
  }
  string db_path = "";
  if (opt_action.sqliteDB_path.length > 0) {
    db_path = opt_action.sqliteDB_path;
  } else if (config.conf.w_srv_db_sqlite_path.length > 0) {
    db_path = config.conf.w_srv_db_sqlite_path;
  }
  if (db_filename.length == 0 || db_path.length == 0) {
    /+ both a name and a path are required +/
    writeln("must provide db name & output root path either on the command line or in configuration file");
    writeln("db name: ", db_filename);
    writeln("db path: ", db_path);
  } else {
    if (opt_action.vox_gt2) {
      writeln("db name: ", db_filename);
      writeln("db path: ", db_path);
      writeln("db name & path: ", db_path, "/", db_filename);
    }
    if (opt_action.vox_gt1) {
      writeln("attempting to create db: ", db_path, "/", db_filename);
    }
    auto pth_sqlite = spinePathsSQLite!()(db_filename, db_path);
    bool base_dir_present =
      isValidPath(pth_sqlite.base)
      && exists(pth_sqlite.base) != 0
      && pth_sqlite.base.isDir;
    if (!base_dir_present) {
      try {
        pth_sqlite.base.mkdirRecurse;
      } catch (FileException ex) { }
    }
    auto db = Database(pth_sqlite.sqlite_file);
    string _db_statement = SQLiteTablesReCreate!()();
    SQLiteDbRun!()(db, _db_statement, opt_action, "TABLE RE-CREATE");
  }
}
#+END_SRC
**** tables drop
#+NAME: sqlite_tables_drop
#+BEGIN_SRC d
/+
  Remove the collection database file when opt_action.sqlite_db_drop is set.
  - opt_action: options; sqliteDB_filename and sqliteDB_path take precedence
  - config:     configuration; w_srv_db_sqlite_filename and
                w_srv_db_sqlite_path are the fallbacks
  A failure to remove the file is reported instead of being silently ignored;
  it is not fatal.
+/
template SQLiteDbDrop() {
  void SQLiteDbDrop(O,C)(O opt_action, C config) {
    writeln("db drop");
    if ((opt_action.sqlite_db_drop)) {
      string db_filename = (opt_action.sqliteDB_filename.length > 0)
      ? opt_action.sqliteDB_filename
      : (config.conf.w_srv_db_sqlite_filename.length > 0)
        ? config.conf.w_srv_db_sqlite_filename
        : "";
      string db_path = (opt_action.sqliteDB_path.length > 0)
      ? opt_action.sqliteDB_path
      : (config.conf.w_srv_db_sqlite_path.length > 0)
        ? config.conf.w_srv_db_sqlite_path
        : "";
      if (db_filename.length > 0 && db_path.length > 0) {
        auto pth_sqlite = spinePathsSQLite!()(db_filename, db_path);
        writeln("remove(", pth_sqlite.sqlite_file, ")");
        try {
          remove(pth_sqlite.sqlite_file);
        } catch (FileException ex) {
          // e.g. the file does not exist or is not writable
          writeln("Failed (FileException): ", ex.msg, " ", pth_sqlite.sqlite_file);
        }
      } else {
        writeln("must provide db name & output root path either on the command line or in configuration file");
        writeln("db name: ", db_filename);
        writeln("db path: ", db_path);
      }
    }
  }
}
#+END_SRC
** 1. [#A] sqlite_db_statement :statement:
*** collection
#+NAME: sqlite_db_statement_composite_collection
#+BEGIN_SRC d
/+ collection database: each requested action is run as its own transaction
   through SQLiteDbRun, in the order create, delete, update +/
if (doc_matters.opt.action.sqlite_db_create) {
  auto pth_db = spinePathsSQLite!()(doc_matters.sqlite.filename, doc_matters.sqlite.path);
  bool base_dir_present =
    isValidPath(pth_db.base)
    && exists(pth_db.base) != 0
    && pth_db.base.isDir;
  if (!base_dir_present) {
    try {
      pth_db.base.mkdirRecurse;
    } catch (FileException ex) { }
  }
  SQLiteDbRun!()(db, SQLiteTablesReCreate!()(), doc_matters.opt.action, "TABLE RE-CREATE");
}
if (doc_matters.opt.action.sqlite_delete) {
  SQLiteDbRun!()(db, SQLiteDeleteDocument!()(doc_matters), doc_matters.opt.action, "DELETE Document");
}
if (doc_matters.opt.action.sqlite_update) {
  /+ an update is a delete of the existing document followed by fresh inserts +/
  SQLiteDbRun!()(db, SQLiteDeleteDocument!()(doc_matters), doc_matters.opt.action, "DELETE Document");
  SQLiteDbRun!()(db, SQLiteInsertMetadata!()(doc_matters), doc_matters.opt.action, "INSERT MetaData");
  /+ get tid (lastrowid or max) for use in doc_objects table +/
  SQLiteDbRun!()(db, doc_abstraction.SQLiteInsertDocObjectsLoop!()(doc_matters), doc_matters.opt.action, "INSERT DocObjects");
  SQLiteDbRun!()(db, SQLiteInsertMetadataTopics!()(doc_matters), doc_matters.opt.action, "INSERT MetaDataTopics");
}
db.close;
#+END_SRC
*** discrete
#+NAME: sqlite_db_statement_composite_discrete
#+BEGIN_SRC d
{
  /+ discrete database: table creation and all inserts for the document are
     concatenated and run as one transaction +/
  string _db_statement =
    SQLiteTablesReCreate!()()
    ~ SQLiteInsertMetadata!()(doc_matters)
    ~ SQLiteInsertMetadataTopics!()(doc_matters)
    ~ doc_abstraction.SQLiteInsertDocObjectsLoop!()(doc_matters);
  SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table CREATE Tables, INSERT DocObjects");
}
db.close;
#+END_SRC
** 2. imports
#+NAME: sqlite_imports
#+BEGIN_SRC d
import
sisudoc.io_out,
sisudoc.io_out.rgx,
sisudoc.io_out.rgx_xhtml;
import
std.file,
std.uri;
import std.conv : to;
import std.typecons : Nullable;
import d2sqlite3;
#+END_SRC
** 3. format and load template
*** 1. prepare objects (munge, sanitize, markup)
**** 1. _text_ generic munge (sanitize text for search)
- [3/4] (search text, applies to all but code blocks)
- [ ] remove whitespace, paragraph on single line (formatting kept so far)
- [X] remove font face attributes
- [X] move embedded endnotes
- [X] place after text object
- [X] remove embedded endnote numbers (rely on html output to represent)
- [X] urls
- [X] clean url markers, leave plain link text
- [X] place urls after text object and its endnotes
#+NAME: sanitize_text_for_search
#+BEGIN_SRC d
/+
  Plain search text for an object: endnote text and link urls are lifted out
  of _txt and appended after it (notes first, then urls), one per line.
+/
string generic_munge_sanitize_text_for_search(
  string _txt,
) {
  string endnote_text;
  string link_urls;
  /+ endnotes: collect their text, then drop them from the running text +/
  if (_txt.matchFirst(rgx.inline_notes_al_gen)) {
    foreach (note; _txt.matchAll(rgx.inline_notes_al_gen_text)) {
      endnote_text ~= "\n" ~ note["text"];
    }
    _txt = _txt.replaceAll(rgx.inline_notes_al_gen, "");
  }
  /+ links: keep targets matching rgx.url, then apply the link clean-up regex +/
  if (_txt.matchFirst(rgx.inline_link)) {
    foreach (lnk; _txt.matchAll(rgx.inline_link)) {
      if (lnk["link"].match(rgx.url)) {
        link_urls ~= "\n" ~ lnk["link"];
      }
    }
    _txt = _txt.replaceAll(rgx.inline_link_clean, "");
  }
  if (endnote_text.length > 0) { _txt ~= endnote_text; }
  if (link_urls.length > 0)    { _txt ~= link_urls; }
  if (doc_matters.opt.action.debug_do_sqlite) { /+ debug +/
    writeln(_txt, "\n");
  }
  debug(sql_text_clean) {
    writeln(_txt);
  }
  return _txt;
}
#+END_SRC
**** 2. _html_
light html objects
- place endnotes after text object
- light inline html markup
***** munge
****** general munge (special characters, inline markup, move notes)
#+NAME: sanitize_and_munge_inline_html_munge
#+BEGIN_SRC d
/+
  General html munge of obj.text: special characters are escaped first, then
  the font-face regexes are applied to the result.
  - doc_matters: document settings; opt.action.debug_do_sqlite echoes the result
  - obj:         document object whose text is munged
  Returns the munged text.
+/
string munge_html(M,O)(
  M       doc_matters,
  const O obj,
) {
  /+ escape html special characters as numeric character references; the
     previous literals were either unterminated (""") or replaced a
     character with itself +/
  string _html_special_characters(string _txt){
    _txt = _txt
      .replaceAll(rgx_xhtml.ampersand,    "&#38;")
      .replaceAll(rgx_xhtml.quotation,    "&#34;")
      .replaceAll(rgx_xhtml.less_than,    "&#60;")
      .replaceAll(rgx_xhtml.greater_than, "&#62;")
      .replaceAll(rgx.nbsp_char,          " ")
      // NOTE(review): line-break markers are replaced by a newline as before; an html line break may have been intended - confirm
      .replaceAll(rgx.br_line_inline,     "\n")
      .replaceAll(rgx.br_line,            "\n")
      .replaceAll(rgx.br_line_spaced,     "\n")
      .replaceAll(rgx_xhtml.line_break,   "\n");
    return _txt;
  }
  /+ NOTE(review): every font-face match is replaced by its first capture
     only, i.e. the markup is dropped rather than turned into html - confirm +/
  string _html_font_face(string _txt){
    _txt = _txt
      .replaceAll(rgx.inline_emphasis,    "$1")
      .replaceAll(rgx.inline_bold,        "$1")
      .replaceAll(rgx.inline_underscore,  "$1")
      .replaceAll(rgx.inline_italics,     "$1")
      .replaceAll(rgx.inline_superscript, "$1")
      .replaceAll(rgx.inline_subscript,   "$1")
      .replaceAll(rgx.inline_strike,      "$1")
      .replaceAll(rgx.inline_insert,      "$1")
      .replaceAll(rgx.inline_mono,        "$1")
      .replaceAll(rgx.inline_cite,        "$1");
    return _txt;
  }
  string _txt = _html_font_face(_html_special_characters(obj.text));
  { /+ debug +/
    if (doc_matters.opt.action.debug_do_sqlite) {
      writeln(_txt, "\n");
    }
  }
  return _txt;
}
#+END_SRC
****** special characters
#+NAME: sanitize_and_munge_inline_html_special_characters
#+BEGIN_SRC d
/+
  Escape html special characters in _txt as numeric character references and
  replace line-break markers. The previous literals were either unterminated
  (""") or replaced a character with itself.
+/
string html_special_characters(string _txt){
  _txt = _txt
    .replaceAll(rgx_xhtml.ampersand,    "&#38;")
    .replaceAll(rgx_xhtml.quotation,    "&#34;")
    .replaceAll(rgx_xhtml.less_than,    "&#60;")
    .replaceAll(rgx_xhtml.greater_than, "&#62;")
    .replaceAll(rgx.nbsp_char,          " ")
    // NOTE(review): line-break markers are replaced by a newline as before; an html line break may have been intended - confirm
    .replaceAll(rgx.br_line_inline,     "\n")
    .replaceAll(rgx.br_line,            "\n")
    .replaceAll(rgx.br_line_spaced,     "\n")
    .replaceAll(rgx_xhtml.line_break,   "\n");
  return _txt;
}
#+END_SRC
****** special characters for code
#+NAME: sanitize_and_munge_inline_html_special_characters_code
#+BEGIN_SRC d
/+
  Escape html special characters in code-block text as numeric character
  references; line-break markers are left untouched here. The previous
  literals were either unterminated (""") or replaced a character with itself.
+/
string html_special_characters_code(string _txt){
  _txt = _txt
    .replaceAll(rgx_xhtml.ampersand,    "&#38;")
    .replaceAll(rgx_xhtml.quotation,    "&#34;")
    .replaceAll(rgx_xhtml.less_than,    "&#60;")
    .replaceAll(rgx_xhtml.greater_than, "&#62;")
    .replaceAll(rgx.nbsp_char,          " ");
  return _txt;
}
#+END_SRC
****** font_face
#+NAME: sanitize_and_munge_inline_html_font_face
#+BEGIN_SRC d
/+ applies each font-face regex in turn, replacing every match by its first capture group +/
// NOTE(review): as written the markup is dropped rather than converted to html - confirm the replacement strings
string html_font_face(string _txt){
_txt = _txt
.replaceAll(rgx.inline_emphasis, "$1")
.replaceAll(rgx.inline_bold, "$1")
.replaceAll(rgx.inline_underscore, "$1")
.replaceAll(rgx.inline_italics, "$1")
.replaceAll(rgx.inline_superscript, "$1")
.replaceAll(rgx.inline_subscript, "$1")
.replaceAll(rgx.inline_strike, "$1")
.replaceAll(rgx.inline_insert, "$1")
.replaceAll(rgx.inline_mono, "$1")
.replaceAll(rgx.inline_cite, "$1");
return _txt;
}
#+END_SRC
****** inline markup
******* grouped text
#+NAME: sanitize_and_munge_inline_html_grouped_text_bullets_indents
#+BEGIN_SRC d
/+
  For objects whose metainfo.is_a is "group", replaces the grouped-paragraph
  indent and bullet markers in _txt with the literal strings below; other
  objects are returned unchanged.
  _suffix and _xml_type are accepted but not used in this function.
+/
string inline_grouped_text_bullets_indents(M,O)(
M doc_matters,
const O obj,
string _txt,
string _suffix = ".html",
string _xml_type = "seg",
) {
// local instances; these hide the module-level rgx and rgx_xhtml within this function
static auto rgx = RgxO();
static auto rgx_xhtml = RgxXHTML();
if (obj.metainfo.is_a == "group") {
// NOTE(review): all nine indent levels map to the same replacement string here - confirm the intended widths
_txt = (_txt)
.replaceAll(rgx.grouped_para_indent_1,
" ")
.replaceAll(rgx.grouped_para_indent_2,
" ")
.replaceAll(rgx.grouped_para_indent_3,
" ")
.replaceAll(rgx.grouped_para_indent_4,
" ")
.replaceAll(rgx.grouped_para_indent_5,
" ")
.replaceAll(rgx.grouped_para_indent_6,
" ")
.replaceAll(rgx.grouped_para_indent_7,
" ")
.replaceAll(rgx.grouped_para_indent_8,
" ")
.replaceAll(rgx.grouped_para_indent_9,
" ")
.replaceAll(rgx.grouped_para_indent_hang, " ")
.replaceAll(rgx.grouped_para_bullet, "● ")
.replaceAll(rgx.grouped_para_bullet_indent_1,
" ● ")
.replaceAll(rgx.grouped_para_bullet_indent_2,
" ● ")
.replaceAll(rgx.grouped_para_bullet_indent_3,
" ● ")
.replaceAll(rgx.grouped_para_bullet_indent_4,
" ● ")
.replaceAll(rgx.grouped_para_bullet_indent_5,
" ● ")
.replaceAll(rgx.grouped_para_bullet_indent_6,
" ● ")
.replaceAll(rgx.grouped_para_bullet_indent_7,
" ● ")
.replaceAll(rgx.grouped_para_bullet_indent_8,
" ● ")
.replaceAll(rgx.grouped_para_bullet_indent_9,
" ● ");
}
return _txt;
}
#+END_SRC
******* images
#+NAME: sanitize_and_munge_inline_html_images
#+BEGIN_SRC d
/+
  Replaces inline image markup in _txt with capture groups 1 and 6 of
  rgx.inline_image, separated by a space.
  _suffix is accepted but not used in this function.
+/
string inline_images(M,O)(
M doc_matters,
const O obj,
string _txt,
string _suffix = ".html",
string _xml_type = "seg",
) {
// image path prefix per output type
// NOTE(review): _img_pth is set but never read below; the replacement string looks as if an image element using it was lost - confirm
string _img_pth;
if (_xml_type == "epub") {
_img_pth = "image/";
} else if (_xml_type == "scroll") {
_img_pth = "../../image/";
} else if (_xml_type == "seg") {
_img_pth = "../../../image/";
}
if (_txt.match(rgx.inline_image)) {
_txt = _txt.replaceAll( // TODO bug where image dimensions (w or h) not given & consequently set to 0; should not be used (calculate earlier, abstraction)
rgx.inline_image,
("$1 $6"));
}
return _txt;
}
#+END_SRC
******* links
******** scroll, seg, epub
#+NAME: sanitize_and_munge_inline_html_inline_links
#+BEGIN_SRC d
/+
  Rewrites inline link markup in _txt for objects that have inline links.
  - numbered links are expanded from obj.stow.link as linked text followed by
    the uri-encoded target between ┤ and ├ (skipped for "code" objects)
  - for _xml_type "seg", internal seg-and-hash links are resolved through
    doc_matters.has.tag_associations; unknown anchors produce a warning at
    verbosity above 0
  - for any other _xml_type a single seg-and-hash link is pointed at the
    scroll file
  NOTE(review): the text of this block is damaged from the commented-out
  debug lines near its end onward: the rest of inline_links and the opening of
  the endnote function (inline_notes_scroll is called by inline_markup but its
  signature is not present) appear to be missing - restore from the original.
+/
string inline_links(M,O)(
M doc_matters,
const O obj,
string _txt,
string _xml_type = "seg",
) {
if (obj.has.inline_links) {
if (obj.metainfo.is_a != "code") {
_txt = replaceAll!(m =>
m["linked_text"] ~ "┤" ~ to!string((obj.stow.link[m["num"].to!ulong])).encode ~ "├"
)(_txt, rgx.inline_link_number_only);
}
if ((_txt.match(rgx.mark_internal_site_lnk))
&& (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault
_txt = _txt.replaceAll(
rgx.inline_seg_link,
"$1");
}
auto pth_html = spinePathsHTML!()(doc_matters.output_path, doc_matters.src.language);
if (_xml_type == "seg") {
foreach (m; _txt.matchAll(rgx.inline_link_seg_and_hash)) {
if (m.captures["hash"] in doc_matters.has.tag_associations) {
// the anchor is itself a segment name: link to that segment's file
if (m.captures["hash"] == doc_matters.has.tag_associations[(m.captures["hash"])]["seg_lv4"]) {
_txt = _txt.replaceFirst(
rgx.inline_link_seg_and_hash,
"┥$1┝┤"
~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html
~ "/"
~ pth_html.tail_fn_seg(doc_matters.src.filename, "$2.html")
~ "├"
);
} else {
// the anchor lies inside a segment: link to the segment file plus #anchor
_txt = _txt.replaceFirst(
rgx.inline_link_seg_and_hash,
"┥$1┝┤"
~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html
~ "/"
~ doc_matters.src.filename_base
~ "/"
~ doc_matters.has.tag_associations[(m.captures["hash"])]["seg_lv4"]
~ ".html"
~ "#" ~ m.captures["hash"]
~ "├"
);
}
} else {
if (doc_matters.opt.action.vox_gt0) {
writeln(
"WARNING on internal document links, anchor to link <<"
~ m.captures["hash"]
~ ">> not found in document, "
~ "anchor: " ~ m.captures["hash"]
~ " document: " ~ doc_matters.src.filename
);
}
}
}
} else {
if (auto m = _txt.match(rgx.inline_link_seg_and_hash)) {
_txt = _txt.replaceFirst(
rgx.inline_link_seg_and_hash,
"┥$1┝┤"
~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html
~ "/"
~ pth_html.tail_fn_scroll(doc_matters.src.filename)
~ "#" ~ m.captures["hash"]
~ "├"
);
}
}
_txt = _txt
.replaceAll(
rgx.inline_link_fn_suffix,
("$1.html"))
.replaceAll(
rgx.inline_link,
("$1"))
.replaceAll(
rgx.mark_internal_site_lnk,
"");
}
debug(markup_links) {
if (_txt.match(rgx.inline_link)) {
writeln(__LINE__,
" (missed) markup link identified (",
obj.has.inline_links,
"): ", obj.metainfo.is_a, ": ",
obj.text
);
}
// if (obj.metainfo.is_a == "bookindex") { // DEBUG LINE
// if (_txt.match(regex(r""
// NOTE(review): text is missing between the line above and the line below; _endnotes, used further down, is not declared in what remains
~ "" ~ m.captures["num"] ~ "."
~ m.captures["note"]
~ "
";
}
_txt = replaceAll!(m =>
(" " ~ "" ~ m["num"] ~ ""))
(_txt, rgx.inline_notes_al_regular_number_note)
~ _endnotes.join("\n");
}
debug(markup_endnotes) {
if (_txt.match(rgx.inline_notes_al_regular_number_note)) {
writeln(__LINE__, " (missed) markup endnote: ", obj.metainfo.is_a, ": ", obj.text);
}
}
debug(markup) {
if (_txt.match(rgx.inline_notes_al_regular_number_note)) {
writeln(__LINE__, " (missed) markup endnote: ", obj.metainfo.is_a, ": ", obj.text);
}
}
return _txt;
}
#+END_SRC
******* inline markup (formatting)
#+NAME: sanitize_and_munge_inline_html_inline_markup
#+BEGIN_SRC d
string xml_type="seg"; /+ set html document type to be linked to here (seg|scroll) +/
/+
  Apply the inline markup passes to _txt in order: grouped-text bullets and
  indents (group objects only), images, links, endnotes.
  - doc_matters: document settings, handed to each pass
  - obj:         the document object _txt was derived from
  - _txt:        text already processed by munge_html
  Returns the transformed text.
  The fourth positional parameter of inline_grouped_text_bullets_indents and
  inline_images is _suffix, not _xml_type; xml_type was previously passed
  there and never reached _xml_type. The suffix is now given explicitly so
  that a change of xml_type takes effect in those passes too.
+/
string inline_markup(M,O)(
  M       doc_matters,
  const O obj,
  string  _txt,
) {
  if (obj.metainfo.is_a == "group") {
    _txt = inline_grouped_text_bullets_indents(doc_matters, obj, _txt, ".html", xml_type);
  }
  _txt = inline_images(doc_matters, obj, _txt, ".html", xml_type);
  _txt = inline_links(doc_matters, obj, _txt, xml_type);
  _txt = inline_notes_scroll(doc_matters, obj, _txt);
  return _txt;
}
#+END_SRC
***** objects (formatting)
****** heading
#+NAME: html_objects_heading
#+BEGIN_SRC d
/+ light html for a heading object: obj.text through munge_html and inline_markup, then the format template below +/
string html_heading(M,O)(
M doc_matters,
const O obj,
) {
// NOTE(review): `x == "a" || "b"` is always true (the bare string literals count as true), so the next two asserts check nothing - confirm the intended value sets before tightening
assert(obj.metainfo.is_of_part == "body" || "frontmatter" || "backmatter");
assert(obj.metainfo.is_of_section == "body" || "toc" || "endnotes" || "glossary" || "bibliography" || "bookindex" || "blurb");
assert(obj.metainfo.is_of_type == "para");
assert(obj.metainfo.is_a == "heading");
string _txt = munge_html(doc_matters, obj);
_txt = inline_markup(doc_matters, obj, _txt);
// NOTE(review): the template has one %s but two arguments follow; the surrounding markup appears to be missing - restore from the original
string o = format(q"┃
%s
┃",
obj.metainfo.is_a,
_txt,
);
return o;
}
#+END_SRC
******* +fancy+
##+NAME: prepare_objects_html
#+BEGIN_SRC d
/+ alternative heading formatter; its #+NAME line above is commented out, so no noweb reference picks it up +/
string html_heading(M,O)(
M doc_matters,
const O obj,
) {
string o;
string _txt = munge_html(doc_matters, obj);
// NOTE(review): the template has one %s but four arguments follow; the surrounding markup appears to be missing
o = format(q"┃
%s
┃",
obj.metainfo.heading_lev_markup,
obj.metainfo.is_a,
_txt,
obj.metainfo.heading_lev_markup,
);
return o;
}
#+END_SRC
****** para
#+NAME: html_objects_para
#+BEGIN_SRC d
/+ light html for a paragraph object: munge_html, optional leading bullet, inline_markup, then the format template below +/
string html_para(M,O)(
M doc_matters,
const O obj,
) {
// NOTE(review): `x == "a" || "b"` is always true, so the first, second and fourth asserts below check nothing - confirm the intended value sets before tightening
assert(obj.metainfo.is_of_part == "body" || "frontmatter" || "backmatter");
assert(obj.metainfo.is_of_section == "body" || "toc" || "endnotes" || "glossary" || "bibliography" || "bookindex" || "blurb");
assert(obj.metainfo.is_of_type == "para");
assert(obj.metainfo.is_a == "para" || "toc" || "endnote" || "glossary" || "bibliography" || "bookindex" || "blurb");
string _txt = munge_html(doc_matters, obj);
// bulleted paragraphs get a leading "● "
_txt = (obj.attrib.bullet) ? ("● " ~ _txt) : _txt;
_txt = inline_markup(doc_matters, obj, _txt);
// NOTE(review): the template has one %s but four arguments follow; the surrounding markup appears to be missing
string o = format(q"┃
%s
┃",
obj.metainfo.is_a,
obj.attrib.indent_hang,
obj.attrib.indent_base,
_txt
);
return o;
}
#+END_SRC
****** quote
#+NAME: html_objects_quote
#+BEGIN_SRC d
/+ light html for a quote block: obj.text through munge_html only (no inline_markup), then the format template below +/
string html_quote(M,O)(
M doc_matters,
const O obj,
) {
assert(obj.metainfo.is_of_part == "body");
// NOTE(review): `x == "a" || "b"` is always true, so this assert checks nothing
assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb");
assert(obj.metainfo.is_of_type == "block");
assert(obj.metainfo.is_a == "quote");
string _txt = munge_html(doc_matters, obj);
// NOTE(review): the template has one %s but two arguments follow; the surrounding markup appears to be missing
string o = format(q"┃
%s
┃",
obj.metainfo.is_a,
_txt
);
return o;
}
#+END_SRC
****** group
#+NAME: html_objects_group
#+BEGIN_SRC d
/+ light html for a group block: obj.text through munge_html and inline_markup, then the format template below +/
string html_group(M,O)(
M doc_matters,
const O obj,
) {
assert(obj.metainfo.is_of_part == "body");
// NOTE(review): `x == "a" || "b"` is always true, so this assert checks nothing
assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb");
assert(obj.metainfo.is_of_type == "block");
assert(obj.metainfo.is_a == "group");
string _txt = munge_html(doc_matters, obj);
_txt = inline_markup(doc_matters, obj, _txt);
// NOTE(review): the template has one %s but two arguments follow; the surrounding markup appears to be missing
string o = format(q"┃
%s
┃",
obj.metainfo.is_a,
_txt
);
return o;
}
#+END_SRC
****** block
#+NAME: html_objects_block
#+BEGIN_SRC d
/+ light html for a block object: obj.text through munge_html and inline_markup, trailing whitespace stripped, then the format template below +/
string html_block(M,O)(
M doc_matters,
const O obj,
) {
assert(obj.metainfo.is_of_part == "body");
// NOTE(review): `x == "a" || "b"` is always true, so this assert checks nothing
assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb");
assert(obj.metainfo.is_of_type == "block");
assert(obj.metainfo.is_a == "block");
string _txt = munge_html(doc_matters, obj);
_txt = inline_markup(doc_matters, obj, _txt);
// NOTE(review): the template has one %s but two arguments follow; the surrounding markup appears to be missing
string o = format(q"┃
%s
┃",
obj.metainfo.is_a,
_txt.stripRight
);
return o;
}
#+END_SRC
****** verse
#+NAME: html_objects_verse
#+BEGIN_SRC d
/+ light html for a verse block: obj.text through munge_html only (no inline_markup), then the format template below +/
string html_verse(M,O)(
M doc_matters,
const O obj,
) {
assert(obj.metainfo.is_of_part == "body");
// NOTE(review): `x == "a" || "b"` is always true, so this assert checks nothing
assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb");
assert(obj.metainfo.is_of_type == "block");
assert(obj.metainfo.is_a == "verse");
string _txt = munge_html(doc_matters, obj);
// NOTE(review): the template has one %s but two arguments follow; the surrounding markup appears to be missing
string o = format(q"┃%s
┃",
obj.metainfo.is_a,
_txt
);
return o;
}
#+END_SRC
****** code
#+NAME: html_objects_code
#+BEGIN_SRC d
/+ light html for a code block: obj.text through html_special_characters_code only, then the format template below +/
string html_code(O)(
const O obj,
) {
assert(obj.metainfo.is_of_part == "body");
assert(obj.metainfo.is_of_section == "body");
assert(obj.metainfo.is_of_type == "block");
assert(obj.metainfo.is_a == "code");
string _txt = html_special_characters_code(obj.text);
// NOTE(review): the template has one %s but two arguments follow; the surrounding markup appears to be missing
string o = format(q"┃%s
┃",
obj.metainfo.is_a,
_txt
);
return o;
}
#+END_SRC
****** table
#+NAME: html_objects_table
#+BEGIN_SRC d
/+
  Light html for a table object: obj.text through munge_html, split into rows
  and cells by _tablarize, then the format template below.
+/
string html_table(M,O)(
M doc_matters,
const O obj,
) {
assert(obj.metainfo.is_of_part == "body");
assert(obj.metainfo.is_of_section == "body");
assert(obj.metainfo.is_of_type == "block");
assert(obj.metainfo.is_a == "table");
/+ returns (table markup, table note); a row consisting of a single cell
   within the last two rows is collected as the note instead of a cell +/
Tuple!(string, string) _tablarize(O)(
const O obj,
string _txt,
) {
string[] _table_rows = _txt.split(rgx.table_delimiter_row);
string[] _table_cols;
string _table;
string _tablenote;
foreach(row_idx, row; _table_rows) {
_table_cols = row.split(rgx.table_delimiter_col);
// NOTE(review): appending an empty string has no effect; a row-opening tag appears to be missing here
_table ~= "";
foreach(col_idx, cell; _table_cols) {
if ((_table_cols.length == 1)
&& (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx)
_tablenote ~= cell;
} else {
// first row uses th when the table has a heading, otherwise td
string _col_is = (row_idx == 0 && obj.table.heading) ? "th" : "td";
// "l" aligns left, anything else aligns right
// NOTE(review): column_aligns and column_widths are indexed by col_idx without a bounds check - presumably they have one entry per column; confirm
string _align = ("style=\"text-align:"
~ ((obj.table.column_aligns[col_idx] == "l")
? "left\"" : "right\""));
_table ~= "<"
~ _col_is
~ " width=\""
~ obj.table.column_widths[col_idx].to!string
~ "%\" "
~ _align
~ ">";
_table ~= cell;
// NOTE(review): the closing tag below lacks its opening "</"; text appears to be missing
_table ~= ""
~ _col_is
~ ">";
}
}
_table ~= "
";
}
Tuple!(string, string) t = tuple(
_table,
_tablenote,
);
return t;
}
string _txt = munge_html(doc_matters, obj);
Tuple!(string, string) t = _tablarize(obj, _txt);
_txt = t[0];
string _note = t[1];
// NOTE(review): the template has one %s but three arguments follow; the surrounding markup appears to be missing
string o = format(q"┃
%s
┃",
obj.metainfo.is_a,
_txt,
_note
);
return o;
}
#+END_SRC
*** 2. hub (sqlite_format_and_load_objects)
**** sql related
#+NAME: sqlite_load_object_string
#+BEGIN_SRC d
/+ placeholder: ignores both arguments and returns an empty (null) string +/
string sqlite_load_string(M,O)(
  M       doc_matters,
  const O obj,
) {
  return null;
}
#+END_SRC
#+NAME: sqlite_load_object_statement
#+BEGIN_SRC d
/+ placeholder: returns an empty (null) string; the nested helper is never called +/
string sqlite_statement(O)(
  const O obj,
  string  _txt,
  string  _html,
) {
  /+ would echo the html and the sql statement +/
  void _sql_exe(O)(string _sql) {
    writeln(_html);
    writeln(_sql);
  }
  return null;
}
#+END_SRC
**** heading
#+NAME: hub_format_and_sqlite_load_objects_heading
#+BEGIN_SRC d
/+ heading object: returns its search text under "text" and its light html under "html" +/
string[string] heading(M,O)(
  M       doc_matters,
  const O obj,
) {
  string[string] obj_txt;
  obj_txt["text"] = generic_munge_sanitize_text_for_search(obj.text);
  obj_txt["html"] = html_heading(doc_matters, obj);
  if (doc_matters.opt.action.debug_do_sqlite) { /+ debug +/
    debug(sql_txt)  { writeln(obj_txt["text"]); }
    debug(sql_html) { writeln(obj_txt["html"]); }
  }
  return obj_txt;
}
#+END_SRC
**** para
#+NAME: hub_format_and_sqlite_load_objects_para
#+BEGIN_SRC d
/+ paragraph object: returns its search text under "text" and its light html under "html" +/
string[string] para(M,O)(
  M       doc_matters,
  const O obj,
) {
  string[string] obj_txt;
  obj_txt["text"] = generic_munge_sanitize_text_for_search(obj.text);
  obj_txt["html"] = html_para(doc_matters, obj);
  if (doc_matters.opt.action.debug_do_sqlite) { /+ debug +/
    debug(sql_txt)  { writeln(obj_txt["text"]); }
    debug(sql_html) { writeln(obj_txt["html"]); }
  }
  return obj_txt;
}
#+END_SRC
**** quote
#+NAME: hub_format_and_sqlite_load_objects_quote
#+BEGIN_SRC d
/+ quote block: returns its search text under "text" and its light html under "html" +/
string[string] quote(M,O)(
  M       doc_matters,
  const O obj,
) {
  string[string] obj_txt;
  obj_txt["text"] = generic_munge_sanitize_text_for_search(obj.text);
  obj_txt["html"] = html_quote(doc_matters, obj);
  if (doc_matters.opt.action.debug_do_sqlite) { /+ debug +/
    debug(sql_txt)  { writeln(obj_txt["text"]); }
    debug(sql_html) { writeln(obj_txt["html"]); }
  }
  return obj_txt;
}
#+END_SRC
**** group
#+NAME: hub_format_and_sqlite_load_objects_group
#+BEGIN_SRC d
/+ group block: returns its search text under "text" and its light html under "html" +/
string[string] group(M,O)(
  M       doc_matters,
  const O obj,
) {
  string[string] obj_txt;
  obj_txt["text"] = generic_munge_sanitize_text_for_search(obj.text);
  obj_txt["html"] = html_group(doc_matters, obj);
  if (doc_matters.opt.action.debug_do_sqlite) { /+ debug +/
    debug(sql_txt)  { writeln(obj_txt["text"]); }
    debug(sql_html) { writeln(obj_txt["html"]); }
  }
  return obj_txt;
}
#+END_SRC
**** block
#+NAME: hub_format_and_sqlite_load_objects_block
#+BEGIN_SRC d
/+ block object: returns its search text under "text" and its light html under "html" +/
string[string] block(M,O)(
  M       doc_matters,
  const O obj,
) {
  string[string] obj_txt;
  obj_txt["text"] = generic_munge_sanitize_text_for_search(obj.text);
  obj_txt["html"] = html_block(doc_matters, obj);
  if (doc_matters.opt.action.debug_do_sqlite) { /+ debug +/
    debug(sql_txt)  { writeln(obj_txt["text"]); }
    debug(sql_html) { writeln(obj_txt["html"]); }
  }
  return obj_txt;
}
#+END_SRC
**** verse
#+NAME: hub_format_and_sqlite_load_objects_verse
#+BEGIN_SRC d
/+ verse block: returns its search text under "text" and its light html under "html" +/
string[string] verse(M,O)(
  M       doc_matters,
  const O obj,
) {
  string[string] obj_txt;
  obj_txt["text"] = generic_munge_sanitize_text_for_search(obj.text);
  obj_txt["html"] = html_verse(doc_matters, obj);
  if (doc_matters.opt.action.debug_do_sqlite) { /+ debug +/
    debug(sql_txt)  { writeln(obj_txt["text"]); }
    debug(sql_html) { writeln(obj_txt["html"]); }
  }
  return obj_txt;
}
#+END_SRC
**** code
#+NAME: hub_format_and_sqlite_load_objects_code
#+BEGIN_SRC d
/+ code block: returns its search text under "text" and its light html under "html" +/
string[string] code(M,O)(
  M       doc_matters,
  const O obj,
) {
  string[string] obj_txt;
  obj_txt["text"] = generic_munge_sanitize_text_for_search(obj.text);
  obj_txt["html"] = html_code(obj);
  if (doc_matters.opt.action.debug_do_sqlite) { /+ debug +/
    debug(sql_txt)  { writeln(obj_txt["text"]); }
    debug(sql_html) { writeln(obj_txt["html"]); }
  }
  return obj_txt;
}
#+END_SRC
**** table
#+NAME: hub_format_and_sqlite_load_objects_table
#+BEGIN_SRC d
/+ table object: returns its search text under "text" and its light html under "html" +/
string[string] table(M,O)(
  M       doc_matters,
  const O obj,
) {
  string[string] obj_txt;
  obj_txt["text"] = generic_munge_sanitize_text_for_search(obj.text);
  obj_txt["html"] = html_table(doc_matters, obj);
  if (doc_matters.opt.action.debug_do_sqlite) { /+ debug +/
    debug(sql_txt)  { writeln(obj_txt["text"]); }
    debug(sql_html) { writeln(obj_txt["html"]); }
  }
  return obj_txt;
}
#+END_SRC
** 4. ↻ loop, identify, load - loop template
#+NAME: sqlite_objects_loop
#+BEGIN_SRC d
/+ Object loop: for every part named in doc_matters.has.keys_seq.sql, visit
   each object of doc_abstraction[part] and
   - dispatch on is_of_part / is_of_type / is_a to the matching formatter of
     format_and_sqlite_load, which yields obj_txt ("text" and "html");
   - for headings, optionally print structure information (show_sqlite);
   - for everything except comments, append the row produced by
     insertDocObjectsRow(obj) to _insert_doc_objects.
   The collected rows are joined and returned as one UTF-8 string.

   NOTE(review): obj_txt is declared outside the loops and is never reset, so
   an object that no branch handles (e.g. is_a "poem", or any default branch)
   still reaches insertDocObjectsRow with whatever the previous object left in
   obj_txt - confirm whether that is intended. +/
auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters);
string[string] obj_txt;
string doc_text;
string[] _insert_doc_objects;
foreach (part; doc_matters.has.keys_seq.sql) {
  foreach (obj; doc_abstraction[part]) {
    switch (obj.metainfo.is_of_part) {
    case "frontmatter":              assert(part == "head", part);
      switch (obj.metainfo.is_of_type) {
      case "para":
        switch (obj.metainfo.is_a) {
        case "heading":
          obj_txt = format_and_sqlite_load.heading(doc_matters, obj);
          break;
        default:
          { /+ debug +/
            if (doc_matters.opt.action.debug_do_sqlite) {
              writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a);
            }
          }
          break;
        }
        break;
      default:
        { /+ debug +/
          if (doc_matters.opt.action.debug_do_sqlite) {
            writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_type);
          }
        }
        break;
      }
      break;
    case "body": // assert(part == "body", part);
      switch (obj.metainfo.is_of_type) {
      case "para":
        switch (obj.metainfo.is_a) {
        case "heading":
          debug (asserts) {
            if (part != "body") {
              writeln(__LINE__, ": ", obj.text);
            }
          }
          obj_txt = format_and_sqlite_load.heading(doc_matters, obj);
          break;
        case "para":
          obj_txt = format_and_sqlite_load.para(doc_matters, obj);
          break;
        default:
          { /+ debug +/
            if (doc_matters.opt.action.debug_do_sqlite) {
              writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a);
            }
          }
          break;
        }
        break;
      case "block":
        switch (obj.metainfo.is_a) {
        case "quote":
          obj_txt = format_and_sqlite_load.quote(doc_matters, obj);
          break;
        case "group":
          obj_txt = format_and_sqlite_load.group(doc_matters, obj);
          break;
        case "block":
          obj_txt = format_and_sqlite_load.block(doc_matters, obj);
          break;
        case "poem":                        // double check on keeping both poem & verse
          break;
        case "verse":
          obj_txt = format_and_sqlite_load.verse(doc_matters, obj);
          break;
        case "code":
          obj_txt = format_and_sqlite_load.code(doc_matters, obj);
          break;
        case "table":
          obj_txt = format_and_sqlite_load.table(doc_matters, obj);
          break;
        default:
          { /+ debug +/
            if (doc_matters.opt.action.debug_do_sqlite) {
              writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a);
            }
          }
          break;
        }
        break;
      default:
        { /+ debug +/
          if (doc_matters.opt.action.debug_do_sqlite) {
            writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_type);
          }
        }
        break;
      }
      break;
    case "backmatter":
      // each alternative must be compared against part explicitly: a bare
      // string literal on the right of || is always true, which made the
      // previous form of this assert unable to fail
      // NOTE(review): if keys_seq.sql can name a further backmatter part,
      // add it to this list
      assert(
        part == "glossary"
        || part == "bibliography"
        || part == "bookindex"
        || part == "blurb"
        || part == "tail",
        part
      );
      switch (obj.metainfo.is_of_type) {
      case "para":
        switch (obj.metainfo.is_a) {
        case "heading":
          obj_txt = format_and_sqlite_load.heading(doc_matters, obj);
          break;
        case "glossary":             assert(part == "glossary", part);
          obj_txt = format_and_sqlite_load.para(doc_matters, obj);
          break;
        case "bibliography":         assert(part == "bibliography", part);
          obj_txt = format_and_sqlite_load.para(doc_matters, obj);
          break;
        case "bookindex":            assert(part == "bookindex", part);
          obj_txt = format_and_sqlite_load.para(doc_matters, obj);
          break;
        case "blurb":                assert(part == "blurb", part);
          obj_txt = format_and_sqlite_load.para(doc_matters, obj);
          break;
        default:
          { /+ debug +/
            if (doc_matters.opt.action.debug_do_sqlite) {
              writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a);
            }
          }
          break;
        }
        break;
      default:
        { /+ debug +/
          if (doc_matters.opt.action.debug_do_sqlite) {
            writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_type);
          }
        }
        break;
      }
      break;
    case "comment":
      break;
    default:
      { /+ debug +/
        if (doc_matters.opt.action.debug_do_sqlite) {
          writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_part); // check where empty value could come from
          writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a);
          writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from
        }
      }
      break;
    }
    // optional report of heading structure (markup level, collapsed level, ocn, parent)
    if (obj.metainfo.is_a == "heading") {
      if (doc_matters.opt.action.show_sqlite) {
        if (obj.metainfo.heading_lev_markup == 0) {
          writeln(doc_matters.src.filename);
        }
        writeln(
          "markup: ", obj.metainfo.heading_lev_markup,
          "> ", obj.metainfo.dom_structure_markedup_tags_status,
          "; collapsed: ", obj.metainfo.heading_lev_collapsed,
          "> ", obj.metainfo.dom_structure_collapsed_tags_status,
          "; ocn: ", obj.metainfo.ocn,
          " node: ", obj.metainfo.node,
          "; parent: ", obj.metainfo.parent_lev_markup,
          "; ocn: ", obj.metainfo.parent_ocn,
          "; ",
        );
      }
    }
    // comments are never stored, every other object contributes one row
    if (!(obj.metainfo.is_a == "comment")) {
      _insert_doc_objects ~= insertDocObjectsRow(obj);
    }
  } // loop closes
}
return _insert_doc_objects.join.to!(char[]).toUTF8;
#+END_SRC
** 5. SQL statements :statement:
*** drop index and tables
**** DROP INDEX IF EXISTS
#+NAME: sqlite_statement_drop_existing_index
#+BEGIN_SRC sql
-- drop every index that the CREATE INDEX statements of this schema create
DROP INDEX IF EXISTS idx_ocn;
DROP INDEX IF EXISTS idx_uid;
DROP INDEX IF EXISTS idx_digest_clean;
DROP INDEX IF EXISTS idx_digest_all;
DROP INDEX IF EXISTS idx_clean;
DROP INDEX IF EXISTS idx_title;
DROP INDEX IF EXISTS idx_author;
DROP INDEX IF EXISTS idx_filename;
DROP INDEX IF EXISTS idx_language;
DROP INDEX IF EXISTS idx_topics;
DROP INDEX IF EXISTS idx_topic_list;
-- names that no CREATE INDEX statement of this schema uses, retained so that
-- a database which still carries them is cleaned as before
DROP INDEX IF EXISTS src_filename_base;
DROP INDEX IF EXISTS idx_language_document_char;
DROP INDEX IF EXISTS idx_classify_topic_register;
#+END_SRC
**** DROP TABLE IF EXISTS
#+NAME: sqlite_statement_drop_existing_tables
#+BEGIN_SRC sql
-- tables that reference metadata_and_text(uid) go first, the parent table last
DROP TABLE IF EXISTS topic_register;
DROP TABLE IF EXISTS doc_objects;
-- site_urls is the name used by the CREATE TABLE statement of this schema
DROP TABLE IF EXISTS site_urls;
-- urls is not created by any statement shown here, drop retained as before
DROP TABLE IF EXISTS urls;
DROP TABLE IF EXISTS metadata_and_text;
#+END_SRC
*** create tables and index
**** CREATE TABLE IF NOT EXISTS metadata_and_text
#+NAME: sqlite_statement_create_table_metadata_and_src_txt
#+BEGIN_SRC sql
-- metadata_and_text: one row of document-level metadata per document
-- uid is the key that topic_register, site_urls and doc_objects reference
CREATE TABLE IF NOT EXISTS metadata_and_text (
-- identity
uid VARCHAR(256) UNIQUE, -- filename, language char, pod/txt (decide on delimiter [,;:/])
src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, z pod name if any + src filename + language code
src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename
-- title
title VARCHAR(800) NOT NULL,
title_main VARCHAR(400) NOT NULL,
title_sub VARCHAR(400) NULL,
title_short VARCHAR(400) NULL,
title_edition VARCHAR(10) NULL,
title_language VARCHAR(100) NULL,
title_language_char VARCHAR(6) NULL,
-- creator
creator_author VARCHAR(600) NOT NULL,
creator_author_last_first VARCHAR(600) NOT NULL,
creator_author_email VARCHAR(100) NULL,
creator_author_hon VARCHAR(100) NULL,
creator_author_nationality VARCHAR(100) NULL,
creator_editor VARCHAR(600) NULL,
creator_contributor VARCHAR(600) NULL,
creator_illustrator VARCHAR(600) NULL,
creator_photographer VARCHAR(600) NULL,
creator_translator VARCHAR(600) NULL,
creator_prepared_by VARCHAR(600) NULL,
creator_digitized_by VARCHAR(600) NULL,
creator_audio VARCHAR(600) NULL,
creator_video VARCHAR(600) NULL,
-- language
language_document VARCHAR(100) NULL,
language_document_char VARCHAR(6) NOT NULL,
language_original VARCHAR(100) NULL,
language_original_char VARCHAR(6) NULL,
-- dates
date_added_to_site VARCHAR(10) NULL,
date_available VARCHAR(10) NULL,
date_created VARCHAR(10) NULL,
date_issued VARCHAR(10) NULL,
date_modified VARCHAR(10) NULL,
date_published VARCHAR(10) NULL,
date_valid VARCHAR(10) NULL,
date_translated VARCHAR(10) NULL,
date_original_publication VARCHAR(10) NULL,
date_generated VARCHAR(10) NULL,
-- original work
original_title VARCHAR(800) NULL,
original_publisher VARCHAR(600) NULL,
original_language VARCHAR(100) NULL,
original_language_char VARCHAR(6) NULL,
original_source VARCHAR(600) NULL,
original_institution VARCHAR(600) NULL,
original_nationality VARCHAR(100) NULL,
-- rights
rights_copyright VARCHAR(2500) NULL,
rights_copyright_audio VARCHAR(2500) NULL,
rights_copyright_cover VARCHAR(2500) NULL,
rights_copyright_illustrations VARCHAR(2500) NULL,
rights_copyright_photographs VARCHAR(2500) NULL,
rights_copyright_text VARCHAR(2500) NULL,
rights_copyright_translation VARCHAR(2500) NULL,
rights_copyright_video VARCHAR(2500) NULL,
rights_license VARCHAR(2500) NULL,
-- identifiers and classification
identifier_oclc VARCHAR(30) NULL,
identifier_isbn VARCHAR(16) NULL,
classify_topic_register VARCHAR(2500) NULL,
classify_subject VARCHAR(600) NULL,
classify_loc VARCHAR(30) NULL,
classify_dewey VARCHAR(30) NULL,
classify_keywords VARCHAR(600) NULL,
-- notes
notes_abstract TEXT NULL,
notes_description TEXT NULL,
notes_comment TEXT NULL,
notes_coverage VARCHAR(200) NULL,
notes_relation VARCHAR(200) NULL,
notes_history VARCHAR(600) NULL,
notes_type VARCHAR(600) NULL,
notes_format VARCHAR(600) NULL,
notes_prefix TEXT NULL,
notes_prefix_a TEXT NULL,
notes_prefix_b TEXT NULL,
notes_suffix TEXT NULL,
publisher VARCHAR(600) NULL,
-- source file and pod
src_filename_base VARCHAR(256) NOT NULL,
src_filename_suffix VARCHAR(6) NOT NULL,
src_fingerprint VARCHAR(256) NULL,
src_filesize VARCHAR(10) NULL,
src_wordcount VARCHAR(10) NULL,
pod_name VARCHAR(256) NULL, -- zipped pod, work to be done here
pod_fingerprint VARCHAR(256) NULL, -- zipped pod, work to be done here
pod_size VARCHAR(10) NULL, -- zipped pod, work to be done here
-- site urls
site_url_doc_root VARCHAR(256) NULL, -- url path to doc root
site_url_html_toc VARCHAR(256) NULL,
site_url_html_scroll VARCHAR(256) NULL,
site_url_epub VARCHAR(256) NULL,
links TEXT NULL
);
#+END_SRC
**** CREATE TABLE IF NOT EXISTS topic_register
#+NAME: sqlite_statement_create_table_topic_register
#+BEGIN_SRC sql
-- topic_register: topic lines of a document, each row tied to its document
-- through uid_metadata_and_text (rows are declared to cascade on delete of the
-- referenced metadata_and_text row)
CREATE TABLE IF NOT EXISTS topic_register (
-- tid BIGINT PRIMARY KEY,
uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE,
-- src_composite_id_per_txt VARCHAR(256) NOT NULL, - UNIQUE, - z pod name if any + src filename + language code
-- src_composite_id_per_pod VARCHAR(256) NOT NULL, - z pod name if any + src filename
topic_register VARCHAR(250) NOT NULL,
site_url_doc_root VARCHAR(256) NULL, -- url path to doc root
site_url_html_toc VARCHAR(256) NULL,
site_url_html_scroll VARCHAR(256) NULL
);
#+END_SRC
**** CREATE TABLE site_urls ?
#+NAME: sqlite_statement_create_table_site_urls
#+BEGIN_SRC sql
-- site_urls: url locations of a document, tied to its document through
-- uid_metadata_and_text
-- NOTE(review): no INSERT INTO site_urls appears among the insert statements
-- of this section - confirm whether this table is populated anywhere
CREATE TABLE IF NOT EXISTS site_urls (
-- tid BIGINT PRIMARY KEY,
uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE,
src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, - z pod name if any + src filename + language code
src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename
site_url_doc_root VARCHAR(256) NULL, -- url path to doc root
site_url_html_toc VARCHAR(256) NULL,
site_url_html_scroll VARCHAR(256) NULL
);
#+END_SRC
**** CREATE TABLE doc_objects
#+NAME: sqlite_statement_create_table_objects
#+BEGIN_SRC sql
-- doc_objects: one row per document object, tied to its document through
-- uid_metadata_and_text
CREATE TABLE IF NOT EXISTS doc_objects (
-- lid is declared INTEGER PRIMARY KEY so that SQLite assigns a unique,
-- increasing value itself: the INSERT INTO doc_objects statement supplies no
-- lid, and a BIGINT PRIMARY KEY column is simply left NULL in that case
lid INTEGER PRIMARY KEY,
uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE,
ocn SMALLINT,
obj_id VARCHAR(6) NULL,
clean TEXT NULL, -- object text as sanitized for search
body TEXT NULL, -- object rendered as html
seg VARCHAR(256) NULL,
lev_an VARCHAR(1),
is_of_type VARCHAR(16),
is_a VARCHAR(16),
lev SMALLINT NULL,
node VARCHAR(16) NULL,
parent VARCHAR(16) NULL,
last_descendant VARCHAR(16) NULL, -- headings only
digest_clean CHAR(256),
digest_all CHAR(256),
seg_name CHAR(256),
types CHAR(1) NULL
);
#+END_SRC
**** CREATE INDEX
#+NAME: sqlite_statement_create_index
#+BEGIN_SRC sql
-- indexes on doc_objects
CREATE INDEX IF NOT EXISTS idx_ocn ON doc_objects(ocn);
CREATE INDEX IF NOT EXISTS idx_digest_clean ON doc_objects(digest_clean);
CREATE INDEX IF NOT EXISTS idx_digest_all ON doc_objects(digest_all);
CREATE INDEX IF NOT EXISTS idx_clean ON doc_objects(clean);
-- indexes on metadata_and_text
CREATE INDEX IF NOT EXISTS idx_title ON metadata_and_text(title);
CREATE INDEX IF NOT EXISTS idx_author ON metadata_and_text(creator_author_last_first);
CREATE INDEX IF NOT EXISTS idx_uid ON metadata_and_text(uid);
CREATE INDEX IF NOT EXISTS idx_filename ON metadata_and_text(src_filename_base);
CREATE INDEX IF NOT EXISTS idx_language ON metadata_and_text(language_document_char);
CREATE INDEX IF NOT EXISTS idx_topics ON metadata_and_text(classify_topic_register);
-- index on topic_register
CREATE INDEX IF NOT EXISTS idx_topic_list ON topic_register(topic_register);
#+END_SRC
*** TODO local site link & info
*** delete rows (delete document)
**** DELETE uid rows doc matters & metadata
***** sql statement: dlang format
#+NAME: sqlite_formatted_delete_format
#+BEGIN_SRC d
// Build the statements that remove every row belonging to this document.
// The uid goes through SQLinsertDelimiter, the same treatment the INSERT
// statements give it, so the quoted '%s' literals stay well formed.
// topic_register rows are deleted explicitly, as doc_objects rows are, rather
// than relying on ON DELETE CASCADE alone (which SQLite honours only while
// foreign key enforcement is switched on for the connection).
// One _uid argument is required per '%s' placeholder below.
string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid);
string _delete_uid = format(q"┃
#+END_SRC
***** DELETE FROM ... WHERE
#+NAME: sqlite_formatted_delete_sql
#+BEGIN_SRC sql
DELETE FROM metadata_and_text
WHERE uid = '%s';
DELETE FROM doc_objects
WHERE uid_metadata_and_text = '%s';
DELETE FROM topic_register
WHERE uid_metadata_and_text = '%s';
#+END_SRC
***** VALUES
#+NAME: sqlite_formatted_delete_values
#+BEGIN_SRC d
┃",
_uid,
_uid,
_uid,
);
#+END_SRC
*** inserts
**** INSERT doc matters & metadata
***** sql statement: dlang format
#+NAME: sqlite_formatted_insertions_doc_matters_metadata_format
#+BEGIN_SRC d
// document uid, passed through SQLinsertDelimiter like every other value
// interpolated into the statement
string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid);
// opens the format() call; the text that follows, up to the closing delimiter
// of the q"..." string, is the INSERT statement with its '%s' placeholders
string _insert_metadata = format(q"┃
#+END_SRC
***** INSERT INTO
#+NAME: sqlite_formatted_insertions_doc_matters_metadata_sql
#+BEGIN_SRC sql
-- column order follows the order of the format arguments one for one:
-- notes_abstract and notes_description come directly after
-- classify_topic_register and before the original_ columns
INSERT INTO metadata_and_text (
uid,
src_filename_base,
src_filename_suffix,
src_composite_id_per_txt,
src_composite_id_per_pod,
title,
title_main,
title_sub,
title_short,
title_edition,
title_language,
creator_author,
creator_author_last_first,
creator_author_email,
creator_illustrator,
creator_translator,
language_document,
language_document_char,
date_added_to_site,
date_available,
date_created,
date_issued,
date_modified,
date_published,
date_valid,
rights_copyright,
rights_copyright_audio,
rights_copyright_cover,
rights_copyright_illustrations,
rights_copyright_photographs,
rights_copyright_text,
rights_copyright_translation,
rights_copyright_video,
rights_license,
identifier_oclc,
identifier_isbn,
classify_dewey,
classify_keywords,
classify_loc,
classify_subject,
classify_topic_register,
notes_abstract,
notes_description,
original_title,
original_publisher,
original_language,
original_language_char,
original_source,
publisher,
site_url_doc_root
)
#+END_SRC
***** VALUES
#+NAME: sqlite_formatted_insertions_doc_matters_metadata_sql_values
#+BEGIN_SRC sql
VALUES (
'%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s'
);
#+END_SRC
***** dlang values for formatting
#+NAME: sqlite_formatted_insertions_doc_matters_metadata_values
#+BEGIN_SRC d
┃",
// format arguments: one per '%s' placeholder of the VALUES clause, consumed
// positionally, so their order has to follow the column list of the
// INSERT INTO metadata_and_text statement exactly
_uid,
SQLinsertDelimiter!()(doc_matters.src.filename_base),
SQLinsertDelimiter!()(doc_matters.src.filename_extension),
SQLinsertDelimiter!()(doc_matters.src.docname_composite_unique_per_src_doc),
SQLinsertDelimiter!()(doc_matters.src.docname_composite_unique_per_src_pod),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_full),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_main),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_subtitle),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_surname_fn),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_issued),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_illustrations),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_photographs),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_text),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher),
SQLinsertDelimiter!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html)
);
#+END_SRC
**** INSERT topic register
writeln(doc_matters.conf_make_meta.meta.classify_topic_register_arr);
***** loop sql statement: dlang format
#+NAME: sqlite_formatted_insertions_topic_register_loop
#+BEGIN_SRC d
// document uid, passed through SQLinsertDelimiter before interpolation
string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid);
// collects one INSERT statement per topic line
string[] _insert_topics;
// loop body continues in the fragments that follow; its closing brace is not
// part of this fragment
foreach (topic_line; doc_matters.conf_make_meta.meta.classify_topic_register_expanded_arr) {
// writeln(topic_line);
#+END_SRC
***** sql statement: dlang format
#+NAME: sqlite_formatted_insertions_topic_register_format
#+BEGIN_SRC d
_insert_topics ~= format(q"┃
#+END_SRC
***** INSERT INTO
#+NAME: sqlite_formatted_insertions_topic_register_sql
#+BEGIN_SRC sql
INSERT INTO topic_register (
uid_metadata_and_text,
topic_register
)
#+END_SRC
***** VALUES
#+NAME: sqlite_formatted_insertions_topic_register_sql_values
#+BEGIN_SRC sql
VALUES (
'%s',
'%s'
);
#+END_SRC
***** dlang values for formatting
#+NAME: sqlite_formatted_insertions_topic_register_values
#+BEGIN_SRC d
┃",
_uid,
SQLinsertDelimiter!()(topic_line)
);
#+END_SRC
**** INSERT doc objects
lid: unique per row, incremented by 1 for each object (not the same as ocn)
metadata tid: unique document number
either:
- increment by adding 1 for each document, or
- make a hash of the document filename or url and use that?
***** sql statement: dlang format
#+NAME: sqlite_formatted_insertions_doc_objects_format
#+BEGIN_SRC d
string _insert_doc_objects_row = format(q"┃
#+END_SRC
***** INSERT INTO
#+NAME: sqlite_formatted_insertions_doc_objects_sql
#+BEGIN_SRC sql
INSERT INTO doc_objects (
uid_metadata_and_text,
ocn,
obj_id,
clean,
body,
lev,
is_of_type,
is_a,
seg_name
)
#+END_SRC
***** VALUES
#+NAME: sqlite_formatted_insertions_doc_objects_sql_values
#+BEGIN_SRC sql
VALUES (
'%s', %s, '%s', '%s', '%s', %s, '%s', '%s', '%s'
);
#+END_SRC
***** dlang values for formatting
#+NAME: sqlite_formatted_insertions_doc_objects_values
#+BEGIN_SRC d
┃",
// format arguments in the column order of INSERT INTO doc_objects;
// only the two obj_txt renderings are passed through SQLinsertDelimiter
_uid, // uid_metadata_and_text
obj.metainfo.ocn, // ocn (placeholder is unquoted)
obj.metainfo.identifier, // obj_id
SQLinsertDelimiter!()(obj_txt["text"]), // clean
SQLinsertDelimiter!()(obj_txt["html"]), // body
obj.metainfo.heading_lev_markup, // lev (placeholder is unquoted)
obj.metainfo.is_of_type, // is_of_type
obj.metainfo.is_a, // is_a
obj.tags.html_segment_anchor_tag_is // seg_name
);
#+END_SRC
* document header including copyright & license
#+NAME: doc_header_including_copyright_and_license
#+HEADER: :noweb yes
#+BEGIN_SRC emacs-lisp
<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_doc_header_including_copyright_and_license()>>
#+END_SRC
* __END__