diff options
Diffstat (limited to 'org/output_sqlite.org')
-rw-r--r-- | org/output_sqlite.org | 1209 |
1 files changed, 585 insertions, 624 deletions
diff --git a/org/output_sqlite.org b/org/output_sqlite.org index 0e7f125..7fb843d 100644 --- a/org/output_sqlite.org +++ b/org/output_sqlite.org @@ -20,18 +20,71 @@ #+BEGIN_SRC d :tangle ../src/sdp/output/sqlite.d module sdp.output.sqlite; <<output_imports>> +long _metadata_tid_lastrowid; +template SQLiteDbRun() { + void SQLiteDbRun(Db,St,O)( + auto ref Db db, + auto ref St db_statement, + auto ref O opt_action, + string note, + ) { + debug(sql_statement) { + writeln(db_statement); + } + try { + db.run( + "\nBEGIN;\n" ~ + db_statement ~ + "\nCOMMIT;\n" + ); + if (!(opt_action.sqlite_create)) { + _metadata_tid_lastrowid = db.lastInsertRowid(); + writeln("last row id: ", _metadata_tid_lastrowid); + } + } catch (ErrnoException ex) { + writeln("ERROR SQLite : ", ex); + } catch (Exception ex) { + writeln("ERROR SQLite : ", ex); + } + if (opt_action.debug_do) { + writeln(note); + if (opt_action.verbose) { + writeln(db_statement); + } + } + } +} template SQLiteBuildTablesAndPopulate() { void SQLiteBuildTablesAndPopulate(D,I)( auto ref const D doc_abstraction, - auto ref I doc_matters, + auto ref I doc_matters, ) { import d2sqlite3; import std.typecons : Nullable; mixin SiSUoutputRgxInit; static auto rgx = Rgx(); + auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.output_path); + pth_sqlite.base.mkdirRecurse; + auto db = Database(pth_sqlite.sqlite_file(doc_matters.env.pwd.baseName)); + template SQLiteDbStatementComposite() { + void SQLiteDbStatementComposite(Db,D,I)( + auto ref Db db, + auto ref const D doc_abstraction, + auto ref I doc_matters, + ) { + <<sqlite_db_statement_composite>> + } + } + template SQLinsertDelimiter() { + auto SQLinsertDelimiter(string _txt) { + _txt = _txt + .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); + return _txt; + } + } template SQLiteFormatAndLoadObject() { auto SQLiteFormatAndLoadObject(I)( - auto ref I doc_matters, + auto ref I doc_matters, ) { mixin SiSUoutputRgxInit; struct sqlite_format_and_load_objects { @@ -44,34 +97,68 @@ template SQLiteBuildTablesAndPopulate() { return sqlite_format_and_load_objects(); } } - template SQLiteInstruct() { - Statement SQLiteInstruct(I)( - auto ref I doc_matters, + template SQLiteTablesReCreate() { + string SQLiteTablesReCreate()() { + string _sql_instruct; + _sql_instruct = format(q"¶ + <<sqlite_statement_drop_existing_index>> + <<sqlite_statement_drop_existing_tables>> + <<sqlite_statement_create_table_metadata_and_src_txt>> + <<sqlite_statement_create_table_objects>> + <<sqlite_statement_create_index>> + ¶",); + return _sql_instruct; + } + } + template SQLiteInsertMetadata() { + string SQLiteInsertMetadata(I)( + auto ref I doc_matters, ) { - <<sqlite_db_initialize>> - <<sqlite_table_initialize>> - <<sqlite_create_table_metadata_and_src_txt>> - /* */<<sqlite_create_table_urls>> - <<sqlite_create_table_objects>> - <<sqlite_close_initialize>> - <<sqlite_insert_metadata_and_src_text>> - //<<sqlite_insert_urls>> - <<sqlite_insert_doc_objects>> + <<sqlite_formatted_insertions_doc_matters_metadata>> + return _insert_metadata; } } - template SQLiteObjectsLoop() { - void SQLiteObjectsLoop(P)( - auto ref P doc_parts, + template SQLiteInsertDocObjectsLoop() { + string SQLiteInsertDocObjectsLoop(P,I)( + auto ref P doc_parts, + auto ref I _metadata_tid, ) { + string insertDocObjectsRow(O)(O obj) { + <<sqlite_formatted_insertions_doc_objects>> + return _insert_doc_objects_row; + } <<sqlite_objects_loop>> } } - SQLiteObjectsLoop!()(doc_matters.xml.keys_seq.sql); + SQLiteDbStatementComposite!()(db, doc_abstraction, doc_matters); } } template SQLiteTablesCreate() { - void SQLiteTablesCreate()() { - writeln("table create"); + void SQLiteTablesCreate(E,O)(E env, O opt_action) { + import d2sqlite3; + template SQLiteTablesReCreate() { + string SQLiteTablesReCreate()() { + string _sql_instruct; + _sql_instruct = format(q"¶ + <<sqlite_statement_drop_existing_index>> + <<sqlite_statement_drop_existing_tables>> + <<sqlite_statement_create_table_metadata_and_src_txt>> + <<sqlite_statement_create_table_objects>> + <<sqlite_statement_create_index>> + ¶",); + return _sql_instruct; + } + } + if (opt_action.sqlite_create) { + string _db_statement; + auto pth_sqlite = SiSUpathsSQLite!()(opt_action.output_dir_set); // ISSUE + pth_sqlite.base.mkdirRecurse; + auto db = Database(pth_sqlite.sqlite_file); // ISSUE + { + _db_statement ~= SQLiteTablesReCreate!()(); + } + SQLiteDbRun!()(db, _db_statement, opt_action, "TABLE RE-CREATE"); + } } } template SQLiteTablesDrop() { @@ -81,7 +168,35 @@ template SQLiteTablesDrop() { } #+END_SRC -** 1. imports +** 1. [#A] sqlite_db_statement + +#+name: sqlite_db_statement_composite +#+BEGIN_SRC d +{ + { + string _db_statement; + { + if ((doc_matters.opt.action.sqlite_create)) { + auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); + pth_sqlite.base.mkdirRecurse; + _db_statement ~= SQLiteTablesReCreate!()(); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "TABLE RE-CREATE"); + } + if ((doc_matters.opt.action.sqlite_update)) { // TODO + _db_statement ~= SQLiteInsertMetadata!()(doc_matters); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table INSERT MetaData"); + /+ get tid (lastrowid or max) for use in doc_objects table +/ + _metadata_tid_lastrowid = db.lastInsertRowid(); + _db_statement ~= SQLiteInsertDocObjectsLoop!()(doc_matters.xml.keys_seq.sql, _metadata_tid_lastrowid); // FIX + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table INSERT DocObjects"); + } + } + db.close; + } +} +#+END_SRC + +** 2. imports #+name: output_imports #+BEGIN_SRC d @@ -91,7 +206,7 @@ import std.conv : to; #+END_SRC -** 2. format and load template +** 3. format and load template *** 1. prepare objects (munge, sanitize, markup) **** 1. _text_ generic munge (sanitize text for search) @@ -112,7 +227,6 @@ auto generic_munge_sanitize_text_for_search( ) { string _notes; string _urls; - _txt = _txt.replaceAll(rgx.inline_fontface_clean, ""); if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; @@ -429,9 +543,17 @@ auto html_table(O)( string _align = ("style=\"text-align:" ~ ((obj.table_column_aligns[col_idx] == "l") ? "left\"" : "right\"")); - _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table_column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">"; + _table ~= "<" + ~ _col_is + ~ " width=\"" + ~ obj.table_column_widths[col_idx].to!string + ~ "%\" " + ~ _align + ~ ">"; _table ~= cell; - _table ~= "</" ~ _col_is ~ ">"; + _table ~= "</" + ~ _col_is + ~ ">"; } } _table ~= "</tr>"; @@ -461,176 +583,17 @@ auto html_table(O)( #+END_SRC *** 2. _sqlite_ (loop preparation & pre-loop action) -***** initialize: prepare statements drop & create db & tables anew -****** d sqlite statements (initialize database) { - -#+name: sqlite_db_initialize -#+BEGIN_SRC d -auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.output_path, doc_matters.src.language); -pth_sqlite.base.mkdirRecurse; -auto db = Database(pth_sqlite.sqlite_file(doc_matters.env.pwd.baseName)); -// auto db = Database(":memory:"); // open database in memory -if (doc_matters.opt.action.sqlite_create) { - if ((doc_matters.opt.action.verbose)) { writeln("sqlite create tables... "); } - db.run(" -#+END_SRC - -******* 0. drop table if exists - -#+name: sqlite_table_initialize -#+BEGIN_SRC sql -DROP TABLE IF EXISTS metadata_and_text; -DROP TABLE IF EXISTS doc_objects; -DROP TABLE IF EXISTS urls; -#+END_SRC - -******* 1. create tables -******** metadata & src text - -#+name: sqlite_create_table_metadata_and_src_txt -#+BEGIN_SRC sql -CREATE TABLE metadata_and_text ( - tid BIGINT PRIMARY KEY, - title VARCHAR(800) NOT NULL, - title_main VARCHAR(400) NULL, /*NOT*/ - title_sub VARCHAR(400) NULL, - title_short VARCHAR(400) NULL, - title_edition VARCHAR(10) NULL, - title_note VARCHAR(2500) NULL, - title_language VARCHAR(100) NULL, - title_language_char VARCHAR(6) NULL, - creator_author VARCHAR(600) NULL, - creator_author_email VARCHAR(100) NULL, - creator_author_hon VARCHAR(100) NULL, - creator_author_nationality VARCHAR(100) NULL, - creator_editor VARCHAR(600) NULL, - creator_contributor VARCHAR(600) NULL, - creator_illustrator VARCHAR(600) NULL, - creator_photographer VARCHAR(600) NULL, - creator_translator VARCHAR(600) NULL, - creator_prepared_by VARCHAR(600) NULL, - creator_digitized_by VARCHAR(600) NULL, - creator_audio VARCHAR(600) NULL, - creator_video VARCHAR(600) NULL, - language_document VARCHAR(100) NULL, - language_document_char VARCHAR(6) NULL, /*NOT*/ - language_original VARCHAR(100) NULL, - language_original_char VARCHAR(6) NULL, - date_added_to_site VARCHAR(10) NULL, - date_available VARCHAR(10) NULL, - date_created VARCHAR(10) NULL, - date_issued VARCHAR(10) NULL, - date_modified VARCHAR(10) NULL, - date_published VARCHAR(10) NULL, - date_valid VARCHAR(10) NULL, - date_translated VARCHAR(10) NULL, - date_original_publication VARCHAR(10) NULL, - date_generated VARCHAR(10) NULL, - publisher VARCHAR(600) NULL, - original_publisher VARCHAR(600) NULL, - original_language VARCHAR(100) NULL, - original_language_char VARCHAR(6) NULL, - original_source VARCHAR(600) NULL, - original_institution VARCHAR(600) NULL, - original_nationality VARCHAR(100) NULL, - original_title VARCHAR(800) NULL, - rights_copyright VARCHAR(2500) NULL, - rights_copyright_audio VARCHAR(2500) NULL, - rights_copyright_cover VARCHAR(2500) NULL, - rights_copyright_illustrations VARCHAR(2500) NULL, - rights_copyright_photographs VARCHAR(2500) NULL, - rights_copyright_text VARCHAR(2500) NULL, - rights_copyright_translation VARCHAR(2500) NULL, - rights_copyright_video VARCHAR(2500) NULL, - rights_license VARCHAR(2500) NULL, - identifier_oclc VARCHAR(30) NULL, - identifier_isbn VARCHAR(16) NULL, - classify_topic_register VARCHAR(2500) NULL, - classify_subject VARCHAR(600) NULL, - classify_loc VARCHAR(30) NULL, - classify_dewey VARCHAR(30) NULL, - classify_keywords VARCHAR(600) NULL, - notes_abstract TEXT NULL, - notes_description TEXT NULL, - notes_comment TEXT NULL, - notes_coverage VARCHAR(200) NULL, - notes_relation VARCHAR(200) NULL, - notes_history VARCHAR(600) NULL, - notes_type VARCHAR(600) NULL, - notes_format VARCHAR(600) NULL, - notes_prefix TEXT NULL, - notes_prefix_a TEXT NULL, - notes_prefix_b TEXT NULL, - notes_suffix TEXT NULL, - src_filename VARCHAR(256) NULL, - src_fingerprint VARCHAR(256) NULL, - src_filesize VARCHAR(10) NULL, - src_wordcount VARCHAR(10) NULL, - src_text TEXT NULL, - fulltext TEXT NULL, - links TEXT NULL -); -#+END_SRC - -******** doc_objects - -#+name: sqlite_create_table_objects -#+BEGIN_SRC sql -CREATE TABLE doc_objects ( - lid BIGINT PRIMARY KEY, - metadata_tid BIGINT REFERENCES metadata_and_text, - ocn SMALLINT, - ocnd VARCHAR(6), - ocns VARCHAR(6), - clean TEXT NULL, - body TEXT NULL, - book_idx TEXT NULL, - seg VARCHAR(256) NULL, - lev_an VARCHAR(1), - lev SMALLINT NULL, - lev0 SMALLINT, - lev1 SMALLINT, - lev2 SMALLINT, - lev3 SMALLINT, - lev4 SMALLINT, - lev5 SMALLINT, - lev6 SMALLINT, - lev7 SMALLINT, - en_a SMALLINT NULL, - en_z SMALLINT NULL, - en_a_asterisk SMALLINT NULL, - en_z_asterisk SMALLINT NULL, - en_a_plus SMALLINT NULL, - en_z_plus SMALLINT NULL, - t_of VARCHAR(16), - t_is VARCHAR(16), - node VARCHAR(16) NULL, - parent VARCHAR(16) NULL, - digest_clean CHAR(256), - digest_all CHAR(256), - types CHAR(1) NULL -) -#+END_SRC - -****** d } - -#+name: sqlite_close_initialize -#+BEGIN_SRC d - "); -} -#+END_SRC - -***** prepare insert statements and do pre-loop inserts -****** 1. _metadata & src_ text -******* TODO sql remove selected -******** d { sql statement +**** prepare insert statements and do pre-loop inserts +***** 1. _metadata & src_ text +****** TODO sql remove selected +******* d { sql statement ##+name: sqlite_remove_selected #+BEGIN_SRC d Statement insert_metadata = db.prepare(" #+END_SRC -********* remove selected +******** remove selected identify and remove, identify by - filename & language code @@ -644,413 +607,20 @@ identify and remove, identify by AND metadata_and_text.language_document_char = '#{doc_matters.conf_make_meta.meta.language_document_char}' #+END_SRC -******** d } +******* d } ##+name: sqlite_remove_selected #+BEGIN_SRC d "); #+END_SRC -******* sql statement -******** d { sql statement - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC d -Statement insert_metadata = db.prepare(" -#+END_SRC - -tid document number unique -either: -- increment by adding 1 for each document, -- make hash of document filename or url and use? - -********* sql insert into - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC sql - INSERT INTO metadata_and_text ( - tid, - title, - title_main, - title_sub, - title_short, - title_edition, - title_note, - title_language, - title_language_char, - creator_author, - creator_author_email, - creator_author_hon, - creator_author_nationality, - creator_editor, - creator_contributor, - creator_illustrator, - creator_photographer, - creator_translator, - creator_prepared_by, - creator_digitized_by, - creator_audio, - creator_video, - language_document, - language_document_char, - language_original, - language_original_char, - date_added_to_site, - date_available, - date_created, - date_issued, - date_modified, - date_published, - date_valid, - date_translated, - date_original_publication, - date_generated, - publisher, - original_publisher, - original_language, - original_language_char, - original_source, - original_institution, - original_nationality, - original_title, - rights_copyright, - rights_copyright_audio, - rights_copyright_cover, - rights_copyright_illustrations, - rights_copyright_photographs, - rights_copyright_text, - rights_copyright_translation, - rights_copyright_video, - rights_license, - identifier_oclc, - identifier_isbn, - classify_topic_register, - classify_subject, - classify_loc, - classify_dewey, - classify_keywords, - notes_abstract, - notes_description, - notes_comment, - notes_coverage, - notes_relation, - notes_history, - notes_type, - notes_format, - notes_prefix, - notes_prefix_a, - notes_prefix_b, - notes_suffix, - src_filename, - src_fingerprint, - src_filesize, - src_wordcount, - src_text, - fulltext, - links - ) -#+END_SRC - -********* sql values - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC sql - VALUES ( - :tid, - :title, - :title_main, - :title_sub, - :title_short, - :title_edition, - :title_note, - :title_language, - :title_language_char, - :creator_author, - :creator_author_email, - :creator_author_hon, - :creator_author_nationality, - :creator_editor, - :creator_contributor, - :creator_illustrator, - :creator_photographer, - :creator_translator, - :creator_prepared_by, - :creator_digitized_by, - :creator_audio, - :creator_video, - :language_document, - :language_document_char, - :language_original, - :language_original_char, - :date_added_to_site, - :date_available, - :date_created, - :date_issued, - :date_modified, - :date_published, - :date_valid, - :date_translated, - :date_original_publication, - :date_generated, - :publisher, - :original_publisher, - :original_language, - :original_language_char, - :original_source, - :original_institution, - :original_nationality, - :original_title, - :rights_copyright, - :rights_copyright_audio, - :rights_copyright_cover, - :rights_copyright_illustrations, - :rights_copyright_photographs, - :rights_copyright_text, - :rights_copyright_translation, - :rights_copyright_video, - :rights_license, - :identifier_oclc, - :identifier_isbn, - :classify_topic_register, - :classify_subject, - :classify_loc, - :classify_dewey, - :classify_keywords, - :notes_abstract, - :notes_description, - :notes_comment, - :notes_coverage, - :notes_relation, - :notes_history, - :notes_type, - :notes_format, - :notes_prefix, - :notes_prefix_a, - :notes_prefix_b, - :notes_suffix, - :src_filename, - :src_fingerprint, - :src_filesize, - :src_wordcount, - :src_text, - :fulltext, - :links - ) -#+END_SRC - -******** d } - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC d -"); -#+END_SRC - -******* TODO insert document_metadata - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC d -// insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment?? -insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full); -insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); -insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); -insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // -insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); -insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); -// insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char); -insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey); -insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords); -insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); -insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); -insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); -insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author); -insert_metadata.bind(":creator_author_email", doc_matters.conf_make_meta.meta.creator_author_email); -// insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor); -// insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor); -insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); -// insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer); -insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); -// insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio); -// insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video); -insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); -insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); -insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); -insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); -insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); -insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); -insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); -// insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated); -// insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication); -// insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated); -insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); -insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); -insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); -insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); -// insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); -// insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); -insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); -insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); -insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); -insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); -insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char); -insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); -insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title); -insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher); -// insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights); -insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright); -insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio); -insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover); -insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations); -insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs); -// insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation); -insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); -insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); -insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video); -// insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization); -// insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio); -// insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video); -insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); -// insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename); -// insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint); -// insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize); -// insert_metadata.bind(":src_wordcount", doc_matters.conf_make_meta.meta.src_wordcount); -// insert_metadata.bind(":src_text", doc_matters.conf_make_meta.meta.src_text); -// insert_metadata.bind(":fulltext", doc_matters.conf_make_meta.meta.fulltext); -// insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links); -insert_metadata.execute(); insert_metadata.reset(); -/+ watch +/ -writeln(" ", pth_sqlite.sqlite_file(doc_matters.env.pwd.baseName)); -if ((doc_matters.opt.action.verbose)) { - writeln("sql statement executed"); -} -assert(db.totalChanges == 1); -#+END_SRC - -###+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC d -insert_metadata.inject( - 1, - doc_matters.dochead_meta["title"]["full"], - "", - "", - "", - "", - "", - "", - "", - doc_matters.dochead_meta["creator"]["author"] -); -#+END_SRC - -****** 2. _doc objects_ (used with doc_objects in document loop) -******* prepare sql statement -******** d { sql statement - -#+name: sqlite_insert_doc_objects -#+BEGIN_SRC d -Statement insert_doc_objects = db.prepare(" -#+END_SRC - -********* sql insert into - -lid unique, increment by 1 per object, not ocn - -metadata tid document number unique -either: -- increment by adding 1 for each document, -- make hash of document filename or url and use? - -#+name: sqlite_insert_doc_objects -#+BEGIN_SRC sql - INSERT INTO doc_objects ( - lid, - metadata_tid, - ocn, - ocnd, - ocns, - clean, - body, - book_idx, - seg, - lev_an, - lev, - lev0, - lev1, - lev2, - lev3, - lev4, - lev5, - lev6, - lev7, - en_a, - en_z, - en_a_asterisk, - en_z_asterisk, - en_a_plus, - en_z_plus, - t_of, - t_is, - node, - parent, - digest_clean, - digest_all, - types - ) -#+END_SRC - -********* sql values - -#+name: sqlite_insert_doc_objects -#+BEGIN_SRC sql - VALUES ( - :lid, - :metadata_tid, - :ocn, - :ocnd, - :ocns, - :clean, - :body, - :book_idx, - :seg, - :lev_an, - :lev, - :lev0, - :lev1, - :lev2, - :lev3, - :lev4, - :lev5, - :lev6, - :lev7, - :en_a, - :en_z, - :en_a_asterisk, - :en_z_asterisk, - :en_a_plus, - :en_z_plus, - :t_of, - :t_is, - :node, - :parent, - :digest_clean, - :digest_all, - :types - ) -#+END_SRC - -******** d } - -#+name: sqlite_insert_doc_objects -#+BEGIN_SRC d -"); -return insert_doc_objects; -#+END_SRC - -******* TODO (within loop not here - insert doc objects +***** 2. _doc objects_ (used with doc_objects in document loop) +****** TODO (within loop not here - insert doc objects work out *** 3. hub (sqlite_format_and_load_objects) -***** sql related +**** sql related #+name: sqlite_load_object #+BEGIN_SRC d @@ -1092,7 +662,7 @@ string sqlite_statement(O)( } #+END_SRC -***** heading +**** heading #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1117,7 +687,7 @@ auto heading(O)( } #+END_SRC -***** para +**** para #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1142,7 +712,7 @@ auto para(O)( } #+END_SRC -***** quote +**** quote #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1166,7 +736,7 @@ auto quote(O)( return obj_txt; } #+END_SRC -***** group +**** group #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1191,7 +761,7 @@ auto group(O)( } #+END_SRC -***** block +**** block #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1216,7 +786,7 @@ auto block(O)( } #+END_SRC -***** verse +**** verse #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1241,7 +811,7 @@ auto verse(O)( } #+END_SRC -***** code +**** code #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1266,7 +836,7 @@ auto code(O)( } #+END_SRC -***** table +**** table #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1291,14 +861,14 @@ auto table(O)( } #+END_SRC -** 3. loop, identify, load - loop template +** 4. loop, identify, load - loop template #+name: sqlite_objects_loop #+BEGIN_SRC d -Statement insert_doc_objects = SQLiteInstruct!()(doc_matters); auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); string[string] obj_txt; string doc_text; +string[] _insert_doc_objects; foreach (part; doc_parts) { foreach (obj; doc_abstraction[part]) { switch (obj.of_part) { @@ -1443,35 +1013,426 @@ foreach (part; doc_parts) { ); } } - insert_doc_objects.bind(":t_of", obj.is_of); - insert_doc_objects.bind(":t_is", obj.is_a); - insert_doc_objects.bind(":ocn", obj.ocn); - insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search - insert_doc_objects.bind(":body", obj_txt["html"]); - // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search - insert_doc_objects.bind(":lev", obj.heading_lev_markup); - // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete - // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info - // insert_doc_objects.bind(":lev0", ""); - // insert_doc_objects.bind(":lev1", ""); - // insert_doc_objects.bind(":lev2", ""); - // insert_doc_objects.bind(":lev3", ""); - // insert_doc_objects.bind(":lev4", ""); - // insert_doc_objects.bind(":lev5", ""); - // insert_doc_objects.bind(":lev6", ""); - // insert_doc_objects.bind(":lev7", ""); - // insert_doc_objects.bind(":node", ""); - // insert_doc_objects.bind(":type", ""); - // insert_doc_objects.bind(":parent_ocn", ""); - // insert_doc_objects.bind(":ancestors", ""); - // insert_doc_objects.bind(":heading_lev_markup", ""); - // insert_doc_objects.bind(":heading_lev_collapsed", ""); - // insert_doc_objects.bind(":parent_lev_markup", ""); - // insert_doc_objects.bind(":heading_ancestors", ""); - // insert_doc_objects.bind(":node", ""); - insert_doc_objects.execute(); insert_doc_objects.reset(); - } + if (!(obj.is_a == "comment")) { + _insert_doc_objects ~= insertDocObjectsRow(obj); + } + } // loop closes } +return _insert_doc_objects.join.to!(char[]).toUTF8; +#+END_SRC + +** 5. SQL statements +*** create tables + +**** DROP INDEX IF EXISTS + +#+name: sqlite_statement_drop_existing_index +#+BEGIN_SRC sql +DROP INDEX IF EXISTS ocn; +DROP INDEX IF EXISTS digest_clean; +DROP INDEX IF EXISTS digest_all; +DROP INDEX IF EXISTS clean; +DROP INDEX IF EXISTS lev0; +DROP INDEX IF EXISTS lev1; +DROP INDEX IF EXISTS lev2; +DROP INDEX IF EXISTS lev3; +DROP INDEX IF EXISTS lev4; +DROP INDEX IF EXISTS lev5; +DROP INDEX IF EXISTS lev6; +DROP INDEX IF EXISTS title; +DROP INDEX IF EXISTS creator_author; +DROP INDEX IF EXISTS src_filename; +DROP INDEX IF EXISTS language_document_char; +DROP INDEX IF EXISTS classify_topic_register; +#+END_SRC + +**** DROP TABLE IF EXISTS + +#+name: sqlite_statement_drop_existing_tables +#+BEGIN_SRC sql +DROP TABLE IF EXISTS metadata_and_text; +DROP TABLE IF EXISTS doc_objects; +DROP TABLE IF EXISTS urls; +#+END_SRC + +**** CREATE TABLE metadata_and_text + +#+name: sqlite_statement_create_table_metadata_and_src_txt +#+BEGIN_SRC sql +CREATE TABLE metadata_and_text ( + tid INTEGER PRIMARY KEY, +/* src_filename_composite VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */ + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NOT NULL, + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_note VARCHAR(2500) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NOT NULL, + creator_author_email VARCHAR(100) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NOT NULL, + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + publisher VARCHAR(600) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) NULL, + original_title VARCHAR(800) NULL, + rights_copyright VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_cover VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + src_filename VARCHAR(256) NOT NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */ + src_text TEXT NULL, + fulltext TEXT NULL, + links TEXT NULL +); +#+END_SRC + +**** CREATE TABLE doc_objects + +#+name: sqlite_statement_create_table_objects +#+BEGIN_SRC sql +CREATE TABLE doc_objects ( + lid INTEGER PRIMARY KEY, + metadata_tid INTEGER REFERENCES metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + book_idx TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev0 SMALLINT, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + lev7 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(256), + digest_all CHAR(256), + types CHAR(1) NULL +); +#+END_SRC + +**** CREATE INDEX + +#+name: sqlite_statement_create_index +#+BEGIN_SRC sql +CREATE INDEX idx_ocn ON doc_objects(ocn); +CREATE INDEX idx_digest_clean ON doc_objects(digest_clean); +CREATE INDEX idx_digest_all ON doc_objects(digest_all); +CREATE INDEX idx_clean ON doc_objects(clean); +CREATE INDEX idx_lev0 ON doc_objects(lev0); +CREATE INDEX idx_lev1 ON doc_objects(lev1); +CREATE INDEX idx_lev2 ON doc_objects(lev2); +CREATE INDEX idx_lev3 ON doc_objects(lev3); +CREATE INDEX idx_lev4 ON doc_objects(lev4); +CREATE INDEX idx_lev5 ON doc_objects(lev5); +CREATE INDEX idx_lev6 ON doc_objects(lev6); +CREATE INDEX idx_title ON metadata_and_text(title); +CREATE INDEX idx_author ON metadata_and_text(creator_author); +CREATE INDEX idx_filename ON metadata_and_text(src_filename); +CREATE INDEX idx_language ON metadata_and_text(language_document_char); +CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register); +#+END_SRC + +**** TODO local site link & info + +*** sql insert statement formatted doc objects + +lid unique, increment by 1 per object, not ocn + +metadata tid document number unique +either: +- increment by adding 1 for each document, +- make hash of document filename or url and use? + +**** sql statement: dlang format +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC d +string _insert_doc_objects_row; +_insert_doc_objects_row = format(q"¶ +#+END_SRC + +**** INSERT INTO + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC sql + INSERT INTO doc_objects ( + metadata_tid, + ocn, + clean, + body, + lev, + t_of, + t_is + ) +#+END_SRC + +**** VALUES + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC sql + VALUES ( + %d, + %s, + '%s', + '%s', + %s, + '%s', + '%s' + ); +#+END_SRC + +**** sql statement: dlang values for formatting + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC d +¶", + _metadata_tid, + obj.ocn, + SQLinsertDelimiter!()(obj_txt["text"]), + SQLinsertDelimiter!()(obj_txt["html"]), + obj.heading_lev_markup, + obj.is_of, + obj.is_a, +); +#+END_SRC + +*** sqlite insert statement formatted doc matters metadata +**** sql statement: dlang format +#+name: sqlite_formatted_insertions_doc_matters_metadata +#+BEGIN_SRC d +string _insert_metadata; +_insert_metadata = format(q"¶ +#+END_SRC + +**** INSERT INTO + +#+name: sqlite_formatted_insertions_doc_matters_metadata +#+BEGIN_SRC sql + INSERT INTO metadata_and_text ( + src_filename, + title, + title_main, + title_sub, + title_short, + title_edition, + title_language, + classify_dewey, + classify_keywords, + classify_loc, + classify_subject, + classify_topic_register, + creator_author, + creator_author_email, + creator_illustrator, + creator_translator, + date_added_to_site, + date_available, + date_created, + date_issued, + date_modified, + date_published, + date_valid, + identifier_isbn, + identifier_oclc, + language_document, + language_document_char, + notes_abstract, + notes_description, + original_publisher, + original_language, + original_language_char, + original_source, + original_title, + publisher, + rights_copyright, + rights_copyright_audio, + rights_copyright_cover, + rights_copyright_illustrations, + rights_copyright_photographs, + rights_copyright_text, + rights_copyright_translation, + rights_copyright_video, + rights_license + ) +#+END_SRC + +**** VALUES + +#+name: sqlite_formatted_insertions_doc_matters_metadata +#+BEGIN_SRC sql + VALUES ( + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s' + ); +#+END_SRC + +**** sql statement: values for formatting + +#+name: sqlite_formatted_insertions_doc_matters_metadata +#+BEGIN_SRC d +¶", + SQLinsertDelimiter!()(doc_matters.src.filename), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_full), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_main), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_subtitle), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_issued), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_illustrations), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_photographs), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_text), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license) +); #+END_SRC * __END__ |