diff options
Diffstat (limited to 'lib/sisu/v1/db_create.rb')
-rw-r--r-- | lib/sisu/v1/db_create.rb | 597 |
1 files changed, 0 insertions, 597 deletions
# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: modules shared by the different db types, dbi, postgresql,
   sqlite

=end
module SiSU_DB_create
  require "#{SiSU_lib}/db_columns"

  # Issues the CREATE TABLE statements for the SiSU tables (metadata,
  # documents, the three endnote tables and urls) on the connection it is
  # given, and asks a Comment object to annotate each table afterwards.
  #
  # Column widths come from the lt_* / document_seg methods, which are not
  # defined in this file (presumably inherited from
  # SiSU_DB_columns::Column_size - confirm against db_columns).
  class Create < SiSU_DB_columns::Column_size
    require "#{SiSU_lib}/sysenv"

    # Digest length, looked up once on first instantiation and used as the
    # CHAR width of the digest_* columns.
    @@dl = nil

    # opt::      options object; only opt.cmd and opt.fns are read here
    # conn::     database connection; must respond to #execute
    # file::     stored in @file, not otherwise used in this class
    # sql_type:: passed on to Comment, which only acts when it matches /pg/
    def initialize(opt, conn, file, sql_type = 'pg')
      @opt, @conn, @file, @sql_type = opt, conn, file, sql_type
      @cX = SiSU_Screen::Ansi.new(@opt.cmd).cX
      @comment = Comment.new(@conn, @sql_type)
      @@dl ||= SiSU_Env::Info_env.new.digest.length
    end

    # Prints every available DBI driver together with its data sources.
    def available
      DBI.available_drivers.each do |driver|
        puts "Driver: #{driver}"
        DBI.data_sources(driver).each do |dsn|
          puts "\tDatasource: #{dsn}"
        end
      end
    end

    # Announces (unless the command string contains 'q') and then requests
    # creation of the PostgreSQL database named after the stub of the
    # present working directory.
    def create_db
      @env = SiSU_Env::Info_env.new(@opt.fns)
      tell = SiSU_Screen::Ansi.new(@opt.cmd, 'invert', 'Create PG db:', %{"SiSU_#{@env.path.stub_pwd}"})
      tell.colorize unless @opt.cmd =~ /q/
      SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) #watch use of path.stub_pwd instead of stub
    end

    # Calls path.webserv_stub_ensure when the command string contains 'd';
    # returns nil otherwise.
    def output_dir?
      dir = SiSU_Env::Info_env.new('')
      dir.path.webserv_stub_ensure if @opt.cmd =~ /d/
    end

    # Returns self so that one of the table methods below can be chained
    # onto it (create_table.metadata, create_table.documents, ...).
    #
    # The table methods used to be defined by nested `def`s inside this
    # method, which redefined them on the class at every call and left them
    # undefined until the first call. They are ordinary instance methods now.
    def create_table
      self
    end

    # Creates the metadata table (one row per document, keyed by tid).
    def metadata
      announce %{
        currently using sisu dbi module
        to be populated from documents files
        create tables metadata
        data import through ruby transfer
      }
      @conn.execute(%{
        CREATE TABLE metadata (
          tid                 BIGINT PRIMARY KEY,
          title               VARCHAR(#{lt_title}) NULL,
          subtitle            VARCHAR(#{lt_subtitle}) NULL,
          author              VARCHAR(#{lt_author}) NULL,
/* plan to replace creator field, currently used, with author field */
          creator             VARCHAR(#{lt_author}) NULL,
          author_title        VARCHAR(#{lt_author_title}) NULL,
          author_nationality  VARCHAR(#{lt_author_nationality}) NULL,
          illustrator         VARCHAR(#{lt_illustrator}) NULL,
          translator          VARCHAR(#{lt_translator}) NULL,
          subject             VARCHAR(#{lt_subject}) NULL,
          date                VARCHAR(#{lt_date}) NULL,
          date_added_to_site  VARCHAR(#{lt_date}) NULL,
          date_created        VARCHAR(#{lt_date}) NULL,
          date_issued         VARCHAR(#{lt_date}) NULL,
          date_available      VARCHAR(#{lt_date}) NULL,
          date_valid          VARCHAR(#{lt_date}) NULL,
          date_modified       VARCHAR(#{lt_date}) NULL,
          date_translated     VARCHAR(#{lt_date}) NULL,
/*        date                DATE, */
/*        date_added_to_site  DATE, */
/*        date_created        DATE, */
/*        date_issued         DATE, */
/*        date_available      DATE, */
/*        date_valid          DATE, */
/*        date_modified       DATE, */
/*        date_translated     DATE, */
          type                VARCHAR(#{lt_type}) NULL,
          description         VARCHAR(#{lt_description}) NULL,
          publisher           VARCHAR(#{lt_publisher}) NULL,
          contributor         VARCHAR(#{lt_contributor}) NULL,
          prepared_by         VARCHAR(#{lt_prepared_by}) NULL,
          digitized_by        VARCHAR(#{lt_digitized_by}) NULL,
          format              VARCHAR(#{lt_format}) NULL,
          identifier          VARCHAR(#{lt_identifier}) NULL,
          source              VARCHAR(#{lt_source}) NULL,
          language            VARCHAR(#{lt_language}) NULL,
          language_original   VARCHAR(#{lt_language_original}) NULL,
          relation            VARCHAR(#{lt_relation}) NULL,
          coverage            VARCHAR(#{lt_coverage}) NULL,
          rights              VARCHAR(#{lt_rights}) NULL,
          copyright           VARCHAR(#{lt_copyright}) NULL,
          owner               VARCHAR(#{lt_owner}) NULL,
          keywords            VARCHAR(#{lt_keywords}) NULL,
          comment             VARCHAR(#{lt_comment}) NULL,
          loc                 VARCHAR(#{lt_loc}) NULL,
          dewey               VARCHAR(#{lt_dewey}) NULL,
          isbn                VARCHAR(#{lt_isbn}) NULL,
          pg                  VARCHAR(#{lt_pg}) NULL,
          abstract            VARCHAR(#{lt_abstract}) NULL,
          prefix_a            TEXT NULL,
          prefix_b            TEXT NULL,
          skin                VARCHAR(#{lt_skin}) NULL,
          markup              VARCHAR(#{lt_markup}) NULL,
          links               VARCHAR(#{lt_links}) NULL,
          information         VARCHAR(#{lt_information}) NULL,
          contact             VARCHAR(#{lt_contact}) NULL,
          suffix              VARCHAR(#{lt_suffix}) NULL,
          filename            VARCHAR(#{lt_filename}) NULL UNIQUE,
          types               CHAR(#{lt_types}) NULL,
          subj                VARCHAR(#{lt_subj}) NULL,
          original_publication             VARCHAR(#{lt_orig_pub}) NULL,
          original_publication_date        VARCHAR(#{lt_orig_pub_date}) NULL,
          original_publication_institution VARCHAR(#{lt_orig_pub_institution}) NULL,
          original_publication_nationality VARCHAR(#{lt_orig_pub_nationality}) NULL,
          writing_focus_nationality        VARCHAR(#{lt_writing_focus_nationality}) NULL,
          topic_register                   VARCHAR(#{lt_topic_register}) NULL
        );
      })
      @comment.metadata
    end

    # Creates the documents table (one row per text object; references
    # metadata through metadata_tid).
    def documents # create documents base
      announce %{
        to be populated from documents files
        create tables documents document_trade document_env
        data import through ruby transfer
      }
      @conn.execute(%{
        CREATE TABLE documents (
          lid             BIGINT PRIMARY KEY,
          metadata_tid    BIGINT REFERENCES metadata,
          ocn             SMALLINT,
          ocnd            VARCHAR(6),
          ocns            VARCHAR(6),
          clean           TEXT NULL,
          body            TEXT NULL,
          seg             VARCHAR(#{document_seg}) NULL,
          lev             SMALLINT NULL,
          lev1            SMALLINT,
          lev2            SMALLINT,
          lev3            SMALLINT,
          lev4            SMALLINT,
          lev5            SMALLINT,
          lev6            SMALLINT,
          en_a            SMALLINT NULL,
          en_z            SMALLINT NULL,
          en_a_asterisk   SMALLINT NULL,
          en_z_asterisk   SMALLINT NULL,
          en_a_plus       SMALLINT NULL,
          en_z_plus       SMALLINT NULL,
          digest_clean    CHAR(#{@@dl}),
          digest_all      CHAR(#{@@dl}),
          types           CHAR(1) NULL
        );
      })
      @comment.documents
    end

    # Creates the endnotes table.
    def endnotes
      create_endnotes_table('endnotes')
      @comment.endnotes
    end

    # Creates the endnotes_asterisk table (same columns as endnotes).
    def endnotes_asterisk
      create_endnotes_table('endnotes_asterisk')
      @comment.endnotes_asterisk
    end

    # Creates the endnotes_plus table (same columns as endnotes).
    def endnotes_plus
      create_endnotes_table('endnotes_plus')
      @comment.endnotes_plus
    end

    # Creates the urls table.
    def urls # create documents file links mapping
      announce %{
        currently using sisu dbi module
        to be populated from documents files
        create tables urls
        data import through ruby transfer
      }
      @conn.execute(%{
        CREATE TABLE urls (
          metadata_tid    BIGINT REFERENCES metadata,
          plaintext       varchar(512),
          html_toc        varchar(512),
          html_doc        varchar(512),
          xhtml           varchar(512),
          xml_sax         varchar(512),
          xml_dom         varchar(512),
          odf             varchar(512),
          pdf_p           varchar(512),
          pdf_l           varchar(512),
          concordance     varchar(512),
          latex_p         varchar(512),
          latex_l         varchar(512),
          digest          varchar(512),
          manifest        varchar(512),
          markup          varchar(512),
          sisupod         varchar(512)
        );
      })
      @comment.urls
    end

    private

    # Prints +text+ unless the command string contains 'q'.
    def announce(text)
      print text unless @opt.cmd =~ /q/
    end

    # Shared DDL for the three endnote tables, which differ only in name.
    def create_endnotes_table(table)
      announce %{
        to be populated from document files
        create tables #{table}
        data import through ruby transfer
      }
      @conn.execute(%{
        CREATE TABLE #{table} (
          nid             BIGINT PRIMARY KEY,
          document_lid    BIGINT REFERENCES documents,
          nr              SMALLINT,
          clean           TEXT NULL,
          body            TEXT NULL,
          ocn             SMALLINT,
          ocnd            VARCHAR(6),
          ocns            VARCHAR(6),
          digest_clean    CHAR(#{@@dl}),
          metadata_tid    BIGINT REFERENCES metadata
        );
      })
    end
  end

  # Attaches COMMENT ON TABLE / COMMENT ON COLUMN descriptions to the tables
  # created by Create. Statements are only sent when the sql_type given to
  # the constructor matches /pg/; otherwise every table method is a no-op
  # returning nil.
  #
  # The comment texts are single-quoted Ruby strings on purpose: inside an
  # interpolating literal Ruby drops the backslash of sequences such as \d,
  # which mangled the patterns quoted in the texts.
  class Comment
    METADATA_COLUMNS = {
      'tid'               => 'unique',
      'title'             => 'metadata title (dublin core element 1)',
      'subtitle'          => 'document subtitle',
      'creator'           => 'metadata creator (dublin core element 2)',
      'author'            => 'metadata author (dublin core element 2)',
      'illustrator'       => 'metadata illustrator',
      'translator'        => 'metadata translator',
      'subject'           => 'metadata subject (dublin core element 3)',
      'date'              => 'metadata date (dublin core element 7)',
      'date_created'      => 'metadata date created (dublin core)',
      'date_issued'       => 'metadata date of issue (dublin core)',
      'date_available'    => 'metadata date available (dublin core)',
      'date_valid'        => 'metadata date valid (dublin core)',
      'date_modified'     => 'metadata date modified (dublin core)',
      'type'              => 'metadata type (dublin core element 8)',
      'description'       => 'metadata description (dublin core element 4)',
      'publisher'         => 'metadata publisher (dublin core element 5)',
      'contributor'       => 'metadata contributor (dublin core element 6)',
      'prepared_by'       => 'metadata markup prepared by',
      'digitized_by'      => 'metadata digitized by',
      'format'            => 'metadata format (dublin core element 9)',
      'identifier'        => 'metadata identifier (dublin core element 10)',
      'source'            => 'metadata source (dublin core element 11)',
      'language'          => 'metadata language (dublin core element 12)',
      'language_original' => 'metadata original language',
      'relation'          => 'metadata (dublin core element 13)',
      'coverage'          => 'metadata coverage (dublin core element 14)',
      'rights'            => 'metadata rights / copyright / license (dublin core element 15)',
      'owner'             => 'metadata owner',
      'keywords'          => 'metadata keywords',
      'comment'           => 'metadata comment',
      'abstract'          => 'metadata abstract',
      'loc'               => 'metadata library of congress',
      'dewey'             => 'metadata dewey',
      'isbn'              => 'metadata isbn',
      'pg'                => 'metadata project gutenberg number',
      'prefix_a'          => 'metadata prefix',
      'prefix_b'          => 'metadata prefix',
      'skin'              => 'metadata sisu skin',
      'markup'            => 'metadata markup source',
      'links'             => 'metadata links',
      'information'       => 'metadata information',
      'contact'           => 'metadata contact',
      'suffix'            => 'metadata sisu suffix (output related)',
      # filename used to be commented twice; this later text is the one
      # that took effect.
      'filename'          => 'metadata source filename',
      'types'             => 'document types scroll 1, seg 2, both 3',
      'subj'              => 'subject areas - no way to populate at present as not mapped'
    }.freeze

    DOCUMENTS_COLUMNS = {
      'lid'           => 'unique',
      'metadata_tid'  => 'tie to title in metadata',
      'lev'           => 'doc level 1-6 \d\~',
      'seg'           => 'segment name from level 4',
      'ocn'           => 'object citation number',
      'en_a'          => 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)',
      'en_z'          => 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)',
      'en_a_asterisk' => 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)',
      'en_z_asterisk' => 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)',
      'en_a_plus'     => 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)',
      'en_z_plus'     => 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)',
      'types'         => 'document types seg scroll',
      'clean'         => 'text object - substantive text: clean, stripped of markup',
      'body'          => 'text object - substantive text: light html markup',
      'lev1'          => 'document structure, level 1',
      'lev2'          => 'document structure, level 2',
      'lev3'          => 'document structure, level 3',
      'lev4'          => 'document structure, level 4',
      'lev5'          => 'document structure, level 5',
      'lev6'          => 'document structure, level 6'
    }.freeze

    # Shared by endnotes, endnotes_asterisk and endnotes_plus.
    ENDNOTE_COLUMNS = {
      'nid'          => 'unique',
      'document_lid' => 'ties to text block from which referenced',
      'nr'           => 'endnote number <!e_(\d+)!>',
      'clean'        => 'endnote substantive content, stripped of markup',
      'body'         => 'endnote substantive content',
      'ocn'          => 'object citation no# <\~(\d+)> from which endnote is referenced',
      'metadata_tid' => 'tie to title in metadata - unique for each document'
    }.freeze

    URLS_COLUMNS = {
      'metadata_tid' => 'tie to title in metadata - unique for each document, the mapping of rows is one to one',
      'plaintext'    => 'plaintext utf-8',
      'html_toc'     => 'table of contents for segmented html document',
      'html_doc'     => 'html document (scroll)',
      'xhtml'        => 'xhtml document (scroll)',
      'xml_sax'      => 'xml sax oriented document (scroll)',
      'xml_dom'      => 'xml dom oriented document (scroll)',
      'odf'          => 'opendocument format text',
      'pdf_p'        => 'pdf portrait',
      'pdf_l'        => 'pdf landscape',
      'concordance'  => 'rudimentary document index linked to html',
      'latex_p'      => 'latex portrait',
      'latex_l'      => 'latex_landscape',
      'markup'       => 'markup',
      'sisupod'      => 'SiSU document format .tgz (all SiSU information on document)'
    }.freeze

    # conn::     connection; must respond to #transaction, yielding an object
    #            that responds to #execute
    # sql_type:: comments are only issued when this matches /pg/
    def initialize(conn, sql_type = 'pg')
      @conn = conn
      if sql_type =~ /pg/
        psql
      else
        none
      end
    end

    # Switches this instance to issuing comments; returns self.
    #
    # psql/none used to (re)define the table methods with nested `def`s.
    # Those land on the class, so the most recently constructed Comment
    # decided the behaviour of every instance. The choice is now recorded
    # per instance.
    def psql
      @enabled = true
      self
    end

    # Switches this instance to doing nothing; returns self.
    def none
      @enabled = false
      self
    end

    # Executes every statement of +sql_arr+ inside a single transaction.
    def conn_execute_array(sql_arr)
      @conn.transaction do |conn|
        sql_arr.each do |sql|
          conn.execute(sql)
        end
      end
    end

    def metadata
      apply_comments('metadata',
                     'contains SiSU documents metadata with metadata',
                     METADATA_COLUMNS)
    end

    def documents
      apply_comments('documents',
                     'contains searchable text of SiSU documents',
                     DOCUMENTS_COLUMNS)
    end

    def endnotes
      apply_comments('endnotes',
                     'contains searchable text of SiSU documents endnotes',
                     ENDNOTE_COLUMNS)
    end

    def endnotes_asterisk
      apply_comments('endnotes_asterisk',
                     'contains searchable text of SiSU documents endnotes asterisk',
                     ENDNOTE_COLUMNS)
    end

    def endnotes_plus
      apply_comments('endnotes_plus',
                     'contains searchable text of SiSU documents endnotes',
                     ENDNOTE_COLUMNS)
    end

    def urls
      apply_comments('urls',
                     'contains base url links to different SiSU output',
                     URLS_COLUMNS)
    end

    private

    # Sends the table comment followed by one comment per column, or does
    # nothing (returning nil) when comments are disabled for this instance.
    #
    # Every column statement is qualified with +table+. The hand-written
    # lists for the endnote tables and for urls addressed
    # documents.metadata_tid instead of their own metadata_tid column, so
    # they overwrote the documents comment and left their own column bare.
    def apply_comments(table, table_text, columns)
      return unless @enabled
      statements = ["COMMENT ON TABLE #{table}\n  IS '#{table_text}';"]
      columns.each do |column, text|
        statements << "COMMENT ON COLUMN #{table}.#{column}\n  IS '#{text}';"
      end
      conn_execute_array(statements)
    end
  end
end
__END__