diff options
Diffstat (limited to 'lib/sisu/v1/db_create.rb')
-rw-r--r-- | lib/sisu/v1/db_create.rb | 597 |
1 files changed, 597 insertions, 0 deletions
# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: modules shared by the different db types, dbi, postgresql,
 sqlite

=end
module SiSU_DB_create
  require "#{SiSU_lib}/db_columns"
  # Issues the CREATE TABLE statements for the SiSU tables (metadata,
  # documents, the three endnote tables and urls) on the connection it is
  # given, and asks a Comment object to annotate each table afterwards.
  # Column widths (lt_* and document_seg) are not defined here; they are
  # expected to come from the SiSU_DB_columns::Column_size superclass.
  class Create < SiSU_DB_columns::Column_size
    require "#{SiSU_lib}/sysenv"
    # digest length, used as the width of the digest_* columns; looked up once
    # and shared by every instance
    @@dl=nil
    # opt::      options object; this class reads opt.cmd and opt.fns
    # conn::     database handle; this class calls conn.execute(sql)
    # file::     stored in @file, not otherwise used in this class
    # sql_type:: passed on to Comment; values matching /pg/ enable comments
    def initialize(opt,conn,file,sql_type='pg')
      @opt,@conn,@file,@sql_type=opt,conn,file,sql_type
      @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
      @comment=Comment.new(@conn,@sql_type)
      @@dl ||=SiSU_Env::Info_env.new.digest.length
    end
    # Prints every driver reported by DBI together with its data sources.
    def available
      DBI.available_drivers.each do |driver|
        puts "Driver: #{driver}"
        DBI.data_sources(driver).each do |dsn|
          puts "\tDatasource: #{dsn}"
        end
      end
    end
    # Announces (unless the command string contains "q") and then requests
    # creation of the db named after the current path stub.
    def create_db
      @env=SiSU_Env::Info_env.new(@opt.fns)
      tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db:',%{"SiSU_#{@env.path.stub_pwd}"})
      tell.colorize unless @opt.cmd =~/q/
      SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) #watch use of path.stub_pwd instead of stub
    end
    # When the command string contains "d", calls and returns
    # path.webserv_stub_ensure; otherwise returns nil.
    def output_dir?
      dir=SiSU_Env::Info_env.new('')
      if @opt.cmd =~/d/
        #p dir.path.webserv_stub_ensure.inspect
        dir.path.webserv_stub_ensure
      end
    end
    # Returns self after (re)defining one method per table, so callers can
    # write e.g. create_table.metadata. The nested defs become ordinary
    # instance methods of this class the first time create_table runs.
    def create_table
      # Creates the metadata table (one row per document), then comments it.
      def metadata
        print %{
      currently using sisu dbi module
      to be populated from documents files
      create tables metadata
      data import through ruby transfer
      } unless @opt.cmd =~/q/
        @conn.execute(%{
          CREATE TABLE metadata (
            tid BIGINT PRIMARY KEY,
            title VARCHAR(#{lt_title}) NULL,
            subtitle VARCHAR(#{lt_subtitle}) NULL,
            author VARCHAR(#{lt_author}) NULL,
/* plan to replace creator field, currently used, with author field */
            creator VARCHAR(#{lt_author}) NULL,
            author_title VARCHAR(#{lt_author_title}) NULL,
            author_nationality VARCHAR(#{lt_author_nationality}) NULL,
            illustrator VARCHAR(#{lt_illustrator}) NULL,
            translator VARCHAR(#{lt_translator}) NULL,
            subject VARCHAR(#{lt_subject}) NULL,
            date VARCHAR(#{lt_date}) NULL,
            date_added_to_site VARCHAR(#{lt_date}) NULL,
            date_created VARCHAR(#{lt_date}) NULL,
            date_issued VARCHAR(#{lt_date}) NULL,
            date_available VARCHAR(#{lt_date}) NULL,
            date_valid VARCHAR(#{lt_date}) NULL,
            date_modified VARCHAR(#{lt_date}) NULL,
            date_translated VARCHAR(#{lt_date}) NULL,
/* date DATE, */
/* date_added_to_site DATE, */
/* date_created DATE, */
/* date_issued DATE, */
/* date_available DATE, */
/* date_valid DATE, */
/* date_modified DATE, */
/* date_translated DATE, */
            type VARCHAR(#{lt_type}) NULL,
            description VARCHAR(#{lt_description}) NULL,
            publisher VARCHAR(#{lt_publisher}) NULL,
            contributor VARCHAR(#{lt_contributor}) NULL,
            prepared_by VARCHAR(#{lt_prepared_by}) NULL,
            digitized_by VARCHAR(#{lt_digitized_by}) NULL,
            format VARCHAR(#{lt_format}) NULL,
            identifier VARCHAR(#{lt_identifier}) NULL,
            source VARCHAR(#{lt_source}) NULL,
            language VARCHAR(#{lt_language}) NULL,
            language_original VARCHAR(#{lt_language_original}) NULL,
            relation VARCHAR(#{lt_relation}) NULL,
            coverage VARCHAR(#{lt_coverage}) NULL,
            rights VARCHAR(#{lt_rights}) NULL,
            copyright VARCHAR(#{lt_copyright}) NULL,
            owner VARCHAR(#{lt_owner}) NULL,
            keywords VARCHAR(#{lt_keywords}) NULL,
            comment VARCHAR(#{lt_comment}) NULL,
            loc VARCHAR(#{lt_loc}) NULL,
            dewey VARCHAR(#{lt_dewey}) NULL,
            isbn VARCHAR(#{lt_isbn}) NULL,
            pg VARCHAR(#{lt_pg}) NULL,
            abstract VARCHAR(#{lt_abstract}) NULL,
            prefix_a TEXT NULL,
            prefix_b TEXT NULL,
            skin VARCHAR(#{lt_skin}) NULL,
            markup VARCHAR(#{lt_markup}) NULL,
            links VARCHAR(#{lt_links}) NULL,
            information VARCHAR(#{lt_information}) NULL,
            contact VARCHAR(#{lt_contact}) NULL,
            suffix VARCHAR(#{lt_suffix}) NULL,
            filename VARCHAR(#{lt_filename}) NULL UNIQUE,
            types CHAR(#{lt_types}) NULL,
            subj VARCHAR(#{lt_subj}) NULL,
            original_publication VARCHAR(#{lt_orig_pub}) NULL,
            original_publication_date VARCHAR(#{lt_orig_pub_date}) NULL,
            original_publication_institution VARCHAR(#{lt_orig_pub_institution}) NULL,
            original_publication_nationality VARCHAR(#{lt_orig_pub_nationality}) NULL,
            writing_focus_nationality VARCHAR(#{lt_writing_focus_nationality}) NULL,
            topic_register VARCHAR(#{lt_topic_register}) NULL
          );
        })
        @comment.metadata
      end
      # Creates the documents table (text objects, each referencing a
      # metadata row), then comments it.
      def documents # create documents base
        print %{
      to be populated from documents files
      create tables documents document_trade document_env
      data import through ruby transfer
      } unless @opt.cmd =~/q/
        @conn.execute(%{
          CREATE TABLE documents (
            lid BIGINT PRIMARY KEY,
            metadata_tid BIGINT REFERENCES metadata,
            ocn SMALLINT,
            ocnd VARCHAR(6),
            ocns VARCHAR(6),
            clean TEXT NULL,
            body TEXT NULL,
            seg VARCHAR(#{document_seg}) NULL,
            lev SMALLINT NULL,
            lev1 SMALLINT,
            lev2 SMALLINT,
            lev3 SMALLINT,
            lev4 SMALLINT,
            lev5 SMALLINT,
            lev6 SMALLINT,
            en_a SMALLINT NULL,
            en_z SMALLINT NULL,
            en_a_asterisk SMALLINT NULL,
            en_z_asterisk SMALLINT NULL,
            en_a_plus SMALLINT NULL,
            en_z_plus SMALLINT NULL,
            digest_clean CHAR(#{@@dl}),
            digest_all CHAR(#{@@dl}),
            types CHAR(1) NULL
          );
        })
        @comment.documents
      end
      # Creates the endnotes table, then comments it.
      def endnotes
        create_endnotes_table('endnotes')
        @comment.endnotes
      end
      # Creates the endnotes_asterisk table, then comments it.
      def endnotes_asterisk
        create_endnotes_table('endnotes_asterisk')
        @comment.endnotes_asterisk
      end
      # Creates the endnotes_plus table, then comments it.
      def endnotes_plus
        create_endnotes_table('endnotes_plus')
        @comment.endnotes_plus
      end
      # Creates the urls table (output locations per metadata row), then
      # comments it.
      def urls # create documents file links mapping
        print %{
      currently using sisu dbi module
      to be populated from documents files
      create tables urls
      data import through ruby transfer
      } unless @opt.cmd =~/q/
        @conn.execute(%{
          CREATE TABLE urls (
            metadata_tid BIGINT REFERENCES metadata,
            plaintext varchar(512),
            html_toc varchar(512),
            html_doc varchar(512),
            xhtml varchar(512),
            xml_sax varchar(512),
            xml_dom varchar(512),
            odf varchar(512),
            pdf_p varchar(512),
            pdf_l varchar(512),
            concordance varchar(512),
            latex_p varchar(512),
            latex_l varchar(512),
            digest varchar(512),
            manifest varchar(512),
            markup varchar(512),
            sisupod varchar(512)
          );
        })
        @comment.urls
      end
      self
    end
    # Announces and creates one endnote table; the three endnote tables share
    # the same column layout and differ only in name.
    def create_endnotes_table(table)
      print %{
      to be populated from document files
      create tables #{table}
      data import through ruby transfer
      } unless @opt.cmd =~/q/
      @conn.execute(%{
        CREATE TABLE #{table} (
          nid BIGINT PRIMARY KEY,
          document_lid BIGINT REFERENCES documents,
          nr SMALLINT,
          clean TEXT NULL,
          body TEXT NULL,
          ocn SMALLINT,
          ocnd VARCHAR(6),
          ocns VARCHAR(6),
          digest_clean CHAR(#{@@dl}),
          metadata_tid BIGINT REFERENCES metadata
        );
      })
    end
    private :create_endnotes_table
  end
  # Attaches COMMENT ON descriptions to the tables and columns made by Create.
  # Construction selects the behaviour: a sql_type matching /pg/ installs the
  # commenting methods (psql), anything else installs empty ones (none).
  # Both are installed with nested defs, which define instance methods on the
  # class itself, so the most recently constructed Comment decides the
  # behaviour seen through every instance.
  class Comment
    # conn::     database handle; must provide transaction and execute
    # sql_type:: comments are issued only when this matches /pg/
    def initialize(conn,sql_type='pg')
      @conn=conn
      if sql_type =~ /pg/; psql
      else                 none
      end
    end
    # Installs the commenting implementations and returns self.
    def psql
      # Runs every statement of sql_arr inside a single transaction.
      def conn_execute_array(sql_arr)
        @conn.transaction do |conn|
          sql_arr.each do |sql|
            conn.execute(sql)
          end
        end
      end
      #def conn_execute_array(sql_arr)
      #  sql_arr.each do |sql|
      #    @conn.execute(sql)
      #  end
      #end
      # Comments the metadata table and its columns.
      def metadata
        conn_execute_array(comment_sql('metadata',
          'contains SiSU documents metadata with metadata',
          [
            ['tid','unique'],
            ['title','metadata title (dublin core element 1)'],
            ['subtitle','document subtitle'],
            ['creator','metadata creator (dublin core element 2)'],
            ['author','metadata author (dublin core element 2)'],
            ['illustrator','metadata illustrator'],
            ['translator','metadata translator'],
            ['subject','metadata subject (dublin core element 3)'],
            ['date','metadata date (dublin core element 7)'],
            ['date_created','metadata date created (dublin core)'],
            ['date_issued','metadata date of issue (dublin core)'],
            ['date_available','metadata date available (dublin core)'],
            ['date_valid','metadata date valid (dublin core)'],
            ['date_modified','metadata date modified (dublin core)'],
            ['type','metadata type (dublin core element 8)'],
            ['description','metadata description (dublin core element 4)'],
            ['publisher','metadata publisher (dublin core element 5)'],
            ['contributor','metadata contributor (dublin core element 6)'],
            ['prepared_by','metadata markup prepared by'],
            ['digitized_by','metadata digitized by'],
            ['format','metadata format (dublin core element 9)'],
            ['identifier','metadata identifier (dublin core element 10)'],
            ['source','metadata source (dublin core element 11)'],
            ['language','metadata language (dublin core element 12)'],
            ['language_original','metadata original language'],
            ['relation','metadata (dublin core element 13)'],
            ['coverage','metadata coverage (dublin core element 14)'],
            ['rights','metadata rights / copyright / license (dublin core element 15)'],
            ['owner','metadata owner'],
            ['keywords','metadata keywords'],
            ['comment','metadata comment'],
            ['abstract','metadata abstract'],
            ['loc','metadata library of congress'],
            ['dewey','metadata dewey'],
            ['isbn','metadata isbn'],
            ['pg','metadata project gutenberg number'],
            ['prefix_a','metadata prefix'],
            ['prefix_b','metadata prefix'],
            ['skin','metadata sisu skin'],
            ['markup','metadata markup source'],
            ['links','metadata links'],
            ['information','metadata information'],
            ['contact','metadata contact'],
            ['suffix','metadata sisu suffix (output related)'],
            # filename used to be commented twice ('document filename' first);
            # only this later text ever survived, so it is the one kept
            ['filename','metadata source filename'],
            ['types','document types scroll 1, seg 2, both 3'],
            ['subj','subject areas - no way to populate at present as not mapped']
          ]))
      end
      # Comments the documents table and its columns.
      def documents
        levels=(1..6).map { |n| ["lev#{n}","document structure, level #{n}"] }
        conn_execute_array(comment_sql('documents',
          'contains searchable text of SiSU documents',
          [
            ['lid','unique'],
            ['metadata_tid','tie to title in metadata'],
            # NOTE(review): interpolating literal kept on purpose - Ruby drops
            # the backslashes, so the stored text is 'doc level 1-6 d~'
            ['lev',%{doc level 1-6 \d\~}],
            ['seg','segment name from level 4'],
            ['ocn','object citation number'],
            ['en_a','first endnote number in text object (eg. NULL or 34) (used with en_z to create range)'],
            ['en_z','last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)'],
            ['en_a_asterisk','first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)'],
            ['en_z_asterisk','last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)'],
            ['en_a_plus','first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)'],
            ['en_z_plus','last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)'],
            ['types','document types seg scroll'],
            ['clean','text object - substantive text: clean, stripped of markup'],
            ['body','text object - substantive text: light html markup']
          ] + levels))
      end
      # Comments the endnotes table and its columns.
      def endnotes
        conn_execute_array(endnote_comment_sql('endnotes',
          'contains searchable text of SiSU documents endnotes'))
      end
      # Comments the endnotes_asterisk table and its columns.
      def endnotes_asterisk
        conn_execute_array(endnote_comment_sql('endnotes_asterisk',
          'contains searchable text of SiSU documents endnotes asterisk'))
      end
      # Comments the endnotes_plus table and its columns.
      def endnotes_plus
        conn_execute_array(endnote_comment_sql('endnotes_plus',
          'contains searchable text of SiSU documents endnotes'))
      end
      # Comments the urls table and its columns.
      def urls
        conn_execute_array(comment_sql('urls',
          'contains base url links to different SiSU output',
          [
            # was written against documents.metadata_tid, which replaced the
            # documents comment instead of describing this table's column
            ['metadata_tid','tie to title in metadata - unique for each document, the mapping of rows is one to one'],
            ['plaintext','plaintext utf-8'],
            ['html_toc','table of contents for segmented html document'],
            ['html_doc','html document (scroll)'],
            ['xhtml','xhtml document (scroll)'],
            ['xml_sax','xml sax oriented document (scroll)'],
            ['xml_dom','xml dom oriented document (scroll)'],
            ['odf','opendocument format text'],
            ['pdf_p','pdf portrait'],
            ['pdf_l','pdf landscape'],
            ['concordance','rudimentary document index linked to html'],
            ['latex_p','latex portrait'],
            ['latex_l','latex_landscape'],
            ['markup','markup'],
            ['sisupod','SiSU document format .tgz (all SiSU information on document)']
          ]))
      end
      self
    end
    # Installs do-nothing implementations (no comments are issued) and
    # returns self.
    def none
      def metadata
      end
      def documents
      end
      def endnotes
      end
      def endnotes_asterisk
      end
      def endnotes_plus
      end
      def urls
      end
      self
    end
    # Builds the statement list for one table: the table comment first, then
    # one COMMENT ON COLUMN per [column, note] pair, in the order given.
    def comment_sql(table,table_note,column_notes)
      statements=[%{COMMENT ON Table #{table}
            IS '#{table_note}';}]
      column_notes.each do |column,note|
        statements << %{COMMENT ON COLUMN #{table}.#{column}
            IS '#{note}';}
      end
      statements
    end
    # Builds the statement list shared by the three endnote tables, which
    # have identical columns.
    def endnote_comment_sql(table,table_note)
      comment_sql(table,table_note,
        [
          ['nid','unique'],
          ['document_lid','ties to text block from which referenced'],
          # NOTE(review): as with documents.lev, Ruby drops the backslashes in
          # the two interpolating literals below; kept as originally emitted
          ['nr',%{endnote number <!e_(\d+)!>}],
          ['clean','endnote substantive content, stripped of markup'],
          ['body','endnote substantive content'],
          ['ocn',%{object citation no# <\~(\d+)> from which endnote is referenced}],
          # was written against documents.metadata_tid; it now describes the
          # endnote table's own column
          ['metadata_tid','tie to title in metadata - unique for each document']
        ])
    end
    private :comment_sql, :endnote_comment_sql
  end
end
__END__