diff options
| author | Ralph Amissah <ralph@amissah.com> | 2010-04-16 10:12:46 -0400 | 
|---|---|---|
| committer | Ralph Amissah <ralph@amissah.com> | 2010-04-16 10:15:13 -0400 | 
| commit | aa47d1db8596aa65746db05d369441d1def62aa4 (patch) | |
| tree | c1f1ab145389abd7e30b083f05e6adbcfcfc4dbe /lib | |
| parent | db, shared_html_lite, link back to footnote/endnote reference, fix (diff) | |
db: SQL table and column structure changes; new name prefix "sisu_v2a_", resulting in SiSU version bump to 2.1.0; plus other lesser fixes
[Note: it is necessary to create a new database and tables, and to populate them]
* db (sql) database table name and column structure changes, new pgsql db name
  prefix "sisu_v2a_" (version bump), continue to review
  (db_columns, db_create, db_import, db_sqltxt)
* db remove and update fix, match filename for removal with = (not LIKE or ~)
* db sqlite, issue with --recreate, bugfix
  (db_drop)
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/sisu/v2/constants.rb | 18 | ||||
| -rw-r--r-- | lib/sisu/v2/db_columns.rb | 2072 | ||||
| -rw-r--r-- | lib/sisu/v2/db_create.rb | 463 | ||||
| -rw-r--r-- | lib/sisu/v2/db_drop.rb | 80 | ||||
| -rw-r--r-- | lib/sisu/v2/db_import.rb | 321 | ||||
| -rw-r--r-- | lib/sisu/v2/db_indexes.rb | 24 | ||||
| -rw-r--r-- | lib/sisu/v2/db_load_tuple.rb | 176 | ||||
| -rw-r--r-- | lib/sisu/v2/db_remove.rb | 10 | ||||
| -rw-r--r-- | lib/sisu/v2/db_select.rb | 18 | ||||
| -rw-r--r-- | lib/sisu/v2/db_sqltxt.rb | 115 | 
10 files changed, 2581 insertions, 716 deletions
| diff --git a/lib/sisu/v2/constants.rb b/lib/sisu/v2/constants.rb index 9abe9c0b..3fcb1e3a 100644 --- a/lib/sisu/v2/constants.rb +++ b/lib/sisu/v2/constants.rb @@ -111,7 +111,6 @@ Rx[:meta]=/#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}/  Dx[:url_o]='‹';   Dx[:url_c]='›'  Dx[:url_o_xml]='<';   Dx[:url_c_xml]='>'  Dx[:rel_o]='‹';   Dx[:rel_c]='›' -Db[:name_prefix]="SiSU#{SiSU_version_dir}_"  Tex[:backslash]="\\\\"  Tex[:backslash]="\\\\"  Tex[:tilde]='\\\\\\~' @@ -132,6 +131,23 @@ Px[:lv4]=     '-'  Px[:lv5]=     '.'  Px[:lv6]=     '.'  #Px[:lv5_6]=   '.' +Db[:name_prefix]="SiSU#{SiSU_version_dir}a_" +Db[:name_prefix_db]="sisu_#{SiSU_version_dir}a_" +Db[:col_title]=800 +Db[:col_title_part]=400 +Db[:col_title_edition]=10 +Db[:col_name]=600 +Db[:col_creator_misc_short]=100 +Db[:col_language]=100 +Db[:col_language_char]=3 +Db[:col_date_text]=10 +Db[:col_classify_txt_long]=600 +Db[:col_classify_txt_short]=600 +Db[:col_classify_short]=200 +Db[:col_classify_identify]=256 +Db[:col_classify_library]=30 +Db[:col_classify_small]=16 +Db[:col_filename]=256  __END__  consider:    〔comment〕 diff --git a/lib/sisu/v2/db_columns.rb b/lib/sisu/v2/db_columns.rb index 1849a442..ee66c59e 100644 --- a/lib/sisu/v2/db_columns.rb +++ b/lib/sisu/v2/db_columns.rb @@ -58,154 +58,1934 @@  =end  module SiSU_DB_columns -  class Column_size -    def lt_title -      600 -    end -    def lt_subtitle -      600 -    end -    def lt_author -      600 -    end -    def lt_author_title -      100 -    end -    def lt_author_nationality -      100 -    end -    def lt_illustrator -      600 -    end -    def lt_translator -      600 -    end -    def lt_prepared_by -      600 -    end -    def lt_digitized_by -      600 -    end -    def lt_subject -      600 -    end -    def lt_date -      10 -    end -    def lt_type -      600 -    end -    def lt_description -      2000 -    end -    def lt_publisher -      600 -    end -    def lt_contributor -      600 -    end -    def lt_format -      600 -    end -    
def lt_identifier -      256 -    end -    def lt_source -      200 -    end -    def lt_language -      30 -    end -    def lt_language_char -      3 -    end -    def lt_language_original -      30 -    end -    def lt_language_original_char -      3 -    end -    def lt_relation -      100 -    end -    def lt_coverage -      100 -    end -    def lt_rights -      2000 -    end -    def lt_copyright -      2000 -    end -    def lt_owner -      600 -    end -    def lt_keywords -      600 -    end -    def lt_comment -      600 -    end -    def lt_loc -      30 -    end -    def lt_dewey -      30 -    end -    def lt_isbn -      16 -    end -    def lt_pg -      16 -    end -    def lt_abstract -      600 -    end -    def lt_skin -      100 +  require "#{SiSU_lib}/sysenv"                            # sysenv.rb +  require "#{SiSU_lib}/db_sqltxt"                         # db_sqltxt.rb +  class Columns < SiSU_DB_text::Prepare +    def initialize(md=nil) +      @md=md +      @db=SiSU_Env::Info_db.new #watch +      if defined? md.mod \ +      and md.mod.inspect=~/import|update/ \ +      and FileTest.exist?(md.fns) +        txt_arr=IO.readlines(md.fns,'') +        src=txt_arr.join("\n") +        if @db.share_source? 
+          @sisutxt=special_character_escape(src) +        else @sisutxt='' +        end +        @fulltext=clean_searchable_text(txt_arr) +       else @sisutxt,@fulltext='','' +      end      end -    def lt_markup -      100 -    end -    def lt_links -      100 -    end -    def lt_information -      100 -    end -    def lt_contact -      100 -    end -    def lt_suffix -      600 -    end -    def lt_filename -      256 -    end -    def lt_types -      1 -    end -    def lt_subj -      64 -    end -    def lt_orig_pub -      400 -    end -    def lt_orig_pub_date -      400 -    end -    def lt_orig_pub_institution -      200 -    end -    def lt_orig_pub_nationality -      200 -    end -    def lt_writing_focus_nationality -      100 -    end -    def lt_topic_register -      2000 +#% structures +    #def column_define +    #  def varchar(name,size) +    #    "#{name}                VARCHAR(#{size}) NULL," +    #  end +    #end +=begin +#% title +@title: + :subtitle: + :short: + :edition: + :language: + :note: +=end +    def column +      def title                          # DublinCore 1 - title +        def name +          'title' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_title]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'metadata full document title [DC1]';} +        end +        def tuple +          t=if defined? 
@md.title.full \ +          and @md.title.full=~/\S+/ +            txt=@md.title.full +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def title_main +        def name +          'title_main' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_title_part]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'metadata main document title';} +        end +        def tuple +          t=if defined? @md.title.main \ +          and @md.title.main=~/\S+/ +            txt=@md.title.main +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def title_sub +        def name +          'title_sub' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_title_part]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'metadata document subtitle';} +        end +        def tuple +          t=if defined? @md.title.sub \ +          and @md.title.sub=~/\S+/ +            txt=@md.title.sub +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def title_short +        def name +          'title_short' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_title_part]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'metadata document short title if any';} +        end +        def tuple +          t=if defined? 
@md.title.short \ +          and @md.title.short=~/\S+/ +            txt=@md.title.short +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def title_edition +        def name +          'title_edition' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_title_edition]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'metadata document edition (version)';} +        end +        def tuple +          t=if defined? @md.title.edition \ +          and @md.title.edition=~/\S+/ +            txt=@md.title.edition +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def title_note +        def name +          'title_note' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'metadata document notes associated with title';} +        end +        def tuple +          t=if defined? @md.title.note \ +          and @md.title.note=~/\S+/ +            txt=@md.title.note +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def title_language +        def name +          'title_language' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_language]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'metadata document language [DC12]';} +        end +        def tuple +          t=if defined? 
@md.title.language \ +          and @md.title.language=~/\S+/ +            txt=@md.title.language +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def title_language_char            # consider +        def name +          'title_language_char' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_language_char]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'metadata document language iso code';} +        end +        def tuple +          t=if defined? @md.title.language_char \ +          and @md.title.language_char=~/\S+/ +            txt=@md.title.language_char +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +=begin +#% creator +@creator: + :author: + :contributor: + :illustrator: + :photographer: + :translator: + :prepared_by: + :digitized_by: + :audio: + :video: +=end +      def creator_author                 # DublinCore 2 - creator/author (author) +        def name +          'creator_author' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document author (creator) [DC2]';} +        end +        def tuple +          t=if defined? 
@md.creator.author_detail \ +          and @md.creator.author_detail.class==Array \ +          and @md.creator.author_detail.length > 0 +            txt='' +            @md.creator.author_detail.each do |h| +              txt=txt + %{#{h[:the]}, #{h[:others]}; } +            end +            txt.gsub!(/[;, ]+\s*$/,'') +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_author_honorific       # consider +        def name +          'creator_author_hon' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_creator_misc_short]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document author honorific (title e.g, Ms. Dr. Prof.)';} +        end +        def tuple +          t=if defined? @md.creator.author_hon \ +          and @md.creator.author_hon=~/\S+/ +            txt=@md.creator.author_hon +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_author_nationality     # consider +        def name +          'creator_author_nationality' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_creator_misc_short]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata nationality of document author (creator)';} +        end +        def tuple +          t=if defined? 
@md.creator.author_nationality_detail \ +          and @md.creator.author_nationality=~/\S+/ +            txt=@md.creator.author_nationality_detail +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_contributor            # DublinCore 6 - contributor +        def name +          'creator_contributor' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document contributor name(s) [DC6]';} +        end +        def tuple +          t=if defined? @md.creator.contributor_detail \ +          and @md.creator.contributor_detail.class==Array \ +          and @md.creator.contributor_detail.length > 0 +            txt=@md.creator.contributor_detail #dc +            txt='' +            @md.creator.contributor_detail.each do |h| +              txt=txt + %{#{h[:the]}, #{h[:others]}; } +            end +            txt.gsub!(/[;, ]+\s*$/,'') +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_illustrator +        def name +          'creator_illustrator' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document illustrator name(s)';} +        end +        def tuple +          t=if defined? 
@md.creator.illustrator_detail \ +          and @md.creator.illustrator_detail.class==Array \ +          and @md.creator.illustrator_detail.length > 0 +            txt=@md.creator.illustrator_detail +            txt='' +            @md.creator.illustrator_detail.each do |h| +              txt=txt + %{#{h[:the]}, #{h[:others]}; } +            end +            txt.gsub!(/[;, ]+\s*$/,'') +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_photographer +        def name +          'creator_photographer' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document photographer name(s)';} +        end +        def tuple +          t=if defined? @md.creator.photographer_detail \ +          and @md.creator.photographer_detail.class==Array \ +          and @md.creator.photographer_detail.length > 0 +            txt=@md.creator.photographer_detail +            txt='' +            @md.creator.photographer_detail.each do |h| +              txt=txt + %{#{h[:the]}, #{h[:others]}; } +            end +            txt.gsub!(/[;, ]+\s*$/,'') +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_translator +        def name +          'creator_translator' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document translator name(s)';} +        end +        def tuple +          t=if defined? 
@md.creator.translator_detail \ +          and @md.creator.translator_detail.class==Array \ +          and @md.creator.translator_detail.length > 0 +            txt='' +            @md.creator.translator_detail.each do |h| +              txt=txt + %{#{h[:the]}, #{h[:others]}; } +            end +            txt.gsub!(/[;, ]+\s*$/,'') +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_prepared_by +        def name +          'creator_prepared_by' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document prepared by name(s)';} +        end +        def tuple +          t=if defined? @md.creator.prepared_by_detail \ +          and @md.creator.prepared_by_detail.class==Array \ +          and @md.creator.prepared_by_detail.length > 0 +            txt=@md.creator.prepared_by_detail +            txt='' +            @md.creator.prepared_by_detail.each do |h| +              txt=txt + %{#{h[:the]}, #{h[:others]}; } +            end +            txt.gsub!(/[;, ]+\s*$/,'') +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_digitized_by +        def name +          'creator_digitized_by' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document digitized by name(s)';} +        end +        def tuple +          t=if defined? 
@md.creator.digitized_by_detail \ +          and @md.creator.digitized_by_detail.class==Array \ +          and @md.creator.digitized_by_detail.length > 0 +            txt=@md.creator.digitized_by_detail +            txt='' +            @md.creator.digitized_by_detail.each do |h| +              txt=txt + %{#{h[:the]}, #{h[:others]}; } +            end +            txt.gsub!(/[;, ]+\s*$/,'') +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_audio +        def name +          'creator_audio' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document audio by name(s)';} +        end +        def tuple +          t=if defined? @md.creator.audio_detail \ +          and @md.creator.audio_detail.class==Array \ +          and @md.creator.audio_detail.length > 0 +            txt=@md.creator.audio_detail +            txt='' +            @md.creator.audio_detail.each do |h| +              txt=txt + %{#{h[:the]}, #{h[:others]}; } +            end +            txt.gsub!(/[;, ]+\s*$/,'') +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def creator_video +        def name +          'creator_video' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document video by name(s)';} +        end +        def tuple +          t=if defined? 
@md.creator.video_detail \ +          and @md.creator.video_detail.class==Array \ +          and @md.creator.video_detail.length > 0 +            txt='' +            @md.creator.video_detail.each do |h| +              txt=txt + %{#{h[:the]}, #{h[:others]}; } +            end +            txt.gsub!(/[;, ]+\s*$/,'') +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +=begin +#% language +#taken from other fields +@title: + :language: +@original: + :language: +#not available --> +#@language: +# :document: +# :original: +=end +      def language_document +        def name +          'language_document' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_language]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document language';} +        end +        def tuple +          t=if defined? @md.language.document \ +          and @md.language.document=~/\S+/ +            txt=@md.language.document +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def language_document_char +        def name +          'language_document_char' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_language_char]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document language';} +        end +        def tuple +          t=if defined? 
@md.language.document_char \ +          and @md.language.document_char=~/\S+/ +            txt=@md.language.document_char +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def language_original +        def name +          'language_original' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_language]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata original document/text language';} +        end +        def tuple +          t=if defined? @md.language.original \ +          and @md.language.original=~/\S+/ +            txt=@md.language.original +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def language_original_char +        def name +          'language_original_char' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_language_char]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document language';} +        end +        def tuple +          t=if defined? 
@md.language.original_char \ +          and @md.language.original_char=~/\S+/ +            txt=@md.language.original_char +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +=begin +#% date +@date: + :added_to_site: + :available: + :created: + :issued: + :modified: + :published: + :valid: + :translated: + :original_publication: +=end +      def date_added_to_site +        def name +          'date_added_to_site' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_date_text]}) NULL," +          #"#{name}                DATE," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date added to site';} +        end +        def tuple +          t=if defined? @md.date.added_to_site \ +          and @md.date.added_to_site=~/\S+/ +            txt=@md.date.added_to_site +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def date_available +        def name +          'date_available' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_date_text]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date added to site [DC]';} +        end +        def tuple +          t=if defined? 
@md.date.available \ +          and @md.date.available=~/\S+/ +            txt=@md.date.available +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def date_created +        def name +          'date_created' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_date_text]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date created [DC]';} +        end +        def tuple +          t=if defined? @md.date.created \ +          and @md.date.created=~/\S+/ +            txt=@md.date.created +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def date_issued +        def name +          'date_issued' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_date_text]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date issued [DC]';} +        end +        def tuple +          t=if defined? @md.date.issued \ +          and @md.date.issued=~/\S+/ +            txt=@md.date.issued +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def date_modified +        def name +          'date_modified' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_date_text]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date modified [DC]';} +        end +        def tuple +          t=if defined? 
@md.date.modified \ +          and @md.date.modified=~/\S+/ +            txt=@md.date.modified +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def date_published +        def name +          'date_published' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_date_text]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date published [DC7]';} +        end +        def tuple +          t=if defined? @md.date.published \ +          and @md.date.published=~/\S+/ +            txt=@md.date.published +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def date_valid +        def name +          'date_valid' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_date_text]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date valid [DC]';} +        end +        def tuple +          t=if defined? @md.date.valid \ +          and @md.date.valid=~/\S+/ +            txt=@md.date.valid +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def date_translated +        def name +          'date_translated' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_date_text]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date translated';} +        end +        def tuple +          t=if defined? 
@md.date.translated \ +          and @md.date.translated=~/\S+/ +            txt=@md.date.translated +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def date_original_publication +        def name +          'date_original_publication' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_date_text]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date of original publication';} +        end +        def tuple +          t=if defined? @md.date.original_publication \ +          and @md.date.original_publication=~/\S+/ +            txt=@md.date.original_publication +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def date_generated +        def name +          'date_generated' +        end +        def create_column              #choose other representation of time +          "#{name}                VARCHAR(30) NULL," +          #"#{name}                VARCHAR(10) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata date of sisu generation of document, automatically populated';} +        end +        def tuple                      #choose other representation of time +          t=if defined? 
@md.generated \ +          and @md.generated.to_s=~/\S+/ +            txt=@md.generated.to_s +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +=begin +#% publisher +@publisher: +=end +      def publisher +        def name +          'publisher' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document publisher [DC5]';} +        end +        def tuple +          t=if defined? @md.publisher \ +          and @md.publisher=~/\S+/ +            txt=@md.publisher +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +##% current +#    def current_publisher +#      def name +#        'current_publisher' +#      end +#      def size +#        10 +#      end +#      def create_column +#        "#{name}                VARCHAR(#{current_publisher.size}) NULL," +#      end +#      def tuple +#        t=if defined? 
@md.current.publisher \ +#        and @md.current.publisher=~/\S+/ +#          txt=@md.current.publisher +#          special_character_escape(txt) +#          "'#{txt}', " +#        end +#      end +#      self +#    end +=begin +#% original +@original: + :publisher: + #:date:                                #repeated under date + :language: + :institution: + :nationality: + :source: +=end +      def original_publisher +        def name +          'original_publisher' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document original publisher [DC5]';} +        end +        def tuple +          t=if defined? @md.original.publisher \ +          and @md.original.publisher=~/\S+/ +            txt=@md.original.publisher +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def original_language +        def name +          'original_language' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_language]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document original language';} +        end +        def tuple +          t=if defined? 
@md.original.language \ +          and @md.original.language=~/\S+/ +            txt=@md.original.language +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def original_language_char         # consider +        def name +          'original_language_char' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_language_char]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document original language iso character';} +        end +        def tuple +          t=if defined? @md.original.language_char \ +          and @md.original.language_char=~/\S+/ +            txt=@md.original.language_char +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def original_source +        def name +          'original_source' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document original source [DC11]';} +        end +        def tuple +          t=if defined? 
@md.original.source \ +          and @md.original.source=~/\S+/ +            txt=@md.original.source +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def original_institution +        def name +          'original_institution' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_name]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document original institution';} +        end +        def tuple +          t=if defined? @md.original.institution \ +          and @md.original.institution=~/\S+/ +            txt=@md.original.institution +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def original_nationality +        def name +          'original_nationality' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_language]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document original nationality';} +        end +        def tuple +          t=if defined? 
@md.original.nationality \ +          and @md.original.nationality=~/\S+/ +            txt=@md.original.nationality +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +=begin +#% rights +@rights: + #:copyright:                          #mapped to :text: used where no other copyrights and included in :all: + :text: + :translation: + :illustrations: + :photographs: + :preparation: + :digitization: + :audio: + :video: + :license: + :all: +=end +      def rights_all +        def name +          'rights' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata rights associated with document (composite) [DC15]';} +        end +        def tuple +          t=if defined? @md.rights.all \ +          and @md.rights.all=~/\S+/ +            txt=@md.rights.all +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def rights_copyright_text +        def name +          'rights_copyright_text' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata copyright associated for document text';} +        end +        def tuple +          t=if defined? 
@md.rights.copyright_text \ +          and @md.rights.copyright_text=~/\S+/ +            txt=@md.rights.copyright_text +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def rights_copyright_translation +        def name +          'rights_copyright_translation' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata copyright associated for document text translation (if any)';} +        end +        def tuple +          t=if defined? @md.rights.copyright_translation \ +          and @md.rights.copyright_translation=~/\S+/ +            txt=@md.rights.copyright_translation +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def rights_copyright_illustrations +        def name +          'rights_copyright_illustrations' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata copyright associated for document text illustrations (if any)';} +        end +        def tuple +          t=if defined? 
@md.rights.copyright_illustrations \ +          and @md.rights.copyright_illustrations=~/\S+/ +            txt=@md.rights.copyright_illustrations +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def rights_copyright_photographs +        def name +          'rights_copyright_photographs' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata copyright associated for document text photographs (if any)';} +        end +        def tuple +          t=if defined? @md.rights.copyright_photographs \ +          and @md.rights.copyright_photographs=~/\S+/ +            txt=@md.rights.copyright_photographs +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def rights_copyright_preparation +        def name +          'rights_copyright_preparation' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata copyright associated for document text preparation (if any)';} +        end +        def tuple +          t=if defined? 
@md.rights.copyright_preparation \ +          and @md.rights.copyright_preparation=~/\S+/ +            txt=@md.rights.copyright_preparation +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def rights_copyright_digitization +        def name +          'rights_copyright_digitization' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata copyright associated for document text digitization (if any)';} +        end +        def tuple +          t=if defined? @md.rights.copyright_digitization \ +          and @md.rights.copyright_digitization=~/\S+/ +            txt=@md.rights.copyright_digitization +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def rights_copyright_audio +        def name +          'rights_copyright_audio' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata copyright associated for document text audio (if any)';} +        end +        def tuple +          t=if defined? 
@md.rights.copyright_audio \ +          and @md.rights.copyright_audio=~/\S+/ +            txt=@md.rights.copyright_audio +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def rights_copyright_video +        def name +          'rights_copyright_video' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata copyright associated for document text video (if any)';} +        end +        def tuple +          t=if defined? @md.rights.copyright_video \ +          and @md.rights.copyright_video=~/\S+/ +            txt=@md.rights.copyright_video +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def rights_license +        def name +          'rights_license' +        end +        def create_column +          "#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata license granted for use of document if any)';} +        end +        def tuple +          t=if defined? 
@md.rights.license \ +          and @md.rights.license=~/\S+/ +            txt=@md.rights.license +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +=begin +#% classify +@classify: + :topic_register: + :coverage: + :format: + :identifier: + :keywords: + :relation: + :subject: + :type: + :loc: + :dewey: + :pg: + :isbn: +=end +      def classify_topic_register +        def name +          'classify_topic_register' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_txt_long]}) NULL," +          #"#{name}                  TEXT NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document topic register (semi-structured document subject information)';} +        end +        def tuple +          t=if defined? @md.classify.topic_register \ +          and @md.classify.topic_register=~/\S+/ +            txt=@md.classify.topic_register +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_subject +        def name +          'classify_subject' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_txt_short]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document subject matter [DC3]';} +        end +        def tuple +          t=if defined? 
@md.classify.subject \ +          and @md.classify.subject=~/\S+/ +            txt=@md.classify.subject +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_type #check +        def name +          'classify_type' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_txt_short]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document type [DC8]';} +        end +        def tuple +          t=if defined? @md.classify.type \ +          and @md.classify.type=~/\S+/ +            txt=@md.classify.type +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_loc +        def name +          'classify_loc' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_library]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document library of congress (if available)';} +        end +        def tuple +          t=if defined? 
@md.classify.loc \ +          and @md.classify.loc=~/\S+/ +            txt=@md.classify.loc +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_dewey +        def name +          'classify_dewey' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_library]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document dewey (if available)';} +        end +        def tuple +          t=if defined? @md.classify.dewey \ +          and @md.classify.dewey=~/\S+/ +            txt=@md.classify.dewey +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_pg +        def name +          'classify_pg' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_small]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document project gutenberg (if any)';} +        end +        def tuple +          t=if defined? @md.classify.pg \ +          and @md.classify.pg=~/\S+/ +            txt=@md.classify.pg +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_isbn +        def name +          'classify_isbn' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_small]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document isbn (if any)';} +        end +        def tuple +          t=if defined? 
@md.classify.isbn \ +          and @md.classify.isbn=~/\S+/ +            txt=@md.classify.isbn +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_format +        def name +          'classify_format' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_txt_short]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document format [DC9]';} +        end +        def tuple +          t=if defined? @md.classify.format \ +          and @md.classify.format=~/\S+/ +            txt=@md.classify.format +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_identifier +        def name +          'classify_identifier' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_identify]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document identifier [DC10]';} +        end +        def tuple +          t=if defined? 
@md.classify.identifier \ +          and @md.classify.identifier=~/\S+/ +            txt=@md.classify.identifier +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_relation +        def name +          'classify_relation' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_short]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document relation [DC13]';} +        end +        def tuple +          t=if defined? @md.classify.relation \ +          and @md.classify.relation=~/\S+/ +            txt=@md.classify.relation +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_coverage +        def name +          'classify_coverage' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_short]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document coverage [DC14]';} +        end +        def tuple +          t=if defined? 
@md.classify.coverage \ +          and @md.classify.coverage=~/\S+/ +            txt=@md.classify.coverage +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def classify_keywords +        def name +          'classify_keywords' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_txt_short]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata classify document keywords';} +        end +        def tuple +          t=if defined? @md.classify.keywords \ +          and @md.classify.keywords=~/\S+/ +            txt=@md.classify.keywords +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +=begin +#% notes +@notes: + :abstract: + :comment: + :description: + :history: + :prefix: + :prefix_a: + :prefix_b: + :suffix: +=end +      def notes_abstract +        def name +          'notes_abstract' +        end +        def create_column +          "#{name}                     TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document notes abstract';} +        end +        def tuple +          t=if defined? 
@md.notes.abstract \ +          and @md.notes.abstract=~/\S+/ +            txt=@md.notes.abstract +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def notes_comment +        def name +          'notes_comment' +        end +        def create_column +          "#{name}                       TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document notes comment';} +        end +        def tuple +          t=if defined? @md.notes.comment \ +          and @md.notes.comment=~/\S+/ +            txt=@md.notes.comment +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def notes_description +        def name +          'notes_description' +        end +        def create_column +          "#{name}                    TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document notes description [DC4]';} +        end +        def tuple +          t=if defined? @md.notes.description \ +          and @md.notes.description=~/\S+/ +            txt=@md.notes.description +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def notes_history   #check, consider removal +        def name +          'notes_history' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_classify_txt_short]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document notes history';} +        end +        def tuple +          t=if defined? 
@md.notes.history \ +          and @md.notes.history=~/\S+/ +            txt=@md.notes.history +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def notes_prefix +        def name +          'notes_prefix' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document notes prefix';} +        end +        def tuple +          t=if defined? @md.notes.prefix \ +          and @md.notes.prefix=~/\S+/ +            txt=@md.notes.prefix +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def notes_prefix_a +        def name +          'notes_prefix_a' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document notes prefix_a';} +        end +        def tuple +          t=if defined? @md.notes.prefix_a \ +          and @md.notes.prefix_a=~/\S+/ +            txt=@md.notes.prefix_a +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def notes_prefix_b +        def name +          'notes_prefix_b' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document notes prefix_b';} +        end +        def tuple +          t=if defined? 
@md.notes.prefix_b \ +          and @md.notes.prefix_b=~/\S+/ +            txt=@md.notes.prefix_b +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def notes_suffix +        def name +          'notes_suffix' +        end +        def create_column                # keep text +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document notes suffix';} +        end +        def tuple +          t=if defined? @md.notes.suffix \ +          and @md.notes.suffix=~/\S+/ +            txt=@md.notes.suffix +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +=begin +#% misc +@make: + :skin: +@links: +=end +      def filename +        def name +          'filename' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_filename]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document filename';} +        end +        def tuple +          t=if defined? 
@md.fns \ +          and @md.fns=~/\S+/ +            txt=@md.fns +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def sisutxt                      # consider naming sisusrc +        def name +          'sisutxt' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'sisu markup text (if shared)';} +        end +        def tuple +          t=if @md.mod.inspect=~/import|update/ \ +          and FileTest.exist?(@md.fns) +            ["#{name}, ","'#{@sisutxt}', "] +          else ['',''] +          end +        end +        self +      end +      def fulltext +        def name +          'fulltext' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'document full text clean, searchable';} +        end +        def tuple +          t=if @md.mod.inspect=~/import|update/ \ +          and  FileTest.exist?(@md.fns) +            ["#{name}, ","'#{@fulltext}', "] +          else ['',''] +          end +        end +        self +      end +      def word_count +        def name +          'word_count' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'document word count';} +        end +        def tuple +          t=if defined? 
@md.wc_words \ +          and @md.wc_words=~/\S+/ +            txt=@md.wc_words +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def digest +        def name +          'dgst' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'document hash digest sha256 (or md5)';} +        end +        def tuple +          t=if defined? @md.dgst \ +          and @md.dgst=~/\S+/ +            txt=@md.dgst +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def skin_name                      #check +        def name +          'skin_name' +        end +        def create_column +          "#{name}                 VARCHAR(#{Db[:col_filename]}) NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document skin name';} +        end +        def tuple +          t=if defined? @md.notes.skin_name \ +          and @md.notes.skin_name=~/\S+/ +            txt=@md.notes.skin_name +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def skin # you likely want a separate table for skins +        def name +          'skin' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document skin';} +        end +        def tuple +          t=if defined? 
@md.skin \ +          and @md.skin=~/\S+/ +            txt=@md.skin +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def links +        def name +          'links' +        end +        def create_column +          "#{name}                TEXT NULL," +          #"#{name}                 VARCHAR(#{links.size}) NULL,"  +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'metadata document links';} +        end +        def tuple +          t=if defined? @md.notes.links \ +          and @md.notes.links=~/\S+/ +            txt=@md.notes.links +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      self      end +  end +  class Column_size      def document_clean # restriction not necessary        60000      end diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb index feba670c..8ed638dc 100644 --- a/lib/sisu/v2/db_create.rb +++ b/lib/sisu/v2/db_create.rb @@ -59,7 +59,7 @@  =end  module SiSU_DB_create    require "#{SiSU_lib}/db_columns"                         # db_columns.rb -  class Create < SiSU_DB_columns::Column_size +  class Create < SiSU_DB_columns::Columns      require "#{SiSU_lib}/sysenv"                           # sysenv.rb      @@dl=nil      def initialize(opt,conn,file,sql_type='pg') @@ -82,7 +82,7 @@ module SiSU_DB_create        @env=SiSU_Env::Info_env.new(@opt.fns)        tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db:',%{"SiSU_#{@env.path.stub_pwd}"})        tell.colorize unless @opt.cmd =~/q/ -      SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) #watch use of path.stub_pwd instead of stub +      SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) if @sql_type=='pg' #watch use of path.stub_pwd instead of stub      end      def 
output_dir?        dir=SiSU_Env::Info_env.new('') @@ -91,102 +91,129 @@ module SiSU_DB_create        end      end      def create_table -      def metadata +      def metadata_and_text          print %{            currently using sisu dbi module -          to be populated from documents files -          create tables metadata +          to be populated from document files +          create tables metadata_and_text            data import through ruby transfer          } unless @opt.cmd =~/q/          @conn.execute(%{ -          CREATE TABLE metadata ( +          CREATE TABLE metadata_and_text (              tid                  BIGINT PRIMARY KEY, -            title                VARCHAR(#{lt_title}) NULL, -            subtitle             VARCHAR(#{lt_subtitle}) NULL, -            author               VARCHAR(#{lt_author}) NULL, -/* plan to replace creator field, currently used, with author field */ -            creator              VARCHAR(#{lt_author}) NULL, -            author_title         VARCHAR(#{lt_author_title}) NULL, -            author_nationality   VARCHAR(#{lt_author_nationality}) NULL, -            illustrator          VARCHAR(#{lt_illustrator}) NULL, -            translator           VARCHAR(#{lt_translator}) NULL, -            subject              VARCHAR(#{lt_subject}) NULL, -            date                 VARCHAR(#{lt_date}) NULL, -            date_added_to_site   VARCHAR(#{lt_date}) NULL, -            date_created         VARCHAR(#{lt_date}) NULL, -            date_issued          VARCHAR(#{lt_date}) NULL, -            date_available       VARCHAR(#{lt_date}) NULL, -            date_valid           VARCHAR(#{lt_date}) NULL, -            date_modified        VARCHAR(#{lt_date}) NULL, -            date_translated      VARCHAR(#{lt_date}) NULL, -/*          date                 DATE, */ -/*          date_added_to_site   DATE, */ -/*          date_created         DATE, */ -/*          date_issued          DATE, */ -/*          date_available       
DATE, */ -/*          date_valid           DATE, */ -/*          date_modified        DATE, */ -/*          date_translated      DATE, */ -            type                 VARCHAR(#{lt_type}) NULL, -            description          VARCHAR(#{lt_description}) NULL, -            publisher            VARCHAR(#{lt_publisher}) NULL, -            contributor          VARCHAR(#{lt_contributor}) NULL, -            prepared_by          VARCHAR(#{lt_prepared_by}) NULL, -            digitized_by         VARCHAR(#{lt_digitized_by}) NULL, -            format               VARCHAR(#{lt_format}) NULL, -            identifier           VARCHAR(#{lt_identifier}) NULL, -            source               VARCHAR(#{lt_source}) NULL, -            language             VARCHAR(#{lt_language}) NULL, -            language_original    VARCHAR(#{lt_language_original}) NULL, -            relation             VARCHAR(#{lt_relation}) NULL, -            coverage             VARCHAR(#{lt_coverage}) NULL, -            rights               VARCHAR(#{lt_rights}) NULL, -            copyright            VARCHAR(#{lt_copyright}) NULL, -            owner                VARCHAR(#{lt_owner}) NULL, -            keywords             VARCHAR(#{lt_keywords}) NULL, -            comment              VARCHAR(#{lt_comment}) NULL, -            loc                  VARCHAR(#{lt_loc}) NULL, -            dewey                VARCHAR(#{lt_dewey}) NULL, -            isbn                 VARCHAR(#{lt_isbn}) NULL, -            pg                   VARCHAR(#{lt_pg}) NULL, -            abstract             VARCHAR(#{lt_abstract}) NULL, -            prefix_a             TEXT NULL, -            prefix_b             TEXT NULL, -            skin                 VARCHAR(#{lt_skin}) NULL, -            markup               VARCHAR(#{lt_markup}) NULL, -            links                VARCHAR(#{lt_links}) NULL, -            information          VARCHAR(#{lt_information}) NULL, -            contact              
VARCHAR(#{lt_contact}) NULL, -            suffix               VARCHAR(#{lt_suffix}) NULL, -            filename             VARCHAR(#{lt_filename}) NULL UNIQUE, -            types                CHAR(#{lt_types}) NULL, -            subj                 VARCHAR(#{lt_subj}) NULL, -            original_publication VARCHAR(#{lt_orig_pub}) NULL, -            original_publication_date VARCHAR(#{lt_orig_pub_date}) NULL, -            original_publication_institution VARCHAR(#{lt_orig_pub_institution}) NULL, -            original_publication_nationality VARCHAR(#{lt_orig_pub_nationality}) NULL, -            writing_focus_nationality VARCHAR(#{lt_writing_focus_nationality}) NULL, -            topic_register       VARCHAR(#{lt_topic_register}) NULL +            /* title */ +            #{column.title.create_column} +            #{column.title_main.create_column} +            #{column.title_sub.create_column} +            #{column.title_short.create_column} +            #{column.title_edition.create_column} +            #{column.title_note.create_column} +            #{column.title_language.create_column} +            #{column.title_language_char.create_column} +            /* creator */ +            #{column.creator_author.create_column} +            #{column.creator_author_honorific.create_column} +            #{column.creator_author_nationality.create_column} +            #{column.creator_contributor.create_column} +            #{column.creator_illustrator.create_column} +            #{column.creator_photographer.create_column} +            #{column.creator_translator.create_column} +            #{column.creator_prepared_by.create_column} +            #{column.creator_digitized_by.create_column} +            #{column.creator_audio.create_column} +            #{column.creator_video.create_column} +            /* language */ +            #{column.language_document.create_column} +            #{column.language_document_char.create_column} +            
#{column.language_original.create_column} +            #{column.language_original_char.create_column} +            /* date */ +            #{column.date_added_to_site.create_column} +            #{column.date_available.create_column} +            #{column.date_created.create_column} +            #{column.date_issued.create_column} +            #{column.date_modified.create_column} +            #{column.date_published.create_column} +            #{column.date_valid.create_column} +            #{column.date_translated.create_column} +            #{column.date_original_publication.create_column} +            #{column.date_generated.create_column} +            /* publisher */ +            #{column.publisher.create_column} +            /* original */ +            #{column.original_publisher.create_column} +            #{column.original_language.create_column} +            #{column.original_language_char.create_column} +            #{column.original_source.create_column} +            #{column.original_institution.create_column} +            #{column.original_nationality.create_column} +            /* rights */ +            #{column.rights_all.create_column} +            #{column.rights_copyright_text.create_column} +            #{column.rights_copyright_translation.create_column} +            #{column.rights_copyright_illustrations.create_column} +            #{column.rights_copyright_photographs.create_column} +            #{column.rights_copyright_preparation.create_column} +            #{column.rights_copyright_digitization.create_column} +            #{column.rights_copyright_audio.create_column} +            #{column.rights_copyright_video.create_column} +            #{column.rights_license.create_column} +            /* classify */ +            #{column.classify_topic_register.create_column} +            #{column.classify_subject.create_column} +            #{column.classify_type.create_column} +            #{column.classify_loc.create_column} +            
#{column.classify_dewey.create_column} +            #{column.classify_pg.create_column} +            #{column.classify_isbn.create_column} +            #{column.classify_format.create_column} +            #{column.classify_identifier.create_column} +            #{column.classify_relation.create_column} +            #{column.classify_coverage.create_column} +            #{column.classify_keywords.create_column} +            /* notes */ +            #{column.notes_abstract.create_column} +            #{column.notes_comment.create_column} +            #{column.notes_description.create_column} +            #{column.notes_history.create_column} +            #{column.notes_prefix.create_column} +            #{column.notes_prefix_a.create_column} +            #{column.notes_prefix_b.create_column} +            #{column.notes_suffix.create_column} +            /* misc */ +            #{column.filename.create_column} +            #{column.sisutxt.create_column} +            #{column.fulltext.create_column} +            #{column.word_count.create_column} +            #{column.digest.create_column} +            #{column.skin_name.create_column} +            #{column.skin.create_column} +            #{column.links.create_column.gsub(/,$/,'')} +/*          subj                 VARCHAR(64) NULL, */ +/*          contact              VARCHAR(100) NULL, */ +/*          information          VARCHAR(100) NULL, */ +/*          types                CHAR(1) NULL, */ +/*          writing_focus_nationality VARCHAR(100) NULL, */            );         }) -       @comment.psql.metadata if @comment +       @comment.psql.metadata_and_text if @comment        end -      def documents                                                 # create documents base +      def doc_objects                                                 # create doc_objects base          print %{            to be populated from documents files -          create tables documents document_trade document_env +          create 
tables doc_objects            data import through ruby transfer          } unless @opt.cmd =~/q/          @conn.execute(%{ -          CREATE TABLE documents ( +          CREATE TABLE doc_objects (              lid             BIGINT PRIMARY KEY, -            metadata_tid    BIGINT REFERENCES metadata, +            metadata_tid    BIGINT REFERENCES metadata_and_text,              ocn             SMALLINT,              ocnd            VARCHAR(6),              ocns            VARCHAR(6),              clean           TEXT NULL,              body            TEXT NULL, -            seg             VARCHAR(#{document_seg}) NULL, +            seg             VARCHAR(120) NULL,              lev_an          VARCHAR(1),              lev             SMALLINT NULL,              lev1            SMALLINT, @@ -210,7 +237,7 @@ module SiSU_DB_create              types           CHAR(1) NULL            );         }) -       @comment.psql.documents if @comment +       @comment.psql.doc_objects if @comment        end        def endnotes          print %{ @@ -221,7 +248,7 @@ module SiSU_DB_create          @conn.execute(%{            CREATE TABLE endnotes (              nid             BIGINT PRIMARY KEY, -            document_lid    BIGINT REFERENCES documents, +            document_lid    BIGINT REFERENCES doc_objects,              nr              SMALLINT,              clean           TEXT NULL,              body            TEXT NULL, @@ -229,7 +256,7 @@ module SiSU_DB_create              ocnd            VARCHAR(6),              ocns            VARCHAR(6),              digest_clean    CHAR(#{@@dl}), -            metadata_tid    BIGINT REFERENCES metadata +            metadata_tid    BIGINT REFERENCES metadata_and_text            );         })         @comment.psql.endnotes if @comment @@ -243,7 +270,7 @@ module SiSU_DB_create          @conn.execute(%{            CREATE TABLE endnotes_asterisk (              nid             BIGINT PRIMARY KEY, -            document_lid    BIGINT 
REFERENCES documents, +            document_lid    BIGINT REFERENCES doc_objects,              nr              SMALLINT,              clean           TEXT NULL,              body            TEXT NULL, @@ -251,7 +278,7 @@ module SiSU_DB_create              ocnd            VARCHAR(6),              ocns            VARCHAR(6),              digest_clean    CHAR(#{@@dl}), -            metadata_tid    BIGINT REFERENCES metadata +            metadata_tid    BIGINT REFERENCES metadata_and_text            );         })         @comment.psql.endnotes_asterisk if @comment @@ -265,7 +292,7 @@ module SiSU_DB_create          @conn.execute(%{            CREATE TABLE endnotes_plus (              nid             BIGINT PRIMARY KEY, -            document_lid    BIGINT REFERENCES documents, +            document_lid    BIGINT REFERENCES doc_objects,              nr              SMALLINT,              clean           TEXT NULL,              body            TEXT NULL, @@ -273,21 +300,21 @@ module SiSU_DB_create              ocnd            VARCHAR(6),              ocns            VARCHAR(6),              digest_clean    CHAR(#{@@dl}), -            metadata_tid    BIGINT REFERENCES metadata +            metadata_tid    BIGINT REFERENCES metadata_and_text            );         })         @comment.psql.endnotes_plus if @comment        end -      def urls                                                       # create documents file links mapping +      def urls                                                       # create doc_objects file links mapping          print %{            currently using sisu dbi module -          to be populated from documents files +          to be populated from doc_objects files            create tables urls            data import through ruby transfer          } unless @opt.cmd =~/q/          @conn.execute(%{            CREATE TABLE urls ( -            metadata_tid    BIGINT REFERENCES metadata, +            metadata_tid    BIGINT REFERENCES 
metadata_and_text,              plaintext       varchar(512),              html_toc        varchar(512),              html_doc        varchar(512), @@ -311,7 +338,7 @@ module SiSU_DB_create        self      end    end -  class Comment +  class Comment < SiSU_DB_columns::Columns      def initialize(conn,sql_type='pg')        @conn=conn        if sql_type =~ /pg/; psql @@ -325,162 +352,146 @@ module SiSU_DB_create            end          end        end -      def metadata +      def metadata_and_text          sql_arr=[ -          %{COMMENT ON Table metadata -            IS 'contains SiSU documents metadata with metadata';}, -          %{COMMENT ON COLUMN metadata.tid +          %{COMMENT ON Table metadata_and_text +            IS 'contains SiSU metadata and fulltext for search (including source .sst if shared)';}, +          %{COMMENT ON COLUMN metadata_and_text.tid              IS 'unique';}, -          %{COMMENT ON COLUMN metadata.filename -            IS 'document filename';}, -          %{COMMENT ON COLUMN metadata.title -            IS 'metadata title (dublin core element 1)';}, -          %{COMMENT ON COLUMN metadata.subtitle -            IS 'document subtitle';}, -          %{COMMENT ON COLUMN metadata.creator -            IS 'metadata creator (dublin core element 2)';}, -          %{COMMENT ON COLUMN metadata.author -            IS 'metadata author (dublin core element 2)';}, -          %{COMMENT ON COLUMN metadata.illustrator -            IS 'metadata illustrator';}, -          %{COMMENT ON COLUMN metadata.translator -            IS 'metadata translator';}, -          %{COMMENT ON COLUMN metadata.subject -            IS 'metadata subject (dublin core element 3)';}, -          %{COMMENT ON COLUMN metadata.date -            IS 'metadata date (dublin core element 7)';}, -          %{COMMENT ON COLUMN metadata.date_created -            IS 'metadata date created (dublin core)';}, -          %{COMMENT ON COLUMN metadata.date_issued -            IS 'metadata date 
of issue (dublin core)';}, -          %{COMMENT ON COLUMN metadata.date_available -            IS 'metadata date available (dublin core)';}, -          %{COMMENT ON COLUMN metadata.date_valid -            IS 'metadata date valid (dublin core)';}, -          %{COMMENT ON COLUMN metadata.date_modified -            IS 'metadata date modified (dublin core)';}, -          %{COMMENT ON COLUMN metadata.type -            IS 'metadata type (dublin core element 8)';}, -          %{COMMENT ON COLUMN metadata.description -            IS 'metadata description (dublin core element 4)';}, -          %{COMMENT ON COLUMN metadata.publisher -            IS 'metadata publisher (dublin core element 5)';}, -          %{COMMENT ON COLUMN metadata.contributor -            IS 'metadata contributor (dublin core element 6)';}, -          %{COMMENT ON COLUMN metadata.prepared_by -            IS 'metadata markup prepared by';}, -          %{COMMENT ON COLUMN metadata.digitized_by -            IS 'metadata digitized by';}, -          %{COMMENT ON COLUMN metadata.format -            IS 'metadata format (dublin core element 9)';}, -          %{COMMENT ON COLUMN metadata.identifier -            IS 'metadata identifier (dublin core element 10)';}, -          %{COMMENT ON COLUMN metadata.source -            IS 'metadata source (dublin core element 11)';}, -          %{COMMENT ON COLUMN metadata.language -            IS 'metadata language (dublin core element 12)';}, -          %{COMMENT ON COLUMN metadata.language_original -            IS 'metadata original language';}, -          %{COMMENT ON COLUMN metadata.relation -            IS 'metadata  (dublin core element 13)';}, -          %{COMMENT ON COLUMN metadata.coverage -            IS 'metadata coverage (dublin core element 14)';}, -          %{COMMENT ON COLUMN metadata.rights -            IS 'metadata rights / copyright / license (dublin core element 15)';}, -          %{COMMENT ON COLUMN metadata.owner -            IS 'metadata owner';}, -     
     %{COMMENT ON COLUMN metadata.keywords -            IS 'metadata keywords';}, -          %{COMMENT ON COLUMN metadata.comment -            IS 'metadata comment';}, -          %{COMMENT ON COLUMN metadata.abstract -            IS 'metadata abstract';}, -          %{COMMENT ON COLUMN metadata.loc -            IS 'metadata library of congress';}, -          %{COMMENT ON COLUMN metadata.dewey -            IS 'metadata dewey';}, -          %{COMMENT ON COLUMN metadata.isbn -            IS 'metadata isbn';}, -          %{COMMENT ON COLUMN metadata.pg -            IS 'metadata project gutenberg number';}, -          %{COMMENT ON COLUMN metadata.prefix_a -            IS 'metadata prefix';}, -          %{COMMENT ON COLUMN metadata.prefix_b -            IS 'metadata prefix';}, -          %{COMMENT ON COLUMN metadata.skin -            IS 'metadata sisu skin';}, -          %{COMMENT ON COLUMN metadata.markup -            IS 'metadata markup source';}, -          %{COMMENT ON COLUMN metadata.links -            IS 'metadata links';}, -          %{COMMENT ON COLUMN metadata.information -            IS 'metadata information';}, -          %{COMMENT ON COLUMN metadata.contact -            IS 'metadata contact';}, -          %{COMMENT ON COLUMN metadata.suffix -            IS 'metadata sisu suffix (output related)';}, -          %{COMMENT ON COLUMN metadata.filename -            IS 'metadata source filename';}, -          %{COMMENT ON COLUMN metadata.types -            IS 'document types scroll 1, seg 2, both 3';}, -          %{COMMENT ON COLUMN metadata.subj -            IS 'subject areas - no way to populate at present as not mapped';}, +          %{#{column.title.column_comment}}, +          %{#{column.title_main.column_comment}}, +          %{#{column.title_sub.column_comment}}, +          %{#{column.title_short.column_comment}}, +          %{#{column.title_edition.column_comment}}, +          %{#{column.title_note.column_comment}}, +          
%{#{column.title_language.column_comment}}, +          %{#{column.title_language_char.column_comment}}, +          %{#{column.creator_author.column_comment}}, +          %{#{column.creator_author_honorific.column_comment}}, +          %{#{column.creator_author_nationality.column_comment}}, +          %{#{column.creator_contributor.column_comment}}, +          %{#{column.creator_illustrator.column_comment}}, +          %{#{column.creator_photographer.column_comment}}, +          %{#{column.creator_translator.column_comment}}, +          %{#{column.creator_prepared_by.column_comment}}, +          %{#{column.creator_digitized_by.column_comment}}, +          %{#{column.creator_audio.column_comment}}, +          %{#{column.creator_video.column_comment}}, +          %{#{column.language_document.column_comment}}, +          %{#{column.language_document_char.column_comment}}, +          %{#{column.language_original.column_comment}}, +          %{#{column.language_original_char.column_comment}}, +          %{#{column.date_added_to_site.column_comment}}, +          %{#{column.date_available.column_comment}}, +          %{#{column.date_created.column_comment}}, +          %{#{column.date_issued.column_comment}}, +          %{#{column.date_modified.column_comment}}, +          %{#{column.date_published.column_comment}}, +          %{#{column.date_valid.column_comment}}, +          %{#{column.date_translated.column_comment}}, +          %{#{column.date_original_publication.column_comment}}, +          %{#{column.date_generated.column_comment}}, +          %{#{column.publisher.column_comment}}, +          %{#{column.original_publisher.column_comment}}, +          %{#{column.original_language.column_comment}}, +          %{#{column.original_language_char.column_comment}}, +          %{#{column.original_source.column_comment}}, +          %{#{column.original_institution.column_comment}}, +          %{#{column.original_nationality.column_comment}}, +          
%{#{column.rights_all.column_comment}}, +          %{#{column.rights_copyright_text.column_comment}}, +          %{#{column.rights_copyright_translation.column_comment}}, +          %{#{column.rights_copyright_illustrations.column_comment}}, +          %{#{column.rights_copyright_photographs.column_comment}}, +          %{#{column.rights_copyright_preparation.column_comment}}, +          %{#{column.rights_copyright_digitization.column_comment}}, +          %{#{column.rights_copyright_audio.column_comment}}, +          %{#{column.rights_copyright_video.column_comment}}, +          %{#{column.rights_license.column_comment}}, +          %{#{column.classify_topic_register.column_comment}}, +          %{#{column.classify_subject.column_comment}}, +          %{#{column.classify_type.column_comment}}, +          %{#{column.classify_loc.column_comment}}, +          %{#{column.classify_dewey.column_comment}}, +          %{#{column.classify_pg.column_comment}}, +          %{#{column.classify_isbn.column_comment}}, +          %{#{column.classify_format.column_comment}}, +          %{#{column.classify_identifier.column_comment}}, +          %{#{column.classify_relation.column_comment}}, +          %{#{column.classify_coverage.column_comment}}, +          %{#{column.classify_keywords.column_comment}}, +          %{#{column.notes_abstract.column_comment}}, +          %{#{column.notes_comment.column_comment}}, +          %{#{column.notes_description.column_comment}}, +          %{#{column.notes_history.column_comment}}, +          %{#{column.notes_prefix.column_comment}}, +          %{#{column.notes_prefix_a.column_comment}}, +          %{#{column.notes_prefix_b.column_comment}}, +          %{#{column.notes_suffix.column_comment}}, +          %{#{column.filename.column_comment}}, +          %{#{column.sisutxt.column_comment}}, +          %{#{column.fulltext.column_comment}}, +          %{#{column.word_count.column_comment}}, +          %{#{column.digest.column_comment}}, +        
  %{#{column.skin_name.column_comment}}, +          %{#{column.skin.column_comment}}, +          %{#{column.links.column_comment}},          ]          conn_execute_array(sql_arr)        end -      def documents +      def doc_objects          sql_arr=[ -          %{COMMENT ON Table documents -            IS 'contains searchable text of SiSU documents';}, -          %{COMMENT ON COLUMN documents.lid +          %{COMMENT ON Table doc_objects +            IS 'contains searchable text of SiSU document objects';}, +          %{COMMENT ON COLUMN doc_objects.lid              IS 'unique';}, -          %{COMMENT ON COLUMN documents.metadata_tid -            IS 'tie to title in metadata';}, -          %{COMMENT ON COLUMN documents.lev_an +          %{COMMENT ON COLUMN doc_objects.metadata_tid +            IS 'tie to title in metadata_and_text';}, +          %{COMMENT ON COLUMN doc_objects.lev_an              IS 'doc level A-C 1-6';}, -          %{COMMENT ON COLUMN documents.lev +          %{COMMENT ON COLUMN doc_objects.lev              IS 'doc level 1-6 \d\~';}, -          %{COMMENT ON COLUMN documents.seg +          %{COMMENT ON COLUMN doc_objects.seg              IS 'segment name from level number 4 (lv 1)';}, -          %{COMMENT ON COLUMN documents.ocn +          %{COMMENT ON COLUMN doc_objects.ocn              IS 'object citation number';}, -          %{COMMENT ON COLUMN documents.en_a +          %{COMMENT ON COLUMN doc_objects.en_a              IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';}, -          %{COMMENT ON COLUMN documents.en_z +          %{COMMENT ON COLUMN doc_objects.en_z              IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';}, -          %{COMMENT ON COLUMN documents.en_a_asterisk +          %{COMMENT ON COLUMN doc_objects.en_a_asterisk              IS 'first endnote number in text object (eg. 
NULL or 34) (used with en_z_asterisk to create range)';}, -          %{COMMENT ON COLUMN documents.en_z_asterisk +          %{COMMENT ON COLUMN doc_objects.en_z_asterisk              IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';}, -          %{COMMENT ON COLUMN documents.en_a_plus +          %{COMMENT ON COLUMN doc_objects.en_a_plus              IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';}, -          %{COMMENT ON COLUMN documents.en_z_plus +          %{COMMENT ON COLUMN doc_objects.en_z_plus              IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)';}, -          %{COMMENT ON COLUMN documents.types +          %{COMMENT ON COLUMN doc_objects.types              IS 'document types seg scroll';}, -          %{COMMENT ON COLUMN documents.clean +          %{COMMENT ON COLUMN doc_objects.clean              IS 'text object - substantive text: clean, stripped of markup';}, -          %{COMMENT ON COLUMN documents.body +          %{COMMENT ON COLUMN doc_objects.body              IS 'text object - substantive text: light html markup';}, -          %{COMMENT ON COLUMN documents.lev1 +          %{COMMENT ON COLUMN doc_objects.lev1              IS 'document structure, level number 1';}, -          %{COMMENT ON COLUMN documents.lev2 +          %{COMMENT ON COLUMN doc_objects.lev2              IS 'document structure, level number 2';}, -          %{COMMENT ON COLUMN documents.lev3 +          %{COMMENT ON COLUMN doc_objects.lev3              IS 'document structure, level number 3';}, -          %{COMMENT ON COLUMN documents.lev4 +          %{COMMENT ON COLUMN doc_objects.lev4              IS 'document structure, level number 4';}, -          %{COMMENT ON COLUMN documents.lev5 +          %{COMMENT ON COLUMN doc_objects.lev5              IS 'document structure, level number 5';}, -          %{COMMENT ON 
COLUMN documents.lev6 +          %{COMMENT ON COLUMN doc_objects.lev6              IS 'document structure, level number 6';}, -          %{COMMENT ON COLUMN documents.t_of +          %{COMMENT ON COLUMN doc_objects.t_of              IS 'document structure, type of object (object is of)';}, -          %{COMMENT ON COLUMN documents.t_is +          %{COMMENT ON COLUMN doc_objects.t_is              IS 'document structure, object is';}, -          %{COMMENT ON COLUMN documents.node +          %{COMMENT ON COLUMN doc_objects.node              IS 'document structure, object node if heading';}, -          %{COMMENT ON COLUMN documents.parent +          %{COMMENT ON COLUMN doc_objects.parent              IS 'document structure, object parent (is a heading)';}          ]          conn_execute_array(sql_arr) @@ -501,15 +512,15 @@ module SiSU_DB_create              IS 'endnote substantive content';},            %{COMMENT ON COLUMN endnotes.ocn              IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, -          %{COMMENT ON COLUMN documents.metadata_tid -            IS 'tie to title in metadata - unique for each document';} +          %{COMMENT ON COLUMN doc_objects.metadata_tid +            IS 'tie to title in metadata_and_text - unique for each document';}          ]          conn_execute_array(sql_arr)        end        def endnotes_asterisk          sql_arr=[            %{COMMENT ON Table endnotes_asterisk -            IS 'contains searchable text of SiSU documents endnotes asterisk';}, +            IS 'contains searchable text of SiSU documents endnotes marked with asterisk';},            %{COMMENT ON COLUMN endnotes_asterisk.nid              IS 'unique';},            %{COMMENT ON COLUMN endnotes_asterisk.document_lid @@ -522,15 +533,15 @@ module SiSU_DB_create              IS 'endnote substantive content';},            %{COMMENT ON COLUMN endnotes_asterisk.ocn              IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, -  
        %{COMMENT ON COLUMN documents.metadata_tid -            IS 'tie to title in metadata - unique for each document';} +          %{COMMENT ON COLUMN doc_objects.metadata_tid +            IS 'tie to title in metadata_and_text - unique for each document';}          ]          conn_execute_array(sql_arr)        end        def endnotes_plus          sql_arr=[            %{COMMENT ON Table endnotes_plus -            IS 'contains searchable text of SiSU documents endnotes';}, +            IS 'contains searchable text of SiSU documents endnotes marked with plus';},            %{COMMENT ON COLUMN endnotes_plus.nid              IS 'unique';},            %{COMMENT ON COLUMN endnotes_plus.document_lid @@ -543,8 +554,8 @@ module SiSU_DB_create              IS 'endnote substantive content';},            %{COMMENT ON COLUMN endnotes_plus.ocn              IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, -          %{COMMENT ON COLUMN documents.metadata_tid -            IS 'tie to title in metadata - unique for each document';}, +          %{COMMENT ON COLUMN doc_objects.metadata_tid +            IS 'tie to title in metadata_and_text - unique for each document';},          ]          conn_execute_array(sql_arr)        end @@ -552,8 +563,8 @@ module SiSU_DB_create          sql_arr=[            %{COMMENT ON Table urls              IS 'contains base url links to different SiSU output';}, -          %{COMMENT ON COLUMN documents.metadata_tid -            IS 'tie to title in metadata - unique for each document, the mapping of rows is one to one';}, +          %{COMMENT ON COLUMN doc_objects.metadata_tid +            IS 'tie to title in metadata_and_text - unique for each document, the mapping of rows is one to one';},            %{COMMENT ON COLUMN urls.plaintext              IS 'plaintext utf-8';},            %{COMMENT ON COLUMN urls.html_toc diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb index 673c5f8f..7189da56 100644 --- 
a/lib/sisu/v2/db_drop.rb +++ b/lib/sisu/v2/db_drop.rb @@ -70,8 +70,8 @@ module SiSU_DB_drop          cascade='CASCADE'        end        @drop_table=[ -        "DROP TABLE metadata #{cascade};", -        "DROP TABLE documents #{cascade};", +        "DROP TABLE metadata_and_text #{cascade};", +        "DROP TABLE doc_objects #{cascade};",          "DROP TABLE urls #{cascade};",          "DROP TABLE endnotes #{cascade};",          "DROP TABLE endnotes_asterisk #{cascade};", @@ -84,15 +84,35 @@ module SiSU_DB_drop            msg_sqlite="as not all disk space is recovered after dropping the database << #{@db_info.sqlite.db} >>, you may be better off deleting the file, and recreating it as necessary"            case @sql_type            when /sqlite/ -            @conn.transaction -            @drop_table.each do |d| -              @conn.execute(d) -            end -            @conn.commit              puts msg_sqlite              ans=@ans.response?('remove sql database?') -            if ans and File.exist?(@db_info.sqlite.db) +            if ans \ +            and File.exist?(@db_info.sqlite.db) +              @conn.close                File.unlink(@db_info.sqlite.db) +              db=SiSU_Env::Info_db.new +              conn=db.sqlite.conn_sqlite3 +              sdb=SiSU_DB_DBI::Create.new(@opt,conn,@db_info,@sql_type) +              sdb_index=SiSU_DB_DBI::Index.new(@opt,conn,@db_info,@sql_type) +              sdb.output_dir? +              begin +                sdb.create_db +                sdb.create_table.metadata_and_text +                sdb.create_table.doc_objects +                sdb.create_table.endnotes +                sdb.create_table.endnotes_asterisk +                sdb.create_table.endnotes_plus +                sdb.create_table.urls +                sdb_index.create_indexes +              rescue;  SiSU_Errors::Info_error.new($!,$@,'-D').error; @sdb.output_dir? 
+              end +              exit +            else +              @conn.transaction +              @drop_table.each do |d| +                @conn.execute(d) +              end +              @conn.commit              end            else              @drop_table.each do |d| @@ -115,33 +135,33 @@ module SiSU_DB_drop        end        def indexes                                                             #% drop all indexes          #@conn.do(%{ -        #  DROP INDEX object_nr ON documents(ocn); -        #  DROP INDEX body ON documents(body); -        #  DROP INDEX clean ON documents(clean); -        #  DROP INDEX lev1 ON documents(lev1); -        #  DROP INDEX lev2 ON documents(lev2); -        #  DROP INDEX lev3 ON documents(lev3); -        #  DROP INDEX lev4 ON documents(lev4); -        #  DROP INDEX lev5 ON documents(lev5); -        #  DROP INDEX lev6 ON documents(lev6); +        #  DROP INDEX object_nr ON doc_objects(ocn); +        #  DROP INDEX body ON doc_objects(body); +        #  DROP INDEX clean ON doc_objects(clean); +        #  DROP INDEX lev1 ON doc_objects(lev1); +        #  DROP INDEX lev2 ON doc_objects(lev2); +        #  DROP INDEX lev3 ON doc_objects(lev3); +        #  DROP INDEX lev4 ON doc_objects(lev4); +        #  DROP INDEX lev5 ON doc_objects(lev5); +        #  DROP INDEX lev6 ON doc_objects(lev6);          #  DROP INDEX endnote_nr ON endnotes(nr);          #  DROP INDEX endnote ON endnotes(body); -        #  DROP INDEX title ON metadata(title); -        #  DROP INDEX filename ON metadata(filename) +        #  DROP INDEX title ON metadata_and_text(title); +        #  DROP INDEX filename ON metadata_and_text(filename)          #  /* -        #  DROP INDEX object_nr ON documents(ocn) CASCADE; -        #  DROP INDEX body ON documents(body) CASCADE; -        #  DROP INDEX clean ON documents(clean) CASCADE; -        #  DROP INDEX lev1 ON documents(lev1) CASCADE; -        #  DROP INDEX lev2 ON documents(lev2) CASCADE; -        #  DROP INDEX 
lev3 ON documents(lev3) CASCADE; -        #  DROP INDEX lev4 ON documents(lev4) CASCADE; -        #  DROP INDEX lev5 ON documents(lev5) CASCADE; -        #  DROP INDEX lev6 ON documents(lev6) CASCADE; +        #  DROP INDEX object_nr ON doc_objects(ocn) CASCADE; +        #  DROP INDEX body ON doc_objects(body) CASCADE; +        #  DROP INDEX clean ON doc_objects(clean) CASCADE; +        #  DROP INDEX lev1 ON doc_objects(lev1) CASCADE; +        #  DROP INDEX lev2 ON doc_objects(lev2) CASCADE; +        #  DROP INDEX lev3 ON doc_objects(lev3) CASCADE; +        #  DROP INDEX lev4 ON doc_objects(lev4) CASCADE; +        #  DROP INDEX lev5 ON doc_objects(lev5) CASCADE; +        #  DROP INDEX lev6 ON doc_objects(lev6) CASCADE;          #  DROP INDEX endnote_nr ON endnotes(nr) CASCADE;          #  DROP INDEX endnote ON endnotes(body) CASCADE; -        #  DROP INDEX title ON metadata(title) CASCADE; -        #  DROP INDEX filename ON metadata(filename) CASCADE +        #  DROP INDEX title ON metadata_and_text(title) CASCADE; +        #  DROP INDEX filename ON metadata_and_text(filename) CASCADE          #  */          #})        end diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index 1f795e68..5610a1d0 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -60,9 +60,10 @@  module SiSU_DB_import    require "#{SiSU_lib}/db_columns"                         # db_columns.rb    require "#{SiSU_lib}/db_load_tuple"                      # db_load_tuple.rb +  require "#{SiSU_lib}/db_sqltxt"                          # db_sqltxt.rb    require "#{SiSU_lib}/shared_html_lite"                   # shared_html_lite.rb    require 'sqlite3' -  class Import < SiSU_DB_columns::Column_size +  class Import < SiSU_DB_text::Prepare      include SiSU_Param      include SiSU_Screen      @@dl=nil @@ -86,7 +87,7 @@ module SiSU_DB_import        @counter={}        @db=SiSU_Env::Info_db.new        
@driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false -      sql='SELECT MAX(lid) FROM documents' +      sql='SELECT MAX(lid) FROM doc_objects'        begin          @col[:lid] ||=0          @col[:lid]=if @driver_sqlite3 @@ -122,7 +123,7 @@ module SiSU_DB_import        tell.print_grey if @opt.cmd =~/v/        file_exist=if @sql_type=~/sqlite/; nil        else -        @conn.select_one(%{ SELECT metadata.tid FROM metadata WHERE metadata.filename ~ '#{@opt.fns}'; }) +        @conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; })        end        if (@sql_type!~/sqlite/ and not file_exist) \        or @sql_type=~/sqlite/ @@ -192,28 +193,6 @@ module SiSU_DB_import          end        end      end -    def special_character_escape(str) -      str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") -      str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n") -      str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check -      str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') -      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') -      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1') -      str -    end -    def strip_markup(str) #define rules, make same as in dal clean -      str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') -      str.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') -      str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1')         #tables -      str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ')                          #tables -      str.gsub!(/#{Mx[:tc_p]}/u,' ')                                                     #tables tidy later -      str.gsub!(/<.+?>/,'') -      
str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search -      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search -      str.gsub!(/\s\s+/,' ') -      str.strip! -      str -    end      def pf_db_import_transaction_open      end      def pf_db_import_transaction_close @@ -222,12 +201,23 @@ module SiSU_DB_import        print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } unless @opt.cmd =~/q/        @tp={}        @md=SiSU_Param::Parameters.new(@opt).get +#% sisutxt & fulltxt +      if FileTest.exist?(@md.fns) +        txt_arr=IO.readlines(@md.fns,'') +        src=txt_arr.join("\n") +        src=special_character_escape(src) +        @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', " +        txt=clean_searchable_text(txt_arr) +        #special_character_escape(txt) +        @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', " +      end +#% title        if defined? 
@md.title.full \        and @md.title.full=~/\S+/                                              # DublinCore 1 - title -        @tp[:title]=@md.title.full -        special_character_escape(@tp[:title]) -        @tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " -        sql='SELECT MAX(tid) FROM metadata' +        #@tp[:title]=@md.title.full +        #special_character_escape(@tp[:title]) +        #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " +        sql='SELECT MAX(tid) FROM metadata_and_text'          begin            @@id_t ||=0            id_t=if @driver_sqlite3 @@ -242,220 +232,9 @@ module SiSU_DB_import          @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title:          puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}} unless @opt.cmd =~/q/        end -      if defined? @md.creator.author \ -      and @md.creator.author=~/\S+/                                           # DublinCore 2 - creator/author (author) -        txt=@md.creator.author #dc -        special_character_escape(txt) -        @tp[:creator_f],@tp[:creator_i]='creator, ',"'#{txt}', " -      end -      if defined? @md.creator.contributor \ -      and @md.creator.contributor=~/\S+/                                      # DublinCore 6 - contributor -        txt=@md.creator.contributor #dc -        special_character_escape(txt) -        @tp[:contributor_f],@tp[:contributor_i]='contributor, ',"'#{txt}', " -      end -      if defined? @md.creator.translator \ -      and @md.creator.translator=~/\S+/ -        txt=@md.creator.translator -        special_character_escape(txt) -        @tp[:translator_f],@tp[:translator_i]='translator, ',"'#{txt}', " -      end -      if defined? 
@md.creator.illustrator \ -      and @md.creator.illustrator=~/\S+/ -        txt=@md.creator.illustrator -        special_character_escape(txt) -        @tp[:illustrator_f],@tp[:illustrator_i]='illustrator, ',"'#{txt}', " -      end -      if defined? @md.publisher \ -      and @md.publisher -        txt=@md.publisher #dc -        special_character_escape(txt) -        @tp[:publisher_f],@tp[:publisher_i]='publisher, ',"'#{txt}', " -      end -      if defined? @md.creator.prepared_by \ -      and @md.creator.prepared_by=~/\S+/ -        txt=@md.creator.prepared_by -        special_character_escape(txt) -        @tp[:prepared_by_f],@tp[:prepared_by_i]='prepared_by, ',"'#{txt}', " -      end -      if defined? @md.creator.digitized_by \ -      and @md.creator.digitized_by=~/\S+/ -        txt=@md.creator.digitized_by -        special_character_escape(txt) -        @tp[:digitized_by_f],@tp[:digitized_by_i]='digitized_by, ',"'#{txt}', " -      end -      if defined? @md.classify.subject \ -      and @md.classify.subject=~/\S+/                                          # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) -        txt=@md.classify.subject #dc -        special_character_escape(txt) -        @tp[:subject_f],@tp[:subject_i]='subject, ',"'#{txt}', " -      end -      if defined? @md.notes.description \ -      and @md.notes.description=~/\S+/                                         # DublinCore 4 - description -        txt=@md.notes.description #dc -        special_character_escape(txt) -        @tp[:description_f],@tp[:description_i]='description, ',"'#{txt}', " -      end -      if defined? @md.classify.subject \ -      and @md.classify.subject=~/\S+/                                         # DublinCore 8 - type (genre eg. report, convention etc) -        txt=@md.classify.abstract -        special_character_escape(txt) -        @tp[:abstract_f],@tp[:abstract_i]='abstract, ',"'#{txt}', " -      end -      if defined? 
@md.rights.all \ -      and @md.rights.all=~/\S+/                                               # DublinCore 15 - rights -        txt=@md.rights.all #dc -        special_character_escape(txt) -        @tp[:rights_f],@tp[:rights_i]='rights, ',"'#{txt}', " -      end -      if defined? @md.date.published \ -      and @md.date.published=~/\S+/                                           # DublinCore 7 - date year-mm-dd -        txt=@md.date.published #dc -        special_character_escape(txt) -        @tp[:date_f],@tp[:date_i]='date, ',"'#{txt}', " -      end -      if defined? @md.date.created \ -      and @md.date.created=~/\S+/ -        txt=@md.date.created #dc -        special_character_escape(txt) -        @tp[:date_created_f],@tp[:date_created_i]='date_created, ',"'#{txt}', " -      end -      if defined? @md.date.issued \ -      and @md.date.issued=~/\S+/ -        txt=@md.date.issued #dc -        special_character_escape(txt) -        @tp[:date_issued_f],@tp[:date_issued_i]='date_issued, ',"'#{txt}', " -      end -      if defined? @md.date.available \ -      and @md.date.available=~/\S+/ -        txt=@md.date.available #dc -        special_character_escape(txt) -        @tp[:date_available_f],@tp[:date_available_i]='date_available, ',"'#{txt}', " -      end -      if defined? @md.date.modified \ -      and @md.date.modified=~/\S+/ -        txt=@md.date.modified #dc -        special_character_escape(txt) -        @tp[:date_modified_f],@tp[:date_modified_i]='date_modified, ',"'#{txt}', " -      end -      if defined? @md.date.valid \ -      and @md.date.valid=~/\S+/ -        txt=@md.date.valid #dc -        special_character_escape(txt) -        @tp[:date_valid_f],@tp[:date_valid_i]='date_valid, ',"'#{txt}', " -      end -      if defined? @md.title.language \ -      and @md.title.language=~/\S+/ -        txt=@md.title.language -        special_character_escape(txt) -        @tp[:language_f],@tp[:language_i]='language, ',"'#{txt}', " -      end -      if defined? 
@md.original.language \ -      and @md.original.language=~/\S+/ -        txt=@md.original.language -        special_character_escape(txt) -        @tp[:language_original_f],@tp[:language_original_i]='language_original, ',"'#{txt}', " -      end -      if defined? @md.classify.format \ -      and @md.classify.format=~/\S+/                                          # DublinCore 9 - format (use your mime type) -        txt=@md.classify.format #dc -        special_character_escape(txt) -        @tp[:format_f],@tp[:format_i]='format, ',"'#{txt}', " -      end -      if defined? @md.classify.identifier \ -      and @md.classify.identifier=~/\S+/                                       # DublinCore 10 - identifier (your identifier, could use urn which is free) -        txt=@md.classify.identifier #dc -        special_character_escape(txt) -        @tp[:identifier_f],@tp[:identifier_i]='identifier, ',"'#{txt}', " -      end -      if defined? @md.original.source \ -      and @md.original.source=~/\S+/                                           # DublinCore 11 - source (document source) -        txt=@md.original.source #dc -        special_character_escape(txt) -        @tp[:source_f],@tp[:source_i]='source, ',"'#{txt}', " -      end -      if defined? @md.classify.relation \ -      and @md.classify.relation=~/\S+/                                         # DublinCore 13 - relation -        txt=@md.classify.relation #dc -        special_character_escape(txt) -        @tp[:relation_f],@tp[:relation_i]='relation, ',"'#{txt}', " -      end -      if defined? @md.classify.coverage \ -      and @md.classify.coverage=~/\S+/                                         # DublinCore 14 - coverage -        txt=@md.classify.coverage #dc -        special_character_escape(txt) -        @tp[:coverage_f],@tp[:coverage_i]='coverage, ',"'#{txt}', " -      end -      if defined? 
@md.classify.keywords \ -      and @md.classify.keywords=~/\S+/ -        txt=@md.classify.keywords -        special_character_escape(txt) -        @tp[:keywords_f],@tp[:keywords_i]='keywords, ',"'#{txt}', " -      end -      if defined? @md.notes.comment \ -      and @md.notes.comment=~/\S+/ -        txt=@md.notes.comments -        special_character_escape(txt) -        @tp[:comments_f],@tp[:comments_i]='comments, ',"'#{txt}', " -      end -      if defined? @md.classify.loc \ -      and @md.classify.loc=~/\S+/ -        txt=@md.classify.loc -        special_character_escape(txt) -        @tp[:cls_loc_f],@tp[:cls_loc_i]='cls_loc, ',"'#{txt}', " -      end -      if defined? @md.classify.dewey \ -      and @md.classify.dewey=~/\S+/ -        txt=@md.classify.dewey -        special_character_escape(txt) -        @tp[:cls_dewey_f],@tp[:cls_dewey_i]='cls_dewey, ',"'#{txt}', " -      end -      if defined? @md.classify.pg \ -      and @md.classify.pg=~/\S+/ -        txt=@md.classify.pg -        special_character_escape(txt) -        @tp[:cls_pg_f],@tp[:cls_pg_i]='cls_pg, ',"'#{txt}', " -      end -      if defined? @md.classify.isbn \ -      and @md.classify.isbn=~/\S+/ -        txt=@md.classify.isbn -        special_character_escape(txt) -        @tp[:cls_isbn_f],@tp[:cls_isbn_i]='cls_isbn, ',"'#{txt}', " -      end -      if defined? @md.notes.prefix_a \ -      and @md.notes.prefix_a=~/\S+/ -        txt=@md.notes.prefix_a -        special_character_escape(txt) -        @tp[:prefix_a_f],@tp[:prefix_a_i]='prefix_a, ',"'#{txt}', " -      end -      if defined? @md.notes.prefix_b \ -      and @md.notes.prefix_b=~/\S+/ -        txt=@md.notes.prefix_b -        special_character_escape(txt) -        @tp[:prefix_b_f],@tp[:prefix_b_i]='prefix_b, ',"'#{txt}', " -      end -      if defined? 
@md.fns \ -      and @md.fns=~/\S+/ -        txt=@md.fns -        special_character_escape(txt) -        @tp[:fns_f],@tp[:fns_i]="filename, ","'#{txt}', " -      end -      if @md.wc_words; txt=@md.wc_words -        @tp[:wc_words_f],@tp[:wc_words_i]='wc_words, ',"'#{txt}', " -      end -      if defined? @md.dgst \ -      and @md.dgst.class==Array -        txt=@md.dgst[1] -        @tp[:dgst_f],@tp[:dgst_i]='dgst, ',"'#{txt}', " -      end -      if @md.sc_date; txt=@md.sc_date -        @tp[:sc_date_f],@tp[:sc_date_i]='sc_date, ',"'#{txt}', " -      end -      if @md.generated; txt=@md.generated -        @tp[:generated_f],@tp[:generated_i]='generated, ',"'#{@txt}', " -      end +      ################ CLEAR ##############        SiSU_DB_DBI::Test.new(self,@opt).verify                          #% import title names, filenames (tuple) -      t=SiSU_DB_tuple::Load_metadata.new(@conn,@tp,@@id_t,@opt,@file) +      t=SiSU_DB_tuple::Load_metadata.new(@conn,@@id_t,@md,@file)        tuple=t.tuple        tuple      end @@ -482,13 +261,7 @@ module SiSU_DB_import              and data.ln.inspect=~/[123]/                @col[:lev],txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.ln,data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'',''                @col[:lid]+=1 -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ -                txt=endnotes(txt).clean_text -              end +              txt=endnotes(txt).extract_any                
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup @@ -521,13 +294,7 @@ module SiSU_DB_import                end                @env=SiSU_Env::Info_env.new(@md.fns)                @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ -                txt=endnotes(txt).clean_text(@base_url) -              end +              txt=endnotes(txt).extract_any                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup @@ -553,13 +320,7 @@ module SiSU_DB_import                end                @env=SiSU_Env::Info_env.new(@md.fns)                @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ -                txt=endnotes(txt).clean_text(@base_url) -              end +              txt=endnotes(txt).extract_any                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup @@ 
-585,13 +346,7 @@ module SiSU_DB_import                end                @env=SiSU_Env::Info_env.new(@md.fns)                @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ -                txt=endnotes(txt).clean_text(@base_url) -              end +              txt=endnotes(txt).extract_any                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup @@ -613,15 +368,9 @@ module SiSU_DB_import                end                @env=SiSU_Env::Info_env.new(@md.fns)                @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ -                txt=endnotes(txt).clean_text(@base_url) -              end +              txt=endnotes(txt).extract_any                if @sql_type=~/pg/ \ -              and txt.size > (document_clean - 1)             #% examine pg build & remove limitation +              and txt.size > (SiSU_DB_columns::Column_size.new.document_clean - 1)             #% examine pg build & remove limitation                  puts "\n\nTOO LARGE (TXT - see error log)\n\n"                  
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|                    error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}") @@ -660,7 +409,7 @@ module SiSU_DB_import                    #special_character_escape(body)                    #special_character_escape(txt)                    strip_markup(txt) -                  if txt.size > (endnote_clean - 1) +                  if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)                      puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"                      open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|                        error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -698,7 +447,7 @@ module SiSU_DB_import                    special_character_escape(txt)                    body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)                    strip_markup(txt) -                  if txt.size > (endnote_clean - 1) +                  if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)                      puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"                      open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|                        error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -736,7 +485,7 @@ module SiSU_DB_import                    special_character_escape(txt)                    body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)                    strip_markup(txt) -                  if txt.size > (endnote_clean - 1) +                  if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)                      puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"                      open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|                        
error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -772,6 +521,16 @@ module SiSU_DB_import      end      def endnotes(txt)        @txt=txt +      def extract_any +        if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ +          endnotes(@txt).range +          @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ +          @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ +          @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ +          @txt=endnotes(@txt).clean_text +        end +        @txt +      end        def standard          x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)          else nil diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb index a64fb362..3cbcc20c 100644 --- a/lib/sisu/v2/db_indexes.rb +++ b/lib/sisu/v2/db_indexes.rb @@ -73,15 +73,15 @@ module SiSU_DB_index        def base          print "\n          create documents common indexes\n" unless @opt.cmd =~/q/          sql_arr=[ -          %{CREATE INDEX object_nr ON documents(ocn);}, -          %{CREATE INDEX digest_clean ON documents(digest_clean);}, -          %{CREATE INDEX digest_all ON documents(digest_all);}, -          %{CREATE INDEX lev1 ON documents(lev1);}, -          %{CREATE INDEX lev2 ON documents(lev2);}, -          %{CREATE INDEX lev3 ON documents(lev3);}, -          %{CREATE INDEX lev4 ON documents(lev4);}, -          %{CREATE INDEX lev5 ON documents(lev5);}, -          %{CREATE INDEX lev6 ON documents(lev6);}, +          %{CREATE INDEX object_nr ON doc_objects(ocn);}, +          %{CREATE INDEX digest_clean ON doc_objects(digest_clean);}, +          %{CREATE INDEX digest_all ON doc_objects(digest_all);}, +          %{CREATE INDEX lev1 ON doc_objects(lev1);}, +          %{CREATE INDEX lev2 ON 
doc_objects(lev2);}, +          %{CREATE INDEX lev3 ON doc_objects(lev3);}, +          %{CREATE INDEX lev4 ON doc_objects(lev4);}, +          %{CREATE INDEX lev5 ON doc_objects(lev5);}, +          %{CREATE INDEX lev6 ON doc_objects(lev6);},            %{CREATE INDEX endnote_nr ON endnotes(nr);},            %{CREATE INDEX digest_en ON endnotes(digest_clean);},            %{CREATE INDEX endnote_nr_asterisk ON endnotes_asterisk(nr);}, @@ -90,15 +90,15 @@ module SiSU_DB_index            %{CREATE INDEX endnote_nr_plus ON endnotes_plus(nr);},            %{CREATE INDEX endnote_plus ON endnotes_plus(clean);},            %{CREATE INDEX digest_en_plus ON endnotes_plus(digest_clean);}, -          %{CREATE INDEX title ON metadata(title);}, -          %{CREATE INDEX filename ON metadata(filename)}, +          %{CREATE INDEX title ON metadata_and_text(title);}, +          %{CREATE INDEX filename ON metadata_and_text(filename)},          ]          conn_execute_array(sql_arr)        end        def text          print "\n          create documents text indexes\n" unless @opt.cmd =~/q/          sql_arr=[ -          %{CREATE INDEX clean ON documents(clean);}, +          %{CREATE INDEX clean ON doc_objects(clean);},            %{CREATE INDEX endnote ON endnotes(clean);}          ]          conn_execute_array(sql_arr) diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb index 2fc3a455..cc00b74a 100644 --- a/lib/sisu/v2/db_load_tuple.rb +++ b/lib/sisu/v2/db_load_tuple.rb @@ -58,6 +58,7 @@  =end  module SiSU_DB_tuple +  require "#{SiSU_lib}/db_columns"                         # db_columns.rb    class Load_documents      require "#{SiSU_lib}/param"                            # param.rb        include SiSU_Param @@ -78,10 +79,10 @@ module SiSU_DB_tuple      end      def tuple                                                                    #% import line        sql_entry=if @col[:en_a] -        "INSERT INTO documents (lid, metadata_tid, lev, lev_an, clean, body, ocn, 
ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + +        "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " +          "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"        else -        "INSERT INTO documents (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + +        "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " +          "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"        end        if @opt.cmd =~/M/ @@ -113,13 +114,174 @@ module SiSU_DB_tuple        sql_entry      end    end -  class Load_metadata -    def initialize(conn,tp,id,opt,file) -      @conn,@tp,@id,@opt,@file=conn,tp,id,opt,file +  class Load_metadata #< SiSU_DB_columns::Columns +    def initialize(conn,id,md,file) +      @conn,@id,@opt,@file=conn,id,md,file +      
@tp=SiSU_DB_columns::Columns.new(md)      end      def tuple -      sql_entry="INSERT INTO metadata (#{@tp[:fns_f]} #{@tp[:suffix_f]} #{@tp[:title_f]} #{@tp[:subtitle_f]} #{@tp[:creator_f]} #{@tp[:illustrator_f]} #{@tp[:translator_f]} #{@tp[:subject_f]} #{@tp[:description_f]} #{@tp[:publisher_f]} #{@tp[:contributor_f]} #{@tp[:prepared_by_f]} #{@tp[:digitized_by_f]} #{@tp[:date_f]} #{@tp[:date_created_f]} #{@tp[:date_issued_f]} #{@tp[:date_valid_f]} #{@tp[:date_available_f]} #{@tp[:date_modified_f]} #{@tp[:type_f]} #{@tp[:format_f]} #{@tp[:identifier_f]} #{@tp[:source_f]} #{@tp[:language_f]} #{@tp[:language_original_f]} #{@tp[:relation_f]} #{@tp[:coverage_f]} #{@tp[:rights_f]} #{@tp[:copyright_f]} #{@tp[:owner_f]} #{@tp[:keywords_f]} #{@tp[:abstract_f]} #{@tp[:comment_f]} #{@tp[:loc_f]} #{@tp[:dewey_f]} #{@tp[:isbn_f]} #{@tp[:pg_f]} #{@tp[:prefix_a_f]} #{@tp[:prefix_b_f]} tid) " + -       "VALUES (#{@tp[:fns_i]} #{@tp[:suffix_i]} #{@tp[:title_i]} #{@tp[:subtitle_i]} #{@tp[:creator_i]} #{@tp[:illustrator_i]} #{@tp[:translator_i]} #{@tp[:subject_i]} #{@tp[:description_i]} #{@tp[:publisher_i]} #{@tp[:contributor_i]} #{@tp[:prepared_by_i]} #{@tp[:digitized_by_i]} #{@tp[:date_i]} #{@tp[:date_created_i]} #{@tp[:date_issued_i]} #{@tp[:date_valid_i]} #{@tp[:date_available_i]} #{@tp[:date_modified_i]} #{@tp[:type_i]} #{@tp[:format_i]} #{@tp[:identifier_i]} #{@tp[:source_i]} #{@tp[:language_i]} #{@tp[:language_original_i]} #{@tp[:relation_i]} #{@tp[:coverage_i]} #{@tp[:rights_i]} #{@tp[:copyright_i]} #{@tp[:owner_i]} #{@tp[:keywords_i]} #{@tp[:abstract_i]} #{@tp[:comment_i]} #{@tp[:loc_i]} #{@tp[:dewey_i]} #{@tp[:isbn_i]} #{@tp[:pg_i]} #{@tp[:prefix_a_i]} #{@tp[:prefix_b_i]} #{@id});" +      sql_entry="INSERT INTO metadata_and_text ( +#{@tp.column.title.tuple[0]} +#{@tp.column.title_main.tuple[0]} +#{@tp.column.title_sub.tuple[0]} +#{@tp.column.title_short.tuple[0]} +#{@tp.column.title_edition.tuple[0]} +#{@tp.column.title_note.tuple[0]} +#{@tp.column.title_language.tuple[0]} 
+#{@tp.column.title_language_char.tuple[0]} +#{@tp.column.creator_author.tuple[0]} +#{@tp.column.creator_author_honorific.tuple[0]} +#{@tp.column.creator_author_nationality.tuple[0]} +#{@tp.column.creator_contributor.tuple[0]} +#{@tp.column.creator_illustrator.tuple[0]} +#{@tp.column.creator_photographer.tuple[0]} +#{@tp.column.creator_translator.tuple[0]} +#{@tp.column.creator_prepared_by.tuple[0]} +#{@tp.column.creator_digitized_by.tuple[0]} +#{@tp.column.creator_audio.tuple[0]} +#{@tp.column.creator_video.tuple[0]} +#{@tp.column.language_document.tuple[0]} +#{@tp.column.language_document_char.tuple[0]} +#{@tp.column.language_original.tuple[0]} +#{@tp.column.language_original_char.tuple[0]} +#{@tp.column.date_added_to_site.tuple[0]} +#{@tp.column.date_available.tuple[0]} +#{@tp.column.date_created.tuple[0]} +#{@tp.column.date_issued.tuple[0]} +#{@tp.column.date_modified.tuple[0]} +#{@tp.column.date_published.tuple[0]} +#{@tp.column.date_valid.tuple[0]} +#{@tp.column.date_translated.tuple[0]} +#{@tp.column.date_original_publication.tuple[0]} +#{@tp.column.date_generated.tuple[0]} +#{@tp.column.publisher.tuple[0]} +#{@tp.column.original_publisher.tuple[0]} +#{@tp.column.original_language.tuple[0]} +#{@tp.column.original_language_char.tuple[0]} +#{@tp.column.original_source.tuple[0]} +#{@tp.column.original_institution.tuple[0]} +#{@tp.column.original_nationality.tuple[0]} +#{@tp.column.rights_all.tuple[0]} +#{@tp.column.rights_copyright_text.tuple[0]} +#{@tp.column.rights_copyright_translation.tuple[0]} +#{@tp.column.rights_copyright_illustrations.tuple[0]} +#{@tp.column.rights_copyright_photographs.tuple[0]} +#{@tp.column.rights_copyright_preparation.tuple[0]} +#{@tp.column.rights_copyright_digitization.tuple[0]} +#{@tp.column.rights_copyright_audio.tuple[0]} +#{@tp.column.rights_copyright_video.tuple[0]} +#{@tp.column.rights_license.tuple[0]} +#{@tp.column.classify_topic_register.tuple[0]} +#{@tp.column.classify_subject.tuple[0]} 
+#{@tp.column.classify_type.tuple[0]} +#{@tp.column.classify_loc.tuple[0]} +#{@tp.column.classify_dewey.tuple[0]} +#{@tp.column.classify_pg.tuple[0]} +#{@tp.column.classify_isbn.tuple[0]} +#{@tp.column.classify_format.tuple[0]} +#{@tp.column.classify_identifier.tuple[0]} +#{@tp.column.classify_relation.tuple[0]} +#{@tp.column.classify_coverage.tuple[0]} +#{@tp.column.classify_keywords.tuple[0]} +#{@tp.column.notes_abstract.tuple[0]} +#{@tp.column.notes_comment.tuple[0]} +#{@tp.column.notes_description.tuple[0]} +#{@tp.column.notes_history.tuple[0]} +#{@tp.column.notes_prefix.tuple[0]} +#{@tp.column.notes_prefix_a.tuple[0]} +#{@tp.column.notes_prefix_b.tuple[0]} +#{@tp.column.notes_suffix.tuple[0]} +#{@tp.column.filename.tuple[0]} +#{@tp.column.sisutxt.tuple[0]} +#{@tp.column.fulltext.tuple[0]} +#{@tp.column.word_count.tuple[0]} +#{@tp.column.digest.tuple[0]} +#{@tp.column.skin_name.tuple[0]} +#{@tp.column.skin.tuple[0]} +#{@tp.column.links.tuple[0]} +tid) +" + +       "VALUES ( +#{@tp.column.title.tuple[1]} +#{@tp.column.title_main.tuple[1]} +#{@tp.column.title_sub.tuple[1]} +#{@tp.column.title_short.tuple[1]} +#{@tp.column.title_edition.tuple[1]} +#{@tp.column.title_note.tuple[1]} +#{@tp.column.title_language.tuple[1]} +#{@tp.column.title_language_char.tuple[1]} +#{@tp.column.creator_author.tuple[1]} +#{@tp.column.creator_author_honorific.tuple[1]} +#{@tp.column.creator_author_nationality.tuple[1]} +#{@tp.column.creator_contributor.tuple[1]} +#{@tp.column.creator_illustrator.tuple[1]} +#{@tp.column.creator_photographer.tuple[1]} +#{@tp.column.creator_translator.tuple[1]} +#{@tp.column.creator_prepared_by.tuple[1]} +#{@tp.column.creator_digitized_by.tuple[1]} +#{@tp.column.creator_audio.tuple[1]} +#{@tp.column.creator_video.tuple[1]} +#{@tp.column.language_document.tuple[1]} +#{@tp.column.language_document_char.tuple[1]} +#{@tp.column.language_original.tuple[1]} +#{@tp.column.language_original_char.tuple[1]} +#{@tp.column.date_added_to_site.tuple[1]} 
+#{@tp.column.date_available.tuple[1]} +#{@tp.column.date_created.tuple[1]} +#{@tp.column.date_issued.tuple[1]} +#{@tp.column.date_modified.tuple[1]} +#{@tp.column.date_published.tuple[1]} +#{@tp.column.date_valid.tuple[1]} +#{@tp.column.date_translated.tuple[1]} +#{@tp.column.date_original_publication.tuple[1]} +#{@tp.column.date_generated.tuple[1]} +#{@tp.column.publisher.tuple[1]} +#{@tp.column.original_publisher.tuple[1]} +#{@tp.column.original_language.tuple[1]} +#{@tp.column.original_language_char.tuple[1]} +#{@tp.column.original_source.tuple[1]} +#{@tp.column.original_institution.tuple[1]} +#{@tp.column.original_nationality.tuple[1]} +#{@tp.column.rights_all.tuple[1]} +#{@tp.column.rights_copyright_text.tuple[1]} +#{@tp.column.rights_copyright_translation.tuple[1]} +#{@tp.column.rights_copyright_illustrations.tuple[1]} +#{@tp.column.rights_copyright_photographs.tuple[1]} +#{@tp.column.rights_copyright_preparation.tuple[1]} +#{@tp.column.rights_copyright_digitization.tuple[1]} +#{@tp.column.rights_copyright_audio.tuple[1]} +#{@tp.column.rights_copyright_video.tuple[1]} +#{@tp.column.rights_license.tuple[1]} +#{@tp.column.classify_topic_register.tuple[1]} +#{@tp.column.classify_subject.tuple[1]} +#{@tp.column.classify_type.tuple[1]} +#{@tp.column.classify_loc.tuple[1]} +#{@tp.column.classify_dewey.tuple[1]} +#{@tp.column.classify_pg.tuple[1]} +#{@tp.column.classify_isbn.tuple[1]} +#{@tp.column.classify_format.tuple[1]} +#{@tp.column.classify_identifier.tuple[1]} +#{@tp.column.classify_relation.tuple[1]} +#{@tp.column.classify_coverage.tuple[1]} +#{@tp.column.classify_keywords.tuple[1]} +#{@tp.column.notes_abstract.tuple[1]} +#{@tp.column.notes_comment.tuple[1]} +#{@tp.column.notes_description.tuple[1]} +#{@tp.column.notes_history.tuple[1]} +#{@tp.column.notes_prefix.tuple[1]} +#{@tp.column.notes_prefix_a.tuple[1]} +#{@tp.column.notes_prefix_b.tuple[1]} +#{@tp.column.notes_suffix.tuple[1]} +#{@tp.column.filename.tuple[1]} +#{@tp.column.sisutxt.tuple[1]} 
+#{@tp.column.fulltext.tuple[1]} +#{@tp.column.word_count.tuple[1]} +#{@tp.column.digest.tuple[1]} +#{@tp.column.skin_name.tuple[1]} +#{@tp.column.skin.tuple[1]} +#{@tp.column.links.tuple[1]} +#{@id} +);"        if @opt.cmd =~/M/          puts "maintenance mode on: creating sql transaction file (for last transaction set (document) only):\n\t#{@file.inspect}"          @file.puts sql_entry diff --git a/lib/sisu/v2/db_remove.rb b/lib/sisu/v2/db_remove.rb index 99640cdf..0a51b892 100644 --- a/lib/sisu/v2/db_remove.rb +++ b/lib/sisu/v2/db_remove.rb @@ -68,19 +68,19 @@ module SiSU_DB_remove      def remove        driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false        del_id=if driver_sqlite3 -        @conn.get_first_value(%{ SELECT tid FROM metadata WHERE filename LIKE '#{@opt.fns}'; }).to_i +        @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE filename = '#{@opt.fns}'; }).to_i        else -        x=@conn.select_one(%{ SELECT tid FROM metadata WHERE filename LIKE '#{@opt.fns}'; }) -        del=x ? (x.join.to_i) : nil +        x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }) +        x ? 
(x.join.to_i) : nil        end        if del_id          sql_entry=[            "DELETE FROM endnotes WHERE metadata_tid = '#{del_id}';",            "DELETE FROM endnotes_asterisk WHERE metadata_tid = '#{del_id}';",            "DELETE FROM endnotes_plus WHERE metadata_tid = '#{del_id}';", -          "DELETE FROM documents WHERE metadata_tid = '#{del_id}';", +          "DELETE FROM doc_objects WHERE metadata_tid = '#{del_id}';",            "DELETE FROM urls WHERE metadata_tid = '#{del_id}';", -          "DELETE FROM metadata WHERE tid = '#{del_id}';", +          "DELETE FROM metadata_and_text WHERE metadata_and_text.tid = '#{del_id}';",          ]          if driver_sqlite3            @conn.transaction diff --git a/lib/sisu/v2/db_select.rb b/lib/sisu/v2/db_select.rb index 33441b45..1ac9195f 100644 --- a/lib/sisu/v2/db_select.rb +++ b/lib/sisu/v2/db_select.rb @@ -69,6 +69,8 @@ module SiSU_DB_select        if @opt.mod.inspect =~/update|import/          @sdb_import=SiSU_DB_DBI::Import.new(@opt,@conn,@file,@sql_type)          @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file) +      elsif @opt.mod.inspect =~/remove/ +        @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file)        end      end      def sql_maintenance_file @@ -98,8 +100,8 @@ module SiSU_DB_select          when /^--(?:init(?:ialize)?|create(?:all)?)$/            @sdb.output_dir?            begin -          @sdb.create_table.metadata -          @sdb.create_table.documents +          @sdb.create_table.metadata_and_text +          @sdb.create_table.doc_objects            @sdb.create_table.endnotes            @sdb.create_table.endnotes_asterisk            @sdb.create_table.endnotes_plus @@ -110,8 +112,8 @@ module SiSU_DB_select          when /^--createtable(s)?$/            @sdb.output_dir?            
begin -          @sdb.create_table.metadata -          @sdb.create_table.documents +          @sdb.create_table.metadata_and_text +          @sdb.create_table.doc_objects            @sdb.create_table.endnotes            @sdb.create_table.endnotes_asterisk            @sdb.create_table.endnotes_plus @@ -123,8 +125,8 @@ module SiSU_DB_select            @sdb.output_dir?            begin            @sdb_no.drop.tables -          @sdb.create_table.metadata -          @sdb.create_table.documents +          @sdb.create_table.metadata_and_text +          @sdb.create_table.doc_objects            @sdb.create_table.endnotes            @sdb.create_table.endnotes_asterisk            @sdb.create_table.endnotes_plus @@ -135,13 +137,13 @@ module SiSU_DB_select          when /^--cr(eate)?lex$/            @sdb.output_dir?            begin -          @sdb.create_table.documents +          @sdb.create_table.doc_objects            rescue; @sdb.output_dir?            end          when /^--cr(eate)?metadata$/            @sdb.output_dir?            begin -          @sdb.create_table.metadata +          @sdb.create_table.metadata_and_text            rescue; @sdb.output_dir?            end          when /^--import$/ diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb new file mode 100644 index 00000000..f120b95f --- /dev/null +++ b/lib/sisu/v2/db_sqltxt.rb @@ -0,0 +1,115 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search +   #___# + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007, 2008, 2009, 2010 Ralph Amissah All Rights Reserved. 
+ + * License: GPL 3 or later: + +   SiSU, a framework for document structuring, publishing and search + +   Copyright (C) Ralph Amissah + +   This program is free software: you can redistribute it and/or modify it +   under the terms of the GNU General Public License as published by the Free +   Software Foundation, either version 3 of the License, or (at your option) +   any later version. + +   This program is distributed in the hope that it will be useful, but WITHOUT +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   more details. + +   You should have received a copy of the GNU General Public License along with +   this program. If not, see <http://www.gnu.org/licenses/>. + +   If you have Internet connection, the latest version of the GPL should be +   available at these locations: +   <http://www.fsf.org/licensing/licenses/gpl.html> +   <http://www.gnu.org/copyleft/gpl.html> + +   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html> +   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html> +   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt> + + * SiSU uses: +   * Standard SiSU markup syntax, +   * Standard SiSU meta-markup syntax, and the +   * Standard SiSU object citation numbering and system + + * Hompages: +   <http://www.jus.uio.no/sisu> +   <http://www.sisudoc.org> + + * Download: +   <http://www.jus.uio.no/sisu/SiSU/download.html> + + * Ralph Amissah +   <ralph@amissah.com> +   <ralph.amissah@gmail.com> + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_DB_text +  class Prepare +    def special_character_escape(str) +      str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") +      str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n") +      str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check +      str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: 
\1] \2') +      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') +      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1') +      str +    end +    def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source +      txt_arr,en=[],[] +      arr.each do |s| +        s.gsub!(/([*\/_-])\{(.+?)\}\1/,'\2') +        s.gsub!(/^(?:group|poem|code)\{/,''); s.gsub!(/^\}(?:group|poem|code)/,'') +        s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'') +        if s =~/^:A~/ +          s.gsub!(/@author/,@md.creator.author) +          s.gsub!(/@title/,@md.title.full) +        end +        s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'') +        s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'') +        s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/,'') +        s.gsub!(/^%{1,3} .+/,'') #removed even if contained in code block +        s.gsub!(/<br>/,' ') +        en << s.scan(/~\{\s*(.+?)\s*\}~/) +        s.gsub!(/~\{.+?\}~/,'') +        s.gsub!(/ \s+/,' ') +        #special_character_escape(s) +        s +      end +      txt_arr << arr << en +      #txt_arr=txt_arr.flatten +      txt=txt_arr.flatten.join("\n") +      txt=special_character_escape(txt) +      txt +    end +    def strip_markup(str) #define rules, make same as in dal clean +      str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') +      str.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') +      str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1')         #tables +      str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ')                          #tables +      str.gsub!(/#{Mx[:tc_p]}/u,' ')                                                     #tables tidy later +      str.gsub!(/<.+?>/,'') +      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search +      
str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search +      str.gsub!(/\s\s+/,' ') +      str.strip! +      str +    end +  end +end +__END__ + | 
