diff options
Diffstat (limited to 'lib/sisu/v2/db_import.rb')
-rw-r--r-- | lib/sisu/v2/db_import.rb | 131 |
1 files changed, 67 insertions, 64 deletions
diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index 5610a1d0..0e2db8e3 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -86,14 +86,17 @@ module SiSU_DB_import @col[:ocn]='' @counter={} @db=SiSU_Env::Info_db.new - @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false + if @sql_type=='sqlite' + @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \ + ? true \ + : false + end sql='SELECT MAX(lid) FROM doc_objects' begin @col[:lid] ||=0 - @col[:lid]=if @driver_sqlite3 - @conn.execute( sql ).join.to_i - else @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } - end + @col[:lid]=@driver_sqlite3 \ + ? @conn.execute( sql ).join.to_i \ + : @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } rescue puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/ end @@ -101,11 +104,9 @@ module SiSU_DB_import sql='SELECT MAX(nid) FROM endnotes' begin @id_n ||=0 - @id_n=if @driver_sqlite3 - @conn.execute( sql ).join.to_i - else - @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } - end + @id_n=@driver_sqlite3 \ + ? @conn.execute( sql ).join.to_i \ + : @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } rescue puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/ end @@ -121,12 +122,11 @@ module SiSU_DB_import tell.puts_blue unless @opt.cmd =~/q/ tell=SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc) tell.print_grey if @opt.cmd =~/v/ - file_exist=if @sql_type=~/sqlite/; nil - else - @conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }) - end - if (@sql_type!~/sqlite/ and not file_exist) \ - or @sql_type=~/sqlite/ + select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; } + file_exist=@sql_type=~/sqlite/ \ + ? @conn.get_first_value(select_first_match) \ + : @conn.select_one(select_first_match) + if not file_exist t_d=[] # transaction_data t_d << db_import_metadata t_d << db_import_documents(@dal_array) @@ -265,7 +265,8 @@ module SiSU_DB_import @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last end if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last @@ -298,7 +299,8 @@ module SiSU_DB_import @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -324,7 +326,8 @@ module SiSU_DB_import @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -350,7 +353,8 @@ module SiSU_DB_import @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -389,12 +393,13 @@ module SiSU_DB_import end special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) @tuple_array << t.tuple @en,@en_ast,@en_pls=[],[],[] @col[:en_a]=@col[:en_z]=nil - @col[:lev]=@col[:plaintext]=@col[:body]='' + @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]='' end if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) @@ -406,8 +411,6 @@ module SiSU_DB_import @id_n+=1 special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) - #special_character_escape(body) - #special_character_escape(txt) strip_markup(txt) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" @@ -418,16 +421,16 @@ module SiSU_DB_import end if txt en={ :type => 'endnotes', - :id => @id_n, - :lid => @col[:lid], - :nr => nr, - :txt => txt, - :body => body, - :ocn => @col[:ocn], - :ocnd => @col[:ocnd], - :ocns => @col[:ocns], - :id_t => @@id_t, - :hash => digest_clean + :id => @id_n, + :lid => @col[:lid], + :nr => nr, + :txt => txt, + :body => body, + :ocn => @col[:ocn], + :ocnd => @col[:ocnd], + :ocns => @col[:ocns], + :id_t => @@id_t, + :hash => digest_clean } t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) @tuple_array << t.tuple @@ -456,16 +459,16 @@ module SiSU_DB_import end if txt en={ :type => 'endnotes_asterisk', - :id => @id_n, - :lid => @col[:lid], - :nr => nr, - :txt => txt, - :body => body, - :ocn => @col[:ocn], - :ocnd => @col[:ocnd], - :ocns => @col[:ocns], - :id_t => @@id_t, - :hash => digest_clean + :id => @id_n, + :lid => @col[:lid], + :nr => nr, + :txt => txt, + :body => body, + :ocn => @col[:ocn], + :ocnd => @col[:ocnd], + :ocns => @col[:ocns], + :id_t => @@id_t, + :hash => digest_clean } t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) @tuple_array << t.tuple @@ -494,16 +497,16 @@ module SiSU_DB_import end if txt en={ :type => 'endnotes_plus', - :id => @id_n, - :lid => @col[:lid], - :nr => nr, - :txt => txt, - :body => body, - :ocn => @col[:ocn], - :ocnd => @col[:ocnd], - :ocns => @col[:ocns], - :id_t => @@id_t, - :hash => digest_clean + :id => @id_n, + :lid => @col[:lid], + :nr => nr, + :txt => txt, + :body => body, + :ocn => @col[:ocn], + :ocnd => @col[:ocnd], + :ocns => @col[:ocns], + :id_t => @@id_t, + :hash => digest_clean } t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) @tuple_array << t.tuple @@ -526,25 +529,25 @@ module SiSU_DB_import endnotes(@txt).range @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ + @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ @txt=endnotes(@txt).clean_text end @txt end def standard - x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) - else nil - end + x=(@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \ + ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) \ + : nil end def asterisk - x=if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) - else nil - end + x=(@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \ + ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) \ + : nil end def plus - x=if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) - else nil - end + x=(@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \ + ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) \ + : nil end def clean_text(base_url=nil) if base_url |