From 35e8a112e76ec2990e21877f2453355325325180 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Apr 2010 20:56:26 -0400 Subject: dal, on processing screen output display, minor --- lib/sisu/v2/dal.rb | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/dal.rb b/lib/sisu/v2/dal.rb index 04363498..6fd23b60 100644 --- a/lib/sisu/v2/dal.rb +++ b/lib/sisu/v2/dal.rb @@ -210,8 +210,6 @@ module SiSU_DAL protected def create_dal dal_array=[] - tell=SiSU_Screen::Ansi.new(@opt.cmd,'Document Abstraction') - tell.green_title_hi unless @opt.cmd =~/q/ file_array=@env.read_source_file(@opt.fns) file_array.each do |l| if l =~/\r\n/; l.gsub!(/\r\n/,"\n") @@ -220,11 +218,19 @@ module SiSU_DAL meta=file_array.dup meta=meta.join.split("\n\n") #check whether can be eliminated, some of these are large objects to have twice @md=SiSU_Param::Parameters::Instructions.new(meta,@opt).extract + unless @opt.cmd =~/q/ + tell=(@md.cmd=~/[vVM]/) \ + ? SiSU_Screen::Ansi.new(@opt.cmd,'Document Abstraction') \ + : SiSU_Screen::Ansi.new(@opt.cmd,'Document Abstraction',@md.fns) + tell.green_title_hi + end meta=nil dal=SiSU_DAL::Make.new(@md,file_array).song - SiSU_Screen::Ansi.new(@md.cmd,@md.fns,"~meta/#{@md.fns}.meta").output if @md.cmd =~/v/ - tell=SiSU_Screen::Ansi.new(@md.cmd,"dal -> #{@make_fns.meta}") if @md.cmd =~/M/ - tell.txt_grey unless @md.cmd =~/q/ + if @md.cmd =~/[vM]/ + SiSU_Screen::Ansi.new(@md.cmd,@md.fns,"~meta/#{@md.fns}.meta").output if @md.cmd =~/v/i + tell=SiSU_Screen::Ansi.new(@md.cmd,"dal -> #{@make_fns.meta}") if @md.cmd =~/M/ + tell.txt_grey unless @md.cmd =~/q/ + end dal.each{|s| dal_array << s} dal_array end -- cgit v1.2.3 From 1ec06b89078273067b64262eb9166e8e6e278f62 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Apr 2010 20:57:23 -0400 Subject: shared_metadata, fix wrong reference to md.gutenberg instead of md.pg prevents building of latex/pdf in affected cases --- lib/sisu/v2/shared_metadata.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/sisu/v2/shared_metadata.rb b/lib/sisu/v2/shared_metadata.rb index f39b2f79..07f87e2c 100644 --- a/lib/sisu/v2/shared_metadata.rb +++ b/lib/sisu/v2/shared_metadata.rb @@ -645,7 +645,7 @@ WOK end if defined? @md.classify.pg \ and @md.classify.pg=~/\S+/ - tag,inf=tr.cls_gutenberg,@md.classify.gutenberg + tag,inf=tr.cls_gutenberg,@md.classify.pg meta << meta_para(tag,inf) end if defined? @md.classify.isbn \ -- cgit v1.2.3 From 178ce66b4335fe51d2e18e729744be751e9d6280 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Apr 2010 20:59:07 -0400 Subject: param, information on missing author, title, or copyright --- lib/sisu/v2/param.rb | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/param.rb b/lib/sisu/v2/param.rb index c50a55ee..ef91f10d 100644 --- a/lib/sisu/v2/param.rb +++ b/lib/sisu/v2/param.rb @@ -149,8 +149,8 @@ module SiSU_Param end end class Md - def initialize(str) - @s=str + def initialize(str,opt) + @s,@opt=str,opt end def validate_length(s,l,n) #s=(s.length <= l) ? s : nil @@ -388,7 +388,10 @@ module SiSU_Param @h['text'] elsif @h['main'] @h['main'] - else p 'provide copyright' + else + tell=SiSU_Screen::Ansi.new(@cmd,'WARNING Document Copyright missing; provide @rights: :copyright:') + tell.warn unless @opt.cmd =~/q/ + '' end end def translation @@ -439,6 +442,10 @@ module SiSU_Param and copyright.license s +=' License: ' + copyright.license end + if s.empty? + tell=SiSU_Screen::Ansi.new(@cmd,'WARNING Document Rights information missing; provide @rights: :copyright:') + tell.warn unless @opt.cmd =~/q/ + end s end s @@ -878,23 +885,23 @@ module SiSU_Param case para when /^@ocn:\s+(.+?)$/m; @ocn=$1 #% processing when /^@title:(.+)/m #% metainfo DC - @title=Md.new($1.strip).title + @title=Md.new($1.strip,@opt).title when /^@creator:(.+)/m #% metainfo DC - @creator=Md.new($1.strip).creator + @creator=Md.new($1.strip,@opt).creator @authorship=@author=@creator.author @authors=@creator.author_detail when /^@classify:(.+)/m; classify=$1 - @classify=Md.new($1.strip).classify + @classify=Md.new($1.strip,@opt).classify when /^@publisher:\s+(.+)/m - @publisher=Md.new($1.strip).current_publisher + @publisher=Md.new($1.strip,@opt).current_publisher when /^@original:(.+)/m - @original=Md.new($1.strip).original + @original=Md.new($1.strip,@opt).original @source=@original.source when /^@writing_focus[:.]nationality:\s+(.+?)$/; @writing_focus=$1 # e.g. Finland (where and article on Finnish law) when /^@date:(.+)/m #% metainfo DC - @date=Md.new($1.strip).date + @date=Md.new($1.strip,@opt).date when /^@rights:(.+)/m; #@rights=$1.gsub(/<(?:\/\s*)?br(?:\s*\/)?>/,Mx[:br_line]) #% metainfo DC copyright, public domain, copyleft, creative commons, etc. - @rights=Md.new($1.strip).rights + @rights=Md.new($1.strip,@opt).rights when /^@papersize:\s+(.+?)$/m #% metainfo DC l=$1 if @mod.inspect !~/--papersize[=-]\S+/ @@ -902,7 +909,7 @@ module SiSU_Param @papersize=l end when /^@make:(.+)/m #% metainfo DC - @make=Md.new($1.strip).make + @make=Md.new($1.strip,@opt).make if defined? @make.breaks \ and @make.breaks[:page_new] #clearpage @pagenew=@make.breaks[:page_new] @@ -934,7 +941,7 @@ module SiSU_Param @man_section=(defined? @make.manpage.section) ? @make.manpage.section : 1 end when /^@links:\s+(.+?)\Z/m #% processing - doc_links=Md.new($1.strip).links + doc_links=Md.new($1.strip,@opt).links count=1 @lnk=[] doc_links.each do |doc_link| @@ -945,7 +952,7 @@ module SiSU_Param end end when /^@notes?:\s(.+)/ #% metainfo - Md.new($1.strip).notes + Md.new($1.strip,@opt).notes when /^@base_program:\s+(.+?)$/; @base_program=$1 #% processing end @lv1 ||=/^1~/ @@ -1153,12 +1160,12 @@ module SiSU_Param footnote_conversion_errors << "#{@fns}:\n\tendnotes: #{@en[:note]} != endnote reference marks: #{@en[:mark]} (difference = #{@en[:mismatch]})\n" end if @title.main !~/[\S]/ - tell=SiSU_Screen::Ansi.new(@cmd,'Document Title Missing','please provide it') - tell.warn if @cmd =~/v/ + tell=SiSU_Screen::Ansi.new(@cmd,'WARNING Document Title missing','please provide @title:') + tell.warn unless @cmd =~/q/ end if @author !~/[\S]/ - tell=SiSU_Screen::Ansi.new(@cmd,'Document Author/Creator Missing','please provide it') - tell.warn if @cmd =~/v/ + tell=SiSU_Screen::Ansi.new(@cmd,'Warning Document Author missing','please provide @creator: :author:') + tell.warn unless @cmd =~/q/ end # Elementary Document Structure Analysis - adds complexity may remove - need to develop - appears to work, proof of concept if @title.nil? @@ -1259,7 +1266,7 @@ module SiSU_Param def store File.unlink(@pstorefile) if FileTest.file?(@pstorefile) tell=SiSU_Screen::Ansi.new(@md.cmd,"PStore -> #{@pstorefile}") - tell.txt_grey if @md.cmd =~/v/ + tell.txt_grey if @md.cmd =~/[MV]/ store=PStore.new(@pstorefile) store.transaction do store['md']=@md -- cgit v1.2.3 From fc01f2cd27721cccbda35ae01ab5413bb8e099db Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Apr 2010 21:00:58 -0400 Subject: cgi_sql_common, sample search form, on navigation of matched results --- lib/sisu/v2/cgi_sql_common.rb | 155 +++++++++++++++++------------------------- 1 file changed, 64 insertions(+), 91 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/cgi_sql_common.rb b/lib/sisu/v2/cgi_sql_common.rb index e536551f..65bb01a9 100644 --- a/lib/sisu/v2/cgi_sql_common.rb +++ b/lib/sisu/v2/cgi_sql_common.rb @@ -203,10 +203,8 @@ module SiSU_CGI_sql <<-'WOK_SQL'
match limit: - 1,000 - 2,000 - 5,000 - 10,000 + 1,000 + 2,500
echo query result stats @@ -334,9 +332,7 @@ module SiSU_CGI_sql #@offset||=@@offset #@offset+=@@limit search={ :text => [],:endnotes => [] } - cse=if c =~/\S/; true - else false - end + cse=(c =~/\S/) ? true : false st=Dbi_search_string.new('doc_objects.clean',search_for.text1,q['s1'],cse).string se=Dbi_search_string.new('endnotes.clean',search_for.text1,q['s1'],cse).string @text_search_flag=st[:flag] @@ -480,14 +476,14 @@ module SiSU_CGI_sql page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i if beyond_limit if page.to_s =~ /^1$/ - %{

+ %{
pg. #{page.to_s}  >>
} elsif page.to_s =~ /^2$/ - %{

+ %{
<<  @@ -497,7 +493,7 @@ module SiSU_CGI_sql
} else - %{

+ %{
|<  @@ -513,14 +509,14 @@ module SiSU_CGI_sql else if page.to_s =~ /^1$/; '' elsif page.to_s =~ /^2$/ - %{

+ %{
<<  pg. #{page.to_s}
} else - %{

+ %{
|<  @@ -650,22 +646,14 @@ module SiSU_CGI_sql "#{@db_name_prefix}#{@stub}" end checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - if cgi['view']=~/text/ - result_type={:index=>'',:text=>'checked'} - else - result_type={:index=>'checked',:text=>''} - end - @@limit=if cgi['sql_match_limit'].to_s=~/10000/ - checked_sql_limit={:l1k=>'',:l2k=>'',:l5k=>'',:l10k=>'checked'} - '10000' - elsif cgi['sql_match_limit'].to_s=~/5000/ - checked_sql_limit={:l1k=>'',:l2k=>'',:l5k=>'checked',:l10k=>''} - '5000' - elsif cgi['sql_match_limit'].to_s=~/2000/ - checked_sql_limit={:l1k=>'',:l2k=>'checked',:l5k=>'',:l10k=>''} - '2000' + result_type=(cgi['view']=~/text/) \ + ? result_type={:index=>'',:text=>'checked'} \ + : result_type={:index=>'checked',:text=>''} + @@limit=if cgi['sql_match_limit'].to_s=~/2500/ + checked_sql_limit={:l1000=>'',:l2500=>'checked'} + '2500' else - checked_sql_limit={:l1k=>'checked',:l2k=>'',:l5k=>'',:l10k=>''} + checked_sql_limit={:l1000=>'checked',:l2500=>''} '1000' end checked_echo='checked' if cgi['echo'] =~/\S/ @@ -728,10 +716,9 @@ module SiSU_CGI_sql dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=if checked_all =~/checked/ - "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - else "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - end + @@canned_search_url=(checked_all =~/checked/) \ + ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" \ + : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" mod=ft=~/\S+/ ? (ft.gsub(/ft/,'s1')) : s1 @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" if checked_case=~/\S/ @@ -802,10 +789,9 @@ module SiSU_CGI_sql print "Content-type: text/html\n\n" puts (@header+@tail) else #% searches - s1=if @search_for.text1 =~/\S/ - @search_for.text1 - else 'Unavailable' - end + s1=(@search_for.text1 =~/\S/) \ + ? @search_for.text1 \ + : 'Unavailable' if checked_case=~/\S/ @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} @search[:endnotes]<<%{endnotes.clean~'#{CGI.unescape(s1)}'} @@ -845,13 +831,13 @@ module SiSU_CGI_sql #metadata_found_body if c['tid'].to_i != oldtid.to_i ti=c['title'] - can_txt_srch=if cgi['view']=~/index/; %{search } - else %{search } - end + can_txt_srch=(cgi['view']=~/index/) \ + ? %{search } \ + : %{search } title=%{toc html #{ti} by #{c['creator_author']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ #hmm watch file_suffix - if @text_search_flag; title='

'+title - else title='
'+title - end + title=@text_search_flag \ + ? '

'+title \ + : '
'+title @counter_txt_doc+=1 oldtid=c['tid'].to_i else title='' @@ -871,10 +857,9 @@ module SiSU_CGI_sql @search_regx=if unescaped_search #check search_regex=[] build=unescaped_search.scan(/\S+/).each do |g| - if g.to_s =~/(AND|OR)/ - search_regex << '|' - else search_regex << %{#{g.to_s}} - end + (g.to_s =~/(AND|OR)/) \ + ? (search_regex << '|') \ + : (search_regex << %{#{g.to_s}}) end search_regex=search_regex.join(' ') search_regex=search_regex.gsub(/\s*\|\s*/,'|') @@ -883,10 +868,9 @@ module SiSU_CGI_sql end else nil end - matched_para=if (@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) - matched=c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1}) - else c['body'] - end + matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ + ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) \ + : c['body'] %{

ocn #{c['ocn']}:

#{matched_para}} elsif c['suffix'] =~/1/ #doc %{#{title}

ocn #{c['ocn']}:#{c['body']}} @@ -906,24 +890,19 @@ module SiSU_CGI_sql end else @counter_txt_ocn+=1 - output=unless c['suffix'] =~/1/; title+index - else %{#{title}#{c['ocn'].sort}, } - end + output=c['suffix'] !~/1/ \ + ? title+index \ + : %{#{title}#{c['ocn'].sort}, } end end else output=title end @counters_txt=if @counter_txt_doc > 0 if checked_stats =~/\S/ - @@lt_t=if @counter_txt_ocn==dbi_statement.sql_match_limit.to_i - over_limit='over the limit set of' - over_this_number='more than' - true - else - over_limit,over_this_number='','' - false - end - %{


Found in the main body of #{over_this_number} #{@counter_txt_doc} documents, and at #{over_limit} #{@counter_txt_ocn} locations within.
} + @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false + start=(@@offset.to_i+1).to_s + range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s + %{
Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} else '' end else '' @@ -945,9 +924,9 @@ module SiSU_CGI_sql if @text_search_flag if e['metadata_tid'].to_i != oldtid.to_i ti=e['title'] - can_txt_srch=if cgi['view']=~/index/; %{search } - else %{search } - end + can_txt_srch=(cgi['view']=~/index/) \ + ? %{search } \ + : %{search } title=%{

toc html #{ti} by #{e['creator_author']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ @counter_endn_doc+=1 oldtid=e['metadata_tid'].to_i @@ -956,10 +935,9 @@ module SiSU_CGI_sql if cgi['view']=~/text/ \ or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt endnotes @counter_endn_ocn+=1 - matched_endnote=if (@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) - matched=e['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1}) - else e['body'] - end + matched_endnote=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ + ? matched=e['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1}) \ + : e['body'] output=%{#{title}
note #{e['nr']} referred to from ocn #{e['ocn']}: #{matched_endnote}} else #elsif cgi['view']=~/index/ #% idx endnotes @counter_endn_ocn+=1 @@ -967,33 +945,28 @@ module SiSU_CGI_sql end @counters_endn=if @counter_endn_doc > 0 if checked_stats =~/\S/ - @@lt_e=if @counter_endn_ocn==dbi_statement.sql_match_limit.to_i - over_limit='over the limit set of' - over_this_number='more than' - true - else - over_limit,over_this_number='','' - false - end - %{Found in the endnotes of #{over_this_number} #{@counter_endn_doc} documents, and at #{over_limit} #{@counter_endn_ocn} locations within.
} - else '' - end + @@lt_e=@counter_endn_ocn==dbi_statement.sql_match_limit.to_i \ + ? true \ + : false + start=(@@offset.to_i+1).to_s + range=(@@offset.to_i+@counter_endn_ocn.to_i).to_s + %{Found #{@counter_endn_ocn} times in the endnotes of #{@counter_endn_doc} documents [ matches #{start} to #{range} ]
} + else '' end - @endnotes << output #+ details - else @endnotes=[] #does not take out yet end + @endnotes << output #+ details + else @endnotes=[] #does not take out yet end - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=if (@@lt_t or @@lt_e) - dbi_statement.pre_next(true,@image_src).to_s - else - dbi_statement.pre_next(false,@image_src).to_s - end - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header + @counters_txt + @counters_endn + @body_main.join + @endnotes.join + canned + @tail} #% print cgi_output_header+counters+body+endnotes end + offset=dbi_statement.sql_offset.to_s + limit=dbi_statement.sql_match_limit.to_s + @@lt_t ||=false; @@lt_e ||=false + canned=(@@lt_t or @@lt_e) \ + ? dbi_statement.pre_next(true,@image_src).to_s \ + : dbi_statement.pre_next(false,@image_src).to_s + limit=dbi_statement.sql_match_limit.to_s + cgi.out{@header + @counters_txt + @counters_endn + canned + @body_main.join + @endnotes.join + canned + @tail} #% print cgi_output_header+counters+body+endnotes + end rescue Exception => e s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
           s << CGI::escapeHTML(e.message) + '
' -- cgit v1.2.3 From fb823467d03d7b8319c96229090850bbf2265d56 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Apr 2010 21:03:04 -0400 Subject: db_dbi, cosmetic --- lib/sisu/v2/db_dbi.rb | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/db_dbi.rb b/lib/sisu/v2/db_dbi.rb index 513b54a3..1a3825b9 100644 --- a/lib/sisu/v2/db_dbi.rb +++ b/lib/sisu/v2/db_dbi.rb @@ -58,36 +58,36 @@ =end module SiSU_DB_DBI - require "#{SiSU_lib}/db_columns" # db_columns.rb - require "#{SiSU_lib}/db_tests" # db_tests.rb - require "#{SiSU_lib}/db_create" # db_create.rb - require "#{SiSU_lib}/db_select" # db_select.rb - require "#{SiSU_lib}/db_indexes" # db_indexes.rb - require "#{SiSU_lib}/db_drop" # db_drop.rb - require "#{SiSU_lib}/db_remove" # db_remove.rb - require "#{SiSU_lib}/db_load_tuple" # db_load_tuple.rb - require "#{SiSU_lib}/db_import" # db_import.rb - class Column_size < SiSU_DB_columns::Column_size + require "#{SiSU_lib}/db_columns" # db_columns.rb + require "#{SiSU_lib}/db_tests" # db_tests.rb + require "#{SiSU_lib}/db_create" # db_create.rb + require "#{SiSU_lib}/db_select" # db_select.rb + require "#{SiSU_lib}/db_indexes" # db_indexes.rb + require "#{SiSU_lib}/db_drop" # db_drop.rb + require "#{SiSU_lib}/db_remove" # db_remove.rb + require "#{SiSU_lib}/db_load_tuple" # db_load_tuple.rb + require "#{SiSU_lib}/db_import" # db_import.rb + class Column_size < SiSU_DB_columns::Column_size # db_columns.rb end - class Test < SiSU_DB_tests::Test #% db_tests.rb + class Test < SiSU_DB_tests::Test # db_tests.rb end - class Create Date: Sat, 17 Apr 2010 21:09:10 -0400 Subject: db_sqltxt, issue with: escaping backslashes (that works for both pgsql & sqlite); missing author and/or title (where called in first heading) --- lib/sisu/v2/db_sqltxt.rb | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb index f120b95f..68e9ef8a 100644 --- a/lib/sisu/v2/db_sqltxt.rb +++ b/lib/sisu/v2/db_sqltxt.rb @@ -62,6 +62,7 @@ module SiSU_DB_text class Prepare def special_character_escape(str) str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") + str.gsub!(/(\\)/m,'\1\1') #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"
\n") str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') @@ -76,8 +77,22 @@ module SiSU_DB_text s.gsub!(/^(?:group|poem|code)\{/,''); s.gsub!(/^\}(?:group|poem|code)/,'') s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'') if s =~/^:A~/ - s.gsub!(/@author/,@md.creator.author) - s.gsub!(/@title/,@md.title.full) + if defined? @md.creator \ + and defined? @md.creator.author \ + and not @md.creator.author.empty? + s.gsub!(/@author/,@md.creator.author) + else + tell=SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:') + tell.warn + end + if defined? @md.title \ + and defined? @md.title.full \ + and not @md.title.full.empty? + s.gsub!(/@title/,@md.title.full) + else + tell=SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:') + tell.warn + end end s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'') s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'') -- cgit v1.2.3 From 85eb3805118bc57b866e6c545cbfa8d24c86f626 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Apr 2010 21:20:02 -0400 Subject: db_create, on processing screen output appropriate display for pgsql & sqlite, minor fix * previously reported pgsql info for sqlite processing --- lib/sisu/v2/db_create.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb index 8ed638dc..1fc00168 100644 --- a/lib/sisu/v2/db_create.rb +++ b/lib/sisu/v2/db_create.rb @@ -80,7 +80,9 @@ module SiSU_DB_create end def create_db @env=SiSU_Env::Info_env.new(@opt.fns) - tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db:',%{"SiSU_#{@env.path.stub_pwd}"}) + tell=(@sql_type=='sqlite') \ + ? SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create Sqlite db tables in:',%{"#{@env.path.output}/sisu_sqlite.db"}) \ + : SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db tables in:',%{"#{Db[:name_prefix]}#{@env.path.stub_pwd}"}) tell.colorize unless @opt.cmd =~/q/ SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) if @sql_type=='pg' #watch use of path.stub_pwd instead of stub end -- cgit v1.2.3 From 9f5b2818972fc01ffd992b2635765c982f644e24 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Apr 2010 21:38:33 -0400 Subject: db_import sqlite, prevent multiple import of file with same filename(.sst) (cgi form should work correctly, without multiple entries) db_import, db_remove: * wrap some sqlite actions in sqlite check; * cosmetic code arrangement --- lib/sisu/v2/db_import.rb | 32 ++++++++++++++++---------------- lib/sisu/v2/db_remove.rb | 10 +++++++--- 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index 5610a1d0..45aca11b 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -86,14 +86,17 @@ module SiSU_DB_import @col[:ocn]='' @counter={} @db=SiSU_Env::Info_db.new - @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false + if @sql_type=='sqlite' + @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \ + ? true \ + : false + end sql='SELECT MAX(lid) FROM doc_objects' begin @col[:lid] ||=0 - @col[:lid]=if @driver_sqlite3 - @conn.execute( sql ).join.to_i - else @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } - end + @col[:lid]=@driver_sqlite3 \ + ? @conn.execute( sql ).join.to_i \ + : @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } rescue puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/ end @@ -101,11 +104,9 @@ module SiSU_DB_import sql='SELECT MAX(nid) FROM endnotes' begin @id_n ||=0 - @id_n=if @driver_sqlite3 - @conn.execute( sql ).join.to_i - else - @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } - end + @id_n=@driver_sqlite3 \ + ? @conn.execute( sql ).join.to_i \ + : @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } rescue puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/ end @@ -121,12 +122,11 @@ module SiSU_DB_import tell.puts_blue unless @opt.cmd =~/q/ tell=SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc) tell.print_grey if @opt.cmd =~/v/ - file_exist=if @sql_type=~/sqlite/; nil - else - @conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }) - end - if (@sql_type!~/sqlite/ and not file_exist) \ - or @sql_type=~/sqlite/ + select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; } + file_exist=@sql_type=~/sqlite/ \ + ? @conn.get_first_value(select_first_match) \ + : @conn.select_one(select_first_match) + if not file_exist t_d=[] # transaction_data t_d << db_import_metadata t_d << db_import_documents(@dal_array) diff --git a/lib/sisu/v2/db_remove.rb b/lib/sisu/v2/db_remove.rb index 0a51b892..e7942a15 100644 --- a/lib/sisu/v2/db_remove.rb +++ b/lib/sisu/v2/db_remove.rb @@ -59,14 +59,18 @@ =end module SiSU_DB_remove class Remove - def initialize(opt,conn,file) - @opt,@conn,@file=opt,conn,file + def initialize(opt,conn,file,sql_type) + @opt,@conn,@file,@sql_type=opt,conn,file,sql_type @md=SiSU_Param::Parameters.new(@opt).get @fnb=@md.fnb @db=SiSU_Env::Info_db.new end def remove - driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false + driver_sqlite3=if @sql_type=='sqlite' + (@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \ + ? true \ + : false + end del_id=if driver_sqlite3 @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE filename = '#{@opt.fns}'; }).to_i else -- cgit v1.2.3 From d1eeab54588f762adfa492ace63bfd09f4e1e350 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Apr 2010 21:49:35 -0400 Subject: db_select, check for whether db exists before attempting certain actions (note this does not check whether tables have been created) --- lib/sisu/v2/db_select.rb | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/db_select.rb b/lib/sisu/v2/db_select.rb index 1ac9195f..87452c5c 100644 --- a/lib/sisu/v2/db_select.rb +++ b/lib/sisu/v2/db_select.rb @@ -61,16 +61,30 @@ module SiSU_DB_select class Case def initialize(opt,conn='',sql_type='pg') @opt,@conn,@sql_type=opt,conn,sql_type - @file=sql_maintenance_file @db=SiSU_Env::Info_db.new + @file=sql_maintenance_file @sdb=SiSU_DB_DBI::Create.new(@opt,@conn,@file,@sql_type) # db_dbi.rb @sdb_index=SiSU_DB_DBI::Index.new(@opt,@conn,@file,@sql_type) # db_dbi.rb @sdb_no=SiSU_DB_DBI::Drop.new(@opt,@conn,@db,@sql_type) # db_dbi.rb if @opt.mod.inspect =~/update|import/ @sdb_import=SiSU_DB_DBI::Import.new(@opt,@conn,@file,@sql_type) - @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file) + @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file,@sql_type) elsif @opt.mod.inspect =~/remove/ - @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file) + @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file,@sql_type) + end + end + def db_exist? + if @sql_type=='sqlite' \ + and (not (FileTest.file?(@db.sqlite.db)) or FileTest.zero?(@db.sqlite.db)) + puts %{no connection with #{@sql_type} database established, createdb "#{@db.sqlite.db}"?} + exit + end + if @conn.class==NilClass + db=@sql_type=='sqlite' \ + ? @db.sqlite.db \ + : @db.psql.db + puts %{no connection with #{@sql_type} database established, createdb "#{db}"?} + exit end end def sql_maintenance_file @@ -109,7 +123,7 @@ module SiSU_DB_select @sdb_index.create_indexes rescue; SiSU_Errors::Info_error.new($!,$@,'-D').error; @sdb.output_dir? end - when /^--createtable(s)?$/ + when /^--createtables?$/ @sdb.output_dir? begin @sdb.create_table.metadata_and_text @@ -147,6 +161,7 @@ module SiSU_DB_select rescue; @sdb.output_dir? end when /^--import$/ + db_exist? @sdb_import.marshal_load tell=case @sql_type when /sqlite/; SiSU_Screen::Ansi.new(@opt.cmd,"sqlite #{@db.sqlite.db} database?") @@ -155,19 +170,25 @@ module SiSU_DB_select end tell.puts_grey if @opt.cmd =~/v/ when /^--update$/ + db_exist? @sdb_remove_doc.remove @sdb_import.marshal_load tell=SiSU_Screen::Ansi.new(@opt.cmd,"pgaccess or psql #{@db.psql.db} database?") tell.puts_grey if @opt.cmd =~/v/ when /^--remove$/ + db_exist? @sdb_remove_doc.remove when /^--index$/ + db_exist? @sdb_index.create_indexes when /^droptable(s)?$/ + db_exist? @sdb_no.drop.tables when /^--dropindex(es)?$/ + db_exist? @sdb_no.drop.indexes when /^--(?:dropall|drop)$/ + db_exist? @sdb_no.drop.tables when /^--(?:db=)?(?:(?:sq)?lite|pg(?:sql)?|my(?:sql)?)$/ else -- cgit v1.2.3 From 9d34c408a77c6ef447aaabb7c4c31f69fc0c82a8 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 19 Apr 2010 20:45:46 -0400 Subject: shared_html_lite, markup heading objects --- lib/sisu/v2/shared_html_lite.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/shared_html_lite.rb b/lib/sisu/v2/shared_html_lite.rb index a972e379..6303691b 100644 --- a/lib/sisu/v2/shared_html_lite.rb +++ b/lib/sisu/v2/shared_html_lite.rb @@ -188,11 +188,13 @@ GSUB tag_para(h) end def lev4_plus - h={:txt =>@txt,:class =>"h#{@lv}",:type =>'substantive',:id =>@ocn,:header =>@hname} + txt=markup_object(@t_o) + h={:txt =>txt,:class =>"h#{@lv}",:type =>'substantive',:id =>@ocn,:header =>@hname} tag_header(h) end def lev4_minus - h={:txt =>@t_o.obj,:class =>"h#{@t_o.ln}",:type =>'substantive',:id =>@t_o.ocn} + txt=markup_object(@t_o) + h={:txt =>txt,:class =>"h#{@t_o.ln}",:type =>'substantive',:id =>@ocn} tag_para(h) end def norm_comment -- cgit v1.2.3 From 7fc56d1de62d33980a9415d50792747ba4159d36 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 20 Apr 2010 18:00:48 -0400 Subject: dal, minor cosmetic re-arrangement --- lib/sisu/v2/dal.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/dal.rb b/lib/sisu/v2/dal.rb index 6fd23b60..b0a9df5d 100644 --- a/lib/sisu/v2/dal.rb +++ b/lib/sisu/v2/dal.rb @@ -210,6 +210,12 @@ module SiSU_DAL protected def create_dal dal_array=[] + unless @opt.cmd =~/q/ + tell=(@opt.cmd=~/[vVM]/) \ + ? SiSU_Screen::Ansi.new(@opt.cmd,'Document Abstraction') \ + : SiSU_Screen::Ansi.new(@opt.cmd,'Document Abstraction',@opt.fns) + tell.green_title_hi + end file_array=@env.read_source_file(@opt.fns) file_array.each do |l| if l =~/\r\n/; l.gsub!(/\r\n/,"\n") @@ -218,18 +224,12 @@ module SiSU_DAL meta=file_array.dup meta=meta.join.split("\n\n") #check whether can be eliminated, some of these are large objects to have twice @md=SiSU_Param::Parameters::Instructions.new(meta,@opt).extract - unless @opt.cmd =~/q/ - tell=(@md.cmd=~/[vVM]/) \ - ? SiSU_Screen::Ansi.new(@opt.cmd,'Document Abstraction') \ - : SiSU_Screen::Ansi.new(@opt.cmd,'Document Abstraction',@md.fns) - tell.green_title_hi - end meta=nil dal=SiSU_DAL::Make.new(@md,file_array).song - if @md.cmd =~/[vM]/ - SiSU_Screen::Ansi.new(@md.cmd,@md.fns,"~meta/#{@md.fns}.meta").output if @md.cmd =~/v/i - tell=SiSU_Screen::Ansi.new(@md.cmd,"dal -> #{@make_fns.meta}") if @md.cmd =~/M/ - tell.txt_grey unless @md.cmd =~/q/ + if @opt.cmd =~/[vM]/ + SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"~meta/#{@opt.fns}.meta").output if @opt.cmd =~/v/i + tell=SiSU_Screen::Ansi.new(@opt.cmd,"dal -> #{@make_fns.meta}") if @opt.cmd =~/M/ + tell.txt_grey unless @opt.cmd =~/q/ end dal.each{|s| dal_array << s} dal_array -- cgit v1.2.3 From 63c5a3cead1fb5cbd9b1bff653f269dce8d8052c Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 20 Apr 2010 19:01:55 -0400 Subject: db name, tables, columns, indexes changes, review (need another version bump 2.2.0) * db (sql) table structure, further review and changes (hence breakage & version bump) * new pgsql db name prefix "sisu_v2b_" * new table column words in doc_objects & endnotes, VARCHAR 3000 to contain list of unique sorted words in object * increase use of VARCHAR * constants takes on related additions * param, extensive db column size checks for metadata --- lib/sisu/v2/constants.rb | 7 +- lib/sisu/v2/db_columns.rb | 158 ++++++++++++++++++++++++++++--------------- lib/sisu/v2/db_create.rb | 27 +++++--- lib/sisu/v2/db_drop.rb | 72 ++++++++++++-------- lib/sisu/v2/db_import.rb | 120 ++++++++++++++++++-------------- lib/sisu/v2/db_indexes.rb | 52 +++++++------- lib/sisu/v2/db_load_tuple.rb | 32 +++++---- lib/sisu/v2/db_remove.rb | 4 +- lib/sisu/v2/db_sqltxt.rb | 34 ++++++---- lib/sisu/v2/param.rb | 57 +++++++++++----- 10 files changed, 349 insertions(+), 214 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/constants.rb b/lib/sisu/v2/constants.rb index 3fcb1e3a..9a24736c 100644 --- a/lib/sisu/v2/constants.rb +++ b/lib/sisu/v2/constants.rb @@ -131,8 +131,8 @@ Px[:lv4]= '-' Px[:lv5]= '.' Px[:lv6]= '.' #Px[:lv5_6]= '.' -Db[:name_prefix]="SiSU#{SiSU_version_dir}a_" -Db[:name_prefix_db]="sisu_#{SiSU_version_dir}a_" +Db[:name_prefix]="SiSU#{SiSU_version_dir}b_" +Db[:name_prefix_db]="sisu_#{SiSU_version_dir}b_" Db[:col_title]=800 Db[:col_title_part]=400 Db[:col_title_edition]=10 @@ -148,6 +148,9 @@ Db[:col_classify_identify]=256 Db[:col_classify_library]=30 Db[:col_classify_small]=16 Db[:col_filename]=256 +Db[:col_digest]=64 +Db[:col_filesize]=10 +Db[:col_info_note]=3000 __END__ consider: 〔comment〕 diff --git a/lib/sisu/v2/db_columns.rb b/lib/sisu/v2/db_columns.rb index ee66c59e..0c2eb367 100644 --- a/lib/sisu/v2/db_columns.rb +++ b/lib/sisu/v2/db_columns.rb @@ -208,7 +208,7 @@ module SiSU_DB_columns 'title_note' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1119,7 +1119,7 @@ module SiSU_DB_columns 'rights' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1141,7 +1141,7 @@ module SiSU_DB_columns 'rights_copyright_text' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1163,7 +1163,7 @@ module SiSU_DB_columns 'rights_copyright_translation' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1185,7 +1185,7 @@ module SiSU_DB_columns 'rights_copyright_illustrations' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1207,7 +1207,7 @@ module SiSU_DB_columns 'rights_copyright_photographs' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1229,7 +1229,7 @@ module SiSU_DB_columns 'rights_copyright_preparation' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1251,7 +1251,7 @@ module SiSU_DB_columns 'rights_copyright_digitization' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1273,7 +1273,7 @@ module SiSU_DB_columns 'rights_copyright_audio' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1295,7 +1295,7 @@ module SiSU_DB_columns 'rights_copyright_video' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1317,7 +1317,7 @@ module SiSU_DB_columns 'rights_license' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1355,8 +1355,7 @@ module SiSU_DB_columns 'classify_topic_register' end def create_column - "#{name} VARCHAR(#{Db[:col_classify_txt_long]}) NULL," - #"#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1804,21 +1803,18 @@ module SiSU_DB_columns self end =begin -#% misc -@make: - :skin: -@links: +#% src =end - def filename + def src_filename def name - 'filename' + 'src_filename' end def create_column "#{name} VARCHAR(#{Db[:col_filename]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} - IS 'metadata document filename';} + IS 'sisu markup source text filename';} end def tuple t=if defined? @md.fns \ @@ -1831,56 +1827,61 @@ module SiSU_DB_columns end self end - def sisutxt # consider naming sisusrc + def src_fingerprint def name - 'sisutxt' + 'src_fingerprint' #hash/digest, sha256 or md5 end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_digest]}) NULL," + #"#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} - IS 'sisu markup text (if shared)';} + IS 'sisu markup source text fingerprint, hash digest sha256 (or md5)';} end def tuple - t=if @md.mod.inspect=~/import|update/ \ - and FileTest.exist?(@md.fns) - ["#{name}, ","'#{@sisutxt}', "] + t=if defined? @md.dgst \ + and @md.dgst.class==Array \ + and @md.dgst[1]=~/\S+/ + txt=@md.dgst[1] + ["#{name}, ","'#{txt}', "] else ['',''] end end self end - def fulltext + def src_filesize def name - 'fulltext' + 'src_filesize' end def create_column - "#{name} TEXT NULL," + "#{name} VARCHAR(#{Db[:col_filesize]}) NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} - IS 'document full text clean, searchable';} + IS 'sisu markup source text file size';} end def tuple - t=if @md.mod.inspect=~/import|update/ \ - and FileTest.exist?(@md.fns) - ["#{name}, ","'#{@fulltext}', "] - else ['',''] - end + t=if defined? @md.filesize \ + and @md.filesize=~/\S+/ + txt=@md.filesize + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end end self end - def word_count + def src_word_count def name - 'word_count' + 'src_word_count' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} - IS 'document word count';} + IS 'sisu markup source text word count';} end def tuple t=if defined? @md.wc_words \ @@ -1893,23 +1894,47 @@ module SiSU_DB_columns end self end - def digest + def src_txt # consider naming sisusrc def name - 'dgst' + 'src_text' end def create_column "#{name} TEXT NULL," end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} - IS 'document hash digest sha256 (or md5)';} + IS 'sisu markup source text (if shared)';} end def tuple - t=if defined? @md.dgst \ - and @md.dgst=~/\S+/ - txt=@md.dgst - special_character_escape(txt) - ["#{name}, ","'#{txt}', "] + t=if @md.mod.inspect=~/import|update/ \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@sisutxt}', "] + else ['',''] + end + end + self + end +=begin +#% misc +@make: + :skin: +@links: +=end + def fulltext + def name + 'fulltext' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'document full text clean, searchable';} + end + def tuple + t=if @md.mod.inspect=~/import|update/ \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@fulltext}', "] else ['',''] end end @@ -1924,12 +1949,35 @@ module SiSU_DB_columns end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} - IS 'metadata document skin name';} + IS 'source text skin name';} + end + def tuple + t=if defined? @md.skin_name \ + and @md.skin_name=~/\S+/ + txt=@md.skin_name + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def skin_fingerprint #check + def name + 'skin_fingerprint' + end + def create_column + "#{name} VARCHAR(#{Db[:col_digest]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'source text skin fingerprint';} end def tuple - t=if defined? @md.notes.skin_name \ - and @md.notes.skin_name=~/\S+/ - txt=@md.notes.skin_name + t=if defined? @md.dgst_skin \ + and @md.dgst_skin.class==Array \ + and @md.dgst_skin[1]=~/\S+/ + txt=@md.dgst_skin[1] special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] @@ -1946,7 +1994,7 @@ module SiSU_DB_columns end def column_comment %{COMMENT ON COLUMN metadata_and_text.#{name} - IS 'metadata document skin';} + IS 'source text skin';} end def tuple t=if defined? @md.skin \ @@ -1972,9 +2020,9 @@ module SiSU_DB_columns IS 'metadata document links';} end def tuple - t=if defined? @md.notes.links \ - and @md.notes.links=~/\S+/ - txt=@md.notes.links + t=if defined? @md.links \ + and @md.links=~/\S+/ + txt=@md.links special_character_escape(txt) ["#{name}, ","'#{txt}', "] else ['',''] diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb index 1fc00168..c1fed045 100644 --- a/lib/sisu/v2/db_create.rb +++ b/lib/sisu/v2/db_create.rb @@ -182,13 +182,16 @@ module SiSU_DB_create #{column.notes_prefix_a.create_column} #{column.notes_prefix_b.create_column} #{column.notes_suffix.create_column} + /* src */ + #{column.src_filename.create_column} + #{column.src_fingerprint.create_column} + #{column.src_filesize.create_column} + #{column.src_word_count.create_column} + #{column.src_txt.create_column} /* misc */ - #{column.filename.create_column} - #{column.sisutxt.create_column} #{column.fulltext.create_column} - #{column.word_count.create_column} - #{column.digest.create_column} #{column.skin_name.create_column} + #{column.skin_fingerprint.create_column} #{column.skin.create_column} #{column.links.create_column.gsub(/,$/,'')} /* subj VARCHAR(64) NULL, */ @@ -215,7 +218,8 @@ module SiSU_DB_create ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, - seg VARCHAR(120) NULL, + words VARCHAR(3000) NULL, + seg VARCHAR(256) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, lev1 SMALLINT, @@ -254,6 +258,7 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, + words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -276,6 +281,7 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, + words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -298,6 +304,7 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, + words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -430,12 +437,14 @@ module SiSU_DB_create %{#{column.notes_prefix_a.column_comment}}, %{#{column.notes_prefix_b.column_comment}}, %{#{column.notes_suffix.column_comment}}, - %{#{column.filename.column_comment}}, - %{#{column.sisutxt.column_comment}}, + %{#{column.src_filename.column_comment}}, + %{#{column.src_fingerprint.column_comment}}, + %{#{column.src_filesize.column_comment}}, + %{#{column.src_word_count.column_comment}}, + %{#{column.src_txt.column_comment}}, %{#{column.fulltext.column_comment}}, - %{#{column.word_count.column_comment}}, - %{#{column.digest.column_comment}}, %{#{column.skin_name.column_comment}}, + %{#{column.skin_fingerprint.column_comment}}, %{#{column.skin.column_comment}}, %{#{column.links.column_comment}}, ] diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb index 7189da56..35d971af 100644 --- a/lib/sisu/v2/db_drop.rb +++ b/lib/sisu/v2/db_drop.rb @@ -133,37 +133,49 @@ module SiSU_DB_drop ensure end end + def conn_execute_array(sql_arr) + @conn.transaction do |conn| + sql_arr.each do |sql| + conn.execute(sql) + end + end + end def indexes #% drop all indexes - #@conn.do(%{ - # DROP INDEX object_nr ON doc_objects(ocn); - # DROP INDEX body ON doc_objects(body); - # DROP INDEX clean ON doc_objects(clean); - # DROP INDEX lev1 ON doc_objects(lev1); - # DROP INDEX lev2 ON doc_objects(lev2); - # DROP INDEX lev3 ON doc_objects(lev3); - # DROP INDEX lev4 ON doc_objects(lev4); - # DROP INDEX lev5 ON doc_objects(lev5); - # DROP INDEX lev6 ON doc_objects(lev6); - # DROP INDEX endnote_nr ON endnotes(nr); - # DROP INDEX endnote ON endnotes(body); - # DROP INDEX title ON metadata_and_text(title); - # DROP INDEX filename ON metadata_and_text(filename) - # /* - # DROP INDEX object_nr ON doc_objects(ocn) CASCADE; - # DROP INDEX body ON doc_objects(body) CASCADE; - # DROP INDEX clean ON doc_objects(clean) CASCADE; - # DROP INDEX lev1 ON doc_objects(lev1) CASCADE; - # DROP INDEX lev2 ON doc_objects(lev2) CASCADE; - # DROP INDEX lev3 ON doc_objects(lev3) CASCADE; - # DROP INDEX lev4 ON doc_objects(lev4) CASCADE; - # DROP INDEX lev5 ON doc_objects(lev5) CASCADE; - # DROP INDEX lev6 ON doc_objects(lev6) CASCADE; - # DROP INDEX endnote_nr ON endnotes(nr) CASCADE; - # DROP INDEX endnote ON endnotes(body) CASCADE; - # DROP INDEX title ON metadata_and_text(title) CASCADE; - # DROP INDEX filename ON metadata_and_text(filename) CASCADE - # */ - #}) + print "\n drop documents common indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_text_words;}, + %{DROP INDEX idx_title;}, + %{DROP INDEX idx_author;}, + %{DROP INDEX idx_filename;}, + %{DROP INDEX idx_topics;}, + %{DROP INDEX idx_ocn;}, + %{DROP INDEX idx_digest_clean;}, + %{DROP INDEX idx_digest_all;}, + %{DROP INDEX idx_lev1;}, + %{DROP INDEX idx_lev2;}, + %{DROP INDEX idx_lev3;}, + %{DROP INDEX idx_lev4;}, + %{DROP INDEX idx_lev5;}, + %{DROP INDEX idx_lev6;}, + %{DROP INDEX idx_endnote_words;}, + %{DROP INDEX idx_endnote_nr;}, + %{DROP INDEX idx_digest_en;}, + %{DROP INDEX idx_endnote_words_asterisk;}, + %{DROP INDEX idx_endnote_nr_asterisk;}, + %{DROP INDEX idx_endnote_asterisk;}, + %{DROP INDEX idx_digest_en_asterisk;}, + %{DROP INDEX idx_endnote_words_plus;}, + %{DROP INDEX idx_endnote_nr_plus;}, + %{DROP INDEX idx_endnote_plus;}, + %{DROP INDEX idx_digest_en_plus}, + ] + conn_execute_array(sql_arr) + print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_clean;}, + %{DROP INDEX idx_endnote}, + ] + conn_execute_array(sql_arr) end self end diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index 45aca11b..e351f6fc 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -122,7 +122,7 @@ module SiSU_DB_import tell.puts_blue unless @opt.cmd =~/q/ tell=SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc) tell.print_grey if @opt.cmd =~/v/ - select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; } + select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; } file_exist=@sql_type=~/sqlite/ \ ? @conn.get_first_value(select_first_match) \ : @conn.select_one(select_first_match) @@ -265,7 +265,10 @@ module SiSU_DB_import @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + @col[:words]=@col[:plaintext].dup + @col[:words]=unique_words(@col[:words]) if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last end if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last @@ -298,7 +301,10 @@ module SiSU_DB_import @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + @col[:words]=@col[:plaintext].dup + @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -324,7 +330,10 @@ module SiSU_DB_import @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + @col[:words]=@col[:plaintext].dup + @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -350,7 +359,10 @@ module SiSU_DB_import @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + @col[:words]=@col[:plaintext].dup + @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -389,12 +401,15 @@ module SiSU_DB_import end special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup - strip_markup(@col[:plaintext]) + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + @col[:words]=@col[:plaintext].dup + @col[:words]=unique_words(@col[:words]) t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) @tuple_array << t.tuple @en,@en_ast,@en_pls=[],[],[] @col[:en_a]=@col[:en_z]=nil - @col[:lev]=@col[:plaintext]=@col[:body]='' + @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]='' end if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) @@ -406,9 +421,9 @@ module SiSU_DB_import @id_n+=1 special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) - #special_character_escape(body) - #special_character_escape(txt) strip_markup(txt) + words=txt.dup + words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -418,16 +433,17 @@ module SiSU_DB_import end if txt en={ :type => 'endnotes', - :id => @id_n, - :lid => @col[:lid], - :nr => nr, - :txt => txt, - :body => body, - :ocn => @col[:ocn], - :ocnd => @col[:ocnd], - :ocns => @col[:ocns], - :id_t => @@id_t, - :hash => digest_clean + :id => @id_n, + :lid => @col[:lid], + :nr => nr, + :txt => txt, + :body => body, + :words => words, + :ocn => @col[:ocn], + :ocnd => @col[:ocnd], + :ocns => @col[:ocns], + :id_t => @@id_t, + :hash => digest_clean } t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) @tuple_array << t.tuple @@ -447,6 +463,8 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) + words=txt.dup + words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -456,16 +474,17 @@ module SiSU_DB_import end if txt en={ :type => 'endnotes_asterisk', - :id => @id_n, - :lid => @col[:lid], - :nr => nr, - :txt => txt, - :body => body, - :ocn => @col[:ocn], - :ocnd => @col[:ocnd], - :ocns => @col[:ocns], - :id_t => @@id_t, - :hash => digest_clean + :id => @id_n, + :lid => @col[:lid], + :nr => nr, + :txt => txt, + :body => body, + :words => words, + :ocn => @col[:ocn], + :ocnd => @col[:ocnd], + :ocns => @col[:ocns], + :id_t => @@id_t, + :hash => digest_clean } t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) @tuple_array << t.tuple @@ -485,6 +504,8 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) + words=txt.dup + words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -494,16 +515,17 @@ module SiSU_DB_import end if txt en={ :type => 'endnotes_plus', - :id => @id_n, - :lid => @col[:lid], - :nr => nr, - :txt => txt, - :body => body, - :ocn => @col[:ocn], - :ocnd => @col[:ocnd], - :ocns => @col[:ocns], - :id_t => @@id_t, - :hash => digest_clean + :id => @id_n, + :lid => @col[:lid], + :nr => nr, + :txt => txt, + :body => body, + :words => words, + :ocn => @col[:ocn], + :ocnd => @col[:ocnd], + :ocns => @col[:ocns], + :id_t => @@id_t, + :hash => digest_clean } t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) @tuple_array << t.tuple @@ -526,25 +548,25 @@ module SiSU_DB_import endnotes(@txt).range @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ + @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ @txt=endnotes(@txt).clean_text end @txt end def standard - x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) - else nil - end + x=(@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \ + ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) \ + : nil end def asterisk - x=if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) - else nil - end + x=(@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \ + ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) \ + : nil end def plus - x=if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) - else nil - end + x=(@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \ + ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) \ + : nil end def clean_text(base_url=nil) if base_url diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb index 3cbcc20c..fb5ddd76 100644 --- a/lib/sisu/v2/db_indexes.rb +++ b/lib/sisu/v2/db_indexes.rb @@ -73,38 +73,44 @@ module SiSU_DB_index def base print "\n create documents common indexes\n" unless @opt.cmd =~/q/ sql_arr=[ - %{CREATE INDEX object_nr ON doc_objects(ocn);}, - %{CREATE INDEX digest_clean ON doc_objects(digest_clean);}, - %{CREATE INDEX digest_all ON doc_objects(digest_all);}, - %{CREATE INDEX lev1 ON doc_objects(lev1);}, - %{CREATE INDEX lev2 ON doc_objects(lev2);}, - %{CREATE INDEX lev3 ON doc_objects(lev3);}, - %{CREATE INDEX lev4 ON doc_objects(lev4);}, - %{CREATE INDEX lev5 ON doc_objects(lev5);}, - %{CREATE INDEX lev6 ON doc_objects(lev6);}, - %{CREATE INDEX endnote_nr ON endnotes(nr);}, - %{CREATE INDEX digest_en ON endnotes(digest_clean);}, - %{CREATE INDEX endnote_nr_asterisk ON endnotes_asterisk(nr);}, - %{CREATE INDEX endnote_asterisk ON endnotes_asterisk(clean);}, - %{CREATE INDEX digest_en_asterisk ON endnotes_asterisk(digest_clean);}, - %{CREATE INDEX endnote_nr_plus ON endnotes_plus(nr);}, - %{CREATE INDEX endnote_plus ON endnotes_plus(clean);}, - %{CREATE INDEX digest_en_plus ON endnotes_plus(digest_clean);}, - %{CREATE INDEX title ON metadata_and_text(title);}, - %{CREATE INDEX filename ON metadata_and_text(filename)}, + %{CREATE INDEX idx_text_words ON doc_objects(words);}, + %{CREATE INDEX idx_ocn ON doc_objects(ocn);}, + %{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);}, + %{CREATE INDEX idx_digest_all ON doc_objects(digest_all);}, + %{CREATE INDEX idx_lev1 ON doc_objects(lev1);}, + %{CREATE INDEX idx_lev2 ON doc_objects(lev2);}, + %{CREATE INDEX idx_lev3 ON doc_objects(lev3);}, + %{CREATE INDEX idx_lev4 ON doc_objects(lev4);}, + %{CREATE INDEX idx_lev5 ON doc_objects(lev5);}, + %{CREATE INDEX idx_lev6 ON doc_objects(lev6);}, + %{CREATE INDEX idx_endnote_words ON endnotes(words);}, + %{CREATE INDEX idx_endnote_nr ON endnotes(nr);}, + %{CREATE INDEX idx_digest_en ON endnotes(digest_clean);}, + %{CREATE INDEX idx_endnote_words_asterisk ON endnotes_asterisk(words);}, + %{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);}, + %{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);}, + %{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);}, + %{CREATE INDEX idx_endnote_words_plus ON endnotes_plus(words);}, + %{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);}, + %{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);}, + %{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);}, + %{CREATE INDEX idx_title ON metadata_and_text(title);}, + %{CREATE INDEX idx_author ON metadata_and_text(creator_author);}, + %{CREATE INDEX idx_filename ON metadata_and_text(src_filename);}, + %{CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register)}, ] conn_execute_array(sql_arr) end def text - print "\n create documents text indexes\n" unless @opt.cmd =~/q/ + print "\n create documents TEXT indexes\n" unless @opt.cmd =~/q/ sql_arr=[ - %{CREATE INDEX clean ON doc_objects(clean);}, - %{CREATE INDEX endnote ON endnotes(clean);} + %{CREATE INDEX idx_clean ON doc_objects(clean);}, + %{CREATE INDEX idx_endnote ON endnotes(clean);} ] conn_execute_array(sql_arr) end base - @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text) + text #@opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text) end end end diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb index cc00b74a..23b6249d 100644 --- a/lib/sisu/v2/db_load_tuple.rb +++ b/lib/sisu/v2/db_load_tuple.rb @@ -79,11 +79,11 @@ module SiSU_DB_tuple end def tuple #% import line sql_entry=if @col[:en_a] - "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + - "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" else - "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + - "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" end if @opt.cmd =~/M/ if @opt.cmd =~/V/ @@ -191,12 +191,14 @@ module SiSU_DB_tuple #{@tp.column.notes_prefix_a.tuple[0]} #{@tp.column.notes_prefix_b.tuple[0]} #{@tp.column.notes_suffix.tuple[0]} -#{@tp.column.filename.tuple[0]} -#{@tp.column.sisutxt.tuple[0]} +#{@tp.column.src_filename.tuple[0]} +#{@tp.column.src_fingerprint.tuple[0]} +#{@tp.column.src_filesize.tuple[0]} +#{@tp.column.src_word_count.tuple[0]} +#{@tp.column.src_txt.tuple[0]} #{@tp.column.fulltext.tuple[0]} -#{@tp.column.word_count.tuple[0]} -#{@tp.column.digest.tuple[0]} #{@tp.column.skin_name.tuple[0]} +#{@tp.column.skin_fingerprint.tuple[0]} #{@tp.column.skin.tuple[0]} #{@tp.column.links.tuple[0]} tid) @@ -272,12 +274,14 @@ tid) #{@tp.column.notes_prefix_a.tuple[1]} #{@tp.column.notes_prefix_b.tuple[1]} #{@tp.column.notes_suffix.tuple[1]} -#{@tp.column.filename.tuple[1]} -#{@tp.column.sisutxt.tuple[1]} +#{@tp.column.src_filename.tuple[1]} +#{@tp.column.src_fingerprint.tuple[1]} +#{@tp.column.src_filesize.tuple[1]} +#{@tp.column.src_word_count.tuple[1]} +#{@tp.column.src_txt.tuple[1]} #{@tp.column.fulltext.tuple[1]} -#{@tp.column.word_count.tuple[1]} -#{@tp.column.digest.tuple[1]} #{@tp.column.skin_name.tuple[1]} +#{@tp.column.skin_fingerprint.tuple[1]} #{@tp.column.skin.tuple[1]} #{@tp.column.links.tuple[1]} #{@id} @@ -311,8 +315,8 @@ tid) @conn,@en,@opt,@file=conn,en,opt,file end def tuple - sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " + - "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" + sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, words, ocn, ocnd, ocns, metadata_tid, digest_clean) " + + "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:words]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" if @opt.cmd =~/M/ @file.puts sql_entry else diff --git a/lib/sisu/v2/db_remove.rb b/lib/sisu/v2/db_remove.rb index e7942a15..5a7f1244 100644 --- a/lib/sisu/v2/db_remove.rb +++ b/lib/sisu/v2/db_remove.rb @@ -72,9 +72,9 @@ module SiSU_DB_remove : false end del_id=if driver_sqlite3 - @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE filename = '#{@opt.fns}'; }).to_i + @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE src_filename = '#{@opt.fns}'; }).to_i else - x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }) + x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; }) x ? (x.join.to_i) : nil end if del_id diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb index 68e9ef8a..17a92683 100644 --- a/lib/sisu/v2/db_sqltxt.rb +++ b/lib/sisu/v2/db_sqltxt.rb @@ -72,9 +72,10 @@ module SiSU_DB_text end def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source txt_arr,en=[],[] + arr=arr.class==String ? arr.split(/\n+/m) : arr arr.each do |s| - s.gsub!(/([*\/_-])\{(.+?)\}\1/,'\2') - s.gsub!(/^(?:group|poem|code)\{/,''); s.gsub!(/^\}(?:group|poem|code)/,'') + s.gsub!(/([*\/_-])\{(.+?)\}\1/m,'\2') + s.gsub!(/^(?:group|poem|code)\{/m,''); s.gsub!(/^\}(?:group|poem|code)/m,'') s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'') if s =~/^:A~/ if defined? @md.creator \ @@ -82,26 +83,26 @@ module SiSU_DB_text and not @md.creator.author.empty? s.gsub!(/@author/,@md.creator.author) else - tell=SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:') - tell.warn + tell=SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:',@md.fnb) + tell.warn unless @md.cmd.inspect =~/q/ end if defined? @md.title \ and defined? @md.title.full \ and not @md.title.full.empty? s.gsub!(/@title/,@md.title.full) else - tell=SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:') - tell.warn + tell=SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:',@md.fnb) + tell.warn unless @md.cmd.inspect =~/q/ end end - s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'') - s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'') - s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/,'') - s.gsub!(/^%{1,3} .+/,'') #removed even if contained in code block - s.gsub!(/
/,' ') - en << s.scan(/~\{\s*(.+?)\s*\}~/) - s.gsub!(/~\{.+?\}~/,'') - s.gsub!(/ \s+/,' ') + s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/m,'') + s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/m,'') + s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/m,'') + s.gsub!(/^%{1,3} .+/m,'') #removed even if contained in code block + s.gsub!(/
/m,' ') + en << s.scan(/~\{\s*(.+?)\s*\}~/m) + s.gsub!(/~\{.+?\}~/m,'') + s.gsub!(/ \s+/m,' ') #special_character_escape(s) s end @@ -124,6 +125,11 @@ module SiSU_DB_text str.strip! str end + def unique_words(str) + a=str.scan(/[a-zA-Z0-9\\\/_-]{2,}/) #a=str.scan(/\S+{2,}/) + str=a.uniq.sort.join(' ') + str + end end end __END__ diff --git a/lib/sisu/v2/param.rb b/lib/sisu/v2/param.rb index ef91f10d..87dd9aab 100644 --- a/lib/sisu/v2/param.rb +++ b/lib/sisu/v2/param.rb @@ -162,8 +162,8 @@ module SiSU_Param puts "#{n} is #{s.class}: programming error, String expected #{__FILE__}:#{__LINE__}" s else - tell=SiSU_Screen::Ansi.new('v',"#{n} length #{s.length} exceeds set db field length #{l}, metadata dropped") - tell.warn + tell=SiSU_Screen::Ansi.new('v',"#{n} length #{s.length} exceeds set db field length #{l}, metadata dropped",@opt.fns) + tell.warn unless @opt.cmd =~/q/ nil end end @@ -244,7 +244,9 @@ module SiSU_Param validate_length(s,l,n) end def note - @h['note'] #TEXT + s=@h['note'] + l,n=Db[:col_info_note],'title.note' + validate_length(s,l,n) end def short s=(@h['short'] ? @h['short'] : @h['main']) @@ -380,9 +382,9 @@ module SiSU_Param def rights a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - def copyright # TEXT used db sql + def copyright def text #you may wish to expand to take from all - r=if @h['copyright'] + s=if @h['copyright'] @h['copyright'] elsif @h['text'] @h['text'] @@ -393,26 +395,40 @@ module SiSU_Param tell.warn unless @opt.cmd =~/q/ '' end + l,n=Db[:col_info_note],'rights.copyright.text' + validate_length(s,l,n) end def translation - r=(@h['translation'] ? @h['translation'] : nil) + s=(@h['translation'] ? @h['translation'] : nil) + l,n=Db[:col_info_note],'rights.copyright.translation' + validate_length(s,l,n) end def illustrations - r=(@h['illustrations'] ? @h['illustrations'] : nil) + s=(@h['illustrations'] ? @h['illustrations'] : nil) + l,n=Db[:col_info_note],'rights.copyright.illustrations' + validate_length(s,l,n) end def photographs - r=(@h['photographs'] ? @h['photographs'] : nil) + s=(@h['photographs'] ? @h['photographs'] : nil) + l,n=Db[:col_info_note],'rights.copyright.photographs' + validate_length(s,l,n) end def digitiztion - r=(@h['digitization'] ? @h['digitization'] : nil) + s=(@h['digitization'] ? @h['digitization'] : nil) + l,n=Db[:col_info_note],'rights.copyright.digitization' + validate_length(s,l,n) end def audio - r=(@h['audio'] ? @h['audio'] : nil) + s=(@h['audio'] ? @h['audio'] : nil) + l,n=Db[:col_info_note],'rights.copyright.audio' + validate_length(s,l,n) end self end def license - r=(@h['license'] ? @h['license'] : nil) + s=(@h['license'] ? @h['license'] : nil) + l,n=Db[:col_info_note],'rights.license' + validate_length(s,l,n) end def all s=if @h['all']; @h['all'] @@ -445,6 +461,9 @@ module SiSU_Param if s.empty? tell=SiSU_Screen::Ansi.new(@cmd,'WARNING Document Rights information missing; provide @rights: :copyright:') tell.warn unless @opt.cmd =~/q/ + else + l,n=Db[:col_info_note],'rights.all' + validate_length(s,l,n) end s end @@ -472,7 +491,7 @@ module SiSU_Param end def topic_register s=@h['topic_register'] - l,n=Db[:col_classify_txt_long],'classify.topic_register' + l,n=Db[:col_info_note],'classify.topic_register' validate_length(s,l,n) end def type @@ -743,9 +762,9 @@ module SiSU_Param @doc={ :lv=>[] } @doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','','' @@publisher='SiSU scribe' - attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:classify,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy + attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:classify,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:filesize,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy def initialize(fns_array,opt) - @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@classify=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_register=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil + @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@classify=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@filesize=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_register=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil @data,@fns,@cmd,@mod,@opt=fns_array,opt.fns,opt.cmd,opt.mod,opt #@data used as data @flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo,@book_idx=false,false,false,false,false,false,false @seg_autoname_safe=true @@ -1107,16 +1126,22 @@ module SiSU_Param @papersize=determine_papersize(@mod.inspect) end @papersize_array=@papersize.scan(/(?:a4|letter|legal|book|a5|b5)/i) + fn=@fns=~/\.ssm\.sst$/ ? @fns.gsub(/.sst/,'') : @fns #decide what to do a filesize on .ssm tells very little about actual document size + @filesize=(File.size(fn)).to_s if @sys.openssl !=false skin=@doc_skin \ ? (SiSU_Env::Info_skin.new(@opt,@doc_skin).select) \ : SiSU_Env::Info_skin.new(@opt).select @dgst,@dgst_skin=[],[] if @env.digest.type =~/sha256/ - @dgst=@sys.sha256(@env.source_file_with_path) + dgst=@sys.sha256(@env.source_file_with_path) + @dgst=dgst[1].length==64 ? dgst : nil + puts 'check document (sha256) digest' if not @dgst @dgst_skin=skin ? (@sys.sha256(skin)) : nil else - @dgst=@sys.md5(@env.source_file_with_path) + dgst=@sys.md5(@env.source_file_with_path) + @dgst=dgst[1].length==32 ? dgst : nil + puts 'check document (md5) digest' if not @dgst @dgst_skin=skin ? (@sys.md5(skin)) : nil end end -- cgit v1.2.3 From 7d5cf5f325a7cb9de869a769d97138f2be308657 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 20 Apr 2010 19:04:01 -0400 Subject: cgi take account of db name prefix change SiSUv2b_ and changed column names --- lib/sisu/v2/cgi_pgsql.rb | 4 ++-- lib/sisu/v2/cgi_sql_common.rb | 18 +++++++++--------- lib/sisu/v2/cgi_sqlite.rb | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/cgi_pgsql.rb b/lib/sisu/v2/cgi_pgsql.rb index 493c39d4..071e63e4 100644 --- a/lib/sisu/v2/cgi_pgsql.rb +++ b/lib/sisu/v2/cgi_pgsql.rb @@ -189,7 +189,7 @@ module SiSU_CGI_pgsql def sql_select_body limit ||=@@limit offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.filename, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE (#{@search_text}) AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.title, metadata_and_text.filename, doc_objects.ocn} + @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE (#{@search_text}) AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} select=@sql_statement[:body] + ' ' + @sql_statement[:range] select @@ -197,7 +197,7 @@ module SiSU_CGI_pgsql def sql_select_endnotes limit ||=@@limit offset ||=@@offset - @sql_statement[:endnotes]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata_and_text, endnotes WHERE (#{@search_endnotes}) AND metadata_and_text.tid = endnotes.metadata_tid ORDER BY metadata_and_text.title, metadata_and_text.filename, endnotes.nr} + @sql_statement[:endnotes]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata_and_text, endnotes WHERE (#{@search_endnotes}) AND metadata_and_text.tid = endnotes.metadata_tid ORDER BY metadata_and_text.title, metadata_and_text.src_filename, endnotes.nr} @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} select=@sql_statement[:endnotes] + ' ' + @sql_statement[:range] select diff --git a/lib/sisu/v2/cgi_sql_common.rb b/lib/sisu/v2/cgi_sql_common.rb index 65bb01a9..56f6cb37 100644 --- a/lib/sisu/v2/cgi_sql_common.rb +++ b/lib/sisu/v2/cgi_sql_common.rb @@ -435,7 +435,7 @@ module SiSU_CGI_sql search[:text] << st[:search] search[:endnotes] << st[:search] end - st=Dbi_search_string.new('metadata_and_text.filename',search_for.filename,q['fns'],cse).string + st=Dbi_search_string.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string if st[:flag] search[:text] << st[:search] search[:endnotes] << st[:search] @@ -820,8 +820,8 @@ module SiSU_CGI_sql end #text_objects_body s_contents.each do |c| #% text body - location=c['filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['filename'][/.+?\.(_?sst|ssm)$/,1] + location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] + file_suffix=c['src_filename'][/.+?\.(_?sst|ssm)$/,1] lang=if location =~ /\S+?~(\S\S\S?)$/ l=location[/\S+?~(\S\S\S?)$/,1] location.gsub!(/(\S+?)~\S\S\S?/,'\1') @@ -832,8 +832,8 @@ module SiSU_CGI_sql if c['tid'].to_i != oldtid.to_i ti=c['title'] can_txt_srch=(cgi['view']=~/index/) \ - ? %{search } \ - : %{search } + ? %{search } \ + : %{search } title=%{toc html #{ti} by #{c['creator_author']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ #hmm watch file_suffix title=@text_search_flag \ ? '

'+title \ @@ -912,8 +912,8 @@ module SiSU_CGI_sql #text_objects_endnote oldtid = 0 s_endnotes.each do |e| #% endnotes - location=e['filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=e['filename'][/.+?\.(_?sst|ssm)$/,1] + location=e['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] + file_suffix=e['src_filename'][/.+?\.(_?sst|ssm)$/,1] lang=if location =~ /\S+?~(\S\S\S?)$/ l=location[/\S+?~(\S\S\S?)$/,1] location.gsub!(/(\S+?)~\S\S\S?/,'\1') @@ -925,8 +925,8 @@ module SiSU_CGI_sql if e['metadata_tid'].to_i != oldtid.to_i ti=e['title'] can_txt_srch=(cgi['view']=~/index/) \ - ? %{search } \ - : %{search } + ? %{search } \ + : %{search } title=%{

toc html #{ti} by #{e['creator_author']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ @counter_endn_doc+=1 oldtid=e['metadata_tid'].to_i diff --git a/lib/sisu/v2/cgi_sqlite.rb b/lib/sisu/v2/cgi_sqlite.rb index 947cc80a..860d7bed 100644 --- a/lib/sisu/v2/cgi_sqlite.rb +++ b/lib/sisu/v2/cgi_sqlite.rb @@ -179,7 +179,7 @@ module SiSU_CGI_sqlite def sql_select_body limit ||=@@limit offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.filename, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.title, metadata_and_text.filename, doc_objects.ocn} + @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} select=@sql_statement[:body] + ' ' + @sql_statement[:range] select @@ -187,7 +187,7 @@ module SiSU_CGI_sqlite def sql_select_endnotes limit ||=@@limit offset ||=@@offset - @sql_statement[:endnotes]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata_and_text, endnotes WHERE #{@search_endnotes} AND metadata_and_text.tid = endnotes.metadata_tid ORDER BY metadata_and_text.title, metadata_and_text.filename, endnotes.nr} + @sql_statement[:endnotes]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata_and_text, endnotes WHERE #{@search_endnotes} AND metadata_and_text.tid = endnotes.metadata_tid ORDER BY metadata_and_text.title, metadata_and_text.src_filename, endnotes.nr} @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} select=@sql_statement[:endnotes] + ' ' + @sql_statement[:range] select -- cgit v1.2.3 From d348ae931a17901eda839ef9501e13c9be51e913 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 20 Apr 2010 19:12:58 -0400 Subject: reverts unique words list column and indexes; fix for drop indexes; changelog & version update * reverts unique word list column on doc_objects and endnotes and associated indexes, may reintroduce at a later time * db_drop, drop TEXT indexes for sqlite only * update: changelog, version date --- lib/sisu/v2/db_create.rb | 4 --- lib/sisu/v2/db_drop.rb | 85 +++++++++++++++++++++++--------------------- lib/sisu/v2/db_import.rb | 19 ---------- lib/sisu/v2/db_indexes.rb | 6 +--- lib/sisu/v2/db_load_tuple.rb | 12 +++---- 5 files changed, 51 insertions(+), 75 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb index c1fed045..c7ce9a6d 100644 --- a/lib/sisu/v2/db_create.rb +++ b/lib/sisu/v2/db_create.rb @@ -218,7 +218,6 @@ module SiSU_DB_create ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, seg VARCHAR(256) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, @@ -258,7 +257,6 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -281,7 +279,6 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -304,7 +301,6 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb index 35d971af..edd08345 100644 --- a/lib/sisu/v2/db_drop.rb +++ b/lib/sisu/v2/db_drop.rb @@ -133,50 +133,53 @@ module SiSU_DB_drop ensure end end - def conn_execute_array(sql_arr) - @conn.transaction do |conn| - sql_arr.each do |sql| - conn.execute(sql) + def indexes + def conn_execute_array(sql_arr) + @conn.transaction do |conn| + sql_arr.each do |sql| + conn.execute(sql) + end end end + def base #% drop base indexes + print "\n drop documents common indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_title;}, + %{DROP INDEX idx_author;}, + %{DROP INDEX idx_filename;}, + %{DROP INDEX idx_topics;}, + %{DROP INDEX idx_ocn;}, + %{DROP INDEX idx_digest_clean;}, + %{DROP INDEX idx_digest_all;}, + %{DROP INDEX idx_lev1;}, + %{DROP INDEX idx_lev2;}, + %{DROP INDEX idx_lev3;}, + %{DROP INDEX idx_lev4;}, + %{DROP INDEX idx_lev5;}, + %{DROP INDEX idx_lev6;}, + %{DROP INDEX idx_endnote_nr;}, + %{DROP INDEX idx_digest_en;}, + %{DROP INDEX idx_endnote_nr_asterisk;}, + %{DROP INDEX idx_endnote_asterisk;}, + %{DROP INDEX idx_digest_en_asterisk;}, + %{DROP INDEX idx_endnote_nr_plus;}, + %{DROP INDEX idx_endnote_plus;}, + %{DROP INDEX idx_digest_en_plus}, + ] + conn_execute_array(sql_arr) + end + def text #% drop TEXT indexes, sqlite + print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_clean;}, + %{DROP INDEX idx_endnote}, + ] + conn_execute_array(sql_arr) + end + self end - def indexes #% drop all indexes - print "\n drop documents common indexes\n" unless @opt.cmd =~/q/ - sql_arr=[ - %{DROP INDEX idx_text_words;}, - %{DROP INDEX idx_title;}, - %{DROP INDEX idx_author;}, - %{DROP INDEX idx_filename;}, - %{DROP INDEX idx_topics;}, - %{DROP INDEX idx_ocn;}, - %{DROP INDEX idx_digest_clean;}, - %{DROP INDEX idx_digest_all;}, - %{DROP INDEX idx_lev1;}, - %{DROP INDEX idx_lev2;}, - %{DROP INDEX idx_lev3;}, - %{DROP INDEX idx_lev4;}, - %{DROP INDEX idx_lev5;}, - %{DROP INDEX idx_lev6;}, - %{DROP INDEX idx_endnote_words;}, - %{DROP INDEX idx_endnote_nr;}, - %{DROP INDEX idx_digest_en;}, - %{DROP INDEX idx_endnote_words_asterisk;}, - %{DROP INDEX idx_endnote_nr_asterisk;}, - %{DROP INDEX idx_endnote_asterisk;}, - %{DROP INDEX idx_digest_en_asterisk;}, - %{DROP INDEX idx_endnote_words_plus;}, - %{DROP INDEX idx_endnote_nr_plus;}, - %{DROP INDEX idx_endnote_plus;}, - %{DROP INDEX idx_digest_en_plus}, - ] - conn_execute_array(sql_arr) - print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/ - sql_arr=[ - %{DROP INDEX idx_clean;}, - %{DROP INDEX idx_endnote}, - ] - conn_execute_array(sql_arr) - end + indexes.base + @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : indexes.text) self end end diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index e351f6fc..0e2db8e3 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -267,8 +267,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last end if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last @@ -303,8 +301,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -332,8 +328,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -361,8 +355,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -403,8 +395,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) @tuple_array << t.tuple @en,@en_ast,@en_pls=[],[],[] @@ -422,8 +412,6 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - words=txt.dup - words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -438,7 +426,6 @@ module SiSU_DB_import :nr => nr, :txt => txt, :body => body, - :words => words, :ocn => @col[:ocn], :ocnd => @col[:ocnd], :ocns => @col[:ocns], @@ -463,8 +450,6 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - words=txt.dup - words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -479,7 +464,6 @@ module SiSU_DB_import :nr => nr, :txt => txt, :body => body, - :words => words, :ocn => @col[:ocn], :ocnd => @col[:ocnd], :ocns => @col[:ocns], @@ -504,8 +488,6 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - words=txt.dup - words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -520,7 +502,6 @@ module SiSU_DB_import :nr => nr, :txt => txt, :body => body, - :words => words, :ocn => @col[:ocn], :ocnd => @col[:ocnd], :ocns => @col[:ocns], diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb index fb5ddd76..abd90409 100644 --- a/lib/sisu/v2/db_indexes.rb +++ b/lib/sisu/v2/db_indexes.rb @@ -73,7 +73,6 @@ module SiSU_DB_index def base print "\n create documents common indexes\n" unless @opt.cmd =~/q/ sql_arr=[ - %{CREATE INDEX idx_text_words ON doc_objects(words);}, %{CREATE INDEX idx_ocn ON doc_objects(ocn);}, %{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);}, %{CREATE INDEX idx_digest_all ON doc_objects(digest_all);}, @@ -83,14 +82,11 @@ module SiSU_DB_index %{CREATE INDEX idx_lev4 ON doc_objects(lev4);}, %{CREATE INDEX idx_lev5 ON doc_objects(lev5);}, %{CREATE INDEX idx_lev6 ON doc_objects(lev6);}, - %{CREATE INDEX idx_endnote_words ON endnotes(words);}, %{CREATE INDEX idx_endnote_nr ON endnotes(nr);}, %{CREATE INDEX idx_digest_en ON endnotes(digest_clean);}, - %{CREATE INDEX idx_endnote_words_asterisk ON endnotes_asterisk(words);}, %{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);}, %{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);}, %{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);}, - %{CREATE INDEX idx_endnote_words_plus ON endnotes_plus(words);}, %{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);}, %{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);}, %{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);}, @@ -110,7 +106,7 @@ module SiSU_DB_index conn_execute_array(sql_arr) end base - text #@opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text) + @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text) end end end diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb index 23b6249d..67c8008f 100644 --- a/lib/sisu/v2/db_load_tuple.rb +++ b/lib/sisu/v2/db_load_tuple.rb @@ -79,11 +79,11 @@ module SiSU_DB_tuple end def tuple #% import line sql_entry=if @col[:en_a] - "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + - "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" else - "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + - "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" end if @opt.cmd =~/M/ if @opt.cmd =~/V/ @@ -315,8 +315,8 @@ tid) @conn,@en,@opt,@file=conn,en,opt,file end def tuple - sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, words, ocn, ocnd, ocns, metadata_tid, digest_clean) " + - "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:words]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" + sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " + + "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" if @opt.cmd =~/M/ @file.puts sql_entry else -- cgit v1.2.3