diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/sisu/v0/constants.rb | 1 | ||||
| -rw-r--r-- | lib/sisu/v0/dal.rb | 335 | ||||
| -rw-r--r-- | lib/sisu/v0/dal_idx.rb | 201 | ||||
| -rw-r--r-- | lib/sisu/v0/dal_numbering.rb | 374 | ||||
| -rw-r--r-- | lib/sisu/v0/dal_syntax.rb | 2 | ||||
| -rw-r--r-- | lib/sisu/v0/param.rb | 7 | ||||
| -rw-r--r-- | lib/sisu/v0/plaintext.rb | 2 | 
7 files changed, 589 insertions, 333 deletions
| diff --git a/lib/sisu/v0/constants.rb b/lib/sisu/v0/constants.rb index f352ab7c..3f050df7 100644 --- a/lib/sisu/v0/constants.rb +++ b/lib/sisu/v0/constants.rb @@ -90,6 +90,7 @@ Mx[:gl_bullet]=       "#{Mx[:gl_o]}●#{Mx[:gl_c]}"  #non substantive text sort: <-#> <~#>    Mx[:pa_non_object_dummy_heading]="#{Mx[:pa_o]}-##{Mx[:pa_c]}" #unnumbered paragraph, delete when not required [used in dummy headings, eg. for segmented html] (place marker at end of paragraph)    Mx[:pa_non_object_no_heading]="#{Mx[:pa_o]}~##{Mx[:pa_c]}" #unnumbered paragraph (place marker at end of paragraph) +Mx[:idx_o]='▢ ';                                         Mx[:idx_c]='▢ ' #  Mx[:nbsp]=            '▭ '  Mx[:br_line]=         "#{Mx[:mk_o]}br#{Mx[:mk_c]}"  Mx[:br_paragraph]=    "#{Mx[:mk_o]}br#{Mx[:mk_c]}" diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb index 25b7528e..aa4758b7 100644 --- a/lib/sisu/v0/dal.rb +++ b/lib/sisu/v0/dal.rb @@ -65,6 +65,8 @@ module SiSU_DAL    require "#{SiSU_lib}/param"    require "#{SiSU_lib}/dal_syntax"    require "#{SiSU_lib}/dal_doc_str" +  require "#{SiSU_lib}/dal_idx" +  require "#{SiSU_lib}/dal_numbering"    require "#{SiSU_lib}/i18n"    require "#{SiSU_lib}/shared_sem"    include SiSU_Env @@ -211,7 +213,8 @@ module SiSU_DAL        data=character_check(data)        data=images(data)        data=SiSU_document_structure::Tables.new(@md,data).tables -      data=numbering_song(data) #tr issue +      data=SiSU_numbering::Numbering.new(@md,data).numbering_song +      data=SiSU_book_index::Book_index.new(data).indexing_song if @md.book_index        data=endnotes(data)        data=object_digest(data)        meta=metadata(data) @@ -449,7 +452,7 @@ module SiSU_DAL      end      def substitutions_and_insertions?(data)        data_expand=[] -      if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content preceeds it) +      if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content precedes it)          data[0].gsub!(/^#!\s*\/usr\/bin\/sisu/,'')          data[0].gsub!(/^#!\s*\/usr\/bin\/env sisu/,'')        end @@ -507,332 +510,6 @@ module SiSU_DAL          end        end      end -    def numbering_song(data) -      data=number_plaintext_para(data) -      data=name_endnote_seg(data) #tr issue -      data=auto_number_heading_ie_title(data) #tr issue -      data=ocn(data) #watch -      data=minor_numbering(data) -      data=name_para_seg_filename(data) -      data=set_heading_seg(data) unless @md.set_heading_seg -      data=set_heading_top(data) unless @md.set_heading_top -      data=set_header_title(data) unless @md.set_header_title -      data -    end -    def number_plaintext_para(data) -      @tuned_file=[] -      data.each do |para| -        if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ -          para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks -        end -        para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u -        para.gsub!(/^\s+|\s$/,"\n") -        @tuned_file << para -      end -      @tuned_file=@tuned_file.flatten -    end -    def name_endnote_seg(data) -      @tuned_file=[] -      data.each do |para| -        para.gsub!(/<:3>\s*<:ee>/, <<-WOK -#{@@endnote['special_align']} <p /><br />\r -#{@@endnote['seg_name_3']} <p /> -#{@@endnote['special_align_close']} -        WOK -        ) -        para.gsub!(/<:2>\s*<:ee>/, <<-WOK -#{@@endnote['special_align']} <p /><br />\r -#{@@endnote['seg_name_2']} <p /> -#{@@endnote['special_align_close']} -        WOK -        ) -        para.gsub!(/<:1>\s*<:ee>/, <<-WOK -#{@@endnote['special_align']} <p /><br />\r -#{@@endnote['seg_name_1']} <p /> -#{@@endnote['special_align_close']} -        WOK -        ) -        @tuned_file << para -      end -      # debug 2003w46 adding revision control info -      if @md.flag_auto_endnotes \ -      and @md.flag_separate_endnotes_make -        @tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}" -      end -      @tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON -      @tuned_file=@tuned_file.flatten -    end -    def owner_details_seg -      data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details" -    end -    def number_sub_heading(para,num,title_no) -      case para -      when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/;  para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ") -      when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ") -      when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/ -        para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2  #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) -      when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/ -        para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}")                #where title contains title number -      else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided -      end -      if @md.toc_lev_limit \ -      and @md.toc_lev_limit < num -        para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch -      end -      para -    end -    def auto_number_heading_ie_title(data)                                             #also does some segment naming -      @tuned_file=[] -      if @md.markup =~/num_top/ \ -      or @md.num_top # watch, 2003w23 -        input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup -        input||=@md.num_top if @md.num_top !~/^$/ -      end -      num_top=input.to_i -      t_no1=t_no2=t_no3=t_no4=0 -      no1=num_top; no2=(num_top + 1); no3=(num_top + 2);  no4=(num_top + 3) -      t_not=0 -      data.each do |para| #@md.seg_names << [additions to segment names] -        if (@md.markup =~/num_top/ \ -        or (@md.num_top \ -        and @md.num_top !~/^$/)) \ -        and para !~/^#{Rx[:meta]}/ -          if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \ -          and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/) -            t_not+=1 #; t_no2=0; t_no3=0 -            para.gsub!(/^(#{Mx[:lv_o]}#{no1}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") -            para.gsub!(/^(#{Mx[:lv_o]}#{no2}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") -            para.gsub!(/^(#{Mx[:lv_o]}#{no3}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") -            para.gsub!(/^(#{Mx[:lv_o]}#{no4}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") -          end -          if para =~/#{Mx[:lv_o]}#{no1}:/ -            @subnumber=1 -            @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/ -          end -          if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \ -          and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \ -          and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ -            if para =~/^#{Mx[:lv_o]}#{no1}:/ -              t_no1+=1; t_no2=0; t_no3=0 -              title_no="#{t_no1}" -              if not @md.seg_names.nil? \ -              and not @md.seg_names.include?(title_no) -                para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329) -                para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. }) -                unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review -                  para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ") -                end -                @md.seg_names << title_no -              #else puts "warning segment name #{title_no} already exists" -              end -              unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required -                para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i, -                  %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) -              end -              para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch -              para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ") -            end -            if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/ -              t_no2+=1; t_no3=0 -              title_no="#{t_no1}.#{t_no2}" -              para=number_sub_heading(para,no2,title_no) -            end -            if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/ -              t_no3+=1 -              title_no="#{t_no1}.#{t_no2}.#{t_no3}" -              para=number_sub_heading(para,no3,title_no) -            end -          elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005 -            para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ") -            para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}") -            para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}") -          end -        elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 -          if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d -            name_num=$1 -            para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}") -          end -          if @md.toc_lev_limit -          end -        end -        @tuned_file << para -      end -      @tuned_file=@tuned_file.flatten -    end -    def ocn(data)                                                                      #and auto segment numbering increment -      @tuned_file=[] -      object_array=SiSU_document_structure::OCN.new(@md,data).ocn -      object_array.each do |o| -        @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor -        else o.txt -        end -      end -      @tuned_file=@tuned_file.flatten -    end -    def minor_numbering(data)                                                          #and auto segment numbering increment -      @tuned_file=[] -      number_small,letter_small=0,0 -      letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) -      data.each do |para| -        if para =~/\w|\S|<|\(/ -          if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|<table|<\/table>|<td|<\/td>|<th|<\/th>|<tr>|<\/tr>|<hr width|<:4-endnotes>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here #  added with Tune.code #¡ -            if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0                    #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) -            end -            if para =~/^#[ 1]/ -              letter_small=0 -              number_small=0 if para =~ /^#1/ -              number_small+=1 -              para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004 -            end -            if para =~/^_# / -              para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004 -              letter_small+=1 -            end -          end -        end -        @tuned_file << para -      end -      @tuned_file=@tuned_file.flatten -    end -    def name_para_seg_filename(data) -      # paragraph name/numbering rules -      # manual naming overrides, manual naming may be -      #   alpha-numeric characters mixed, -      #   numeric only (a number), if -      #     all segments have been named, -      #     the numbers used are over 1000 or -      #     it is  not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented) -      #       [for now a warning is printed for such documents on use of maintenance or very-verbose flag] -      # auto-naming takes the form of giving numbers to segments -      # the rules for which are as follows -      #   if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.) -      #   otherwise the level 4 segment number from the embedded document structure info is used -      #   if there is none a sequential number is designated, preceded by an underscore -      @tuned_file=[] -      art_filename_auto=1 -      @counter=1 -      @unique_auto_name=[] -      if not @md.seg_autoname_safe and @md.cmd =~/[MV]/ -        puts 'manual segment names, numbers used as names, risk warning (segmented html)' -      end -      data.each do |para| -        para=SiSU_document_structure::Structure.new(@md,para).structure_markup -        if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/ -          if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \ -          and not @md.set_heading_seg -            @md.set_heading_seg=true -          end -          if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name -            pattern=$1 -            pattern.gsub!(/(?:[:,-]|\W)/,'.') -            pattern.gsub!(/\.$/,'') -            if not @md.seg_names.nil? \ -            and not @md.seg_names.include?(pattern) -              para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}") -              @md.seg_names << pattern -            else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/ -            end -          end -          if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info -            pattern=$1 -            pattern.gsub!(/(?:[:,-]|\W)/,'.') -            pattern.gsub!(/\.$/,'') -            if not @md.seg_names.nil? \ -            and not @md.seg_names.include?(pattern) -              para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}") -              @md.seg_names << pattern -            else -              para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}") -              @md.seg_names << "~#{pattern}" -            end -          end -          if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one -            if not @md.seg_names.nil? \ -            and not @md.seg_names.include?(art_filename_auto) -              para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}}) -              @md.seg_names << art_filename_auto -            else puts 'segment name (numbering) error' -            end -            art_filename_auto+=1 -          end -        end -        @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \ -        and (@md.pagenew or @md.pagebreak) -          m=$1 #watch ref~ -          para_tmp=[] -          if @md.pagenew.inspect =~/#{m}/;           para_tmp << "#{Mx[:br_page_new]}\n" << para -          elsif @md.pagebreak.inspect =~/#{m}/;         para_tmp << "#{Mx[:br_page]}\n" << para -          end -          para_result=unless para_tmp.length > 0; para -          else                       para_tmp -          end -        else                         para -        end -      end -      if @md.seg_names.length > 0 -        @md.set_heading_seg=true -      end -      @tuned_file=@tuned_file.flatten -    end -    def set_heading_top(data)                                                         #% make sure no false positives -      unless @md.set_heading_top -        puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/ -        @tuned_file=[] -        data.each do |para| -          unless @md.set_heading_top -            if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \ -            and para !~/\A\s*\Z/m -              @md.set_heading_top=true -              head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}" -              else                "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]" -              end -              @tuned_file << head -            end -          end -          @tuned_file << para -        end -        @tuned_file=@tuned_file.flatten -      end -    end -    def set_heading_seg(data)                                                          #% make sure no false positives -      unless @md.set_heading_seg -        puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/ -        @tuned_file=[] -        data.each do |para| -          unless @md.set_heading_seg -            if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \ -            and para !~/\A\s*\Z/m \ -            and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/ -              @md.set_heading_seg=true -              head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]" -              else                "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]" -              end -              @tuned_file << head -            end -          end -          @tuned_file << para -        end -        @tuned_file=@tuned_file.flatten -      end -    end -    def set_header_title(data)                                                         #% make sure no false positives -      unless @md.set_header_title -        puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/ -        @tuned_file=[] -        data.each do |para| -          unless @md.set_header_title -            if para !~/^%{1,2}\s/m \ -            and para !~/\A\s*\Z/m -              @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}" -              @md.title=@md.heading_seg_first -              @md.set_header_title=true -            end -          end -          @tuned_file << para -        end -        @tuned_file=@tuned_file.flatten -      end -    end      def endnotes(data)        @tuned_file=[]        endnote_no,endnote_ref=1,1 @@ -1058,7 +735,7 @@ module SiSU_DAL            para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch            para_plus_en=para.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m)            para_tail=if para =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m -            /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1] +            /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.*?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1]            else ''            end            para_plus_en << para_tail diff --git a/lib/sisu/v0/dal_idx.rb b/lib/sisu/v0/dal_idx.rb new file mode 100644 index 00000000..5e07396a --- /dev/null +++ b/lib/sisu/v0/dal_idx.rb @@ -0,0 +1,201 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search +   #___# + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007, 2008 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + +   SiSU, a framework for document structuring, publishing and search + +   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007 Ralph Amissah + +   This program is free software: you can redistribute it and/or modify it +   under the terms of the GNU General Public License as published by the Free +   Software Foundation, either version 3 of the License, or (at your option) +   any later version. + +   This program is distributed in the hope that it will be useful, but WITHOUT +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   more details. + +   You should have received a copy of the GNU General Public License along with +   this program. If not, see <http://www.gnu.org/licenses/>. + +   If you have Internet connection, the latest version of the GPL should be +   available at these locations: +   <http://www.fsf.org/licensing/licenses/gpl.html> +   <http://www.gnu.org/copyleft/gpl.html> + +   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html> +   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html> +   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt> + + * SiSU uses: +   * Standard SiSU markup syntax, +   * Standard SiSU meta-markup syntax, and the +   * Standard SiSU object citation numbering and system + + * Hompages: +   <http://www.jus.uio.no/sisu> +   <http://www.sisudoc.org> + + * Download: +   <http://www.jus.uio.no/sisu/SiSU/download.html> + + * Ralph Amissah +   <ralph@amissah.com> +   <ralph.amissah@gmail.com> + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_book_index +  class Book_index +    def initialize(data) +      @data=data +    end +    def indexing_song +      @rgx_idx=/#{Mx[:idx_o]}(?:.+?)#{Mx[:idx_c]}\s*/ +      #@rgx_idx=/\s*#{Mx[:idx_o]}(?:.+?)#{Mx[:idx_c]}\s*/ +      @rgx_idx_ocn_init=/#{Mx[:idx_o]}(.+?)#{Mx[:idx_c]}\s*#{Mx[:id_o]}~(\d+)\S+?#{Mx[:id_c]}/ +      @rgx_idx_ocn=/(.+?)~(\d+)/ +      @data=extract_book_index(@data) +      @data=clean_index(@data) +      @data +    end +    def extract_book_index(data) +      tuned_file=[] +      idx_array=[] +      data.each do |para| +        idx_array << @rgx_idx_ocn_init.match(para)[0].gsub(@rgx_idx_ocn_init,'\1~\2') if para =~ @rgx_idx_ocn_init +        tuned_file << para if para +      end +      idx_array.each do |i| +        i.gsub!(@rgx_idx_ocn_init,'\1~\2') +      end +      idx_array=construct_idx_array(idx_array) if idx_array.length > 0 +      if idx_array.length > 0 +        the_idx=construct_book_index(idx_array) +        screen_print(the_idx) +      end +      tuned_file +    end +    def construct_idx_array(idx_array) +      idx_lst=[] +      idx_array.each do |idx| +        idx_list,ocn=@rgx_idx_ocn.match(idx)[1,2] +        idx_lst <<=if idx_list =~/;/ +          g=idx_list.scan(/[^;]+/) +          idxl=[] +          g.each do |i| +            idxl << { :rough_idx => i, :ocn => ocn } +          end +          idxl +        else { :rough_idx => idx_list, :ocn => ocn } +        end +      end +      idx_lst.flatten! +      idx_lst +    end +    def construct_book_index(idx_array) +      @the_idx={} +      idx_array.each do |idx| +        idx_lst=idx[:rough_idx].scan(/[^|:]+/) +        if idx_lst[0] =~/.+?\+\d+/ +          use,plus=/(.+?)\+(\d+)/.match(idx_lst[0])[1,2] +        else use=idx_lst[0] +        end +        @the_idx[use]={} unless @the_idx[use] and defined? @the_idx[use] +        idx_lst.each do |i| +          i.strip! +          i,r=/(.+?)\+(\d+)/.match(i)[1,2] if i =~/.+?\+\d+/ +          x=if idx_lst.length == 1 or idx_lst[0] == i +            @the_idx[use]['a1']=[] unless @the_idx[use]['a1'] and defined? @the_idx[use]['a1'] +            x=if r +              @the_idx[use]['a1'] << { :ocn => idx[:ocn], :range => "#{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" } +              "#{i} #{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" +            else +              @the_idx[use]['a1'] << { :ocn => idx[:ocn] } +              "#{i} #{idx[:ocn]}" +            end +          else +            @the_idx[use]['b1']={} unless @the_idx[use]['b1'] and defined? @the_idx[use]['b1'] +            @the_idx[use]['b1'][i]=[] unless @the_idx[use]['b1'][i] and defined? @the_idx[use]['b1'][i] +            x=if r +              @the_idx[use]['b1'][i] << { :ocn => idx[:ocn], :range => "#{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" } +              "#{idx_lst[0]}:#{i} #{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" +            else +              @the_idx[use]['b1'][i] << { :ocn => idx[:ocn] } +              "#{idx_lst[0]}:#{i} #{idx[:ocn]}" +            end +          end +        end +      end +      the_idx=@the_idx.sort +      #p the_idx; p '-----' +      the_idx +    end +    def screen_print(the_idx) +      the_idx.each do |i| +        i.each do |x| +          if x.class == String +            print "\n" + x + ', ' +          elsif x.class == Array +            p 'array error? -->' +            print x +          elsif x.class == Hash +            if x['a1'].class == Array +              x['a1'].each do |a| +                if a[:range] +                  print a[:range] + ', ' +                elsif a[:ocn] +                  print a[:ocn] + ', ' +                else p 'error' +                end +              end +            end +            if x['b1'] +              m=x['b1'] +              m=m.sort +              m.each do |k,y| +                if k !~/a1/ +                  print "\n\t" + k + ', ' +                  #p y +                  y.each do |z| +                    if z[:range] +                      print z[:range] + ', ' +                    elsif z[:ocn] +                      print z[:ocn] + ', ' +                    else p 'error' +                    end +                  end +                end +              end +            end +          end +        end +      end +    end +    def clean_index(data) +      tuned_file=[] +      data.each do |para| +        para.gsub!(@rgx_idx,'') +        tuned_file << para +      end +      tuned_file +    end +  end +end + +__END__ + diff --git a/lib/sisu/v0/dal_numbering.rb b/lib/sisu/v0/dal_numbering.rb new file mode 100644 index 00000000..e14b87d7 --- /dev/null +++ b/lib/sisu/v0/dal_numbering.rb @@ -0,0 +1,374 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search +   #___# + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007, 2008 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + +   SiSU, a framework for document structuring, publishing and search + +   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007 Ralph Amissah + +   This program is free software: you can redistribute it and/or modify it +   under the terms of the GNU General Public License as published by the Free +   Software Foundation, either version 3 of the License, or (at your option) +   any later version. + +   This program is distributed in the hope that it will be useful, but WITHOUT +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   more details. + +   You should have received a copy of the GNU General Public License along with +   this program. If not, see <http://www.gnu.org/licenses/>. + +   If you have Internet connection, the latest version of the GPL should be +   available at these locations: +   <http://www.fsf.org/licensing/licenses/gpl.html> +   <http://www.gnu.org/copyleft/gpl.html> + +   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html> +   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html> +   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt> + + * SiSU uses: +   * Standard SiSU markup syntax, +   * Standard SiSU meta-markup syntax, and the +   * Standard SiSU object citation numbering and system + + * Hompages: +   <http://www.jus.uio.no/sisu> +   <http://www.sisudoc.org> + + * Download: +   <http://www.jus.uio.no/sisu/SiSU/download.html> + + * Ralph Amissah +   <ralph@amissah.com> +   <ralph.amissah@gmail.com> + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_numbering +  class Numbering +    def initialize(md,data) +      @md,@data=md,data +    end +    def numbering_song +      data=@data +      data=number_plaintext_para(data) +      #data=name_endnote_seg(data) #tr issue +      data=auto_number_heading_ie_title(data) #tr issue +      data=ocn(data) #watch +      data=minor_numbering(data) +      data=name_para_seg_filename(data) +      data=set_heading_seg(data) unless @md.set_heading_seg +      data=set_heading_top(data) unless @md.set_heading_top +      data=set_header_title(data) unless @md.set_header_title +      data +    end +    def number_plaintext_para(data) +      @tuned_file=[] +      data.each do |para| +        if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ +          para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks +        end +        para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u +        para.gsub!(/^\s+|\s$/,"\n") +        @tuned_file << para +      end +      @tuned_file=@tuned_file.flatten +    end +    def name_endnote_seg(data) +      @tuned_file=[] +      if @md.flag_auto_endnotes \ +      and @md.flag_separate_endnotes_make +        @tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}" +      end +      @tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON +      @tuned_file=@tuned_file.flatten +    end +    def owner_details_seg +      data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details" +    end +    def number_sub_heading(para,num,title_no) +      case para +      when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/;  para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ") +      when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ") +      when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/ +        para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2  #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) +      when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/ +        para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}")                #where title contains title number +      else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided +      end +      if @md.toc_lev_limit \ +      and @md.toc_lev_limit < num +        para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch +      end +      para +    end +    def auto_number_heading_ie_title(data)                                             #also does some segment naming +      @tuned_file=[] +      if @md.markup =~/num_top/ \ +      or @md.num_top # watch, 2003w23 +        input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup +        input||=@md.num_top if @md.num_top !~/^$/ +      end +      num_top=input.to_i +      t_no1=t_no2=t_no3=t_no4=0 +      no1=num_top; no2=(num_top + 1); no3=(num_top + 2);  no4=(num_top + 3) +      t_not=0 +      data.each do |para| #@md.seg_names << [additions to segment names] +        if (@md.markup =~/num_top/ \ +        or (@md.num_top \ +        and @md.num_top !~/^$/)) \ +        and para !~/^#{Rx[:meta]}/ +          if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \ +          and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/) +            t_not+=1 #; t_no2=0; t_no3=0 +            para.gsub!(/^(#{Mx[:lv_o]}#{no1}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") +            para.gsub!(/^(#{Mx[:lv_o]}#{no2}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") +            para.gsub!(/^(#{Mx[:lv_o]}#{no3}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") +            para.gsub!(/^(#{Mx[:lv_o]}#{no4}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") +          end +          if para =~/#{Mx[:lv_o]}#{no1}:/ +            @subnumber=1 +            @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/ +          end +          if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \ +          and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \ +          and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ +            if para =~/^#{Mx[:lv_o]}#{no1}:/ +              t_no1+=1; t_no2=0; t_no3=0 +              title_no="#{t_no1}" +              if not @md.seg_names.nil? \ +              and not @md.seg_names.include?(title_no) +                para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329) +                para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. }) +                unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review +                  para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ") +                end +                @md.seg_names << title_no +              #else puts "warning segment name #{title_no} already exists" +              end +              unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required +                para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i, +                  %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) +              end +              para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch +              para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ") +            end +            if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/ +              t_no2+=1; t_no3=0 +              title_no="#{t_no1}.#{t_no2}" +              para=number_sub_heading(para,no2,title_no) +            end +            if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/ +              t_no3+=1 +              title_no="#{t_no1}.#{t_no2}.#{t_no3}" +              para=number_sub_heading(para,no3,title_no) +            end +          elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005 +            para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ") +            para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}") +            para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}") +          end +        elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 +          if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d +            name_num=$1 +            para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}") +          end +          if @md.toc_lev_limit +          end +        end +        @tuned_file << para +      end +      @tuned_file=@tuned_file.flatten +    end +    def ocn(data)                                                                      #and auto segment numbering increment +      @tuned_file=[] +      object_array=SiSU_document_structure::OCN.new(@md,data).ocn +      object_array.each do |o| +        @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor +        else o.txt +        end +      end +      @tuned_file=@tuned_file.flatten +    end +    def minor_numbering(data)                                                          #and auto segment numbering increment +      @tuned_file=[] +      number_small,letter_small=0,0 +      letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) +      data.each do |para| +        if para =~/\w|\S|<|\(/ +          if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|<table|<\/table>|<td|<\/td>|<th|<\/th>|<tr>|<\/tr>|<hr width|<:4-endnotes>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here #  added with Tune.code #¡ +            if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0                    #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) +            end +            if para =~/^#[ 1]/ +              letter_small=0 +              number_small=0 if para =~ /^#1/ +              number_small+=1 +              para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004 +            end +            if para =~/^_# / +              para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004 +              letter_small+=1 +            end +          end +        end +        @tuned_file << para +      end +      @tuned_file=@tuned_file.flatten +    end +    def name_para_seg_filename(data) +      # paragraph name/numbering rules +      # manual naming overrides, manual naming may be +      #   alpha-numeric characters mixed, +      #   numeric only (a number), if +      #     all segments have been named, +      #     the numbers used are over 1000 or +      #     it is  not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented) +      #       [for now a warning is printed for such documents on use of maintenance or very-verbose flag] +      # auto-naming takes the form of giving numbers to segments +      # the rules for which are as follows +      #   if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.) +      #   otherwise the level 4 segment number from the embedded document structure info is used +      #   if there is none a sequential number is designated, preceded by an underscore +      @tuned_file=[] +      art_filename_auto=1 +      @counter=1 +      @unique_auto_name=[] +      if not @md.seg_autoname_safe and @md.cmd =~/[MV]/ +        puts 'manual segment names, numbers used as names, risk warning (segmented html)' +      end +      data.each do |para| +        para=SiSU_document_structure::Structure.new(@md,para).structure_markup +        if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/ +          if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \ +          and not @md.set_heading_seg +            @md.set_heading_seg=true +          end +          if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name +            pattern=$1 +            pattern.gsub!(/(?:[:,-]|\W)/,'.') +            pattern.gsub!(/\.$/,'') +            if not @md.seg_names.nil? \ +            and not @md.seg_names.include?(pattern) +              para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}") +              @md.seg_names << pattern +            else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/ +            end +          end +          if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info +            pattern=$1 +            pattern.gsub!(/(?:[:,-]|\W)/,'.') +            pattern.gsub!(/\.$/,'') +            if not @md.seg_names.nil? \ +            and not @md.seg_names.include?(pattern) +              para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}") +              @md.seg_names << pattern +            else +              para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}") +              @md.seg_names << "~#{pattern}" +            end +          end +          if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one +            if not @md.seg_names.nil? \ +            and not @md.seg_names.include?(art_filename_auto) +              para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}}) +              @md.seg_names << art_filename_auto +            else puts 'segment name (numbering) error' +            end +            art_filename_auto+=1 +          end +        end +        @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \ +        and (@md.pagenew or @md.pagebreak) +          m=$1 #watch ref~ +          para_tmp=[] +          if @md.pagenew.inspect =~/#{m}/;           para_tmp << "#{Mx[:br_page_new]}\n" << para +          elsif @md.pagebreak.inspect =~/#{m}/;         para_tmp << "#{Mx[:br_page]}\n" << para +          end +          para_result=unless para_tmp.length > 0; para +          else                       para_tmp +          end +        else                         para +        end +      end +      if @md.seg_names.length > 0 +        @md.set_heading_seg=true +      end +      @tuned_file=@tuned_file.flatten +    end +    def set_heading_top(data)                                                         #% make sure no false positives +      unless @md.set_heading_top +        puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/ +        @tuned_file=[] +        data.each do |para| +          unless @md.set_heading_top +            if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \ +            and para !~/\A\s*\Z/m +              @md.set_heading_top=true +              head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}" +              else                "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]" +              end +              @tuned_file << head +            end +          end +          @tuned_file << para +        end +        @tuned_file=@tuned_file.flatten +      end +    end +    def set_heading_seg(data)                                                          #% make sure no false positives +      unless @md.set_heading_seg +        puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/ +        @tuned_file=[] +        data.each do |para| +          unless @md.set_heading_seg +            if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \ +            and para !~/\A\s*\Z/m \ +            and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/ +              @md.set_heading_seg=true +              head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]" +              else                "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]" +              end +              @tuned_file << head +            end +          end +          @tuned_file << para +        end +        @tuned_file=@tuned_file.flatten +      end +    end +    def set_header_title(data)                                                         #% make sure no false positives +      unless @md.set_header_title +        puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/ +        @tuned_file=[] +        data.each do |para| +          unless @md.set_header_title +            if para !~/^%{1,2}\s/m \ +            and para !~/\A\s*\Z/m +              @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}" +              @md.title=@md.heading_seg_first +              @md.set_header_title=true +            end +          end +          @tuned_file << para +        end +        @tuned_file=@tuned_file.flatten +      end +    end +  end +end +__END__ diff --git a/lib/sisu/v0/dal_syntax.rb b/lib/sisu/v0/dal_syntax.rb index acdec0e4..80635f36 100644 --- a/lib/sisu/v0/dal_syntax.rb +++ b/lib/sisu/v0/dal_syntax.rb @@ -326,7 +326,7 @@ module SiSU_Syntax          line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>|\d+)\^(\S+?)\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript single word, watch digit added          line.gsub!(/<[:e]\s+(.+?)!?>/,"#{Mx[:en_a_o]}\\1#{Mx[:en_a_c]}")                                                                                     #not tested          line.gsub!(/^\s*_\*\s*/,"#{Mx[:gl_bullet]}")                               #bullets, shortcut -        #line.gsub!(/^\s*_(\*+)\s*/,"#{Mx[:gl_bullet]}")                               #bullets, shortcut +        line.gsub!(/=\{(.+?)\}/,"#{Mx[:idx_o]}\\1#{Mx[:idx_c]}")                   #          line.gsub!(/^\s*_([1-9])\*\s*/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}#{Mx[:gl_bullet]}")                               #bullets, shortcut          #line.gsub!(/^\s*_([1-9])(\*+)\s*/,"#{Mx[:fa_o]}:i\\1#{Mx[:fa_c]}#{Mx[:fa_o]}\\2#{Mx[:fa_c_o]}")                               #bullets, shortcut          line.gsub!(/^\s*_([1-9])\s+/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}")                                                                 #indent diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb index b211f5c1..7994487d 100644 --- a/lib/sisu/v0/param.rb +++ b/lib/sisu/v0/param.rb @@ -119,13 +119,13 @@ module SiSU_Param        @doc={ :lv=>[] }        @doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','',''        @@publisher='SiSU scribe' -      attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag +      attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag,:book_index        def initialize(fns_array,opt)          @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@sfx=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@dc_title=@html_title=@subtitle=@subtitle_tex=@creator_home=@dc_creator=@translator=@illustrator=@prepared_by=@digitized_by=@dc_subject=@dc_description=@dc_publisher=@dc_contributor=@dc_date=@dc_date_created=@dc_date_issued=@dc_date_available=@dc_date_valid=@dc_date_modified=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@dc_type=@dc_format=@dc_identifier=@dc_source=@dc_language=@language_original=@dc_relation=@dc_coverage=@dc_rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_auto_heading_num=@make_bold=@make_italic=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@creator_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@man_synopsis=nil          @man_section=1          @man_name='man page "name/whatis" information not provided, set in header @man: name=[whatis information]'          @data,@fns,@cmd,@mod,@opt=fns_array,opt.fns,opt.cmd,opt.mod,opt #@data used as data -        @flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo=false,false,false,false,false,false +        @flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo,@book_index=false,false,false,false,false,false,false          @seg_autoname_safe=true          @sem_tag=false          @markup_instruction,@markup_declared,@image='','','' #check which other values should be set to empty rather than nil @@ -576,6 +576,9 @@ module SiSU_Param                  end                end              end +            if not @book_index and para =~/^=\{(.+?)\}\s*$/  +              @book_index=true +            end              unless @code_flag                case para                when /~\{\s+.+?\}~/m                                                                    #% processing diff --git a/lib/sisu/v0/plaintext.rb b/lib/sisu/v0/plaintext.rb index 4dd808fc..fb4b1115 100644 --- a/lib/sisu/v0/plaintext.rb +++ b/lib/sisu/v0/plaintext.rb @@ -321,7 +321,7 @@ WOK            para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3')            para.gsub!(/<a href=".+?">(.+?)<\/a>/m,'\1')            para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'')                                       # remove name links -          para.gsub!(/ /,' ')                                            # decide on +          para.gsub!(/ |#{Mx[:nbsp]}/,' ')                                            # decide on            para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,'    [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")            para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')            #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') | 
