diff options
| author | Ralph Amissah <ralph@amissah.com> | 2014-01-26 02:22:02 -0500 | 
|---|---|---|
| committer | Ralph Amissah <ralph@amissah.com> | 2014-01-26 02:31:54 -0500 | 
| commit | 506e32633838b4daf9ab566c9da083329212f219 (patch) | |
| tree | ef48a6985ce663aa3d4d62037f232b2286422979 /lib/sisu/v6/ao_numbering.rb | |
| parent | v5 v6: version & changelog (& rakefile), make true on next commit (diff) | |
v5 v6: made true, branches: v6 development; v5 stable; v4 closed [sisu_5.3.0]
Diffstat (limited to 'lib/sisu/v6/ao_numbering.rb')
| -rw-r--r-- | lib/sisu/v6/ao_numbering.rb | 487 | 
1 files changed, 487 insertions, 0 deletions
# encoding: utf-8
# lib/sisu/v6/ao_numbering.rb (new file, mode 100644, blob 23d9fd73)
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah,
   All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.sisudoc.org/sisu/en/SiSU/download.html>

 * Git
   <http://sources.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
   <http://sources.sisudoc.org/?p=code/sisu.git;a=blob;f=lib/sisu/v6/ao_numbering.rb;hb=HEAD>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: abstract objects (ao), heading auto-numbering, minor
    (list) numbering and segment naming

=end
module SiSU_AO_Numbering
  # Runs the numbering passes over a list of document objects ("dob"s).
  #
  # NOTE(review): the document objects, the metadata object (md) and the
  # constants Mx, Rx, Hx, SiSU_AO_DocumentStructure and
  # SiSU_AO_DocumentStructureExtract are defined elsewhere in the project;
  # only the attributes actually read or written below are relied upon.
  class Numbering
    attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment

    # md:   document metadata/options object (read and updated by the passes)
    # data: array of document objects to be processed by #numbering_song
    def initialize(md,data)
      @md,@data=md,data
      @obj=@type=@ocn=@lv=@name=@index=@comment=nil
    end

    # Runs every pass in order and returns
    # [data, tags_map, ocn_html_seg_map].
    def numbering_song
      data=@data
      data=number_plaintext_para(data)
      data=auto_number_heading_ie_title(data.compact) #tr issue
      data=ocn(data.compact) #watch
      data=xml(data.compact)
      data=minor_numbering(data.compact)
      data,tags_map,ocn_html_seg_map=name_para_seg_filename(data)
      data=set_heading_top(data) unless @md.set_heading_top
      [data,tags_map,ocn_html_seg_map]
    end

    # Joins the lines of numbered, non-block objects into a single line and
    # trims leading whitespace of every String object; Array objects are
    # left untouched. Returns the (flattened) list of objects.
    def number_plaintext_para(data)
      @tuned_file=[]
      data.each do |dob|
        joinable=(dob.of !=:block && dob.of !=:comment && dob.of !=:layout)
        if joinable && dob.ocn_
          dob.obj=dob.obj.gsub(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
        end
        unless dob.obj.is_a?(Array)
          dob.obj=dob.obj.gsub(/^\s+/,'').
            gsub(/\s$/,"\n")
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end

    # Prefixes a sub-heading with its number (title_no, e.g. "2.1") unless
    # the heading text already looks numbered. When the heading level (num)
    # is deeper than @md.toc_lev_limit the line is additionally marked bold
    # with '!_ '. Returns dob.
    #
    # Fix: when the heading has no usable name and title_no contains no
    # digit, the heading text is now kept as is (it used to be set to nil).
    def number_sub_heading(dob,num,title_no)
      unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix
        numbered=dob.obj.gsub(/^/,"#{title_no} ")
        dob.obj=case dob.name
        when /-/, /^#/, /^[a-z_\.]+/ then numbered
        else
          dob.name=title_no if dob.name=~/^$/ #where title contains title number
          (title_no =~/\d+/) ? numbered : dob.obj #main, where title number is to be provided
        end
        if @md.toc_lev_limit \
        && @md.toc_lev_limit < num
          dob.obj=dob.obj.gsub(/^/,'!_ ') #bold line, watch
        end
      end
      dob
    end

    # Turns heading text into a tag: spaces become '_', quotes are dropped,
    # '/' becomes '-', and the font-face and bullet markers held in Mx are
    # removed.
    def heading_tag_clean(heading_tag)
      cleaned=heading_tag.gsub(/[ ]+/,'_').
        gsub(/["']/,'').
        gsub(/[\/]/,'-')
      %w[bold italics underscore cite insert strike superscript subscript hilite].each do |face|
        mark_open=Mx[:"fa_#{face}_o"]
        mark_close=Mx[:"fa_#{face}_c"]
        cleaned=cleaned.gsub(/#{mark_open}|#{mark_close}/,'')
      end
      cleaned.gsub(/#{Mx[:gl_bullet]}/,'')
    end

    # Auto-numbers headings (and does some segment naming/tagging).
    #
    # Numbering starts at the heading level given by @md.make.num_top and
    # covers that level and the two below it ("1", "1.1", "1.1.1").
    # Unreachable branches of the original (tests for dob.ln != no1 nested
    # inside dob.ln == no1) and statements without effect were removed;
    # the reachable behaviour is unchanged.
    def auto_number_heading_ie_title(data)                                             #also does some segment naming
      @tuned_file=[]
      input=nil
      if defined?(@md.make.num_top) \
      && @md.make.num_top \
      && @md.make.num_top !~/^$/
        input=@md.make.num_top
      end
      num_top=(input ? input.to_i : nil)
      t_no1=t_no2=t_no3=0
      no1=no2=no3=nil
      if num_top
        no1=num_top; no2=(num_top + 1); no3=(num_top + 2)
      end
      chapter_number_counter=0
      data=data.compact
      data.each do |dob| #@md.seg_names << [additions to segment names]
        title_no=nil
        dob=SiSU_AO_DocumentStructureExtract::Structure.new(@md,dob).structure_markup #must happen earlier, node info etc. require
        if dob.is ==:heading \
        && dob.autonum_ \
        && autonumber_requested?
          if dob.lv=='1' \
          && dob.obj =~/^#\s|\s#(?:\s|$)/
            chapter_number_counter +=1
            dob.obj=dob.obj.gsub(/^#\s/,"#{chapter_number_counter} ").
              gsub(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1")
          end
          @subnumber=0 if dob.ln==no1
          if dob.ln.to_s =~/^[0-6]/ \
          && !dob.toc_ \
          && dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix
            if dob.ln==no1
              t_no1+=1; t_no2=0; t_no3=0
              title_no="#{t_no1}"
              if @md.seg_names.is_a?(Array) \
              && !@md.seg_names.include?(title_no)
                dob.name="#{title_no}" if not dob.name
                dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/  #check whether will work across file types with stop signs
                tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase
                tag=heading_tag_clean(tag)
                dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs
                dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \
                ? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} "))
                : (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later
                @md.seg_names << title_no
              end
            end
            if dob.ln==no1         #watch because here you change dob.name
              dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
            end
            if dob.ln==no2         #watch because here you change dob.name
              t_no2+=1; t_no3=0
              title_no="#{t_no1}.#{t_no2}"
              dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
              dob=number_sub_heading(dob,no2,title_no)
            end
            if dob.ln==no3         #watch because here you change dob.name
              t_no3+=1
              title_no="#{t_no1}.#{t_no2}.#{t_no3}"
              dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
              dob=number_sub_heading(dob,no3,title_no)
            end
          elsif dob.ln.to_s =~/^[0-6]/ \
          && dob.name =~ /^[\w-]+-/ # endnotes, watch2005
            dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/  #check whether will work across file types with stop signs
            # NOTE(review): the original also evaluated
            # dob.name.gsub(/^([a-z_\.]+)-$/,'\1') here and discarded the
            # result; confirm whether the trailing '-' was meant to be
            # stripped from dob.name.
          end
        elsif dob.is ==:heading \
        && dob.autonum_ \
        && @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
          #here lies a bug, as is nil when run from -Dv --update, FIX
          if (dob.name.nil? || dob.name.empty?) \
          && dob.ln.to_s =~/^[0-9]/ \
          && dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
            dob.name=$1
            dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
          end
        elsif defined?(dob.name) \
        && dob.name
          dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
        end
        dob.tags=dob.tags.uniq if defined? dob.tags
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end

    # Delegates object citation numbering to
    # SiSU_AO_DocumentStructureExtract::OCN (overrides the :ocn reader).
    def ocn(data)                                                                      #and auto segment numbering increment
      @tuned_file=SiSU_AO_DocumentStructureExtract::OCN.new(@md,data).ocn
      @tuned_file
    end

    # Delegates to SiSU_AO_DocumentStructureExtract::XML#dom.
    def xml(data)
      @tuned_file=SiSU_AO_DocumentStructureExtract::XML.new(@md,data).dom
      @tuned_file
    end

    # Expands "baby numbering" in paragraphs: a leading '# ' (or '#1' to
    # restart) becomes "1. ", "2. ", ...; a leading '_# ' becomes "a. ",
    # "b. ", ... and sets indent '1'. Any numbered heading resets both
    # counters.
    #
    # Fix: lettering continues "aa. ", "ab. ", ... after "z. " (it used to
    # produce ". " because the lookup table ran out).
    def minor_numbering(data)                                                          #and auto segment numbering increment
      @tuned_file=[]
      number_small=0
      letter_label='a'
      data.each do |dob|
        if dob.of ==:heading \
        || dob.of ==:heading_insert \
        || dob.of ==:para \
        || dob.of ==:block
          if dob.is ==:heading \
          && dob.ln.to_s=~/^[0-9]/                                                     #% sub-number system, (baby numbering) reset with any change of major number
            number_small=0
            letter_label='a'
          elsif dob.is ==:para
            if dob.obj =~/^#[ 1]/ \
            && dob.obj !~/^#\s+(?:~#)?$/
              letter_label='a'
              number_small=0 if dob.obj =~ /^#1/
              number_small+=1
              dob.obj=dob.obj.gsub(/^#[ 1]/,"#{number_small}. ")
            end
            if dob.obj =~/^_# /
              dob.obj=dob.obj.gsub(/^_# /,"#{letter_label}. ")
              dob.indent='1'
              letter_label=letter_label.succ
            end
          end
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end

    # Names the remaining segments and builds the lookup maps.
    # Returns [objects, tags, ocn_html_seg] where tags maps tag/ocn strings
    # to { ocn:, segname: } hashes and ocn_html_seg is indexed by ocn.
    #
    # paragraph name/numbering rules
    # manual naming overrides, manual naming may be
    #   alpha-numeric characters mixed,
    #   numeric only (a number), if
    #     all segments have been named,
    #     the numbers used are over 1000 or
    #     it is  not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
    #       [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
    # auto-naming takes the form of giving numbers to segments
    # the rules for which are as follows
    #   if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
    #   otherwise the level 4 segment number from the embedded document structure info is used
    #   if there is none a sequential number is designated, preceded by an underscore
    #
    # Fix: the heading level is an Integer, so the test for levels 0-3 is
    # now made on dob.ln.to_s; Integer =~ Regexp never matched (and raises
    # NoMethodError on Ruby 3.2+), which left the "seg: nil" branch
    # unreachable.
    def name_para_seg_filename(data)                                                   #segment naming, remaining
      @tuned_file,@unique_auto_name=[],[]
      tags={}
      art_filename_auto=1
      @counter=1
      if !@md.seg_autoname_safe \
      && verbose_or_maintenance?
        puts 'manual segment names, numbers used as names, risk warning (segmented html)'
      end
      ocn_html_seg=[]
      data.each do |dob|
        if dob.is==:heading \
        && dob.ln \
        && dob.ln.to_s =~/^[456]/
          if dob.ln==4 \
          && !dob.name \
          && !@md.set_heading_seg
            @md.set_heading_seg=true
          end
          if dob.name !~/^\S+/ \
          && dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m      #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
            possible_seg_name=$1
            possible_seg_name=possible_seg_name.gsub(/(?:[:,-]|\W)/,'.').
              gsub(/\.$/,'')
            if @md.seg_names.is_a?(Array) \
            && !@md.seg_names.include?(possible_seg_name)
              dob.name=possible_seg_name
              dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
              @md.seg_names << possible_seg_name
            elsif verbose_or_maintenance?
              puts 'warn, there may be a conflicting numbering scheme'
            end
          end
          if dob.ln==4 \
          && dob.name                                      #extract segment name from embedded document structure info
            if @md.seg_names.is_a?(Array) \
            && !@md.seg_names.include?(dob.name)
              dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
              @md.seg_names << dob.name
            end
          end
          if dob.ln==4 \
          && !dob.name                                     #if still no segment name, provide a numerical one
            pf='_'                                         #pg='' #may use e.g. '' or '~' or '_'
            segn_auto="#{pf}#{art_filename_auto.to_s}"
            if @md.seg_names.is_a?(Array) \
            && !@md.seg_names.include?(segn_auto)
              dob.name=segn_auto
              dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
              @md.seg_names << segn_auto
            else puts 'segment name (numbering) error'
            end
            art_filename_auto+=1
          end
          if dob.ln==4 \
          && !dob.name #should not occur
            puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
          end
        end
        if (dob.is ==:heading \
        || dob.is ==:heading_insert) \
        && dob.ln==4
          @seg=dob.name
        end
        # headings may be preceded by a page-break layout object, depending
        # on which of the md page settings mentions the heading level
        @tuned_file << if dob.is==:heading \
        && (@md.pagenew || @md.pagebreak || @md.pageline)
          m=dob.ln.to_s
          dob_tmp=[]
          if @md.pagenew.inspect =~/#{m}/
            dob_tmp << SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) << dob
          elsif @md.pagebreak.inspect =~/#{m}/
            dob_tmp << SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) << dob
          elsif @md.pageline.inspect =~/#{m}/
            dob_tmp << SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) << dob
          end
          dob_tmp.empty? ? dob : dob_tmp
        else dob
        end
        if defined?(dob.ocn) \
        && dob.ocn
          is_heading=(dob.is==:heading || dob.is==:heading_insert)
          @segname=(is_heading && dob.ln==4 && (defined? dob.name)) \
          ? (dob.name)
          : @segname
          tags["#{dob.ocn}"]={ segname: @segname }
          ocn_html_seg[dob.ocn]=if is_heading
            if dob.ln.to_s =~/^[0-3]/
              { seg: nil, level: dob.ln }
            else #elsif dob.ln =~/[4-6]/
              { seg: @seg, level: dob.ln }
            end
          else
            { seg: @seg, level: nil }
          end
        end
        dob.tags=dob.tags.uniq if defined? dob.tags
        if defined?(dob.tags) \
        && dob.tags.length > 0
          dob.tags.each do |y|
            tags[y]={ ocn: dob.ocn.to_s, segname: @segname }
          end
        end
        dob
      end
      # headings of level 1-3 take the segment of a level 4 heading found
      # among the next four ocn (the furthest one wins)
      ocn_html_seg.each_with_index do |ocn,i|
        if ocn \
        && ocn[:level].to_s=~/[1-3]/
          (1..4).each do |x|
            if ocn_html_seg[i+x] && ocn_html_seg[i+x][:level]==4
              ocn[:seg]=ocn_html_seg[i+x][:seg]
            end
          end
        end
      end
      if @md.seg_names.length > 0
        @md.set_heading_seg=true
      end
      tuned_file=@tuned_file.flatten
      [tuned_file,tags,ocn_html_seg]
    end

    # Manufactures a top level heading when the document has none.
    # Returns data unchanged when @md.set_heading_top is already set
    # (it used to return nil in that case).
    #
    # NOTE(review): t_o !~ /.../ is applied to whatever data holds; for
    # objects that do not implement =~ this raises on Ruby 3.2+ - confirm
    # what this method is fed.
    def set_heading_top(data)                                                          #% make sure no false positives
      return data if @md.set_heading_top
      if verbose_or_maintenance?
        puts "\tdocument contains no top level heading, (will have to manufacture one)"
      end
      @tuned_file=[]
      data.each do |t_o|
        unless @md.set_heading_top
          if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \
          && t_o !~/\A\s*\Z/m
            @md.set_heading_top=true
            if defined?(@md.title) \
            && @md.title \
            && defined?(@md.title.full) \
            && defined?(@md.creator) \
            && @md.creator
              head=@md.title.main ? ([@lv='1',@obj=@md.title.main]) : ([@lv='1',@obj='[no title provided]'])
              @tuned_file << head
            end
          end
        end
        @tuned_file << t_o
      end
      @tuned_file=@tuned_file.flatten
    end

    # Manufactures a segment level (4) heading when the document has none:
    # the first suitable object is itself turned into the heading (its ln,
    # name and obj are overwritten) and the assigned values are also pushed
    # onto the list. Returns data unchanged when @md.set_heading_seg is
    # already set (it used to return nil in that case).
    def set_heading_seg(data)                                                          #% make sure no false positives
      return data if @md.set_heading_seg
      if verbose_or_maintenance?
        puts "\tdocument contains no segment level, (will have to manufacture one)"
      end
      @tuned_file=[]
      data.each do |dob|
        unless @md.set_heading_seg
          if defined?(dob.ln) && dob.ln.to_s !~/^[0-3]/m \
          && dob.obj !~/\A\s*\Z/m \
          && dob.is !=:layout
            @md.set_heading_seg=true
            head=@md.title.main \
            ? (dob.ln,dob.name,dob.obj=4,'seg',@md.title.main)
            : (dob.ln,dob.name,dob.obj=4,'seg','[segment]')
            @tuned_file << head
          end
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end

    # Manufactures a title header line from @md.heading_seg_first when no
    # document title was provided. Returns data unchanged when
    # @md.set_header_title is already set (it used to return nil).
    #
    # NOTE(review): as in set_heading_top, t_o !~ /.../ assumes the
    # elements implement =~ (e.g. are Strings) - confirm against callers.
    def set_header_title(data)                                                         #% make sure no false positives
      return data if @md.set_header_title
      if verbose_or_maintenance?
        puts "\t no document title provided, (will have to manufacture one)"
      end
      @tuned_file=[]
      data.each do |t_o|
        unless @md.set_header_title
          if t_o !~/^%{1,2}\s/m \
          && t_o !~/\A\s*\Z/m
            @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
            @md.title.main=@md.heading_seg_first
            @md.set_header_title=true
          end
        end
        @tuned_file << t_o
      end
      @tuned_file=@tuned_file.flatten
    end

    private

    # True when heading auto-numbering is configured, i.e. @md.make
    # answers to num_top and the value is not an empty string (a nil value
    # passes this test, exactly as in the original inline condition).
    def autonumber_requested?
      defined?(@md.make.num_top) && @md.make.num_top !~/^$/
    end

    # True when the very-verbose or the maintenance flag is switched on.
    def verbose_or_maintenance?
      @md.opt.act[:verbose_plus][:set]==:on \
      || @md.opt.act[:maintenance][:set]==:on
    end
  end
end
