aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v6/ao_numbering.rb
diff options
context:
space:
mode:
authorRalph Amissah <ralph@amissah.com>2014-01-26 02:32:58 -0500
committerRalph Amissah <ralph@amissah.com>2014-01-26 02:32:58 -0500
commit29bac7d3d3cfbc8b31ce7acd68b8b1c3a59362e1 (patch)
treee9a1d34b21eaa57dd9a1c129f224edeb9a2d184d /lib/sisu/v6/ao_numbering.rb
parentdebian/changelog (4.2.21-1) (diff)
parentv5 v6: made true, branches: v6 development; v5 stable; v4 closed (diff)
Merge tag 'sisu_5.3.0' into debian/sid
SiSU 5.3.0
Diffstat (limited to 'lib/sisu/v6/ao_numbering.rb')
-rw-r--r--lib/sisu/v6/ao_numbering.rb487
1 files changed, 487 insertions, 0 deletions
diff --git a/lib/sisu/v6/ao_numbering.rb b/lib/sisu/v6/ao_numbering.rb
new file mode 100644
index 00000000..23d9fd73
--- /dev/null
+++ b/lib/sisu/v6/ao_numbering.rb
@@ -0,0 +1,487 @@
+# encoding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah,
+ All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/licenses/gpl.html>
+
+ <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
+
+ * SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ * Hompages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+ * Download:
+ <http://www.sisudoc.org/sisu/en/SiSU/download.html>
+
+ * Git
+ <http://sources.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
+ <http://sources.sisudoc.org/?p=code/sisu.git;a=blob;f=lib/sisu/v6/ao_numbering.rb;hb=HEAD>
+
+ * Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+ ** Description: system environment, resource control and configuration details
+
+=end
+module SiSU_AO_Numbering
+ class Numbering
+ attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment
+ def initialize(md,data)
+ @md,@data=md,data
+ @obj=@type=@ocn=@lv=@name=@index=@comment=nil
+ end
+ def numbering_song
+ data=@data
+ data=number_plaintext_para(data)
+ data=auto_number_heading_ie_title(data.compact) #tr issue
+ data=ocn(data.compact) #watch
+ data=xml(data.compact)
+ data=minor_numbering(data.compact)
+ data,tags_map,ocn_html_seg_map=name_para_seg_filename(data)
+ data=set_heading_top(data) unless @md.set_heading_top
+ [data,tags_map,ocn_html_seg_map]
+ end
+ def number_plaintext_para(data)
+ @tuned_file=[]
+ data.each do |dob|
+ if (dob.of !=:block \
+ && dob.of !=:comment \
+ && dob.of !=:layout) \
+ && dob.ocn_ #and dob.obj !~ /#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ #FIX
+ dob.obj=dob.obj.gsub(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
+ end
+ unless dob.obj.is_a?(Array)
+ dob.obj=dob.obj.gsub(/^\s+/,'').
+ gsub(/\s$/,"\n")
+ end
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def number_sub_heading(dob,num,title_no)
+ unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix
+ dob.obj=case dob.name
+ when /-/ then dob.obj.gsub(/^/,"#{title_no} ")
+ when /^#/ then dob.obj.gsub(/^/,"#{title_no} ")
+ when /^[a-z_\.]+/ then dob.obj.gsub(/^/,"#{title_no} ")
+ else
+ dob.name=title_no if dob.name=~/^$/ #where title contains title number
+ dob.obj.gsub(/^/,"#{title_no} ") if title_no =~/\d+/ #main, where title number is to be provided #watch changed placement
+ end
+ if @md.toc_lev_limit \
+ and @md.toc_lev_limit < num
+ dob.obj=dob.obj.gsub(/^/,'!_ ') #bold line, watch
+ end
+ end
+ dob
+ end
+ def heading_tag_clean(heading_tag)
+ heading_tag=heading_tag.gsub(/[ ]+/,'_').
+ gsub(/["']/,'').
+ gsub(/[\/]/,'-').
+ gsub(/#{Mx[:fa_bold_o]}|#{Mx[:fa_bold_c]}/,'').
+ gsub(/#{Mx[:fa_italics_o]}|#{Mx[:fa_italics_c]}/,'').
+ gsub(/#{Mx[:fa_underscore_o]}|#{Mx[:fa_underscore_c]}/,'').
+ gsub(/#{Mx[:fa_cite_o]}|#{Mx[:fa_cite_c]}/,'').
+ gsub(/#{Mx[:fa_insert_o]}|#{Mx[:fa_insert_c]}/,'').
+ gsub(/#{Mx[:fa_strike_o]}|#{Mx[:fa_strike_c]}/,'').
+ gsub(/#{Mx[:fa_superscript_o]}|#{Mx[:fa_superscript_c]}/,'').
+ gsub(/#{Mx[:fa_subscript_o]}|#{Mx[:fa_subscript_c]}/,'').
+ gsub(/#{Mx[:fa_hilite_o]}|#{Mx[:fa_hilite_c]}/,'').
+ gsub(/#{Mx[:gl_bullet]}/,'')
+ end
+ def auto_number_heading_ie_title(data) #also does some segment naming
+ @tuned_file=[]
+ if defined? @md.make.num_top \
+ and @md.make.num_top \
+ and @md.make.num_top !~/^$/
+ input||=@md.make.num_top
+ end
+ num_top=(input ? input.to_i : nil)
+ t_no1=t_no2=t_no3=0
+ if num_top
+ no1=num_top; no2=(num_top + 1); no3=(num_top + 2)
+ end
+ chapter_number_counter=0
+ data=data.compact
+ data.each do |dob| #@md.seg_names << [additions to segment names]
+ title_no=nil
+ dob=SiSU_AO_DocumentStructureExtract::Structure.new(@md,dob).structure_markup #must happen earlier, node info etc. require
+ if dob.is ==:heading \
+ && dob.autonum_ \
+ and defined? @md.make.num_top \
+ and @md.make.num_top !~/^$/
+ if dob.lv=='1' \
+ and dob.obj =~/^#\s|\s#(?:\s|$)/
+ chapter_number_counter +=1
+ dob.obj=dob.obj.gsub(/^#\s/,"#{chapter_number_counter} ").
+ gsub(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1")
+ end
+ if dob.ln==no1
+ @subnumber=1
+ @subnumber=0 if dob.ln==no1
+ end
+ if dob.ln.to_s =~/^[0-6]/ \
+ and not dob.toc_ \
+ and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix
+ if dob.ln==no1
+ t_no1+=1; t_no2=0; t_no3=0
+ title_no="#{t_no1}"
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(title_no)
+ if dob.ln==no1
+ dob.name="#{title_no}" if not dob.name
+ dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase
+ tag=heading_tag_clean(tag)
+ dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \
+ ? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} "))
+ : (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later
+ end
+ if dob.ln !=no1 \
+ and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
+ dob.name ="#{title_no}" if not dob.name
+ dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.obj=dob.obj.gsub(/^/,"#{title_no}. ")
+ end
+ @md.seg_names << title_no
+ end
+ if dob.ln!=no1 \
+ and dob.name!~/^[a-z_\.]+$/ \
+ and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
+ dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ")
+ end
+ end
+ if dob.ln==no1 #watch because here you change dob.name
+ dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ end
+ if dob.ln==no2 #watch because here you change dob.name
+ t_no2+=1; t_no3=0
+ title_no="#{t_no1}.#{t_no2}"
+ dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob=number_sub_heading(dob,no2,title_no)
+ end
+ if dob.ln==no3 #watch because here you change dob.name
+ t_no3+=1
+ title_no="#{t_no1}.#{t_no2}.#{t_no3}"
+ dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob=number_sub_heading(dob,no3,title_no)
+ end
+ elsif dob.ln.to_s =~/^[0-6]/ \
+ and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
+ end
+ elsif dob.is ==:heading \
+ and dob.autonum_ \
+ and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
+ #here lies a bug, as is nil when run from -Dv --update, FIX
+ if (dob.name.nil? or dob.name.empty?) \
+ and dob.ln.to_s =~/^[0-9]/ \
+ and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
+ dob.name=$1
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ end
+ if @md.toc_lev_limit
+ end
+ elsif defined? dob.name \
+ and dob.name
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ end
+ dob.tags=dob.tags.uniq if defined? dob.tags
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def ocn(data) #and auto segment numbering increment
+ @tuned_file=SiSU_AO_DocumentStructureExtract::OCN.new(@md,data).ocn
+ @tuned_file
+ end
+ def xml(data)
+ @tuned_file=SiSU_AO_DocumentStructureExtract::XML.new(@md,data).dom
+ @tuned_file
+ end
+ def minor_numbering(data) #and auto segment numbering increment
+ @tuned_file=[]
+ number_small,letter_small=0,0
+ letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
+ data.each do |dob|
+ if dob.of ==:heading \
+ || dob.of ==:heading_insert \
+ || dob.of ==:para \
+ || dob.of ==:block
+ if dob.is ==:heading \
+ and dob.ln.to_s=~/^[0-9]/ #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
+ number_small,letter_small=0,0
+ elsif dob.is ==:para
+ if dob.obj =~/^#[ 1]/ \
+ and dob.obj !~/^#\s+(?:~#)?$/
+ letter_small=0
+ number_small=0 if dob.obj =~ /^#1/
+ number_small+=1
+ dob.obj=dob.obj.gsub(/^#[ 1]/,"#{number_small}. ")
+ end
+ if dob.obj =~/^_# /
+ dob.obj=dob.obj.gsub(/^_# /,"#{letter[letter_small]}. ")
+ dob.indent='1'
+ letter_small+=1
+ end
+ end
+ end
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def name_para_seg_filename(data) #segment naming, remaining
+ # paragraph name/numbering rules
+ # manual naming overrides, manual naming may be
+ # alpha-numeric characters mixed,
+ # numeric only (a number), if
+ # all segments have been named,
+ # the numbers used are over 1000 or
+ # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
+ # [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
+ # auto-naming takes the form of giving numbers to segments
+ # the rules for which are as follows
+ # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
+ # otherwise the level 4 segment number from the embedded document structure info is used
+ # if there is none a sequential number is designated, preceded by an underscore
+ @tuned_file,@unique_auto_name=[],[]
+ tags={}
+ art_filename_auto=1
+ @counter=1
+ if not @md.seg_autoname_safe \
+ and (@md.opt.act[:verbose_plus][:set]==:on \
+ || @md.opt.act[:maintenance][:set]==:on)
+ puts 'manual segment names, numbers used as names, risk warning (segmented html)'
+ end
+ ocn_html_seg=[]
+ data.each do |dob|
+ if dob.is==:heading \
+ && dob.ln \
+ and dob.ln.to_s =~/^[456]/
+ if dob.ln==4 \
+ and not dob.name \
+ and not @md.set_heading_seg
+ @md.set_heading_seg=true
+ end
+ if dob.name !~/^\S+/ \
+ and dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
+ possible_seg_name=$1
+ possible_seg_name=possible_seg_name.gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(possible_seg_name)
+ dob.name=possible_seg_name
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
+ @md.seg_names << possible_seg_name
+ elsif (@md.opt.act[:verbose_plus][:set]==:on \
+ or @md.opt.act[:maintenance][:set]==:on)
+ puts 'warn, there may be a conflicting numbering scheme'
+ end
+ end
+ if dob.ln==4 \
+ and dob.name #extract segment name from embedded document structure info
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(dob.name)
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
+ @md.seg_names << dob.name
+ end
+ end
+ if dob.ln==4 \
+ and not dob.name #if still no segment name, provide a numerical one
+ pf='_' #pg='' #may use e.g. '' or '~' or '_'
+ segn_auto="#{pf}#{art_filename_auto.to_s}"
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(segn_auto)
+ dob.name=segn_auto
+ dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ @md.seg_names << segn_auto
+ else puts 'segment name (numbering) error'
+ end
+ art_filename_auto+=1
+ end
+ if dob.ln==4 \
+ and not dob.name #should not occur
+ puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
+ end
+ end
+ if (dob.is ==:heading \
+ || dob.is ==:heading_insert) \
+ && dob.ln==4
+ @seg=dob.name
+ end
+ @tuned_file << if dob.is==:heading \
+ && (@md.pagenew || @md.pagebreak || @md.pageline)
+ m=dob.ln.to_s
+ dob_tmp=[]
+ if @md.pagenew.inspect =~/#{m}/
+ dob_tmp << SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) << dob
+ elsif @md.pagebreak.inspect =~/#{m}/
+ dob_tmp << SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) << dob
+ elsif @md.pageline.inspect =~/#{m}/
+ dob_tmp << SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) << dob
+ end
+ unless dob_tmp.length > 0; dob
+ else dob_tmp
+ end
+ else dob
+ end
+ if defined? dob.ocn \
+ and dob.ocn
+ @segname=((dob.is==:heading || dob.is==:heading_insert) && dob.ln==4 && (defined? dob.name)) \
+ ? (dob.name)
+ : @segname
+ tags["#{dob.ocn}"]={ segname: @segname }
+ ocn_html_seg[dob.ocn]=if (dob.is==:heading || dob.is==:heading_insert)
+ if dob.ln =~/[0-3]/
+ { seg: nil, level: dob.ln }
+ else #elsif dob.ln =~/[4-6]/
+ { seg: @seg, level: dob.ln }
+ end
+ else
+ { seg: @seg, level: nil }
+ end
+ end
+ dob.tags=dob.tags.uniq if defined? dob.tags
+ if defined? dob.tags \
+ and dob.tags.length > 0
+ #@segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \
+ #? (dob.name) \
+ #: @segname
+ dob.tags.each do |y|
+ tags[y]={ ocn: dob.ocn.to_s, segname: @segname }
+ end
+ end
+ dob
+ end
+ ocn_html_seg.each_with_index do |ocn,i|
+ if ocn \
+ and ocn[:level].to_s=~/[1-3]/
+ (1..4).each do |x|
+ if ocn_html_seg[i+x] and ocn_html_seg[i+x][:level]==4
+ ocn[:seg]=ocn_html_seg[i+x][:seg]
+ end
+ end
+ end
+ end
+ if @md.seg_names.length > 0
+ @md.set_heading_seg=true
+ end
+ tuned_file=@tuned_file.flatten
+ [tuned_file,tags,ocn_html_seg]
+ end
+ def set_heading_top(data) #% make sure no false positives
+ unless @md.set_heading_top
+ if (@md.opt.act[:verbose_plus][:set]==:on \
+ or @md.opt.act[:maintenance][:set]==:on)
+ puts "\tdocument contains no top level heading, (will have to manufacture one)"
+ end
+ @tuned_file=[]
+ data.each do |t_o|
+ unless @md.set_heading_top
+ if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \
+ and t_o !~/\A\s*\Z/m
+ @md.set_heading_top=true
+ if defined? @md.title \
+ and @md.title \
+ and defined? @md.title.full \
+ and defined? @md.creator \
+ and @md.creator
+ head=@md.title.main ? ([@lv='1',@obj=@md.title.main]) : ([@lv='1',@obj='[no title provided]'])
+ @tuned_file << head
+ end
+ end
+ end
+ @tuned_file << t_o
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_heading_seg(data) #% make sure no false positives
+ unless @md.set_heading_seg
+ if (@md.opt.act[:verbose_plus][:set]==:on \
+ or @md.opt.act[:maintenance][:set]==:on)
+ puts "\tdocument contains no segment level, (will have to manufacture one)"
+ end
+ @tuned_file=[]
+ data.each do |dob|
+ unless @md.set_heading_seg
+ if defined? dob.ln and dob.ln.to_s !~/^[0-3]/m \
+ and dob.obj !~/\A\s*\Z/m \
+ and dob.is !=:layout
+ @md.set_heading_seg=true
+ head=@md.title.main \
+ ? (dob.ln,dob.name,dob.obj=4,'seg',@md.title.main)
+ : (dob.ln,dob.name,dob.obj=4,'seg','[segment]')
+ @tuned_file << head
+ end
+ end
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_header_title(data) #% make sure no false positives
+ unless @md.set_header_title
+ if (@md.opt.act[:verbose_plus][:set]==:on \
+ or @md.opt.act[:maintenance][:set]==:on)
+ puts "\t no document title provided, (will have to manufacture one)"
+ end
+ @tuned_file=[]
+ data.each do |t_o|
+ unless @md.set_header_title
+ if t_o !~/^%{1,2}\s/m \
+ and t_o !~/\A\s*\Z/m
+ @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
+ @md.title.main=@md.heading_seg_first
+ @md.set_header_title=true
+ end
+ end
+ @tuned_file << t_o
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ end
+end
+__END__