diff options
author | Ralph Amissah <ralph@amissah.com> | 2012-12-12 15:07:22 -0500 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2012-12-12 15:07:22 -0500 |
commit | db23b42728ad667d328af2cd3759cc47091eade3 (patch) | |
tree | 804f5f7042b603315ed917f2428845547f8836c3 /lib/sisu/v4/dal_doc_str.rb | |
parent | debian/changelog (3.3.3-1) (diff) | |
parent | v4: 4.0.0 version & changelog, dates touched (diff) |
Merge tag 'sisu_4.0.0' into debian/sid
Diffstat (limited to 'lib/sisu/v4/dal_doc_str.rb')
-rw-r--r-- | lib/sisu/v4/dal_doc_str.rb | 1044 |
1 files changed, 1044 insertions, 0 deletions
# encoding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007, 2008, 2009, 2010, 2011, 2012 Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.sisudoc.org/sisu/en/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: document abstraction

=end
module SiSU_DAL_DocumentStructureExtract
  # Heading level marker (as written in markup) => numeric level.
  # Shared by OCN#ocn and XML#tags, which previously each spelled out the
  # same nine-way case statement.
  LV_TO_LN={
    'A' => 1, 'B' => 2, 'C' => 3,
    '1' => 4, '2' => 5, '3' => 6,
    '4' => 7, '5' => 8, '6' => 9
  }.freeze
  # Resets this class's own class-level state. Note that class variables are
  # per class hierarchy: the @@flag / @@counter set here are NOT the ones
  # used by Build below (Build does not inherit from Instantiate).
  class Instantiate < SiSU_Param::Parameters::Instructions
    @@flag={}                                                                  #Beware!!
    def initialize
      @@flag['table_to']=false
      @@counter=@@column=@@columns=0
      @@line_mode=''
    end
  end
  # Turns the array of raw markup chunks (@data) into an array of document
  # objects (headings, paragraphs, tables, code/poem/group/alt blocks,
  # comments, layout breaks) built by the SiSU_DAL_DocumentStructure classes.
  class Build
    # Block-state flags ('code', 'poem', 'group', 'block', 'alt', 'table').
    # Class-level, so shared by every Build instance and never reset here.
    @@flag={}                                                                  #Beware!!
    # md:   document parameters object (flag_endnotes, book_idx are read)
    # data: array of markup chunks (identify_parts) or of lines (build_lines)
    def initialize(md,data)
      @md,@data=md,data
      SiSU_DAL_DocumentStructureExtract::Instantiate.new
      @pb=SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page])
      @pbn=SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new])
    end
    # Maps a heading level marker to its numeric level (1-9); nil when the
    # marker contains none of A-C / 1-6. Matching is by regex, first hit wins.
    def ln_get(lv)
      case lv
      when /A/; 1
      when /B/; 2
      when /C/; 3
      when /1/; 4
      when /2/; 5
      when /3/; 6
      when /4/; 7
      when /5/; 8
      when /6/; 9
      end
    end
    # true when str contains an image link of the form {file.png ...}http(s)://...
    def image_test(str)
      (str=~/\{\s*\S+?\.png.+?\}https?:\/\/\S+/ ? true : false)
    end
    # true when str contains an asterisk (bullet marker in the indent prefix).
    def bullet_test(str)
      ((str=~/\*/) ? true : false)
    end
    # Reads the leading indent marker of str and returns [hang,indent]:
    #   _N    -> [N,N]      __N   -> [0,N]      _N_M  -> [N,M]
    # otherwise [0,0]. Captured values are Strings, defaults are Integer 0.
    def hang_and_indent_test(str)
      if str=~/^_([1-9])[^_]/
        [$1,$1]
      elsif str=~/^__([1-9])/
        [0,$1]
      elsif str=~/^_([0-9])_([0-9])/
        [$1,$2]
      else
        [0,0]
      end
    end
    # Like hang_and_indent_test(str1), and additionally wraps the first
    # term of str2 (up to a " \\ " separator if present, else the first line)
    # in bold markers. Returns [hang,indent,obj].
    def hang_and_indent_def_test(str1,str2)
      hang,indent=hang_and_indent_test(str1)
      obj=if str2 =~/^(.+?)\s+\\\\(?:\s+|\n)/
        str2.gsub(/^(.+?)(\s+\\\\(?:\s+|\n))/,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2")
      else
        str2.gsub(/^(.+?)\n/,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\n")
      end
      [hang,indent,obj]
    end
    # true when str contains an endnote: ~{ ... }~ or ~[ ... ]~
    def endnote_test?(str)
      ((str=~/~\{.+?\}~|~\[.+?\]~/) ? true : false)
    end
    # Pulls "*~name" tags out of str. Returns [str_without_tags,tags];
    # nametag, when given, is appended to tags. A nil str yields [nil,[]].
    def extract_tags(str,nametag=nil)
      tags=[]
      unless str.nil?
        if str =~/(?:^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/
          str=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i,
              "\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}").
            gsub(/ [ ]+/i,' ')
          tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten
          str=str.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ')          #may be issues with spaces would leave one, but "code" blocks?
        end
        tags=nametag ? (tags << nametag) : tags
      end
      [str,tags]
    end
    # Walks @data once and returns [tuned_file,meta]:
    #   tuned_file - array of document objects in source order (it can also
    #                contain nil entries, e.g. for consumed header, table-row
    #                and code lines), followed by inserted Endnotes / Index /
    #                Metadata headings
    #   meta       - metadata object built from the header fields encountered
    # Block state (code/poem/group/block/alt/table) is tracked in @@flag.
    #
    # NOTE(review): several tests below apply =~ to t_o after it may have been
    # replaced by a document object; that relies on Object#=~ returning nil
    # (removed in Ruby 3.2) unless those objects define =~ themselves - confirm.
    # NOTE(review): 'block' is missing from the flag lists marked "fix logic",
    # so block{ }block content does not appear to be collected into
    # @tuned_block the way group/alt content is - confirm intended behaviour.
    def identify_parts
      tuned_file=[]
      @tuned_block,@tuned_code=[],[]
      @@counter,@verse_count=0,0
      @metadata={}
      @data.each do |t_o|
        t_o=t_o.gsub(/(?:\n\s*\n)+/m,"\n") unless @@flag['code']
        if t_o !~/^(?:code|poem|alt|group|block)\{|^\}(?:code|poem|alt|group|block)|^(?:table\{|\{table)[ ~]/ \
        and not @@flag['code'] \
        and not @@flag['poem'] \
        and not @@flag['group'] \
        and not @@flag['block'] \
        and not @@flag['alt'] \
        and not @@flag['table']
          unless t_o =~/^(?:@\S+?:|%+)\s/                                      # extract book index for paragraph if any
            idx=if t_o=~/^=\{(.+)\}\s*$\Z/m; m=$1
              t_o=t_o.gsub(/\n=\{.+\}\s*$\Z/m,'')
              m
            else nil
            end
          end
          t_o=case t_o
          when /^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/                             #metadata, header
            if t_o=~/^#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}\s*(.+)/m
              tag,obj=$1,$2
              @metadata[tag]=obj
            end
            nil
          when /^%+\s/                                                         #comment
            if t_o=~/^%+\s+(.+)/
              comment_marker($1)
            else nil
            end
          when /^:?([A-C1-6])\~/                                               #heading / lv
            lv=$1
            ln=ln_get(lv)
            if t_o=~/^:?[A-C1-6]\~\s+(.+)/m
              obj=$1
              obj,tags=extract_tags(obj)
              h={ lv: lv, ln: ln, obj: obj, idx: idx, tags: tags }
              SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h)
            elsif t_o=~/^:?[A-C1-6]\~(\S+?)-\s+(.+)/m
              name,obj=$1,$2
              obj,tags=extract_tags(obj)
              h={ lv: lv, name: name, obj: obj, idx: idx, autonum_: false, tags: tags}
              SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h)
            elsif t_o=~/^:?[A-C1-6]\~(\S+)\s+(.+)/m
              name,obj=$1,$2
              obj,tags=extract_tags(obj,name)
              h={ lv: lv, name: name, obj: obj, idx: idx, tags: tags }
              SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h)
            else nil
            end
          when /^_(?:[1-9]!?|[1-9]?\*)\s+/                                     #indented and/or bullet paragraph
            if t_o=~/^(_(?:[1-9]?\*|[1-9]!?)\s+)(.+)/m
              tst,obj=$1,$2
              if t_o=~/^_[1-9]!\s+.+/m
                hang,indent,obj=hang_and_indent_def_test(tst,obj)
              else
                hang,indent=hang_and_indent_test(tst)
              end
              bullet=bullet_test(tst)
              image=image_test(obj)
              note=endnote_test?(obj)
              obj,tags=extract_tags(obj)
              unless obj=~/\A\s*\Z/m
                h={ bullet_: bullet, hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags }
                SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h)
              end
            else nil
            end
          when /^_[0-9]?_[0-9]!?\s+/                                           #hanging indent paragraph
            if t_o=~/^(_[0-9]?_[0-9]!?\s+)(.+)/m
              tst,obj=$1,$2
              if t_o=~/^_[0-9]?_[0-9]!\s+.+/m
                hang,indent,obj=hang_and_indent_def_test(tst,obj)
              else
                hang,indent=hang_and_indent_test(tst)
              end
              image=image_test(obj)
              note=endnote_test?(obj)
              obj,tags=extract_tags(obj)
              unless obj=~/\A\s*\Z/m
                h={ hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags }
                SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h)
              end
            else nil
            end
          when /^<(?:br)?:(?:pa?r|o(?:bj|---)?)>\s*$/                          #[br:par] #[br:obj]
            SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_obj])
          when /^(?:-\\\\-|<:pb>)\s*$/                                         #[br:pg]
            SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page])
          when /^(?:=\\\\=|<:pn>)\s*$/                                         #[br:pgn]
            SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new])
          else                                                                 #paragraph
            image=image_test(t_o)
            note=endnote_test?(t_o)
            obj,tags=extract_tags(t_o)
            unless obj=~/\A\s*\Z/m
              h={ bullet_: false, indent: 0, hang: 0, obj: obj, idx: idx, note_: note, image_: image, tags: tags }
              SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h)
            end
          end
        elsif not @@flag['code']
          if t_o =~/^code\{/
            @@flag['code']=true
            @@counter=1
            @codeblock_numbered=(t_o =~/^code\{#/) ? true : false
            t_o=comment_marker('code block start')                             # pushed by the final dispatch below
          elsif t_o =~/^poem\{/
            @@flag['poem']=true
            t_o=comment_marker('poem start')
            tuned_file << t_o
          elsif t_o =~/^group\{/
            @@flag['group']=true
            t_o=comment_marker('group text start')
            tuned_file << t_o
          elsif t_o =~/^block\{/
            @@flag['block']=true
            t_o=comment_marker('block text start')
            tuned_file << t_o
          elsif t_o =~/^alt\{/
            @@flag['alt']=true
            t_o=comment_marker('alt text start')
            tuned_file << t_o
          elsif t_o =~/^(?:table\{|\{table)[ ~]/
            tuned_file << comment_marker('table start')
            if t_o=~/^table\{(?:~h)?\s+/                                       # table{ ... }table : rows arrive in later chunks
              @@flag['table']=true
              @rows=''
              case t_o
              when /table\{~h\s+c(\d+);\s+(.+)/
                cols=$1
                col=$2.scan(/\d+/)
                heading=true
              when /table\{\s+c(\d+);\s+(.+)/
                cols=$1
                col=$2.scan(/\d+/)
                heading=false
              end
              @h={ head_: heading, cols: cols, widths: col, idx: idx }
            elsif t_o=~/^\{table(?:~h)?(?:\s+\d+;?)?\}\n.+\Z/m                 # {table} or {table N} : whole table in this chunk
              tbl=/^\{table(?:~h)?(?:\s+\d+;?)?\}\n(.+)\Z/m.match(t_o)[1]      #two table representations should be consolidated as one
              hd=((t_o =~/^\{table~h/) ? true : false)
              tbl,tags=extract_tags(tbl)
              rws=tbl.split(/\n/)
              rows=''
              cols=nil
              rws.each do |r|
                cols=(cols ? cols : (r.scan('|').length) +1)                   # column count taken from the first row
                r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}")
                rows += r + Mx[:tc_c]
              end
              col=[]
              if t_o =~/^\{table(?:~h)?\s+(\d+);?\}/                           #width of col 1 given as %, usually when wider than rest that are even
                c1=$1.to_i
                col=[ c1 ]
                if cols > 1                                                    # a single-column table has no remainder to share (was ZeroDivisionError)
                  width=(100 - c1)/(cols - 1)
                  (cols - 1).times { col << width }
                end
              else                                                             #all columns of equal width
                width=100.00/cols
                cols.times { col << width }
              end
              h={ head_: hd, cols: cols, widths: col, obj: rows, idx: idx, tags: tags }
              tuned_file << SiSU_DAL_DocumentStructure::ObjectTable.new.table(h)
              t_o=comment_marker('table end')
            elsif t_o=~/^\{table(?:~h)?\s+/                                    # {table W1; W2; ...} : explicit widths
              m1,tbl,hd=nil,nil,nil
              case t_o
              when /\{table~h\s+(.+?)\}\n(.+)\Z/m                              #two table representations should be consolidated as one
                m1,tbl,hd=$1,$2,true
              when /\{table\s+(.+?)\}\n(.+)\Z/m                                #two table representations should be consolidated as one
                m1,tbl,hd=$1,$2,false
              end
              tbl,tags=extract_tags(tbl)
              col=m1.scan(/\d+/)
              rws=tbl.split(/\n/)
              rows=''
              rws.each do |r|
                r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}")
                rows += r + Mx[:tc_c]
              end
              h={ head_: hd, cols: col.length, widths: col, obj: rows, idx: idx, tags: tags }
              tuned_file << SiSU_DAL_DocumentStructure::ObjectTable.new.table(h)
              t_o=comment_marker('table end')
            end
          end
        end
        if @@flag['table']
          if t_o =~/^\}table/                                                  #two table representations should be consolidated as one
            @@flag['table']=false
            headings,columns,widths,idx=@h[:head_],@h[:cols],@h[:widths],@h[:idx]
            @h={ head_: headings, cols: columns, widths: widths, idx: idx, obj: @rows }
            tuned_file << SiSU_DAL_DocumentStructure::ObjectTable.new.table(@h)
            @h,@rows=nil,''
            t_o=comment_marker('table end')
          else
            if t_o.is_a?(String) \
            and t_o !~/^table\{/
              t_o=t_o.gsub(/^\n+/m,'').                                        #check added for ruby 1.9.2 not needed in 1.8 series (tested in v2)
                gsub(/\n+/m,"#{Mx[:tc_p]}")
              @rows += t_o + Mx[:tc_c]
            end
            t_o=nil                                                            # row consumed into @rows
          end
        end
        if @@flag['code']
          if t_o =~/^\}code/
            @@flag['code']=false
            h={ obj: @tuned_code.join("\n"), tags: [], number_: @codeblock_numbered }
            tuned_file << SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.code(h)
            @tuned_code=[]
            t_o=comment_marker('code block end')
          end
          if @@flag['code'] \
          and t_o.is_a?(String)
            sub_array=t_o.dup + "#{Mx[:br_nl]}"
            @line_mode=sub_array.scan(/.+/).select {|w| w =~/[\S]+/}
            t_o=SiSU_DAL_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines('code').join
            @tuned_code << t_o
            t_o=nil                                                            # line consumed into @tuned_code
          end
        elsif @@flag['poem'] \
        or @@flag['group'] \
        or @@flag['block'] \
        or @@flag['alt']
          if @@flag['poem'] \
          and t_o =~/^\}poem/
            @@flag['poem']=false
            t_o=comment_marker('poem end')
          elsif ( @@flag['group'] \
          and t_o =~/^\}group/ )
            @@flag['group']=false
            obj,tags=extract_tags(@tuned_block.join("\n"))
            h={ obj: obj, tags: tags }
            @tuned_block=[]
            tuned_file << SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.group(h)
            t_o=comment_marker('group text end')
          elsif ( @@flag['block'] \
          and t_o =~/^\}block/ )
            @@flag['block']=false
            obj,tags=extract_tags(@tuned_block.join("\n"))
            h={ obj: obj, tags: tags }
            @tuned_block=[]
            tuned_file << SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.block(h)
            t_o=comment_marker('block text end')
          elsif ( @@flag['alt'] \
          and t_o =~/^\}alt/ )
            @@flag['alt']=false
            obj,tags=extract_tags(@tuned_block.join("\n"))
            h={ obj: obj, tags: tags }
            tuned_file << SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.alt(h)
            @tuned_block=[]
            t_o=comment_marker('alt text end')
          end
          if @@flag['poem'] \
          or @@flag['group'] \
          or @@flag['alt'] \
          and t_o =~/\S/ \
          and t_o !~/^(?:\}(?:verse|code|alt|group|block)|(?:verse|code|alt|group|block)\{)/ # fix logic
            @line_mode=t_o.dup.scan(/.+/)
            if @@flag['poem']                                                  # each blank-line separated stanza becomes a verse object
              t_o=SiSU_DAL_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines('poem').join
              t_o.split(/\n\n/).each do |v|
                v=v.gsub(/\n/m,"#{Mx[:br_nl]}\n")
                obj,tags=extract_tags(v)
                h={ obj: obj, tags: tags }
                t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.verse(h)
                tuned_file << t_o
              end
              @verse_count+=1
            end
          end
        end
        # final dispatch: collect into the open text block, or emit
        if not @@flag['code']
          if @@flag['poem'] \
          or @@flag['group'] \
          or @@flag['alt']
            if t_o.is_a?(String)
              t_o=block_whitespace(t_o)
              t_o=t_o + Mx[:br_nl] if t_o =~/\S+/
            elsif not t_o.nil? \
            and (t_o.is==:group \
            || t_o.is==:block \
            || t_o.is==:alt \
            || t_o.is==:verse)
              t_o.obj=block_whitespace(t_o.obj)
            end
            @tuned_block << t_o if t_o =~/\S+/
          else tuned_file << t_o
          end
        else tuned_file << t_o
        end
      end
      if @md.flag_endnotes
        tuned_file << @pb
        h={ ln: 2, obj: 'Endnotes', autonum_: false }
        tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
        h={ ln: 4, obj: 'Endnotes', name: 'endnotes', autonum_: false }
        tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
      end
      if @md.book_idx
        tuned_file << @pb
        h={ ln: 2, obj: 'Index', autonum_: false }
        tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
        h={ ln: 4, obj: 'Index', name: 'book_index', autonum_: false }
        tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
      end
      tuned_file << @pb
      h={ ln: 2, obj: 'Metadata', autonum_: false, ocn_: false }
      tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
      h={ ln: 4, obj: 'SiSU Metadata, document information', name: 'metadata', autonum_: false, ocn_: false }
      tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
      meta=SiSU_DAL_DocumentStructure::ObjectMetadata.new.metadata(@metadata)
      [tuned_file,meta]
    end
    # Splits an internal table string into an array of rows, each an array of
    # cell strings (rows end with Mx[:tc_c], cells are separated by Mx[:tc_p]).
    def table_rows_and_columns_array(table_str)
      table_str.split(/#{Mx[:tc_c]}/).map do |table_row|
        table_row.split(/#{Mx[:tc_p]}/)
      end
    end
    # Builds a heading object from h's :lv, :ln, :name and :obj with ocn '0'.
    def meta_heading(h)
      h={ lv: h[:lv], ln: h[:ln], name: h[:name], obj: h[:obj], ocn: '0' }
      SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h)
    end
    # Builds a paragraph object for str with object numbering switched off.
    def meta_para(str)
      h={ obj: str, ocn_: false }
      SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h)
    end
    # Returns a new array with each line of @data prepared for block output:
    # double whitespace becomes non-breaking spaces, trailing whitespace is
    # replaced by the newline marker (preceded by a space after a trailing
    # url), and for type 'code' each line is prefixed with a codeline marker.
    # Lines matching code{ / }code and Hash entries are passed through.
    def build_lines(type='')
      @data.map do |line|
        if line =~/\S/ \
        and line !~/^code\{|^\}code/ \
        and not line.is_a?(Hash)
          @@counter+=1 if @@flag['code']
          line=line.gsub(/\s\s/,"#{Mx[:nbsp]*2}").
            gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}")
          line=line.gsub(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type=='code' # REMOVE try sort for texpdf special case
          if line =~/(?:https?|file|ftp):\/\/\S+$/
            line.gsub(/\s*$/," #{Mx[:br_nl]}")
          else line.gsub(/\s*$/,"#{Mx[:br_nl]}")                               #unless type=='code'
          end
        elsif line =~/^\s*$/
          line.gsub(/\s*$/,"#{Mx[:br_nl]}")
        else line
        end
      end
    end
    private
    # Comment object carrying text; used to mark block starts and ends.
    def comment_marker(text)
      SiSU_DAL_DocumentStructure::ObjectComment.new.comment({ obj: text })
    end
    # Newlines -> newline marker, runs of spaces -> non-breaking spaces.
    def block_whitespace(str)
      str.gsub(/\n/m,"#{Mx[:br_nl]}").
        gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}").
        gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}")
    end
  end
  class Structure                                                              # this must happen early
    # md:  document parameters (lv1..lv6, markup_version are read)
    # dob: the document object to examine
    def initialize(md,dob)
      @md,@dob=md,dob
    end
    # Runs normalisation then heading detection; returns the resulting object.
    def structure
      structure_markup_normalize
      structure_markup
      @dob
    end
    # Promotes a plain paragraph to a heading when its text starts with one of
    # the heading patterns @md.lv1..@md.lv6 (first match wins).
    def structure_markup                                                       #build structure where structure provided only in meta header
      @dob=if @dob.is ==:para \
      && (((@dob.hang !~/[1-9]/) && (@dob.indent !~/[1-9]/)) \
      || (@dob.hang != @dob.indent)) \
      and not @dob.bullet_
        case @dob.obj
        when /^#{@md.lv1}/
          h={ lv: 'A', ln: 1 }
          SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h,@dob)
        when /^#{@md.lv2}/
          h={ lv: 'B', ln: 2 }
          SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h,@dob)
        when /^#{@md.lv3}/
          h={ lv: 'C', ln: 3 }
          SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h,@dob)
        when /^#{@md.lv4}/
          h={ lv: '1', ln: 4 }
          SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h,@dob)
        when /^#{@md.lv5}/
          h={ lv: '2', ln: 5 }
          SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h,@dob)
        when /^#{@md.lv6}/
          h={ lv: '3', ln: 6 }
          SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h,@dob)
        else @dob
        end
      else @dob
      end
      @dob
    end
    # For markup versions before 0.38, rewrites heading markers to the
    # internal level representation. Calls gsub directly on @dob, so on this
    # path @dob must respond to gsub and =~.
    def structure_markup_normalize                                             #needs a bit of thinking
      if @md.markup_version.determined < 0.38                                  #%convert internal representation, consider making 0.38 structure default ([A-C1-6] instead of [1-9]), requires downstream changes
        @dob=@dob.gsub(/^[456]~/,'!_').
          gsub(/^3~(\S+)/,"#{Mx[:lv_o]}6:\\1#{Mx[:lv_c]}").
          gsub(/^3~\s+/,"#{Mx[:lv_o]}6:#{Mx[:lv_c]}").
          gsub(/^2~(\S+)/,"#{Mx[:lv_o]}5:\\1#{Mx[:lv_c]}").
          gsub(/^2~\s+/,"#{Mx[:lv_o]}5:#{Mx[:lv_c]}").
          gsub(/^1~(\S+)/,"#{Mx[:lv_o]}4:\\1#{Mx[:lv_c]}").
          gsub(/^1~\s+/,"#{Mx[:lv_o]}4:#{Mx[:lv_c]}").
          gsub(/^:?C~(\S+)/,"#{Mx[:lv_o]}3:\\1#{Mx[:lv_c]}").
          gsub(/^:?C~\s+/,"#{Mx[:lv_o]}3:#{Mx[:lv_c]}").
          gsub(/^:?B~(\S+)/,"#{Mx[:lv_o]}2:\\1#{Mx[:lv_c]}").
          gsub(/^:?B~\s+/,"#{Mx[:lv_o]}2:#{Mx[:lv_c]}").
          gsub(/^:?A~(\S+)/,"#{Mx[:lv_o]}1:\\1#{Mx[:lv_c]}").
          gsub(/^:?A~\s+/,"#{Mx[:lv_o]}1:#{Mx[:lv_c]}")
        @dob=if @dob =~/^@(?:level|markup):\s/
          @dob.gsub(/3/,'6').
            gsub(/2/,'5').
            gsub(/1/,'4').
            gsub(/:?C/,'3').
            gsub(/:?B/,'2').
            gsub(/:?A/,'1')
        else @dob
        end
      else @dob
      end
    end
    # NOTE(review): reads and writes @t_o, which nothing in this class
    # assigns; as written the < 0.38 path would call gsub on nil. Left as is -
    # confirm whether this method is still used.
    def structure_marks
      if @md.markup_version.determined < 0.38
        @t_o=@t_o.gsub(/^1~(\S+)/,"#{Mx[:lv_o]}1:\\1#{Mx[:lv_c]}").
          gsub(/^1~\s+/,"#{Mx[:lv_o]}1:#{Mx[:lv_c]}").
          gsub(/^2~(\S+)/,"#{Mx[:lv_o]}2:\\1#{Mx[:lv_c]}").
          gsub(/^2~\s+/,"#{Mx[:lv_o]}2:#{Mx[:lv_c]}").
          gsub(/^3~(\S+)/,"#{Mx[:lv_o]}3:\\1#{Mx[:lv_c]}").
          gsub(/^3~\s+/,"#{Mx[:lv_o]}3:#{Mx[:lv_c]}").
          gsub(/^4~(\S+)/,"#{Mx[:lv_o]}4:\\1#{Mx[:lv_c]}").
          gsub(/^4~\s+/,"#{Mx[:lv_o]}4:#{Mx[:lv_c]}").
          gsub(/^5~(\S+)/,"#{Mx[:lv_o]}5:\\1#{Mx[:lv_c]}").
          gsub(/^5~\s+/,"#{Mx[:lv_o]}5:#{Mx[:lv_c]}").
          gsub(/^6~(\S+)/,"#{Mx[:lv_o]}6:\\1#{Mx[:lv_c]}").
          gsub(/^6~\s+/,"#{Mx[:lv_o]}6:#{Mx[:lv_c]}").
          gsub(/^[789]~/,'!_')
        @t_o
      else @t_o
      end
    end
  end
  # Assigns object citation numbers (ocn) and heading node identifiers to the
  # document objects in data.
  class OCN
    def initialize(md,data)
      @md,@data=md,data
    end
    # Walks @data and returns the array of objects with ocn / odv / osp /
    # parent (and for headings ln / node) set. Objects excluded from
    # numbering are passed through, with their non-object marker stripped.
    def ocn                                                                    #and auto segment numbering increment
      @o_array=[]
      node=ocn=ocn_dv=ocn_sp=ocnh=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnu=0         # h heading, o other, t table, g group, i image
      ocnh_lv=Array.new(7,0)                                                   # per-level heading counters, indexed by ln 1..6
      nodes=Array.new(7)                                                       # most recent node id per level, indexed by ln 1..6
      regex_exclude_ocn_and_node = /#{Rx[:meta]}|^@\S+?:\s|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^\^~ |<:e[:_]\d+?>|^<:\#|<:- |<[:!]!4|<hr width|#{Mx[:br_endnotes]}|\A\s*\Z/mi #ocn here # added with Tune.code
      parent=nil
      @data.each do |dob|
        if (dob.obj !~ regex_exclude_ocn_and_node || dob.is ==:code) \
        && (dob.of !=:comment \
        && dob.of !=:layout \
        && dob.of !=:meta) \
        && dob.obj !~/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/ \
        && dob.ocn_
          #dob.ln now is determined, and set earlier, check how best to remove this -->
          ln=(dob.is==:heading) ? LV_TO_LN[dob.lv] : nil
          if not dob.obj =~/<:#>|~#|-#/ \
          or not dob.toc_                                                      # fix this no longer in dob.obj
            ocn+=1
            if dob.is==:heading \
            and (ln.to_s =~/^[1-9]/ \
            or ln.to_s =~@md.lv1 \
            or ln.to_s =~@md.lv2 \
            or ln.to_s =~@md.lv3 \
            or ln.to_s =~@md.lv4 \
            or ln.to_s =~@md.lv5 \
            or ln.to_s =~@md.lv6)
              ocnh+=1
              # ln is an Integer (or nil) here, so the former "ln=~@md.lvN"
              # alternatives could never match; only levels 1-6 get a node.
              if ln and ln <= 6                                                #heading
                ocnh_lv[ln]+=1
                nodes[ln]="#{ln}:#{ocnh_lv[ln]};#{ocn}"
                node,ocn_sp=nodes[ln],"h#{ocnh}"
                parent=(ln==1) ? 0 : nodes[ln-1]                               #FIX
              end
            else
              ocno+=1
              if dob.is==:table
                ocnt+=1
                ocn_sp,parent="t#{ocnt}",node
              elsif dob.is==:code
                ocnc+=1
                ocn_sp,parent="c#{ocnc}",node
              elsif dob.is==:group \
              || dob.is==:block \
              || dob.is==:alt \
              || dob.is==:verse
                ocng+=1                                                        #group, poem
                ocn_sp,parent="g#{ocng}",node
              elsif dob.is==:image                                             #check
                ocni+=1
                ocn_sp,parent="i#{ocni}",node
              else ocnp+=1                                                     #paragraph
                ocn_sp,parent="p#{ocnp}",node
              end
            end
            if dob.is==:heading
              dob.ln,dob.node,dob.ocn,dob.odv,dob.osp,dob.parent=ln,node,ocn,ocn_dv,ocn_sp,parent
            else
              if dob.of !=:meta \
              && dob.of !=:comment \
              && dob.of !=:layout
                dob.ocn,dob.odv,dob.osp,dob.parent=ocn,ocn_dv,ocn_sp,parent
              end
            end
          else ocnu+=1                                                         # explicitly unnumbered object
            dob.obj=dob.obj.gsub(/#{Mx[:fa_o]}~##{Mx[:fa_c]}/,'') if dob.obj
            ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}"
            dob.ocn,dob.odv,dob.osp=ocn,ocn_dv,ocn_sp
          end
        elsif dob.obj=~/#{Mx[:pa_non_object_no_heading]}/
          dob=strip_non_object(dob,Mx[:pa_non_object_no_heading],true)
        elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/
          dob=strip_non_object(dob,Mx[:pa_non_object_dummy_heading],false)
        end
        if dob.is==:code \
        || dob.is==:verse \
        || dob.is==:alt \
        || dob.is==:group \
        || dob.is==:block
          dob.obj=dob.obj.gsub(/\n\n/,"\n")                                    #newlines taken out
        end
        @o_array << dob
      end
      @o_array
    end
    private
    # Removes the non-object marker from dob.obj and, for paragraphs and
    # headings, rebuilds the object with numbering off (headings also get
    # toc_ set to toc). Other object kinds are returned with obj stripped.
    def strip_non_object(dob,marker,toc)
      dob.obj=dob.obj.gsub(/#{marker}/,'')
      if dob.is==:para
        h={ obj: dob.obj, ocn_: false, ocn: nil }
        SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h,dob)
      elsif dob.is==:heading
        h={ obj: dob.obj, ocn_: false, ocn: nil, toc_: toc }
        SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h,dob)
      else dob
      end
    end
  end
  # Interleaves open/close structure tags for heading levels 1-6 into the
  # object stream, and prints the outline when the command flags contain V.
  class XML
    def initialize(md,data)
      @data,@md=data,md
    end
    # Sets up the level names and indent unit, returns structure_build's result.
    def dom
      @s=['0',
        'A',
        'B',
        'C',
        '1',
        '2',
        '3'
      ]
      @sp='  '
      structure_build
    end
    # Returns the flattened array of @data objects with structure tag objects
    # inserted before each heading of level 1-6 (closing what the new heading
    # ends, then opening it), wrapped in an outer open/close pair.
    # hs is [current_level, A_open, B_open, C_open].
    def structure_build
      tuned_file=[]
      hs=[0,false,false,false]
      tuned_file << tags({ lv: @s[0], status: 'open' })
      if @md.opt.cmd =~/V/
        puts "\nXML sisu structure outline --->\n"
        puts "<#{@s[0]}>"
      end
      @data.each do |o|
        if o.is ==:heading \
        || o.is ==:heading_insert
          level=[1,2,3,4,5,6].find { |l| l == o.ln }
          if level
            tuned_file << tag_close(o.ln,hs)
            tuned_file << tag_open(o,@s)
            if @md.opt.cmd =~/V/
              puts_tag_close(o.ln,hs)
              puts_tag_open(o,@s)
            end
            if level <= 3                                                      # A/B/C headings reset which of A, B, C are open
              hs=[level,true,level >= 2,level >= 3]
            else hs[0]=level
            end
          end
        end
        tuned_file << o
      end
      puts_tag_close(0,hs) if @md.opt.cmd =~/V/
      tuned_file << tag_close(0,hs)
      tuned_file.flatten
    end
    # Builds a structure tag object from o (keys :lv, :node, :status).
    def tags(o)
      tag=(o[:status]=='open') \
      ? %{<#{o[:lv]} id="#{o[:node]}">}
      : "</#{o[:lv]}>"
      h={ tag: tag, node: o[:node], lv: o[:lv], ln: LV_TO_LN[o[:lv]], status: o[:status] }
      SiSU_DAL_DocumentStructure::ObjectStructure.new.xml_dom(h)               #downstream code utilise else ignore like comments
    end
    # Opening tag object for heading o; tag is the level-name array.
    def tag_open(o,tag)
      tags({ lv: tag[o.ln], node: o.node, status: 'open' })
    end
    # Array of closing tag objects needed before a heading of level lev
    # (0 = end of document), innermost first.
    def tag_close(lev,hs)
      levels_to_close(lev,hs).map do |l|
        tags({ lv: @s[l], status: 'close' })
      end
    end
    def puts_tag_open(o,tag)
      puts %{#{@sp*o.ln}<#{tag[o.ln]} id="#{o.node}">}
    end
    # Prints the closing tags tag_close would produce, indented per level.
    def puts_tag_close(lev,hs)
      levels_to_close(lev,hs).each do |l|
        puts "#{@sp*l}</#{@s[l]}>"
      end
    end
    private
    # Levels whose tags must be closed, in closing order, given the current
    # state hs and the incoming level lev. From the current level hs[0] down
    # to 1, a level is closed when lev <= level; levels 1-3 additionally only
    # when marked open in hs. The outer level 0 is closed only for lev == 0.
    # Nothing is closed while no heading has been seen (hs[0] not in 1..6).
    def levels_to_close(lev,hs)
      top=hs[0]
      return [] unless [1,2,3,4,5,6].include?(top)
      levels=top.downto(1).select { |l| lev <= l and (l > 3 or hs[l]) }
      levels << 0 if lev==0
      levels
    end
  end
end
__END__