diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/sisu/v0/dal.rb | 4 | ||||
-rw-r--r-- | lib/sisu/v0/shared_xml.rb | 12 |
2 files changed, 9 insertions, 7 deletions
diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb index a8bf1cd0..e75309f9 100644 --- a/lib/sisu/v0/dal.rb +++ b/lib/sisu/v0/dal.rb @@ -240,13 +240,9 @@ module SiSU_DAL para.gsub!(/(#{Mx[:en_a_o]})\s*/,'\1 '); para.gsub!(/(~\{\s*)\s+/,'\1 ') para.gsub!(/ \/\//,"#{Mx[:br_line]}") #added 2004w29 para.gsub!(/<br>/,"#{Mx[:br_line]}") #needed by xml, xhtml etc. - #para.gsub!(/<p>/,'<p />') #consider - para.gsub!(/`/,"'") para.gsub!(/\t/,' ') para.gsub!(/\342\200\231/u,"'") #if para =~/’/ #Avoid #‘ ’ #“ ” para.gsub!(/�/u,' ') #watch, replace with char code - para.gsub!(/[“”]/u,'""') - para.gsub!(/[–—]/u,'-') #— – chk para.gsub!(/·/u,'*') para.gsub!(/\\copy(?:right)?\b/,'©') para.gsub!(/\\trademark\b|\\tm\b/,'®') diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 228a5c14..7ecc52bb 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -254,6 +254,8 @@ module SiSU_XML_munge para.gsub!(/ÿ/u, 'ÿ') # 'ÿ' # ÿ para.gsub!(/‘/u, '‘') # '‘' # ‘ para.gsub!(/’/u, '’') # '’' # ’ + para.gsub!(/“/u, '“') # “ # “ + para.gsub!(/”/u, '”') # ” # ” para.gsub!(/–/u, '–') # – # – para.gsub!(/—/u, '—') # — # — para.gsub!(/∝/u, '∝') # ∝ # ∝ @@ -355,6 +357,8 @@ module SiSU_XML_munge para.gsub!(/ÿ/u, 'ÿ') # ÿ para.gsub!(/‘/u, '&#lsquo;') # ‘ # ‘ para.gsub!(/’/u, '&#rsquo;') # ’ # ’ + para.gsub!(/“/u, '“') # “ # “ + para.gsub!(/”/u, '”') # ” # ” para.gsub!(/–/u, '–') # – # – para.gsub!(/—/u, '—') # — # — para.gsub!(/∝/u, '∝') # ∝ # ∝ @@ -369,11 +373,13 @@ module SiSU_XML_munge end def tidywords(wordlist) wordlist.each do |x| + #imperfect solution will not catch all possible cases x.gsub!(/&/,'&') unless x =~/&\S+;/ + x.gsub!(/&([A-Z])/,'&\1') end end def markup(para='') - wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 + wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />') para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') @@ -439,7 +445,7 @@ module SiSU_XML_munge "<image.path>#{@dir.url.images_local}\/\\1</image.path>") para.gsub!(/ /,' ') #para.gsub!(/ /,' ') #clean - wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 + wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para end @@ -462,7 +468,7 @@ module SiSU_XML_munge "<image.path>#{@dir.url.images_local}\/\\1</image.path>") para.gsub!(/ /,' ') #para.gsub!(/ /,' ') #clean - wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 + wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para end |