diff options
| -rw-r--r-- | lib/sisu/v0/character_encoding.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/odf.rb | 7 | ||||
| -rw-r--r-- | lib/sisu/v0/param.rb | 42 | ||||
| -rw-r--r-- | lib/sisu/v0/shared_xml.rb | 29 | ||||
| -rw-r--r-- | lib/sisu/v0/xhtml.rb | 2 | ||||
| -rw-r--r-- | lib/sisu/v0/xml.rb | 2 | ||||
| -rw-r--r-- | lib/sisu/v0/xml_dom.rb | 2 | 
7 files changed, 61 insertions, 27 deletions
| diff --git a/lib/sisu/v0/character_encoding.rb b/lib/sisu/v0/character_encoding.rb index 60c2f335..aa856cdd 100644 --- a/lib/sisu/v0/character_encoding.rb +++ b/lib/sisu/v0/character_encoding.rb @@ -374,7 +374,9 @@ module SiSU_character_encode    ['ü',  252, '374', '\303\274',     'ü', 'ü',    'ü',    '\"{u}',           '',                'Small u, umlaut         ü' ],    ['ý',  253, '375', '\303\275',     'ý', 'ý',  'ý',    '',                '',                'Small y, acute accent   ý' ],    ['þ',  254, '376', '\303\276',     'þ', 'þ',   'þ',    '',                '',                'Small thorn, Icelandic  þ' ], -  ['ÿ',  255, '377', '\303\277',     'ÿ', 'ÿ',    'ÿ',    '',                '',                'Smally y, umlaut        ÿ' ] +  ['ÿ',  255, '377', '\303\277',     'ÿ', 'ÿ',    'ÿ',    '',                '',                'Smally y, umlaut        ÿ' ], +  ['∝',     ,    '',         '',    '∝', '∝',    '∝',    '',                '',                'proportional to  U+221D (8733) ∝' ], +  ['∞',     ,    '',         '',    '∞', '∞',   '∞',    '',                '',                'infinity  U+221E (8734) ∞' ],  ]      end    end diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index 6b1491c2..bf23f91f 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -267,7 +267,7 @@ module SiSU_ODF          #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, also works            #%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration          para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, -          %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}}) +          %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}}) if para !~/http:\/\// # improve upon, document crash where url contains '@' symbol          para=case para          when /^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/m            m=$1 @@ -443,6 +443,7 @@ module SiSU_ODF            #para.gsub!(/<(~\d+;(?:\w|[0-6]:)\d+;\w\d+)><(#@dp:#@dp)>/,'<\1><\2>')            para='' if para =~/#{Mx[:lv_o]}\d+:.*?#{Mx[:lv_c]}.+?#{Mx[:pa_non_object_dummy_heading]}/            para_array=[] +          para.gsub!(/</,'<'); para.gsub!(/>/,'>')            word=para.scan(/\S+|\n/)            if word              word.each do |w| # _ - / # | : ! ^ ~ @@ -487,8 +488,10 @@ module SiSU_ODF            para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<text:span text:style-name="T3">\1</text:span>')            para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<text:span text:style-name="T4">\1</text:span>')            para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<text:span text:style-name="T5">\1</text:span>') -          para.gsub!(/`/,"'") +          para.gsub!(/[`’]/,"'")            para.gsub!(//u,'-') +          para.gsub!(/ /u, ' ')       # space identify +          para.gsub!(/ /u, ' ')       # space identify            para.gsub!(/·/u,'*')            para.gsub!(/[“”]/u,'""')            para.gsub!(/[–—]/u,'-')   #— – chk diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb index 3cfbf1e5..b211f5c1 100644 --- a/lib/sisu/v0/param.rb +++ b/lib/sisu/v0/param.rb @@ -368,32 +368,32 @@ module SiSU_Param                  @dc_date_modified=date                  @date_modified_scheme='scheme="ISO-8601"' if date =~/\d{4}-\d{2}-\d{2}/                end -            when /^(?:0~type|@type:)\s+(.+?)$/m;             @dc_type=$1                            #% metainfo DC -            when /^(?:0~format|@format:)\s+(.+?)$/m;         @dc_format=$1                          #% metainfo DC -            #when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1                      #% metainfo DC -            when /^(?:0~source|@source:)\s+(.+?)$/m;         @dc_source=$1                          #% metainfo DC -            when /^(?:0~language(?:\.document)?|@language(?:\.document)?:)\s+(.+?)$/m               #% metainfo DC +            when /^(?:0~type|@type:)\s+(.+?)$/m;             @dc_type=$1                                                 #% metainfo DC +            when /^(?:0~format|@format:)\s+(.+?)$/m;         @dc_format=$1                                               #% metainfo DC +            #when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1                                           #% metainfo DC +            when /^(?:0~source|@source:)\s+(.+?)$/m;         @dc_source=$1                                               #% metainfo DC +            when /^(?:0~language(?:\.document)?|@language(?:\.document)?:)\s+(.+?)$/m                                    #% metainfo DC                x=$1.strip                lang=SiSU_Env::Standardise_language.new(x.dup)                @dc_language[:code]=lang.code                @dc_language[:name]=lang.title -            when /^(?:0~language\.original|@language\.original:)\s+(.+?)$/m                         #% metainfo DC +            when /^(?:0~language\.original|@language\.original:)\s+(.+?)$/m                                              #% metainfo DC                x=$1.strip                lang=SiSU_Env::Standardise_language.new(x.dup)                @language_original[:name]=lang.title -            when /^(?:0~relation|@relation:)\s+(.+?)$/m;     @dc_relation=$1                        #% metainfo DC -            when /^(?:0~coverage|@coverage:)\s+(.+?)$/m;     @dc_coverage=$1                        #% metainfo DC -            when /^(?:0~rights|@rights:)\s+(.+?)$/m;         @dc_rights=$1                          #% metainfo DC copyright, public domain, copyleft, creative commons, etc. -            when /^(?:0~papersize|@papersize:)\s+(.+?)$/m                                           #% metainfo DC +            when /^(?:0~relation|@relation:)\s+(.+?)$/m;     @dc_relation=$1                                             #% metainfo DC +            when /^(?:0~coverage|@coverage:)\s+(.+?)$/m;     @dc_coverage=$1                                             #% metainfo DC +            when /^(?:0~rights|@rights:)\s+(.+?)$/m;         @dc_rights=$1.gsub(/<(?:\/\s*)?br(?:\s*\/)?>/,Mx[:br_line]) #% metainfo DC copyright, public domain, copyleft, creative commons, etc. +            when /^(?:0~papersize|@papersize:)\s+(.+?)$/m                                                                #% metainfo DC                l=$1                if @mod.inspect !~/--papersize[=-]\S+/                  l=determine_papersize(l.dup)                  @papersize=l                end -            when /^(?:0~keywords?|@keywords?:?)\s+(.+?)$/m;  @keywords=$1                           #% metainfo DC -            when /^(?:0~comments?|@comments?:?)\s+(.+?)$/m;  @comments=$1                           #% metainfo DC -            when /^(?:0~abstract|@abstract)\s+(.+?)$/m;      @abstract=$1                           #% metainfo DC -            when /^(?:0~tags?|@tags?:)\s+\S/m                                                       #% metainfo +            when /^(?:0~keywords?|@keywords?:?)\s+(.+?)$/m;  @keywords=$1                                                #% metainfo DC +            when /^(?:0~comments?|@comments?:?)\s+(.+?)$/m;  @comments=$1.gsub(/<(?:\/\s*)?br(?:\s*\/)?>/,Mx[:br_line])  #% metainfo DC +            when /^(?:0~abstract|@abstract)\s+(.+?)$/m;      @abstract=$1.gsub(/<(?:\/\s*)?br(?:\s*\/)?>/,Mx[:br_line])  #% metainfo DC +            when /^(?:0~tags?|@tags?:)\s+\S/m                                                                            #% metainfo                tags=para.match(/^(?:0~tags?|@tags?:)\s+(.+)\Z/m)[1]                tags.split(/,|$/).each do |tag|                  tag.strip! @@ -403,18 +403,18 @@ module SiSU_Param                  tag_a=tag_a.split(/:/).join('][')                  @tag_a << tag_a                end -            when /^(?:0~catalogue|@catalogue:)\s+(.+)?$/m                                           #% metainfo +            when /^(?:0~catalogue|@catalogue:)\s+(.+)?$/m                                                                #% metainfo                m=$1                @cls_pg=m.match(/pg=(\S+)/)[1] if m =~/pg=/                @cls_isbn=m.match(/isbn=(\S+)/)[1] if m =~/isbn=/                @cls_dewey=m.match(/dewey=(\S+)/)[1] if m =~/dewey=/                @cls_loc=m.match(/loc=(\S+)/)[1] if m =~/loc=/ -            when /^(?:0~class(?:ify)?_loc|@class(?:ify)?_loc:)\s+(.+?)$/m;             @cls_loc=$1  #% metainfo -            when /^(?:0~class(?:ify)?_dewey|@class(?:ify)?_dewey:)\s+(.+?)$/m;         @cls_dewey=$1 #% metainfo -            when /^(?:0~class(?:ify)?_pg|@class(?:ify)?_pg)\s+(.+?)$/m;                @cls_pg=$1   #% metainfo -            when /^(?:0~(?:class(?:ify)?_)?isbn|@(?:class(?:ify)?_)?isbn)\s+(\S+?)$/m; @cls_isbn=$1 #% metainfo -            when /^(?:0~images?|@images?:)\s+(.+?)$/m;                                 @image=$1    #% processing -            when /^(?:0~(?:toc|structure)|@(?:toc|structure):)\s+(.+?)\Z/m                           #% processing +            when /^(?:0~class(?:ify)?_loc|@class(?:ify)?_loc:)\s+(.+?)$/m;             @cls_loc=$1                       #% metainfo +            when /^(?:0~class(?:ify)?_dewey|@class(?:ify)?_dewey:)\s+(.+?)$/m;         @cls_dewey=$1                      #% metainfo +            when /^(?:0~class(?:ify)?_pg|@class(?:ify)?_pg)\s+(.+?)$/m;                @cls_pg=$1                        #% metainfo +            when /^(?:0~(?:class(?:ify)?_)?isbn|@(?:class(?:ify)?_)?isbn)\s+(\S+?)$/m; @cls_isbn=$1                      #% metainfo +            when /^(?:0~images?|@images?:)\s+(.+?)$/m;                                 @image=$1                         #% processing +            when /^(?:0~(?:toc|structure)|@(?:toc|structure):)\s+(.+?)\Z/m                                                 #% processing                doc_toc_str=$1                @toc=doc_toc_str.split(/;\s*/)                @toc=[ @toc ] if @toc == String diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 9203f0df..228a5c14 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -166,6 +166,8 @@ module SiSU_XML_munge            #¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷            ##para.gsub!(//, '&#;')            ##para.gsub!(//, '&;') +          para.gsub!(/</u, '<') # '<'   # < +          para.gsub!(/>/u, '>') # '>'   # >            para.gsub!(/¢/u, '¢') # '¢'   # ¢            para.gsub!(/£/u, '£') # '£'  # £            para.gsub!(/¥/u, '¥') # '¥'    # ¥ @@ -250,10 +252,23 @@ module SiSU_XML_munge            para.gsub!(/ü/u, 'ý') # 'ü'   # ý            para.gsub!(/þ/u, 'þ') # 'þ'  # þ            para.gsub!(/ÿ/u, 'ÿ') # 'ÿ'   # ÿ +          para.gsub!(/‘/u, '‘') # '‘' # ‘ +          para.gsub!(/’/u, '’') # '’' # ’ +          para.gsub!(/–/u, '–') # –   # – +          para.gsub!(/—/u, '—') # —   # — +          para.gsub!(/∝/u, '∝') # ∝    # ∝ +          para.gsub!(/∞/u, '∞') # ∞   # ∞ +          para.gsub!(/™/u, '™') # ™   # ™ +          para.gsub!(/✠/u, '†') # †  # † incorrect replacement! † +          para.gsub!(/ /u, ' ')       # space identify +          para.gsub!(/ /u, ' ')       # space identify          end        end        def html(para='')          if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn +          para.gsub!(/ /u, ' ')           # space identify +          para.gsub!(/ /u, ' ')           # space identify +        else            para.gsub!(/¢/u, '¢')      # ¢            para.gsub!(/£/u, '£')     # £            para.gsub!(/¥/u, '¥')       # ¥ @@ -338,6 +353,16 @@ module SiSU_XML_munge            para.gsub!(/ü/u, 'ü')      # ý            para.gsub!(/þ/u, 'þ')     # þ            para.gsub!(/ÿ/u, 'ÿ')      # ÿ +          para.gsub!(/‘/u, '&#lsquo;')    # ‘  # ‘ +          para.gsub!(/’/u, '&#rsquo;')    # ’  # ’ +          para.gsub!(/–/u, '–')     # –  # – +          para.gsub!(/—/u, '—')     # —  # — +          para.gsub!(/∝/u, '∝')      # ∝   # ∝ +          para.gsub!(/∞/u, '∞')     # ∞  # ∞ +          para.gsub!(/™/u, '™')     # ™  # ™ +          para.gsub!(/✠/u, '†')    # † # † incorrect replacement † +          para.gsub!(/ /u, ' ')           # space identify +          para.gsub!(/ /u, ' ')           # space identify          end        end        self @@ -381,6 +406,10 @@ module SiSU_XML_munge            %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4})          para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/,            %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1}) +        para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/, +          %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4}) +        para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}image/, +          %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1})          para.gsub!(/(^|#{Mx[:gl_c]}|\s)#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/,            '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune          para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, diff --git a/lib/sisu/v0/xhtml.rb b/lib/sisu/v0/xhtml.rb index 95da5d2d..5f99cacf 100644 --- a/lib/sisu/v0/xhtml.rb +++ b/lib/sisu/v0/xhtml.rb @@ -267,8 +267,8 @@ WOK          (0..6).each { |x| @cont[x]=@level[x]=false }          (4..6).each { |x| @xml_contents_close[x]='' }          data.each do |para| -          para=@trans.markup(para)            @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 +          para=@trans.markup(para)            if para =~/^#{Rx[:meta]}\s*.+?$/ # for headers              d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta              if d_meta; xml_head(d_meta) diff --git a/lib/sisu/v0/xml.rb b/lib/sisu/v0/xml.rb index 4826a503..49c71b88 100644 --- a/lib/sisu/v0/xml.rb +++ b/lib/sisu/v0/xml.rb @@ -291,8 +291,8 @@ WOK          (0..6).each { |x| @cont[x]=@level[x]=false }          (4..6).each { |x| @xml_contents_close[x]='' }          data.each do |para| -          para=@trans.markup(para)            @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 +          para=@trans.markup(para)            if para =~/^#{Rx[:meta]}\s*.+?$/ # for headers              d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta              if d_meta; xml_head(d_meta) diff --git a/lib/sisu/v0/xml_dom.rb b/lib/sisu/v0/xml_dom.rb index b2bc0de7..13aed504 100644 --- a/lib/sisu/v0/xml_dom.rb +++ b/lib/sisu/v0/xml_dom.rb @@ -349,8 +349,8 @@ WOK          (0..6).each { |x| @cont[x]=@level[x]=false }          (4..6).each { |x| @xml_contents_close[x]='' }          data.each do |para| -          para=@trans.markup(para)            @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 +          para=@trans.markup(para)            if para =~/^#{Rx[:meta]}\s*(.+?)$/ # for headers              d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta              if d_meta; xml_head(d_meta) | 
