diff options
-rw-r--r-- | lib/sisu/v0/shared_xml.rb | 89 | ||||
-rw-r--r-- | lib/sisu/v0/sst_from_xml.rb | 7 | ||||
-rw-r--r-- | lib/sisu/v0/sst_to_s_xml_dom.rb | 14 | ||||
-rw-r--r-- | lib/sisu/v0/sst_to_s_xml_node.rb | 11 | ||||
-rw-r--r-- | lib/sisu/v0/sst_to_s_xml_sax.rb | 10 |
5 files changed, 121 insertions, 10 deletions
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index fd27c664..abc6cc1a 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -1,3 +1,4 @@ +# coding: utf-8 =begin * Name: SiSU @@ -154,10 +155,44 @@ module SiSU_XML_munge class Trans require "#{SiSU_lib}/defaults" def initialize(md) + @md=md @sys=SiSU_Env::System_call.new - @dir=SiSU_Env::Info_env.new(md.fns) + @dir=SiSU_Env::Info_env.new(@md.fns) @dp=SiSU_Env::Info_env.new.digest.pattern @url_brace=SiSU_Viz::Skin.new.url_decoration + if @md.sem_tag + @ab ||=semantic_tags.default + end + end + def semantic_tags + def default + { + :pub => 'publication', + :ref => 'reference', + :desc => 'description', + :conv => 'convention', + :vol => 'volume', + :pg => 'page', + :ct => 'cite', + :cty => 'city', + :org => 'organization', + :d => 'date', + :t => 'title', + :a => 'author', + :n => 'name', + :fn => 'firstname', + :f => 'firstname', + :mn => 'middlename', + :m => 'middlename', + :ln => 'lastname', + :l => 'lastname', + :i => 'initials', + :q => 'quote', + :y => 'year', + :ab => 'abreviation', + } + end + self end def char_enc #character encode def utf8(para='') @@ -348,10 +383,8 @@ module SiSU_XML_munge end end def markup(para='') - #if para !~/^<:code>/ - wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 - para=tidywords(wordlist).join(' ').strip - #end + wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 + para=tidywords(wordlist).join(' ').strip para.gsub!(/(^|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1') para.gsub!(/<:pb>\s*/,'') para.gsub!(/<+[-~]#>+/,'') @@ -426,6 +459,52 @@ module SiSU_XML_munge para.gsub!(/<(\/?en)>/,'<\1>') para end + def xml_sem_block_paired(matched) # colon depth: many, recurs + matched.gsub!(/\b(a):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:a]} depth="many">\\2</sem:#{@ab[:a]}>}) # sem : + matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:vol]} depth="many">\\2</sem:#{@ab[:vol]}>}) # sem : + matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:pub]} depth="many">\\2</sem:#{@ab[:pub]}>}) # sem : + matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:ref]} depth="many">\\2</sem:#{@ab[:ref]}>}) # sem : + matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:desc]} depth="many">\\2</sem:#{@ab[:desc]}>}) # sem : + matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:conv]} depth="many">\\2</sem:#{@ab[:conv]}>}) # sem : + matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:ct]} depth="many">\\2</sem:#{@ab[:ct]}>}) # sem : + matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:cty]} depth="many">\\2</sem:#{@ab[:cty]}>}) # sem : + matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:org]} depth="many">\\2</sem:#{@ab[:org]}>}) # sem : + matched.gsub!(/\b(d):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:d]} depth="many">\\2</sem:#{@ab[:d]}>}) # sem : + matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:n]} depth="many">\\2</sem:#{@ab[:n]}>}) # sem : + matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'<sem:\1 depth="many">\2</sem:\1>') # sem : + end + def xml_semantic_tags(para) + if @md.sem_tag + para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : + para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : + para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : + #colon one / single / flat / shallow + para.gsub!(/:\{(.+?)\}:a\b/m, %{<sem:#{@ab[:a]} depth="one">\\1</sem:#{@ab[:a]}>}) # sem : + para.gsub!(/:\{(.+?)\}:n\b/m, %{<sem:#{@ab[:n]} depth="one">\\1</sem:#{@ab[:n]}>}) # sem : + para.gsub!(/:\{(.+?)\}:t\b/m, %{<sem:#{@ab[:t]} depth="one">\\1</sem:#{@ab[:t]}>}) # sem : + para.gsub!(/:\{(.+?)\}:ref\b/m, %{<sem:#{@ab[:ref]} depth="one">\\1</sem:#{@ab[:ref]}>}) # sem : + para.gsub!(/:\{(.+?)\}:desc\b/m, %{<sem:#{@ab[:desc]} depth="one">\\1</sem:#{@ab[:desc]}>}) # sem : + para.gsub!(/:\{(.+?)\}:cty\b/m, %{<sem:#{@ab[:cty]} depth="one">\\1</sem:#{@ab[:cty]}>}) # sem : + para.gsub!(/:\{(.+?)\}:org\b/m, %{<sem:#{@ab[:org]} depth="one">\\1</sem:#{@ab[:org]}>}) # sem : + para.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="one">\1</sem:\2>') # sem : + #semicolon zero / none + para.gsub!(/;\{([^}]+(?![;]))\};t\b/m, %{<sem:#{@ab[:t]} depth="zero">\\1</sem:#{@ab[:t]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};q\b/m, %{<sem:#{@ab[:q]} depth="zero">\\1</sem:#{@ab[:q]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ref\b/m, %{<sem:#{@ab[:ref]} depth="zero">\\1</sem:#{@ab[:ref]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};desc\b/m,%{<sem:#{@ab[:desc]} depth="zero">\\1</sem:#{@ab[:desc]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};y\b/m, %{<sem:#{@ab[:y]} depth="zero">\\1</sem:#{@ab[:y]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ab\b/m, %{<sem:#{@ab[:ab]} depth="zero">\\1</sem:#{@ab[:ab]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};pg\b/m, %{<sem:#{@ab[:pg]} depth="zero">\\1</sem:#{@ab[:pg]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};fn?\b/m, %{<sem:#{@ab[:fn]} depth="zero">\\1</sem:#{@ab[:fn]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};mn?\b/m, %{<sem:#{@ab[:mn]} depth="zero">\\1</sem:#{@ab[:mn]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};ln?\b/m, %{<sem:#{@ab[:ln]} depth="zero">\\1</sem:#{@ab[:ln]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};i\b/m, %{<sem:#{@ab[:i]} depth="zero">\\1</sem:#{@ab[:i]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};org\b/m, %{<sem:#{@ab[:org]} depth="zero">\\1</sem:#{@ab[:org]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};cty\b/m, %{<sem:#{@ab[:cty]} depth="zero">\\1</sem:#{@ab[:cty]}>}) # sem ; + para.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="zero">\1</sem:\2>') # sem ; + end + para + end end end module SiSU_XML_tags #Format diff --git a/lib/sisu/v0/sst_from_xml.rb b/lib/sisu/v0/sst_from_xml.rb index d93e68b8..af43e611 100644 --- a/lib/sisu/v0/sst_from_xml.rb +++ b/lib/sisu/v0/sst_from_xml.rb @@ -1,3 +1,4 @@ +# coding: utf-8 =begin * Name: SiSU @@ -99,6 +100,10 @@ module SiSU_sst_from_xml text.gsub!(/<i>(.+?)<\/i>/,'/{\1}/') text.gsub!(/<b>(.+?)<\/b>/,'*{\1}*') text.gsub!(/<u>(.+?)<\/u>/,'_{\1}_') + text.gsub!(/<sem:([a-z_]+)\s+depth=['"]zero['"]>(\s*.+?\s*)<\/sem:\1>/,';{ \2 };\1') + text.gsub!(/<sem:([a-z_]+)\s+depth=['"]one['"]>(\s*.+?\s*)<\/sem:\1>/,':{ \2 }:\1') + text.gsub!(/<sem:([a-z_]+)\s+depth=['"]many['"]>(\s*.+?\s*)<\/sem:\1>/,'\1:{ \2 }:\1') + text.gsub!(/<sem:([a-z_]+)>(\s*.+?\s*)<\/sem:\1>/,'\1:{ \2 }:\1') text.gsub!(/\s +/,' ') text.strip! #text.gsub!(/<header>(.+?)<\/header/,"@#{x.name}: \\1\n\n") @@ -126,7 +131,7 @@ module SiSU_sst_from_xml sax end def dom - raise "#{__FILE}::#{__LINE__} xml dom representation to sst not yet implemented (experimental simple xml representations sax and node to sst are in place)." + raise "#{__FILE__}::#{__LINE__} xml dom representation to sst not yet implemented (experimental simple xml representations sax and node to sst are in place)." end def xml_to_sisu unless @opt.files.empty? diff --git a/lib/sisu/v0/sst_to_s_xml_dom.rb b/lib/sisu/v0/sst_to_s_xml_dom.rb index a1c81532..f9c190bf 100644 --- a/lib/sisu/v0/sst_to_s_xml_dom.rb +++ b/lib/sisu/v0/sst_to_s_xml_dom.rb @@ -1,3 +1,4 @@ +# coding: utf-8 =begin * Name: SiSU @@ -161,7 +162,7 @@ module SiSU_simple_xml_model_dom para.gsub!(/~\{([*+]+)\s+(.+?)\}~/, '<endnote><symbol>\1</symbol><note>\2</note></endnote> ') para.gsub!(/~\{(.+?)\}~/, - '<endnote><note>\2</note></endnote> ') + '<endnote><note>\1</note></endnote> ') end def xml_head(meta) txt=meta.text @@ -356,7 +357,7 @@ WOK (0..6).each { |x| @cont[x]=@level[x]=false } (4..6).each { |x| @xml_contents_close[x]='' } data.each do |para| - para=SiSU_sem::Tags.new(para).rm.all + #para=SiSU_sem::Tags.new(para,@md).rm.all wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para.gsub!(/<[-~]#>/,'') @@ -546,10 +547,19 @@ WOK include SiSU_Env def initialize(data,md) @data,@md=data,md + @trans=SiSU_XML_munge::Trans.new(@md) end def xml @sisu=[] @data.each do |para| + if para !~/^\s*(?:%+ |<:code>)/ + if @md.sem_tag and para =~/[:;]\{|\}[:;]/ + para=@trans.xml_semantic_tags(para) + end + if para =~/[:;]\{|\}[:;]/ + para=SiSU_sem::Tags.new(para,@md).rm.all + end + end para.gsub!(/\/\{(.+?)\}\//,'<italic>\1</italic>') para.gsub!(/\*\{(.+?)\}\*/,'<bold>\1</bold>') para.gsub!(/!\{(.+?)\}!/,'<bold>\1</bold>') diff --git a/lib/sisu/v0/sst_to_s_xml_node.rb b/lib/sisu/v0/sst_to_s_xml_node.rb index b6d7c9b2..a2656e3e 100644 --- a/lib/sisu/v0/sst_to_s_xml_node.rb +++ b/lib/sisu/v0/sst_to_s_xml_node.rb @@ -1,3 +1,4 @@ +# coding: utf-8 =begin * Name: SiSU @@ -399,7 +400,6 @@ WOK @data=@data.join.split("\n\n") @data=SiSU_document_structure::Code.new(@md,@data).code @data.each do |para| - para=SiSU_sem::Tags.new(para).rm.all data << SiSU_document_structure::Structure.new(@md,para).structure end data=Syntax::Markup.new(@md,data).songsheet @@ -416,6 +416,15 @@ WOK obj.each do |o| para=o.txt unless o.txt =~/^%% / #comments are lost, consider if para + if para !~/^\s*(?:%+ |<:code>)/ + if @md.sem_tag and para =~/[:;]\{|\}[:;]/ + para=@trans.xml_semantic_tags(para) + end + if para =~/[:;]\{|\}[:;]/ + para=SiSU_sem::Tags.new(para,@md).rm.all + end + end + para=SiSU_sem::Tags.new(para,@md).rm.all para=@trans.markup_light(para) @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m # for headers diff --git a/lib/sisu/v0/sst_to_s_xml_sax.rb b/lib/sisu/v0/sst_to_s_xml_sax.rb index 5e4eb5ea..4625779d 100644 --- a/lib/sisu/v0/sst_to_s_xml_sax.rb +++ b/lib/sisu/v0/sst_to_s_xml_sax.rb @@ -1,3 +1,4 @@ +# coding: utf-8 =begin * Name: SiSU @@ -262,7 +263,14 @@ WOK data << SiSU_document_structure::Structure.new(@md,para).structure end data.each do |para| - para=SiSU_sem::Tags.new(para).rm.all + if para !~/^\s*(?:%+ |<:code>)/ + if @md.sem_tag and para =~/[:;]\{|\}[:;]/ + para=@trans.xml_semantic_tags(para) + end + if para =~/[:;]\{|\}[:;]/ + para=SiSU_sem::Tags.new(para,@md).rm.all + end + end para=@trans.markup_light(para) @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m # for headers |