diff options
Diffstat (limited to 'data')
-rw-r--r-- | data/sisu/conf/convert/sisu_convert | 395 | ||||
-rw-r--r-- | data/sisu/conf/vim/addons/syntax/sisu.vim | 8 |
2 files changed, 265 insertions, 138 deletions
diff --git a/data/sisu/conf/convert/sisu_convert b/data/sisu/conf/convert/sisu_convert index d7876083..9a734678 100644 --- a/data/sisu/conf/convert/sisu_convert +++ b/data/sisu/conf/convert/sisu_convert @@ -27,47 +27,47 @@ module CONVERT end def headerBasic <<WOK -0~title +@title: -0~subtitle +@subtitle: -0~creator +@creator: -0~type +@type: -0~subject +@subject: -0~date +@date: -0~date.available +@date.available: -0~publisher SiSU +@publisher: SiSU -0~rights +@rights: -0~level +@level: WOK end def headerDefault <<WOK -0~title +@title: -0~subtitle +@subtitle: -0~creator +@creator: -0~type +@type: -0~subject +@subject: -0~date +@date: -0~date.available +@date.available: -0~publisher SiSU +@publisher: SiSU -0~rights ... +@rights: WOK end @@ -79,7 +79,7 @@ WOK else pre << headerBasic end - @filename_wv=File.new(%{,,#{@filename}.er9}, "w+") + @filename_wv=File.new(%{,,#{@filename}.sst},'w+') @filename_wv << pre @data.each do |x| y = x.split("\n") @@ -98,11 +98,11 @@ WOK end def songsheet data=@data - print "Convert to SiSU file from Word97 << gvim ,,#{@filename}.er9 >\n" #: <<#{@@html_title}>> - data=WareWord97.new(data.collect, @filename, @instruct).strip - data=WareWord97.new(data.collect, @filename, @instruct).strip - data=WareWord97.new(data.collect, @filename, @instruct).markup_rules - data=MyOutput.new(data.collect, @filename, @instruct).hardOutput + print "Convert to SiSU file from Word97 << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>> + data=WareWord97.new(data.collect,@filename,@instruct).strip + data=WareWord97.new(data.collect,@filename,@instruct).strip + data=WareWord97.new(data.collect,@filename,@instruct).markup_rules + data=MyOutput.new(data.collect,@filename,@instruct).hardOutput end def strip data=@data @@ -110,12 +110,12 @@ WOK endnote_no=1 data.each do |para| para.strip! - para.gsub!(/<u>\s*<\/u>/, '') - para.gsub!(/<\/u>\s*<u>/, '') - para.gsub!(/<b>\s*<\/b>/, '') - para.gsub!(/<\/b>\s*<b>/, '') - para.gsub!(/<i>\s*<\/i>/, '') - para.gsub!(/<\/i>\s*<i>/, '') + para.gsub!(/<u>\s*<\/u>/,'') + para.gsub!(/<\/u>\s*<u>/,'') + para.gsub!(/<b>\s*<\/b>/,'') + para.gsub!(/<\/b>\s*<b>/,'') + para.gsub!(/<i>\s*<\/i>/,'') + para.gsub!(/<\/i>\s*<i>/,'') tuned_file << para unless para == nil end tuned_file @@ -126,15 +126,16 @@ WOK endnote_no=1 data.each do |para| para.strip! - para.gsub!(/\s+/, ' ') - para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i, "4{ \\1 \\2") #watch case insensitivity - para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i, "3{ \\1 \\2") #watch case insensitivity - para.gsub!(/^<b>(\d+\.\d+\.\d+)(.+?)<\/b>/i, "6{ \\1 \\2") #numeric, decide what to do, can be different - para.gsub!(/^<b>(\d+\.\d+)(.+?)<\/b>/i, "5{ \\1 \\2") #numeric, decide what to do, can be different - para.gsub!(/^<b>(\d+)(.+?)<\/b>/i, "4{ \\1 \\2") #numeric, decide what to do, can be different - para.gsub!(/<u>(.+?)<\/u>/, "_{\\1}_") - para.gsub!(/<b>(.+?)<\/b>/, "*{\\1}*") - para.gsub!(/<i>(.+?)<\/i>/, "/{\\1}/") + para.gsub!(/\s+/,' ') + para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i,'4~ \1 \2') #watch case insensitivity + para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i,'3~ \1 \2') #watch case insensitivity + para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i,'6~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i,'5~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/^<b>(\d.+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different + #para.gsub!(/^<b>([\d.]+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/<u>(.+?)<\/u>/,'_{\1}_') + para.gsub!(/<b>(.+?)<\/b>/,'*{\1}*') + para.gsub!(/<i>(.+?)<\/i>/,'/{\1}/') tuned_file << para unless para == nil end tuned_file @@ -148,10 +149,11 @@ WOK end def songsheet data=@data - print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.er9 >\n" #: <<#{@@html_title}>> + print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>> #data=Html.new(data.collect, @filename, @instruct).space_paragraphs #data=Html.new(data.split(''), @filename, @instruct).space_paragraphs - data=Html.new(data.split("\n"), @filename, @instruct).space_paragraphs + data=Html.new(data.join.split(/\n\n+/), @filename, @instruct).space_paragraphs + #data=Html.new(data.split("\n"), @filename, @instruct).space_paragraphs #data=Html.new(data.collect.join.split("\n"), @filename, @instruct).space_paragraphs data=Html.new(data.collect, @filename, @instruct).multiline data=Html.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules @@ -164,18 +166,18 @@ WOK tuned_file=Array.new data.each do |para| para.strip! - para.gsub!(/\r/, '') + para.gsub!(/\r/,'') #para.gsub!(/\n/, ' ') #PROBLEM, serious time issues on a few files also for \n (or multiline matches which is less surprising), edit out if necessary - para.gsub!(/<\/?p>/i, 'zZz') - para.gsub!(/<\/?\s*p(?:\s+ALIGN=.+?)?>/i, "zZz") #all manner of <p> para.gsub!(/<\/?p>/i, "\n\n") - para.gsub!(/<p\s+(class|align).+?>/i, "zZz") # - para.gsub!(/<\/p>/i, "zZz") # repeat actually - para.gsub!(/<(?:dir|tr|br)>/i, "zZz") # + para.gsub!(/<\/?p>/i,'zZz') + para.gsub!(/<\/?\s*p(?:\s+ALIGN=.+?)?>/i,'zZz') #all manner of <p> para.gsub!(/<\/?p>/i, "\n\n") + para.gsub!(/<p\s+(class|align).+?>/i,'zZz') # + para.gsub!(/<\/p>/i,'zZz') # repeat actually + para.gsub!(/<(?:dir|tr|br)>/i,'zZz') # #para.gsub!(/<(?:\/\s*)?(?:dir|tr|br)>/i, "zZz") # - para.gsub!(/(<\/center>)/i, "\\1zZz") - para.gsub!(/(<\/h[1-6]>)/i, "\\1zZz") - para.gsub!(/ \s+/i, ' ') - para.gsub!(/(?:\s*zZz\s*)+/i, "zZz") # + para.gsub!(/(<\/center>)/i,'\1zZz') + para.gsub!(/(<\/h[1-6]>)/i,'\1zZz') + para.gsub!(/ \s+/i,' ') + para.gsub!(/(?:\s*zZz\s*)+/i,'zZz') # tuned_file << para unless para == nil end tuned_file @@ -185,10 +187,10 @@ WOK sub.each do |x| if x=~/(<\/blockquote>)/i m = $1 - res << x[/(.+?)#{m}/mi, 1].gsub!(/zZz/,"zZz_1 ") if x =~/.+?#{m}/mi - res << x[/#{m}(.+)/mi, 1] + res << x[/(.+?)#{m}/mi,1].gsub!(/zZz/,'zZz_1 ') if x =~/.+?#{m}/mi + res << x[/#{m}(.+)/mi,1] else - res << x #[/(.+)/mi, 1] + res << x #[/(.+)/mi,1] end end res.join @@ -197,26 +199,27 @@ WOK data=@data tuned_file=Array.new data.each do |para| - para.gsub!(/\n/, ' ') - para.gsub!(/ \s+/mi, ' ') + para.gsub!(/\n/,' ') + para.gsub!(/ \s+/mi,' ') #ALL HERE could be very time EXPENSIVE but tamed? compromise ... /mi - para.gsub!(/<([biu]|h[1-6])>(?:zZz)?([^<]+)?zZz(.+?)<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>") - para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)(?:<\/center>)?zZz(.+?)?<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>") - #para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/center>zZz(.+?)?<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>") - para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/\1>/i, "zZz<\\1>\\2</\\1>") - para.gsub!(/<(h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i, "zZz<\\1>\\2</\\1>zZz") #does catch some h1, h2 etc, too expensive to have biu - #para.gsub!(/<([biu]|h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>") #may go too far? useful for h1 h2 etc, remove biu? - #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>") - #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>") + para.gsub!(/<([biu]|h[1-6])>(?:zZz)?([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>') + para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)(?:<\/center>)?zZz(.+?)?<\/\1>/i,'zZz<\1>\2 \3</\1>') + #para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/center>zZz(.+?)?<\/\1>/i,'zZz<\1>\2 \3</\1>') + para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/\1>/i,'zZz<\1>\2</\1>') + para.gsub!(/<(h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i,'zZz<\1>\2</\1>zZz') #does catch some h1, h2 etc, too expensive to have biu + #para.gsub!(/<([biu]|h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i,'zZz<\1>\2 \3</\1>') #may go too far? useful for h1 h2 etc, remove biu? + #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>') + #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>') ### SERIOUS PROBLEM INTRODUCED # sub = para.split(/<blockquote>/i) # para = blockquotes(sub) if sub.length > 0 #check was on >1 could have serious repercussions 2004w29 - para.gsub!(/zZz(\s*zZz)*/, "\n\n") + para.gsub!(/zZz(\s*zZz)*/,"\n\n") tuned_file << para << "\n\n" unless para == nil end tuned_file end def markup_rules + @@flag_blockquote=false data=@data tuned_file=Array.new data.each do |para| @@ -224,65 +227,186 @@ WOK #p para.grep(/<a href="(http:\/\/.+?)">/i) #m=$1 #para.gsub!(/(?:<\s*)?<a href="#{m}">#{m}<\/a>(?:\s*>)?\.?/i, "#{m}") - para.gsub!(/(?:<\s*)?<a href="(http:\/\/.+?)">http:\/\/.+?<\/a>(?:\s*>)?\.?/i, "\\1") #risk that url & url are not to match + para.gsub!(/(?:<\s*)?<a href="(http:\/\/.+?)">http:\/\/.+?<\/a>(?:\s*>)?\.?/i,'\1') #risk that url & url are not to match #para.gsub!(/(?:<\s*)?<a href="(\w+\.html)">(http:\/\/.+?\/\1)<\/a>(?:\s*>)?\.?/i, "\\2") #does not match end + if para=~/<BLOCKQUOTE>/i + @@flag_blockquote=true + end + if @@flag_blockquote + para.gsub!(/^/,'_1 ') unless para.empty? or para =~/^\s*<\/?blockquote?>\s*$/i + end + if para=~/<\/BLOCKQUOTE>/i + @@flag_blockquote=false + end + para.gsub!(/<\/?blockquote?>/i,'') ### clean - para.gsub!(/^\s+/i, '') - para.gsub!(/<([bui]|em|su[pb])>\s*<\/\1>/i, '') - para.gsub!(/<\/?center>/i, '') - para.gsub!(/\s*<\/dir>/i, '') - para.gsub!(/<hr>/i, '') - para.gsub!(/\s*<a href=".+?\.html#(?:[a-z_]+)?(?:[a-z0-9_-]|\*)+">\[(\*+)\]<\/a>/i, "^{[\\1]}^ ") #other endnote marker - para.gsub!(/<a href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"(?:\s+name=".+?")?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~e ') #endnote marker - para.gsub!(/<a name=".+?"\s+href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~e ') #endnote marker - para.gsub!(/<a name="(?:[a-z$]+)?[0-9_-]+">\s*(<\/a>)?\s*\d+\.?\s*(<\/a>)?\s*/i, '~{{ ') #endnote - #para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i, "\\1{ \\2") # - para.gsub!(/<h([1-6])(?: align=.+?)?>\s*(.+?)\s*<\/h\1>\s*/i, "\\1{ \\2") # - para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i, "4{ \\1 \\2") #watch case insensitivity - para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i, "3{ \\1 \\2") #watch case insensitivity - para.gsub!(/^<b>(\d+\.\d+\.\d+)(.+?)<\/b>/i, "6{ \\1 \\2") #numeric, decide what to do, can be different - para.gsub!(/^<b>(\d+\.\d+)(.+?)<\/b>/i, "5{ \\1 \\2") #numeric, decide what to do, can be different - para.gsub!(/^<b>(\d+)(.+?)<\/b>/i, "4{ \\1 \\2") #numeric, decide what to do, can be different + para.gsub!(/^\s+/i,'') + para.gsub!(/<([bui]|em|su[pb])>\s*<\/\1>/i,'') + para.gsub!(/<\/?center>/i,'') + para.gsub!(/\s*<\/dir>/i,'') + para.gsub!(/<hr>/i,'') + para.gsub!(/\s*<a href=".+?\.html#(?:[a-z_]+)?(?:[a-z0-9_-]|\*)+">\[(\*+)\]<\/a>/i,'^{[\1]}^ ') #other endnote marker + para.gsub!(/<a href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"(?:\s+name=".+?")?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i,'~^ ') #endnote marker + para.gsub!(/<a name=".+?"\s+href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i,'~^ ') #endnote marker + para.gsub!(/<a name="(?:[a-z$]+)?[0-9_-]+">\s*(<\/a>)?\s*\d+\.?\s*(<\/a>)?\s*/i,'^~ ') #endnote + #para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') # + para.gsub!(/<h([1-6])(?: align=.+?)?>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') # + para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i,'4~ \1 \2') #watch case insensitivity + para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i,'3~ \1 \2') #watch case insensitivity + para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i,'6~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i,'5~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/^<b>(\d+\.?)(.+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different #<a name="ii"></a><B> - para.gsub!(/^(<a name=".+?">)(?:<small>)?<(?:b|strong)>\s*(.+?)\s*<\/(?:b|strong)>/i, "5{ \\2 \\1") #watch - para.gsub!(/^(<(a name|A NAME)=".+?">)(\s*|<\/[aA]>)?([A-Z][A-Z])+/, "5{ \\2 \\1") #watch - para.gsub!(/^(\s+|<p>)?(<a name=".+?">)(\s*|<\/a>)?<b>/i, "5{ \\2 \\1") #watch - para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i, "\\1{ \\2") # - para.gsub!(/^<b>\s*(.+?)<\/b>\s*(<\/i>\s*)?$/i, "4{ \\1\\2") # wish it all were less messy - para.gsub!(/^<i>\s*([^"(].+?)<\/i>\s*(<\/b>\s*)?$/i, "5{ \\1\\2") # wish it all were less messy - para.gsub!(/<\/?[biu]>/i, '') if para =~/[1-6]\{/ - para.gsub!(/<u>\s*(.+?)\s*<\/u>/i, "_{\\1}_") - para.gsub!(/<(b|strong)>\s*(.+?)\s*<\/\1>/i, "*{\\2}*") - para.gsub!(/<(i|em)>\s*(.+?)\s*<\/\1>/i, "/{\\2}/") - para.gsub!(/<sup>\s*(.+?)\s*<\/sup>/i, "^{\\1}^") - para.gsub!(/(([\/\*!_])\{.+?\}\2)\s\s+/i, "\\1 ") - para.gsub!(/(([\/\*!_])\{.+?\}\2)\s+([.,;?\)])\s+/i, "\\1\\3 ") - para.gsub!(/(([\/\*!_])\{.+?\}\2)(["'])\s+/i, "\\1\\3 ") - para.gsub!(/(([\/\*!_])\{.+?\}\2)\s*([a-z0-9])/i, "\\1 \\3") - para.gsub!(/(([\/\*_])\{.+?\}\2)\s*([a-z0-9])/i, "\\1 \\3") - para.gsub!(/([a-z0-9])(([\/\*_])\{.+?\}\3)/i, " \\1 \\2") #eg this/{problem}/ - para.gsub!(/([\/\*_])\{([,.;; ]+)\}\1/i, "\\2") #eg /{,}/ or *{ }* etc. - para.gsub!(/ \s+/i, ' ') + para.gsub!(/^(<a name=".+?">)(?:<small>)?<(?:b|strong)>\s*(.+?)\s*<\/(?:b|strong)>/i,'5~ \2 \1') #watch + para.gsub!(/^(<(a name|A NAME)=".+?">)(\s*|<\/[aA]>)?([A-Z][A-Z])+/,'5~ \2 \1') #watch + para.gsub!(/^(\s+|<p>)?(<a name=".+?">)(\s*|<\/a>)?<b>/i,'5~ \2 \1') #watch + para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') # + para.gsub!(/^<b>\s*(.+?)<\/b>\s*(<\/i>\s*)?$/i,'4~ \1\2') # wish it all were less messy + para.gsub!(/^<i>\s*([^"(].+?)<\/i>\s*(<\/b>\s*)?$/i,'5~ \1\2') # wish it all were less messy + para.gsub!(/<\/?[biu]>/i,'') if para =~/[1-6]\{/ + para.gsub!(/<u>\s*(.+?)\s*<\/u>/i,'_{\1}_') + para.gsub!(/<(b|strong)>\s*(.+?)\s*<\/\1>/i,'*{\2}*') + para.gsub!(/<(i|em)>\s*(.+?)\s*<\/\1>/i,'/{\2}/') + para.gsub!(/<sup>\s*(.+?)\s*<\/sup>/i,'^{\1}^') + para.gsub!(/(([\/\*!_])\{.+?\}\2)\s\s+/i,'\1 ') + para.gsub!(/(([\/\*!_])\{.+?\}\2)\s+([.,;?\)])\s+/i,'\1\3 ') + para.gsub!(/(([\/\*!_])\{.+?\}\2)(["'])\s+/i,'\1\3 ') + para.gsub!(/(([\/\*!_])\{.+?\}\2)\s*([a-z0-9])/i,'\1 \3') + para.gsub!(/(([\/\*_])\{.+?\}\2)\s*([a-z0-9])/i,'\1 \3') + para.gsub!(/([a-z0-9])(([\/\*_])\{.+?\}\3)/i,' \1 \2') #eg this/{problem}/ + para.gsub!(/([\/\*_])\{([,.;; ]+)\}\1/i,'\2') #eg /{,}/ or *{ }* etc. + para.gsub!(/ \s+/i,' ') #para.gsub!(/\/\{\*\{/i, '*{/{') #para.gsub!(/\}\*\}\//i, '}/}*') - para.gsub!(/"/i, '"') - para.gsub!(/&/i, 'and') - para.gsub!(/<!doctype html public .+/i, '') - para.gsub!(/<\/?(?:html|head|body|font|small)>/i, '') - para.gsub!(/<\/(?:title)>/i, '') - para.gsub!(/<title>/i, '#{~title? ') - para.gsub!(/<blockquote>(.+?)<\/blockquote>/mi, "\n\n_1 \\1\n\n") - para.gsub!(/<div align=.+?>|<\/div>|<font size=.+?>|<\/a><\/em><\/strong>/i, '') - para.gsub!(/~e\s+\.\s*/i, ".~e ") #check vim equiv # %s/\~e\s\+\.\s*/.\~e /c - para.gsub!(/\s+~e\s+/i, "~e ") - para.gsub!(/ \s+/i, ' ') - para.gsub!(/\s+$/i, '') - para.gsub!(/^(?:<\/[bi]>)+$/i, '') - para.gsub!(/^(?:(?:<i>)+<b>|(?:<b>)+<i>)\s*([^"(].+?)/i, "5{ \\1\\2") # wish it all were less messy - para.gsub!(/^(?:<\/?(?:[ib]|em)>\s*)+$/i, '') # cleaning up left over <i> etc. - para.gsub!(/<(?:i|em)>\s*(.+)/i, "/{\\1}/") # using up left over <i> - para.gsub!(/<b>\s*(.+)/i, "*{\\1}*") # using up left over <b> + para.gsub!(/"/i,'"') + para.gsub!(/&/i,'and') + para.gsub!(/<!doctype html public .+/i,'') + para.gsub!(/<\/?(?:html|head|body|font|small)>/i,'') + para.gsub!(/<\/(?:title)>/i,'') + para.gsub!(/<title>/i,'#{~title? ') + para.gsub!(/<blockquote>(.+?)<\/blockquote>/mi,"\n\n_1 \\1\n\n") + para.gsub!(/<div align=.+?>|<\/div>|<font size=.+?>|<\/a><\/em><\/strong>/i,'') + para.gsub!(/~^\s+\.\s*/i,'.~^ ') #check vim equiv # %s/\~e\s\+\.\s*/.\~e /c + para.gsub!(/\s+~^\s+/i,'~^ ') + para.gsub!(/ \s+/i,' ') + para.gsub!(/\s+$/i,'') + para.gsub!(/^(?:<\/[bi]>)+$/i,'') + para.gsub!(/^(?:(?:<i>)+<b>|(?:<b>)+<i>)\s*([^"(].+?)/i,'5~ \1\2') # wish it all were less messy + para.gsub!(/^(?:<\/?(?:[ib]|em)>\s*)+$/i,'') # cleaning up left over <i> etc. + para.gsub!(/<(?:i|em)>\s*(.+)/i,'/{\1}/') # using up left over <i> + para.gsub!(/<b>\s*(.+)/i,'*{\1}*') # using up left over <b> + para.gsub!(/<dd>([\d.]+)/i,'5~ \1') + para.gsub!(/<dd>(?: )+([\d.]+)/i,'6~ \1') + para.gsub!(/<dd>(\([a-z]\))/i,'7~ \1') + para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">(.+?)(<\/a>)/i,'\1\3\2\4') + para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">/i,'\1\3\2') + para.gsub!(/http\/\/(\S+)/i,'http:\/\/\1') + para.gsub!(/\s*<a href="\S+?">(http:\/\/\S+?)<\/a>\s*/i,' \1 ') + para.gsub!(/([a-zA-Z.,!?;:])([*\/_-]\{)/,'\1 \2') + para.gsub!(/^\s*( ){10,12}/i,'_2 ') + para.gsub!(/^\s*( ){4,5}/i,'_1 ') + para.gsub!(/	/,' ') #check + ## glyphs & tildes + para.gsub!(/¡/, '¡') #'Inverted exclamation + para.gsub!(/¢/, '¢') #'Cent sign ¢ + para.gsub!(/£/, '£') #'Pound sign £ + para.gsub!(/¤/, '¤') #'General currency sign + para.gsub!(/¥/, '¥') #'Yen sign ¥ + para.gsub!(/¦/, '¦') #'Broken vertical bar + para.gsub!(/§/, '§') #'Section sign § + para.gsub!(/¨/, '¨') #'Umlaut + para.gsub!(/©/, '©') #'Copyright © + para.gsub!(/ª/, 'ª') #'Feminine ordinal ª + para.gsub!(/«/, '«') #'Left angle quote « + para.gsub!(/¬/, '¬') #'Not sign + para.gsub!(/­/, '') #'Soft hyphen + para.gsub!(/®/, '®') #'Registered trademark ® + para.gsub!(/¯/, '¯') #'Macron accent + para.gsub!(/°/, '°') #'Degree sign ° + para.gsub!(/&plusmin;/,'±') #'Plus or minus ± + para.gsub!(/²/, '²') #'Superscript 2 ² + para.gsub!(/³/, '³') #'Superscript 3 ³ + para.gsub!(/´/, '') #'Acute accent + para.gsub!(/µ/, 'µ') #'Micro sign (Greek mu) µ + para.gsub!(/¶/, '¶') #'Paragraph sign ¶ + para.gsub!(/·/, '·') #'Middle dot + para.gsub!(/¸/, '¸') #'Cedilla + para.gsub!(/¹/, '¹') #'Superscript 1 ¹ + para.gsub!(/º/, 'º') #'Masculine ordinal º + para.gsub!(/»/, '»') #'Right angle quote + para.gsub!(/¼/, '¼') #'Fraction one quarter ¼ + para.gsub!(/½/, '½') #'Fraction on half ½ + para.gsub!(/¾/, '¾') #'Fraction three quarters ¾ + para.gsub!(/¿/, '¿') #'Inverted question mark ¿ + para.gsub!(/À/, 'À') #'Capital A, grave accent À + para.gsub!(/Á/, 'Á') #'Capital A, acute accent Á + para.gsub!(/Â/, 'Â') #'Capital A, circumflex accent  + para.gsub!(/Ã/, 'Ã') #'Capital A, tilde à + para.gsub!(/Ä/, 'Ä') #'Capital A, umlaut Ä + para.gsub!(/Å/, 'Å') #'Capital A, ring Å + para.gsub!(/Æ/, 'Æ') #'Capital AE ligature Æ + para.gsub!(/Ç/, 'Ç') #'Capital C, cedilla Ç + para.gsub!(/È/, 'È') #'Capital E, grave accent È + para.gsub!(/É/, 'É') #'Capital E, acute accent É + para.gsub!(/Ê/, 'Ê') #'Capital E, circumflex accent Ê + para.gsub!(/Ë/, 'Ë') #'Capital E, umlaut Ë + para.gsub!(/Ì/, 'Ì') #'Capital I, grave accent Ì + para.gsub!(/Í/, 'Í') #'Capital I, acute accent Í + para.gsub!(/Î/, 'Î') #'Capital I, circumflex accent Î + para.gsub!(/Ï/, 'Ï') #'Capital I, umlaut Ï + para.gsub!(/Ð/, 'Ð') #'Capital eth, Icelandic + para.gsub!(/Ñ/, 'Ñ') #'Capital N, tilde Ñ + para.gsub!(/Ò/, 'Ò') #'Capital O, grave accent Ò + para.gsub!(/Ó/, 'Ó') #'Capital O, acute accent Ó + para.gsub!(/Ô/, 'Ô') #'Capital O, circumflex accent Ô + para.gsub!(/Õ/, 'Õ') #'Capital O, tilde Õ + para.gsub!(/Ö/, 'Ö') #'Capital O, umlaut Ö + para.gsub!(/×/, '×') #'Multiply sign × + para.gsub!(/Ø/, 'Ø') #'Capital O, slash Ø + para.gsub!(/Ù/, 'Ù') #'Capital U, grave accent Ù + para.gsub!(/Ú/, 'Ú') #'Capital U, acute accent Ú + para.gsub!(/Û/, 'Û') #'Capital U, circumflex accent Û + para.gsub!(/Ü/, 'Ü') #'Capital U, umlaut Ü + para.gsub!(/Ý/, 'Ý') #'Capital Y, acute accent Ý + para.gsub!(/Þ/, 'Þ') #'Capital thorn, Icelandic Þ + para.gsub!(/ß/, 'ß') #'Small sz ligature, German ß + para.gsub!(/à/, 'à') #'Small a, grave accent à + para.gsub!(/á/, 'á') #'Small a, acute accent á + para.gsub!(/â/, 'â') #'Small a, circumflex accent â + para.gsub!(/ã/, 'ã') #'Small a, tilde ã + para.gsub!(/ä/, 'ä') #'Small a, umlaut ä + para.gsub!(/å/, 'å') #'Small a, ring å + para.gsub!(/æ/, 'æ') #'Small ae ligature æ + para.gsub!(/ç/, 'ç') #'Small c, cedilla ç + para.gsub!(/è/, 'è') #'Small e, grave accent è + para.gsub!(/é/, 'é') #'Small e, acute accent é + para.gsub!(/ê/, 'ê') #'Small e, circumflex accent ê + para.gsub!(/ë/, 'ë') #'Small e, umlaut ë + para.gsub!(/ì/, 'ì') #'Small i, grave accent ì + para.gsub!(/í/, 'í') #'Small i, acute accent í + para.gsub!(/î/, 'î') #'Small i, circumflex accent î + para.gsub!(/ï/, 'ï') #'Small i, umlaut ï + para.gsub!(/ð/, 'ð') #'Small eth, Icelandic ð + para.gsub!(/ñ/, 'ñ') #'Small n, tilde ñ + para.gsub!(/ò/, 'ò') #'Small o, grave accent ò + para.gsub!(/ó/, 'ó') #'Small o, acute accent ó + para.gsub!(/ô/, 'ô') #'Small o, circumflex accent ô + para.gsub!(/õ/, 'õ') #'Small o, tilde õ + para.gsub!(/ö/, 'ö') #'Small o, umlaut ö + para.gsub!(/÷/, '÷') #'Divide sign ÷ + para.gsub!(/ø/, 'ø') #'Small o, slash ø + para.gsub!(/ù/, 'ù') #'Small u, grave accent ù + para.gsub!(/ú/, 'ú') #'Small u, acute accent ú + para.gsub!(/û/, 'û') #'Small u, circumflex accent û + para.gsub!(/ü/, 'ü') #'Small u, umlaut ü + para.gsub!(/ý/, 'ý') #'Small y, acute accent ý + para.gsub!(/þ/, 'þ') #'Small thorn, Icelandic þ + para.gsub!(/ÿ/, 'ÿ') #'Smally y, umlaut ÿ + ## + para.gsub!(/\s\s+/,' ') + para.gsub!(/\t+/,' ') + #para.gsub!(/ +/,' ') #para.gsub!(/^(?:<(?:\/)?[bi]>)+$/i, '') tuned_file << para unless para == nil end @@ -297,7 +421,7 @@ WOK end def songsheet data=@data - print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.er9 >\n" #: <<#{@@html_title}>> + print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>> data=Default.new(data.collect, @filename, @instruct).space_paragraphs data=Default.new(data.collect, @filename, @instruct).multiline data=Default.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules @@ -308,9 +432,9 @@ WOK data=@data tuned_file=Array.new data.each do |para| - para.gsub!(/<i>(Id\.?)(\s|$)/i, "/\{\\1\}\\2/") - para.gsub!(/^(~\{\{ .+?)(<\/LI>\s*|<\/OL>\s*)+$/i, "\\1") - para.gsub!(/\/\{Id\.\s*<\/LI>\s*\}\//i, '/{Id.}/') + para.gsub!(/<i>(Id\.?)(\s|$)/i,'/\{\1\}\2/') + para.gsub!(/^(~\{\{ .+?)(<\/LI>\s*|<\/OL>\s*)+$/i,'\1') + para.gsub!(/\/\{Id\.\s*<\/LI>\s*\}\//i,'/{Id.}/') tuned_file << para unless para == nil end tuned_file @@ -328,7 +452,7 @@ initial SiSU markup from other file formats WOK end -def doWord(argv, instruct) +def do_word(argv, instruct) argv.each do |f| if f =~/.+?\.doc$/ @argv << f[/(.+?)\.doc$/, 1] @@ -337,12 +461,12 @@ def doWord(argv, instruct) end end @argv.each do |filename| - system(%{wvWare -x #{@dir.home}/.sisu/convert/wvSiSU.xml #{filename}.doc > #{filename}.wv}) + system(%{wvWare -x #{@dir.path.home}/.sisu/convert/wvSiSU.xml #{filename}.doc > #{filename}.wv}) file_array=IO.readlines("#{filename}.wv", "") CONVERT::WareWord97.new(file_array, filename, instruct).songsheet # metaverse created here end end -def doHtml(argv, instruct) +def do_html(argv, instruct) argv.each do |f| if f =~/.+?\.html$/ @argv << f[/(.+?)\.html$/, 1] @@ -351,11 +475,11 @@ def doHtml(argv, instruct) end end @argv.each do |filename| - file_array=IO.readlines("#{filename}.html", "\n\r") - CONVERT::Html.new(file_array, filename, instruct).songsheet # metaverse created here + file_array=IO.readlines("#{filename}.html","\n\r") + CONVERT::Html.new(file_array,filename,instruct).songsheet # metaverse created here end end -def doDefault(argv, instruct) +def do_default(argv, instruct) argv.each do |f| if f =~/.+?\.html$/ @argv << f[/(.+?)\.html$/, 1] @@ -371,18 +495,21 @@ end def cases(argv, instruct) case instruct when/^--(word(97)?|doc)$/i #creates minimal sisu_small.gz package to send - doWord(argv, instruct) + do_word(argv, instruct) when/^--(html)$/i #creates sisu.gz package to send - doHtml(argv, instruct) + do_html(argv, instruct) when/^--(default)$/i #creates sisu.gz package to send - doDefault(argv, instruct) + do_default(argv, instruct) else help end end -require 'zxy_sysenv.rb' +$KCODE='u' +branch='v0' +SiSU_lib="sisu/#{branch}" +require "#{SiSU_lib}/sysenv" include SiSU_Env -@dir=SiSU_Env::Info_dir.new +@dir=SiSU_Env::Info_env.new @argv=Array.new argv=$* instruct = "#{argv[0].to_s}" diff --git a/data/sisu/conf/vim/addons/syntax/sisu.vim b/data/sisu/conf/vim/addons/syntax/sisu.vim index 13d22bb1..8a114b64 100644 --- a/data/sisu/conf/vim/addons/syntax/sisu.vim +++ b/data/sisu/conf/vim/addons/syntax/sisu.vim @@ -10,7 +10,7 @@ else endif "% 11 Errors? syn match sisu_error contains=sisu_link,sisu_error_wspace "<![^ei]\S\+!>" -"% 10 Markers: Endnote Identifiers, Pagebreaks etc.: +"% 10 Markers: Endnote Identifiers, Pagebreaks etc.: if !exists("sisu_no_identifiers") syn match sisu_mark_endnote "\~^" syn match sisu_contain contains=@NoSpell "</\?sub>" @@ -26,7 +26,7 @@ if !exists("sisu_no_identifiers") "metaverse specific syn match sisu_ocn contains=@NoSpell "<\~\d\+;\w\d\+;\w\d\+>" syn match sisu_marktail "<\~#>" - syn match sisu_markpara contains=@NoSpell "<:i[12]>" + syn match sisu_markpara contains=@NoSpell "<:i[1-9]>" syn match sisu_link " \*\~\S\+" syn match sisu_action "^<:insert\d\+>" syn match sisu_contain "<:e>" @@ -63,7 +63,7 @@ syn region sisu_linked contains=sisu_fontface,sisu_strikeout,sisu_number,sisu_co syn region sisu_linked contains=sisu_fontface,sisu_strikeout,sisu_number,sisu_control,sisu_identifier,sisu_error matchgroup=sisu_link start="{" end="}image" oneline "% some line operations syn region sisu_control contains=sisu_strikeout,sisu_identifier,sisu_content_endnote,sisu_mark_endnote,sisu_error,sisu_error_wspace matchgroup=sisu_control start="\(\(^\| \)!_ \|<:b>\)" end="$" -syn region sisu_normal contains=sisu_strikeout,sisu_identifier,sisu_content_endnote,sisu_mark_endnote,sisu_link,sisu_linked,sisu_error,sisu_error_wspace matchgroup=sisu_markpara start="^_\([12*]\|[12]\*\) " end="$" +syn region sisu_normal contains=sisu_strikeout,sisu_identifier,sisu_content_endnote,sisu_mark_endnote,sisu_link,sisu_linked,sisu_error,sisu_error_wspace matchgroup=sisu_markpara start="^_\([1-9*]\|[1-9]\*\) " end="$" syn region sisu_normal contains=sisu_strikeout,sisu_identifier,sisu_content_endnote,sisu_mark_endnote,sisu_link,sisu_linked,sisu_error,sisu_error_wspace matchgroup=sisu_markpara start="^\(#[ 1]\|_# \)" end="$" syn region sisu_comment matchgroup=sisu_comment start="^%\{1,2\} " end="$" "% font face curly brackets @@ -74,7 +74,7 @@ syn region sisu_identifier contains=sisu_strikeout,sisu_number,sisu_control,sisu syn region sisu_underline contains=sisu_strikeout,sisu_number,sisu_control,sisu_identifier,sisu_error matchgroup=sisu_fontface start="+{" end="}+" syn region sisu_identifier contains=sisu_strikeout,sisu_number,sisu_control,sisu_identifier,sisu_error matchgroup=sisu_fontface start="\^{" end="}\^" syn region sisu_identifier contains=sisu_strikeout,sisu_number,sisu_control,sisu_identifier,sisu_error matchgroup=sisu_fontface start=",{" end="}," -syn region sisu_strikeout contains=sisu_error matchgroup=sisu_fontface start="-{" end="}-" +syn region sisu_strikeout contains=sisu_error matchgroup=sisu_fontface start="-{" end="}-" syn region sisu_html contains=sisu_error contains=sisu_strikeout matchgroup=sisu_contain start="<a href=\".\{-}\">" end="</a>" oneline "% single words bold italicise etc. "workon syn region sisu_control contains=sisu_error matchgroup=sisu_control start="\([ (]\|^\)\*[^\|{\n\~\\]"hs=e-1 end="\*"he=e-0 skip="[a-zA-Z0-9']" oneline |