diff options
author | Ralph Amissah <ralph@amissah.com> | 2011-02-01 09:48:30 -0500 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2011-02-01 16:55:05 -0500 |
commit | 4b51bc00cda70d3c118401a74f1704df38c947a3 (patch) | |
tree | 8284fec609798d100b4663e42a842cb37cab985f /data/sisu/v3/conf/convert | |
parent | prepare for v3 branch (diff) |
v3 introduced as development branch, invoked using "sisu --v3 [instructions]"
Diffstat (limited to 'data/sisu/v3/conf/convert')
-rw-r--r-- | data/sisu/v3/conf/convert/kdissert_to_sisu | 161 | ||||
-rw-r--r-- | data/sisu/v3/conf/convert/modify.rb | 250 | ||||
-rw-r--r-- | data/sisu/v3/conf/convert/sisu_convert | 519 | ||||
-rw-r--r-- | data/sisu/v3/conf/convert/wvHtml.xml | 388 | ||||
-rw-r--r-- | data/sisu/v3/conf/convert/wvSiSU.xml | 360 |
5 files changed, 1678 insertions, 0 deletions
diff --git a/data/sisu/v3/conf/convert/kdissert_to_sisu b/data/sisu/v3/conf/convert/kdissert_to_sisu new file mode 100644 index 00000000..35921c3e --- /dev/null +++ b/data/sisu/v3/conf/convert/kdissert_to_sisu @@ -0,0 +1,161 @@ +#!/usr/bin/env ruby +=begin + * Name: modify.rb + * Author: Ralph Amissah + * http://www.jus.uio.no/sisu + * http://www.jus.uio.no/sisu/SiSU/download + * Description: Conversion script from kdissert .kdi to sisu markup .sst + * Copyright (C) 2004, 2006 Ralph Amissah + * Packaged with: SiSU information Structuring Universe - + Structured information, Serialized Units + * SiSU Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Ralph Amissah + + * License: GPL 3 or later + + Summary of GPL 3 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + http://www.fsf.org/licenses/gpl.html + http://www.gnu.org/copyleft/gpl.html + http://www.jus.uio.no/sisu/gpl3.fsf + + © Ralph Amissah 1997, current 2006. + All Rights Reserved. + + * Ralph Amissah: ralph@amissah.com + ralph.amissah@gmail.com +=end +require 'rexml/document' +include REXML +argv=$* +@sisu,@sisu_base=Array.new,Array.new +unless argv.empty? 
+ argv.each do |kdi| + if kdi =~/\.kdi$/ + system("cp #{kdi} #{kdi}.tar.gz && tar xzvf #{kdi}.tar.gz && rm #{kdi}.tar.gz") + file=File.new("maindoc.xml") + #system("cp #{kdi} /tmp/. && cd /tmp && tar xzvf /tmp/#{kdi} && cd -") + #file=File.new("/tmp/maindoc.xml") + @output=File.new("#{kdi}.sst",'w') + doc=Document.new(file) + root=doc.root + @el=Array.new + root.each do |x| + end + root.each_with_index do |content,idx| + if root.elements["item[#{idx}]/summary"] + id,ma,ch=nil,nil,Array.new + if root.elements["item[#{idx}]/id"] + id=root.elements["item[#{idx}]/id"].text.to_i + end + if root.elements["item[#{idx}]/parent"] + ma=root.elements["item[#{idx}]/parent"].text.to_s + end + if root.elements["item[#{idx}]/child"] #problem only get one child, even where several + root.get_elements("item[#{idx}]/child").each do |x| + ch << x.text + end + end + if root.elements["item[#{idx}]/summary"] + sum=root.elements["item[#{idx}]/summary"].text.to_s.strip + end + if root.elements["item[#{idx}]/text"] + txt=root.elements["item[#{idx}]/text"].text.to_s.strip + end + @el[id]=Hash.new + @el[id][:id]=id + @el[id][:ma]=ma + @el[id][:ch]=ch + @el[id][:sum]=sum + @el[id][:txt]=txt + if ma == '-1' + @el[id][:lev]=':A' + @top=id + end + end + end + @doc=Array.new + @title=@el[@top][:sum] + p @el[@top][:sum] + @doc << ':A~ ' + @el[@top][:sum] + @el[@top][:ch].each do |x| + @el[x.to_i][:lev]='1' + @doc << '1~ ' + @el[x.to_i][:sum] + @doc << @el[x.to_i][:txt] + @el[x.to_i][:ch].each do |y| + @el[y.to_i][:lev]='2' + @doc << '2~ ' + @el[y.to_i][:sum] + @doc << @el[y.to_i][:txt] + @el[y.to_i][:ch].each do |z| + @el[z.to_i][:lev]='3' + @doc << '3~ ' + @el[z.to_i][:sum] + @doc << @el[z.to_i][:txt] + @el[z.to_i][:ch].each do |za| #unsupported... 
consder + @el[za.to_i][:lev]='4' + @doc << '!_ ' + @el[za.to_i][:sum] + @doc << @el[za.to_i][:txt] + end + end + end + end + #@el.each_with_index do |x,i| + # if x: puts "#{i}: lev: #{x[:lev]}, id: #{x[:id]}, ma: #{x[:ma]}, ch: #{x[:ch].join(',')}" + # end + #end +# regexs strip most kdissert markup, and provide minimal info for sisu markup --> + @doc.each do |c,idx| + c.gsub!(/<\/summary>/,'') + c.gsub!(/<html>.+?\n|<\/body>|<\/html>|<p>/m,'') + c.gsub!(/<\/p>/,"\n") + c.gsub!(/<span style="font-weight:600">(.+?)<\/span>/," *{ \\1 }* ") + c.gsub!(/<span style="text-decoration:underline">(.+?)<\/span>/," _{ \\1 }_ ") + c.gsub!(/<span style="font-style:italic">(.+?)<\/span>/," /{ \\1 }/ ") + c.gsub!(/<ul type="disc">/,'_* ') + c.gsub!(/<\S+?>/,'') + c.gsub!(/<(:p[bn])>/,"<\\1>") + c.gsub!(/<<(|.?|@|.?|)/,"<<\\1") + c.gsub!(/"/,'"') + c.gsub!(/ \s+/,' ') + @sisu_base << c.strip + "\n\n" + end + else puts ".kdi extension expected, filename not recognised: << #{kdi} >>" + end + end +else puts '.kdi file for conversion to sisu expected' +end +sisu_head=<<WOK +% converted from Kdissert to SiSU + +@title: #@title + +@prefix: Kdissert mind/topic mapping auto conversion to SiSU markup + +@links: {Kdissert}http://freehackers.org/~tnagy/kdissert/ + +@base_program: kdissert + +% @date: + +@rcs: $Id$ + +@level: num_top=1 + +WOK +#puts @sisu_base +@output << sisu_head << @sisu_base +__END__ diff --git a/data/sisu/v3/conf/convert/modify.rb b/data/sisu/v3/conf/convert/modify.rb new file mode 100644 index 00000000..5f4f9ae0 --- /dev/null +++ b/data/sisu/v3/conf/convert/modify.rb @@ -0,0 +1,250 @@ +#!/usr/bin/env ruby +=begin + * Name: modify.rb + * Author: Ralph Amissah + * http://www.jus.uio.no/sisu + * http://www.jus.uio.no/sisu/SiSU/download + * Description: A conversion script for canned substitutions, + a fairly generic simple tool that can be used to store other canned conversions, + used here for altering SiSU markup + * Copyright (C) 2004, 2006 Ralph Amissah + * Packaged 
with: SiSU information Structuring Universe - + Structured information, Serialized Units + * SiSU Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Ralph Amissah + + * License: GPL 3 or later + + Summary of GPL 3 + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + http://www.fsf.org/licenses/gpl.html + http://www.gnu.org/copyleft/gpl.html + http://www.jus.uio.no/sisu/gpl3.fsf + + © Ralph Amissah 1997, current 2006. + All Rights Reserved. 
+ + * Ralph Amissah: ralph@amissah.com + ralph.amissah@gmail.com + + * Notes: configure rc in ~/.sisu/sisurc.yaml +=end +module SiSU_Modify + class Convert_markup + def initialize(cf,files) + @cf,@files=cf,files + @description="This is a script that contains canned text conversions for reuse" + end + def current_match_and_replace #Disable, edit manually + #convert_pre_37_to_38_experimental + end + def message(text) + response='' + while response !~/yes/ + print %{ + #{text} + to continue type "yes" [to exit type "no" or "quit"]: } + response=File.new('/dev/tty').gets.strip + exit if response =~/^(?:quit|no)$/ + end + end + def help + print <<WOK + +#@description + +modify.rb --default [filename/wildcard] + performs the current default conversion that is set + [handle with care, (may be disabled)] + +modify.rb --to38 [filename/wildcard] + converts pre 0.37 sisu markup to 0.38 experimental + +modify.rb --to37 [filename/wildcard] + converts pre 0.37 sisu markup to 0.38 experimental + +note converting twice in a single direction will result +in markup inconsistency + +An alternative script has been introduced, +try 'sisu --convert' + +modify.rb --default + performs the current default conversion that is set + on the files matched in default the settings + [handle with care, (may be disabled)] + +WOK + exit + end + #%% substitutions to be made + def convert_pre_37_to_38_experimental + message('convert sisu markup from 0.37 to 0.38 experimental (rad)') + [ + [/^0~(\S+?)([+-])\s+/, "@\\1:\\2 "], + [/^0~(\S+)\s+/, "@\\1: "], + [/^@toc:\s+/, "@structure: "], + [/^1~/, ':A~'], + [/^2~/, ':B~'], + [/^3~/, ':C~'], + [/^4~/, '1~'], + [/^5~/, '2~'], + [/^6~/, '3~'] + ] + end + def convert_38_experimental_to_37 + message('convert sisu markup from 0.38 experimental (rad) to 0.37') + [ + [/^@(\S+?):([+-])\s+/, "0~\\1\\2 "], + [/^@(\S+?):\s+/, "0~\\1 "], + [/^0~structure\s+/, "0~toc "], + [/^1~/, '4~'], + [/^2~/, '5~'], + [/^3~/, '6~'], + [/^:?A~/, '1~'], + [/^:?B~/, '2~'], + 
[/^:?C~/, '3~'] + ] + end + def markup_version?(i) + file=File.open(i,'r') + cont=file.readlines + file.close + markup_version=nil + cont.each do |y| + if y =~/^:?A~/ + markup_version='0.38' + break + end + if y =~/^1~/ + markup_version='0.37' + break + end + #if y =~/^1{~/ + # markup_version='0.16' + # break + #end + markup_version='0.37' + end + markup_version + end + def conversion + #%% do it --------------------------> + if @files and @files.length > 0 + p @files + mr=nil + #%% changes to make m match, r replace --------------------------> + if @cf =~/--help/: help + else + message("WARNING, proceed at your own risk,\npermanent changes requested for the above named files\n best that you check (manually) what this file is set to do\n conversions set are at the top of the file") + mr=case @cf + when /--convert|default/: current_match_and_replace + when /--(?:(?:37)?to38|rad)/: convert_pre_37_to_38_experimental + when /--(?:(?:38)?to37)/: convert_38_experimental_to_37 + else help + end + end + match_and_replace=mr + #start_processing =/not used in this example/i + end_processing =/END\s+OF\s+FILE/ + @files.each do |i| + @new,@matched,@flag_start,@flag_end,@empty1,@empty2=true,false,false,false,false,false + o="#{i}.bk" #o is for old + file=File.open(i,'r') + cont=file.readlines + file.close + cont.each do |y| + #p y + #p y if y =~/^[1-6]~/ + match_and_replace.each do |m,r| + if y =~m + if @new + @new=false + File.unlink(o) if File.exists?(o) + #system("lv #{i} > #{o}") + File.rename(i,o) + File.unlink(i) if File.exists?(i) + @file=File.new(i,'w') + @matched=true + break + end + end + end + end + if @matched + puts "match in #{i}" + @flag_start=true + cont.each do |y| + if y =~end_processing: @flag_end=true + end + if @flag_start and not @flag_end + match_and_replace.each do |m,r| + if y =~m + p m.to_s + ' -> ' + r + puts "in: #{y}" + y.gsub!(m,r) if m and r + puts "out: #{y}" + end + end + end + #if y =~start_processing: @flag_start=true + #end + #if y =~m1 + 
#end + if y=~/^\s*$/: @empty1=true + else @empty1=false + end + @file.puts y unless (@empty1==true and @empty2==true) + if y=~/^\s*$/: @empty2=true + else @empty2=false + end + #@file << y + "\n" + end + @file.close + else puts "NO match in #{f}" + end + end + else puts "this routine makes permanent changes to the contents of the files matched, as instructed within [no matches]" + end + end + end +end +#%% files to match for this conversion set -------------------------> +f=$* +p $* +cf=f[0].to_s +f.shift +match_and_replace=Array.new +#unless cf and cf =~/--/ +# puts <<WOK +# +##@description +# +#usage: +# modify.rb --convert [filename/wildcard] +#for help +# modify.rb --help +#WOK +# exit +#end +unless f.length > 0: f=Dir.glob("[a-z]*.ss?") #restricted to sisu type files, it need not be +end +#f=Dir.glob("{bin,conf,data,lib}/**/*.rb") #sisu development +#f=Dir.glob("[^_]/**/*") #all files subdirectories beneath pwd except those starting with _ +SiSU_Modify::Convert_markup.new(cf,f).conversion +__END__ diff --git a/data/sisu/v3/conf/convert/sisu_convert b/data/sisu/v3/conf/convert/sisu_convert new file mode 100644 index 00000000..a3a12189 --- /dev/null +++ b/data/sisu/v3/conf/convert/sisu_convert @@ -0,0 +1,519 @@ +#!/usr/bin/env ruby +# = sisu - SiSU information Structuring Universe +# +# Copyright (c) Ralph Amissah 1997,2004 +# +# Ralph Amissah mailto:ralph@amissah.com +# +# * Name: SiSU information Structuring Universe +# * Author: Ralph@Amissah.com +# * Description: document conversion tool, to sisu from other formats +# * License: GPL 3 or later +# * Notes: word conversion uses wvWare and wvSiSU.xml (a modified/stripped wvHtml.xml) +# * http://wvware.sourceforge.net/ +# * http://sourceforge.net/projects/wvware +# * <url:sisu.lnk>|sisu.lnk|@|^| +# * <url:sisu> +module CONVERT + class MyOutput + def initialize(data, filename, instruct) + @data=data.compact + @filename=filename + @instruct=instruct + end + def headerBasic + <<WOK +% SiSU 2.0 + +@title: + 
:subtitle: + +@creator: + :author: + +@classify: + :topic_register: + +@date: + :published: + +@rights: + :copyright: + :license: + +WOK + end + def headerDefault + <<WOK +% SiSU 2.0 + +@title: + :subtitle: + +@creator: + :author: + +@classify: + :topic_register: + +@date: + :published: + +@rights: + :copyright: + :license: + +WOK + end + def hardOutput + pre = Array.new + case @instruct + when /default/ + pre << headerDefault + else + pre << headerBasic + end + @filename_wv=File.new(%{,,#{@filename}.sst},'w+') + @filename_wv << pre + @data.each do |x| + y = x.split("\n") + y.each do |z| # cleaner output this way + z.strip! + @filename_wv.puts "#{z}\n\n" unless z =~/^$/ + end + end + end + end + class WareWord97 + def initialize(data, filename, instruct) + @data=data + @filename=filename + @instruct=instruct + end + def songsheet + data=@data + print "Convert to SiSU file from Word97 << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>> + data=WareWord97.new(data.collect,@filename,@instruct).strip + data=WareWord97.new(data.collect,@filename,@instruct).strip + data=WareWord97.new(data.collect,@filename,@instruct).markup_rules + data=MyOutput.new(data.collect,@filename,@instruct).hardOutput + end + def strip + data=@data + tuned_file=Array.new + endnote_no=1 + data.each do |para| + para.strip! + para.gsub!(/<u>\s*<\/u>/,'') + para.gsub!(/<\/u>\s*<u>/,'') + para.gsub!(/<b>\s*<\/b>/,'') + para.gsub!(/<\/b>\s*<b>/,'') + para.gsub!(/<i>\s*<\/i>/,'') + para.gsub!(/<\/i>\s*<i>/,'') + tuned_file << para unless para == nil + end + tuned_file + end + def markup_rules + data=@data + tuned_file=Array.new + endnote_no=1 + data.each do |para| + para.strip! 
+ para.gsub!(/\s+/,' ') + para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i,'1~ \1 \2') #watch case insensitivity + para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i,':C~ \1 \2') #watch case insensitivity + para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i,'3~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i,'2~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/^<b>(\d.+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different + #para.gsub!(/^<b>([\d.]+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/<u>(.+?)<\/u>/,'_{\1}_') + para.gsub!(/<b>(.+?)<\/b>/,'!{\1}!') + para.gsub!(/<i>(.+?)<\/i>/,'/{\1}/') + tuned_file << para unless para == nil + end + tuned_file + end + end + class Html + def initialize(data, filename, instruct) + @data=data + @filename=filename + @instruct=instruct + end + def songsheet + data=@data + print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>> + #data=Html.new(data.collect, @filename, @instruct).space_paragraphs + #data=Html.new(data.split(''), @filename, @instruct).space_paragraphs + data=Html.new(data.join.split(/\n\n+/), @filename, @instruct).space_paragraphs + #data=Html.new(data.split("\n"), @filename, @instruct).space_paragraphs + #data=Html.new(data.collect.join.split("\n"), @filename, @instruct).space_paragraphs + data=Html.new(data.collect, @filename, @instruct).multiline + data=Html.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules + data=MyOutput.new(data.collect, @filename, @instruct).hardOutput + end + def space_paragraphs + #data=@data.join.split(/\n/) + data=@data + #p data.length + tuned_file=Array.new + data.each do |para| + para.strip! 
+ para.gsub!(/\r/,'') + #para.gsub!(/\n/, ' ') #PROBLEM, serious time issues on a few files also for \n (or multiline matches which is less surprising), edit out if necessary + para.gsub!(/<\/?p>/i,'zZz') + para.gsub!(/<\/?\s*p(?:\s+ALIGN=.+?)?>/i,'zZz') #all manner of <p> para.gsub!(/<\/?p>/i, "\n\n") + para.gsub!(/<p\s+(class|align).+?>/i,'zZz') # + para.gsub!(/<\/p>/i,'zZz') # repeat actually + para.gsub!(/<(?:dir|tr|br)>/i,'zZz') # + #para.gsub!(/<(?:\/\s*)?(?:dir|tr|br)>/i, "zZz") # + para.gsub!(/(<\/center>)/i,'\1zZz') + para.gsub!(/(<\/h[1-6]>)/i,'\1zZz') + para.gsub!(/ \s+/i,' ') + para.gsub!(/(?:\s*zZz\s*)+/i,'zZz') # + tuned_file << para unless para == nil + end + tuned_file + end + def blockquotes(sub='') # SERIOUS PROBLEM INTRODUCED, some blockquotes go missing !, quite unacceptable, debug, for now not used + res=Array.new + sub.each do |x| + if x=~/(<\/blockquote>)/i + m = $1 + res << x[/(.+?)#{m}/mi,1].gsub!(/zZz/,'zZz_1 ') if x =~/.+?#{m}/mi + res << x[/#{m}(.+)/mi,1] + else + res << x #[/(.+)/mi,1] + end + end + res.join + end + def multiline + data=@data + tuned_file=Array.new + data.each do |para| + para.gsub!(/\n/,' ') + para.gsub!(/ \s+/mi,' ') + #ALL HERE could be very time EXPENSIVE but tamed? compromise ... /mi + para.gsub!(/<([biu]|h[1-6])>(?:zZz)?([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>') + para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)(?:<\/center>)?zZz(.+?)?<\/\1>/i,'zZz<\1>\2 \3</\1>') + #para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/center>zZz(.+?)?<\/\1>/i,'zZz<\1>\2 \3</\1>') + para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/\1>/i,'zZz<\1>\2</\1>') + para.gsub!(/<(h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i,'zZz<\1>\2</\1>zZz') #does catch some h1, h2 etc, too expensive to have biu + #para.gsub!(/<([biu]|h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i,'zZz<\1>\2 \3</\1>') #may go too far? useful for h1 h2 etc, remove biu? 
+ #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>') + #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>') + ### SERIOUS PROBLEM INTRODUCED + # sub = para.split(/<blockquote>/i) + # para = blockquotes(sub) if sub.length > 0 #check was on >1 could have serious repercussions 2004w29 + para.gsub!(/zZz(\s*zZz)*/,"\n\n") + tuned_file << para << "\n\n" unless para == nil + end + tuned_file + end + def markup_rules + @@flag_blockquote=false + data=@data + tuned_file=Array.new + data.each do |para| + if para=~/<a href="(http:\/\/.+?)">/i + #p para.grep(/<a href="(http:\/\/.+?)">/i) + #m=$1 + #para.gsub!(/(?:<\s*)?<a href="#{m}">#{m}<\/a>(?:\s*>)?\.?/i, "#{m}") + para.gsub!(/(?:<\s*)?<a href="(http:\/\/.+?)">http:\/\/.+?<\/a>(?:\s*>)?\.?/i,'\1') #risk that url & url are not to match + #para.gsub!(/(?:<\s*)?<a href="(\w+\.html)">(http:\/\/.+?\/\1)<\/a>(?:\s*>)?\.?/i, "\\2") #does not match + end + if para=~/<BLOCKQUOTE>/i + @@flag_blockquote=true + end + if @@flag_blockquote + para.gsub!(/^/,'_1 ') unless para.empty? 
or para =~/^\s*<\/?blockquote?>\s*$/i + end + if para=~/<\/BLOCKQUOTE>/i + @@flag_blockquote=false + end + para.gsub!(/<\/?blockquote?>/i,'') + ### clean + para.gsub!(/^\s+/i,'') + para.gsub!(/<([bui]|em|su[pb])>\s*<\/\1>/i,'') + para.gsub!(/<\/?center>/i,'') + para.gsub!(/\s*<\/dir>/i,'') + para.gsub!(/<hr>/i,'') + para.gsub!(/\s*<a href=".+?\.html#(?:[a-z_]+)?(?:[a-z0-9_-]|\*)+">\[(\*+)\]<\/a>/i,'^{[\1]}^ ') #other endnote marker + para.gsub!(/<a href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"(?:\s+name=".+?")?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i,'~^ ') #endnote marker + para.gsub!(/<a name=".+?"\s+href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i,'~^ ') #endnote marker + para.gsub!(/<a name="(?:[a-z$]+)?[0-9_-]+">\s*(<\/a>)?\s*\d+\.?\s*(<\/a>)?\s*/i,'^~ ') #endnote + #para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') # + para.gsub!(/<h([1-6])(?: align=.+?)?>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') # + para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i,'4~ \1 \2') #watch case insensitivity + para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i,'3~ \1 \2') #watch case insensitivity + para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i,'6~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i,'5~ \1 \2') #numeric, decide what to do, can be different + para.gsub!(/^<b>(\d+\.?)(.+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different + #<a name="ii"></a><B> + para.gsub!(/^(<a name=".+?">)(?:<small>)?<(?:b|strong)>\s*(.+?)\s*<\/(?:b|strong)>/i,'5~ \2 \1') #watch + para.gsub!(/^(<(a name|A NAME)=".+?">)(\s*|<\/[aA]>)?([A-Z][A-Z])+/,'5~ \2 \1') #watch + para.gsub!(/^(\s+|<p>)?(<a name=".+?">)(\s*|<\/a>)?<b>/i,'5~ \2 \1') #watch + para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') # + para.gsub!(/^<b>\s*(.+?)<\/b>\s*(<\/i>\s*)?$/i,'4~ \1\2') # wish it all were less messy + para.gsub!(/^<i>\s*([^"(].+?)<\/i>\s*(<\/b>\s*)?$/i,'5~ \1\2') # wish it all were less messy + para.gsub!(/<\/?[biu]>/i,'') if para 
=~/[1-6]\{/ + para.gsub!(/<u>\s*(.+?)\s*<\/u>/i,'_{\1}_') + para.gsub!(/<(b|strong)>\s*(.+?)\s*<\/\1>/i,'*{\2}*') + para.gsub!(/<(i|em)>\s*(.+?)\s*<\/\1>/i,'/{\2}/') + para.gsub!(/<sup>\s*(.+?)\s*<\/sup>/i,'^{\1}^') + para.gsub!(/(([\/\*!_])\{.+?\}\2)\s\s+/i,'\1 ') + para.gsub!(/(([\/\*!_])\{.+?\}\2)\s+([.,;?\)])\s+/i,'\1\3 ') + para.gsub!(/(([\/\*!_])\{.+?\}\2)(["'])\s+/i,'\1\3 ') + para.gsub!(/(([\/\*!_])\{.+?\}\2)\s*([a-z0-9])/i,'\1 \3') + para.gsub!(/(([\/\*_])\{.+?\}\2)\s*([a-z0-9])/i,'\1 \3') + para.gsub!(/([a-z0-9])(([\/\*_])\{.+?\}\3)/i,' \1 \2') #eg this/{problem}/ + para.gsub!(/([\/\*_])\{([,.;; ]+)\}\1/i,'\2') #eg /{,}/ or *{ }* etc. + para.gsub!(/ \s+/i,' ') + #para.gsub!(/\/\{\*\{/i, '*{/{') + #para.gsub!(/\}\*\}\//i, '}/}*') + para.gsub!(/"/i,'"') + para.gsub!(/&/i,'and') + para.gsub!(/<!doctype html public .+/i,'') + para.gsub!(/<\/?(?:html|head|body|font|small)>/i,'') + para.gsub!(/<\/(?:title)>/i,'') + para.gsub!(/<title>/i,'#{~title? ') + para.gsub!(/<blockquote>(.+?)<\/blockquote>/mi,"\n\n_1 \\1\n\n") + para.gsub!(/<div align=.+?>|<\/div>|<font size=.+?>|<\/a><\/em><\/strong>/i,'') + para.gsub!(/~^\s+\.\s*/i,'.~^ ') #check vim equiv # %s/\~e\s\+\.\s*/.\~e /c + para.gsub!(/\s+~^\s+/i,'~^ ') + para.gsub!(/ \s+/i,' ') + para.gsub!(/\s+$/i,'') + para.gsub!(/^(?:<\/[bi]>)+$/i,'') + para.gsub!(/^(?:(?:<i>)+<b>|(?:<b>)+<i>)\s*([^"(].+?)/i,'5~ \1\2') # wish it all were less messy + para.gsub!(/^(?:<\/?(?:[ib]|em)>\s*)+$/i,'') # cleaning up left over <i> etc. 
+ para.gsub!(/<(?:i|em)>\s*(.+)/i,'/{\1}/') # using up left over <i> + para.gsub!(/<b>\s*(.+)/i,'*{\1}*') # using up left over <b> + para.gsub!(/<dd>([\d.]+)/i,'5~ \1') + para.gsub!(/<dd>(?: )+([\d.]+)/i,'6~ \1') + para.gsub!(/<dd>(\([a-z]\))/i,'7~ \1') + para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">(.+?)(<\/a>)/i,'\1\3\2\4') + para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">/i,'\1\3\2') + para.gsub!(/http\/\/(\S+)/i,'http:\/\/\1') + para.gsub!(/\s*<a href="\S+?">(http:\/\/\S+?)<\/a>\s*/i,' \1 ') + para.gsub!(/([a-zA-Z.,!?;:])([*\/_-]\{)/,'\1 \2') + para.gsub!(/^\s*( ){10,12}/i,'_2 ') + para.gsub!(/^\s*( ){4,5}/i,'_1 ') + para.gsub!(/	/,' ') #check + ## glyphs & tildes + para.gsub!(/¡/, '¡') #'Inverted exclamation + para.gsub!(/¢/, '¢') #'Cent sign ¢ + para.gsub!(/£/, '£') #'Pound sign £ + para.gsub!(/¤/, '¤') #'General currency sign + para.gsub!(/¥/, '¥') #'Yen sign ¥ + para.gsub!(/¦/, '¦') #'Broken vertical bar + para.gsub!(/§/, '§') #'Section sign § + para.gsub!(/¨/, '¨') #'Umlaut + para.gsub!(/©/, '©') #'Copyright © + para.gsub!(/ª/, 'ª') #'Feminine ordinal ª + para.gsub!(/«/, '«') #'Left angle quote « + para.gsub!(/¬/, '¬') #'Not sign + para.gsub!(/­/, '') #'Soft hyphen + para.gsub!(/®/, '®') #'Registered trademark ® + para.gsub!(/¯/, '¯') #'Macron accent + para.gsub!(/°/, '°') #'Degree sign ° + para.gsub!(/&plusmin;/,'±') #'Plus or minus ± + para.gsub!(/²/, '²') #'Superscript 2 ² + para.gsub!(/³/, '³') #'Superscript 3 ³ + para.gsub!(/´/, '') #'Acute accent + para.gsub!(/µ/, 'µ') #'Micro sign (Greek mu) µ + para.gsub!(/¶/, '¶') #'Paragraph sign ¶ + para.gsub!(/·/, '·') #'Middle dot + para.gsub!(/¸/, '¸') #'Cedilla + para.gsub!(/¹/, '¹') #'Superscript 1 ¹ + para.gsub!(/º/, 'º') #'Masculine ordinal º + para.gsub!(/»/, '»') #'Right angle quote + para.gsub!(/¼/, '¼') #'Fraction one quarter ¼ + para.gsub!(/½/, '½') #'Fraction on half ½ + para.gsub!(/¾/, '¾') #'Fraction three quarters ¾ + para.gsub!(/¿/, '¿') #'Inverted question mark ¿ + para.gsub!(/À/, 'À') #'Capital A, 
grave accent À + para.gsub!(/Á/, 'Á') #'Capital A, acute accent Á + para.gsub!(/Â/, 'Â') #'Capital A, circumflex accent  + para.gsub!(/Ã/, 'Ã') #'Capital A, tilde à + para.gsub!(/Ä/, 'Ä') #'Capital A, umlaut Ä + para.gsub!(/Å/, 'Å') #'Capital A, ring Å + para.gsub!(/Æ/, 'Æ') #'Capital AE ligature Æ + para.gsub!(/Ç/, 'Ç') #'Capital C, cedilla Ç + para.gsub!(/È/, 'È') #'Capital E, grave accent È + para.gsub!(/É/, 'É') #'Capital E, acute accent É + para.gsub!(/Ê/, 'Ê') #'Capital E, circumflex accent Ê + para.gsub!(/Ë/, 'Ë') #'Capital E, umlaut Ë + para.gsub!(/Ì/, 'Ì') #'Capital I, grave accent Ì + para.gsub!(/Í/, 'Í') #'Capital I, acute accent Í + para.gsub!(/Î/, 'Î') #'Capital I, circumflex accent Î + para.gsub!(/Ï/, 'Ï') #'Capital I, umlaut Ï + para.gsub!(/Ð/, 'Ð') #'Capital eth, Icelandic + para.gsub!(/Ñ/, 'Ñ') #'Capital N, tilde Ñ + para.gsub!(/Ò/, 'Ò') #'Capital O, grave accent Ò + para.gsub!(/Ó/, 'Ó') #'Capital O, acute accent Ó + para.gsub!(/Ô/, 'Ô') #'Capital O, circumflex accent Ô + para.gsub!(/Õ/, 'Õ') #'Capital O, tilde Õ + para.gsub!(/Ö/, 'Ö') #'Capital O, umlaut Ö + para.gsub!(/×/, '×') #'Multiply sign × + para.gsub!(/Ø/, 'Ø') #'Capital O, slash Ø + para.gsub!(/Ù/, 'Ù') #'Capital U, grave accent Ù + para.gsub!(/Ú/, 'Ú') #'Capital U, acute accent Ú + para.gsub!(/Û/, 'Û') #'Capital U, circumflex accent Û + para.gsub!(/Ü/, 'Ü') #'Capital U, umlaut Ü + para.gsub!(/Ý/, 'Ý') #'Capital Y, acute accent Ý + para.gsub!(/Þ/, 'Þ') #'Capital thorn, Icelandic Þ + para.gsub!(/ß/, 'ß') #'Small sz ligature, German ß + para.gsub!(/à/, 'à') #'Small a, grave accent à + para.gsub!(/á/, 'á') #'Small a, acute accent á + para.gsub!(/â/, 'â') #'Small a, circumflex accent â + para.gsub!(/ã/, 'ã') #'Small a, tilde ã + para.gsub!(/ä/, 'ä') #'Small a, umlaut ä + para.gsub!(/å/, 'å') #'Small a, ring å + para.gsub!(/æ/, 'æ') #'Small ae ligature æ + para.gsub!(/ç/, 'ç') #'Small c, cedilla ç + para.gsub!(/è/, 'è') #'Small e, grave accent è + para.gsub!(/é/, 'é') #'Small e, acute accent é 
+ para.gsub!(/ê/, 'ê') #'Small e, circumflex accent ê + para.gsub!(/ë/, 'ë') #'Small e, umlaut ë + para.gsub!(/ì/, 'ì') #'Small i, grave accent ì + para.gsub!(/í/, 'í') #'Small i, acute accent í + para.gsub!(/î/, 'î') #'Small i, circumflex accent î + para.gsub!(/ï/, 'ï') #'Small i, umlaut ï + para.gsub!(/ð/, 'ð') #'Small eth, Icelandic ð + para.gsub!(/ñ/, 'ñ') #'Small n, tilde ñ + para.gsub!(/ò/, 'ò') #'Small o, grave accent ò + para.gsub!(/ó/, 'ó') #'Small o, acute accent ó + para.gsub!(/ô/, 'ô') #'Small o, circumflex accent ô + para.gsub!(/õ/, 'õ') #'Small o, tilde õ + para.gsub!(/ö/, 'ö') #'Small o, umlaut ö + para.gsub!(/÷/, '÷') #'Divide sign ÷ + para.gsub!(/ø/, 'ø') #'Small o, slash ø + para.gsub!(/ù/, 'ù') #'Small u, grave accent ù + para.gsub!(/ú/, 'ú') #'Small u, acute accent ú + para.gsub!(/û/, 'û') #'Small u, circumflex accent û + para.gsub!(/ü/, 'ü') #'Small u, umlaut ü + para.gsub!(/ý/, 'ý') #'Small y, acute accent ý + para.gsub!(/þ/, 'þ') #'Small thorn, Icelandic þ + para.gsub!(/ÿ/, 'ÿ') #'Smally y, umlaut ÿ + ## + para.gsub!(/\s\s+/,' ') + para.gsub!(/\t+/,' ') + #para.gsub!(/ +/,' ') + #para.gsub!(/^(?:<(?:\/)?[bi]>)+$/i, '') + tuned_file << para unless para == nil + end + tuned_file + end + end + class Default < Html + def initialize(data, filename, instruct) + @data=data + @filename=filename + @instruct=instruct + end + def songsheet + data=@data + print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>> + data=Default.new(data.collect, @filename, @instruct).space_paragraphs + data=Default.new(data.collect, @filename, @instruct).multiline + data=Default.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules + data=Default.new(data.collect, @filename, @instruct).markup_default + data=MyOutput.new(data.collect, @filename, @instruct).hardOutput + end + def markup_default + data=@data + tuned_file=Array.new + data.each do |para| + para.gsub!(/<i>(Id\.?)(\s|$)/i,'/\{\1\}\2/') + 
para.gsub!(/^(~\{\{ .+?)(<\/LI>\s*|<\/OL>\s*)+$/i,'\1') + para.gsub!(/\/\{Id\.\s*<\/LI>\s*\}\//i,'/{Id.}/') + tuned_file << para unless para == nil + end + tuned_file + end + end +end +def help + puts <<WOK +conversion program +initial SiSU markup from other file formats + + zxy_convert --word does initial conversion from word97 to sisu markup, expects [filename].doc (can also use --doc) + zxy_convert --html does initial conversion from html to sisu markup, expects [filename].html + zxy_convert --default does initial conversion from defalt html to sisu markup, expects [filename].html + +WOK +end +def do_word(argv, instruct) + argv.each do |f| + if f =~/.+?\.doc$/ + @argv << f[/(.+?)\.doc$/, 1] + else + print "not .doc? << #{f} >> " + end + end + @argv.each do |filename| + system(%{wvWare -x #{@dir.path.home}/.sisu/convert/wvSiSU.xml #{filename}.doc > #{filename}.wv}) + file_array=IO.readlines("#{filename}.wv", "") + CONVERT::WareWord97.new(file_array, filename, instruct).songsheet # metaverse created here + end +end +def do_html(argv, instruct) + argv.each do |f| + if f =~/.+?\.html?$/ + @argv << f[/(.+?)\.html?$/, 1] + else + print "not .html? << #{f} >> " + end + end + @argv.each do |filename| + file_end=if FileTest.file?("#{filename}.html") + 'html' + elsif FileTest.file?("#{filename}.htm") + 'htm' + end + file_array=IO.readlines("#{filename}.#{file_end}","\n\r") + CONVERT::Html.new(file_array,filename,instruct).songsheet # metaverse created here + end +end +def do_default(argv, instruct) + argv.each do |f| + if f =~/.+?\.html$/ + @argv << f[/(.+?)\.html$/, 1] + else + print "not .html? 
<< #{f} >> " + end + end + @argv.each do |filename| + file_array=IO.readlines("#{filename}.html", "\n\r") + CONVERT::Default.new(file_array, filename, instruct).songsheet # metaverse created here + end +end +def cases(argv, instruct) + case instruct + when/^--(word(97)?|doc)$/i #creates minimal sisu_small.gz package to send + do_word(argv, instruct) + when/^--(html)$/i #creates sisu.gz package to send + do_html(argv, instruct) + when/^--(default)$/i #creates sisu.gz package to send + do_default(argv, instruct) + else + help + end +end +$KCODE='u' +branch='v2' +@argv=Array.new +argv=$* +SiSU_version_dir=(argv.inspect=~/--v1/) ? 'v1' : 'v2' +SiSU_lib="sisu/#{SiSU_version_dir}" +require "#{SiSU_lib}/sysenv" +include SiSU_Env +@dir=SiSU_Env::Info_env.new +instruct = "#{argv[0].to_s}" +argv.shift +instruct.chomp! +instruct = "help" if instruct.nil? or instruct == ""; +cases(argv, instruct) diff --git a/data/sisu/v3/conf/convert/wvHtml.xml b/data/sisu/v3/conf/convert/wvHtml.xml new file mode 100644 index 00000000..3cc9dd29 --- /dev/null +++ b/data/sisu/v3/conf/convert/wvHtml.xml @@ -0,0 +1,388 @@ +<main> +<charentity> +<begin>HTML</begin> +</charentity> + +<document> +<begin><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"> +<html> +<head> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=<charset/>"> +<META NAME="GENERATOR" CONTENT="wvWare/<version/>"> +<title> +<title/> +</title> +</head> +<body bgcolor="#FFFFFF" text="#000000" link="#0000ee" vlink="#551a8b"> +</begin> +<end> +<!-- +<hr> +<address> +<a href="http://wvware.sourceforge.net/"><img +src="wvSmall.gif" height=31 width=47 +align=left border=0 alt="wvWare"></a> +<a href="http://validator.w3.org/check/referer"><img +src="vh40.gif" height=31 width=88 +align=right border=0 alt="Valid HTML 4.0!"></a> +Document created with <a href="http://wvware.sourceforge.net/">wvWare/<version/></a><br> +</address> +--> +</body> +</html> +</end> +</document> + 
+<section>
+<begin>
+<!--Section Begins--><br>
+</begin>
+<end>
+<!--Section Ends-->
+</end>
+</section>
+
+<justification>
+<left>left</left>
+<right>right</right>
+<center>center</center>
+<block>left</block>
+<asian><!--Could Someone who sees this tag tell me what was is this type of justification, asian languages only i thing--></asian>
+</justification>
+
+<numbering>
+<Arabic>type="1"</Arabic>
+<UpperRoman>type="I"</UpperRoman>
+<LowerRoman>type="i"</LowerRoman>
+<UpperCaseN>type="A"</UpperCaseN>
+<LowerCaseN>type="a"</LowerCaseN>
+</numbering>
+
+<!-- Border kinds mapped to CSS border-style keywords (presumably substituted into the "border:" rules further down - confirm). -->
+<border>
+<noned>none</noned>
+<singled>solid</singled>
+<thickd>solid</thickd>
+<doubled>double</doubled>
+<number4d>double</number4d>
+<hairlined>solid</hairlined>
+<dotd>dotted</dotd>
+<dashlargegapd>dashed</dashlargegapd>
+<dotdashd>dotted</dotdashd>
+<dotdotdashd>dotted</dotdotdashd>
+<tripled>double</tripled>
+<thin-thicksmallgapd>dashed</thin-thicksmallgapd>
+<thick-thinsmallgapd>dashed</thick-thinsmallgapd>
+<thin-thick-thinsmallgapd>dashed</thin-thick-thinsmallgapd>
+<thin-thickmediumgapd>dashed</thin-thickmediumgapd>
+<thick-thinmediumgapd>dashed</thick-thinmediumgapd>
+<thin-thick-thinmediumgapd>dashed</thin-thick-thinmediumgapd>
+<thin-thicklargegapd>dashed</thin-thicklargegapd>
+<thick-thinlargegapd>dashed</thick-thinlargegapd>
+<thin-thick-thinlargegapd>dashed</thin-thick-thinlargegapd>
+<waved>solid</waved>
+<doublewaved>double</doublewaved>
+<dashsmallgapd>dashed</dashsmallgapd>
+<dashdotstrokedd>dashed</dashdotstrokedd>
+<emboss3Dd>ridge</emboss3Dd>
+<engrave3Dd>groove</engrave3Dd>
+<defaultd>ridge</defaultd>
+</border>
+
+<!-- <begin><ol <nfc/> start="<start/>"> -->
+<olist>
+<begin><ol <nfc/>>
+</begin>
+<end></ol></end>
+</olist>
+
+<ulist>
+<begin><ul>
+</begin>
+<end></ul></end>
+</ulist>
+
+<!-- <begin><li></begin> -->
+<entry>
+<begin><li value="<start/>"></begin>
+<end></li></end>
+</entry>
+
+
+<!--
+this tableoverride option can be used to turn off handling of
+these tags in tables, which 
I find is necessary for at least netscape +--> +<tableoverrides> +<ParaBefore>0</ParaBefore> +<ParaRight>0</ParaRight> +<ParaAfter>0</ParaAfter> +<ParaLeft>0</ParaLeft> +<ParaLeft1>0</ParaLeft1> +<VertMergedCells>0</VertMergedCells> +</tableoverrides> + +<table> +<begin><table width="<tablerelwidth/>%" border="1" cols="<no_cols/>" rows="<no_rows/>"></begin> +<end></table></end> +</table> + +<row> +<begin><tr></begin> +<end></tr></end> +</row> + +<cell> +<begin><td bgcolor="<cellbgcolor/>" width="<cellrelwidth/>%" rowspan="<rowspan/>" colspan="<colspan/>"></begin> +<end></td></end> +</cell> + +<paragraph> +<begin> +<table.end/> +<table.begin/> +<row.begin/><cell.begin/><olist.end/><olist.begin/><ulist.end/><ulist.begin/><entry.begin/><text.begin/> +</begin> +<end> +<text.end/><entry.end/><olist.end/><ulist.end/><cell.end/><row.end/> +</end> +</paragraph> + +<!-- these are all the character properties that can show up in word --> +<bold><begin><b></begin><end></b></end></bold> +<italic><begin><i></begin><end></i></end></italic> + +<!-- +text that has been deleted and will be displayed with strikethrough when +revision marked text is to be displayed + +use either this line... +--> +<RMarkDel><begin><s></begin><end></s><a href="#author<ibstRMarkDel/>">[Author ID<ibstRMarkDel/>: at <dttmRMarkDel/> ]</a></end></RMarkDel> + +<!-- +or uncomment below to make deleted text dissappear (well, become commented out) +--> +<!-- +<RMarkDel><begin><!--</begin><end>--></end></RMarkDel> +--> + +<!-- Overwriting char props. 
like CaPs->CAPS SmallCaps->smallcaps --> +<outline><begin></begin><end></end></outline> +<smallcaps><begin><span style="text-transform:lowercase"></begin><end></span></end></smallcaps> +<caps><begin><span style="text-transform:uppercase"></begin><end></span></end></caps> +<vanish><begin></begin><end></end></vanish> + +<!--If you uncomment this then the annotation text links will become commented out by html tags--> +<!-- +<vanish><begin><!--</begin><end>--></end></vanish> +--> + +<!-- +text that has been newly typed since the last time revision marks have been accepted +and will be displayed with underline when revision marked text is to be displayed + +use either this line... +--> +<RMark><begin><u></begin><end></u><a href="#author<ibstRMark/>">[Author ID<ibstRMark/>: at <dttmRMark/>]</a></end></RMark> + +<!-- +or uncomment below to make the underline dissappear +--> +<!-- +<RMark><begin></begin><end></end></RMark> +--> + + +<strike><begin><s></begin><end></s></end></strike> +<shadow><begin></begin><end></end></shadow> +<lowercase><begin></begin><end></end></lowercase> +<emboss><begin></begin><end></end></emboss> +<imprint><begin></begin><end></end></imprint> +<!--double strike--> +<dstrike><begin><s></begin><end></s></end></dstrike> + +<!-- +ftc's +& +hps + +keep them for font face and do that later. 
+--> + +<super><begin><sup></begin><end></sup></end></super> +<sub><begin><sub></begin><end></sub></end></sub> + +<singleu><begin><u></begin><end></u></end></singleu> +<wordu><begin><u></begin><end></u></end></wordu> +<doubleu><begin><u></begin><end></u></end></doubleu> +<dottedu><begin><u></begin><end></u></end></dottedu> +<hiddenu><begin><u></begin><end></u></end></hiddenu> +<thicku><begin><u></begin><end></u></end></thicku> +<dashu><begin><u></begin><end></u></end></dashu> +<dotu><begin><u></begin><end></u></end></dotu> +<dotdashu><begin><u></begin><end></u></end></dotdashu> +<dotdotdashu><begin><u></begin><end></u></end></dotdotdashu> +<waveu><begin><u></begin><end></u></end></waveu> + +<!-- +text whose properties have been changed since the last time revision marks have been accepted +and will be displayed with a note showing the change points. + +use either this line (which is admittedly a bit scary looking, but harmless)... +--> +<PropRMark><begin><a href="#author<ibstPropRMark/>">--&gt;</a></begin><end><a href="#author<ibstPropRMark/>">[Author ID<ibstPropRMark/>: at <dttmPropRMark/>]</a></end></PropRMark> + +<!-- +or uncomment below to make the notes disappear +--> +<!-- +<PropRMark><begin></begin><end></end></PropRMark> +--> + +<!-- +<color> +--> +<Black><begin>Black</begin><end></end></Black> +<Blue><begin>Blue</begin><end></end></Blue> +<Cyan><begin>Cyan</begin><end></end></Cyan> +<Green><begin>Green</begin><end></end></Green> +<Magenta><begin>Magenta</begin><end></end></Magenta> +<Red><begin>Red</begin><end></end></Red> +<Yellow><begin>Yellow</begin><end></end></Yellow> +<White><begin>White</begin><end></end></White> +<DkBlue><begin>DarkBlue</begin><end></end></DkBlue> +<DkCyan><begin>DarkCyan</begin><end></end></DkCyan> +<DkGreen><begin>DarkGreen</begin><end></end></DkGreen> +<DkMagenta><begin>DarkMagenta</begin><end></end></DkMagenta> +<DkRed><begin>DarkRed</begin><end></end></DkRed> +<DkYellow><begin>#8b8b00</begin><end></end></DkYellow> 
+<DkGray><begin>DarkGray</begin><end></end></DkGray> +<LtGray><begin>LightGrey</begin><end></end></LtGray> +<!-- +</color> +--> + +<!-- +<animation> +--> +<LasVegas><begin><blink></begin><end></blink></end></LasVegas> +<BackgroundBlink><begin><blink></begin><end></blink></end></BackgroundBlink> +<SparkleText><begin><blink></begin><end></blink></end></SparkleText> +<MarchingAnts><begin><blink></begin><end></blink></end></MarchingAnts> +<MarchingRedAnts><begin><blink></begin><end></blink></end></MarchingRedAnts> +<Shimmer><begin><blink></begin><end></blink></end></Shimmer> +<!-- +</animation> +--> + +<!-- +I don't understand what this one is, and I've never come across it + +use this sample line (which is admittedly a bit scary looking, but harmless)... +--> +<DispFldRMark><begin><a href="#author<ibstDispFldRMark/>">--&gt;</a></begin><end><a href="#author<ibstDispFldRMark/>">[Author ID<ibstDispFldRMark/>: at <dttmDispFldRMark/> (<xstDispFldRMark/>)]</a></end></DispFldRMark> + +<!-- +or uncomment below to ignore it, the previous might even crash wv ? 
+--> +<!-- +<DispFldRMark><begin></begin><end></end></DispFldRMark> +--> + +<animation> +<begin><LasVegas.begin/><BackgroundBlink.begin/><SparkleText.begin/><MarchingAnts.begin/><MarchingRedAnts.begin/><Shimmer.begin/></begin> +<end><Shimmer.end/><MarchingRedAnts.end/><MarchingAnts.end/><SparkleText.end/><BackgroundBlink.end/><LasVegas.end/></end> +</animation> + +<fontstr> +<begin><font color="<black.begin/><blue.begin/><cyan.begin/><green.begin/><magenta.begin/><red.begin/><yellow.begin/><white.begin/><dkblue.begin/><dkcyan.begin/><dkgreen.begin/><dkmagenta.begin/><dkred.begin/><dkyellow.begin/><dkgray.begin/><ltgray.begin/>"></begin> +<end><ltgray.end/><dkgray.end/><dkyellow.end/><dkred.end/><dkmagenta.end/><dkgreen.end/><dkcyan.end/><dkblue.end/><white.end/><yellow.end/><red.end/><magenta.end/><green.end/><cyan.end/><blue.end/><black.end/></font></end> +</fontstr> + +<comment> +<begin> +<a href="#comment<ibstAnno/> ">--&gt;</a> +</begin> +<end><a href="#comment<ibstAnno/> ">[Author:<xstUsrInitl/>]</a> +</end> +</comment> + +<style name="Normal"> +<character> +<begin><PropRMark.begin/><DispFldRMark.begin/><animation.begin/><fontstr.begin/><bold.begin/><italic.begin/><strike.begin/><RMarkDel.begin/><outline.begin/><smallcaps.begin/><caps.begin/><vanish.begin/><RMark.begin/><shadow.begin/><lowercase.begin/><emboss.begin/><imprint.begin/><dstrike.begin/><super.begin/><sub.begin/><singleu.begin/><wordu.begin/><doubleu.begin/><dottedu.begin/><hiddenu.begin/><thicku.begin/><dashu.begin/><dotu.begin/><dotdashu.begin/><dotdotdashu.begin/><waveu.begin/></begin> 
+<end><waveu.end/><dotdotdashu.end/><dotdashu.end/><dotu.end/><dashu.end/><thicku.end/><hiddenu.end/><dottedu.end/><doubleu.end/><wordu.end/><singleu.end/><sub.end/><super.end/><dstrike.end/><imprint.end/><emboss.end/><lowercase.end/><shadow.end/><RMark.end/><vanish.end/><caps.end/><smallcaps.end/><outline.end/><RMarkDel.end/><strike.end/><italic.end/><bold.end/><fontstr.end/><animation.end/><DispFldRMark.end/><PropRMark.end/></end> +</character> + +<!-- Netscape does handle this correctly yet, here is how each different side of the border should work. +border-top: thin <bordertopstyle/> <bordertopcolor/>; +border-left: thin <borderleftstyle/> <borderleftcolor/>; +border-right: thin <borderrightstyle/> <borderrightcolor/>; +border-bottom: thin <borderbottomstyle/> <borderbottomcolor/> +--> + + +<pmargin> +<begin>margin: <mmParaBefore/> <mmParaRight/> <mmParaAfter/> <mmParaLeft/>;</begin> +</pmargin> + +<pborder> +<begin> +border: thin <borderleftstyle/> <borderleftcolor/>; +<!-- +border-top: thin <bordertopstyle/> <bordertopcolor/>; +border-left: thin <borderleftstyle/> <borderleftcolor/>; +border-right: thin <borderrightstyle/> <borderrightcolor/>; +border-bottom: thin <borderbottomstyle/> <borderbottomcolor/> +--> +</begin> +</pborder> + +<text> +<begin><p><div name="<stylename/>" align="<just/>" style="<paramargin/> <paraborder/> padding: <mmPadTop/> <mmPadRight/> <mmPadBottom/> <mmPadLeft/>; "> + +<p style="text-indent: <mmParaLeft1/>; text-align: <just/>; line-height: <mmLineHeight/>; color: <parafgcolor/>; background-color: <parabgcolor/>; "></begin> + +<end></p></div></end> +</text> + +<picture> +<begin> +<img <htmlAlignGuess/> width="<pixPicWidth/>" height="<pixPicHeight/>" alt="0x01 graphic" src="placeholder.png"><htmlNextLineGuess/> +</begin> +</picture> + +</style> + +<!--we need to be override the character properties--> +<!-- +<style name="Normal"> +<character> 
+<begin><PropRMark.begin/><DispFldRMark.begin/><animation.begin/><fontstr.begin/><bold.begin/><italic.begin/><strike.begin/><RMarkDel.begin/><outline.begin/><smallcaps.begin/><caps.begin/><vanish.begin/><RMark.begin/><shadow.begin/><lowercase.begin/><emboss.begin/><imprint.begin/><dstrike.begin/><super.begin/><sub.begin/><singleu.begin/><wordu.begin/><doubleu.begin/><dottedu.begin/><hiddenu.begin/><thicku.begin/><dashu.begin/><dotu.begin/><dotdashu.begin/><dotdotdashu.begin/><waveu.begin/></begin> +<end><waveu.end/><dotdotdashu.end/><dotdashu.end/><dotu.end/><dashu.end/><thicku.end/><hiddenu.end/><dottedu.end/><doubleu.end/><wordu.end/><singleu.end/><sub.end/><super.end/><dstrike.end/><imprint.end/><emboss.end/><lowercase.end/><shadow.end/><RMark.end/><vanish.end/><caps.end/><smallcaps.end/><outline.end/><RMarkDel.end/><strike.end/><italic.end/><bold.end/><fontstr.end/><animation.end/><DispFldRMark.end/><PropRMark.end/></end> +</character> + +<text> +<begin><div name="<stylename/>" align="<just/>"><p></begin> +<end></p></div></end> +</text> + +</style> + +<style name="Heading 1"> + +<character> +<begin></begin> +<end></end> +</character> + +<text> +<begin><div name="<stylename/>" align="<just/>"><H1></begin> +<end></H1></div></end> +</text> + + + +</style> +--> + +</main> diff --git a/data/sisu/v3/conf/convert/wvSiSU.xml b/data/sisu/v3/conf/convert/wvSiSU.xml new file mode 100644 index 00000000..271b00ff --- /dev/null +++ b/data/sisu/v3/conf/convert/wvSiSU.xml @@ -0,0 +1,360 @@ +<main> +<charentity> +<begin>HTML</begin> +</charentity> +<!-- wvWare -x wvSiSU.xml [inputfile.doc] [outputfile.html] --> + +<document> +<begin> +</begin> +<end> +</end> +</document> + +<section> +<begin> +</begin> +<end> +</end> +</section> + +<justification> +<left>left</left> +<right>right</right> +<center>center</center> +<block>left</block> +<asian><!--Could Someone who sees this tag tell me what was is this type of justification, asian languages only i thing--></asian> 
+</justification>
+
+<numbering>
+<Arabic>type="1"</Arabic>
+<UpperRoman>type="I"</UpperRoman>
+<LowerRoman>type="i"</LowerRoman>
+<UpperCaseN>type="A"</UpperCaseN>
+<LowerCaseN>type="a"</LowerCaseN>
+</numbering>
+
+<!-- Border kinds mapped to CSS border-style keywords; kept in step with wvHtml.xml (this template's own use of them is not visible here - confirm). -->
+<border>
+<noned>none</noned>
+<singled>solid</singled>
+<thickd>solid</thickd>
+<doubled>double</doubled>
+<number4d>double</number4d>
+<hairlined>solid</hairlined>
+<dotd>dotted</dotd>
+<dashlargegapd>dashed</dashlargegapd>
+<dotdashd>dotted</dotdashd>
+<dotdotdashd>dotted</dotdotdashd>
+<tripled>double</tripled>
+<thin-thicksmallgapd>dashed</thin-thicksmallgapd>
+<thick-thinsmallgapd>dashed</thick-thinsmallgapd>
+<thin-thick-thinsmallgapd>dashed</thin-thick-thinsmallgapd>
+<thin-thickmediumgapd>dashed</thin-thickmediumgapd>
+<thick-thinmediumgapd>dashed</thick-thinmediumgapd>
+<thin-thick-thinmediumgapd>dashed</thin-thick-thinmediumgapd>
+<thin-thicklargegapd>dashed</thin-thicklargegapd>
+<thick-thinlargegapd>dashed</thick-thinlargegapd>
+<thin-thick-thinlargegapd>dashed</thin-thick-thinlargegapd>
+<waved>solid</waved>
+<doublewaved>double</doublewaved>
+<dashsmallgapd>dashed</dashsmallgapd>
+<dashdotstrokedd>dashed</dashdotstrokedd>
+<emboss3Dd>ridge</emboss3Dd>
+<engrave3Dd>groove</engrave3Dd>
+<defaultd>ridge</defaultd>
+</border>
+
+<!-- <begin><ol <nfc/> start="<start/>"> -->
+<olist>
+<begin>
+</begin>
+<end></end>
+</olist>
+
+<ulist>
+<begin>
+</begin>
+<end></end>
+</ulist>
+
+<!-- <begin><li></begin> -->
+<entry>
+<begin></begin>
+<end></end>
+</entry>
+
+
+<!--
+this tableoverride option can be used to turn off handling of
+these tags in tables, which I find is necessary for at least netscape
+-->
+<tableoverrides>
+<ParaBefore>0</ParaBefore>
+<ParaRight>0</ParaRight>
+<ParaAfter>0</ParaAfter>
+<ParaLeft>0</ParaLeft>
+<ParaLeft1>0</ParaLeft1>
+<VertMergedCells>0</VertMergedCells>
+</tableoverrides>
+
+<table>
+<begin></begin>
+<end></end>
+</table>
+
+<row>
+<begin><tr></begin>
+<end></tr></end>
+</row>
+
+<cell>
+<begin></begin> 
+<end></end> +</cell> + +<!-- EDITING OUT DID MUCH OF TRICK --> +<paragraph> +<begin> +</begin> +<end> +</end> +</paragraph> + +<!-- these are all the character properties that can show up in word --> +<bold><begin><b></begin><end></b></end></bold> +<italic><begin><i></begin><end></i></end></italic> + +<!-- +text that has been deleted and will be displayed with strikethrough when +revision marked text is to be displayed + +use either this line... +--> +<RMarkDel><begin><s></begin><end></s><a href="#author<ibstRMarkDel/>">[Author ID<ibstRMarkDel/>: at <dttmRMarkDel/> ]</a></end></RMarkDel> + +<!-- +or uncomment below to make deleted text dissappear (well, become commented out) +--> +<!-- +<RMarkDel><begin><!--</begin><end>--></end></RMarkDel> +--> + +<!-- Overwriting char props. like CaPs->CAPS SmallCaps->smallcaps --> +<outline><begin></begin><end></end></outline> +<smallcaps><begin></begin><end></end></smallcaps> +<caps><begin></begin><end></end></caps> +<vanish><begin></begin><end></end></vanish> + +<!--If you uncomment this then the annotation text links will become commented out by html tags--> +<!-- +<vanish><begin><!--</begin><end>--></end></vanish> +--> + +<!-- +text that has been newly typed since the last time revision marks have been accepted +and will be displayed with underline when revision marked text is to be displayed + +use either this line... +--> +<RMark><begin></begin><end></end></RMark> + +<!-- +or uncomment below to make the underline dissappear +--> +<!-- +<RMark><begin></begin><end></end></RMark> +--> + + +<strike><begin><s></begin><end></s></end></strike> +<shadow><begin></begin><end></end></shadow> +<lowercase><begin></begin><end></end></lowercase> +<emboss><begin></begin><end></end></emboss> +<imprint><begin></begin><end></end></imprint> +<!--double strike--> +<dstrike><begin><s></begin><end></s></end></dstrike> + +<!-- +ftc's +& +hps + +keep them for font face and do that later. 
+--> + +<super><begin><sup></begin><end></sup></end></super> +<sub><begin><sub></begin><end></sub></end></sub> + +<singleu><begin><u></begin><end></u></end></singleu> +<wordu><begin><u></begin><end></u></end></wordu> +<doubleu><begin><u></begin><end></u></end></doubleu> +<dottedu><begin><u></begin><end></u></end></dottedu> +<hiddenu><begin><u></begin><end></u></end></hiddenu> +<thicku><begin><u></begin><end></u></end></thicku> +<dashu><begin><u></begin><end></u></end></dashu> +<dotu><begin><u></begin><end></u></end></dotu> +<dotdashu><begin><u></begin><end></u></end></dotdashu> +<dotdotdashu><begin><u></begin><end></u></end></dotdotdashu> +<waveu><begin><u></begin><end></u></end></waveu> + +<!-- +text whose properties have been changed since the last time revision marks have been accepted +and will be displayed with a note showing the change points. + +use either this line (which is admittedly a bit scary looking, but harmless)... +--> +<PropRMark><begin></begin><end></end></PropRMark> + +<!-- +or uncomment below to make the notes disappear +--> +<!-- +<PropRMark><begin></begin><end></end></PropRMark> +--> + +<!-- +<color> +--> +<Black><begin>Black</begin><end></end></Black> +<Blue><begin>Blue</begin><end></end></Blue> +<Cyan><begin>Cyan</begin><end></end></Cyan> +<Green><begin>Green</begin><end></end></Green> +<Magenta><begin>Magenta</begin><end></end></Magenta> +<Red><begin>Red</begin><end></end></Red> +<Yellow><begin>Yellow</begin><end></end></Yellow> +<White><begin>White</begin><end></end></White> +<DkBlue><begin>DarkBlue</begin><end></end></DkBlue> +<DkCyan><begin>DarkCyan</begin><end></end></DkCyan> +<DkGreen><begin>DarkGreen</begin><end></end></DkGreen> +<DkMagenta><begin>DarkMagenta</begin><end></end></DkMagenta> +<DkRed><begin>DarkRed</begin><end></end></DkRed> +<DkYellow><begin>#8b8b00</begin><end></end></DkYellow> +<DkGray><begin>DarkGray</begin><end></end></DkGray> +<LtGray><begin>LightGrey</begin><end></end></LtGray> +<!-- +</color> +--> + +<!-- 
+<animation> +--> +<LasVegas><begin><blink></begin><end></blink></end></LasVegas> +<BackgroundBlink><begin><blink></begin><end></blink></end></BackgroundBlink> +<SparkleText><begin><blink></begin><end></blink></end></SparkleText> +<MarchingAnts><begin><blink></begin><end></blink></end></MarchingAnts> +<MarchingRedAnts><begin><blink></begin><end></blink></end></MarchingRedAnts> +<Shimmer><begin><blink></begin><end></blink></end></Shimmer> +<!-- +</animation> +--> + +<!-- +I dont understand what this one is, and ive never come across it + +use this sample line (which admit it a bit scary looking, but harmless)... +--> +<DispFldRMark><begin></begin><end></end></DispFldRMark> + +<!-- +or uncomment below to ignore it, the previous might even crash wv ? +--> +<!-- +<DispFldRMark><begin></begin><end></end></DispFldRMark> +--> + +<animation> +<begin></begin> +<end></end> +</animation> + +<fontstr> +<begin></begin> +<end></end> +</fontstr> + +<comment> +<begin> +</begin> +<end> +</end> +</comment> + +<style name="Normal"> +<character> +<begin><bold.begin/><italic.begin/><strike.begin/><RMarkDel.begin/><outline.begin/><smallcaps.begin/><caps.begin/><vanish.begin/><RMark.begin/><shadow.begin/><lowercase.begin/><emboss.begin/><imprint.begin/><dstrike.begin/><super.begin/><sub.begin/><singleu.begin/><wordu.begin/><doubleu.begin/><dottedu.begin/><hiddenu.begin/><thicku.begin/><dashu.begin/><dotu.begin/><dotdashu.begin/><dotdotdashu.begin/><waveu.begin/></begin> +<end><waveu.end/><dotdotdashu.end/><dotdashu.end/><dotu.end/><dashu.end/><thicku.end/><hiddenu.end/><dottedu.end/><doubleu.end/><wordu.end/><singleu.end/><sub.end/><super.end/><dstrike.end/><imprint.end/><emboss.end/><lowercase.end/><shadow.end/><RMark.end/><vanish.end/><caps.end/><smallcaps.end/><outline.end/><RMarkDel.end/><strike.end/><italic.end/><bold.end/></end> +</character> + +<!-- KEEP +<style name="Normal"> +<character> 
+<begin><PropRMark.begin/><DispFldRMark.begin/><animation.begin/><fontstr.begin/><bold.begin/><italic.begin/><strike.begin/><RMarkDel.begin/><outline.begin/><smallcaps.begin/><caps.begin/><vanish.begin/><RMark.begin/><shadow.begin/><lowercase.begin/><emboss.begin/><imprint.begin/><dstrike.begin/><super.begin/><sub.begin/><singleu.begin/><wordu.begin/><doubleu.begin/><dottedu.begin/><hiddenu.begin/><thicku.begin/><dashu.begin/><dotu.begin/><dotdashu.begin/><dotdotdashu.begin/><waveu.begin/></begin> +<end><waveu.end/><dotdotdashu.end/><dotdashu.end/><dotu.end/><dashu.end/><thicku.end/><hiddenu.end/><dottedu.end/><doubleu.end/><wordu.end/><singleu.end/><sub.end/><super.end/><dstrike.end/><imprint.end/><emboss.end/><lowercase.end/><shadow.end/><RMark.end/><vanish.end/><caps.end/><smallcaps.end/><outline.end/><RMarkDel.end/><strike.end/><italic.end/><bold.end/><fontstr.end/><animation.end/><DispFldRMark.end/><PropRMark.end/></end> +</character> +--> + +<!-- Netscape does handle this correctly yet, here is how each different side of the border should work. 
+border-top: thin <bordertopstyle/> <bordertopcolor/>; +border-left: thin <borderleftstyle/> <borderleftcolor/>; +border-right: thin <borderrightstyle/> <borderrightcolor/>; +border-bottom: thin <borderbottomstyle/> <borderbottomcolor/> +--> + + +<pmargin> +<begin></begin> +</pmargin> + +<pborder> +<begin> +</begin> +</pborder> + +<text> +<begin> + +<mmParaLeft1/></begin> + +<end></end> +</text> + +<picture> +<begin> +</begin> +</picture> + +</style> + +<!--we need to be override the character properties--> +<!-- +<style name="Normal"> +<character> +<begin><PropRMark.begin/><DispFldRMark.begin/><animation.begin/><fontstr.begin/><bold.begin/><italic.begin/><strike.begin/><RMarkDel.begin/><outline.begin/><smallcaps.begin/><caps.begin/><vanish.begin/><RMark.begin/><shadow.begin/><lowercase.begin/><emboss.begin/><imprint.begin/><dstrike.begin/><super.begin/><sub.begin/><singleu.begin/><wordu.begin/><doubleu.begin/><dottedu.begin/><hiddenu.begin/><thicku.begin/><dashu.begin/><dotu.begin/><dotdashu.begin/><dotdotdashu.begin/><waveu.begin/></begin> +<end><waveu.end/><dotdotdashu.end/><dotdashu.end/><dotu.end/><dashu.end/><thicku.end/><hiddenu.end/><dottedu.end/><doubleu.end/><wordu.end/><singleu.end/><sub.end/><super.end/><dstrike.end/><imprint.end/><emboss.end/><lowercase.end/><shadow.end/><RMark.end/><vanish.end/><caps.end/><smallcaps.end/><outline.end/><RMarkDel.end/><strike.end/><italic.end/><bold.end/><fontstr.end/><animation.end/><DispFldRMark.end/><PropRMark.end/></end> +</character> + +<text> +<begin><div name="<stylename/>" align="<just/>"><p></begin> +<end></p></div></end> +</text> + +</style> + +<style name="Heading 1"> + +<character> +<begin></begin> +<end></end> +</character> + +<text> +<begin><div name="<stylename/>" align="<just/>"><H1></begin> +<end></H1></div></end> +</text> + + + +</style> +--> + +</main> |