diff options
Diffstat (limited to 'org/object_munge.org')
-rw-r--r-- | org/object_munge.org | 333 |
1 files changed, 333 insertions, 0 deletions
diff --git a/org/object_munge.org b/org/object_munge.org new file mode 100644 index 00000000..b950545c --- /dev/null +++ b/org/object_munge.org @@ -0,0 +1,333 @@ +-*- mode: org -*- +#+TITLE: sisu object munge +#+DESCRIPTION: documents - structuring, various output representations & search +#+FILETAGS: :sisu:munge:objects: +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+COPYRIGHT: Copyright (C) 2015 - 2021 Ralph Amissah +#+LANGUAGE: en +#+STARTUP: content hideblocks hidestars noindent entitiespretty +#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t +#+PROPERTY: header-args :exports code +#+PROPERTY: header-args+ :noweb yes +#+PROPERTY: header-args+ :eval no +#+PROPERTY: header-args+ :results no +#+PROPERTY: header-args+ :cache no +#+PROPERTY: header-args+ :padline no +#+PROPERTY: header-args+ :mkdirp yes + +* object_munge.rb + +#+HEADER: :tangle "../lib/sisu/object_munge.rb" +#+BEGIN_SRC ruby +#<<sisu_document_header>> +module SiSU_Object_Munge + def i_src_o_strip_markup(txtobj) + txtobj=txtobj. + gsub(/#{Mx[:srcrgx_bold_o]}(.+?)#{Mx[:srcrgx_bold_c]}/m,'\1'). + gsub(/#{Mx[:srcrgx_italics_o]}(.+?)#{Mx[:srcrgx_italics_c]}/m,'\1'). + gsub(/#{Mx[:srcrgx_underscore_o]}(.+?)#{Mx[:srcrgx_underscore_c]}/m,'\1'). + gsub(/#{Mx[:srcrgx_cite_o]}(.+?)#{Mx[:srcrgx_cite_c]}/m,'\1'). + gsub(/#{Mx[:srcrgx_insert_o]}(.+?)#{Mx[:srcrgx_insert_c]}/m,'\1'). + gsub(/#{Mx[:srcrgx_strike_o]}(.+?)#{Mx[:srcrgx_strike_c]}/m,'\1'). + gsub(/#{Mx[:srcrgx_superscript_o]}(\d+)#{Mx[:srcrgx_superscript_c]}/m,'[\1]'). + gsub(/#{Mx[:srcrgx_superscript_o]}(.+?)#{Mx[:srcrgx_superscript_c]}/m,'\1'). + gsub(/#{Mx[:srcrgx_subscript_o]}(.+?)#{Mx[:srcrgx_subscript_c]}/m,'\1'). + gsub(/#{Mx[:srcrgx_hilite_o]}(.+?)#{Mx[:srcrgx_hilite_c]}/m,'\1'). + gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~'). + gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/m,''). # endnote removed + gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/m,''). # endnote removed + gsub(/(?:#{Mx[:nbsp]})+/,' '). + gsub(/(?:#{Mx[:br_nl]})+/,"\n"). + gsub(/(?:#{Mx[:br_paragraph]})+/,"\n"). + gsub(/(?:#{Mx[:br_line]})+/,"\n"). + gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<'). + gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>'). + gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&'). + gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). + gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'). + gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'). + gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). + gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'). + gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'). + gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'). + gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'). + gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~'). + gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©'). + gsub(/[ ][ ]s+/,' '). + strip +if txtobj =~/Reading this/ + puts txtobj + if txtobj =~ /#{Mx[:srcrgx_italics_o]}(.+?)#{Mx[:srcrgx_italics_c]}/ + puts __LINE__ + puts Mx[:srcrgx_italics_o] + puts txtobj + end +end +; txtobj + end + def i_ao_o_strip_markup(txtobj) + txtobj=txtobj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1'). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1'). + gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]'). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1'). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1'). + gsub(/#{Mx[:fa_hilite_o]}(.+?)#{Mx[:fa_hilite_c]}/,'\1'). + gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~'). + gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,''). # endnote removed + gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,''). # endnote removed + gsub(/(?:#{Mx[:nbsp]})+/,' '). + gsub(/(?:#{Mx[:br_nl]})+/,"\n"). + gsub(/(?:#{Mx[:br_paragraph]})+/,"\n"). + gsub(/(?:#{Mx[:br_line]})+/,"\n"). + gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<'). + gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>'). + gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&'). + gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). + gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'). + gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'). + gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). + gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'). + gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'). + gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'). + gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'). + gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~'). + gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©'). + gsub(/[ ][ ]s+/,' '). + strip + end + def i_ao_o_src_markup_restore(txtobj) + @txtobj=txtobj + def textface_marks + @txtobj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*{\1}*'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/{\1}/'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_{\1}_'). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"{\1}"'). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+{\1}+'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-{\1}-'). + gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'^{[\1]}^'). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^{\1}^'). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,',{\1},'). + gsub(/#{Mx[:fa_hilite_o]}(.+?)#{Mx[:fa_hilite_c]}/,'\1'). + gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~'). + gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'~{\1 \2}~'). + gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,''). # endnote removed + gsub(/(?:#{Mx[:nbsp]})+/,' '). + gsub(/(?:#{Mx[:br_nl]})+/,"\n"). + gsub(/(?:#{Mx[:br_paragraph]})+/,"\n"). + gsub(/(?:#{Mx[:br_line]})+/,"\n"). + gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<'). + gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>'). + gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&'). + gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). + gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'). + gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'). + gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). + gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'). + gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'). + gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'). + gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'). + gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~'). + gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©'). + gsub(/[ ][ ]s+/,' '). + strip + end + def object_marks + @txtobj + end + self + end + def clean_text(txtobj,markup=:ao) + if txtobj.class==String + txtobj=if markup ==:ao + i_ao_o_strip_markup(txtobj) + elsif markup ==:src + i_src_o_strip_markup(txtobj) + else p __FILE__; p __LINE__ + end + elsif txtobj.class.inspect=~/^SiSU_AO_DocumentStructure::/ + txtobj.obj=i_ao_o_strip_markup(txtobj.obj) + else p 'error' + end + txtobj + end + def footnotes_inline(txtobj) + end + def footnotes_ref_and_note(txtobj) + end + def src_markup(txtobj) + txtobj + end + def extract_endnotes(doc_obj_txt,endnotes_) #% used for extraction of endnotes from paragraphs + if endnotes_ ==:separate + notes_a=doc_obj_txt.scan(/#{Mx[:en_a_o]}([\d]+\s+.+?)#{Mx[:en_a_c]}/) + ##notes_a=doc_obj_txt.scan(/#{Mx[:en_a_o]}([\d*+]+\s+.+?)#{Mx[:en_a_c]}/) + #notes_b=doc_obj_txt.scan(/#{Mx[:en_b_o]}([\d*+]+\s+.+?)#{Mx[:en_b_c]}/) + n=[] + notes_a.flatten.each do |note| #high cost to deal with <br> appropriately within plaintext, consider + note=note.dup.to_s + note=note.gsub(/^([\d]+)\s+/,'^~\1 '). + gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/, + ' \\\\\\ ') + n << note + end + notes_a=n.flatten + doc_obj_txt=doc_obj_txt. + gsub(/#{Mx[:en_a_o]}([\d]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'~^') # endnote marker marked up + else + doc_obj_txt=doc_obj_txt. + gsub(/#{Mx[:en_b_o]}[\d]+\s+(.+?)#{Mx[:en_b_c]}/, + '~[ \1 ]~'). # inline endnote with marker marked up + gsub(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/, + '~{\1 \2 }~'). # inline endnote with marker marked up + gsub(/#{Mx[:en_b_o]}([*+]+)\s+(.+?)#{Mx[:en_b_c]}/, + '~[\1 \2 ]~') # inline endnote with marker marked up + end + [doc_obj_txt,notes_a] + end + def objects #def i_ao_o_src_markup_restore(txtobj) + def code_(dob) + if dob.is==:code + dob.obj=dob.obj.gsub(/(^|[^}])_([<>])/m,'\1\2'). # _> _< + gsub(/(^|[^}])_([<>])/m,'\1\2') # _<_< + end + dob + end + def block_(dob) + dob.obj=if dob.of==:block # watch + dob.obj.gsub(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/,"* "). + gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n") + else dob.obj.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n") + end + dob + end + def textface_marks_po4a(dob,endnotes_=:inline) + notes='' + dob.obj=dob.obj. + gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/, + Mx[:src_bold_o] + '\1' + Mx[:src_bold_c]). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/, + Mx[:src_italics_o] + '\1' + Mx[:src_italics_c]). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/, + Mx[:src_underscore_o] + '\1' + Mx[:src_underscore_c]). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/, + Mx[:src_subscript_o] + '\1' + Mx[:src_subscript_c]). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/, + Mx[:src_superscript_o] + '\1' + Mx[:src_superscript_c]). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/, + Mx[:src_insert_o] + '\1' + Mx[:src_insert_c]). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/, + Mx[:src_cite_o] + '\1' + Mx[:src_cite_c]). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/, + Mx[:src_strike_o] + '\1' + Mx[:src_strike_c]). + gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/, + Mx[:src_monospace_o] + '\1' + Mx[:src_monospace_c]) + unless dob.is==:code + dob.obj=dob.obj. + gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,'\1'). + gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1'). + gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, + '\1 [link: <\2>]'). + gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/, + '\1 [link: local image]'). + gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1') + dob.obj,notes=extract_endnotes(dob.obj,endnotes_) + dob.obj=dob.obj. + gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<'). + gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>'). + gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&'). + gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). + gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'). + gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'). + gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). + gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'). + gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'). + gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'). + gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'). + gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~'). + gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©') + end + dob=block_(dob) + dob=code_(dob) + dob.obj=dob.obj.gsub(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,''). # remove page breaks + gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1'). + gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,''). # remove name links + gsub(/ |#{Mx[:nbsp]}/,' '). # decide on + gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/, + ' [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]") + gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/, + ' [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]") + gsub(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/, + '[image: "\1"]') + [dob,notes] + end + def object_marks + @txtobj + end + self + end +end +__END__ +#+END_SRC + +* document header + +#+NAME: sisu_document_header +#+BEGIN_SRC text +encoding: utf-8 +- Name: SiSU + + - Description: documents, structuring, processing, publishing, search + object_munge + + - Author: Ralph Amissah + <ralph.amissah@gmail.com> + + - Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019, + 2020, 2021, Ralph Amissah, + All Rights Reserved. + + - License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + <http://www.fsf.org/licensing/licenses/gpl.html> + <http://www.gnu.org/licenses/gpl.html> + + <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html> + + - SiSU uses: + - Standard SiSU markup syntax, + - Standard SiSU meta-markup syntax, and the + - Standard SiSU object citation numbering and system + + - Homepages: + <http://www.sisudoc.org> + + - Git + <https://git.sisudoc.org/projects/> + <https://git.sisudoc.org/projects/?p=software/sisu.git;a=summary> + <https://git.sisudoc.org/projects/?p=markup/sisu-markup-samples.git;a=summary> +#+END_SRC |