-*- mode: org -*-
#+TITLE:       sisudoc spine (doc_reform) output text
#+DESCRIPTION: documents - structuring, publishing in multiple formats & search
#+FILETAGS:    :spine:output:text:
#+AUTHOR:      Ralph Amissah
#+EMAIL:       [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+COPYRIGHT:   Copyright (C) 2015 - 2025 Ralph Amissah
#+LANGUAGE:    en
#+STARTUP:     content hideblocks hidestars noindent entitiespretty
#+PROPERTY:    header-args  :exports code
#+PROPERTY:    header-args+ :noweb yes
#+PROPERTY:    header-args+ :results output none
#+PROPERTY:    header-args+ :cache no
#+PROPERTY:    header-args+ :padline no
#+PROPERTY:    header-args+ :mkdirp yes
#+OPTIONS:     H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t

- [[./doc-reform.org][doc-reform.org]]  [[./][org/]]
- [[./output_hub.org][output_hub]]

* Text
** outputText template

# NOTE(review): the noweb references below were restored from the block names
# defined in this file; confirm the order of the_document / munge.
#+HEADER: :tangle "../src/sisudoc/io_out/text.d"
#+HEADER: :noweb yes
#+BEGIN_SRC d
<<doc_header_including_copyright_and_license()>>
module sisudoc.io_out.text;
@safe:
template outputText() {
  <<the_document>>
  <<munge>>
  /+
    Entry point of the plain text output: renders the document abstraction
    and writes it out by calling text_out.
  +/
  void outputText(D,M) (
    const D   doc_abstraction,
          M   doc_matters,
  ) {
    import std.stdio;
    import sisudoc.io_out;
    <<text_out>>
    text_out(doc_abstraction, doc_matters);
  }
}
#+END_SRC

** Output

#+NAME: text_out
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+
  Build the text body and tail for the document and write them to
  pth_text.text_file (paths come from spinePathsText(doc_matters)).
  The output directory pth_text.base_pth is created when missing; if that
  fails the error is reported and nothing is written for this document.
+/
void text_out(D,M)(
  const D   doc_abstraction,
        M   doc_matters,
) {
  // imported at function scope so that FileException is visible to the
  // catch clause as well as to the try body
  import std.file : exists, mkdirRecurse, FileException;
  struct Text {
    string head;
    string content;
    string tail;
  }
  auto text = Text();
  // text.head    = theDocument!().text_head(doc_matters);
  text.content = theDocument!().text_body(doc_abstraction, doc_matters);
  text.tail    = theDocument!().text_tail(doc_matters);
  auto pth_text = spinePathsText(doc_matters);
  try {
    if (!exists(pth_text.base_pth)) {
      (pth_text.base_pth).mkdirRecurse;
    }
  } catch (FileException ex) {
    // std.file signals failure with FileException (not ErrnoException);
    // report it rather than swallowing it silently
    writeln("ERROR unable to create text output directory ", pth_text.base_pth, ": ", ex.msg);
    return;
  }
  if (doc_matters.opt.action.vox_gt_1) {
    writeln(" ", pth_text.text_file);
  }
  // writeln(pth_text.base_pth);
  auto f = File(pth_text.text_file, "w");
  // f.writeln(text.head);
  f.writeln(text.content);
  f.writeln(text.tail);
}
#+END_SRC

* The Document
** theDocument template

#+NAME: the_document
#+HEADER: :noweb yes
#+BEGIN_SRC d
template theDocument() {
  import std.stdio;
  import sisudoc.io_out;
  <<text_head>>
  <<text_body_assign_munge>>
  <<text_tail>>
}
#+END_SRC

** the Document (assign munge)
*** Head SKIP

#+NAME: text_head
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+ Placeholder document head; currently returns the fixed string "head". +/
string text_head(M)(
  M  doc_matters,
) {
  return "head";
}
#+END_SRC

*** Body munge assign

#+NAME: text_body_assign_munge
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+
  Render the document body: walk the sections listed in
  doc_matters.has.keys_seq.scroll and, for every object in each section,
  append the text produced by the munge function matching
  obj.metainfo.is_a. Objects of any other kind contribute nothing.
+/
string text_body(D,M)(
  const D  doc_abstraction,
        M  doc_matters,
) {
  string doc_object = "";
  foreach (section; doc_matters.has.keys_seq.scroll) {
    foreach (obj; doc_abstraction[section]) {
      switch (obj.metainfo.is_a) {
        case "toc":          doc_object ~= munge!().toc(obj, doc_matters);          break;
        case "heading":      doc_object ~= munge!().heading(obj, doc_matters);      break;
        case "para":         doc_object ~= munge!().para(obj, doc_matters);         break;
        case "group":        doc_object ~= munge!().group(obj, doc_matters);        break;
        case "block":        doc_object ~= munge!().block(obj, doc_matters);        break;
        case "poem":         doc_object ~= munge!().poem(obj, doc_matters);         break; // CHECK
        case "verse":        doc_object ~= munge!().verse(obj, doc_matters);        break; // CHECK
        case "code":         doc_object ~= munge!().code(obj, doc_matters);         break;
        case "quote":        doc_object ~= munge!().quote(obj, doc_matters);        break; // LATER
        case "table":        doc_object ~= munge!().table(obj, doc_matters);        break;
        case "endnote":      doc_object ~= munge!().endnote(obj, doc_matters);      break;
        case "bookindex":    doc_object ~= munge!().bookindex(obj, doc_matters);    break; // CHECK
        case "bibliography": doc_object ~= munge!().bibliography(obj, doc_matters); break; // CHECK
        case "glossary":     doc_object ~= munge!().glossary(obj, doc_matters);     break; // CHECK
        case "blurb":        doc_object ~= munge!().blurb(obj, doc_matters);        break; // CHECK
        case "comment":      doc_object ~= munge!().comment(obj, doc_matters);      break; // LATER
        default:             break;
      }
    }
  }
  return doc_object;
}
#+END_SRC

*** Tail

#+NAME: text_tail
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+
  Build the metadata block placed at the end of the text output: title,
  author, published date, copyright and license (each taken from
  doc_matters.conf_make_meta.meta), followed by the generator program's
  project name and home url. Missing title/author/copyright/license are
  reported on stdout when debug_do or vox_gt_3 is set.
+/
string text_tail(M)(
  M  doc_matters,
) {
  string metadata_;
  if (doc_matters.opt.action.debug_do) {
    writeln(doc_matters.src.filename_base);
    writeln("Title: ", doc_matters.conf_make_meta.meta.title_full);
    writeln(" Author: ", doc_matters.conf_make_meta.meta.creator_author);
    writeln(" Published: ", doc_matters.conf_make_meta.meta.date_published);
    writeln(" Copyright: ", doc_matters.conf_make_meta.meta.rights_copyright);
    writeln(" License: ", doc_matters.conf_make_meta.meta.rights_license);
  }
  if (!(doc_matters.conf_make_meta.meta.title_full.empty)) {
    metadata_ ~= "Title: " ~ doc_matters.conf_make_meta.meta.title_full ~ "\n\n";
  } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) {
    writeln("ERROR no Title information provided in document header ", doc_matters.src.filename_base);
  }
  if (!(doc_matters.conf_make_meta.meta.creator_author.empty)) {
    if (doc_matters.opt.action.html_link_curate) {
      // NOTE(review): the underscored surname is joined directly to the
      // author name with no separator; this looks like a leftover from the
      // html output - confirm the intended plain text form
      metadata_ ~= "Author: "
        ~ doc_matters.conf_make_meta.meta.creator_author_surname.translate([' ' : "_"])
        ~ doc_matters.conf_make_meta.meta.creator_author
        ~ "\n\n";
    } else {
      metadata_ ~= "Author: " ~ doc_matters.conf_make_meta.meta.creator_author ~ "\n\n";
    }
  } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) {
    writeln("ERROR no Author information provided in document header ", doc_matters.src.filename_base);
  }
  // the published date is written unconditionally (no empty check)
  metadata_ ~= "Published: " ~ doc_matters.conf_make_meta.meta.date_published ~ "\n\n";
  if (!(doc_matters.conf_make_meta.meta.rights_copyright.empty)) {
    metadata_ ~= "Copyright: " ~ doc_matters.conf_make_meta.meta.rights_copyright ~ "\n\n";
  } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) {
    writeln("WARNING no Copyright information provided in document header ", doc_matters.src.filename_base);
  }
  if (!(doc_matters.conf_make_meta.meta.rights_license.empty)) {
    metadata_ ~= "License: " ~ doc_matters.conf_make_meta.meta.rights_license ~ "\n\n";
  } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) {
    writeln("WARNING no License information provided in document header ", doc_matters.src.filename_base);
  }
  metadata_ ~= doc_matters.generator_program.project_name.strip ~ "\n";
  metadata_ ~= doc_matters.generator_program.url_home.strip;
  return metadata_;
}
#+END_SRC

* Munge

#+NAME: munge
#+HEADER: :noweb yes
#+BEGIN_SRC d
/+
  Per object-type conversion of document objects to plain text. Each
  function takes a document object and doc_matters and returns the text to
  append to the output.
+/
template munge() {
  import sisudoc.io_out;
  import sisudoc.io_out.rgx;
  import std.stdio;
  import std.conv;
  import std.conv : to;
  import std.typecons : Nullable;
  mixin spineRgxOut;
  static auto rgx = RgxO();
  /+
    Debug print of _obj_is prefixed with a source location. file and line
    are default arguments so that they are evaluated where puts is called,
    not inside puts itself.
  +/
  void puts(string _obj_is, string file = __FILE__, size_t line = __LINE__) {
    writeln(file, ":", line, ": ", _obj_is);
  }
  string newline = "\n";
  string newlines = "\n\n";
  /+
    Object citation number marker "「n」" for obj, or the empty string when
    the object has no number (ocn == 0) or numbering is switched off
    (doc_matters.opt.action.ocn_off).
  +/
  string ocnMarker(O,M)(O obj, M doc_matters) {
    return (obj.metainfo.ocn == 0 || doc_matters.opt.action.ocn_off)
      ? ""
      : "「" ~ obj.metainfo.ocn.to!string ~ "」";
  }
  /+ Replacement of internal inline markup by its plain text form. +/
  template special_characters_and_font_face() {
    /+ code text: only the nbsp marker is replaced (by a space) +/
    string code(string _txt){
      _txt = _txt
        .replaceAll(rgx.nbsp_char, " ");
      return _txt;
    }
    /+
      general text: nbsp and line-break markers are replaced, and each
      inline font-face marker is rewritten to its text markup form
    +/
    string general(string _txt) {
      _txt = _txt
        .replaceAll(rgx.nbsp_char, " ")
        .replaceAll(rgx.br_line, "\n")
        .replaceAll(rgx.br_line_inline, "\n")
        .replaceAll(rgx.br_line_spaced, "\n\n")
        .replaceAll(rgx.inline_strike, "-{$1}-")
        .replaceAll(rgx.inline_insert, "+{$1}+")
        .replaceAll(rgx.inline_cite, "\"{$1}\"")
        .replaceAll(rgx.inline_emphasis, "!{$1}!")
        .replaceAll(rgx.inline_bold, "*{$1}*")
        .replaceAll(rgx.inline_italics, "/{$1}/")
        .replaceAll(rgx.inline_underscore, "_{$1}_")
        .replaceAll(rgx.inline_superscript, "^{$1}^")
        .replaceAll(rgx.inline_subscript, ",{$1},")
        .replaceAll(rgx.inline_mono, "#{$1}#");
      return _txt;
    }
    /+
      Replace each inline link by its first capture, followed by " ≫" and
      the third capture unless that capture is "0"; replace each inline
      image by its third capture. Matches are collected from the text as it
      was on entry, and one replaceFirst is applied per match.
    +/
    string links_and_images(string _txt){
      if (_txt.matchFirst(rgx.inline_link)) {
        foreach (m; _txt.matchAll(rgx.inline_link)) {
          if (m.captures[3] == "0") {
            _txt = _txt
              .replaceFirst(rgx.inline_link, (m.captures[1]));
          } else {
            _txt = _txt
              .replaceFirst(rgx.inline_link, (m.captures[1] ~ " ≫" ~ m.captures[3]));
          }
        }
      }
      if (_txt.matchFirst(rgx.inline_image)) {
        foreach (m; _txt.matchAll(rgx.inline_image)) {
          _txt = _txt
            .replaceFirst(rgx.inline_image, (m.captures[3]));
        }
      }
      return _txt;
    }
  }
  /+
    Common rendering used by most object types: collects the object's
    inline notes and lists them after the text (leaving "[n]" in place of
    each), converts inline markup (code objects get the reduced "code"
    conversion), rewrites links and images, and appends the object number
    marker. Headings are preceded by an extra newline.
  +/
  string generalMunge(O,M)(O obj, M doc_matters) {
    string _txt = obj.text;
    string _notes;
    string general_munge;
    string _marker = ocnMarker(obj, doc_matters);
    string _ocn = (_marker.length == 0) ? "" : _marker ~ newline;
    if (_txt.matchFirst(rgx.inline_notes_al_gen)) {
      foreach (m; _txt.matchAll(rgx.inline_notes_al_regular_number_note)) {
        _notes ~= newlines ~ m["num"] ~ ". " ~ m["note"];
      }
    }
    _txt = _txt.replaceAll(rgx.inline_notes_al_regular_number_note, "[$1]");
    if (obj.metainfo.is_a == "code") {
      _txt = special_characters_and_font_face!().code(_txt);
    } else {
      _txt = special_characters_and_font_face!().general(_txt);
    }
    _txt = special_characters_and_font_face!().links_and_images(_txt);
    if (obj.metainfo.is_a == "heading") {
      general_munge = newline ~ _txt ~ _notes ~ newline ~ _ocn ~ newline;
    } else {
      general_munge = _txt ~ _notes ~ newline ~ _ocn ~ newline;
    }
    return general_munge;
  }
  /+
    Table of contents entry: inline links become "<prefix>$1 ≫ $3", where
    the prefix is selected by obj.attrib.indent_hang (no prefix for values
    outside 1..8).
  +/
  string toc(O,M)(O obj, M doc_matters) {
    // puts(obj.metainfo.is_a);
    // return "toc\n";
    // _txt = _special_characters_and_font_face(obj.text);
    string _txt = special_characters_and_font_face!().general(obj.text);
    string _spaces;
    switch (obj.attrib.indent_hang) {
      case 1: _spaces = ""; break;
      case 2: _spaces = ":"; break;
      case 3: _spaces = "∴"; break;
      case 4: _spaces = " "; break;
      case 5: _spaces = " "; break;
      case 6: _spaces = " "; break;
      case 7: _spaces = " "; break;
      case 8: _spaces = " "; break;
      default: break;
    }
    _txt = _txt.replaceAll(rgx.inline_link, (_spaces ~ "$1 ≫ $3"));
    return _txt ~ newline;
  }
  /+ Heading: rendered by generalMunge. +/
  string heading(O,M)(O obj, M doc_matters) {
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _general_munge = generalMunge(obj,doc_matters);
    return _general_munge;
  }
  /+ Paragraph: rendered by generalMunge. +/
  string para(O,M)(O obj, M doc_matters) {
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _general_munge = generalMunge(obj,doc_matters);
    return _general_munge;
  }
  string group(O,M)(O obj, M doc_matters) {
    /+
      The "group" is different from the "block" mark in that "group" does
      not preserve whitespace, the "block" mark does. The text falling
      within the block is a single object.
    +/
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _general_munge = generalMunge(obj,doc_matters);
    return _general_munge;
  }
  string block(O,M)(O obj, M doc_matters) {
    /+
      The "block" is different from the "group" mark in that the "block"
      mark (like the "poem" mark) preserves whitespace, the "group" mark
      does not. The text falling within the "block" is a single object,
      which is different from the "poem" mark where each identified verse
      is an object.
    +/
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _general_munge = generalMunge(obj,doc_matters);
    return _general_munge;
  }
  string poem(O,M)(O obj, M doc_matters) { // LATER
    /+
      The "poem" mark like the "block" preserves whitespace. Text followed
      by two newlines are identified as verse and each verse is an object
      i.e. a poem may consist of multiple verse each of which is identified
      as an object, unlike a text "block" which is identified as a single
      object.
    +/
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    return obj.text ~ newlines;
  }
  string verse(O,M)(O obj, M doc_matters) {
    /+
      See description of poem, the poem is demarcated but the verse is the
      object.
    +/
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _general_munge = generalMunge(obj,doc_matters);
    return _general_munge;
  }
  string code(O,M)(O obj, M doc_matters) {
    /+
      "Code" blocks are a single text object, in which the original text is
      preserved.
    +/
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _general_munge = generalMunge(obj,doc_matters);
    return _general_munge;
  }
  /+
    Quote: the object text as is, followed by its object number marker
    (omitted, as in generalMunge, when the object is unnumbered or ocn_off
    is set).
  +/
  string quote(O,M)(O obj, M doc_matters) { // LATER
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    return obj.text ~ newline ~ ocnMarker(obj, doc_matters) ~ newlines;
  }
  /+
    Table: cells are padded to the widest cell of their column and joined
    with " ┊ "; a trailing single-cell row is emitted as a table note after
    the table. The object number marker follows (omitted when the object is
    unnumbered or ocn_off is set).
  +/
  string table(O,M)(O obj, M doc_matters) {
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    auto tablarize(O)(
      string _txt,
      const O obj,
    ) {
      string[] _table_rows = (_txt).split(rgx.table_delimiter_row);
      string[] _table_cols;
      string _table = "";
      string _tablenote = "";
      int[] _col_width;
      _col_width.length = obj.table.number_of_columns.to!ulong;
      // compiled once, used after every cell to trim trailing whitespace
      auto _trailing_whitespace = regex("\\s*$");
      // first pass: find the width of each column
      foreach(row_idx, row; _table_rows) {
        _table_cols = row.split(rgx.table_delimiter_col);
        foreach(col_idx, cell; _table_cols) {
          if (!((_table_cols.length == 1)
          && (_table_rows.length <= row_idx+2))) {
            // a row may hold more cells than the declared number of
            // columns; grow rather than index out of range
            if (col_idx >= _col_width.length) {
              _col_width.length = col_idx + 1;
            }
            if (_col_width[col_idx] < (cell.length.to!int)) {
              _col_width[col_idx] = cell.length.to!int;
            }
          }
        }
      }
      // second pass: lay out the cells, separating out the table note
      foreach(row_idx, row; _table_rows) {
        _table_cols = row.split(rgx.table_delimiter_col);
        foreach(col_idx, cell; _table_cols) {
          if ((_table_cols.length == 1)
          && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx)
            _tablenote ~= cell ~ newline;
          } else {
            string _separator = (_table_cols.length > (col_idx + 1)) ? " ┊ " : "";
            // columns without an alignment entry are right aligned, as are
            // all columns not marked "l"
            bool _align_left = (col_idx < obj.table.column_aligns.length)
              && (obj.table.column_aligns[col_idx] == "l");
            if (_align_left) {
              _table ~= format(q"┃%-*s%s┃",
                _col_width[col_idx],
                cell,
                _separator
              );
            } else {
              _table ~= format(q"┃%*s%s┃",
                _col_width[col_idx],
                cell,
                _separator
              );
            }
            _table = _table
              .replaceAll(_trailing_whitespace, "");
          }
        }
        _table ~= newline;
      }
      Tuple!(string, string) t = tuple(
        _table,
        _tablenote,
      );
      return t;
    }
    // string _txt = obj.text;
    // writeln(obj.table.column_widths);
    auto _t = tablarize(obj.text, obj);
    string _txt = _t[0];
    string _tablenote = _t[1];
    return _txt ~ _tablenote ~ ocnMarker(obj, doc_matters) ~ newlines;
  }
  /+
    Endnote: the first inline link and the first superscript are reduced to
    their first capture, then the general markup conversion is applied. No
    object number is written.
  +/
  string endnote(O,M)(O obj, M doc_matters) {
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _txt = obj.text;
    _txt = _txt
      .replaceFirst(rgx.inline_link, ("$1"))
      .replaceFirst(rgx.inline_superscript, ("$1"));
    _txt = special_characters_and_font_face!().general(_txt);
    return _txt ~ newlines;
  }
  /+
    Book index entry: links become "≫$1", whitespace followed by a
    backslash is removed, then the general markup conversion is applied.
  +/
  string bookindex(O,M)(O obj, M doc_matters) {
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _txt = obj.text;
    _txt = _txt
      .replaceAll(rgx.inline_link, ("≫$1"))
      .replaceAll(regex("\\s*\\\\"), "");
    _txt = special_characters_and_font_face!().general(_txt);
    return _txt ~ newlines;
  }
  /+ Bibliography entry: general markup conversion, then two newlines. +/
  string bibliography(O,M)(O obj, M doc_matters) {
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _txt = obj.text;
    _txt = special_characters_and_font_face!().general(_txt);
    return _txt ~ newlines;
    // ALT:
    // string _general_munge = generalMunge(obj,doc_matters);
    // return _general_munge;
  }
  /+ Glossary entry: general markup conversion; no newline is appended. +/
  string glossary(O,M)(O obj, M doc_matters) {
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _txt = obj.text;
    _txt = special_characters_and_font_face!().general(_txt);
    return _txt;
  }
  /+ Blurb: rendered by generalMunge. +/
  string blurb(O,M)(O obj, M doc_matters) {
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    string _general_munge = generalMunge(obj,doc_matters);
    return _general_munge;
  }
  /+ Comment: the object text as is, then two newlines. +/
  string comment(O,M)(O obj, M doc_matters) { // LATER
    /+ +/
    // puts(obj.metainfo.is_a);
    // return obj.metainfo.is_a;
    return obj.text ~ newlines;
  }
}
#+END_SRC

* org includes
** spine project VERSION

#+NAME: spine_version
#+HEADER: :noweb yes
#+BEGIN_SRC emacs-lisp
<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_project_version()>>
#+END_SRC

** year

#+NAME: year
#+HEADER: :noweb yes
#+BEGIN_SRC emacs-lisp
<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:year()>>
#+END_SRC

** document header including copyright & license

#+NAME: doc_header_including_copyright_and_license
#+HEADER: :noweb yes
#+BEGIN_SRC emacs-lisp
<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_doc_header_including_copyright_and_license()>>
#+END_SRC

* __END__
* TODO WORKON

#+BEGIN_SRC org
TODO

PRIORITY

LATER
- object types
  - comment
  - quote
- images ? remove?? (currently with inline links)
- anchor tags (for internal links)?

WISH
- underline headings?
- endnote info on calling object ≫\d+
- break para text at set width?
  - text wrap at text-line-width specified option

DONE
- line breaks
- font face: bold, italics etc.
- object types
  - toc
    - inline_link /[┥┝┤├]
    - indents obj.metainfo.heading_lev_markup < 4
  - group
  - block
  - code
  - verse
  - table
  - endnote section
    - CAVEAT: would like to, but do not point back to object number of origin
      REDO gathering of endnotes, (get/tie calling ocn)
  - bookindex
  - bibliography
  - glossary
  - blurb
- inline_link /[┥┝┤├]
#+END_SRC