diff options
| author | Ralph Amissah <ralph.amissah@gmail.com> | 2019-01-13 14:19:04 -0500 | 
|---|---|---|
| committer | Ralph Amissah <ralph.amissah@gmail.com> | 2019-05-17 16:59:38 -0400 | 
| commit | 87d62f48d6c8a2ccf9807f56c23a6ca71d1102e6 (patch) | |
| tree | e367aff1a05f89104c7c81ac8a6c618a4f38b021 /org | |
| parent | 0.4.2 xmls output, internal (diff) | |
0.4.3 stow (most) uri/links in array, separate from object text
- munge independently
  - no need to consider special munging of uri with text
  - uri can easily be munged independently (encoded as need be)
Diffstat (limited to 'org')
| -rw-r--r-- | org/default_regex.org | 4 |
| -rw-r--r-- | org/doc_reform.org | 112 |
| -rw-r--r-- | org/meta_abstraction.org | 49 |
| -rw-r--r-- | org/output_sqlite.org | 6 |
| -rw-r--r-- | org/output_xmls.org | 8 |
5 files changed, 121 insertions, 58 deletions
diff --git a/org/default_regex.org b/org/default_regex.org index c237239..5705fb2 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -505,9 +505,11 @@ static inline_image                                   = ctRegex!(`(?P<pre>β₯)β  static inline_image_without_dimensions                = ctRegex!(`(?P<pre>β₯)βΌ(?P<imginf>(?P<img>\S+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?ββ€.+?β)`, "mg");  static inline_link_anchor                             = ctRegex!(`β(?P<anchor>\S+?)β`, "mg");  static inline_link                                    = ctRegex!(`β₯(?P<text>.+?)ββ€(?P<link>\S+?)β`, "mg"); +static inline_link_number_only                        = ctRegex!(`(β₯.+?β)β€(?P<num>[0-9]+)β`, "mg"); +static inline_link_stow_uri                           = ctRegex!(`β₯(?P<text>.+?)ββ€(?P<link>[^ 0-9#β₯ββ€β][^ 0-9β₯ββ€β]+)β`, "mg"); // will not stow (stowed links) or object number internal links  static inline_link_hash                               = ctRegex!(`β₯(?P<text>.+?)ββ€(?P<link>#(?P<segname>\S+?))β`, "mg");  static inline_link_clean                              = ctRegex!(`β€(?:.+?)β|[β₯β]`, "mg"); -static inline_a_url                                   = ctRegex!(`(β€)(\S+?)(β)`, "mg"); +static inline_a_url                                   = ctRegex!(`(β€)([^\sβ₯ββ€β]+)(β)`, "mg");  static url                                            = ctRegex!(`https?://`, "mg");  static inline_link_subtoc                             = ctRegex!(`^(?P<level>[5-7])~ β₯(?P<text>.+?)ββ€(?P<link>.+?)β`, "mg");  static fn_suffix                                      = ctRegex!(`\.fnSuffix`, "mg"); diff --git a/org/doc_reform.org b/org/doc_reform.org index 55bd41d..fe66011 100644 --- a/org/doc_reform.org +++ b/org/doc_reform.org @@ -26,7 +26,7 @@ struct Version {    int minor;    int patch;  } -enum _ver = Version(0, 4, 2); +enum _ver = Version(0, 4, 3);  #+END_SRC  ** compilation restrictions (supported compilers) @@ -1369,61 +1369,61 @@ dev notes  
*** document objects (table)  - check, keep up to date -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| doc object   | doc object       | attributes              | inline         | appended            | structure            | delimiters                     | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| is_of        | is_a             |                         |                |                     |                      |                                | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| para/heading | heading          | - level                 | - font face    | - object number off | - level              | - delimiter == two newlines    | -|              |                  | - object number         | - endnotes     | - book index meta   | (document structure) |                                | -|              |                  | - object number off     |                |                     |                      |                                | -|              |                  | - dummy (toc & seg)     |                |                     |                      |                                | -|              |                  | - tags (internal links) |                |                     |                      |                                | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -|              | toc              | - level                 | - font face    |                     |                      | - auto generated from headings | -|              |                  |             
            | - links (auto) |                     |                      |                                | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| para         | para             | - bullet                | - font face    | - object number off |                      | - delimiter == two newlines    | -|              |                  | - indent                | - links/urls * | - book index meta   |                      |                                | -|              |                  | - object number         | - images*      |                     |                      |                                | -|              |                  | - object number off     | - endnotes     |                     |                      |                                | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -|              | toc              | - indent                | - font face    |                     |                      | - delimiter == two newlines    | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -|              | endnote          |                         | - font face    |                     |                      | (generated from                | -|              |                  |                         |                |                     |                      | inline markup tags)            | -|              |                  |                         |                |                     |                      | - delimiter == two newlines    | 
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -|              | bookindex        | - auto indent           | - font face    |                     |                      | - delimiter == two newlines    | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -|              | blurb            | - bullet                | - font face    | - object number off |                      | - delimiter == two newlines    | -|              |                  | - indent                | - links/urls * | - book index meta   |                      |                                | -|              |                  | - object number         | - images*      |                     |                      |                                | -|              |                  | - object number off     | - endnotes     |                     |                      |                                | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| group        | group            | - object number         | - font face    | - book index meta   | - para break         | - delimiter tags (group)       | -|              |                  | - object number off     | - links/urls * |                     |                      |                                | -|              |                  |                         | - images*      |                     |                      |                                | -|              |                  |                         | - endnotes     |                     |                      |                                | 
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -|              | block            | - object number         | - font face    | - book index meta   | - new line           | - delimiter tags (block)       | -|              |                  | - object number off     | - links/urls * |                     |                      |                                | -|              |                  |                         | - images*      |                     |                      |                                | -|              |                  |                         | - endnotes     |                     |                      |                                | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -|              | quote            | - object number         | - font face    | - book index meta   |                      | - delimiter tags (quote)       | -|              |                  |                         | - endnotes     |                     |                      |                                | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -|              | poem (see verse) |                         |                | - book index meta   |                      | - delimiter tags (poem)        | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -|              | verse (of poem)  | - object number         | - font face    |                     | - new line           | - (see poem delimiter)         | -|              |                  |                         | - endnotes     |                     | - 
preceeding spaces  |                                | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| group/code   | code             | - syntax                |                |                     | - new line           | - delimiter tags (code)        | -|              |                  | - numbered              |                |                     | - preceeding spaces  |                                | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| group/table  | table            | - object number         |                |                     |                      | - delimiter tags (table)       | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| doc object       | doc object | attributes              | inline         | appended            | structure            | delimiters                     | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| is_a             | is_of_type |                         |                |                     |                      |                                | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| heading          | para       | - level                 | - font face    | - object number off | - level              | - two newlines                 | +|                  |            | - object number         | 
- endnotes     | - book index meta   | (document structure) |                                | +|                  |            | - object number off     |                |                     |                      |                                | +|                  |            | - dummy (toc & seg)     |                |                     |                      |                                | +|                  |            | - tags (internal links) |                |                     |                      |                                | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| toc              | para       | - level                 | - font face    |                     |                      | - auto generated from headings | +|                  |            | (auto-indent)           | - links (auto) |                     |                      |                                | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| para             | para       | - bullet                | - font face    | - object number off |                      | - two newlines                 | +|                  |            | - indent                | - links/urls * | - book index meta   |                      |                                | +|                  |            | - object number         | - images*      |                     |                      |                                | +|                  |            | - object number off     | - endnotes     |                     |                      |                                | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| bookindex        | para       
| - auto indent           | - font face    |                     |                      | - two newlines                 | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| blurb            | para       | - bullet                | - font face    | - object number off |                      | - two newlines                 | +|                  |            | - indent                | - links/urls * | - book index meta   |                      |                                | +|                  |            | - object number         | - images*      |                     |                      |                                | +|                  |            | - object number off     | - endnotes     |                     |                      |                                | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| group            | block      | - object number         | - font face    | - book index meta   | - para break         | - block tags                   | +|                  |            | - object number off     | - links/urls * |                     |                      | (group)                        | +|                  |            |                         | - images*      |                     |                      |                                | +|                  |            |                         | - endnotes     |                     |                      |                                | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| block            | block      | - object number         | - font face    | - book index meta   | - new line           | - block tags                   | +|   
               |            | - object number off     | - links/urls * |                     |                      | (block)                        | +|                  |            |                         | - images*      |                     |                      |                                | +|                  |            |                         | - endnotes     |                     |                      |                                | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| quote            | block      | - object number         | - font face    | - book index meta   |                      | - block tags                   | +|                  |            |                         | - endnotes     |                     |                      | (quote)                        | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| poem (see verse) | block      |                         |                | - book index meta   |                      | - block tags                   | +|                  |            |                         |                |                     |                      | (poem)                         | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| verse (of poem)  |            | - object number         | - font face    |                     | - new line           | - (see poem delimiter)         | +|                  |            |                         | - endnotes     |                     | - preceeding spaces  |                                | 
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| code             | block      | - syntax                |                |                     | - new line           | - block tags                   | +|                  |            | - numbered              |                |                     | - preceeding spaces  | (code)                         | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| table            | block      | - object number         |                |                     |                      | - block tags (table)           | +|                  |            |                         |                |                     |                      | (table)                        | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| endnote          |            |                         | - font face    |                     |                      | (generated from                | +|                  |            |                         |                |                     |                      | inline markup tags)            | +|                  |            |                         |                |                     |                      | - two newlines                 | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|  - consider special treatment for links/urls (& for images?) 
take them out of    document munge (for various outputs), by storing in own array (within each diff --git a/org/meta_abstraction.org b/org/meta_abstraction.org index 4c98f66..5249df1 100644 --- a/org/meta_abstraction.org +++ b/org/meta_abstraction.org @@ -2294,6 +2294,37 @@ auto _image_dimensions(M,O)(M manifest_matter, O obj) {  }  #+END_SRC +***** links: think about!!! +- move actual links to an array in object struct so they cannot be regex munged within text block +  - you may wish to exclude certain types of internal document link +    - object number links +      - toc +      - book index +    - footnotes and footnote numbers + +#+name: abs_post +#+BEGIN_SRC d +auto _links(O)(O obj) { +  if (auto m = obj.text.match(rgx.inline_link_stow_uri)) { +    debug(links) { +      writeln("number of link matches to stow: ", (obj.text.match(rgx.inline_link_stow_uri)).count); +      writeln("links to stow: ", (obj.text.match(rgx.inline_link_stow_uri))); +    } +    int _n_matches = (obj.text.match(rgx.inline_link_stow_uri)).count.to!int; +    for(int i=0; i < _n_matches; ++i) { +      if (obj.text.match(rgx.inline_link_stow_uri)) { +        obj.stow.link ~= obj.text.matchFirst(rgx.inline_link_stow_uri)[2]; +        obj.text = obj.text.replaceFirst( +          rgx.inline_link_stow_uri, +          format(q"¶┥%s┝┤%s├¶", "$1", i) +        ); +      } +    } +  } +  return obj; +} +#+END_SRC +  ***** ↻ Loop section: head  #+name: abs_post @@ -2327,6 +2358,7 @@ foreach (ref obj; the_document_head_section) {      }      obj = obj_heading_ancestors(obj, lv_ancestors_txt);    } +  obj = _links(obj);  }  #+END_SRC @@ -2360,6 +2392,7 @@ if (the_table_of_contents_section.length > 1) {        }        obj = obj_heading_ancestors(obj, lv_ancestors_txt);      } +    obj = _links(obj);    }  }  #+END_SRC @@ -2412,6 +2445,7 @@ if (the_document_body_section.length > 1) {         _images ~= extract_images(obj.text);         obj = _image_dimensions(manifest_matter, obj);      } +    obj 
= _links(obj);    }  }  auto images=uniq(_images.sort()); @@ -2460,6 +2494,7 @@ if (the_endnotes_section.length > 1) {        }        obj = obj_heading_ancestors(obj, lv_ancestors_txt);      } +    obj = _links(obj);    }  }  #+END_SRC @@ -2507,6 +2542,7 @@ if (the_glossary_section.length > 1) {        obj.metainfo.ocn        = obj_cite_digits.object_number;        obj.metainfo.identifier = obj_cite_digits.identifier;      } +    obj = _links(obj);    }  }  #+END_SRC @@ -2554,6 +2590,7 @@ if (the_bibliography_section.length > 1) {        obj.metainfo.ocn        = obj_cite_digits.object_number;        obj.metainfo.identifier = obj_cite_digits.identifier;      } +    obj = _links(obj);    }  }  #+END_SRC @@ -2610,6 +2647,7 @@ if (the_bookindex_section.length > 1) {        obj.metainfo.o_n_book_index           = obj_cite_digits.bkidx;        obj.metainfo.object_number_type       = OCNtype.bkidx;      } +    obj = _links(obj);    }    /+ TODO assert failure, reinstate    assert(obj_cite_digit_bkidx == ocn_bidx_ @@ -2659,6 +2697,7 @@ if (the_blurb_section.length > 1) {        obj.metainfo.object_number_off  = obj_cite_digits.off;        obj.metainfo.object_number_type = OCNtype.non;      } +    obj = _links(obj);    }  }  #+END_SRC @@ -7861,6 +7900,15 @@ struct DocObj_CodeBlock_ {  }  #+END_SRC +**** stow (things to be protected from regular text transformations, so far links) + +#+name: meta_structs_init +#+BEGIN_SRC d +struct DocObj_Stow_ { +  string[]               link                               = []; +} +#+END_SRC +  **** pointers  #+name: meta_structs_init @@ -7903,6 +7951,7 @@ struct ObjGenericComposite {    DocObj_Has_            has;    DocObj_Table_          table;    DocObj_CodeBlock_      code_block; +  DocObj_Stow_           stow;    DocObj_Pointer_        ptr;  }  #+END_SRC diff --git a/org/output_sqlite.org b/org/output_sqlite.org index b1c9cf4..bdb2ca7 100644 --- a/org/output_sqlite.org +++ b/org/output_sqlite.org @@ -322,6 +322,7 @@ template 
SQLiteDbDrop() {  import doc_reform.output;  import    std.file, +  std.uri,    std.conv : to;  #+END_SRC @@ -534,6 +535,11 @@ auto inline_links(M,O)(    string         _xml_type = "seg",  ) {    if (obj.has.inline_links) { +    if  (obj.metainfo.is_a != "code") { +      _txt = replaceAll!(hit => +          hit[1] ~ "┤" ~ to!string((obj.stow.link[hit[2].to!ulong])).encode ~ "├" +        )(_txt, rgx.inline_link_number_only); +    }      if ((_txt.match(rgx.mark_internal_site_lnk))      && (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault        _txt = _txt.replaceAll( diff --git a/org/output_xmls.org b/org/output_xmls.org index b9302bd..45a1c3c 100644 --- a/org/output_xmls.org +++ b/org/output_xmls.org @@ -42,6 +42,7 @@ import    std.digest.sha,    std.file,    std.outbuffer, +  std.uri,    std.zip,    std.conv : to;  import @@ -193,7 +194,7 @@ auto header_metadata(M)(      doc_matters.conf_make_meta.meta.date_modified,      doc_matters.src.language,      doc_matters.conf_make_meta.meta.rights_copyright, -    doc_matters.generator_program.name_and_version, +    doc_matters.opt.action.debug_do ? "" : doc_matters.generator_program.name_and_version,      doc_matters.generator_program.url_home,    );    return o; @@ -466,6 +467,11 @@ auto inline_links(M,O)(  ) {    string seg_lvs;    if (obj.has.inline_links) { +    if  (obj.metainfo.is_a != "code") { +      _txt = replaceAll!(hit => +          hit[1] ~ "┤" ~ to!string((obj.stow.link[hit[2].to!ulong])).encode ~ "├" +        )(_txt, rgx.inline_link_number_only); +    }      if ((_txt.match(rgx.mark_internal_site_lnk))      && (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault        _txt = _txt.replaceAll(  | 
