From 860b894a10f3526e6bd73d53850764c0ad95ab99 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 29 Jun 2024 13:54:28 -0400 Subject: document digests and reduction in use of tuples --- org/in_source_files.org | 56 ++++++++++++++++++++++++++++++++++--------------- org/ocda.org | 36 +++++++++++++++++++++++-------- org/ocda_functions.org | 33 ++++++++++++++++++----------- org/ocda_obj_setter.org | 1 + org/out_latex.org | 1 - org/out_odt.org | 1 - org/spine.org | 14 ++++++------- 7 files changed, 94 insertions(+), 48 deletions(-) (limited to 'org') diff --git a/org/in_source_files.org b/org/in_source_files.org index 90143f4..6275ed4 100644 --- a/org/in_source_files.org +++ b/org/in_source_files.org @@ -298,6 +298,7 @@ module sisudoc.io_in.read_source_files; @safe: template spineRawMarkupContent() { import + std.digest.sha, std.file, std.path; import @@ -309,6 +310,14 @@ template spineRawMarkupContent() { static auto rgx = RgxI(); mixin spineRgxFiles; static auto rgx_files = RgxFiles(); + struct ST_doc_parts { + char[] header_raw; + char[][] sourcefile_body_content; + string[] insert_file_list; + string[] images_list; + ubyte[32] header_raw_digest; + ubyte[32] src_txt_digest; + } string[] _images=[]; string[] _extract_images(S)(S content_block) { string[] images_; @@ -328,7 +337,9 @@ template spineRawMarkupContent() { char[], "header", char[][], "src_txt", string[], "insert_files", - string[], "images" + string[], "images", + ubyte[32], "header_digest", + ubyte[32], "src_txt_digest" ); auto spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) { auto _0_header_1_body_content_2_insert_filelist_tuple @@ -342,43 +353,50 @@ template spineRawMarkupContent() { = raw.markupSourceReadIn(fn_src); return source_txt_str; } - final auto sourceContentSplitIntoHeaderAndBody(O)( + final ST_doc_parts sourceContentSplitIntoHeaderAndBody(O)( O _opt_action, in string source_txt_str, in string fn_src="" ) { auto raw = MarkupRawUnit(); - string[] insert_file_list; - string[] 
images_list; + string[] insert_file_list_get; + string[] images_list_get; HeaderContentInsertsImages t = raw.markupSourceHeaderContentRawLineTupleArray(source_txt_str); char[] header_raw = t.header; + ubyte[32] header_raw_digest = t.header.sha256Of; char[][] sourcefile_body_content = t.src_txt; if (fn_src.match(rgx_files.src_fn_master)) { // filename with path needed if master file (.ssm) not otherwise auto ins = Inserts(); ContentsInsertsImages tu = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src); sourcefile_body_content = tu.contents; - insert_file_list = tu.insert_files.dup; - images_list = tu.images.dup; + insert_file_list_get = tu.insert_files.dup; + images_list_get = tu.images.dup; } else if (_opt_action.source || _opt_action.pod) { auto ins = Inserts(); ContentsInsertsImages tu = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src); - images_list = tu.images.dup; - } + images_list_get = tu.images.dup; + } // image_list, if path could take sha256 digests already here? 
+ ubyte[32] src_txt_digest = sourcefile_body_content.sha256Of; string header_type = ""; - t = tuple( - header_raw, - sourcefile_body_content, - insert_file_list, - images_list - ); - return t; + ST_doc_parts ret; + { + ret.header_raw = t.header; + ret.sourcefile_body_content = sourcefile_body_content; + ret.insert_file_list = insert_file_list_get; + ret.images_list = images_list_get; + ret.header_raw_digest = t.header.sha256Of; + ret.src_txt_digest = sourcefile_body_content.sha256Of; + } + return ret; } } struct MarkupRawUnit { - import std.file; + import + std.digest.sha, + std.file; <> <> <> @@ -517,11 +535,15 @@ HeaderContentInsertsImages markupSourceHeaderContentRawLineTupleArray(in string char[] header = hc[0]; char[] source_txt = hc[1]; char[][] source_line_arr = markupSourceLineArray(source_txt); + ubyte[32] header_digest; + ubyte[32] src_txt_digest; HeaderContentInsertsImages t = tuple( header, source_line_arr, file_insert_list, - images_list + images_list, + header_digest, + src_txt_digest ); return t; } diff --git a/org/ocda.org b/org/ocda.org index 696c4c9..9811545 100644 --- a/org/ocda.org +++ b/org/ocda.org @@ -54,6 +54,7 @@ template docAbstraction() { import std.algorithm, std.container, + std.digest.sha, std.file, std.json, std.path; @@ -1001,8 +1002,7 @@ foreach (ref obj; the_document_head_section) { } obj = _links(obj); } -if (the_document_toc_section.length > 1) { - // scroll +if (the_document_toc_section.length > 1) { // writeln("toc"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_toc_section) { @@ -1028,7 +1028,7 @@ if (the_document_toc_section.length > 1) { // images string[] _images; // multiple 1~ levels, loop through document body -if (the_document_body_section.length > 1) { +if (the_document_body_section.length > 1) { // writeln("body"); foreach (ref obj; 
the_document_body_section) { if (!(obj.metainfo.identifier.empty)) { if (!(((obj.metainfo.identifier) in tag_assoc) @@ -1064,12 +1064,26 @@ if (the_document_body_section.length > 1) { _images ~= extract_images(obj.text); obj = _image_dimensions(obj, manifested); } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } -auto image_list = (_images.sort()).uniq; +auto image_list = (_images.sort()).uniq; // also get digest on each image here? // workon +if (_images.length > 0) { + foreach (img; image_list) { + try { // also get sha digest on image file + // read_image + auto data = (cast(byte[]) (manifested.src.image_dir_path ~ "/" ~ img).read); + // calculate, digest, hash + writefln("%s\n%-(%02x%)::%s ⋅ %s", img, data.sha256Of, data.length, img); + writefln("%-(%02x%) ⋅ %s ⋅ %s", data.sha256Of, img, data.length); + } catch (Exception ex) { + writeln("WARNING, image not found: ", img, "\n ", manifested.src.image_dir_path ~ "/" ~ img); + } + } +} // endnotes optional only one 1~ level -if (the_document_endnotes_section.length > 1) { +if (the_document_endnotes_section.length > 1) { // writeln("endnotes"); dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup; @@ -1104,7 +1118,7 @@ if (the_document_endnotes_section.length > 1) { } } // glossary optional only one 1~ level -if (the_document_glossary_section.length > 1) { +if (the_document_glossary_section.length > 1) { // writeln("glossary"); foreach (ref obj; the_document_glossary_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1135,11 +1149,12 @@ if (the_document_glossary_section.length > 1) { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } // bibliography 
optional only one 1~ level -if (the_document_bibliography_section.length > 1) { +if (the_document_bibliography_section.length > 1) { // writeln("bibliography"); foreach (ref obj; the_document_bibliography_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1170,6 +1185,7 @@ if (the_document_bibliography_section.length > 1) { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } @@ -1177,7 +1193,7 @@ if (the_document_bibliography_section.length > 1) { int ocn_ = obj_cite_digits.object_number; int ocn_bkidx_ = 0; int ocn_bidx_; -if (the_document_bookindex_section.length > 1) { // scroll +if (the_document_bookindex_section.length > 1) { // writeln("book index"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_bookindex_section) { @@ -1214,13 +1230,14 @@ if (the_document_bookindex_section.length > 1) { obj.metainfo.o_n_book_index = obj_cite_digits.bkidx; obj.metainfo.object_number_type = OCNtype.bkidx; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } // TODO assert failure, reinstate // assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?"); } // blurb optional only one 1~ level -if (the_document_blurb_section.length > 1) { +if (the_document_blurb_section.length > 1) { // writeln("blurb"); foreach (ref obj; the_document_blurb_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1251,6 +1268,7 @@ if (the_document_blurb_section.length > 1) { obj.metainfo.object_number_off = obj_cite_digits.off; obj.metainfo.object_number_type = OCNtype.non; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } diff --git a/org/ocda_functions.org b/org/ocda_functions.org index 
6c300f1..ecc8b2f 100644 --- a/org/ocda_functions.org +++ b/org/ocda_functions.org @@ -47,18 +47,6 @@ template docSectKeysSeq() { #+HEADER: :noweb yes #+BEGIN_SRC d // ↓ abstraction imports -import - std.algorithm, - std.container, - std.file, - std.json, - std.path; -import - sisudoc.meta, - sisudoc.meta.defaults, - sisudoc.meta.rgx, - sisudoc.meta.metadoc_object_setter, - sisudoc.meta.rgx; // ↓ abstraction mixins mixin ObjectSetter; mixin InternalMarkup; @@ -126,6 +114,7 @@ auto node_construct = NodeStructureMetadata(); <> <> <> +<> <> <> <> @@ -3203,6 +3192,26 @@ pure ObjGenericComposite obj_dom_set_collapsed_tags()( // ↑ - object tags #+END_SRC +***** ϝ object digest + +#+NAME: ocdaFunc_obj_digest +#+HEADER: :noweb yes +#+BEGIN_SRC d +// ↓ - object digest +pure ubyte[32] obj_digest()( + ObjGenericComposite obj, +) { + obj.metainfo.sha256 = obj.text.sha256Of; + // if (obj.metainfo.is_a == "heading") { + // writeln(obj.metainfo.sha256.toHexString, " ", obj.metainfo.ocn, " ", obj.metainfo.is_a, " ", obj.metainfo.heading_lev_markup); + // } else { + // writeln(obj.metainfo.sha256.toHexString, " ", obj.metainfo.ocn, " ", obj.metainfo.is_a); + // } + return obj.metainfo.sha256; +} +// ↑ - object digest +#+END_SRC + **** ϝ section ***** ϝ section toc - table of contents diff --git a/org/ocda_obj_setter.org b/org/ocda_obj_setter.org index 5002ee2..576c693 100644 --- a/org/ocda_obj_setter.org +++ b/org/ocda_obj_setter.org @@ -121,6 +121,7 @@ struct DocObj_MetaInfo_ { int parent_lev_markup = 0; int parent_ocn = 0; int last_descendant_ocn = 0; + ubyte[32] sha256; } #+END_SRC diff --git a/org/out_latex.org b/org/out_latex.org index bb9d6cb..9685554 100644 --- a/org/out_latex.org +++ b/org/out_latex.org @@ -664,7 +664,6 @@ fonts to try: #+NAME: ImportsAndMixins_imports #+BEGIN_SRC d import - std.digest.sha, std.file, std.outbuffer, std.uri, diff --git a/org/out_odt.org b/org/out_odt.org index 048fd11..4344473 100644 --- a/org/out_odt.org +++ b/org/out_odt.org @@ -1046,7 
+1046,6 @@ import sisudoc.io_out.rgx, sisudoc.io_out.rgx_xhtml; import - std.digest.sha, std.file, std.outbuffer, std.uri, diff --git a/org/spine.org b/org/spine.org index ac9f552..56cb3f7 100644 --- a/org/spine.org +++ b/org/spine.org @@ -1463,7 +1463,6 @@ module sisudoc.meta.metadoc; template spineAbstraction() { <> <> - enum headBody { header, body_content, insert_file_list, image_list } enum makeMeta { make, meta } enum docAbst { doc_abstract_obj, doc_has } @system auto spineAbstraction(E,P,O,Cfg,M,S)( @@ -1517,8 +1516,8 @@ template spineAbstraction() { if build source pod requested all information needed to build it available at this point - manifest related information _manifest - - insert file list _header_body_insertfilelist_imagelist[headBody.insert_file_list] - - image list _header_body_insertfilelist_imagelist[headBody.image_list] + - insert file list _header_body_insertfilelist_imagelist.insert_file_list + - image list _header_body_insertfilelist_imagelist.images_list #+NAME: spine_each_file_do_read_and_split_dr_markup_file_content_into_header_and_body #+BEGIN_SRC d @@ -1531,7 +1530,6 @@ if ((_opt_action.debug_do) } auto _header_body_insertfilelist_imagelist = spineRawMarkupContent!()(_opt_action, _manifest.src.path_and_fn); -static assert(_header_body_insertfilelist_imagelist.length==4); if ((_opt_action.debug_do) || (_opt_action.debug_do_stages) ) { @@ -1540,7 +1538,7 @@ if ((_opt_action.debug_do) debug(header_and_body) { writeln(header); writeln(_header_body_insertfilelist_imagelist.length); - writeln(_header_body_insertfilelist_imagelist.length[headBody.body_content][0]); + // writeln(_header_body_insertfilelist_imagelist.length.body_content[0]); } #+END_SRC @@ -1568,7 +1566,7 @@ if ((_opt_action.debug_do) import sisudoc.meta.conf_make_meta_yaml; _make_and_meta_struct = docHeaderMakeAndMetaTupYamlExtractAndConvertToStruct!()( - _header_body_insertfilelist_imagelist[headBody.header], + _header_body_insertfilelist_imagelist.header_raw,
_make_and_meta_struct, _manifest, _opt_action, @@ -1603,7 +1601,7 @@ if ((_opt_action.debug_do) writeln("step3 commence → (document abstraction (da); da keys; segnames; doc_matters) [", _manifest.src.filename, "]"); } auto da = docAbstraction!()( - _header_body_insertfilelist_imagelist[headBody.body_content], + _header_body_insertfilelist_imagelist.sourcefile_body_content, _make_and_meta_struct, _opt_action, _manifest, @@ -1798,7 +1796,7 @@ auto output_path() { auto srcs() { struct SRC_ { auto file_insert_list() { - return _header_body_insertfilelist_imagelist[headBody.insert_file_list]; + return _header_body_insertfilelist_imagelist.insert_file_list; } auto image_list() { return _doc_has_struct.imagelist; -- cgit v1.2.3