From 90873fabd7451e1dd8c4b39303906e19bdc481f7 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 10 Apr 2024 22:24:34 -0400 Subject: 0.16.0 sisudoc (src/sisudoc sisudoc spine) - src/sisudoc (replaces src/doc_reform) - sisudoc spine (used more) --- src/sisudoc/io_in/read_source_files.d | 396 ++++++++++++++++++++++++++++++++++ 1 file changed, 396 insertions(+) create mode 100644 src/sisudoc/io_in/read_source_files.d (limited to 'src/sisudoc/io_in/read_source_files.d') diff --git a/src/sisudoc/io_in/read_source_files.d b/src/sisudoc/io_in/read_source_files.d new file mode 100644 index 0000000..4ba0b4f --- /dev/null +++ b/src/sisudoc/io_in/read_source_files.d @@ -0,0 +1,396 @@ +/+ +- Name: SisuDoc Spine, Doc Reform [a part of] + - Description: documents, structuring, processing, publishing, search + - static content generator + + - Author: Ralph Amissah + [ralph.amissah@gmail.com] + + - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved. + + - License: AGPL 3 or later: + + Spine (SiSU), a framework for document structuring, publishing and + search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Affero General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see [https://www.gnu.org/licenses/]. 
+ + If you have Internet connection, the latest version of the AGPL should be + available at these locations: + [https://www.fsf.org/licensing/licenses/agpl.html] + [https://www.gnu.org/licenses/agpl.html] + + - Spine (by Doc Reform, related to SiSU) uses standard: + - docReform markup syntax + - standard SiSU markup syntax with modified headers and minor modifications + - docReform object numbering + - standard SiSU object citation numbering & system + + - Homepages: + [https://www.sisudoc.org] + [https://www.doc-reform.org] + + - Git + [https://git.sisudoc.org/] + ++/ +/++ + module source_read_source_files;
+ - open markup files
+  - if master file scan for additional files to import/insert
++/
+module sisudoc.io_in.read_source_files;
+@safe:
+/++
+  Reads a markup source file, splits it into header and body lines and, for
+  master files, pulls in the content of the files they insert. When the
+  `source` or `pod` option is set, image names referenced by the markup are
+  collected as well.
++/
+template spineRawMarkupContent() {
+  import
+    std.file,
+    std.path;
+  import
+    sisudoc.meta,
+    sisudoc.io_in.paths_source,
+    sisudoc.meta.rgx_files,
+    sisudoc.meta.rgx;
+  mixin spineRgxIn;
+  static auto rgx = RgxI();
+  mixin spineRgxFiles;
+  static auto rgx_files = RgxFiles();
+  /+ retained so that existing references keep resolving; the Inserts scanners
+     below collect images in function-local lists instead of this shared one +/
+  string[] _images=[];
+  /++
+    Returns capture group 1 of every `rgx.image` match in `content_block`.
+
+    `content_block` is converted with `to!string` before matching, so either a
+    single line or an array of lines may be passed.
+  +/
+  string[] _extract_images(S)(S content_block) {
+    string[] images_;
+    string _content_block = content_block.to!string;
+    /+ iterate over all matches: one block may reference several images +/
+    foreach (m; _content_block.matchAll(rgx.image)) {
+      images_ ~= m[1].to!string;
+    }
+    return images_;
+  }
+  auto rawsrc = RawMarkupContent();
+  alias ContentsInsertsImages = Tuple!(
+    char[][], "contents",
+    string[], "insert_files",
+    string[], "images"
+  );
+  alias HeaderContentInsertsImages = Tuple!(
+    char[],   "header",
+    char[][], "src_txt",
+    string[], "insert_files",
+    string[], "images"
+  );
+  /++
+    Entry point: reads `fn_src` and returns a HeaderContentInsertsImages tuple
+    (header, body lines, inserted file names, image names).
+  +/
+  auto spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) {
+    auto _0_header_1_body_content_2_insert_filelist_tuple
+      = rawsrc.sourceContentSplitIntoHeaderAndBody(_opt_action, rawsrc.sourceContent(fn_src), fn_src);
+    return _0_header_1_body_content_2_insert_filelist_tuple;
+  }
+  struct RawMarkupContent {
+    /++ Returns the text of the markup file `fn_src` as one string. +/
+    final sourceContent(in string fn_src) {
+      auto raw = MarkupRawUnit();
+      string source_txt_str
+        = raw.markupSourceReadIn(fn_src);
+      return source_txt_str;
+    }
+    /++
+      Splits `source_txt_str` into header and body lines.
+
+      If `fn_src` matches `rgx_files.src_fn_master` the body is scanned for
+      insert files, whose content replaces the insert lines, and the insert
+      file list and image list are filled in. Otherwise, when the `source` or
+      `pod` option is set, the body is scanned only to build the image list and
+      is returned unchanged.
+    +/
+    final auto sourceContentSplitIntoHeaderAndBody(O)(
+      O         _opt_action,
+      in string source_txt_str,
+      in string fn_src=""
+    ) {
+      auto raw = MarkupRawUnit();
+      string[] insert_file_list;
+      string[] images_list;
+      HeaderContentInsertsImages t
+        = raw.markupSourceHeaderContentRawLineTupleArray(source_txt_str);
+      char[] header_raw = t.header;
+      char[][] sourcefile_body_content = t.src_txt;
+      if (fn_src.match(rgx_files.src_fn_master)) { // filename with path needed if master file (.ssm) not otherwise
+        auto ins = Inserts();
+        ContentsInsertsImages tu
+          = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
+        sourcefile_body_content = tu.contents;
+        insert_file_list = tu.insert_files.dup;
+        images_list = tu.images.dup;
+      } else if (_opt_action.source || _opt_action.pod) {
+        /+ not a master file: only the image list of the scan result is used +/
+        auto ins = Inserts();
+        ContentsInsertsImages tu
+          = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
+        images_list = tu.images.dup;
+      }
+      t = tuple(
+        header_raw,
+        sourcefile_body_content,
+        insert_file_list,
+        images_list
+      );
+      return t;
+    }
+  }
+  struct MarkupRawUnit {
+    import std.file;
+    /++
+      Reads `fn_src` and returns its text.
+
+      Throws (via enforce) when the path does not exist. Errors raised while
+      reading are caught and ignored, in which case an empty string is
+      returned.
+    +/
+    final private string readInMarkupSource(in char[] fn_src) {
+      enforce(
+        exists(fn_src) != 0,
+        "file not found: «" ~
+        fn_src ~ "»"
+      );
+      string source_txt_str;
+      try {
+        /+ existence is re-checked here; only regular files are read +/
+        if (exists(fn_src) && fn_src.getLinkAttributes.attrIsFile) {
+          source_txt_str = fn_src.readText;
+        }
+      } catch (ErrnoException ex) {
+        // NOTE(review): read errors are swallowed here; the caller receives an empty string
+      } catch (UTFException ex) {
+        // Handle validation errors
+      } catch (FileException ex) {
+        // Handle errors
+      }
+      std.utf.validate(source_txt_str);
+      return source_txt_str;
+    }
+    /++
+      Splits `src_text` on the first match of `rgx.heading_a` into
+      [header, content]; the matched text stays at the start of content.
+
+      Throws (via enforce) when `rgx.heading_a` does not match, since the text
+      then cannot be split into header and body.
+    +/
+    @trusted final private char[][] header0Content1(in string src_text) { // cast(char[])
+      /+ split string on _first_ match of "^:?A~\s" into [header, content] array/tuple +/
+      auto m = (cast(char[]) src_text).matchFirst(rgx.heading_a);
+      /+ without a match there is nothing to split on; fail with a clear message
+         rather than reading hit / post of an empty match +/
+      enforce(
+        !m.empty,
+        "document markup is broken, header body split == 1"
+        ~ "; (header / body array split should == 2 (split is on level A~))"
+      );
+      char[][] header_and_content;
+      header_and_content ~= m.pre;
+      header_and_content ~= m.hit ~ m.post;
+      return header_and_content;
+    }
+    /++ Splits `src_text` into lines using `rgx.newline_eol_strip_preceding`. +/
+    @trusted final private char[][] markupSourceLineArray(in char[] src_text) { // cast(char[])
+      char[][] source_line_arr
+        = (cast(char[]) src_text).split(rgx.newline_eol_strip_preceding);
+      return source_line_arr;
+    }
+    /++
+      Returns the text of `fn_src`; throws (via enforce) when the name does not
+      match `rgx_files.src_pth_sst_or_ssm`.
+    +/
+    string markupSourceReadIn(in string fn_src) {
+      static auto rgx_files = RgxFiles();
+      enforce(
+        fn_src.match(rgx_files.src_pth_sst_or_ssm),
+        "not a dr markup filename: «" ~
+        fn_src ~ "»"
+      );
+      string source_txt_str = readInMarkupSource(fn_src);
+      return source_txt_str;
+    }
+    /++
+      Returns (header, body lines, [], []) for `source_txt_str`; the insert
+      file and image lists are left empty for the caller to fill.
+    +/
+    HeaderContentInsertsImages markupSourceHeaderContentRawLineTupleArray(in string source_txt_str) {
+      string[] file_insert_list = [];
+      string[] images_list = [];
+      char[][] hc = header0Content1(source_txt_str);
+      char[] header = hc[0];
+      char[] source_txt = hc[1];
+      char[][] source_line_arr = markupSourceLineArray(source_txt);
+      HeaderContentInsertsImages t = tuple(
+        header,
+        source_line_arr,
+        file_insert_list,
+        images_list
+      );
+      return t;
+    }
+    /++
+      Reads the insert file `fn_src_insert` and returns its lines; throws (via
+      enforce) when the name does not match `rgx_file`.
+    +/
+    final char[][] getInsertMarkupSourceContentRawLineArray(
+      in char[]    fn_src_insert,
+      Regex!(char) rgx_file
+    ) {
+      enforce(
+        fn_src_insert.match(rgx_file),
+        "not a dr markup filename: «" ~
+        fn_src_insert  ~ "»"
+      );
+      string source_txt_str = readInMarkupSource(fn_src_insert);
+      char[][] source_line_arr = markupSourceLineArray(source_txt_str);
+      return source_line_arr;
+    }
+  }
+  struct Inserts {
+    alias ContentsAndImages = Tuple!(
+      char[][], "insert_contents",
+      string[], "images"
+    );
+    /++
+      Scans the lines of an inserted file.
+
+      Lines inside code blocks and ordinary lines are copied to the result.
+      A line naming a further insert file causes that file to be read and
+      (with the `source` or `pod` option) scanned for images, but its content
+      is not added to the result.
+
+      Returns: the copied lines and the images found during this call.
+    +/
+    ContentsAndImages scan_subdoc_source(O)(
+      O        _opt_action,
+      char[][] markup_sourcefile_insert_content,
+      string   fn_src
+    ) {
+      char[][] contents_insert;
+      /+ local on purpose: the result must hold only the images of this scan,
+         not those left over from earlier scans +/
+      string[] _images;
+      int code_block_status = 0;
+      enum codeBlock { off, curly, tic, }
+      auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
+      auto markup_src_file_path = fn_pth_full.captures[1];
+      foreach (line; markup_sourcefile_insert_content) {
+        if (code_block_status == codeBlock.curly) {
+          if (line.matchFirst(rgx.block_curly_code_close)) {
+            code_block_status = codeBlock.off;
+          }
+          contents_insert ~= line;
+        } else if (line.matchFirst(rgx.block_curly_code_open)) {
+          code_block_status = codeBlock.curly;
+          contents_insert ~= line;
+        } else if (code_block_status == codeBlock.tic) {
+          if (line.matchFirst(rgx.block_tic_close)) {
+            code_block_status = codeBlock.off;
+          }
+          contents_insert ~= line;
+        } else if (line.matchFirst(rgx.block_tic_code_open)) {
+          code_block_status = codeBlock.tic;
+          contents_insert ~= line;
+        } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
+          auto insert_fn = m.captures[2];
+          auto insert_sub_pth = m.captures[1];
+          auto fn_src_insert
+            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
+          auto raw = MarkupRawUnit();
+          auto markup_sourcesubfile_insert_content
+            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
+          debug(insert_file) {
+            writeln(line);
+            writeln(fn_src_insert);
+            writeln(
+              "  length contents insert array: ",
+              markup_sourcesubfile_insert_content.length
+            );
+          }
+          if (_opt_action.source || _opt_action.pod) {
+            _images ~= _extract_images(markup_sourcesubfile_insert_content);
+          }
+          /+ NOTE(review): the nested insert's content is read but never added to
+             contents_insert; the outline below describes a recursive import
+             that is not implemented here - confirm whether that is intended +/
+          /+
+            - 1. load file
+            - 2. read lines
+            - 3. scan lines
+            -   a. if filename insert, and insert filename
+            -      repeat 1
+            -   b. else
+            -      add line to new array;
+            -      build image list, search for any image files to add to image list
+          +/
+        } else {
+          contents_insert ~= line; // images to extract for image list?
+          if (_opt_action.source || _opt_action.pod) {
+            string[] _image_linelist = _extract_images(line);
+            if (_image_linelist.length > 0) {
+              _images ~= _image_linelist;
+            }
+          }
+        }
+      } // end src subdoc (inserts) loop
+      ContentsAndImages t = tuple(
+        contents_insert,
+        _images
+      );
+      return t;
+    }
+    /++
+      Scans the body lines of a source document.
+
+      Lines inside code blocks and ordinary lines are copied to the result.
+      A line naming an insert file is replaced by that file's lines as returned
+      by `scan_subdoc_source`, and the file name is recorded.
+
+      Returns: the resulting lines, the insert file names in order of
+      appearance, and (with the `source` or `pod` option) the sorted,
+      de-duplicated image names found.
+    +/
+    ContentsInsertsImages scan_master_src_for_insert_files_and_import_content(O)(
+      O        _opt_action,
+      char[][] sourcefile_body_content,
+      string   fn_src
+    ) {
+      import std.algorithm;
+      char[][] contents;
+      int code_block_status = 0;
+      enum codeBlock { off, curly, tic, }
+      auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
+      auto markup_src_file_path = fn_pth_full.captures[1];
+      string[] _images =[];
+      string[] insert_file_list =[];
+      foreach (line; sourcefile_body_content) {
+        if (code_block_status == codeBlock.curly) {
+          if (line.matchFirst(rgx.block_curly_code_close)) {
+            code_block_status = codeBlock.off;
+          }
+          contents ~= line;
+        } else if (line.matchFirst(rgx.block_curly_code_open)) {
+          code_block_status = codeBlock.curly;
+          contents ~= line;
+        } else if (code_block_status == codeBlock.tic) {
+          if (line.matchFirst(rgx.block_tic_close)) {
+            code_block_status = codeBlock.off;
+          }
+          contents ~= line;
+        } else if (line.matchFirst(rgx.block_tic_code_open)) {
+          code_block_status = codeBlock.tic;
+          contents ~= line;
+        } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
+          auto insert_fn = m.captures[2];
+          auto insert_sub_pth = m.captures[1];
+          auto fn_src_insert
+            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
+          insert_file_list ~= fn_src_insert.to!string;
+          auto raw = MarkupRawUnit();
+          /+ TODO +/
+          auto markup_sourcefile_insert_content
+            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
+          debug(insert_file) {
+            writeln(line);
+            writeln(fn_src_insert);
+            writeln(
+              "  length contents insert array: ",
+              markup_sourcefile_insert_content.length
+            );
+          }
+          auto ins = Inserts();
+          ContentsAndImages contents_insert_tu = ins.scan_subdoc_source(
+            _opt_action,
+            markup_sourcefile_insert_content,
+            fn_src_insert.to!string
+          );
+          contents ~= contents_insert_tu.insert_contents;
+          if (_opt_action.source || _opt_action.pod) {
+            /+ the sub-scan already returns image names; take them as they are
+               instead of running the image regex over the list again +/
+            _images ~= contents_insert_tu.images;
+          }
+          /+
+            - 1. load file
+            - 2. read lines
+            - 3. scan lines
+            -   a. if filename insert, and insert filename
+            -      repeat 1
+            -   b. else
+            -      add line to new array;
+            -      build image list, search for any image files to add to image list
+          +/
+        } else {
+          contents ~= line;
+          if (_opt_action.source || _opt_action.pod) {
+            string[] _image_linelist = _extract_images(line);
+            if (_image_linelist.length > 0) {
+              _images ~= _image_linelist;
+            }
+          }
+        }
+      } // end src doc loop
+      /+ sorted and de-duplicated image list +/
+      string[] images = [];
+      foreach(i; uniq(_images.sort())) {
+        images ~= i;
+      }
+      debug(insert_file) {
+        writeln(__LINE__);
+        writeln(contents.length);
+      }
+      ContentsInsertsImages t = tuple(
+        contents,
+        insert_file_list,
+        images
+      );
+      return t;
+    }
+  }
+}
-- cgit v1.2.3