From e95c49b76f4ac7bf72c383ee43a0567dfcbf1603 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 10 Sep 2018 18:15:02 -0400 Subject: 0.1.0 renamed doc-reform, doc_reform (& rad) - from sdp --- src/doc_reform/source/read_source_files.d | 350 ++++++++++++++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 src/doc_reform/source/read_source_files.d (limited to 'src/doc_reform/source/read_source_files.d') diff --git a/src/doc_reform/source/read_source_files.d b/src/doc_reform/source/read_source_files.d new file mode 100644 index 0000000..294b081 --- /dev/null +++ b/src/doc_reform/source/read_source_files.d @@ -0,0 +1,350 @@ +/++ + module source_read_source_files;
+ - open markup files
+ - if master file scan for additional files to import/insert
+/
module doc_reform.source.read_source_files;
/++
  Reading of markup source files.

  The template bundles three helpers:
  - RawMarkupContent: entry points that read a source file and split it
    into header and body;
  - MarkupRawUnit: file reading, header / body split, line splitting;
  - Inserts: scanning of body lines for insert-file directives and images.
+/
static template SiSUrawMarkupContent() {
  import
    doc_reform.meta.rgx;
  import
    doc_reform.meta,
    doc_reform.source.paths_source,
    std.file,
    std.path;
  mixin SiSUrgxInit;
  static auto rgx = Rgx();
  // Accumulator appended to by Inserts.scan_subdoc_source.
  // NOTE(review): nothing visible in this file resets it between documents - confirm.
  string[] _images=[];
  /++
    Collect every image reference found in content_block.

    content_block is converted to text with to!string and searched with
    rgx.image; capture group 1 of each match is returned.
    NOTE(review): for array arguments to!string yields the formatted array
    representation, presumably still matchable by rgx.image - confirm.

    Returns: one entry per match (duplicates are kept), empty if none.
  +/
  auto _extract_images(S)(S content_block) {
    string[] images_;
    auto _content_block = content_block.to!string;
    // iterate over all matches: testing the match range once and reading
    // captures[1] only ever yields the first image of the block
    foreach (m; _content_block.matchAll(rgx.image)) {
      images_ ~= m[1].to!string;
    }
    return images_;
  }
  auto rawsrc = RawMarkupContent();
  /++
    Read the markup file fn_src and return the tuple
    (header, body lines, insert file list, image list).
  +/
  auto SiSUrawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) {
    auto source_txt_str = rawsrc.sourceContent(fn_src);
    return rawsrc.sourceContentSplitIntoHeaderAndBody(_opt_action, source_txt_str, fn_src);
  }
  struct RawMarkupContent {
    /++
      Return the text of the markup source file fn_src
      (delegates to MarkupRawUnit.markupSourceReadIn).
    +/
    final sourceContent(in string fn_src) {
      auto raw = MarkupRawUnit();
      return raw.markupSourceReadIn(fn_src);
    }
    /++
      Split source text into header and body lines and, where required, scan
      the body for inserts and images.

      The body is scanned when fn_src matches rgx.src_fn_master, or when
      _opt_action.source or _opt_action.sisupod is set. Only for a master
      file are the body and the insert file list replaced by the scan
      result; the image list is taken from the scan in both cases.

      Params:
        _opt_action    = options object; .source and .sisupod are read here
        source_txt_str = complete text of the markup source
        fn_src         = filename with path of the markup source
      Returns: tuple(header, body lines, insert file list, image list)
    +/
    final auto sourceContentSplitIntoHeaderAndBody(O)(O _opt_action, in string source_txt_str, in string fn_src="") {
      auto raw = MarkupRawUnit();
      string[] insert_file_list;
      string[] images_list;
      auto t
        = raw.markupSourceHeaderContentRawLineTupleArray(source_txt_str);
      auto header_raw = t[0];
      auto sourcefile_body_content = t[1];
      // filename with path needed if master file (.ssm) not otherwise
      immutable is_master = !fn_src.match(rgx.src_fn_master).empty;
      if (is_master || _opt_action.source || _opt_action.sisupod) {
        auto tu
          = Inserts().scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
        static assert(!isTypeTuple!(tu));
        if (is_master) {
          sourcefile_body_content = tu[0];
          insert_file_list = tu[1].dup;
        }
        images_list = tu[2].dup;
      }
      t = tuple(
        header_raw,
        sourcefile_body_content,
        insert_file_list,
        images_list
      );
      static assert(t.length==4);
      return t;
    }
  }
  struct MarkupRawUnit {
    import std.file;
    /++
      Read the file fn_src as text.

      Throws (via enforce) if the file does not exist. A failure while
      reading or decoding is reported on stderr and an empty string is
      returned, so callers still receive a value as before; previously such
      failures were swallowed without any message.
    +/
    final private string readInMarkupSource(in char[] fn_src) {
      import std.stdio : stderr;
      enforce(
        exists(fn_src),
        "file not found: «" ~
        fn_src ~ "»"
      );
      string source_txt_str;
      try {
        debug(io) {
          writeln("in src, markup source file found: ", fn_src);
        }
        source_txt_str = fn_src.readText;
      } catch (ErrnoException ex) {
        stderr.writeln("ERROR reading markup source file «", fn_src, "»: ", ex.msg);
      } catch (UTFException ex) {
        stderr.writeln("ERROR invalid UTF in markup source file «", fn_src, "»: ", ex.msg);
      } catch (FileException ex) {
        stderr.writeln("ERROR reading markup source file «", fn_src, "»: ", ex.msg);
      }
      std.utf.validate(source_txt_str);
      return source_txt_str;
    }
    /++
      Split src_text on the _first_ match of rgx.heading_a into
      [header, content]: header is the text before the match, content is
      the match together with everything after it.

      Throws (via enforce) if rgx.heading_a does not match at all. The
      former assert on the array length could never fail, as both parts
      were appended unconditionally.
    +/
    final private char[][] header0Content1(in string src_text) {
      // work on a mutable copy rather than casting immutability away
      auto m = src_text.dup.matchFirst(rgx.heading_a);
      enforce(
        !m.empty,
        "document markup is broken, header body split failed: "
        ~ "no level A~ heading found (header / body split is on level A~)"
      );
      char[][] header_and_content;
      header_and_content ~= m.pre;
      header_and_content ~= m.hit ~ m.post;
      return header_and_content;
    }
    /++
      Split src_text into lines on rgx.newline_eol_strip_preceding.
    +/
    final private char[][] markupSourceLineArray(in char[] src_text) {
      return (cast(char[]) src_text).split(rgx.newline_eol_strip_preceding);
    }
    /++
      Read the markup file fn_src; throws (via enforce) unless the name
      matches rgx.src_pth_sst_or_ssm.
    +/
    auto markupSourceReadIn(in string fn_src) {
      enforce(
        fn_src.match(rgx.src_pth_sst_or_ssm),
        "not a sisu markup filename: «" ~
        fn_src ~ "»"
      );
      return readInMarkupSource(fn_src);
    }
    /++
      Split source text into tuple(header, body lines, [], []); the two
      trailing lists (insert files, images) are returned empty here.
    +/
    auto markupSourceHeaderContentRawLineTupleArray(in string source_txt_str) {
      string[] file_insert_list = [];
      string[] images_list = [];
      auto hc = header0Content1(source_txt_str);
      auto header = hc[0];
      char[] source_txt = hc[1];
      auto source_line_arr = markupSourceLineArray(source_txt);
      return tuple(
        header,
        source_line_arr,
        file_insert_list,
        images_list
      );
    }
    /++
      Read the insert file fn_src_insert and return its lines; throws (via
      enforce) unless the name matches rgx_file.
    +/
    final char[][] getInsertMarkupSourceContentRawLineArray(
      in char[] fn_src_insert,
      Regex!(char) rgx_file
    ) {
      enforce(
        fn_src_insert.match(rgx_file),
        "not a sisu markup filename: «" ~
        fn_src_insert ~ "»"
      );
      auto source_txt_str = readInMarkupSource(fn_src_insert);
      return markupSourceLineArray(source_txt_str);
    }
  }
  struct Inserts {
    /++
      Scan the lines of an inserted (sub)document.

      Lines inside curly or tic code blocks are copied unchanged. A line
      naming a further insert file is not copied.
      NOTE(review): the file such a line names is read, but its lines are
      not added to the result; it is only searched for images when
      _opt_action.source or _opt_action.sisupod is set. Recursive inclusion
      looks unfinished - confirm intent.

      Returns: tuple(lines kept, template-level _images accumulator)
    +/
    auto scan_subdoc_source(O)(
      O _opt_action,
      char[][] markup_sourcefile_insert_content,
      string fn_src
    ) {
      mixin SiSUrgxInitFlags;
      char[][] contents_insert;
      auto type1 = flags_type_init;
      auto fn_pth_full = fn_src.match(rgx.src_pth_sst_or_ssm);
      auto markup_src_file_path = fn_pth_full.captures[1];
      foreach (line; markup_sourcefile_insert_content) {
        if (type1["curly_code"] == 1) {
          type1["header_make"] = 0;
          type1["header_meta"] = 0;
          if (line.matchFirst(rgx.block_curly_code_close)) {
            type1["curly_code"] = 0;
          }
          contents_insert ~= line;
        } else if (line.matchFirst(rgx.block_curly_code_open)) {
          type1["curly_code"] = 1;
          type1["header_make"] = 0;
          type1["header_meta"] = 0;
          contents_insert ~= line;
        } else if (type1["tic_code"] == 1) {
          type1["header_make"] = 0;
          type1["header_meta"] = 0;
          if (line.matchFirst(rgx.block_tic_close)) {
            type1["tic_code"] = 0;
          }
          contents_insert ~= line;
        } else if (line.matchFirst(rgx.block_tic_code_open)) {
          type1["tic_code"] = 1;
          type1["header_make"] = 0;
          type1["header_meta"] = 0;
          contents_insert ~= line;
        } else if (
          (type1["header_make"] == 1)
          && line.matchFirst(rgx.native_header_sub)
        ) {
          // header sub-line while in a "make" header: line is dropped
          type1["header_make"] = 1;
          type1["header_meta"] = 0;
        } else if (
          (type1["header_meta"] == 1)
          && line.matchFirst(rgx.native_header_sub)
        ) {
          // header sub-line while in a "meta" header: line is dropped
          type1["header_meta"] = 1;
          type1["header_make"] = 0;
        } else if (auto m = line.match(rgx.insert_src_fn_ssi_or_sst)) {
          type1["header_make"] = 0;
          type1["header_meta"] = 0;
          auto insert_fn = m.captures[2];
          auto insert_sub_pth = m.captures[1];
          auto fn_src_insert
            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
          auto raw = MarkupRawUnit();
          auto markup_sourcesubfile_insert_content
            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx.src_fn_find_inserts);
          debug(insert_file) {
            writeln(line);
            writeln(fn_src_insert);
            writeln(
              "  length contents insert array: ",
              markup_sourcesubfile_insert_content.length
            );
          }
          if (_opt_action.source || _opt_action.sisupod) {
            _images ~= _extract_images(markup_sourcesubfile_insert_content);
          }
          /+
            - 1. load file
            - 2. read lines
            - 3. scan lines
              - a. if filename insert, and insert filename
                - repeat 1
              - b. else
                - add line to new array;
                - build image list, search for any image files to add to image list
          +/
        } else {
          type1["header_make"] = 0;
          type1["header_meta"] = 0;
          contents_insert ~= line; // images to extract for image list?
          if (_opt_action.source || _opt_action.sisupod) {
            auto _image_linelist = _extract_images(line);
            if (_image_linelist.length > 0) {
              _images ~= _image_linelist;
            }
          }
        }
      } // end src subdoc (inserts) loop
      return tuple(
        contents_insert,
        _images
      );
    }
    /++
      Scan the body lines of a source document, replacing each insert-file
      line by the scanned lines of the file it names.

      Lines inside curly or tic code blocks are copied unchanged. When
      _opt_action.source or _opt_action.sisupod is set, images are
      collected from plain lines and from inserted content.

      Returns: tuple(resulting lines, insert file list,
      sorted image list without duplicates)
    +/
    auto scan_master_src_for_insert_files_and_import_content(O)(
      O _opt_action,
      char[][] sourcefile_body_content,
      string fn_src
    ) {
      import std.algorithm;
      mixin SiSUrgxInitFlags;
      char[][] contents;
      auto type = flags_type_init;
      auto fn_pth_full = fn_src.match(rgx.src_pth_sst_or_ssm);
      auto markup_src_file_path = fn_pth_full.captures[1];
      string[] _images =[]; // local list, shadows the template-level _images
      string[] insert_file_list =[];
      foreach (line; sourcefile_body_content) {
        if (type["curly_code"] == 1) {
          if (line.matchFirst(rgx.block_curly_code_close)) {
            type["curly_code"] = 0;
          }
          contents ~= line;
        } else if (line.matchFirst(rgx.block_curly_code_open)) {
          type["curly_code"] = 1;
          contents ~= line;
        } else if (type["tic_code"] == 1) {
          if (line.matchFirst(rgx.block_tic_close)) {
            type["tic_code"] = 0;
          }
          contents ~= line;
        } else if (line.matchFirst(rgx.block_tic_code_open)) {
          type["tic_code"] = 1;
          contents ~= line;
        } else if (auto m = line.match(rgx.insert_src_fn_ssi_or_sst)) {
          auto insert_fn = m.captures[2];
          auto insert_sub_pth = m.captures[1];
          auto fn_src_insert
            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
          insert_file_list ~= fn_src_insert.to!string;
          auto raw = MarkupRawUnit();
          /+ TODO +/
          auto markup_sourcefile_insert_content
            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx.src_fn_find_inserts);
          debug(insert_file) {
            writeln(line);
            writeln(fn_src_insert);
            writeln(
              "  length contents insert array: ",
              markup_sourcefile_insert_content.length
            );
          }
          auto contents_insert_tu = scan_subdoc_source(
            _opt_action,
            markup_sourcefile_insert_content,
            fn_src_insert.to!string
          );
          contents ~= contents_insert_tu[0]; // images to extract for image list?
          if (_opt_action.source || _opt_action.sisupod) {
            auto _image_linelist = _extract_images(contents_insert_tu[0]);
            if (_image_linelist.length > 0) {
              _images ~= _image_linelist;
            }
          }
          /+
            - 1. load file
            - 2. read lines
            - 3. scan lines
              - a. if filename insert, and insert filename
                - repeat 1
              - b. else
                - add line to new array;
                - build image list, search for any image files to add to image list
          +/
        } else {
          contents ~= line;
          if (_opt_action.source || _opt_action.sisupod) {
            auto _image_linelist = _extract_images(line);
            if (_image_linelist.length > 0) {
              _images ~= _image_linelist;
            }
          }
        }
      } // end src doc loop
      // sorted, de-duplicated image list
      string[] images = [];
      foreach(i; uniq(_images.sort())) {
        images ~= i;
      }
      debug(insert_file) {
        writeln(__LINE__);
        writeln(contents.length);
      }
      return tuple(
        contents,
        insert_file_list,
        images
      );
    }
  }
}
// -- cgit v1.2.3