From aae005b24ce816d89bcda6e72de2cdeadcf7ded0 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 17 Sep 2016 16:45:36 -0400 Subject: heading anchor_tags and cleaning --- src/sdp/ao_rgx.d | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/sdp/ao_rgx.d') diff --git a/src/sdp/ao_rgx.d b/src/sdp/ao_rgx.d index cd97a02..ccaf1bd 100644 --- a/src/sdp/ao_rgx.d +++ b/src/sdp/ao_rgx.d @@ -61,11 +61,16 @@ template RgxInit() { static native_subhead_identifier = ctRegex!(`^(?:oclc|pg|isbn)$`, "m"); static native_subhead_notes = ctRegex!(`^(?:abstract|description)$`, "m"); static native_subhead_publisher = ctRegex!(`^(?:name)$`, "m"); - static native_subhead_make = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m"); + static native_subhead_make = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|num_depth|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m"); /+ heading & paragraph operators +/ static heading_a = ctRegex!(`^:?[A][~] `, "m"); static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?) `); static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); + static heading_anchor_tag = ctRegex!(`^:?[A-D1-4][~]([a-z0-9_.-]+) `,"i"); + static heading_identify_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9.]+))`,"i"); // unless dob.obj =~/^:?[A-D1-4]~\s+(?:|(?:chapter|article|section|clause)\s+)([0-9.]+)/i + static heading_extract_named_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+([0-9.]+)`,"i"); + static heading_extract_unnamed_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+([0-9.]+)`); + static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `); static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); // test, particularly [2] name/hashtag which may or may not be, does this affect title [3] static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); @@ -165,7 +170,7 @@ template RgxInit() { // static auto_obj_cite_number_ignore = ctRegex!(`^[+~*$-]{3,}$`); // reminder static obj_cite_number_block_marks = ctRegex!(`^--[+~-]#$`); /+ ignore outside code blocks +/ - static skip_code_block_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info + static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info /+ line & page breaks +/ static break_line_within_object = ctRegex!(`[\\]{2}( |$)`); // static break_line_break_within_object = ctRegex!(`( |^)[\\]{2}( |$)`); -- cgit v1.2.3