about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu/v4/dal_doc_str.rb
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2013-09-21 00:05:24 -0400
committer Ralph Amissah <ralph@amissah.com> 2013-09-21 00:05:24 -0400
commit 6a8a7c3cf32fa1920bfcd62c0702b14dd84e6d65 (patch)
tree 348f3bc61f0fe8f0bab77ffe970802557a80d502 /lib/sisu/v4/dal_doc_str.rb
parent v4 v5: minor cleaning (diff)
v4: dal, objects, store book index as hash (simplify downstream processing)
* merge v5 * db, book index, convert hash back to string (associated downstream processing change)
Diffstat (limited to 'lib/sisu/v4/dal_doc_str.rb')
-rw-r--r-- lib/sisu/v4/dal_doc_str.rb 54
1 files changed, 52 insertions, 2 deletions
diff --git a/lib/sisu/v4/dal_doc_str.rb b/lib/sisu/v4/dal_doc_str.rb
index 2a3e6b83..e4a0be2b 100644
--- a/lib/sisu/v4/dal_doc_str.rb
+++ b/lib/sisu/v4/dal_doc_str.rb
@@ -168,6 +168,55 @@ module SiSU_DAL_DocumentStructureExtract
end
[str,tags]
end
# Pattern used to split an index term of the form 'text +N':
# capture 1 is the (lazily matched) text before the '+', capture 2 is the
# run of digits after it. Optional whitespace before the '+' is not captured.
#
# The pattern is also stored in @rgx_idx_ocn_seg on every call.
#
# @return [Regexp]
def rgx_idx_ocn_seg
  pattern = /(.+?)\s*[+](\d+)/
  @rgx_idx_ocn_seg = pattern
end
# Parses a raw book-index string into both a nested array and a lookup hash.
#
# Parsing rules, as implemented here:
# * entries are separated by ';'
# * within an entry, a main term is separated from its sub-terms by ':'
#   (only the first two ':'-separated pieces are used)
# * sub-terms are separated by '|'
# * a term may end in '+N' (N = digits); terms without it get '+0' appended
# * the first non-blank character of each main term is capitalised
#
# An entry with a ':' but nothing after it is treated as a main term without
# sub-terms (previously this raised NoMethodError on nil).
#
# Side effect: @use is left holding the last main term seen (kept from the
# earlier implementation; NOTE(review): confirm whether anything outside this
# method reads it).
#
# @param idxraw [String, nil] raw index text, e.g. 'a:b+2|c;d+5'
# @return [Hash] with two keys:
#   :hash  => { 'Main' => { sub: [{ 'sub' => { plus: 'N' } }, ...], plus: 'N' } }
#   :array => one list per entry, ['Main+N'] or ['Main+N', ['sub+N', ...]]
#   ('N' values are Strings taken from the input, '0' when absent)
def construct_idx_array_and_hash(idxraw)
  idx_hash, idx_array = {}, []
  has_plus_number = /.+?[+]\d+/
  idxraw.to_s.scan(/[^;]+/).each do |idx|
    if idx =~ /\S+?\s*:/
      couplet = idx.scan(/\s*[^:]+\s*/)
      main = couplet[0]
      # nil.to_s keeps "term:" (no sub-terms given) from crashing; a string
      # without '|' scans to a single-element list, as before.
      subs = couplet[1].to_s.scan(/\s*[^|]+\s*/)
    else
      main, subs = idx, []
    end
    # Capitalise the first non-blank character so that leading whitespace
    # (e.g. the ' b' in 'a; b') does not defeat capitalisation of the key.
    main = main.sub(/\A(\s*)(\S)/) { "#{$1}#{$2.capitalize}" }
    main += '+0' unless main =~ has_plus_number
    use, plus = rgx_idx_ocn_seg.match(main)[1, 2]
    @use = use.strip
    # First occurrence of a main term wins; later duplicates only add sub-terms.
    idx_hash[@use] ||= { sub: [], plus: plus }
    term_nodes = [main]
    unless subs.empty?
      subterm_nodes = subs.map { |s| s =~ has_plus_number ? s : s + '+0' }
      subterm_nodes.each do |subterm_node|
        sub, sub_plus = rgx_idx_ocn_seg.match(subterm_node)[1, 2]
        idx_hash[@use][:sub] << { sub.strip => { plus: sub_plus } }
      end
      term_nodes << subterm_nodes
    end
    idx_array << term_nodes
  end
  { hash: idx_hash, array: idx_array }
end
def identify_parts
tuned_file=[]
@tuned_block,@tuned_code=[],[]
@@ -205,9 +254,10 @@ module SiSU_DAL_DocumentStructureExtract
idx=if t_o=~/^=\{(.+)\}\s*$\Z/m; m=$1
m=m.split(/\n/).join(' ').
gsub(/\s+([|:;])\s+/,'\1').
- gsub(/\s+([+])\s+/,'\1')
+ gsub(/\s+([+]\d+)\s+/,'\1')
t_o=t_o.gsub(/\n=\{.+\}\s*$\Z/m,'')
- m
+ idx_array_and_hash=construct_idx_array_and_hash(m)
+ idx_array_and_hash[:hash]
else nil
end
end