diff options
author | Ralph Amissah <ralph@amissah.com> | 2013-09-21 00:05:24 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2013-09-21 00:05:24 -0400 |
commit | 6a8a7c3cf32fa1920bfcd62c0702b14dd84e6d65 (patch) | |
tree | 348f3bc61f0fe8f0bab77ffe970802557a80d502 /lib/sisu/v4/dal_doc_str.rb | |
parent | v4 v5: minor cleaning (diff) |
v4: dal, objects, store book index as hash (simplify downstream processing)
* merge v5
* db, book index, convert hash back to string (associated downstream processing
change)
Diffstat (limited to 'lib/sisu/v4/dal_doc_str.rb')
-rw-r--r-- | lib/sisu/v4/dal_doc_str.rb | 54 |
1 files changed, 52 insertions, 2 deletions
```diff
diff --git a/lib/sisu/v4/dal_doc_str.rb b/lib/sisu/v4/dal_doc_str.rb
index 2a3e6b83..e4a0be2b 100644
--- a/lib/sisu/v4/dal_doc_str.rb
+++ b/lib/sisu/v4/dal_doc_str.rb
@@ -168,6 +168,55 @@ module SiSU_DAL_DocumentStructureExtract
 end
 [str,tags]
 end
+ def rgx_idx_ocn_seg
+ @rgx_idx_ocn_seg=/(.+?)\s*[+](\d+)/
+ end
+ def construct_idx_array_and_hash(idxraw)
+ idx_array_raw=idxraw.scan(/[^;]+/)
+ idx_hash,idx_array,idx_lst={},[],[]
+ idx_array_raw.each do |idx|
+ idx_lst=case idx
+ when /\S+?\s*:/
+ idx_couplet_tmp=[]
+ idx_couplet=idx.scan(/\s*[^:]+\s*/)
+ if idx_couplet[1] =~/[|]/
+ idx_couplet_tmp << idx_couplet[0] << idx_couplet[1].scan(/\s*[^|]+\s*/)
+ else
+ idx_couplet_tmp << idx_couplet[0] << [idx_couplet[1]]
+ end
+ idx_couplet=idx_couplet_tmp
+ else [idx]
+ end
+ term_nodes=[]
+ idx_lst.each do |term_node|
+ case term_node
+ when String
+ term_node=term_node[0].chr.capitalize + term_node[1,term_node.length]
+ term_node=(term_node =~/.+?[+]\d+/) \
+ ? term_node
+ : (term_node + '+0')
+ term_nodes << term_node
+ use,plus=rgx_idx_ocn_seg.match(term_node)[1,2]
+ @use=use.strip
+ idx_hash[@use]={ sub: [], plus: plus } unless idx_hash[@use] and defined? idx_hash[@use]
+ when Array
+ subterm_nodes=[]
+ term_node.each do |subterm_node|
+ subterm_node=(subterm_node =~/.+?[+]\d+/) \
+ ? subterm_node
+ : (subterm_node + '+0')
+ subterm_nodes << subterm_node
+ sub,sub_plus=rgx_idx_ocn_seg.match(subterm_node)[1,2]
+ idx_hash[@use]={ sub: [], plus: 0 } unless idx_hash[@use] and defined? idx_hash[@use]
+ idx_hash[@use][:sub] << {sub.strip => { plus: sub_plus }}
+ end
+ term_nodes << subterm_nodes
+ end
+ end
+ idx_array << term_nodes
+ end
+ { hash: idx_hash, array: idx_array }
+ end
 def identify_parts
 tuned_file=[]
 @tuned_block,@tuned_code=[],[]
@@ -205,9 +254,10 @@ module SiSU_DAL_DocumentStructureExtract
 idx=if t_o=~/^=\{(.+)\}\s*$\Z/m; m=$1
 m=m.split(/\n/).join(' ').
 gsub(/\s+([|:;])\s+/,'\1').
- gsub(/\s+([+])\s+/,'\1')
+ gsub(/\s+([+]\d+)\s+/,'\1')
 t_o=t_o.gsub(/\n=\{.+\}\s*$\Z/m,'')
- m
+ idx_array_and_hash=construct_idx_array_and_hash(m)
+ idx_array_and_hash[:hash]
 else nil
 end
 end
```