From 07fcd01699e0b67f5a822514d4fcfab4b3c70ff0 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 23 Oct 2010 14:46:08 -0400 Subject: internal links, punctuation & parentheses issue, fix * dal, exclude parentheses and square brackets from matching (requested fix, Ben Armstrong) * html, remove space between link & following punctuation, parentheses &/or square brackets (requested fix, Ben Armstrong) * epub, remove space between link & following punctuation, parentheses &/or square brackets --- lib/sisu/v2/dal_syntax.rb | 6 +++--- lib/sisu/v2/epub_tune.rb | 2 +- lib/sisu/v2/html_tune.rb | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/dal_syntax.rb b/lib/sisu/v2/dal_syntax.rb index c4059fa8..e56d4a26 100644 --- a/lib/sisu/v2/dal_syntax.rb +++ b/lib/sisu/v2/dal_syntax.rb @@ -384,11 +384,11 @@ module SiSU_Syntax "\\1#{Mx[:lnk_o]}\\2#{Mx[:lnk_c]}\\3") #linked (text or image, however text cannot include modified face, e.g. bold, ital, underline) dob.obj.gsub!(/(^|[#{Mx[:gl_c]}#{Mx[:lnk_c]}#{Mx[:en_a_o]}#{Mx[:en_b_o]}(\s])((?:https?|ftp):\/\/\S+?\.[^>< ]+?)([,.;'"]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, %{\\1#{Mx[:url_o]}\\2#{Mx[:url_c]}\\3}) - dob.obj.gsub!(/#{Mx[:lnk_c]}#(\S+?[^>< ]+?)([,.;'"()\[\]]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, + dob.obj.gsub!(/#{Mx[:lnk_c]}#(\S+?[^>< ]+?)([()\[\]]*[,.;'"]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, %{#{Mx[:lnk_c]}#{Mx[:rel_o]}\\1#{Mx[:rel_c]}\\2}) - dob.obj.gsub!(/#{Mx[:lnk_c]}:(\S+?[^>< ]+?)([,.;'"()\[\]]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, + dob.obj.gsub!(/#{Mx[:lnk_c]}:(\S+?[^>< ]+?)([()\[\]]*[,.;'"]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, %{#{Mx[:lnk_c]}#{Mx[:rel_o]}:\\1#{Mx[:rel_c]}\\2}) - dob.obj.gsub!(/#{Mx[:lnk_c]}[.]{2}\/(\S+?[^>< ]+?)([,.;'"()\[\]]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, + dob.obj.gsub!(/#{Mx[:lnk_c]}[.]{2}\/(\S+?[^>< ]+?)([()\[\]]*[,.;'"]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, %{#{Mx[:lnk_c]}#{Mx[:rel_o]}:\\1#{Mx[:rel_c]}\\2}) end if dob.obj=~/_(?:https?|ftp):\S+/m # _http://url #CHECK diff --git a/lib/sisu/v2/epub_tune.rb b/lib/sisu/v2/epub_tune.rb index 90bf9575..8ef19f19 100644 --- a/lib/sisu/v2/epub_tune.rb +++ b/lib/sisu/v2/epub_tune.rb @@ -333,7 +333,7 @@ module SiSU_EPUB_Tune %{}) end if dob.obj =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)/ - @word_mode=dob.obj.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)[.,;:!?)\]]?|(?:#{Mx[:gl_o]}\S+?#{Mx[:gl_c]})+|[^#{Mx[:lnk_o]}#{Mx[:lnk_c]}]+/mu) + @word_mode=dob.obj.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)[()\[\]]*[.,;:!?]?|(?:#{Mx[:gl_o]}\S+?#{Mx[:gl_c]})+|[^#{Mx[:lnk_o]}#{Mx[:lnk_c]}]+/mu) words=urls(@word_mode) dob.obj.gsub!(/.+/m,words) end diff --git a/lib/sisu/v2/html_tune.rb b/lib/sisu/v2/html_tune.rb index eddd3049..65a66844 100644 --- a/lib/sisu/v2/html_tune.rb +++ b/lib/sisu/v2/html_tune.rb @@ -305,7 +305,7 @@ module SiSU_HTML_Tune data.each do |dob| unless dob.is=='code' if dob.obj =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)/ - @word_mode=dob.obj.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)[.,;:!?)\]]?|(?:#{Mx[:gl_o]}\S+?#{Mx[:gl_c]})+|[^#{Mx[:lnk_o]}#{Mx[:lnk_c]}]+/mu) + @word_mode=dob.obj.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)[()\[\]]*[.,;:!?]?|(?:#{Mx[:gl_o]}\S+?#{Mx[:gl_c]})+|[^#{Mx[:lnk_o]}#{Mx[:lnk_c]}]+/mu) words=urls(@word_mode) dob.obj.gsub!(/.+/m,words) end #consider change, do a while loop -- cgit v1.2.3