diff options
Diffstat (limited to 'lib/sisu/develop/html_harvest_authors.rb')
-rw-r--r-- | lib/sisu/develop/html_harvest_authors.rb | 466 |
1 files changed, 0 insertions, 466 deletions
# encoding: utf-8
=begin

* Name: SiSU

** Description: documents, structuring, processing, publishing, search
*** metadata harvest, extract authors and their writings from document set

** Author: Ralph Amissah
  <ralph@amissah.com>
  <ralph.amissah@gmail.com>

** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
  2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
  All Rights Reserved.

** License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program. If not, see <http://www.gnu.org/licenses/>.

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
    <http://www.fsf.org/licensing/licenses/gpl.html>
    <http://www.gnu.org/licenses/gpl.html>

    <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

** SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

** Homepages:
  <http://www.jus.uio.no/sisu>
  <http://www.sisudoc.org>

** Git
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/harvest_authors.rb;hb=HEAD>

=end
module SiSU_HarvestAuthors
  require_relative 'html_harvest_author_format'         # html_harvest_author_format.rb
  require_relative 'html_parts'                         # html_parts.rb

  # Entry point of the author harvest. Reads the header paragraphs of every
  # source file listed in opt.f_pths, hands them to Harvest, folds the result
  # into a per-language author index (Index) and writes it out as HTML
  # (OutputIndex).
  class Songsheet
    # Author index shared by all Songsheet instances. The same hash object is
    # passed to Index, which fills it in place, so entries accumulate across
    # runs within one process.
    @@the_idx_authors = {}

    # opt:: options object; this class reads opt.files and opt.f_pths
    # env:: environment object, forwarded to Harvest
    def initialize(opt, env)
      @opt, @env = opt, env
      @file_list = opt.files
    end

    # Runs the whole harvest and writes the authors pages.
    # Returns whatever OutputIndex#html_songsheet returns.
    def songsheet
      idx_array = {}
      @opt.f_pths.each do |y|
        name = y[:f]
        filename = y[:pth] + '/' + y[:f]
        lang = y[:lng_is]
        paragraphs = header_paragraphs(filename)
        # A file without any @title/@creator/@date paragraph contributes nothing.
        next if paragraphs.empty?
        idx_array[lang] ||= []
        idx_array = SiSU_HarvestAuthors::Harvest.new(
          @opt,
          @env,
          paragraphs,
          filename,
          name,
          idx_array,
          lang
        ).extract_harvest
      end
      the_idx = SiSU_HarvestAuthors::Index.new(
        idx_array,
        @@the_idx_authors
      ).construct_book_author_index
      SiSU_HarvestAuthors::OutputIndex.new(
        @opt,
        the_idx
      ).html_print.html_songsheet
    end

    private

    # Collects the @title, @creator and @date paragraphs from the top of
    # +filename+. The file is read in blank-line separated paragraphs; other
    # "@name:" header paragraphs, blank paragraphs and "% " comment paragraphs
    # are skipped, and reading stops at the first paragraph that is none of
    # these.
    def header_paragraphs(filename)
      paragraphs = []
      File.open(filename, 'r') do |file|
        file.each_line("\n\n") do |para|
          if para =~ /^@(?:title|creator|date):(?:\s|$)/m
            paragraphs << para
          elsif para =~ /^@\S+?:(?:\s|$)/m || para =~ /^(?:\s*\n|%+ )/
            next
          else
            break
          end
        end
      end
      paragraphs
    end
  end

  # Extracts title, subtitle, author and publication year from the header
  # paragraphs of one document and appends an entry to idx_array[lang].
  class Harvest
    # Patterns applied to each header paragraph; capture group 1 is the value.
    RGX = {
      author:   /^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m,
      title:    /^@title:[ ]+(.+)/,
      subtitle: /^@title:.+?:subtitle:[ ]+(.+?)\n/m,
      date:     /^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m
    }.freeze

    # data::      array of header paragraphs (strings) of one document
    # filename::  path of the document
    # name::      file name of the document (without directory)
    # idx_array:: hash of language => array of entries collected so far
    # lang::      language key the document is filed under
    def initialize(opt, env, data, filename, name, idx_array, lang)
      @opt, @env, @data, @filename, @name, @idx_array, @lang =
        opt, env, data, filename, name, idx_array, lang
    end

    # Returns idx_array, with one entry appended to idx_array[lang] when the
    # document has both a title and a creator; documents lacking either are
    # silently left out.
    def extract_harvest
      data, filename, name, idx_array, lang =
        @data, @filename, @name, @idx_array, @lang
      @title = @subtitle = @fulltitle = @author = @author_format = @date = nil
      @authors = []
      # Tolerate callers that have not pre-initialised the language slot.
      idx_array[lang] ||= []
      data.each do |para|
        @title         = para[RGX[:title], 1]    || @title
        @subtitle      = para[RGX[:subtitle], 1] || @subtitle
        @author_format = para[RGX[:author], 1]   || @author_format
        @date          = para[RGX[:date], 1]     || @date
        # Stop once every field is known. (The original tested @author, which
        # is never assigned, so the early exit could not trigger.)
        break if @title && @subtitle && @author_format && @date
      end
      # A subtitle without a title must not raise; it simply yields no title.
      @fulltitle = (@title && @subtitle) ? (@title + ' - ' + @subtitle) : @title
      if @title && @author_format
        creator = SiSU_FormatAuthor::Author.new(@author_format.strip).author_details
        @authors, @authorship = creator[:authors], creator[:authorship]
        # Strip the source suffix, including an optional "~xx" language marker.
        file = name.sub(/(?:~[a-z]{2,3})?\.ss[mt]$/, '')
        page = if @env.output_dir_structure.by? == :language
          "#{lang}/sisu_manifest.html"
        else
          "sisu_manifest.#{lang}.html"
        end
        idx_array[lang] << {
          filename: filename,
          file:     file,
          date:     @date,
          title:    @fulltitle,
          author:   creator,
          page:     page,
          lang:     lang
        }
      else
        #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}"
      end
      idx_array[lang] = idx_array[lang].flatten
      idx_array
    end
  end

  # Turns the per-language list of documents into a per-language index keyed
  # by author name: the_idx[lang][author][:md] is the list of that author's
  # documents.
  class Index
    # idx_array:: hash of language => array of entries built by Harvest
    # the_idx::   index hash to fill; it is modified in place
    def initialize(idx_array, the_idx)
      @idx_array, @the_idx = idx_array, the_idx
      @@the_idx_authors = @the_idx
    end

    # Upper-cases the first character of +txt+ and leaves the rest untouched.
    # nil and empty strings are returned unchanged instead of raising.
    def capital(txt)
      return txt if txt.nil? || txt.empty?
      txt[0].chr.capitalize + txt[1, txt.length]
    end

    # Files every document under each of the names listed in
    # entry[:author][:last_first_format_a] and returns the index.
    def construct_book_author_index
      @idx_array.each_pair do |lang, idx_arr|
        @@the_idx_authors[lang] ||= {}
        idx_arr.each do |idx|
          idx[:author][:last_first_format_a].each do |author|
            author = author.strip
            @@the_idx_authors[lang][author] ||= { md: [] }
            @@the_idx_authors[lang][author][:md] << {
              filename: idx[:filename],
              file:     idx[:file],
              author:   idx[:author],
              title:    idx[:title],
              date:     idx[:date],
              page:     idx[:page],
              lang:     idx[:lang]
            }
          end
        end
      end
      @the_idx = @@the_idx_authors
    end
  end

  # Writes the author index as one HTML page per language.
  #
  # Output handles live in @output[lng]: :html is opened by html_file_open.
  # An optional :html_mnt ("maintenance") handle is written to wherever it is
  # a File, but nothing in this file opens it, so every use is guarded.
  class OutputIndex
    require_relative 'i18n'                             # i18n.rb

    # opt::     options object; reads opt.act, opt.files and opt.base_stub
    # the_idx:: index built by Index (language => author => { md: [...] })
    def initialize(opt, the_idx)
      @opt, @the_idx = opt, the_idx
      @env = SiSU_Env::InfoEnv.new
      @rc = SiSU_Env::GetInit.new.sisu_yaml.rc
      @alphabet_list = %w[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
      @alph = @alphabet_list.dup
      @letter = @alph.shift
    end

    # Opens (creating directories as needed) the authors page of every
    # language in the index and announces it on screen unless quiet is set.
    def html_file_open
      @the_idx.keys.each do |lng|
        @output ||= {}
        @output[lng] ||= {}
        harvest_pth, file = '', ''
        case @env.output_dir_structure.by?
        when :language
          harvest_pth = @env.path.webserv + '/' \
            + @opt.base_stub + '/' \
            + lng + '/' \
            + 'manifest'
          file = "#{harvest_pth}/authors.html"
        when :filetype
          harvest_pth = @env.path.webserv + '/' \
            + @opt.base_stub + '/' \
            + 'manifest'
          file = "#{harvest_pth}/authors.#{lng}.html"
        when :filename
          harvest_pth = @env.path.webserv + '/' \
            + @opt.base_stub
          file = "#{harvest_pth}/authors.#{lng}.html"
        end
        FileUtils::mkdir_p(harvest_pth) \
          unless FileTest.directory?(harvest_pth)
        show_path = @opt.act[:verbose][:set] == :on \
          || @opt.act[:verbose_plus][:set] == :on \
          || @opt.act[:urls_selected][:set] == :on \
          || @opt.act[:maintenance][:set] == :on
        fileinfo = show_path ? "file://#{file}" : ''
        SiSU_Screen::Ansi.new(
          @opt.act[:color_state][:set],
          "harvest authors (#{@opt.files.length} files)",
          fileinfo
        ).dark_grey_title_hi unless @opt.act[:quiet][:set] == :on
        @output[lng][:html] = File.new(file, 'w')
      end
    end

    # Closes every handle opened for the index pages.
    def html_file_close
      @the_idx.keys.each do |lng|
        @output[lng][:html].close
        mnt = maintenance_file(lng)
        mnt.close if mnt
      end
    end

    # Kept for the existing call chain OutputIndex#html_print.html_songsheet.
    # The HTML methods used to be defined inside this method (and so were
    # redefined on every call); they are now ordinary instance methods.
    def html_print
      self
    end

    # Writes the complete page for every language: head, alphabet bar, body,
    # tail; then closes the files.
    def html_songsheet
      html_file_open
      html_head
      html_alph
      html_body
      html_tail
      html_file_close
    end

    # Builds the HTML head and page banner for language +lng+.
    # type:: a string containing "maintenance" selects the local stylesheet
    #        name 'harvest.css' instead of a path relative to the output tree.
    def html_head_adjust(lng, type = '')
      structure = @env.output_dir_structure.by?
      local_css = type =~ /maintenance/
      css_path, topics =
        case structure
        when :language
          [local_css ? 'harvest.css' : '../../_sisu/css/harvest.css', 'topics.html']
        when :filetype
          [local_css ? 'harvest.css' : '../_sisu/css/harvest.css', "topics.#{lng}.html"]
        when :filename
          [local_css ? 'harvest.css' : './_sisu/css/harvest.css', "topics.#{lng}.html"]
        else
          ['', '']
        end
      ln = SiSU_i18n::Languages.new.language.list
      # One link per language present in the index.
      harvest_languages = @the_idx.keys.map do |lg|
        file =
          case structure
          when :language then "../../#{lg}/manifest/authors.html"
          when :filetype, :filename then "./authors.#{lg}.html"
          end
        %{<a href="#{file}">#{ln[lg][:t]}</a> }
      end.join
      sv = SiSU_Env::InfoVersion.instance.get_version
      home_pth, output_structure_by =
        case structure
        when :language then ['../..', '(output organised by language & filetype)']
        when :filetype then ['..', '(output organised by filetype)']
        when :filename then ['.', '(output organised by filename)']
        else ['.', '(output organised by ?)']
        end
      <<WOK
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>SiSU Metadata Harvest - Authors</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="dc.title" content= "SiSU metadata harvest, Authors - SiSU information Structuring Universe, Structured information Serialised Units" />
<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
<meta name="generator" content="#{sv.project} #{sv.version} of #{sv.date_stamp} (n*x and Ruby!)" />
<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
<link href="#{css_path}" rel="stylesheet" >
<link rel="shortcut icon" href="../_sisu/image/rb7.ico" />
</head>
<body lang="en" xml:lang="en">
<a name="top" id="top"></a>
<a name="up" id="up"></a>
<a name="start" id="start"></a>
<h1>SiSU Metadata Harvest - Authors #{output_structure_by}</h1>
<p>[<a href="#{home_pth}/index.html"> HOME </a>] also see <a href="#{topics}">SiSU Metadata Harvest - Topics</a></p>
<p>#{@env.widget_static.search_form}</p>
<hr />
<p class="tiny">#{harvest_languages}</p>
<hr />
WOK
    end

    # Writes the page head to every language file.
    def html_head
      @the_idx.keys.each do |lng|
        mnt = maintenance_file(lng)
        # Guarded like every other maintenance write: the original appended
        # to a nil handle whenever maintenance was switched on.
        mnt << html_head_adjust(lng, 'maintenance') \
          if mnt && @opt.act[:maintenance][:set] == :on
        @output[lng][:html] << html_head_adjust(lng)
      end
    end

    # Writes the bar of letter links ("A, B, C, ...") to every language file.
    def html_alph
      a = []
      a << '<p>'
      @alph.each do |x|
        a << ((x =~ /[0-9]/) ? '' : %{<a href="##{x}">#{x}</a>, })
      end
      a = a.join
      @the_idx.keys.each do |lng|
        mnt = maintenance_file(lng)
        mnt << a if mnt && @opt.act[:maintenance][:set] == :on
        @output[lng][:html] << a
      end
    end

    # Writes the closing anchors, credits and end tags to every language file.
    def html_tail
      a = <<WOK
<hr />
<a name="bottom" id="bottom"></a>
<a name="down" id="down"></a>
<a name="end" id="end"></a>
<a name="finish" id="finish"></a>
<a name="stop" id="stop"></a>
<a name="credits"></a>
#{SiSU_Proj_HTML::Bits.new.credits_sisu}
</body>
</html>
WOK
      @the_idx.keys.each { |lng| do_html(lng, a) }
    end

    # Appends +html+ to the page of language +lng+ (and to the maintenance
    # file when one is open).
    def do_html(lng, html)
      mnt = maintenance_file(lng)
      mnt << html if mnt
      @output[lng][:html] << html
    end

    # Emits the letter headings needed before the author entry +string+
    # (an [author_name, details] pair), advancing through the alphabet until
    # the heading reaches the first non-blank character of the name.
    # attrib:: unused; kept for interface compatibility
    def do_string_name(lng, attrib, string)
      # The alphabet restarts for every language.
      if @lng != lng
        @alph = @alphabet_list.dup
        @letter = @alph.shift
        @lng = lng
      end
      initial = string[0].to_s[/^(\S)/, 1]
      return unless initial # empty author name: nothing to file it under
      while @letter < initial && !@alph.empty?
        @letter = @alph.shift
        # The maintenance variant used to emit the anchor mis-nested
        # (<a ...></p>X</a>); both outputs now get the same well-formed markup.
        do_html(
          lng,
          %{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
        )
      end
    end

    # Writes, per language, every author (sorted by name) followed by that
    # author's works sorted by "date title".
    def html_body
      structure = @env.output_dir_structure.by?
      @the_idx.each_pair do |lng, lng_array|
        lng_array.sort.each do |a|
          author, details = a
          do_string_name(lng, '', a)
          # Anchor name: text before the first comma, whitespace as "_".
          name = author.sub(/(.+?)(?:,.+|$)/, '\1').gsub(/\s+/, '_')
          do_html(lng, %{<p class="author"><a name="#{name}">#{author}</a></p>})
          lang_code_insert = SiSU_Env::FilenameLanguageCodeInsert.new(@opt, lng).language_code_insert
          mnt = maintenance_file(lng)
          works = details[:md].map do |i|
            manifest_at =
              case structure
              when :language then i[:file] + Sfx[:html]
              when :filetype then i[:file] + lang_code_insert + Sfx[:html]
              when :filename then './' + i[:file] + '/' + i[:page]
              else '' #error
              end
            # [sort key, public markup, (maintenance markup)]
            work = [
              "#{i[:date]} #{i[:title]}",
              %{<p class="publication">#{i[:date]} <a href="#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]}</p>}
            ]
            if mnt
              work << %{<p class="publication">[<a href="#{i[:file]}.sst">src</a>] #{i[:date]} <a href="file://#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]} -- [<a href="#{i[:file]}.sst">#{i[:file]}.sst</a>]</p>}
            end
            work
          end
          works.sort_by { |w| w[0] }.each do |w|
            @output[lng][:html] << w[1]
            mnt << w[2] if mnt
          end
        end
      end
    end

    # Kept for the existing call chain OutputIndex#screen_print.cycle.
    def screen_print
      self
    end

    # Prints the index to standard output: language, then each author and the
    # files attributed to that author, one tab deeper per level.
    # (The original iterated one level too shallow: it looked up :md on the
    # per-language hash, which is keyed by author name, and raised on nil.)
    def cycle
      @the_idx.sort.each do |lang, authors|
        puts lang
        authors.sort.each do |author, details|
          puts "\t" + author
          details[:md].each do |x|
            puts "\t\t" + x[:file]
          end
        end
      end
    end

    private

    # The maintenance output handle for +lng+, or nil when none is open.
    def maintenance_file(lng)
      mnt = @output[lng][:html_mnt]
      mnt if mnt.is_a?(File)
    end
  end
end
__END__