From 0e6fc15ada3c5d9a86b227163f35a54993b32529 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 2 Dec 2008 23:54:23 -0500 Subject: sisu harvest, introduce module along with header syntax addition & modification * sisu markup, additional header and new format rule: * @creator: / @author: header field, introduced author name format rules for more usable metadata harvesting: surname comma other names, additional authors separated by semi-colon * param added meta-tag, @topic_register: formatting topic levels are separated from sub-levels by a colon, a semi-colon separates main topics if there are multiple topics at lowest sub-level, a pipe can be used to create multiple headings * harvest module, harvests metadata from document set currently extracts: (i) authors and their writings from document set; (ii) topics and associated writings from document set (topics use topic_register header). harvest (when run against documents common to a directory of a site) extracts metadata and organises the documents on a site by author and topic information provided (there is a new "topic_register" header, with formatting rules similar to those of the book index), results are placed in [output_path]/sisu_site_metadata. sisu --harvest *.sst * by author (see change in param @creator: / @author: header field) * by topic / subject index (see addition in param of @topic_register: header field) initially there should be example samples here: http://www.jus.uio.no/sisu/sisu_site_metadata/harvest_authors.html http://www.jus.uio.no/sisu/sisu_site_metadata/harvest_topics.html together with updated markup source files The authors and their writings list will be made to take on a more bibliographical form, with the use of additional fields as required. 
(concept example, suitable for medium sized sites [to remove size constraint: implement SQL equivalent]) make feature more robust * css, for harvest output added * remote placement of sisu_site_metadata (output produced by metadata harvest) * sisu markup, update document samples accordingly * tidy copyright marks in program headers, remove repetition of dates [version bump because formatting rule introduced to author / creator header - where new site metadata harvest feature is used, (at present changes should not be noticed except when using metadata harvest)] --- lib/sisu/v0/hub.rb | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) (limited to 'lib/sisu/v0/hub.rb') diff --git a/lib/sisu/v0/hub.rb b/lib/sisu/v0/hub.rb index 58ea4c4f..93766140 100644 --- a/lib/sisu/v0/hub.rb +++ b/lib/sisu/v0/hub.rb @@ -14,8 +14,7 @@ SiSU, a framework for document structuring, publishing and search - Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008 Ralph Amissah + Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -133,7 +132,7 @@ module SiSU when /^xml_scaffold$/; SiSU_XML_scaffold::Source.new(@opt).read # -T #when /^air$/; SiSU_Air::Source.new(@opt).read # -A when /^plaintext$/; SiSU_Plaintext::Source.new(@opt).read # -a - when /^wikispeak$/; SiSU_Wikispeak::Source.new(@opt).read # -g + #when /^git$/; SiSU_Git::Source.new(@opt).read # -g #when /^wikispeak$/; SiSU_Wikispeak::Source.new(@opt).read # -g when /^odf$/; SiSU_ODF::Source.new(@opt).read # -o when /^xml_md_oai_pmh_dc$/; SiSU_XML_metadata::OAI_PMH.new(@opt).read # -O @@ -164,22 +163,25 @@ module SiSU Operations.new(@opt).not_found unless @req =~/^conf$/ end elsif FileTest.file?(put) - case @req - when /^urls$/; SiSU_urls::Source.new(@opt).read # -u -v -V -M - when /^remote$/ - case @message - when 
/scp/; SiSU_Remote::Put.new(@opt).scp # -r - when /rsync/; SiSU_Remote::Put.new(@opt).rsync # -R - else #SiSU_Remote::Put.new(put,@opt.cmd).scp + if @opt.mod.inspect !~/harvest/ #decide whether should permit harvest in single operation + case @req + when /^urls$/; SiSU_urls::Source.new(@opt).read # -u -v -V -M + when /^remote$/ + case @message + when /scp/; SiSU_Remote::Put.new(@opt).scp # -r + when /rsync/; SiSU_Remote::Put.new(@opt).rsync # -R + else #SiSU_Remote::Put.new(put,@opt.cmd).scp + end end + @n_do=@n_do+1 + tell=SiSU_Screen::Ansi.new(@opt.cmd,@n_do,"#{@req.upcase} processed") + tell.files_processed unless @opt.cmd =~/q/ end - @n_do=@n_do+1 - tell=SiSU_Screen::Ansi.new(@opt.cmd,@n_do,"#{@req.upcase} processed") - tell.files_processed unless @opt.cmd =~/q/ else Operations.new(@opt).not_found end end end + SiSU_Remote::Put.new(@opt).rsync_harvest if @opt.cmd.inspect =~/R/ and @opt.mod.inspect =~/harvest/ elsif @req =~/^dbi$/; SiSU_DBI::SiSU_SQL.new(@opt).connect # -D -d elsif @req=~/^sisupod_make$/; SiSU_Doc::Source.new(@opt).read # -S end @@ -276,6 +278,9 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/ Op.new(@opt,req,msg).select if req and msg end def actions + if @opt.mod.inspect =~/--harvest/ + require "#{SiSU_lib}/harvest" + end if @opt.mod.inspect =~/--convert|--to|--from/ require "#{SiSU_lib}/sst_convert_markup" end @@ -424,8 +429,8 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/ #end if @opt.cmd =~/a/; op('plaintext','plaintext') #% -a #-A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file end - if @opt.cmd =~/g/; op('wikispeak','wikispeak') #% -g wiki - end + #if @opt.cmd =~/g/; op('git','git') #% -g git + #end #if @opt.cmd =~/g/; op('wikispeak','wikispeak') #% -g wiki #end if @opt.cmd =~/o/; op('odf','OpenDocument') #% -o opendocument @@ -484,9 +489,14 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/ end if @opt.cmd =~/Y/; op('sitemaps','Sitemap') #% -Y sitemap end - if @opt.cmd =~/r/; op('remote','scp') #% -r 
copy to remote server - end - if @opt.cmd =~/R/; op('remote','rsync') #% -R copy to remote server + if @opt.mod.inspect !~/--harvest/ + if @opt.cmd =~/r/; op('remote','scp') #% -r copy to remote server + end + if @opt.cmd =~/R/; op('remote','rsync') #% -R copy to remote server + end + else + if @opt.cmd =~/R/; op('remote','rsync_harvest') #% -R copy to remote server + end end if @opt.cmd =~/[QuUvVM]/; op('urls','urls') #% -Q -u -v -V -M urls end @@ -518,7 +528,7 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/ SiSU_Remote::Put.new(@opt).rsync_sitemaps end else #% help instructions - unless @opt.mod.inspect =~/--convert|--to|--from/ + unless @opt.mod.inspect =~/--convert|--to|--from|--harvest/ if @opt.mod.inspect =~/--help/ \ and not @opt.what.empty? SiSU_Help::Help.new(@opt.what).help_request -- cgit v1.2.3