Skip to content
Snippets Groups Projects
Commit b088b9ec authored by Demian Katz's avatar Demian Katz Committed by GitHub
Browse files

Switch to OAI-PMH harvester as external dependency. (VUFIND-1176) (#721)

parent 21246d1c
No related merge requests found
......@@ -19,6 +19,9 @@
; dateGranularity = auto
; harvestedIdLog = harvest.log
; verbose = false
; autosslca = true
; sslcapath = "/etc/ssl/certs" ; e.g. for Debian systems
; sslcafile = "/etc/pki/tls/cert.pem" ; e.g. for CentOS systems
; sslverifypeer = true
; sanitize = true
; badXMLLog = bad.log
......@@ -103,6 +106,14 @@
; harvesting; this may be useful for troubleshooting purposes, but it defaults to
; false.
;
; autosslca may be set to true to attempt to autodetect your SSL certificate
; authority.
;
; sslcafile can be used to specify the path to an SSL certificate authority
; file (e.g. /etc/pki/tls/cert.pem on CentOS/RedHat systems).
;
; sslcapath can be used to specify the path to an SSL certificate authority
; directory (e.g. /etc/ssl/certs on Debian systems).
;
; sslverifypeer may be set to false to disable SSL certificate checking; it defaults
; to true, and changing the setting is not recommended.
;
......
This diff is collapsed.
<?php
/**
* OAI-PMH harvester unit test.
*
* PHP version 5
*
* Copyright (C) Villanova University 2010.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* @category Search
* @package Service
* @author David Maus <maus@hab.de>
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
* @link https://vufind.org/wiki/development
*/
namespace VuFindTest\Harvester;
use VuFind\Harvester\OAI;
/**
* OAI-PMH harvester unit test.
*
* PHP version 5
*
* Copyright (C) Villanova University 2010.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* @category Search
* @package Service
* @author David Maus <maus@hab.de>
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
* @link https://vufind.org/wiki/development
*/
class OAITest extends \VuFindTest\Unit\TestCase
{
    /**
     * Test configuration.
     *
     * Builds a harvester from a fully populated configuration array and checks
     * (via getProperty()) that every setting ends up in the expected property.
     *
     * @return void
     */
    public function testConfig()
    {
        $config = [
            'url' => 'http://localhost',
            'set' => 'myset',
            'metadataPrefix' => 'fakemdprefix',
            'idPrefix' => 'fakeidprefix',
            'idSearch' => 'search',
            'idReplace' => 'replace',
            'harvestedIdLog' => '/my/harvest.log',
            'injectId' => 'idtag',
            'injectSetSpec' => 'setspectag',
            'injectDate' => 'datetag',
            'injectHeaderElements' => 'headertag',
            'dateGranularity' => 'mygranularity',
            'verbose' => true,
            'sanitize' => true,
            'badXMLLog' => '/my/xml.log',
        ];
        $oai = new OAI('test', $config, $this->getMockClient());

        // Special cases where config key != class property:
        $this->assertEquals(
            $config['url'], $this->getProperty($oai, 'baseURL')
        );
        $this->assertEquals(
            $config['dateGranularity'], $this->getProperty($oai, 'granularity')
        );

        // Special case where value is transformed (scalar is wrapped in an
        // array):
        $this->assertEquals(
            [$config['injectHeaderElements']],
            $this->getProperty($oai, 'injectHeaderElements')
        );

        // Unset special cases in preparation for generic loop below:
        unset($config['url']);
        unset($config['dateGranularity']);
        unset($config['injectHeaderElements']);

        // Generic case for remaining configs (config key == property name):
        foreach ($config as $key => $value) {
            $this->assertEquals($value, $this->getProperty($oai, $key));
        }
    }

    /**
     * Test the injectSetName configuration.
     *
     * The mock client answers every request with a canned ListSets response;
     * the harvester is expected to expose the sets it contains as a
     * setSpec => setName map in its setNames property.
     *
     * @return void
     */
    public function testInjectSetNameConfig()
    {
        $client = $this->getMockClient();
        // send() on the mock client always returns the same response mock, so
        // calling it here gives us a handle for configuring that response.
        $response = $client->send();
        $response->expects($this->any())
            ->method('isSuccess')
            ->will($this->returnValue(true));
        $response->expects($this->any())
            ->method('getBody')
            ->will($this->returnValue($this->getListSetsResponse()));
        $config = [
            'url' => 'http://localhost',
            'injectSetName' => 'setnametag',
            'verbose' => true,
            'dateGranularity' => 'mygranularity',
        ];
        $oai = new OAI('test', $config, $client);
        $this->assertEquals(
            $config['injectSetName'], $this->getProperty($oai, 'injectSetName')
        );
        $this->assertEquals(
            [
                'Audio (Music)' => 'Audio (Music)',
                'Audio (Non-Music)' => 'Audio (Non-Music)'
            ], $this->getProperty($oai, 'setNames')
        );
    }

    /**
     * Test the sslverifypeer configuration.
     *
     * There is no explicit assertion here: the test passes only if the
     * constructor calls setOptions() on the client exactly once, with the
     * sslverifypeer setting from the configuration.
     *
     * @return void
     */
    public function testSSLVerifyPeer()
    {
        $client = $this->getMockClient();
        $client->expects($this->once())
            ->method('setOptions')
            ->with($this->equalTo(['sslverifypeer' => false]));
        $config = [
            'url' => 'http://localhost',
            'sslverifypeer' => false,
            'dateGranularity' => 'mygranularity',
        ];
        // Only the constructor's side effect on the client matters, so the
        // new object is deliberately not kept.
        new OAI('test', $config, $client);
    }

    /**
     * Test date autodetection.
     *
     * The configuration omits dateGranularity; the granularity property is
     * expected to match the <granularity> value of the canned Identify
     * response returned by the mock client.
     *
     * @return void
     */
    public function testDateAutodetect()
    {
        $client = $this->getMockClient();
        $response = $client->send();
        $response->expects($this->any())
            ->method('isSuccess')
            ->will($this->returnValue(true));
        $response->expects($this->any())
            ->method('getBody')
            ->will($this->returnValue($this->getIdentifyResponse()));
        $config = [
            'url' => 'http://localhost',
            'verbose' => true,
        ];
        $oai = new OAI('test', $config, $client);
        $this->assertEquals(
            'YYYY-MM-DDThh:mm:ssZ', $this->getProperty($oai, 'granularity')
        );
    }

    /**
     * Test date autodetection w/503 retry.
     *
     * Same as testDateAutodetect(), except that the response reports a 503
     * status once and supplies a Retry-After header; the Retry-After mock must
     * be consulted exactly once and the granularity must still be detected.
     *
     * @return void
     */
    public function testDateAutodetectWith503Retry()
    {
        $client = $this->getMockClient();
        $response = $client->send();
        $response->expects($this->any())
            ->method('isSuccess')
            ->will($this->returnValue(true));
        // NOTE(review): at(1) matches by invocation index on the response
        // mock, so this expectation depends on the order in which the
        // harvester calls the response's methods -- confirm if the harvester's
        // request handling changes.
        $response->expects($this->at(1))
            ->method('getStatusCode')
            ->will($this->returnValue(503));
        $response->expects($this->any())
            ->method('getBody')
            ->will($this->returnValue($this->getIdentifyResponse()));
        $header = $this->getMock('Zend\Http\Header\RetryAfter');
        // A one-second delta is returned; presumably the harvester waits that
        // long before retrying -- TODO confirm against the harvester code.
        $header->expects($this->once())
            ->method('getDeltaSeconds')
            ->will($this->returnValue(1));
        $headers = $response->getHeaders();
        $headers->expects($this->any())
            ->method('get')
            ->with($this->equalTo('Retry-After'))
            ->will($this->returnValue($header));
        $config = [
            'url' => 'http://localhost',
            'verbose' => true,
        ];
        $oai = new OAI('test', $config, $client);
        $this->assertEquals(
            'YYYY-MM-DDThh:mm:ssZ', $this->getProperty($oai, 'granularity')
        );
    }

    /**
     * Test HTTP error detection.
     *
     * The response mock reports failure from isSuccess(), and constructing the
     * harvester is expected to throw.
     *
     * @return void
     *
     * @expectedException Exception
     * @expectedExceptionMessage HTTP Error
     */
    public function testHTTPErrorDetection()
    {
        $client = $this->getMockClient();
        $response = $client->send();
        $response->expects($this->any())
            ->method('isSuccess')
            ->will($this->returnValue(false));
        $config = [
            'url' => 'http://localhost',
            'verbose' => true,
        ];
        // The constructor call itself is what must throw.
        new OAI('test', $config, $client);
    }

    /**
     * Test that a missing URL throws an exception.
     *
     * @return void
     *
     * @expectedException Exception
     * @expectedExceptionMessage Missing base URL for test.
     */
    public function testMissingURLThrowsException()
    {
        // An empty configuration has no 'url' key; construction must throw.
        new OAI('test', [], $this->getMockClient());
    }

    // Internal API

    /**
     * Get a sample ListSets response
     *
     * @return string
     */
    protected function getListSetsResponse()
    {
        return '<?xml version="1.0"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2013-10-11T10:06:06Z</responseDate><request verb="ListSets" metadataPrefix="oai_dc" resumptionToken="" submit="Go">http://vu61162/vufind3/OAI/Server</request><ListSets><set><setSpec>Audio (Music)</setSpec><setName>Audio (Music)</setName></set><set><setSpec>Audio (Non-Music)</setSpec><setName>Audio (Non-Music)</setName></set></ListSets></OAI-PMH>';
    }

    /**
     * Get a sample Identify response
     *
     * @return string
     */
    protected function getIdentifyResponse()
    {
        return '<?xml version="1.0"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2013-10-11T11:12:04Z</responseDate><request verb="Identify" submit="Go">http://fake/my/OAI/Server</request><Identify><repositoryName>myuniversity University VuFind</repositoryName><baseURL>http://fake/my/OAI/Server</baseURL><protocolVersion>2.0</protocolVersion><earliestDatestamp>2000-01-01T00:00:00Z</earliestDatestamp><deletedRecord>transient</deletedRecord><granularity>YYYY-MM-DDThh:mm:ssZ</granularity><adminEmail>libtech@myuniversity.edu</adminEmail><description><oai-identifier xmlns="http://www.openarchives.org/OAI/2.0/oai-identifier" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd"><scheme>oai</scheme><repositoryIdentifier>fake.myuniversity.edu</repositoryIdentifier><delimiter>:</delimiter><sampleIdentifier>oai:fake.myuniversity.edu:123456</sampleIdentifier></oai-identifier></description></Identify></OAI-PMH>';
    }

    /**
     * Get a fake HTTP client
     *
     * The returned mock is wired so that getRequest() yields a request mock
     * (whose getQuery() yields a parameters mock), setMethod() returns the
     * client itself, and send() always yields the same response mock (whose
     * getHeaders() yields a headers mock). Tests call send() on the client to
     * obtain that shared response and attach further expectations to it.
     *
     * @return \Zend\Http\Client
     */
    protected function getMockClient()
    {
        $query = $this->getMock('Zend\Stdlib\Parameters');
        $request = $this->getMock('Zend\Http\Request');
        $request->expects($this->any())
            ->method('getQuery')
            ->will($this->returnValue($query));
        $headers = $this->getMock('Zend\Http\Headers');
        $response = $this->getMock('Zend\Http\Response');
        $response->expects($this->any())
            ->method('getHeaders')
            ->will($this->returnValue($headers));
        $client = $this->getMock('Zend\Http\Client');
        $client->expects($this->any())
            ->method('getRequest')
            ->will($this->returnValue($request));
        $client->expects($this->any())
            ->method('setMethod')
            ->will($this->returnValue($client));
        $client->expects($this->any())
            ->method('send')
            ->will($this->returnValue($response));
        return $client;
    }
}
......@@ -26,7 +26,7 @@
* @link https://vufind.org/wiki/development:plugins:controllers Wiki
*/
namespace VuFindConsole\Controller;
use VuFind\Harvester\OAI, Zend\Console\Console;
use VuFindHarvest\OaiPmh\HarvesterConsoleRunner, Zend\Console\Console;
/**
* This controller handles various command-line tools
......@@ -39,6 +39,31 @@ use VuFind\Harvester\OAI, Zend\Console\Console;
*/
class HarvestController extends AbstractBase
{
/**
 * Get the base directory for harvesting OAI-PMH data.
 *
 * The directory is "harvest/" beneath LOCAL_OVERRIDE_DIR when that constant is
 * non-empty, or beneath the parent directory of APPLICATION_PATH otherwise. It
 * is created (non-recursively) if it does not already exist.
 *
 * @return string Harvest directory path, including a trailing slash.
 *
 * @throws \Exception If the base path cannot be resolved or the harvest
 * directory cannot be created.
 */
protected function getHarvestRoot()
{
    // Get the base VuFind path:
    if (strlen(LOCAL_OVERRIDE_DIR) > 0) {
        $home = LOCAL_OVERRIDE_DIR;
    } else {
        $home = realpath(APPLICATION_PATH . '/..');
        // realpath() returns false on failure; without this guard the
        // concatenation below would silently yield "/harvest/" at the
        // filesystem root.
        if ($home === false) {
            throw new \Exception('Problem resolving VuFind base directory.');
        }
    }

    // Build the full harvest path:
    $dir = $home . '/harvest/';

    // Create the directory if it does not already exist. is_dir() is checked
    // again after a failed mkdir() so that a directory created by another
    // process between the first check and the mkdir() call is not treated as
    // an error.
    if (!is_dir($dir) && !mkdir($dir) && !is_dir($dir)) {
        throw new \Exception("Problem creating directory {$dir}.");
    }

    return $dir;
}
/**
* Harvest OAI-PMH records.
*
......@@ -48,56 +73,22 @@ class HarvestController extends AbstractBase
{
$this->checkLocalSetting();
// Parse switches:
$this->consoleOpts->addRules(
['from-s' => 'Harvest start date', 'until-s' => 'Harvest end date']
);
$from = $this->consoleOpts->getOption('from');
$until = $this->consoleOpts->getOption('until');
// Read Config files
$configFile = \VuFind\Config\Locator::getConfigPath('oai.ini', 'harvest');
$oaiSettings = @parse_ini_file($configFile, true);
if (empty($oaiSettings)) {
Console::writeLine("Please add OAI-PMH settings to oai.ini.");
return $this->getFailureResponse();
}
// If first command line parameter is set, see if we can limit to just the
// specified OAI harvester:
$argv = $this->consoleOpts->getRemainingArgs();
if (isset($argv[0])) {
if (isset($oaiSettings[$argv[0]])) {
$oaiSettings = [$argv[0] => $oaiSettings[$argv[0]]];
} else {
Console::writeLine("Could not load settings for {$argv[0]}.");
return $this->getFailureResponse();
}
// Get default options, add the default --ini setting if missing:
$opts = HarvesterConsoleRunner::getDefaultOptions();
if (!$opts->getOption('ini')) {
$ini = \VuFind\Config\Locator::getConfigPath('oai.ini', 'harvest');
$opts->addArguments(['--ini=' . $ini]);
}
// Loop through all the settings and perform harvests:
$processed = 0;
foreach ($oaiSettings as $target => $settings) {
if (!empty($target) && !empty($settings)) {
Console::writeLine("Processing {$target}...");
try {
$client = $this->getServiceLocator()->get('VuFind\Http')
->createClient();
$harvest = new OAI($target, $settings, $client, $from, $until);
$harvest->launch();
} catch (\Exception $e) {
Console::writeLine($e->getMessage());
return $this->getFailureResponse();
}
$processed++;
}
}
// Get the default VuFind HTTP client:
$client = $this->getServiceLocator()->get('VuFind\Http')->createClient();
// All done.
Console::writeLine(
"Completed without errors -- {$processed} source(s) processed."
// Run the job!
$runner = new HarvesterConsoleRunner(
$opts, $client, $this->getHarvestRoot()
);
return $this->getSuccessResponse();
return $runner->run()
? $this->getSuccessResponse() : $this->getFailureResponse();
}
/**
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment