Skip to content
Snippets Groups Projects
Commit 5f2c9352 authored by Demian Katz's avatar Demian Katz
Browse files

Added last_state.txt for resuming an interrupted harvest.

parent 13e6a86b
No related merge requests found
......@@ -125,6 +125,14 @@ class OAI
*/
protected $lastHarvestFile;
/**
* File for tracking last harvest state (for continuing interrupted
* connection).
*
* @var string
*/
protected $lastStateFile;
/**
* Harvest end date (null for no specific end)
*
......@@ -267,6 +275,7 @@ class OAI
// Check if there is a file containing a start date:
$this->lastHarvestFile = $this->basePath . 'last_harvest.txt';
$this->lastStateFile = $this->basePath . 'last_state.txt';
// Set up start/end dates:
$this->setStartDate(empty($from) ? $this->loadLastHarvestedDate() : $from);
......@@ -322,17 +331,46 @@ class OAI
$sets = [null];
}
// Load last state, if applicable (used to recover from server failure).
if (file_exists($this->lastStateFile)) {
$this->write("Found {$this->lastStateFile}; attempting to resume.\n");
list($resumeSet, $resumeToken, $this->startDate)
= explode("\t", file_get_contents($this->lastStateFile));
}
// Loop through all of the selected sets:
foreach ($sets as $set) {
// Start harvesting at the requested date:
$token = $this
->getRecordsByDate($this->startDate, $set, $this->harvestEndDate);
// If we're resuming and there are multiple sets, find the right one.
if (isset($resumeToken) && $resumeSet != $set) {
continue;
}
// If we have a token to resume from, pick up there now...
if (isset($resumeToken)) {
$token = $resumeToken;
unset($resumeToken);
} else {
// ...otherwise, start harvesting at the requested date:
$token = $this->getRecordsByDate(
$this->startDate, $set, $this->harvestEndDate
);
}
// Keep harvesting as long as a resumption token is provided:
while ($token !== false) {
// Save current state in case we need to resume later:
file_put_contents(
$this->lastStateFile, "$set\t$token\t{$this->startDate}"
);
$token = $this->getRecordsByToken($token);
}
}
// If we made it this far, all was successful, so we should clean up
// the "last state" file.
if (file_exists($this->lastStateFile)) {
unlink($this->lastStateFile);
}
}
/**
......@@ -529,6 +567,17 @@ class OAI
// Detect errors and die if one is found:
if ($result->error) {
$attribs = $result->error->attributes();
// If this is a bad resumption token error and we're trying to
// restore a prior state, we should clean up.
if ($attribs['code'] == 'badResumptionToken'
&& file_exists($this->lastStateFile)
) {
unlink($this->lastStateFile);
throw new \Exception(
"Token expired; removing last_state.txt. Please restart harvest."
);
}
throw new \Exception(
"OAI-PMH error -- code: {$attribs['code']}, " .
"value: {$result->error}"
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment