diff --git a/harvest/batch-delete.bat b/harvest/batch-delete.bat
new file mode 100644
index 0000000000000000000000000000000000000000..d73276833546d80a63902b6fa2e93b8af6de9bc0
--- /dev/null
+++ b/harvest/batch-delete.bat
@@ -0,0 +1,67 @@
+@echo off
+rem Deletes records based on the *.delete files found in a harvest directory.
+rem setlocal keeps environment edits local and enables the command extensions.
+setlocal enableextensions
+if not errorlevel 1 goto extensionsokay
+echo Unable to enable Windows command extensions.
+goto end
+:extensionsokay
+
+rem Make sure VUFIND_HOME is set:
+if not "!%VUFIND_HOME%!"=="!!" goto vufindhomefound
+rem VUFIND_HOME not set -- try to call vufind.bat to fix the problem before we
+rem give up completely; cd /d is used so this also works across drives.
+if exist %0\..\..\vufind.bat goto usevufindbat
+rem If vufind.bat doesn't exist, the user hasn't run install.bat yet.
+echo ERROR: vufind.bat does not exist -- could not set up environment.
+echo Please run install.bat to correct this problem.
+goto end
+:usevufindbat
+cd /d %0\..\..
+call vufind > nul
+cd /d %0\..
+if not "!%VUFIND_HOME%!"=="!!" goto vufindhomefound
+echo You need to set the VUFIND_HOME environmental variable before running this script.
+goto end
+:vufindhomefound
+
+rem Make sure command line parameter was included:
+if not "!%1!"=="!!" goto paramsokay
+echo This script deletes records based on files created by the OAI-PMH harvester.
+echo.
+echo Usage: %0 [harvest subdirectory] [index type]
+echo.
+echo [harvest subdirectory] is a directory name created by the OAI-PMH harvester.
+echo This script will search the harvest subdirectories of the directories defined
+echo by the VUFIND_LOCAL_DIR and VUFIND_HOME environment variables.
+echo.
+echo [index type] is optional; defaults to Solr for main bibliographic index, but
+echo can be set to SolrAuth for authority index.
+echo.
+echo Example: %0 oai_source
+goto end
+:paramsokay
+
+rem Check if the path is valid (VUFIND_LOCAL_DIR is tried before VUFIND_HOME):
+set BASEPATH="%VUFIND_LOCAL_DIR%\harvest\%1"
+if exist %BASEPATH% goto basepathfound
+set BASEPATH="%VUFIND_HOME%\harvest\%1"
+if exist %BASEPATH% goto basepathfound
+echo Directory %BASEPATH% does not exist!
+goto end
+:basepathfound
+
+rem Create processed directory as needed:
+if exist %BASEPATH%\processed goto processedfound
+md %BASEPATH%\processed
+:processedfound
+
+rem Process all the files in the target directory; stop if util is unreachable:
+cd /d "%VUFIND_HOME%\util" || goto end
+for %%a in (%BASEPATH%\*.delete) do (
+  echo Processing %%a...
+  php deletes.php %%a flat %2
+  move %%a %BASEPATH%\processed\ > nul
+)
+
+:end
\ No newline at end of file
diff --git a/harvest/batch-delete.sh b/harvest/batch-delete.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d0f14464c4b1ff342c59904a80c0be0b0c4c62cf
--- /dev/null
+++ b/harvest/batch-delete.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# Delete records based on the *.delete files found in a harvest directory.
+# Make sure VUFIND_HOME is set:
+if [ -z "$VUFIND_HOME" ]
+then
+  echo "Please set the VUFIND_HOME environment variable."
+  exit 1
+fi
+
+# Make sure command line parameter was included:
+if [ -z "$1" ]
+then
+  echo "This script deletes records based on files created by the OAI-PMH harvester."
+  echo ""
+  echo "Usage: $(basename "$0") [harvest subdirectory] [index type]"
+  echo ""
+  echo "[harvest subdirectory] is a directory name created by the OAI-PMH harvester."
+  echo "This script will search the harvest subdirectories of the directories defined"
+  echo "by the VUFIND_LOCAL_DIR and VUFIND_HOME environment variables."
+  echo ""
+  echo "[index type] is optional; defaults to Solr for main bibliographic index, but"
+  echo "can be set to SolrAuth for authority index."
+  echo ""
+  echo "Example: $(basename "$0") oai_source"
+  exit 1
+fi
+
+# Check if the path is valid (VUFIND_LOCAL_DIR is tried before VUFIND_HOME):
+BASEPATH="$VUFIND_LOCAL_DIR/harvest/$1"
+if [ ! -d "$BASEPATH" ]
+then
+  BASEPATH="$VUFIND_HOME/harvest/$1"
+fi
+if [ ! -d "$BASEPATH" ]
+then
+  echo "Directory $BASEPATH does not exist!"
+  exit 1
+fi
+
+# Create processed directory as needed:
+if [ ! -d "$BASEPATH/processed" ]
+then
+  mkdir "$BASEPATH/processed"
+fi
+
+# Process all the files in the target directory (abort if util is unreachable):
+cd "$VUFIND_HOME/util" || exit 1
+for file in "$BASEPATH"/*.delete
+do
+  if [ -f "$file" ]
+  then
+    echo "Processing $file ..."
+    php deletes.php "$file" flat ${2:+"$2"}
+    mv "$file" "$BASEPATH/processed/$(basename "$file")"
+  fi
+done
diff --git a/harvest/batch-import-marc-auth.bat b/harvest/batch-import-marc-auth.bat
new file mode 100644
index 0000000000000000000000000000000000000000..6532044cf35ce46dff706df5a8919b26272dbea1
--- /dev/null
+++ b/harvest/batch-import-marc-auth.bat
@@ -0,0 +1,66 @@
+@echo off
+rem Processes a batch of harvested authority records (see usage text below).
+rem setlocal keeps environment edits local and enables the command extensions.
+setlocal enableextensions
+if not errorlevel 1 goto extensionsokay
+echo Unable to enable Windows command extensions.
+goto end
+:extensionsokay
+
+rem Make sure VUFIND_HOME is set:
+if not "!%VUFIND_HOME%!"=="!!" goto vufindhomefound
+rem VUFIND_HOME not set -- try to call vufind.bat to fix the problem before we
+rem give up completely; cd /d is used so this also works across drives.
+if exist %0\..\..\vufind.bat goto usevufindbat
+rem If vufind.bat doesn't exist, the user hasn't run install.bat yet.
+echo ERROR: vufind.bat does not exist -- could not set up environment.
+echo Please run install.bat to correct this problem.
+goto end
+:usevufindbat
+cd /d %0\..\..
+call vufind > nul
+cd /d %0\..
+if not "!%VUFIND_HOME%!"=="!!" goto vufindhomefound
+echo You need to set the VUFIND_HOME environmental variable before running this script.
+goto end
+:vufindhomefound
+
+rem Make sure both command line parameters were included:
+if not "!%2!"=="!!" goto paramsokay
+echo This script processes a batch of harvested authority records.
+echo.
+echo Usage: %0 [harvest subdirectory] [SolrMarc properties file]
+echo.
+echo [harvest subdirectory] is a directory name created by the OAI-PMH harvester.
+echo This script will search the harvest subdirectories of the directories defined
+echo by the VUFIND_LOCAL_DIR and VUFIND_HOME environment variables.
+echo.
+echo Example: %0 lcnaf marc_lcnaf.properties
+goto end
+:paramsokay
+
+rem Check if the path is valid (VUFIND_LOCAL_DIR is tried before VUFIND_HOME):
+set BASEPATH="%VUFIND_LOCAL_DIR%\harvest\%1"
+if exist %BASEPATH% goto basepathfound
+set BASEPATH="%VUFIND_HOME%\harvest\%1"
+if exist %BASEPATH% goto basepathfound
+echo Directory %BASEPATH% does not exist!
+goto end
+:basepathfound
+
+rem Create log/processed directories as needed:
+if exist %BASEPATH%\log goto logfound
+md %BASEPATH%\log
+:logfound
+if exist %BASEPATH%\processed goto processedfound
+md %BASEPATH%\processed
+:processedfound
+
+rem Process all the files in the target directory, logging each import:
+for %%a in (%BASEPATH%\*.xml %BASEPATH%\*.mrc) do (
+  echo Processing %%a...
+  call %VUFIND_HOME%\import-marc-auth.bat %%a %2 > %BASEPATH%\log\%%~nxa.log
+  move %%a %BASEPATH%\processed\ > nul
+)
+
+:end
\ No newline at end of file
diff --git a/harvest/batch-import-marc-auth.sh b/harvest/batch-import-marc-auth.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3537a146eb097b790d1e6d7338a059aa5e4cb818
--- /dev/null
+++ b/harvest/batch-import-marc-auth.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# Process a batch of harvested authority records found in a harvest directory.
+# Make sure VUFIND_HOME is set:
+if [ -z "$VUFIND_HOME" ]
+then
+  echo "Please set the VUFIND_HOME environment variable."
+  exit 1
+fi
+
+# Make sure both command line parameters were included:
+if [ -z "$2" ]
+then
+  echo "This script processes a batch of harvested authority records."
+  echo ""
+  echo "Usage: $(basename "$0") [harvest subdirectory] [SolrMarc properties file]"
+  echo ""
+  echo "[harvest subdirectory] is a directory name created by the OAI-PMH harvester."
+  echo "This script will search the harvest subdirectories of the directories defined"
+  echo "by the VUFIND_LOCAL_DIR and VUFIND_HOME environment variables."
+  echo ""
+  echo "Example: $(basename "$0") lcnaf marc_lcnaf.properties"
+  exit 1
+fi
+
+# Check if the path is valid (VUFIND_LOCAL_DIR is tried before VUFIND_HOME):
+BASEPATH="$VUFIND_LOCAL_DIR/harvest/$1"
+if [ ! -d "$BASEPATH" ]
+then
+  BASEPATH="$VUFIND_HOME/harvest/$1"
+fi
+if [ ! -d "$BASEPATH" ]
+then
+  echo "Directory $BASEPATH does not exist!"
+  exit 1
+fi
+
+# Create log/processed directories as needed:
+if [ ! -d "$BASEPATH/log" ]
+then
+  mkdir "$BASEPATH/log"
+fi
+if [ ! -d "$BASEPATH/processed" ]
+then
+  mkdir "$BASEPATH/processed"
+fi
+
+# Process all the files in the target directory, logging each import:
+for file in "$BASEPATH"/*.xml "$BASEPATH"/*.mrc
+do
+  if [ -f "$file" ]
+  then
+    echo "Processing $file ..."
+    "$VUFIND_HOME/import-marc-auth.sh" "$file" "$2" > "$BASEPATH/log/$(basename "$file").log"
+    mv "$file" "$BASEPATH/processed/$(basename "$file")"
+  fi
+done
diff --git a/harvest/batch-import-marc.bat b/harvest/batch-import-marc.bat
new file mode 100644
index 0000000000000000000000000000000000000000..6dda9378e7d7634947769d9f2b81a575004dd7d5
--- /dev/null
+++ b/harvest/batch-import-marc.bat
@@ -0,0 +1,66 @@
+@echo off
+rem Processes a batch of harvested MARC records (see usage text below).
+rem setlocal keeps environment edits local and enables the command extensions.
+setlocal enableextensions
+if not errorlevel 1 goto extensionsokay
+echo Unable to enable Windows command extensions.
+goto end
+:extensionsokay
+
+rem Make sure VUFIND_HOME is set:
+if not "!%VUFIND_HOME%!"=="!!" goto vufindhomefound
+rem VUFIND_HOME not set -- try to call vufind.bat to fix the problem before we
+rem give up completely; cd /d is used so this also works across drives.
+if exist %0\..\..\vufind.bat goto usevufindbat
+rem If vufind.bat doesn't exist, the user hasn't run install.bat yet.
+echo ERROR: vufind.bat does not exist -- could not set up environment.
+echo Please run install.bat to correct this problem.
+goto end
+:usevufindbat
+cd /d %0\..\..
+call vufind > nul
+cd /d %0\..
+if not "!%VUFIND_HOME%!"=="!!" goto vufindhomefound
+echo You need to set the VUFIND_HOME environmental variable before running this script.
+goto end
+:vufindhomefound
+
+rem Make sure command line parameter was included:
+if not "!%1!"=="!!" goto paramsokay
+echo This script processes a batch of harvested MARC records.
+echo.
+echo Usage: %0 [harvest subdirectory]
+echo.
+echo [harvest subdirectory] is a directory name created by the OAI-PMH harvester.
+echo This script will search the harvest subdirectories of the directories defined
+echo by the VUFIND_LOCAL_DIR and VUFIND_HOME environment variables.
+echo.
+echo Example: %0 oai_source
+goto end
+:paramsokay
+
+rem Check if the path is valid (VUFIND_LOCAL_DIR is tried before VUFIND_HOME):
+set BASEPATH="%VUFIND_LOCAL_DIR%\harvest\%1"
+if exist %BASEPATH% goto basepathfound
+set BASEPATH="%VUFIND_HOME%\harvest\%1"
+if exist %BASEPATH% goto basepathfound
+echo Directory %BASEPATH% does not exist!
+goto end
+:basepathfound
+
+rem Create log/processed directories as needed:
+if exist %BASEPATH%\log goto logfound
+md %BASEPATH%\log
+:logfound
+if exist %BASEPATH%\processed goto processedfound
+md %BASEPATH%\processed
+:processedfound
+
+rem Process all the files in the target directory, logging each import:
+for %%a in (%BASEPATH%\*.xml %BASEPATH%\*.mrc) do (
+  echo Processing %%a...
+  call %VUFIND_HOME%\import-marc.bat %%a > %BASEPATH%\log\%%~nxa.log
+  move %%a %BASEPATH%\processed\ > nul
+)
+
+:end
\ No newline at end of file
diff --git a/harvest/batch-import-marc.sh b/harvest/batch-import-marc.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ed8299f63314962a982200bb195ccdb6e3171acb
--- /dev/null
+++ b/harvest/batch-import-marc.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# Process a batch of harvested MARC records found in a harvest directory.
+# Make sure VUFIND_HOME is set:
+if [ -z "$VUFIND_HOME" ]
+then
+  echo "Please set the VUFIND_HOME environment variable."
+  exit 1
+fi
+
+# Make sure command line parameter was included:
+if [ -z "$1" ]
+then
+  echo "This script processes a batch of harvested MARC records."
+  echo ""
+  echo "Usage: $(basename "$0") [harvest subdirectory]"
+  echo ""
+  echo "[harvest subdirectory] is a directory name created by the OAI-PMH harvester."
+  echo "This script will search the harvest subdirectories of the directories defined"
+  echo "by the VUFIND_LOCAL_DIR and VUFIND_HOME environment variables."
+  echo ""
+  echo "Example: $(basename "$0") oai_source"
+  exit 1
+fi
+
+# Check if the path is valid (VUFIND_LOCAL_DIR is tried before VUFIND_HOME):
+BASEPATH="$VUFIND_LOCAL_DIR/harvest/$1"
+if [ ! -d "$BASEPATH" ]
+then
+  BASEPATH="$VUFIND_HOME/harvest/$1"
+fi
+if [ ! -d "$BASEPATH" ]
+then
+  echo "Directory $BASEPATH does not exist!"
+  exit 1
+fi
+
+# Create log/processed directories as needed:
+if [ ! -d "$BASEPATH/log" ]
+then
+  mkdir "$BASEPATH/log"
+fi
+if [ ! -d "$BASEPATH/processed" ]
+then
+  mkdir "$BASEPATH/processed"
+fi
+
+# Process all the files in the target directory, logging each import:
+for file in "$BASEPATH"/*.xml "$BASEPATH"/*.mrc
+do
+  if [ -f "$file" ]
+  then
+    echo "Processing $file ..."
+    "$VUFIND_HOME/import-marc.sh" "$file" > "$BASEPATH/log/$(basename "$file").log"
+    mv "$file" "$BASEPATH/processed/$(basename "$file")"
+  fi
+done
diff --git a/harvest/batch-import-xsl.bat b/harvest/batch-import-xsl.bat
new file mode 100644
index 0000000000000000000000000000000000000000..c057810833cbf6754a00f9019abc2ce65b14b5d3
--- /dev/null
+++ b/harvest/batch-import-xsl.bat
@@ -0,0 +1,84 @@
+@echo off
+rem Processes harvested XML records using an XSL import configuration file.
+rem setlocal keeps environment edits local and enables the command extensions.
+setlocal enableextensions
+if not errorlevel 1 goto extensionsokay
+echo Unable to enable Windows command extensions.
+goto end
+:extensionsokay
+
+rem Make sure VUFIND_HOME is set:
+if not "!%VUFIND_HOME%!"=="!!" goto vufindhomefound
+rem VUFIND_HOME not set -- try to call vufind.bat to fix the problem before we
+rem give up completely; cd /d is used so this also works across drives.
+if exist %0\..\..\vufind.bat goto usevufindbat
+rem If vufind.bat doesn't exist, the user hasn't run install.bat yet.
+echo ERROR: vufind.bat does not exist -- could not set up environment.
+echo Please run install.bat to correct this problem.
+goto end
+:usevufindbat
+cd /d %0\..\..
+call vufind > nul
+cd /d %0\..
+if not "!%VUFIND_HOME%!"=="!!" goto vufindhomefound
+echo You need to set the VUFIND_HOME environmental variable before running this script.
+goto end
+:vufindhomefound
+
+rem Make sure both command line parameters were included:
+if not "!%2!"=="!!" goto paramsokay
+echo This script processes a batch of harvested XML records using the specified XSL
+echo import configuration file.
+echo.
+echo Usage: %0 [harvest subdirectory] [properties file]
+echo.
+echo [harvest subdirectory] is a directory name created by the OAI-PMH harvester.
+echo This script will search the harvest subdirectories of the directories defined
+echo by the VUFIND_LOCAL_DIR and VUFIND_HOME environment variables.
+echo.
+echo [properties file] is a configuration file found in the import subdirectory of
+echo either your VUFIND_LOCAL_DIR or VUFIND_HOME directory.
+echo.
+echo Example: %0 oai_source ojs.properties
+goto end
+:paramsokay
+
+rem Check if the path is valid (VUFIND_LOCAL_DIR is tried before VUFIND_HOME):
+set BASEPATH="%VUFIND_LOCAL_DIR%\harvest\%1"
+if exist %BASEPATH% goto basepathfound
+set BASEPATH="%VUFIND_HOME%\harvest\%1"
+if exist %BASEPATH% goto basepathfound
+echo Directory %BASEPATH% does not exist!
+goto end
+:basepathfound
+
+rem Create processed directory as needed:
+if exist %BASEPATH%\processed goto processedfound
+md %BASEPATH%\processed
+:processedfound
+
+rem Flag -- do we need to perform an optimize?
+set OPTIMIZE=0
+
+rem Process all the files in the target directory; stop if import is unreachable:
+cd /d "%VUFIND_HOME%\import" || goto end
+for %%a in (%BASEPATH%\*.xml) do (
+  echo Processing %%a...
+  php import-xsl.php %%a %2
+  rem IF ERRORLEVEL n is true for any exit code of n or greater, so a test for
+  rem 0 would always pass; use NOT ERRORLEVEL 1 to detect success instead, so
+  rem a file is only moved when PHP exits cleanly, as in the .sh version.
+  if not errorlevel 1 (
+    move %%a %BASEPATH%\processed\ > nul
+    rem We processed a file, so we need to optimize later on:
+    set OPTIMIZE=1
+  )
+)
+
+rem Optimize the index now that we are done (if necessary):
+if not "%OPTIMIZE%!"=="1!" goto end
+cd /d "%VUFIND_HOME%\util" || goto end
+echo Optimizing index...
+php optimize.php
+
+:end
\ No newline at end of file
diff --git a/harvest/batch-import-xsl.sh b/harvest/batch-import-xsl.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3ef97d235ea43a071d036f1b9d377e5440592d0d
--- /dev/null
+++ b/harvest/batch-import-xsl.sh
@@ -0,0 +1,74 @@
+#!/bin/sh
+# Process harvested XML records using an XSL import configuration file.
+# Make sure VUFIND_HOME is set:
+if [ -z "$VUFIND_HOME" ]
+then
+  echo "Please set the VUFIND_HOME environment variable."
+  exit 1
+fi
+
+# Make sure both command line parameters were included:
+if [ -z "$2" ]
+then
+  echo "This script processes a batch of harvested XML records using the specified XSL"
+  echo "import configuration file."
+  echo ""
+  echo "Usage: $(basename "$0") [harvest subdirectory] [properties file]"
+  echo ""
+  echo "[harvest subdirectory] is a directory name created by the OAI-PMH harvester."
+  echo "This script will search the harvest subdirectories of the directories defined"
+  echo "by the VUFIND_LOCAL_DIR and VUFIND_HOME environment variables."
+  echo ""
+  echo "[properties file] is a configuration file found in the import subdirectory of"
+  echo "either your VUFIND_LOCAL_DIR or VUFIND_HOME directory."
+  echo ""
+  echo "Example: $(basename "$0") oai_source ojs.properties"
+  exit 1
+fi
+
+# Check if the path is valid (VUFIND_LOCAL_DIR is tried before VUFIND_HOME):
+BASEPATH="$VUFIND_LOCAL_DIR/harvest/$1"
+if [ ! -d "$BASEPATH" ]
+then
+  BASEPATH="$VUFIND_HOME/harvest/$1"
+fi
+if [ ! -d "$BASEPATH" ]
+then
+  echo "Directory $BASEPATH does not exist!"
+  exit 1
+fi
+
+# Create processed directory as needed:
+if [ ! -d "$BASEPATH/processed" ]
+then
+  mkdir "$BASEPATH/processed"
+fi
+
+# Flag -- do we need to perform an optimize?
+OPTIMIZE=0
+
+# Process all the files in the target directory (abort if import is unreachable):
+cd "$VUFIND_HOME/import" || exit 1
+for file in "$BASEPATH"/*.xml
+do
+  if [ -f "$file" ]
+  then
+    echo "Processing $file ..."
+    php import-xsl.php "$file" "$2"
+    # Only move the file into the "processed" folder if processing was successful:
+    if [ "$?" -eq "0" ]
+    then
+      mv "$file" "$BASEPATH/processed/$(basename "$file")"
+      # We processed a file, so we need to optimize later on:
+      OPTIMIZE=1
+    fi
+  fi
+done
+
+# Optimize the index now that we are done (if necessary):
+if [ "$OPTIMIZE" -eq "1" ]
+then
+  cd "$VUFIND_HOME/util" || exit 1
+  echo "Optimizing index..."
+  php optimize.php
+fi
diff --git a/harvest/harvest_naf.php b/harvest/harvest_naf.php
new file mode 100644
index 0000000000000000000000000000000000000000..87b27c5c74c9ee54380436c73822d77781b218a0
--- /dev/null
+++ b/harvest/harvest_naf.php
@@ -0,0 +1,32 @@
+<?php
+/**
+ * Tool to harvest Library of Congress Name Authority File from OCLC.
+ *
+ * PHP version 5
+ *
+ * Copyright (c) Demian Katz 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Harvest_Tools
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/authority_control Wiki
+ */
+
+// Load the Zend framework; the CLI module maps this script's directory name to
+// a controller and its file name (minus extension/underscores) to an action.
+define('CLI_DIR', __DIR__);     // save directory name of current script
+require_once __DIR__ . '/../public/index.php';
\ No newline at end of file
diff --git a/harvest/harvest_oai.php b/harvest/harvest_oai.php
new file mode 100644
index 0000000000000000000000000000000000000000..60a8e1d910b10ea2aa63e4658a8f656850d85b40
--- /dev/null
+++ b/harvest/harvest_oai.php
@@ -0,0 +1,32 @@
+<?php
+/**
+ * OAI-PMH Harvest Tool
+ *
+ * PHP version 5
+ *
+ * Copyright (c) Demian Katz 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Harvest_Tools
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/importing_records#oai-pmh_harvesting Wiki
+ */
+
+// Load the Zend framework; the CLI module maps this script's directory name to
+// a controller and its file name (minus extension/underscores) to an action.
+define('CLI_DIR', __DIR__);     // save directory name of current script
+require_once __DIR__ . '/../public/index.php';
\ No newline at end of file
diff --git a/harvest/oai.ini b/harvest/oai.ini
new file mode 100644
index 0000000000000000000000000000000000000000..67de45b3729a6ee02d0a157df0cea503267a6e2b
--- /dev/null
+++ b/harvest/oai.ini
@@ -0,0 +1,82 @@
+; oai.ini -- OAI-PMH harvest settings.
+;
+; For every OAI-PMH source you would like to harvest, create a section like this:
+;
+; [section_name]
+; url = http://oai.myuniversity.edu/
+; set = my_optional_set
+; metadataPrefix = oai_dc
+; idSearch[] = "/oai:myuniversity.edu:/"
+; idReplace[] = "myprefix-"
+; injectDate = false
+; injectId = false
+; injectSetName = false
+; injectSetSpec = false
+; dateGranularity = auto
+; harvestedIdLog = harvest.log
+; verbose = false
+;
+; The section_name may be passed to harvest_oai.php as a parameter to harvest only
+; records from that source.  This is also the directory name that records will be
+; harvested into (a subdirectory of "harvest" under VUFIND_HOME).
+;
+; url is the base URL for the OAI-PMH source.
+;
+; set is the identifier of a set to harvest (normally found in the <setSpec> tag of
+; an OAI-PMH ListSets response).  Omit this setting to harvest all records.
+;
+; metadataPrefix is the metadata format to harvest (oai_dc will be used by default
+; if the value is omitted).
+;
+; idPrefix is the OAI-specific prefix attached to ID values.  If you provide the
+; value here, it will be automatically stripped for you when generating filenames,
+; injecting IDs and tracking deleted records.  If you omit the setting, full
+; OAI ids will be retained. [DEPRECATED -- use idSearch and idReplace instead]
+;
+; idSearch[] and idReplace[] may be used to manipulate IDs with regular expressions.
+; This is useful for adding or removing prefixes and swapping out problematic
+; characters.  You may use multiple pairs of search and replace settings to perform
+; multiple manipulations.
+;
+; injectDate may be set to an XML tag name in order to inject the datestamp of
+; the record into the harvested metadata (enclosed in the specified tag).  If
+; omitted or set to false, no datestamp-related changes will be made to the
+; harvested metadata.
+;
+; injectId may be set to an XML tag name in order to inject the ID of the record
+; into the harvested metadata (enclosed in the specified tag).  If omitted or set
+; to false, no ID-related changes will be made to the harvested metadata.
+;
+; injectSetName may be set to an XML tag name in order to inject the setName value
+; of the record into the harvested metadata (enclosed in the specified tag).  If
+; omitted or set to false, no setName-related changes will be made to the harvested
+; metadata.
+;
+; injectSetSpec may be set to an XML tag name in order to inject the setSpec value
+; of the record into the harvested metadata (enclosed in the specified tag).  If
+; omitted or set to false, no setSpec-related changes will be made to the harvested
+; metadata.
+;
+; dateGranularity is the granularity used by the server for representing dates.
+; This may be "YYYY-MM-DDThh:mm:ssZ", "YYYY-MM-DD" or "auto" (to query the server
+; for details).  The default is "auto".
+;
+; harvestedIdLog is a filename (inside your harvest directory) for a text file
+; listing all non-deleted harvested records encountered.  If you omit this setting,
+; no log file will be generated.  Subsequent harvests will append to the file if
+; it already exists.
+;
+; verbose may be set to true in order to display more detailed output while
+; harvesting; this may be useful for troubleshooting purposes, but it defaults to
+; false.
+
+; SAMPLE CONFIGURATION FOR OPEN JOURNAL SYSTEMS
+;[OJS]
+;url = http://ojs.myuniversity.edu/oai
+;metadataPrefix = oai_dc
+;idSearch[] = "/^oai:myuniversity.edu:/"
+;idReplace[] = "ojs-"
+;idSearch[] = "/\//"
+;idReplace[] = "-"
+;injectId = "identifier"
+;injectDate = "datestamp"
\ No newline at end of file
diff --git a/module/VuFind/CLI/Module.php b/module/VuFind/CLI/Module.php
index d9de28cf96ae73454a7cf9bf63d70fd6e854a990..5b3c9359ad452e7bd8eca6a73782fd6668ac9c9b 100644
--- a/module/VuFind/CLI/Module.php
+++ b/module/VuFind/CLI/Module.php
@@ -33,9 +33,9 @@ class Module
             $filename = $args[0];
             $pwd = $server->get('PWD', CLI_DIR);
 
-            // Convert base filename (minus .php extension) and containing directory
-            // name into action and controller, respectively:
-            $baseFilename = basename($filename);
+            // Convert base filename (minus .php extension and underscores) and
+            // containing directory name into action and controller, respectively:
+            $baseFilename = str_replace('_', '', basename($filename));
             $baseFilename = substr($baseFilename, 0, strlen($baseFilename) - 4);
             $baseDirname = basename(dirname(realpath($pwd . '/' . $filename)));
             $routeMatch = new RouteMatch(
diff --git a/module/VuFind/CLI/config/module.config.php b/module/VuFind/CLI/config/module.config.php
index 92e27c0d7250e30c700746ac737148eddd871b0b..512418d341dfbb27bfd68b487652cb4080c53f27 100644
--- a/module/VuFind/CLI/config/module.config.php
+++ b/module/VuFind/CLI/config/module.config.php
@@ -4,6 +4,7 @@ namespace VuFind\CLI\Module\Configuration;
 $config = array(
     'controllers' => array(
         'invokables' => array(
+            'harvest' => 'VuFind\CLI\Controller\HarvestController',
             'import' => 'VuFind\CLI\Controller\ImportController',
         ),
     ),
diff --git a/module/VuFind/src/VuFind/CLI/Controller/HarvestController.php b/module/VuFind/src/VuFind/CLI/Controller/HarvestController.php
new file mode 100644
index 0000000000000000000000000000000000000000..a572c8f6e47a4c10eb54c88931fe0fb3842f2152
--- /dev/null
+++ b/module/VuFind/src/VuFind/CLI/Controller/HarvestController.php
@@ -0,0 +1,116 @@
+<?php
+/**
+ * CLI Controller Module
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Villanova University 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Controller
+ * @author   Chris Hallberg <challber@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/building_a_recommendations_module Wiki
+ */
+namespace VuFind\CLI\Controller;
+use VuFind\Config\Reader as ConfigReader, VuFind\Harvester\NAF, VuFind\Harvester\OAI;
+
+/**
+ * This controller handles the command-line harvest tools (NAF and OAI-PMH)
+ *
+ * @category VuFind2
+ * @package  Controller
+ * @author   Chris Hallberg <challber@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/building_a_recommendations_module Wiki
+ */
+class HarvestController extends AbstractBase
+{
+    /**
+     * Harvest the LC Name Authority File.
+     *
+     * @return mixed Result of getSuccessResponse() or getFailureResponse()
+     */
+    public function harvestnafAction()
+    {
+        $this->checkLocalSetting();
+
+        // Perform the harvest. Note that first command line parameter
+        // may be used to start at a particular date.
+        try {
+            $harvest = new NAF();
+            $argv = $this->consoleOpts->getRemainingArgs();
+            if (isset($argv[0])) {
+                $harvest->setStartDate($argv[0]);
+            }
+            $harvest->launch();
+        } catch (\Exception $e) {
+            echo $e->getMessage() . "\n";
+            return $this->getFailureResponse();
+        }
+        return $this->getSuccessResponse();
+    }
+
+    /**
+     * Harvest OAI-PMH records.
+     *
+     * @return mixed Result of getSuccessResponse() or getFailureResponse()
+     */
+    public function harvestoaiAction()
+    {
+        $this->checkLocalSetting();
+
+        // Read OAI-PMH settings from oai.ini, keeping its sections as sub-arrays:
+        $configFile = ConfigReader::getConfigPath('oai.ini', 'harvest');
+        $oaiSettings = @parse_ini_file($configFile, true);
+        if (empty($oaiSettings)) {
+            echo "Please add OAI-PMH settings to oai.ini.\n";
+            return $this->getFailureResponse();
+        }
+
+        // If first command line parameter is set, see if we can limit to just the
+        // specified OAI harvester (an unknown section name is treated as a failure):
+        $argv = $this->consoleOpts->getRemainingArgs();
+        if (isset($argv[0])) {
+            if (isset($oaiSettings[$argv[0]])) {
+                $oaiSettings = array($argv[0] => $oaiSettings[$argv[0]]);
+            } else {
+                echo "Could not load settings for {$argv[0]}.\n";
+                return $this->getFailureResponse();
+            }
+        }
+
+        // Loop through all the settings and perform harvests (stops at first error):
+        $processed = 0;
+        foreach ($oaiSettings as $target => $settings) {
+            if (!empty($target) && !empty($settings)) {
+                echo "Processing {$target}...\n";
+                try {
+                    $harvest = new OAI($target, $settings);
+                    $harvest->launch();
+                } catch (\Exception $e) {
+                    echo $e->getMessage() . "\n";
+                    return $this->getFailureResponse();
+                }
+                $processed++;
+            }
+        }
+
+        // All done.
+        echo "Completed without errors -- {$processed} source(s) processed.\n";
+        return $this->getSuccessResponse();
+    }
+}
diff --git a/module/VuFind/src/VuFind/Harvester/NAF.php b/module/VuFind/src/VuFind/Harvester/NAF.php
new file mode 100644
index 0000000000000000000000000000000000000000..f8a944abd0806f4db6c19b5aa8039adfdbc2ef6f
--- /dev/null
+++ b/module/VuFind/src/VuFind/Harvester/NAF.php
@@ -0,0 +1,525 @@
+<?php
+/**
+ * Tool to harvest Library of Congress Name Authority File from OCLC.
+ *
+ * PHP version 5
+ *
+ * Copyright (c) Demian Katz 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Harvest_Tools
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/authority_control Wiki
+ */
+namespace VuFind\Harvester;
+use VuFind\Connection\SRU;
+
+/**
+ * NAF Class
+ *
+ * This class harvests OCLC's Name Authority File to MARC-XML documents on the
+ * local disk.
+ *
+ * @category VuFind2
+ * @package  Harvest_Tools
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/authority_control Wiki
+ */
+class NAF
+{
+    protected $sru;               // SRU connection
+    protected $basePath;          // Directory for storing harvested files
+    protected $lastHarvestFile;   // File for tracking last harvest date
+
+    // Start scanning at an arbitrary date known to be earlier than the
+    // oldest possible document.
+    protected $startDate = '1900-01-01';
+
+    /**
+     * Constructor.
+     *
+     * Lifts the PHP time limit, makes sure the harvest/lcnaf/ directory exists
+     * (below LOCAL_OVERRIDE_DIR when that constant is non-empty, otherwise
+     * below the parent of APPLICATION_PATH), applies any previously saved
+     * harvest date and creates the SRU connection.
+     *
+     * @throws \Exception if the harvest directory cannot be created.
+     */
+    public function __construct()
+    {
+        // Don't time out during harvest!!
+        set_time_limit(0);
+
+        // Work out where harvested files live and create the directory on
+        // first use:
+        $home = strlen(LOCAL_OVERRIDE_DIR) > 0
+            ? LOCAL_OVERRIDE_DIR
+            : realpath(APPLICATION_PATH . '/..');
+        $this->basePath = $home . '/harvest/lcnaf/';
+        if (!is_dir($this->basePath) && !mkdir($this->basePath)) {
+            throw new \Exception("Problem creating directory {$this->basePath}.");
+        }
+
+        // A saved date from an earlier run (if any) replaces the default
+        // start date:
+        $this->lastHarvestFile = $this->basePath . 'last_harvest.txt';
+        $this->loadLastHarvestedDate();
+
+        // Set up SRU connection:
+        $this->sru = new SRU('http://alcme.oclc.org/srw/search/lcnaf');
+    }
+
+    /**
+     * Override the date from which the harvest begins; launch() passes this
+     * value to the date scan as its starting point.
+     *
+     * @param string $date Start date (YYYY-MM-DD format).
+     *
+     * @return void
+     */
+    public function setStartDate($date)
+    {
+        $this->startDate = $date;
+    }
+
+    /**
+     * Run a complete harvest: first fetch records for every date after the
+     * configured start date, then work out which local records were deleted.
+     *
+     * @return void
+     */
+    public function launch()
+    {
+        // Step 1: download new/changed records, one file per date.
+        $this->scanDates($this->startDate);
+
+        // Step 2: compare remote and local ID lists to find deletions.
+        $this->detectDeletes();
+    }
+
+    /**
+     * Harvest LCCNs from OCLC to a temporary file in the harvest directory.
+     *
+     * The list is built by calling scanLCCNs() repeatedly, each time resuming
+     * from the value returned by the previous call, until it returns a falsy
+     * value.
+     *
+     * @throws \Exception if the file cannot be opened or a scan fails.
+     * @return string Filename of harvested data.
+     */
+    protected function harvestOCLCIds()
+    {
+        // Write into the harvest directory that the constructor guarantees to
+        // exist (rather than a directory next to this class file):
+        $lccnListFile = $this->basePath . 'lccn-list-' . time() . '.tmp';
+        $lccnList = fopen($lccnListFile, 'w');
+        if (!$lccnList) {
+            throw new \Exception('Problem opening file: ' . $lccnListFile . ".");
+        }
+
+        $lccn = '';
+        try {
+            do {
+                $lccn = $this->scanLCCNs($lccnList, $lccn);
+            } while ($lccn);
+        } catch (\Exception $e) {
+            // Don't leak the file handle when a scan gives up:
+            fclose($lccnList);
+            throw $e;
+        }
+        fclose($lccnList);
+        return $lccnListFile;
+    }
+
+    /**
+     * Harvest IDs from local Solr index to a temporary file in the harvest
+     * directory.
+     *
+     * IDs are read in pages of up to 10000 terms; each page request starts
+     * from the last ID written by the previous page.
+     *
+     * @throws \Exception if the output file cannot be opened.
+     * @return string Filename of harvested data.
+     */
+    protected function harvestLocalIds()
+    {
+        // Write into the harvest directory that the constructor guarantees to
+        // exist (rather than a directory next to this class file):
+        $localListFile = $this->basePath . 'id-list-' . time() . '.tmp';
+        $localList = fopen($localListFile, 'w');
+        if (!$localList) {
+            throw new \Exception('Problem opening file: ' . $localListFile . ".");
+        }
+        $id = '';
+        // NOTE(review): this file only imports VuFind\Connection\SRU, so the
+        // unqualified name below resolves inside the VuFind\Harvester
+        // namespace -- confirm that class exists or add the proper import.
+        $solr = ConnectionManager::connectToIndex('SolrAuth');
+        do {
+            echo "Reading IDs starting with '{$id}'...\n";
+            $list = $solr->getTerms('id', $id, 10000);
+            if (isset($list['terms']['id']) && !empty($list['terms']['id'])) {
+                // The loop variable doubles as the resume point: after the
+                // loop, $id holds the last ID of this page.
+                foreach ($list['terms']['id'] as $id => $count) {
+                    fwrite($localList, $id . "\n");
+                }
+            } else {
+                $id = false;
+            }
+        } while ($id);
+        fclose($localList);
+        return $localListFile;
+    }
+
+    /**
+     * Given sorted ID lists, determine which local IDs no longer exist in the
+     * remote list and write them to a file.
+     *
+     * Both input files must contain one ID per line and be sorted in the byte
+     * order used by strcmp().  IDs found only in the OCLC list (records not
+     * indexed yet) are ignored; IDs found only in the local list are written
+     * to $deletedFile.
+     *
+     * @param string $sortedOclcFile  File containing list of remote OCLC IDs.
+     * @param string $sortedLocalFile File containing list of local IDs.
+     * @param string $deletedFile     Filename to write deleted list to.
+     *
+     * @throws \Exception if any of the three files cannot be opened.
+     * @return void
+     */
+    protected function performDeleteComparison($sortedOclcFile, $sortedLocalFile,
+        $deletedFile
+    ) {
+        $oclcIn = fopen($sortedOclcFile, 'r');
+        if (!$oclcIn) {
+            throw new \Exception("Can't open {$sortedOclcFile}");
+        }
+        $localIn = fopen($sortedLocalFile, 'r');
+        if (!$localIn) {
+            fclose($oclcIn);
+            throw new \Exception("Can't open {$sortedLocalFile}");
+        }
+        $deleted = fopen($deletedFile, 'w');
+        if (!$deleted) {
+            fclose($oclcIn);
+            fclose($localIn);
+            throw new \Exception("Can't open {$deletedFile}");
+        }
+
+        // Flags to control which file(s) we read from:
+        $readOclc = $readLocal = true;
+        $oclcCurrent = $localCurrent = false;
+
+        while (true) {
+            // Read the next line from each file if necessary:
+            if ($readOclc) {
+                $oclcCurrent = fgets($oclcIn);
+            }
+            if ($readLocal) {
+                $localCurrent = fgets($localIn);
+            }
+
+            // Once the local list is exhausted nothing else can be deleted;
+            // any remaining OCLC IDs are simply records not indexed yet.
+            if ($localCurrent === false) {
+                break;
+            }
+
+            // If the OCLC list is exhausted, every remaining local ID is a
+            // deletion.  This must be tested before comparing: strcmp() would
+            // treat the end-of-file marker as an empty string sorting before
+            // every ID, and the local file would never advance.
+            if ($oclcCurrent === false) {
+                fputs($deleted, $localCurrent);
+                $readOclc = false;
+                $readLocal = true;
+                continue;
+            }
+
+            $comparison = strcmp($oclcCurrent, $localCurrent);
+            if ($comparison < 0) {
+                // OCLC is less than local: a record that hasn't been indexed
+                // yet; skip it and read the next OCLC line.
+                $readOclc = true;
+                $readLocal = false;
+            } else if ($comparison > 0) {
+                // OCLC is greater than local: the local record is gone from
+                // OCLC; write it to file and read the next local value.
+                fputs($deleted, $localCurrent);
+                $readOclc = false;
+                $readLocal = true;
+            } else {
+                // If current lines match, just read another pair of lines:
+                $readOclc = $readLocal = true;
+            }
+        }
+
+        fclose($oclcIn);
+        fclose($localIn);
+        fclose($deleted);
+    }
+
+    /**
+     * Scan the index for deleted records.
+     *
+     * Harvests the remote and local ID lists, sorts both with the external
+     * "sort" command, and writes the IDs that exist only locally to a
+     * timestamped .delete file in the harvest directory.  All intermediate
+     * files are removed afterwards.
+     *
+     * @throws \Exception if either list cannot be sorted (continuing with an
+     * empty or partial list would produce a wrong delete file).
+     * @return void
+     */
+    protected function detectDeletes()
+    {
+        // Harvest IDs from local and OCLC indexes:
+        $oclcFile = $this->harvestOCLCIds();
+        $localFile = $this->harvestLocalIds();
+
+        // Sort the two lists consistently.  All working files live in the
+        // harvest directory that the constructor guarantees to exist.
+        // NOTE(review): performDeleteComparison() compares with strcmp(), so
+        // "sort" must produce byte order -- confirm the locale it runs under.
+        $sortedOclcFile = $this->basePath . 'lccn-sorted.txt';
+        $sortedLocalFile = $this->basePath . 'id-sorted.txt';
+        $sortJobs = array(
+            $oclcFile => $sortedOclcFile,
+            $localFile => $sortedLocalFile
+        );
+        foreach ($sortJobs as $unsorted => $sorted) {
+            $output = array();
+            $status = 0;
+            // Escape the paths so directories containing spaces or shell
+            // metacharacters cannot break (or hijack) the command line:
+            exec(
+                'sort < ' . escapeshellarg($unsorted) .
+                ' > ' . escapeshellarg($sorted),
+                $output, $status
+            );
+            if ($status !== 0) {
+                throw new \Exception("Problem sorting {$unsorted}.");
+            }
+        }
+
+        // Delete unsorted data files:
+        unlink($oclcFile);
+        unlink($localFile);
+
+        // Diff the files in order to generate a .delete file so we can remove
+        // obsolete records from the Solr index:
+        $deletedFile = $this->basePath . time() . '.delete';
+        $this->performDeleteComparison(
+            $sortedOclcFile, $sortedLocalFile, $deletedFile
+        );
+
+        // Deleted sorted data files now that we are done with them:
+        unlink($sortedOclcFile);
+        unlink($sortedLocalFile);
+    }
+
+    /**
+     * Turn a raw LCCN into the ID format used for LCNAF records: spaces are
+     * removed, anything from the first forward slash onward is dropped, the
+     * part after the first hyphen is left-padded with zeros to at least six
+     * characters (the hyphen itself is removed), and the result is prefixed
+     * with "lcnaf-".
+     *
+     * @param string $lccn Regular LCCN
+     *
+     * @return string      Normalized LCCN
+     */
+    protected function normalizeLCCN($lccn)
+    {
+        // Strip spaces, then keep only what precedes the first slash:
+        $withoutSpaces = str_replace(" ", "", $lccn);
+        $beforeSlash = explode('/', $withoutSpaces, 2);
+        $normalized = $beforeSlash[0];
+
+        // Zero-pad the portion following the first hyphen to six characters:
+        $pieces = explode('-', $normalized, 2);
+        if (count($pieces) > 1) {
+            $normalized = $pieces[0] . str_pad($pieces[1], 6, "0", STR_PAD_LEFT);
+        }
+
+        return 'lcnaf-' . $normalized;
+    }
+
+    /**
+     * Fetch one batch of LCCNs from the LCNAF index and append their
+     * normalized forms to a file.
+     *
+     * If the response cannot be parsed as XML, the same request is retried
+     * (after a five second pause) up to three times before giving up.
+     *
+     * @param resource $handle File handle to write normalized LCCNs to.
+     * @param string   $start  Starting point in list to read from
+     * @param int      $retry  Retry counter (in case of connection problems).
+     *
+     * @throws \Exception if the response is still unparseable after retrying.
+     * @return string          Where to start the next scan to continue the
+     * operation (boolean false when finished).
+     */
+    protected function scanLCCNs($handle, $start = '', $retry = 0)
+    {
+        echo "Scanning LCCNs after \"{$start}\"...\n";
+
+        // Request the next batch of LCCNs following the starting point.
+        // NOTE(review): a failed request is treated like an empty response and
+        // ends the scan quietly, leaving a partial list -- confirm intended.
+        try {
+            $response = $this->sru->scan('local.LCCN="' . $start . '"', 0, 250);
+        } catch (\Exception $e) {
+            $response = false;
+        }
+        if (empty($response)) {
+            return false;
+        }
+
+        // Parse the response, keeping the raw text so that it can be reported
+        // if parsing fails:
+        $xml = simplexml_load_string($response);
+        if (!$xml) {
+            // We experienced a failure; let's retry three times before we
+            // give up and report failure.
+            if ($retry > 2) {
+                throw new \Exception("Problem loading XML: {$response}");
+            }
+            echo "Problem loading XML; retrying...\n";
+            // Wait a few seconds in case that helps...
+            sleep(5);
+            return $this->scanLCCNs($handle, $start, $retry + 1);
+        }
+
+        // Extract terms from the response:
+        $namespaces = $xml->getDocNamespaces();
+        $xml->registerXPathNamespace('ns', $namespaces['']);
+        $terms = $xml->xpath('ns:terms/ns:term');
+
+        // No terms?  We've hit the end of the road!
+        if (!is_array($terms)) {
+            return false;
+        }
+
+        // Write out every LCCN in this batch, remembering the last one so the
+        // caller can resume from it:
+        $lastLccn = false;
+        foreach ($terms as $term) {
+            $lastLccn = (string)$term->value;
+            fwrite($handle, $this->normalizeLCCN($lastLccn) . "\n");
+        }
+        return $lastLccn;
+    }
+
+    /**
+     * Retrieve the date from the "last harvested" file and use it as our start
+     * date if it is available.  A missing, unreadable or empty file leaves the
+     * current start date untouched.
+     *
+     * @return void
+     */
+    protected function loadLastHarvestedDate()
+    {
+        if (!file_exists($this->lastHarvestFile)) {
+            return;
+        }
+
+        // file() yields an empty array for an empty file, so make sure a
+        // first line really exists before reading it:
+        $lines = file($this->lastHarvestFile);
+        if (!is_array($lines) || !isset($lines[0])) {
+            return;
+        }
+
+        $date = trim($lines[0]);
+        if (!empty($date)) {
+            $this->setStartDate($date);
+        }
+    }
+
+    /**
+     * Overwrite the "last harvested" file with the given date so that a later
+     * run can resume from it.
+     *
+     * @param string $date Date to save.
+     *
+     * @return void
+     */
+    protected function saveLastHarvestedDate($date)
+    {
+        // The file holds nothing but the date itself.
+        file_put_contents($this->lastHarvestFile, $date);
+    }
+
+    /**
+     * Retrieve records modified on the specified date and store them in
+     * <date>.xml inside the harvest directory.
+     *
+     * A date whose file already exists is skipped, so a file is only left on
+     * disk when it is complete: on any failure the partial file is removed
+     * before the exception is thrown.
+     *
+     * @param string $date  Date of modification for retrieved records
+     * @param int    $count Number of records expected (double-check)
+     *
+     * @throws \Exception on I/O problems, retrieval problems or when the
+     * number of retrieved records does not match $count.
+     * @return void
+     */
+    protected function processDate($date, $count)
+    {
+        // Don't reload data we already have!
+        $path = $this->basePath . $date . '.xml';
+        if (file_exists($path)) {
+            return;
+        }
+
+        echo "Processing records for {$date}...\n";
+
+        // Open the output file:
+        $file = fopen($path, 'w');
+        if (!$file) {
+            throw new \Exception("Unable to open {$path} for writing.");
+        }
+
+        try {
+            $startTag = '<mx:collection xmlns:mx="http://www.loc.gov/MARC21/slim">';
+            if (!fwrite($file, $startTag)) {
+                throw new \Exception("Unable to open {$path} for writing.");
+            }
+
+            // Pull down all the records, one page at a time; a short page
+            // means there is nothing left to fetch:
+            $start = 1;
+            $limit = 250;
+            $query = 'oai.datestamp="' . $date . '"';
+            do {
+                $numFound = $this->getRecords($query, $start, $limit, $file);
+                $start += $numFound;
+            } while ($numFound == $limit);
+
+            if (!fwrite($file, '</mx:collection>')) {
+                throw new \Exception("Problem closing file.");
+            }
+        } catch (\Exception $e) {
+            // Never leave a partial file behind -- its mere existence would
+            // make later runs skip this date.  Close before deleting so that
+            // the delete also works where open files cannot be removed.
+            fclose($file);
+            unlink($path);
+            throw $e;
+        }
+
+        // Close the file:
+        if (!fclose($file)) {
+            unlink($path);
+            throw new \Exception("Problem closing file.");
+        }
+
+        // Sanity check -- did we get as many records as we expected to?
+        $finalCount = $start - 1;
+        if ($finalCount != $count) {
+            // Delete the problem file so we can rebuild it later:
+            unlink($path);
+            throw new \Exception(
+                "Problem loading records for {$date} -- " .
+                "expected {$count}, retrieved {$finalCount}."
+            );
+        }
+
+        // Update the "last harvested" file:
+        $this->saveLastHarvestedDate($date);
+    }
+
+    /**
+     * Fetch one page of MARC-XML records from LC NAF and append each record
+     * to an open file.
+     *
+     * @param string   $query Search query for loading records
+     * @param int      $start Index of first record to load
+     * @param int      $limit Maximum number of records to load
+     * @param resource $file  Open file handle to write records to
+     *
+     * @throws \Exception if the response cannot be parsed as XML.
+     * @return int            Number of records written; 0 when the page is
+     * empty or as soon as a write fails.
+     */
+    protected function getRecords($query, $start, $limit, $file)
+    {
+        // Ask the SRU service for this page of results:
+        $xml = $this->sru->search(
+            $query, $start, $limit, null, 'info:srw/schema/1/marcxml-v1.1', false
+        );
+        $parsed = simplexml_load_string($xml);
+        if (!$parsed) {
+            throw new \Exception("Problem loading XML: {$xml}");
+        }
+
+        // Locate the MARC records inside the response envelope:
+        $namespaces = $parsed->getDocNamespaces();
+        $parsed->registerXPathNamespace('ns', $namespaces['']);
+        $parsed->registerXPathNamespace('mx', 'http://www.loc.gov/MARC21/slim');
+        $records = $parsed->xpath(
+            'ns:records/ns:record/ns:recordData/mx:record'
+        );
+
+        // No records?  We've hit the end of the line!
+        if (empty($records)) {
+            return 0;
+        }
+
+        // Append every record; reporting zero on a failed write makes the
+        // caller's record count check fail so the date can be retried later.
+        foreach ($records as $record) {
+            if (!fwrite($file, $record->asXML())) {
+                return 0;
+            }
+        }
+
+        return count($records);
+    }
+
+    /**
+     * Scan the remote index for dates we can retrieve and harvest the records
+     * for each of them, batch after batch, until no further dates are found.
+     *
+     * @param string $start The date to use as the basis for scanning; this date
+     * will NOT be included in results.
+     *
+     * @throws \Exception if a response cannot be parsed or a date cannot be
+     * processed.
+     * @return void
+     */
+    protected function scanDates($start)
+    {
+        // Each pass handles one batch of dates and then resumes from the last
+        // date seen (a loop rather than recursion keeps the call stack flat).
+        while (true) {
+            echo "Scanning dates after {$start}...\n";
+
+            // Find all dates AFTER the specified start date; a failed request
+            // is treated like an empty response and ends the scan.
+            try {
+                $response = $this->sru->scan(
+                    'oai.datestamp="' . $start . '"', 0, 250
+                );
+            } catch (\Exception $e) {
+                $response = false;
+            }
+            if (empty($response)) {
+                return;
+            }
+
+            // Parse the response, keeping the raw text so that it can be
+            // reported if parsing fails:
+            $xml = simplexml_load_string($response);
+            if (!$xml) {
+                throw new \Exception("Problem loading XML: {$response}");
+            }
+
+            // Extract terms from the response:
+            $namespaces = $xml->getDocNamespaces();
+            $xml->registerXPathNamespace('ns', $namespaces['']);
+            $terms = $xml->xpath('ns:terms/ns:term');
+
+            // No terms?  We've hit the end of the road!
+            if (!is_array($terms)) {
+                return;
+            }
+
+            // Process all the dates in this batch:
+            $lastDate = null;
+            foreach ($terms as $term) {
+                $lastDate = (string)$term->value;
+                $this->processDate($lastDate, (int)$term->numberOfRecords);
+            }
+
+            // Stop when the batch was empty or made no progress; otherwise
+            // continue with results following the last date encountered.
+            if ($lastDate === null || $lastDate === $start) {
+                return;
+            }
+            $start = $lastDate;
+        }
+    }
+}
diff --git a/module/VuFind/src/VuFind/Harvester/OAI.php b/module/VuFind/src/VuFind/Harvester/OAI.php
new file mode 100644
index 0000000000000000000000000000000000000000..c4791087cc8e4c948fd5709c27d0bbc02314ceff
--- /dev/null
+++ b/module/VuFind/src/VuFind/Harvester/OAI.php
@@ -0,0 +1,600 @@
+<?php
+/**
+ * OAI-PMH Harvest Tool
+ *
+ * PHP version 5
+ *
+ * Copyright (c) Demian Katz 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Harvest_Tools
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/importing_records#oai-pmh_harvesting Wiki
+ */
+namespace VuFind\Harvester;
+use VuFind\Http\Client;
+
+/**
+ * OAI Class
+ *
+ * This class harvests records via OAI-PMH using settings from oai.ini.
+ *
+ * @category VuFind2
+ * @package  Harvest_Tools
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/importing_records#oai-pmh_harvesting Wiki
+ */
+class OAI
+{
+    protected $baseURL;               // URL to harvest from
+    protected $set = null;            // Target set to harvest (null for all records)
+    protected $metadata = 'oai_dc';   // Metadata type to harvest
+    protected $idPrefix = '';         // OAI prefix to strip from ID values
+    protected $idSearch = array();    // Regular expression searches
+    protected $idReplace = array();   // Replacements for regular expression matches
+    protected $basePath;              // Directory for storing harvested files
+    protected $lastHarvestFile;       // File for tracking last harvest date
+    protected $startDate = null;      // Harvest start date (null for all records)
+    protected $granularity = 'auto';  // Date granularity
+    protected $injectId = false;      // Tag to use for injecting IDs into XML
+    protected $injectSetSpec = false; // Tag to use for injecting setSpecs
+    protected $injectSetName = false; // Tag to use for injecting set names
+    protected $injectDate = false;    // Tag to use for injecting datestamp
+    protected $setNames = array();    // Associative array of setSpec => setName
+    protected $harvestedIdLog = false;// Filename for logging harvested IDs.
+    protected $verbose = false;       // Should we display debug output?
+
+    // As we harvest records, we want to track the most recent date encountered
+    // so we can set a start point for the next harvest.
+    protected $endDate = 0;
+
+    /**
+     * Constructor.
+     *
+     * Prepares the harvest directory, picks up any saved start date, copies
+     * the recognised oai.ini settings onto this object and then contacts the
+     * server where needed (set names when injectSetName is configured, date
+     * granularity when it is left on "auto").
+     *
+     * @param string $target   Target directory for harvest.
+     * @param array  $settings OAI-PMH settings from oai.ini.
+     *
+     * @throws \Exception if the directory cannot be created or no base URL
+     * is configured.
+     */
+    public function __construct($target, $settings)
+    {
+        // Don't time out during harvest!!
+        set_time_limit(0);
+
+        // Set up base directory for harvested files:
+        $this->setBasePath($target);
+
+        // Check if there is a file containing a start date:
+        $this->lastHarvestFile = $this->basePath . 'last_harvest.txt';
+        $this->loadLastHarvestedDate();
+
+        // Set up base URL:
+        if (empty($settings['url'])) {
+            throw new \Exception("Missing base URL for {$target}.");
+        }
+        $this->baseURL = $settings['url'];
+
+        // Copy optional settings (ini key => property name); anything missing
+        // from the configuration keeps its default value.
+        $optional = array(
+            'set' => 'set',
+            'metadataPrefix' => 'metadata',
+            'idPrefix' => 'idPrefix',
+            'idSearch' => 'idSearch',
+            'idReplace' => 'idReplace',
+            'harvestedIdLog' => 'harvestedIdLog',
+            'injectId' => 'injectId',
+            'injectSetSpec' => 'injectSetSpec',
+            'injectSetName' => 'injectSetName',
+            'injectDate' => 'injectDate',
+            'dateGranularity' => 'granularity',
+            'verbose' => 'verbose'
+        );
+        foreach ($optional as $key => $property) {
+            if (isset($settings[$key])) {
+                $this->$property = $settings[$key];
+            }
+        }
+
+        // Talk to the server only after every setting has been applied, so
+        // that options such as "verbose" also take effect for these requests:
+        if (isset($settings['injectSetName'])) {
+            $this->loadSetNames();
+        }
+        if ($this->granularity == 'auto') {
+            $this->loadGranularity();
+        }
+    }
+
+    /**
+     * Override the date from which the harvest begins; launch() passes this
+     * value to the first record request.
+     *
+     * @param string $date Start date (YYYY-MM-DD format).
+     *
+     * @return void
+     */
+    public function setStartDate($date)
+    {
+        $this->startDate = $date;
+    }
+
+    /**
+     * Harvest all available documents: request the first page by start date
+     * and set, then keep following resumption tokens until none is returned.
+     *
+     * @return void
+     */
+    public function launch()
+    {
+        // The first request is date-based; every later one is token-based and
+        // the loop ends as soon as a request reports no further token (false).
+        for (
+            $token = $this->getRecordsByDate($this->startDate, $this->set);
+            $token !== false;
+            $token = $this->getRecordsByToken($token)
+        ) {
+            // All the work happens in the requests themselves.
+        }
+    }
+
+    /**
+     * Work out the directory for this target's harvested files and create it
+     * if necessary (support method for constructor).
+     *
+     * The directory is harvest/<target>/ below LOCAL_OVERRIDE_DIR when that
+     * constant is non-empty, otherwise below the parent of APPLICATION_PATH.
+     *
+     * @param string $target The OAI-PMH target directory to create.
+     *
+     * @throws \Exception if the directory cannot be created.
+     * @return void
+     */
+    protected function setBasePath($target)
+    {
+        // Pick the root directory:
+        $home = strlen(LOCAL_OVERRIDE_DIR) > 0
+            ? LOCAL_OVERRIDE_DIR
+            : realpath(APPLICATION_PATH . '/..');
+
+        // Build the full harvest path:
+        $this->basePath = $home . '/harvest/' . $target . '/';
+
+        // Create the directory on first use:
+        if (!is_dir($this->basePath) && !mkdir($this->basePath)) {
+            throw new \Exception("Problem creating directory {$this->basePath}.");
+        }
+    }
+
+    /**
+     * Retrieve the date from the "last harvested" file and use it as our start
+     * date if it is available.  A missing, unreadable or empty file leaves the
+     * current start date untouched.
+     *
+     * @return void
+     */
+    protected function loadLastHarvestedDate()
+    {
+        if (!file_exists($this->lastHarvestFile)) {
+            return;
+        }
+
+        // file() yields an empty array for an empty file, so make sure a
+        // first line really exists before reading it:
+        $lines = file($this->lastHarvestFile);
+        if (!is_array($lines) || !isset($lines[0])) {
+            return;
+        }
+
+        $date = trim($lines[0]);
+        if (!empty($date)) {
+            $this->setStartDate($date);
+        }
+    }
+
+    /**
+     * Convert a date string to a Unix timestamp.  Every "T" is replaced by a
+     * space and every "Z" is removed before the string is handed to
+     * strtotime().
+     *
+     * @param string $date Date (ISO-8601 or YYYY-MM-DD HH:MM:SS)
+     *
+     * @return integer     Unix timestamp (or false if $date invalid)
+     */
+    protected function normalizeDate($date)
+    {
+        // Drop the ISO-8601 markers first -- we don't want PHP to outsmart us
+        // by adjusting the time zone!
+        $withoutMarkers = str_replace('Z', '', str_replace('T', ' ', $date));
+
+        return strtotime($withoutMarkers);
+    }
+
+    /**
+     * Overwrite the "last harvested" file with the given date so that a later
+     * run can resume from it.
+     *
+     * @param string $date Date to save.
+     *
+     * @return void
+     */
+    protected function saveLastHarvestedDate($date)
+    {
+        // The file holds nothing but the date itself.
+        file_put_contents($this->lastHarvestFile, $date);
+    }
+
+    /**
+     * Make an OAI-PMH request.  Die if there is an error; return a SimpleXML object
+     * on success.
+     *
+     * The request is sent as a GET to the configured base URL with "verb" plus
+     * the given parameters as its query string.  A 503 response causes the
+     * request to be repeated; any other unsuccessful response raises an
+     * exception.  The body of a successful response is handed to
+     * processResponse().
+     *
+     * @param string $verb   OAI-PMH verb to execute.
+     * @param array  $params GET parameters for ListRecords method.
+     *
+     * @throws \Exception on a non-503 HTTP failure (and whatever
+     * processResponse() throws for a bad body).
+     * @return object        SimpleXML-formatted response.
+     */
+    protected function sendRequest($verb, $params = array())
+    {
+        // Debug:
+        if ($this->verbose) {
+            echo "Sending request: verb = {$verb}, params = ";
+            print_r($params);
+        }
+
+        // Set up retry loop; it is left only through the "break" on success or
+        // the exception on a hard failure.
+        // NOTE(review): there is no cap on the number of retries, and a 503
+        // without a positive Retry-After delay is retried immediately --
+        // confirm this is intended.
+        while (true) {
+            // Set up the request (a fresh client for every attempt):
+            $request = new Client(
+                null, array('timeout' => 60)    // TODO: make timeout configurable
+            );
+            $request->setUri($this->baseURL);
+
+            // Load request parameters:
+            $query = $request->getRequest()->getQuery();
+            $query->set('verb', $verb);
+            foreach ($params as $key => $value) {
+                $query->set($key, $value);
+            }
+
+            // Perform request and die on error:
+            $result = $request->setMethod('GET')->send();
+            if ($result->getStatusCode() == 503) {
+                // Server asked us to come back later: honour the delay from
+                // the Retry-After header (when one is present), then loop.
+                $delayHeader = $result->getHeaders()->get('Retry-After');
+                $delay = is_object($delayHeader)
+                    ? $delayHeader->getDeltaSeconds() : 0;
+                if ($delay > 0) {
+                    if ($this->verbose) {
+                        echo "Received 503 response; waiting {$delay} seconds...\n";
+                    }
+                    sleep($delay);
+                }
+            } else if (!$result->isSuccess()) {
+                throw new \Exception('HTTP Error');
+            } else {
+                // If we didn't get an error, we can leave the retry loop:
+                break;
+            }
+        }
+
+        // If we got this far, there was no error -- send back response.
+        return $this->processResponse($result->getBody());
+    }
+
+    /**
+     * Parse an OAI-PMH response body and make sure it does not report an
+     * error.
+     *
+     * @param string $xml OAI-PMH response XML.
+     *
+     * @throws \Exception if the body is not parseable XML or contains an
+     * <error> element.
+     * @return object     SimpleXML-formatted response.
+     */
+    protected function processResponse($xml)
+    {
+        // Anything that is not well-formed XML is reported with the raw body:
+        $result = simplexml_load_string($xml);
+        if (!$result) {
+            throw new \Exception("Problem loading XML: {$xml}");
+        }
+
+        // An <error> element means the server rejected the request; report
+        // its code attribute and its text.
+        if ($result->error) {
+            $attribs = $result->error->attributes();
+            $message = "OAI-PMH error -- code: {$attribs['code']}, " .
+                "value: {$result->error}";
+            throw new \Exception($message);
+        }
+
+        return $result;
+    }
+
+    /**
+     * Build the path of the file used to store something about a record:
+     * <harvest directory><current Unix time>_<sanitized ID>.<extension>,
+     * where every non-word character of the ID is replaced by an underscore.
+     *
+     * @param string $id  ID of record to save.
+     * @param string $ext File extension to use.
+     *
+     * @return string     Full path + filename.
+     */
+    protected function getFilename($id, $ext)
+    {
+        $prefix = $this->basePath . time() . '_';
+        $safeId = preg_replace('/[^\w]/', '_', $id);
+        return $prefix . $safeId . '.' . $ext;
+    }
+
+    /**
+     * Record the deletion of a record by writing its ID into a new ".delete"
+     * file in the harvest directory.
+     *
+     * @param string $id ID of deleted record.
+     *
+     * @return void
+     */
+    protected function saveDeletedRecord($id)
+    {
+        // The tracking file contains nothing but the ID itself.
+        file_put_contents($this->getFilename($id, 'delete'), $id);
+    }
+
+    /**
+     * Save a record to disk.
+     *
+     * The record's metadata is written to a new ".xml" file in the harvest
+     * directory.  Depending on configuration, extra elements carrying the
+     * record ID, datestamp, setSpec values and set names are inserted
+     * directly after the first tag of the metadata.
+     *
+     * @param string $id     ID of record to save.
+     * @param object $record Record to save (in SimpleXML format).
+     *
+     * @throws \Exception if the record has no metadata.
+     * @return void
+     */
+    protected function saveRecord($id, $record)
+    {
+        if (!isset($record->metadata)) {
+            throw new \Exception("Unexpected missing record metadata.");
+        }
+
+        // Extract the actual metadata from inside the <metadata></metadata> tags;
+        // there is probably a cleaner way to do this, but this simple method avoids
+        // the complexity of dealing with namespaces in SimpleXML:
+        $xml = trim($record->metadata->asXML());
+        $xml = preg_replace('/(^<metadata>)|(<\/metadata>$)/m', '', $xml);
+
+        // If we are supposed to inject any values, do so now inside the first
+        // tag of the file:
+        $insert = '';
+        if (!empty($this->injectId)) {
+            $insert .= "<{$this->injectId}>" . htmlspecialchars($id) .
+                "</{$this->injectId}>";
+        }
+        if (!empty($this->injectDate)) {
+            $insert .= "<{$this->injectDate}>" .
+                htmlspecialchars((string)$record->header->datestamp) .
+                "</{$this->injectDate}>";
+        }
+        if (!empty($this->injectSetSpec)) {
+            if (isset($record->header->setSpec)) {
+                foreach ($record->header->setSpec as $current) {
+                    $insert .= "<{$this->injectSetSpec}>" .
+                        htmlspecialchars((string)$current) .
+                        "</{$this->injectSetSpec}>";
+                }
+            }
+        }
+        if (!empty($this->injectSetName)) {
+            if (isset($record->header->setSpec)) {
+                foreach ($record->header->setSpec as $current) {
+                    // A setSpec missing from the loaded set list yields an
+                    // empty name instead of an undefined index notice:
+                    $spec = (string)$current;
+                    $name = isset($this->setNames[$spec])
+                        ? $this->setNames[$spec] : '';
+                    $insert .= "<{$this->injectSetName}>" .
+                        htmlspecialchars($name) .
+                        "</{$this->injectSetName}>";
+                }
+            }
+        }
+        if (!empty($insert)) {
+            // Splice the text in after the first ">" with plain string
+            // functions: a regular expression replacement would interpret
+            // sequences such as "$1" or "\1" inside IDs and set names as
+            // backreferences and corrupt them.
+            $firstTagEnd = strpos($xml, '>');
+            if ($firstTagEnd !== false) {
+                $xml = substr_replace($xml, '>' . $insert, $firstTagEnd, 1);
+            }
+        }
+
+        // Save our XML:
+        file_put_contents($this->getFilename($id, 'xml'), trim($xml));
+    }
+
+    /**
+     * Ask the server which date granularity it uses.
+     *
+     * Sends an Identify request, stores the granularity value from the
+     * response in $this->granularity and echoes progress messages.
+     *
+     * @return void
+     */
+    protected function loadGranularity()
+    {
+        echo "Autodetecting date granularity... ";
+
+        $identify = $this->sendRequest('Identify');
+        $detected = (string)$identify->Identify->granularity;
+        $this->granularity = $detected;
+
+        echo 'found ' . $detected . ".\n";
+    }
+
+    /**
+     * Load set list from the server.
+     *
+     * Issues ListSets requests, following resumption tokens page by page, and
+     * fills $this->setNames with a setSpec => setName map. Progress and the
+     * final number of known sets are echoed.
+     *
+     * @return void
+     */
+    protected function loadSetNames()
+    {
+        echo "Loading set list... ";
+
+        // The first request carries no parameters; each later request carries
+        // the resumption token returned with the previous page.
+        $params = array();
+
+        do {
+            $response = $this->sendRequest('ListSets', $params);
+
+            // Record every set on this page that has a non-empty setSpec:
+            if (isset($response->ListSets->set)) {
+                foreach ($response->ListSets->set as $set) {
+                    $spec = (string)$set->setSpec;
+                    if (empty($spec)) {
+                        continue;
+                    }
+                    $this->setNames[$spec] = (string)$set->setName;
+                }
+            }
+
+            // A non-empty resumption token means there is another page:
+            $hasMore = !empty($response->ListSets->resumptionToken);
+            if ($hasMore) {
+                $params['resumptionToken']
+                    = (string)$response->ListSets->resumptionToken;
+            }
+        } while ($hasMore);
+
+        echo "found " . count($this->setNames) . "\n";
+    }
+
+    /**
+     * Extract the ID from a record object (support method for processRecords()).
+     *
+     * The header identifier is taken as a string, the configured prefix
+     * ($this->idPrefix) is removed when the identifier starts with it, and the
+     * optional search/replace pattern ($this->idSearch / $this->idReplace) is
+     * then applied.
+     *
+     * @param object $record SimpleXML record.
+     *
+     * @return string        The ID value.
+     */
+    protected function extractID($record)
+    {
+        // Normalize to string:
+        $id = (string)$record->header->identifier;
+
+        // Strip prefix if found.  strncmp() is used instead of a loose "=="
+        // so that numeric-looking strings are compared byte for byte rather
+        // than by numeric value:
+        $prefix = (string)$this->idPrefix;
+        $prefixLength = strlen($prefix);
+        if ($prefixLength > 0 && strncmp($id, $prefix, $prefixLength) === 0) {
+            // Cast because substr() may return false instead of an empty
+            // string on older PHP versions when nothing follows the prefix:
+            $id = (string)substr($id, $prefixLength);
+        }
+
+        // Apply regular expression matching:
+        if (!empty($this->idSearch)) {
+            $id = preg_replace($this->idSearch, $this->idReplace, $id);
+        }
+
+        // Return final value:
+        return $id;
+    }
+
+    /**
+     * Save harvested records to disk and track the end date.
+     *
+     * Each record is written either as a deletion marker or as a regular
+     * record file, $this->endDate is advanced to the newest datestamp seen,
+     * and (when $this->harvestedIdLog is set) the IDs of the regular records
+     * are appended to that log, one per line.
+     *
+     * @param object $records SimpleXML records.
+     *
+     * @throws \Exception If a record has no header or the ID log cannot be
+     * opened.
+     * @return void
+     */
+    protected function processRecords($records)
+    {
+        echo 'Processing ' . count($records) . " records...\n";
+
+        // Array for tracking successfully harvested IDs:
+        $harvestedIds = array();
+
+        // Loop through the records:
+        foreach ($records as $record) {
+            // Die if the record is missing its header:
+            if (empty($record->header)) {
+                throw new \Exception("Unexpected missing record header.");
+            }
+
+            // Get the ID of the current record:
+            $id = $this->extractID($record);
+
+            // Save the current record, either as a deleted or as a regular file.
+            // The status attribute is optional, so cast it to a string before
+            // lower-casing it:
+            $attribs = $record->header->attributes();
+            if (strtolower((string)$attribs['status']) === 'deleted') {
+                $this->saveDeletedRecord($id);
+            } else {
+                $this->saveRecord($id, $record);
+                $harvestedIds[] = $id;
+            }
+
+            // If the current record's date is newer than the previous end date,
+            // remember it for future reference:
+            $date = $this->normalizeDate($record->header->datestamp);
+            if ($date && $date > $this->endDate) {
+                $this->endDate = $date;
+            }
+        }
+
+        // Do we have IDs to log and a log filename?  If so, log them:
+        if (!empty($this->harvestedIdLog) && !empty($harvestedIds)) {
+            $file = fopen($this->basePath . $this->harvestedIdLog, 'a');
+            if (!$file) {
+                throw new \Exception("Problem opening {$this->harvestedIdLog}.");
+            }
+            // The log is opened in append mode and this method runs once per
+            // batch of records, so terminate the final line; otherwise the
+            // last ID of this batch and the first ID of the next one would
+            // end up fused on a single line.
+            fputs($file, implode(PHP_EOL, $harvestedIds) . PHP_EOL);
+            fclose($file);
+        }
+    }
+
+    /**
+     * Harvest one page of records using OAI-PMH.
+     *
+     * Sends a ListRecords request, hands any returned records to
+     * processRecords(), and either returns the resumption token for the next
+     * page or, when there is none, saves the last harvested date.
+     *
+     * @param array $params GET parameters for ListRecords method.
+     *
+     * @return mixed        Resumption token if provided, false if finished
+     */
+    protected function getRecords($params)
+    {
+        $response = $this->sendRequest('ListRecords', $params);
+
+        // Store whatever records came back with this page:
+        if ($response->ListRecords->record) {
+            $this->processRecords($response->ListRecords->record);
+        }
+
+        // A non-empty resumption token means the caller should keep going:
+        if (!empty($response->ListRecords->resumptionToken)) {
+            return $response->ListRecords->resumptionToken;
+        }
+
+        // No token, so this was the last page.  If any datestamp was tracked,
+        // save it in the format matching the granularity in use:
+        if ($this->endDate > 0) {
+            $dateFormat = 'Y-m-d\TH:i:s\Z';
+            if ($this->granularity == 'YYYY-MM-DD') {
+                $dateFormat = 'Y-m-d';
+            }
+            $this->saveLastHarvestedDate(date($dateFormat, $this->endDate));
+        }
+        return false;
+    }
+
+    /**
+     * Start a harvest via OAI-PMH, optionally limited by date and set.
+     *
+     * Builds the ListRecords parameters (metadataPrefix, plus "from" and "set"
+     * when non-empty values are given) and delegates to getRecords().
+     *
+     * @param string $date Harvest start date (null for all records).
+     * @param string $set  Set to harvest (null for all records).
+     *
+     * @return mixed        Resumption token if provided, false if finished
+     */
+    protected function getRecordsByDate($date = null, $set = null)
+    {
+        $params = array('metadataPrefix' => $this->metadata);
+
+        // Only pass along the optional filters that were actually supplied:
+        $optional = array('from' => $date, 'set' => $set);
+        foreach ($optional as $key => $value) {
+            if (!empty($value)) {
+                $params[$key] = $value;
+            }
+        }
+
+        return $this->getRecords($params);
+    }
+
+    /**
+     * Continue a harvest via OAI-PMH using a resumption token.
+     *
+     * The token is converted to a string and sent as the only ListRecords
+     * parameter through getRecords().
+     *
+     * @param string $token Resumption token.
+     *
+     * @return mixed        Resumption token if provided, false if finished
+     */
+    protected function getRecordsByToken($token)
+    {
+        $params = array();
+        $params['resumptionToken'] = (string)$token;
+        return $this->getRecords($params);
+    }
+}