From 042f634fd5322bfdd717ea1f55e435b8e6066f67 Mon Sep 17 00:00:00 2001
From: Demian Katz <demian.katz@villanova.edu>
Date: Fri, 16 Nov 2012 13:07:52 -0500
Subject: [PATCH] Resolving VUFIND-629 (improved Wikipedia results using
 authority data and VIAF API).

---
 config/vufind/searches.ini                    |   7 +-
 module/VuFind/config/module.config.php        |   6 +-
 .../src/VuFind/Recommend/AuthorInfo.php       | 124 +++++++++++++++---
 .../src/VuFind/RecordDriver/SolrAuth.php      |  20 +++
 4 files changed, 140 insertions(+), 17 deletions(-)

diff --git a/config/vufind/searches.ini b/config/vufind/searches.ini
index 9ace9c2cfba..d6f5f4d77ef 100644
--- a/config/vufind/searches.ini
+++ b/config/vufind/searches.ini
@@ -207,9 +207,14 @@ CallNumber = callnumber
 ;
 ; AuthorFacets
 ;       Display author names based on the search term.
-; AuthorInfo
+; AuthorInfo:[use_viaf]
 ;       Load author information from external providers based on the current search
 ;       and the "authors" setting in the [Content] section of config.ini.
+;       The [use_viaf] setting may be set to true to use the VIAF web service in
+;       combination with your Solr authority index to pick more appropriate authors;
+;       this defaults to false if unset.  If you do not have local authority records
+;       to draw on, the OCLC FAST data works well with the [use_viaf] option; see:
+;       http://vufind.org/wiki/open_data_sources#oclc_fast
 ; AuthorityRecommend:[field1]:[query1]:[field2]:[query2]:...:[field-n]:[query-n]
 ;       Displays see and see also results to user based on search of Authority Index.
 ;       Any number of filter queries may be specified as parameters to limit which
diff --git a/module/VuFind/config/module.config.php b/module/VuFind/config/module.config.php
index d6614d5e856..2475b5eac28 100644
--- a/module/VuFind/config/module.config.php
+++ b/module/VuFind/config/module.config.php
@@ -231,6 +231,11 @@ $config = array(
     'recommend_plugin_manager' => array(
         'abstract_factories' => array('VuFind\Recommend\PluginFactory'),
         'factories' => array(
+            'authorinfo' => function ($sm) {
+                return new \VuFind\Recommend\AuthorInfo(
+                    $sm->getServiceLocator()->get('SearchManager')
+                );
+            },
             'worldcatidentities' => function ($sm) {
                 return new \VuFind\Recommend\WorldCatIdentities(
                     $sm->getServiceLocator()->get('VuFind\WorldCatUtils')
@@ -244,7 +249,6 @@ $config = array(
         ),
         'invokables' => array(
             'authorfacets' => 'VuFind\Recommend\AuthorFacets',
-            'authorinfo' => 'VuFind\Recommend\AuthorInfo',
             'authorityrecommend' => 'VuFind\Recommend\AuthorityRecommend',
             'catalogresults' => 'VuFind\Recommend\CatalogResults',
             'europeanaresults' => 'VuFind\Recommend\EuropeanaResults',
diff --git a/module/VuFind/src/VuFind/Recommend/AuthorInfo.php b/module/VuFind/src/VuFind/Recommend/AuthorInfo.php
index 43516a560b9..b4271d3926e 100644
--- a/module/VuFind/src/VuFind/Recommend/AuthorInfo.php
+++ b/module/VuFind/src/VuFind/Recommend/AuthorInfo.php
@@ -65,6 +65,30 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface
      */
     protected $lang;
 
+    /**
+     * Search manager
+     *
+     * @var \VuFind\Search\Manager
+     */
+    protected $searchManager;
+
+    /**
+     * Should we use VIAF for authorized names?
+     *
+     * @var bool
+     */
+    protected $useViaf = false;
+
+    /**
+     * Constructor; keeps the search manager for authority (SolrAuth) lookups.
+     *
+     * @param \VuFind\Search\Manager $searchManager Search manager
+     */
+    public function __construct(\VuFind\Search\Manager $searchManager)
+    {
+        $this->searchManager = $searchManager;
+    }
+
     /**
      * setConfig
      *
@@ -78,6 +102,13 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface
     {
         $translator = $this->getTranslator();
         $this->lang = is_object($translator) ? $translator->getLocale() : 'en';
+
+        $parts = explode(':', $settings);
+        if (isset($parts[0]) && !empty($parts[0])
+            && strtolower(trim($parts[0])) !== 'false'
+        ) {
+            $this->useViaf = true;
+        }
     }
 
     /**
@@ -403,6 +434,81 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface
         return $info;
     }
 
+    /**
+     * Normalize an author name using internal logic.
+     *
+     * @param string $author Author name
+     *
+     * @return string
+     */
+    protected function normalizeName($author)
+    {
+        // Strip life dates such as "1835-1910" or an open-ended "1950-":
+        $author = preg_replace('/[0-9]+-[0-9]*/', '', $author);
+        // Drop the separators left dangling at either end by the removal:
+        $author = trim($author, ', .');
+
+        // Inverted headings are comma-delimited; rotate the surname to the end
+        // (Last, First M. -> First M. Last).  A name without commas yields a
+        // single-element array and passes through unchanged.
+        $nameParts = explode(', ', $author);
+        $nameParts[] = array_shift($nameParts);
+
+        // Glue goes first: the legacy implode(array, glue) argument order was
+        // deprecated in PHP 7.4 and removed in PHP 8.0.
+        return implode(' ', $nameParts);
+    }
+
+    /**
+     * Translate an LCCN to a Wikipedia name through the VIAF web service.  Returns
+     * false if no value can be found.
+     *
+     * @param string $lccn LCCN
+     *
+     * @return string|bool
+     */
+    protected function getWikipediaNameFromViaf($lccn)
+    {
+        // Build the VIAF source ID by prefixing the LCCN with "LC|":
+        $sourceId = urlencode("LC|$lccn");
+        $url = "http://viaf.org/viaf/sourceID/{$sourceId}/justlinks.json";
+        $http = new \VuFind\Http\Client();
+        $response = $http->setUri($url)->setMethod('GET')->send();
+        $links = $response->isSuccess()
+            ? json_decode($response->getBody()) : null;
+        // Return the first WKP entry; false on failure or when none is listed:
+        return isset($links->WKP[0]) ? $links->WKP[0] : false;
+    }
+
+    /**
+     * Normalize an author name using VIAF.
+     *
+     * @param string $author Author name
+     *
+     * @return string
+     */
+    protected function normalizeNameWithViaf($author)
+    {
+        // Run a phrase search for the name against the authority index:
+        $auth = $this->searchManager->setSearchClassId('SolrAuth')->getResults();
+        $auth->getParams()->setBasicSearch('"' . $author . '"', 'MainHeading');
+        // Walk the matches; the first LCCN that VIAF maps to a name wins.  The
+        // result key is unused, so only the record driver is bound:
+        foreach ($auth->getResults() as $current) {
+            $lccn = $current->tryMethod('getRawLCCN');
+            if (empty($lccn)) {
+                continue;
+            }
+            $name = $this->getWikipediaNameFromViaf($lccn);
+            if (!empty($name)) {
+                return $name;
+            }
+        }
+
+        // Nothing usable came back from VIAF; fall back to local normalization:
+        return $this->normalizeName($author);
+    }
+
     /**
      * Takes the search term and extracts a normal name from it
      *
@@ -415,21 +521,9 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface
             $author = $search[0]['lookfor'];
             // remove quotes
             $author = str_replace('"', '', $author);
-            // remove dates
-            $author = preg_replace('/[0-9]+-[0-9]*/', '', $author);
-            // if name is rearranged by commas
-            $author = trim($author, ', .');
-            $nameParts = explode(', ', $author);
-            $last = $nameParts[0];
-            // - move all names up an index, move last name to last
-            // - Last, First M. -> First M. Last
-            for ($i=1;$i<count($nameParts);$i++) {
-                $nameParts[$i-1] = $nameParts[$i];
-            }
-            $nameParts[count($nameParts)-1] = $last;
-            $author = implode($nameParts, ' ');
-            // remove punctuation
-            return $author;
+            return $this->useViaf
+                ? $this->normalizeNameWithViaf($author)
+                : $this->normalizeName($author);
         }
         return '';
     }
diff --git a/module/VuFind/src/VuFind/RecordDriver/SolrAuth.php b/module/VuFind/src/VuFind/RecordDriver/SolrAuth.php
index b24cca0055d..da979b6a631 100644
--- a/module/VuFind/src/VuFind/RecordDriver/SolrAuth.php
+++ b/module/VuFind/src/VuFind/RecordDriver/SolrAuth.php
@@ -89,4 +89,24 @@ class SolrAuth extends SolrMarc
             && is_array($this->fields['use_for'])
             ? $this->fields['use_for'] : array();
     }
+
+    /**
+     * Get a raw LCCN (not normalized).  Returns false if none available.
+     *
+     * @return string|bool
+     */
+    public function getRawLCCN()
+    {
+        // Prefer field 010; else the first 700/0 value with a "(DLC)" prefix.
+        $lccn = $this->getFirstFieldValue('010');
+        if (!empty($lccn)) {
+            return $lccn;
+        }
+        foreach ($this->getFieldArray('700', array('0')) as $candidate) {
+            if (substr($candidate, 0, 5) === '(DLC)') {
+                return substr($candidate, 5);
+            }
+        }
+        return false;
+    }
 }
-- 
GitLab