From 042f634fd5322bfdd717ea1f55e435b8e6066f67 Mon Sep 17 00:00:00 2001 From: Demian Katz <demian.katz@villanova.edu> Date: Fri, 16 Nov 2012 13:07:52 -0500 Subject: [PATCH] Resolving VUFIND-629 (improved Wikipedia results using authority data and VIAF API). --- config/vufind/searches.ini | 7 +- module/VuFind/config/module.config.php | 6 +- .../src/VuFind/Recommend/AuthorInfo.php | 124 +++++++++++++++--- .../src/VuFind/RecordDriver/SolrAuth.php | 20 +++ 4 files changed, 140 insertions(+), 17 deletions(-) diff --git a/config/vufind/searches.ini b/config/vufind/searches.ini index 9ace9c2cfba..d6f5f4d77ef 100644 --- a/config/vufind/searches.ini +++ b/config/vufind/searches.ini @@ -207,9 +207,14 @@ CallNumber = callnumber ; ; AuthorFacets ; Display author names based on the search term. -; AuthorInfo +; AuthorInfo:[use_viaf] ; Load author information from external providers based on the current search ; and the "authors" setting in the [Content] section of config.ini. +; The [use_viaf] setting may be set to true to use the VIAF web service in +; combination with your Solr authority index to pick more appropriate authors; +; this defaults to false if unset. If you do not have local authority records +; to draw on, the OCLC FAST data works well with the [use_viaf] option; see: +; http://vufind.org/wiki/open_data_sources#oclc_fast ; AuthorityRecommend:[field1]:[query1]:[field2]:[query2]:...:[field-n]:[query-n] ; Displays see and see also results to user based on search of Authority Index. ; Any number of filter queries may be specified as parameters to limit which diff --git a/module/VuFind/config/module.config.php b/module/VuFind/config/module.config.php index d6614d5e856..2475b5eac28 100644 --- a/module/VuFind/config/module.config.php +++ b/module/VuFind/config/module.config.php @@ -231,6 +231,11 @@ $config = array( 'recommend_plugin_manager' => array( 'abstract_factories' => array('VuFind\Recommend\PluginFactory'), 'factories' => array( + 'authorinfo' => function ($sm) { + return new \VuFind\Recommend\AuthorInfo( + $sm->getServiceLocator()->get('SearchManager') + ); + }, 'worldcatidentities' => function ($sm) { return new \VuFind\Recommend\WorldCatIdentities( $sm->getServiceLocator()->get('VuFind\WorldCatUtils') @@ -244,7 +249,6 @@ $config = array( ), 'invokables' => array( 'authorfacets' => 'VuFind\Recommend\AuthorFacets', - 'authorinfo' => 'VuFind\Recommend\AuthorInfo', 'authorityrecommend' => 'VuFind\Recommend\AuthorityRecommend', 'catalogresults' => 'VuFind\Recommend\CatalogResults', 'europeanaresults' => 'VuFind\Recommend\EuropeanaResults', diff --git a/module/VuFind/src/VuFind/Recommend/AuthorInfo.php b/module/VuFind/src/VuFind/Recommend/AuthorInfo.php index 43516a560b9..b4271d3926e 100644 --- a/module/VuFind/src/VuFind/Recommend/AuthorInfo.php +++ b/module/VuFind/src/VuFind/Recommend/AuthorInfo.php @@ -65,6 +65,30 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface */ protected $lang; + /** + * Search manager + * + * @var \VuFind\Search\Manager + */ + protected $searchManager; + + /** + * Should we use VIAF for authorized names? + * + * @var bool + */ + protected $useViaf = false; + + /** + * Constructor + * + * @param \VuFind\Search\Manager $searchManager Search manager + */ + public function __construct(\VuFind\Search\Manager $searchManager) + { + $this->searchManager = $searchManager; + } + /** * setConfig * @@ -78,6 +102,13 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface { $translator = $this->getTranslator(); $this->lang = is_object($translator) ? $translator->getLocale() : 'en'; + + $parts = explode(':', $settings); + if (isset($parts[0]) && !empty($parts[0]) + && strtolower(trim($parts[0])) !== 'false' + ) { + $this->useViaf = true; + } } /** @@ -403,6 +434,81 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface return $info; } + /** + * Normalize an author name using internal logic. + * + * @param string $author Author name + * + * @return string + */ + protected function normalizeName($author) + { + // remove dates + $author = preg_replace('/[0-9]+-[0-9]*/', '', $author); + // if name is rearranged by commas + $author = trim($author, ', .'); + $nameParts = explode(', ', $author); + $last = $nameParts[0]; + // - move all names up an index, move last name to last + // - Last, First M. -> First M. Last + for ($i=1;$i<count($nameParts);$i++) { + $nameParts[$i-1] = $nameParts[$i]; + } + $nameParts[count($nameParts)-1] = $last; + $author = implode($nameParts, ' '); + return $author; + } + + /** + * Translate an LCCN to a Wikipedia name through the VIAF web service. Returns + * false if no value can be found. + * + * @param string $lccn LCCN + * + * @return string|bool + */ + protected function getWikipediaNameFromViaf($lccn) + { + $param = urlencode("LC|$lccn"); + $url = "http://viaf.org/viaf/sourceID/{$param}/justlinks.json"; + $client = new \VuFind\Http\Client(); + $result = $client->setUri($url)->setMethod('GET')->send(); + if (!$result->isSuccess()) { + return false; + } + $details = json_decode($result->getBody()); + return isset($details->WKP[0]) ? $details->WKP[0] : false; + } + + /** + * Normalize an author name using VIAF. + * + * @param string $author Author name + * + * @return string + */ + protected function normalizeNameWithViaf($author) + { + // Do authority search: + $auth = $this->searchManager->setSearchClassId('SolrAuth')->getResults(); + $auth->getParams()->setBasicSearch('"' . $author . '"', 'MainHeading'); + $results = $auth->getResults(); + + // Find first useful LCCN: + foreach ($results as $i => $current) { + $lccn = $current->tryMethod('getRawLCCN'); + if (!empty($lccn)) { + $name = $this->getWikipediaNameFromViaf($lccn); + if (!empty($name)) { + return $name; + } + } + } + + // No LCCN found? Use the default normalization routine: + return $this->normalizeName($author); + } + /** * Takes the search term and extracts a normal name from it * @@ -415,21 +521,9 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface $author = $search[0]['lookfor']; // remove quotes $author = str_replace('"', '', $author); - // remove dates - $author = preg_replace('/[0-9]+-[0-9]*/', '', $author); - // if name is rearranged by commas - $author = trim($author, ', .'); - $nameParts = explode(', ', $author); - $last = $nameParts[0]; - // - move all names up an index, move last name to last - // - Last, First M. -> First M. Last - for ($i=1;$i<count($nameParts);$i++) { - $nameParts[$i-1] = $nameParts[$i]; - } - $nameParts[count($nameParts)-1] = $last; - $author = implode($nameParts, ' '); - // remove punctuation - return $author; + return $this->useViaf + ? $this->normalizeNameWithViaf($author) + : $this->normalizeName($author); } return ''; } diff --git a/module/VuFind/src/VuFind/RecordDriver/SolrAuth.php b/module/VuFind/src/VuFind/RecordDriver/SolrAuth.php index b24cca0055d..da979b6a631 100644 --- a/module/VuFind/src/VuFind/RecordDriver/SolrAuth.php +++ b/module/VuFind/src/VuFind/RecordDriver/SolrAuth.php @@ -89,4 +89,24 @@ class SolrAuth extends SolrMarc && is_array($this->fields['use_for']) ? $this->fields['use_for'] : array(); } + + /** + * Get a raw LCCN (not normalized). Returns false if none available. + * + * @return string|bool + */ + public function getRawLCCN() + { + $lccn = $this->getFirstFieldValue('010'); + if (!empty($lccn)) { + return $lccn; + } + $lccns = $this->getFieldArray('700', array('0')); + foreach ($lccns as $lccn) { + if (substr($lccn, 0, '5') == '(DLC)') { + return substr($lccn, 5); + } + } + return false; + } } -- GitLab