From edb61ffda6dc0f2d4592e09c9ab076fd46affeac Mon Sep 17 00:00:00 2001 From: Demian Katz <demian.katz@villanova.edu> Date: Thu, 2 Jul 2020 14:22:25 -0400 Subject: [PATCH] Improved EDS record driver (#1661) - Simplified/standardized existing functionality - Added more direct data access, enabling citation and export functionality to be enabled. - Improved/expanded tests (though still not complete) --- module/VuFind/src/VuFind/RecordDriver/EDS.php | 503 ++++++++++++++---- .../src/VuFindTest/RecordDriver/EDSTest.php | 92 +++- 2 files changed, 463 insertions(+), 132 deletions(-) diff --git a/module/VuFind/src/VuFind/RecordDriver/EDS.php b/module/VuFind/src/VuFind/RecordDriver/EDS.php index 22218822caa..c1005cf193b 100644 --- a/module/VuFind/src/VuFind/RecordDriver/EDS.php +++ b/module/VuFind/src/VuFind/RecordDriver/EDS.php @@ -88,14 +88,8 @@ class EDS extends DefaultRecord */ public function getItemsAbstract() { - if (isset($this->fields['Items'])) { - foreach ($this->fields['Items'] as $item) { - if ('Ab' == $item['Group']) { - return $this->toHTML($item['Data'], $item['Group']); - } - } - } - return ''; + $abstract = $this->getItems(null, null, 'Ab'); + return $abstract[0]['Data'] ?? ''; } /** @@ -105,8 +99,7 @@ class EDS extends DefaultRecord */ public function getAccessLevel() { - return isset($this->fields['Header']['AccessLevel']) - ? $this->fields['Header']['AccessLevel'] : ''; + return $this->fields['Header']['AccessLevel'] ?? ''; } /** @@ -127,15 +120,11 @@ class EDS extends DefaultRecord */ protected function getItemsAuthorsArray() { - $authors = []; - if (isset($this->fields['Items'])) { - foreach ($this->fields['Items'] as $item) { - if ('Au' == $item['Group']) { - $authors[] = $this->toHTML($item['Data'], $item['Group']); - } - } - } - return $authors; + return array_map( + function ($data) { + return $data['Data']; + }, $this->getItems(null, null, 'Au') + ); } /** @@ -145,8 +134,7 @@ class EDS extends DefaultRecord */ public function getCustomLinks() { - return isset($this->fields['CustomLinks']) - ? $this->fields['CustomLinks'] : []; + return $this->fields['CustomLinks'] ?? []; } /** @@ -156,8 +144,7 @@ class EDS extends DefaultRecord */ public function getFTCustomLinks() { - return isset($this->fields['FullText']['CustomLinks']) - ? $this->fields['FullText']['CustomLinks'] : []; + return $this->fields['FullText']['CustomLinks'] ?? []; } /** @@ -167,8 +154,7 @@ class EDS extends DefaultRecord */ public function getDbLabel() { - return isset($this->fields['Header']['DbLabel']) - ? $this->fields['Header']['DbLabel'] : ''; + return $this->fields['Header']['DbLabel'] ?? ''; } /** @@ -178,8 +164,7 @@ class EDS extends DefaultRecord */ public function getHTMLFullText() { - return isset($this->fields['FullText']['Text']['Value']) - ? $this->toHTML($this->fields['FullText']['Text']['Value']) : ''; + return $this->toHTML($this->fields['FullText']['Text']['Value'] ?? ''); } /** @@ -189,8 +174,7 @@ class EDS extends DefaultRecord */ public function hasHTMLFullTextAvailable() { - return isset($this->fields['FullText']['Text']['Availability']) - && ('1' == $this->fields['FullText']['Text']['Availability']); + return '1' == ($this->fields['FullText']['Text']['Availability'] ?? '0'); } /** @@ -233,22 +217,34 @@ class EDS extends DefaultRecord /** * Get the items of the record. * - * @param string $context The context in which items are being retrieved + * @param string $context The context in which items are being retrieved * (used for context-sensitive filtering) + * @param string $labelFilter A specific label to retrieve (filter out others; + * null for no filter) + * @param string $groupFilter A specific group to retrieve (filter out others; + * null for no filter) + * @param string $nameFilter A specific name to retrieve (filter out others; + * null for no filter) * * @return array */ - public function getItems($context = null) - { + public function getItems($context = null, $labelFilter = null, + $groupFilter = null, $nameFilter = null + ) { $items = []; foreach ($this->fields['Items'] ?? [] as $item) { $nextItem = [ 'Label' => $item['Label'] ?? '', 'Group' => $item['Group'] ?? '', + 'Name' => $item['Name'] ?? '', 'Data' => isset($item['Data']) ? $this->toHTML($item['Data'], $item['Group']) : '' ]; - if (!$this->itemIsExcluded($nextItem, $context)) { + if (!$this->itemIsExcluded($nextItem, $context) + && ($labelFilter === null || $nextItem['Label'] === $labelFilter) + && ($groupFilter === null || $nextItem['Group'] === $groupFilter) + && ($nameFilter === null || $nextItem['Name'] === $nameFilter) + ) { $items[] = $nextItem; } } @@ -262,7 +258,7 @@ class EDS extends DefaultRecord */ public function getPLink() { - return isset($this->fields['PLink']) ? $this->fields['PLink'] : ''; + return $this->fields['PLink'] ?? ''; } /** @@ -272,8 +268,7 @@ class EDS extends DefaultRecord */ public function getPubType() { - return isset($this->fields['Header']['PubType']) - ? $this->fields['Header']['PubType'] : ''; + return $this->fields['Header']['PubType'] ?? ''; } /** @@ -283,8 +278,7 @@ class EDS extends DefaultRecord */ public function getPubTypeId() { - return isset($this->fields['Header']['PubTypeId']) - ? $this->fields['Header']['PubTypeId'] : ''; + return $this->fields['Header']['PubTypeId'] ?? ''; } /** @@ -297,7 +291,7 @@ class EDS extends DefaultRecord protected function hasEbookAvailable(array $types) { foreach ($this->fields['FullText']['Links'] ?? [] as $link) { - if (isset($link['Type']) && in_array($link['Type'], $types)) { + if (in_array($link['Type'] ?? '', $types)) { return true; } } @@ -390,14 +384,11 @@ class EDS extends DefaultRecord */ public function getItemsSubjects() { - $subjects = []; - if (isset($this->fields['Items'])) { - foreach ($this->fields['Items'] as $item) { - if ('Su' == $item['Group']) { - $subjects[] = $this->toHTML($item['Data'], $item['Group']); - } - } - } + $subjects = array_map( + function ($data) { + return $data['Data']; + }, $this->getItems(null, null, 'Su') + ); return empty($subjects) ? '' : implode(', ', $subjects); } @@ -412,11 +403,9 @@ class EDS extends DefaultRecord */ public function getThumbnail($size = 'small') { - if (!empty($this->fields['ImageInfo'])) { - foreach ($this->fields['ImageInfo'] as $image) { - if (isset($image['Size']) && $size == $image['Size']) { - return (isset($image['Target'])) ? $image['Target'] : ''; - } + foreach ($this->fields['ImageInfo'] ?? [] as $image) { + if ($size == ($image['Size'] ?? '')) { + return $image['Target'] ?? ''; } } return false; @@ -429,14 +418,8 @@ class EDS extends DefaultRecord */ public function getItemsTitle() { - if (isset($this->fields['Items'])) { - foreach ($this->fields['Items'] as $item) { - if ('Ti' == $item['Group']) { - return $this->toHTML($item['Data']); - } - } - } - return ''; + $title = $this->getItems(null, null, 'Ti'); + return $title[0]['Data'] ?? ''; } /** @@ -446,13 +429,10 @@ class EDS extends DefaultRecord */ public function getTitle() { - if (isset($this->fields['RecordInfo']['BibRecord']['BibEntity']['Titles'])) { - foreach ($this->fields['RecordInfo']['BibRecord']['BibEntity']['Titles'] - as $titleRecord - ) { - if (isset($titleRecord['Type']) && 'main' == $titleRecord['Type']) { - return $titleRecord['TitleFull']; - } + $list = $this->extractEbscoDataFromRecordInfo('BibRecord/BibEntity/Titles'); + foreach ($list as $titleRecord) { + if ('main' == ($titleRecord['Type'] ?? '')) { + return $titleRecord['TitleFull']; } } return ''; @@ -465,21 +445,10 @@ class EDS extends DefaultRecord */ public function getPrimaryAuthors() { - $authors = []; - if (isset($this->fields['RecordInfo']['BibRecord']['BibRelationships'])) { - $bibRels - = & $this->fields['RecordInfo']['BibRecord']['BibRelationships']; - } - if (isset($bibRels['HasContributorRelationships']) - && !empty($bibRels['HasContributorRelationships']) - ) { - foreach ($bibRels['HasContributorRelationships'] as $entry) { - if (isset($entry['PersonEntity']['Name']['NameFull'])) { - $authors[] = $entry['PersonEntity']['Name']['NameFull']; - } - } - } - return $authors; + return $this->extractEbscoDataFromRecordInfo( + 'BibRecord/BibRelationships/HasContributorRelationships/*/' + . 'PersonEntity/Name/NameFull' + ); } /** @@ -489,14 +458,8 @@ class EDS extends DefaultRecord */ public function getItemsTitleSource() { - if (isset($this->fields['Items'])) { - foreach ($this->fields['Items'] as $item) { - if ('Src' == $item['Group']) { - return $this->toHTML($item['Data']); - } - } - } - return ''; + $title = $this->getItems(null, null, 'Src'); + return $title[0]['Data'] ?? ''; } /** @@ -647,52 +610,360 @@ class EDS extends DefaultRecord } /** - * Get an array of strings representing citation formats supported - * by this record's data (empty if none). For possible legal values, - * see /application/themes/root/helpers/Citation.php, getCitation() - * method. + * Return the first valid DOI found in the record (false if none). * - * @return array Strings representing citation formats. + * @return mixed */ - protected function getSupportedCitationFormats() + public function getCleanDOI() { - return []; + $doi = $this->getItems(null, null, null, 'DOI'); + if (isset($doi[0]['Data'])) { + return $doi[0]['Data']; + } + $dois = $this->getFilteredIdentifiers(['doi']); + return $dois[0] ?? false; } /** - * Indicate whether export is disabled for a particular format. + * Get record languages * - * @param string $format Export format + * @return array + */ + public function getLanguages() + { + return $this->extractEbscoData( + [ + 'RecordInfo:BibRecord/BibEntity/Languages/*/Text', + 'Items:Languages', + 'Items:Language', + ] + ); + } + + /** + * Retrieve identifiers from the EBSCO record and retrieve values filtered by + * type. * - * @return bool + * @param array $filter Type values to retrieve. * - * @SuppressWarnings(PHPMD.UnusedFormalParameter) + * @return array */ - public function exportDisabled($format) + protected function getFilteredIdentifiers($filter) { - // EDS is not export-friendly; disable all formats. - return true; + $raw = array_merge( + $this->extractEbscoDataFromRecordInfo( + 'BibRecord/BibRelationships/IsPartOfRelationships/*' + . '/BibEntity/Identifiers' + ), + $this->extractEbscoDataFromRecordInfo( + 'BibRecord/BibEntity/Identifiers' + ) + ); + $ids = []; + foreach ($raw as $data) { + $type = strtolower($data['Type'] ?? ''); + if (isset($data['Value']) && in_array($type, $filter)) { + $ids[] = $data['Value']; + } + } + return $ids; } /** - * Return the first valid DOI found in the record (false if none). + * Get ISSNs (of containing record) * - * @return mixed + * @return array */ - public function getCleanDOI() + public function getISSNs() { - foreach ($this->fields['Items'] ?? [] as $item) { - if ('DOI' == $item['Name'] ?? '' && isset($item['Data'])) { - return $item['Data']; + return $this->getFilteredIdentifiers(['issn-print', 'issn-electronic']); + } + + /** + * Get an array of ISBNs + * + * @return array + */ + public function getISBNs() + { + return $this->getFilteredIdentifiers(['isbn-print', 'isbn-electronic']); + } + + /** + * Get title of containing record + * + * @return string + */ + public function getContainerTitle() + { + // If there is no source, we don't want to identify a container + // (in this situation, it is likely redundant data): + if (count($this->extractEbscoDataFromItems('Source')) === 0) { + return ''; + } + $data = $this->extractEbscoDataFromRecordInfo( + 'BibRecord/BibRelationships/IsPartOfRelationships/0' + . '/BibEntity/Titles/0/TitleFull' + ); + return $data[0] ?? ''; + } + + /** + * Extract numbering data of a particular type. + * + * @param string $type Numbering type to return, if present. + * + * @return string + */ + protected function getFilteredNumbering($type) + { + $numbering = $this->extractEbscoDataFromRecordInfo( + 'BibRecord/BibRelationships/IsPartOfRelationships/*/BibEntity/Numbering' + ); + foreach ($numbering as $key => $data) { + if (strtolower($data['Type'] ?? '') == $type + && !empty($data['Value']) + ) { + return $data['Value']; } } - $ids = $this->fields['RecordInfo']['BibRecord']['BibEntity']['Identifiers'] - ?? []; - foreach ($ids as $item) { - if ('DOI' == strtoupper($item['Type'] ?? '') && isset($item['Value'])) { - return $item['Value']; + return ''; + } + + /** + * Get issue of containing record + * + * @return string + */ + public function getContainerIssue() + { + return $this->getFilteredNumbering('issue'); + } + + /** + * Get volume of containing record + * + * @return string + */ + public function getContainerVolume() + { + return $this->getFilteredNumbering('volume'); + } + + /** + * Get the publication dates of the record. See also getDateSpan(). + * + * @return array + */ + public function getPublicationDates() + { + $pubDates = array_map( + function ($data) { + return $data->getDate(); + }, $this->getRawEDSPublicationDetails() + ); + return !empty($pubDates) ? $pubDates : $this->extractEbscoDataFromRecordInfo( + 'BibRecord/BibRelationships/IsPartOfRelationships/0/BibEntity/Dates/0/Y' + ); + } + + /** + * Get year of containing record + * + * @return string + */ + public function getContainerStartPage() + { + $pagination = $this->extractEbscoDataFromRecordInfo( + 'BibRecord/BibEntity/PhysicalDescription/Pagination' + ); + return $pagination['StartPage'] ?? ''; + } + + /** + * Returns an array of formats based on publication type. + * + * @return array + */ + public function getFormats() + { + $formats = []; + $pubType = $this->getPubType(); + switch (strtolower($pubType)) { + case 'academic journal': + case 'periodical': + case 'report': + // Add "article" format for better OpenURL generation + $formats[] = $pubType; + $formats[] = 'Article'; + break; + case 'ebook': + // Treat eBooks as both "Books" and "Electronic" items + $formats[] = 'Book'; + $formats[] = 'Electronic'; + break; + case 'dissertation/thesis': + // Simplify wording for consistency with other drivers + $formats[] = 'Thesis'; + break; + default: + $formats[] = $pubType; + } + + return $formats; + } + + /** + * Get the publishers of the record. + * + * @return array + */ + public function getPublishers() + { + return array_map( + function ($data) { + return $data->getName(); + }, $this->getRawEDSPublicationDetails() + ); + } + + /** + * Get the item's place of publication. + * + * @return array + */ + public function getPlacesOfPublication() + { + return array_map( + function ($data) { + return $data->getPlace(); + }, $this->getRawEDSPublicationDetails() + ); + } + + /** + * Get an array of publication detail lines combining information from + * getPublicationDates(), getPublishers() and getPlacesOfPublication(). + * + * @return array + */ + public function getPublicationDetails() + { + $details = $this->getRawEDSPublicationDetails(); + return !empty($details) ? $details : parent::getPublicationDetails(); + } + + /** + * Attempt to build up publication details from raw EDS data. + * + * @return array + */ + protected function getRawEDSPublicationDetails() + { + $details = []; + foreach ($this->getItems(null, 'Publication Information') as $pub) { + // Try to extract place, publisher and date: + if (preg_match('/^(.+):(.*)\.\s*(\d{4})$/', $pub['Data'], $matches)) { + $placeParts = explode('.', $matches[1]); + list($place, $pub, $date) + = [trim($matches[1]), trim($matches[2]), $matches[3]]; + } elseif (preg_match('/^(.+):(.*)$/', $pub['Data'], $matches)) { + list($place, $pub, $date) + = [trim($matches[1]), trim($matches[2]), '']; + } else { + list($place, $pub, $date) = ['', $pub['Data'], '']; } + + // In some cases, the place may have noise on the front that needs + // to be removed... + $placeParts = explode('.', $place); + $shortPlace = array_pop($placeParts); + $details[] = new Response\PublicationDetails( + strlen($shortPlace) > 5 ? $shortPlace : $place, $pub, $date + ); } - return false; + return $details; + } + + /** + * Extract data from EBSCO API response using a prioritized list of selectors. + * Selectors can be of the form Items:Label to invoke extractEbscoDataFromItems, + * or RecordInfo:Path/To/Data/Element to invoke extractEbscoDataFromRecordInfo. + * + * @param array $selectors Array of selector strings for extracting data. + * + * @return array + */ + protected function extractEbscoData($selectors) + { + $result = []; + foreach ($selectors as $selector) { + list($method, $params) = explode(':', $selector, 2); + $fullMethod = 'extractEbscoDataFrom' . ucwords($method); + if (!is_callable([$this, $fullMethod])) { + throw new \Exception('Undefined method: ' . $fullMethod); + } + $result = $this->$fullMethod($params); + if (!empty($result)) { + break; + } + } + return $result; + } + + /** + * Extract data from the record's "Items" array, based on a label. + * + * @param string $label Label to filter on. + * + * @return array + */ + protected function extractEbscoDataFromItems($label) + { + $items = $this->getItems(null, $label); + $output = []; + foreach ($items as $item) { + $output[] = $item['Data']; + } + return $output; + } + + /** + * Extract data from the record's "RecordInfo" array, based on a path. + * + * @param string $path Path to select with (slash-separated element names, + * with special * selector to iterate through all children). + * + * @return array + */ + protected function extractEbscoDataFromRecordInfo($path) + { + return (array)$this->recurseIntoRecordInfo( + $this->fields['RecordInfo'] ?? [], + explode('/', $path) + ); + } + + /** + * Recursive support method for extractEbscoDataFromRecordInfo(). + * + * @param array $data Data to recurse into + * @param array $path Array representing path into data + * + * @return array + */ + protected function recurseIntoRecordInfo($data, $path) + { + $nextField = array_shift($path); + $keys = $nextField === '*' ? array_keys($data) : [$nextField]; + $values = []; + foreach ($keys as $key) { + if (isset($data[$key])) { + $values[] = empty($path) + ? $data[$key] + : $this->recurseIntoRecordInfo($data[$key], $path); + } + } + return count($values) == 1 ? $values[0] : $values; } } diff --git a/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/EDSTest.php b/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/EDSTest.php index 8f42bd0c9a0..7aeff2248df 100644 --- a/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/EDSTest.php +++ b/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/EDSTest.php @@ -42,16 +42,6 @@ use VuFind\RecordDriver\EDS; */ class EDSTest extends \VuFindTest\Unit\TestCase { - /** - * Test exportDisabled for a record. - * - * @return void - */ - public function testExportDisabled() - { - $this->assertEquals(true, $this->getDriver()->exportDisabled('endnote')); - } - /** * Test getUniqueID for a record. * @@ -287,20 +277,90 @@ class EDSTest extends \VuFindTest\Unit\TestCase $this->assertEquals("<a href='" . $str . "'>" . $str . "</a>", $this->getDriver()->linkUrls($str)); } + /** + * Test getISSNs. + * + * @return void + */ + public function testGetISSNs() + { + $driver = $this->getDriverWithIdentifierData(); + $this->assertEquals( + ['1234-5678', '5678-1234'], $driver->getISSNs() + ); + } + + /** + * Test getISBNs. + * + * @return void + */ + public function testGetISBNs() + { + $driver = $this->getDriverWithIdentifierData(); + $this->assertEquals( + ['0123456789X', 'fakeisbnxxx'], $driver->getISBNs() + ); + } + + /** + * Get a record driver with fake identifier data. + * + * @return EDS + */ + protected function getDriverWithIdentifierData() + { + return $this->getDriver( + [ + 'RecordInfo' => [ + 'BibRecord' => [ + 'BibRelationships' => [ + 'IsPartOfRelationships' => [ + [ + 'BibEntity' => [ + 'Identifiers' => [ + [ + 'Type' => 'issn-electronic', + 'Value' => '1234-5678' + ], + [ + 'Type' => 'issn-print', + 'Value' => '5678-1234' + ], + [ + 'Type' => 'isbn-electronic', + 'Value' => '0123456789X' + ], + [ + 'Type' => 'isbn-print', + 'Value' => 'fakeisbnxxx' + ], + [ + 'Type' => 'meaningless-noise', + 'Value' => 'should never be seen' + ], + ] + ] + ] + ] + ] + ] + ] + ] + ); + } + /** * Get a record driver with fake data. * - * @param array $overrides Fixture fields to override. + * @param array $overrides Raw data for testing * - * @return SolrDefault + * @return EDS */ protected function getDriver($overrides = []) { - // Simulate empty response for now: - $fixture = ['response' => ['docs' => [[]]]]; - $record = new EDS(); - $record->setRawData($overrides + $fixture['response']['docs'][0]); + $record->setRawData($overrides); return $record; } } -- GitLab