From 2e45e5ef1961ba5904cf9f4a00d0fb1d61b294e5 Mon Sep 17 00:00:00 2001 From: Demian Katz <demian.katz@villanova.edu> Date: Fri, 7 Jun 2013 08:13:21 -0400 Subject: [PATCH] Progress on VUFIND-701 (Dewey AlphaBrowse skips records w/ multiple call numbers) Some edge cases won't be fixed until next SolrMarc upgrade (need List support) --- import/index_scripts/dewey.bsh | 52 +++++++++++++++---- import/marc.properties | 3 +- index-alphabetic-browse.bat | 2 +- index-alphabetic-browse.sh | 2 +- .../src/Connection/SolrTest.php | 21 ++++++++ solr/biblio/conf/schema.xml | 3 +- tests/data/deweybrowse.mrc | 1 + 7 files changed, 70 insertions(+), 14 deletions(-) create mode 100644 tests/data/deweybrowse.mrc diff --git a/import/index_scripts/dewey.bsh b/import/index_scripts/dewey.bsh index 1f0fb3df50a..cad5c431b52 100644 --- a/import/index_scripts/dewey.bsh +++ b/import/index_scripts/dewey.bsh @@ -15,8 +15,8 @@ import org.solrmarc.tools.CallNumUtils; * * @param record * @param fieldSpec - which MARC fields / subfields need to be analyzed - * @param precisionStr - a decimal number (represented in string format) showing the - * desired precision of the returned number; i.e. 100 to round to nearest hundred, + * @param precisionStr - a decimal number (represented in string format) showing the + * desired precision of the returned number; i.e. 100 to round to nearest hundred, * 10 to round to nearest ten, 0.1 to round to nearest tenth, etc. * @return Set containing requested numeric portions of Dewey decimal call numbers */ @@ -24,23 +24,23 @@ public Set getDeweyNumber(Record record, String fieldSpec, String precisionStr) // Initialize our return value: Set result = new LinkedHashSet(); - // Precision comes in as a string, but we need to convert it to a float: + // Precision comes in as a string, but we need to convert it to a float: float precision = Float.parseFloat(precisionStr); - + // Loop through the specified MARC fields: Set input = indexer.getFieldList(record, fieldSpec); Iterator iter = input.iterator(); while (iter.hasNext()) { // Get the current string to work on: String current = iter.next(); - + if (CallNumUtils.isValidDewey(current)) { // Convert the numeric portion of the call number into a float: float currentVal = Float.parseFloat(CallNumUtils.getDeweyB4Cutter(current)); - + // Round the call number value to the specified precision: Float finalVal = new Float(Math.floor(currentVal / precision) * precision); - + // Convert the rounded value back to a string (with leading zeros) and save it: result.add(CallNumUtils.normalizeFloat(finalVal.toString(), 3, -1)); } @@ -71,14 +71,14 @@ public Set getDeweySearchable(Record record, String fieldSpec) { while (iter.hasNext()) { // Get the current string to work on: String current = iter.next(); - + // Add valid strings to the set, normalizing them to be all uppercase // and free from whitespace. if (CallNumUtils.isValidDewey(current)) { result.add(current.toUpperCase().replaceAll(" ", "")); } } - + // If we found no call numbers, return null; otherwise, return our results: if (result.isEmpty()) return null; @@ -108,7 +108,39 @@ public String getDeweySortable(Record record, String fieldSpec) { return CallNumUtils.getDeweyShelfKey(current); } } - + // If we made it this far, we didn't find a valid sortable Dewey number: return null; +} + +/** + * Normalize Dewey numbers for AlphaBrowse sorting purposes (use all numbers!) + * + * Can return null + * + * @param record + * @param fieldSpec - which MARC fields / subfields need to be analyzed + * @return Set containing normalized Dewey numbers extracted from specified fields. + */ +public Set getDeweySortables(Record record, String fieldSpec) { + // Initialize our return value: + Set result = new LinkedHashSet(); + + // Loop through the specified MARC fields: + Set input = indexer.getFieldList(record, fieldSpec); + Iterator iter = input.iterator(); + while (iter.hasNext()) { + // Get the current string to work on: + String current = iter.next(); + + // If this is a valid Dewey number, return the sortable shelf key: + if (CallNumUtils.isValidDewey(current)) { + result.add(CallNumUtils.getDeweyShelfKey(current)); + } + } + + // If we found no call numbers, return null; otherwise, return our results: + if (result.isEmpty()) + return null; + return result; } \ No newline at end of file diff --git a/import/marc.properties b/import/marc.properties index 570b21c708c..4a1eaedff66 100644 --- a/import/marc.properties +++ b/import/marc.properties @@ -76,7 +76,8 @@ dewey-tens = custom, getDeweyNumber(082a:083a, 10), ddc22_map.properties(tens) dewey-ones = custom, getDeweyNumber(082a:083a, 1), ddc22_map.properties(ones) dewey-full = custom, getDeweySearchable(082a:083a) dewey-sort = custom, getDeweySortable(082a:083a) -dewey-raw = 082a:083a, first +dewey-sort-browse = script(dewey.bsh), getDeweySortables(082a:083a) +dewey-raw = 082a:083a # Extract the numeric portion of the OCLC number using a pattern map: oclc_num = 035a, (pattern_map.oclc_num) diff --git a/index-alphabetic-browse.bat b/index-alphabetic-browse.bat index 842a96245bc..2b740cc72cc 100644 --- a/index-alphabetic-browse.bat +++ b/index-alphabetic-browse.bat @@ -64,7 +64,7 @@ call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse title title_fullStr call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse topic topic_browse call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse author author_browse call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse lcc callnumber-a 1 -call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse dewey dewey-raw 1 "-Dbibleech=StoredFieldLeech -Dsortfield=dewey-sort -Dvaluefield=dewey-raw" +call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse dewey dewey-raw 1 "-Dbibleech=StoredFieldLeech -Dsortfield=dewey-sort-browse -Dvaluefield=dewey-raw" goto end rem Function to process a single browse index: diff --git a/index-alphabetic-browse.sh b/index-alphabetic-browse.sh index 1aa95d5db12..b9db27164c7 100755 --- a/index-alphabetic-browse.sh +++ b/index-alphabetic-browse.sh @@ -39,4 +39,4 @@ build_browse "title" "title_fullStr" 1 "-Dbibleech=StoredFieldLeech -Dsortfield= build_browse "topic" "topic_browse" build_browse "author" "author_browse" build_browse "lcc" "callnumber-a" 1 -build_browse "dewey" "dewey-raw" 1 "-Dbibleech=StoredFieldLeech -Dsortfield=dewey-sort -Dvaluefield=dewey-raw" +build_browse "dewey" "dewey-raw" 1 "-Dbibleech=StoredFieldLeech -Dsortfield=dewey-sort-browse -Dvaluefield=dewey-raw" diff --git a/module/VuFind/tests/integration-tests/src/Connection/SolrTest.php b/module/VuFind/tests/integration-tests/src/Connection/SolrTest.php index fd305d6ca58..67a4a9fdc29 100644 --- a/module/VuFind/tests/integration-tests/src/Connection/SolrTest.php +++ b/module/VuFind/tests/integration-tests/src/Connection/SolrTest.php @@ -73,4 +73,25 @@ class SolrTest extends \VuFindTest\Unit\TestCase $this->assertTrue(empty($item['seeAlso'])); $this->assertTrue(in_array('Royal Dublin Society', $item['useInstead'])); } + + /** + * Check that expected Dewey values are present (tests VUFIND-701). + * + * @return void + */ + public function testDeweyValues() + { + $solr = $this->getServiceManager()->get('VuFind\Search\BackendManager') + ->get('Solr'); + $result = $solr->alphabeticBrowse('dewey', '123.45 .I39', 0, 1); + $item = $result['Browse']['items'][0]; + $this->assertEquals(1, $item['count']); + $this->assertEquals($item['count'], count($item['ids'])); + $this->assertEquals('123.45 .I39', $item['heading']); + $result = $solr->alphabeticBrowse('dewey', '123.46 .Q39', 0, 1); + $item = $result['Browse']['items'][0]; + $this->assertEquals(1, $item['count']); + $this->assertEquals($item['count'], count($item['ids'])); + $this->assertEquals('123.46 .Q39', $item['heading']); + } } \ No newline at end of file diff --git a/solr/biblio/conf/schema.xml b/solr/biblio/conf/schema.xml index c10198be83b..ed6b0fb2d9f 100644 --- a/solr/biblio/conf/schema.xml +++ b/solr/biblio/conf/schema.xml @@ -154,7 +154,8 @@ <field name="dewey-ones" type="string" indexed="true" stored="true" multiValued="true"/> <field name="dewey-full" type="string" indexed="true" stored="true" multiValued="true"/> <field name="dewey-sort" type="string" indexed="true" stored="true" /> - <field name="dewey-raw" type="string" indexed="true" stored="true" /> + <field name="dewey-sort-browse" type="string" indexed="true" stored="true" multiValued="true" /> + <field name="dewey-raw" type="string" indexed="true" stored="true" multiValued="true" /> <field name="author2" type="textProper" indexed="true" stored="true" multiValued="true"/> <field name="author2Str" type="string" indexed="true" stored="true" multiValued="true"/> <field name="author2-role" type="string" indexed="true" stored="true" multiValued="true"/> diff --git a/tests/data/deweybrowse.mrc b/tests/data/deweybrowse.mrc new file mode 100644 index 00000000000..2830770ea9a --- /dev/null +++ b/tests/data/deweybrowse.mrc @@ -0,0 +1 @@ +00613cam a2200229Ma 4500001001600000005001700016008004100033020001500074035002300089040002500112041001800137043001200155050002400167049000900191082001600200082001600216100003000232245002200262250002300284260004700307300002900354testdeweybrowse20110419140028.0110214s1992 it a b 001 0 ita d a8820737493 a(OCoLC)ocm30585539 aRBNcRBNdOCLCGdPVU1 aitaalathlat ae-it---14aDG848.15b.V53 1992 aPVUM a123.45 .I39 a123.46 .Q391 aPerson, Fake,d1668-1744.10aDewey browse test aFictional edition. aMorano :bCentro di Studi Vichiani,c1992. a296 p. :bill. ;c24 cm. \ No newline at end of file -- GitLab