Skip to content
Snippets Groups Projects
Commit fbf23d28 authored by Demian Katz, committed by GitHub
Browse files

Better handling of multiple author relators in MARC (#772)

parent 4dbacbd9
No related merge requests found
...@@ -1806,31 +1806,60 @@ public class VuFindIndexer extends SolrIndexer ...@@ -1806,31 +1806,60 @@ public class VuFindIndexer extends SolrIndexer
*/ */
protected Boolean authorHasAppropriateRelator(DataField authorField, protected Boolean authorHasAppropriateRelator(DataField authorField,
String[] noRelatorAllowed, String relatorConfig String[] noRelatorAllowed, String relatorConfig
) {
return getValidRelators(authorField, noRelatorAllowed, relatorConfig).size() > 0;
}
/**
* Extract all valid relator terms from a list of subfields using a whitelist.
* @param subfields List of subfields to check
* @param permittedRoles Whitelist to check against
* @return Set of valid relator terms
*/
public Set<String> getValidRelatorsFromSubfields(List<Subfield> subfields, List<String> permittedRoles)
{
Set<String> relators = new LinkedHashSet<String>();
for (int j = 0; j < subfields.size(); j++) {
String current = normalizeRelatorString(subfields.get(j).getData());
if (permittedRoles.contains(current)) {
relators.add(current);
}
}
return relators;
}
/**
* Extract all values that meet the specified relator requirements.
* @param authorField Field to analyze
* @param noRelatorAllowed Array of tag names which are allowed to be used with
* no declared relator.
* @param relatorConfig The setting in author-classification.ini which
* defines which relator terms are acceptable (or a colon-delimited list)
* @return Set
*/
public Set<String> getValidRelators(DataField authorField,
String[] noRelatorAllowed, String relatorConfig
) { ) {
// get tag number from Field // get tag number from Field
String tag = authorField.getTag(); String tag = authorField.getTag();
List<Subfield> subfieldE = normalizeRelatorSubfieldList(authorField.getSubfields('e')); List<Subfield> subfieldE = authorField.getSubfields('e');
List<Subfield> subfield4 = normalizeRelatorSubfieldList(authorField.getSubfields('4')); List<Subfield> subfield4 = authorField.getSubfields('4');
Set<String> relators = new LinkedHashSet<String>();
// if no relator is found, check to see if the current tag is in the "no // if no relator is found, check to see if the current tag is in the "no
// relator allowed" list. // relator allowed" list.
if (subfieldE.size() == 0 && subfield4.size() == 0) { if (subfieldE.size() == 0 && subfield4.size() == 0) {
return Arrays.asList(noRelatorAllowed).contains(tag); if (Arrays.asList(noRelatorAllowed).contains(tag)) {
} relators.add("");
// If we got this far, we need to figure out what type of relation they have
List permittedRoles = normalizeRelatorStringList(Arrays.asList(loadRelatorConfig(relatorConfig)));
for (int j = 0; j < subfield4.size(); j++) {
if (permittedRoles.contains(subfield4.get(j).getData())) {
return true;
}
}
for (int j = 0; j < subfieldE.size(); j++) {
if (permittedRoles.contains(subfieldE.get(j).getData())) {
return true;
} }
} else {
// If we got this far, we need to figure out what type of relation they have
List permittedRoles = normalizeRelatorStringList(Arrays.asList(loadRelatorConfig(relatorConfig)));
relators.addAll(getValidRelatorsFromSubfields(subfieldE, permittedRoles));
relators.addAll(getValidRelatorsFromSubfields(subfield4, permittedRoles));
} }
return false; return relators;
} }
/** /**
...@@ -1886,8 +1915,8 @@ public class VuFindIndexer extends SolrIndexer ...@@ -1886,8 +1915,8 @@ public class VuFindIndexer extends SolrIndexer
DataField authorField; DataField authorField;
while (fieldsIter.hasNext()){ while (fieldsIter.hasNext()){
authorField = (DataField) fieldsIter.next(); authorField = (DataField) fieldsIter.next();
//add all author types to the result set // add all author types to the result set; if we have multiple relators, repeat the authors
if (authorHasAppropriateRelator(authorField, noRelatorAllowed, relatorConfig)) { for (String iterator: getValidRelators(authorField, noRelatorAllowed, relatorConfig)) {
for (String subfields : parsedTagList.get(authorField.getTag())) { for (String subfields : parsedTagList.get(authorField.getTag())) {
String current = this.getDataFromVariableField(authorField, "["+subfields+"]", " ", false); String current = this.getDataFromVariableField(authorField, "["+subfields+"]", " ", false);
// TODO: we may eventually be able to use this line instead, // TODO: we may eventually be able to use this line instead,
...@@ -1981,30 +2010,7 @@ public class VuFindIndexer extends SolrIndexer ...@@ -1981,30 +2010,7 @@ public class VuFindIndexer extends SolrIndexer
while (fieldsIter.hasNext()){ while (fieldsIter.hasNext()){
authorField = (DataField) fieldsIter.next(); authorField = (DataField) fieldsIter.next();
//add all author types to the result set //add all author types to the result set
if (authorHasAppropriateRelator(authorField, noRelatorAllowed, relatorConfig)) { result.addAll(getValidRelators(authorField, noRelatorAllowed, relatorConfig));
List<Subfield> subfieldE = normalizeRelatorSubfieldList(authorField.getSubfields('e'));
List<Subfield> subfield4 = normalizeRelatorSubfieldList(authorField.getSubfields('4'));
// get the first non-empty subfield
String relator = defaultRelator;
// try subfield E first
for (int j = 0; j < subfieldE.size(); j++) {
if (!subfieldE.get(j).getData().isEmpty()) {
relator = subfieldE.get(j).getData();
continue;
}
}
// try subfield 4 now and overwrite relator as subfield 4 is most important
for (int j = 0; j < subfield4.size(); j++) {
if (!subfield4.get(j).getData().isEmpty()) {
relator = subfield4.get(j).getData();
continue;
}
}
result.add(relator);
}
} }
} }
return result; return result;
...@@ -2079,22 +2085,6 @@ public class VuFindIndexer extends SolrIndexer ...@@ -2079,22 +2085,6 @@ public class VuFindIndexer extends SolrIndexer
return stringList; return stringList;
} }
/**
* Normalizes the strings in a list of subfields.
*
* @param subfieldList List of subfields to be normalized
* @return subfieldList Normalized List of subfields
*/
protected List<Subfield> normalizeRelatorSubfieldList(List<Subfield> subfieldList)
{
for (int j = 0; j < subfieldList.size(); j++) {
subfieldList.get(j).setData(
normalizeRelatorString(subfieldList.get(j).getData())
);
}
return subfieldList;
}
/** /**
* Normalizes a string * Normalizes a string
* *
......
...@@ -19,31 +19,60 @@ org.solrmarc.index.SolrIndexer indexer = null; ...@@ -19,31 +19,60 @@ org.solrmarc.index.SolrIndexer indexer = null;
*/ */
public Boolean authorHasAppropriateRelator(DataField authorField, public Boolean authorHasAppropriateRelator(DataField authorField,
String[] noRelatorAllowed, String relatorConfig String[] noRelatorAllowed, String relatorConfig
) {
return getValidRelators(authorField, noRelatorAllowed, relatorConfig).size() > 0;
}
/**
* Extract all valid relator terms from a list of subfields using a whitelist.
* @param subfields List of subfields to check
* @param permittedRoles Whitelist to check against
* @return Set of valid relator terms
*/
public Set getValidRelatorsFromSubfields(List subfields, List permittedRoles)
{
Set relators = new LinkedHashSet();
for (int j = 0; j < subfields.size(); j++) {
String current = normalizeRelatorString(subfields.get(j).getData());
if (permittedRoles.contains(current)) {
relators.add(current);
}
}
return relators;
}
/**
* Extract all values that meet the specified relator requirements.
* @param authorField Field to analyze
* @param noRelatorAllowed Array of tag names which are allowed to be used with
* no declared relator.
* @param relatorConfig The setting in author-classification.ini which
* defines which relator terms are acceptable (or a colon-delimited list)
* @return Set
*/
public Set getValidRelators(DataField authorField,
String[] noRelatorAllowed, String relatorConfig
) { ) {
// get tag number from Field // get tag number from Field
String tag = authorField.getTag(); String tag = authorField.getTag();
List subfieldE = normalizeRelatorSubfieldList(authorField.getSubfields('e')); List subfieldE = authorField.getSubfields('e');
List subfield4 = normalizeRelatorSubfieldList(authorField.getSubfields('4')); List subfield4 = authorField.getSubfields('4');
Set relators = new LinkedHashSet();
// if no relator is found, check to see if the current tag is in the "no // if no relator is found, check to see if the current tag is in the "no
// relator allowed" list. // relator allowed" list.
if (subfieldE.size() == 0 && subfield4.size() == 0) { if (subfieldE.size() == 0 && subfield4.size() == 0) {
return Arrays.asList(noRelatorAllowed).contains(tag); if (Arrays.asList(noRelatorAllowed).contains(tag)) {
} relators.add("");
// If we got this far, we need to figure out what type of relation they have
List permittedRoles = normalizeRelatorStringList(Arrays.asList(loadRelatorConfig(relatorConfig)));
for (int j = 0; j < subfield4.size(); j++) {
if (permittedRoles.contains(subfield4.get(j).getData())) {
return true;
}
}
for (int j = 0; j < subfieldE.size(); j++) {
if (permittedRoles.contains(subfieldE.get(j).getData())) {
return true;
} }
} else {
// If we got this far, we need to figure out what type of relation they have
List permittedRoles = normalizeRelatorStringList(Arrays.asList(loadRelatorConfig(relatorConfig)));
relators.addAll(getValidRelatorsFromSubfields(subfieldE, permittedRoles));
relators.addAll(getValidRelatorsFromSubfields(subfield4, permittedRoles));
} }
return false; return relators;
} }
/** /**
...@@ -99,8 +128,8 @@ public List getAuthorsFilteredByRelator(Record record, String tagList, ...@@ -99,8 +128,8 @@ public List getAuthorsFilteredByRelator(Record record, String tagList,
DataField authorField; DataField authorField;
while (fieldsIter.hasNext()){ while (fieldsIter.hasNext()){
authorField = (DataField) fieldsIter.next(); authorField = (DataField) fieldsIter.next();
//add all author types to the result set // add all author types to the result set; if we have multiple relators, repeat the authors
if (authorHasAppropriateRelator(authorField, noRelatorAllowed, relatorConfig)) { for (String iterator: getValidRelators(authorField, noRelatorAllowed, relatorConfig)) {
for (String subfields : parsedTagList.get(authorField.getTag())) { for (String subfields : parsedTagList.get(authorField.getTag())) {
String current = indexer.getDataFromVariableField(authorField, "["+subfields+"]", " ", false); String current = indexer.getDataFromVariableField(authorField, "["+subfields+"]", " ", false);
// TODO: we may eventually be able to use this line instead, // TODO: we may eventually be able to use this line instead,
...@@ -194,30 +223,7 @@ public List getRelatorsFilteredByRelator(Record record, String tagList, ...@@ -194,30 +223,7 @@ public List getRelatorsFilteredByRelator(Record record, String tagList,
while (fieldsIter.hasNext()){ while (fieldsIter.hasNext()){
authorField = (DataField) fieldsIter.next(); authorField = (DataField) fieldsIter.next();
//add all author types to the result set //add all author types to the result set
if (authorHasAppropriateRelator(authorField, noRelatorAllowed, relatorConfig)) { result.addAll(getValidRelators(authorField, noRelatorAllowed, relatorConfig));
List subfieldE = normalizeRelatorSubfieldList(authorField.getSubfields('e'));
List subfield4 = normalizeRelatorSubfieldList(authorField.getSubfields('4'));
// get the first non-empty subfield
String relator = defaultRelator;
// try subfield E first
for (int j = 0; j < subfieldE.size(); j++) {
if (!subfieldE.get(j).getData().isEmpty()) {
relator = subfieldE.get(j).getData();
continue;
}
}
// try subfield 4 now and overwrite relator as subfield 4 is most important
for (int j = 0; j < subfield4.size(); j++) {
if (!subfield4.get(j).getData().isEmpty()) {
relator = subfield4.get(j).getData();
continue;
}
}
result.add(relator);
}
} }
} }
return result; return result;
...@@ -292,22 +298,6 @@ public List normalizeRelatorStringList(List stringList) ...@@ -292,22 +298,6 @@ public List normalizeRelatorStringList(List stringList)
return stringList; return stringList;
} }
/**
* Normalizes the strings in a list of subfields.
*
* @param subfieldList List of subfields to be normalized
* @return subfieldList Normalized List of subfields
*/
public List normalizeRelatorSubfieldList(List subfieldList)
{
for (int j = 0; j < subfieldList.size(); j++) {
subfieldList.get(j).setData(
normalizeRelatorString(subfieldList.get(j).getData())
);
}
return subfieldList;
}
/** /**
* Normalizes a string * Normalizes a string
* *
......
This diff is collapsed.
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment