diff --git a/import/index_java/src/org/vufind/index/GeoTools.java b/import/index_java/src/org/vufind/index/GeoTools.java index 342cad8c1660b39e46626b495cb673bf0fe57e95..db00d3975830ecf50339de207ea75d72d90763f7 100644 --- a/import/index_java/src/org/vufind/index/GeoTools.java +++ b/import/index_java/src/org/vufind/index/GeoTools.java @@ -25,18 +25,32 @@ package org.vufind.index; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +import java.io.*; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; import java.util.Iterator; +import java.util.List; +import java.util.Properties; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.Set; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; import org.apache.log4j.Logger; -import org.marc4j.marc.Record; -import org.marc4j.marc.VariableField; import org.marc4j.marc.ControlField; import org.marc4j.marc.DataField; +import org.marc4j.marc.Record; import org.marc4j.marc.Subfield; -import java.util.HashMap; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import org.marc4j.marc.VariableField; +import org.solrmarc.index.indexer.ValueIndexerFactory; +import org.solrmarc.index.SolrIndexer; +import org.solrmarc.tools.PropertyUtils; +import org.solrmarc.tools.SolrMarcIndexerException; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; /** * Geographic indexing routines. @@ -46,10 +60,26 @@ public class GeoTools private static final Pattern COORDINATES_PATTERN = Pattern.compile("^([eEwWnNsS])(\\d{3})(\\d{2})(\\d{2})"); private static final Pattern HDMSHDD_PATTERN = Pattern.compile("^([eEwWnNsS])(\\d+(\\.\\d+)?)"); private static final Pattern PMDD_PATTERN = Pattern.compile("^([-+]?\\d+(\\.\\d+)?)"); + static String datePrefix = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); + static String vufindLocal = System.getenv("VUFIND_LOCAL_DIR"); + static String vufindHome = System.getenv("VUFIND_HOME"); + private static Properties vufindConfigs = null; // Initialize logging category static Logger logger = Logger.getLogger(GeoTools.class.getName()); + /** + * Constructor + */ + public GeoTools() + { + try { + vufindConfigs = PropertyUtils.loadProperties(ValueIndexerFactory.instance().getHomeDirs(), "vufind.properties"); + } catch (IllegalArgumentException e) { + // If the properties load failed, don't worry about it -- we'll use defaults. + } + } + /** * Convert MARC coordinates into long_lat format. * @@ -62,10 +92,6 @@ public class GeoTools if (list034 != null) { for (VariableField vf : list034) { HashMap<Character, String> coords = getCoordinateValues(vf); - //DEBUG output - //ControlField recID = (ControlField) record.getVariableField("001"); - //String recNum = recID.getData(); - //logger.info("Record ID: " + recNum.trim() + " ...Coordinates: [ {" + coords.get('d') + "} {" + coords.get('e') + "} {" + coords.get('f') + "} {" + coords.get('g') + "} ]"); // Check for null coordinates if (validateCoordinateValues(record, coords)) { @@ -79,11 +105,7 @@ public class GeoTools // Note - storage in Solr follows the WENS order, but display is WSEN order String result = String.format("ENVELOPE(%s,%s,%s,%s)", new Object[] { west, east, north, south }); geo_coordinates.add(result); - } else { - logger.error(".......... Not indexing INVALID coordinates: [ {" + coords.get('d') + "} {" + coords.get('e') + "} {" + coords.get('f') + "} {" + coords.get('g') + "} ]"); - } - } else { - logger.error(".......... Not indexing INVALID coordinates: [ {" + coords.get('d') + "} {" + coords.get('e') + "} {" + coords.get('f') + "} {" + coords.get('g') + "} ]"); + } } } } @@ -106,14 +128,83 @@ public class GeoTools if (validateCoordinateValues(record, coords)) { String result = String.format("%s %s %s %s", new Object[] { coords.get('d'), coords.get('e'), coords.get('f'), coords.get('g') }); geo_coordinates.add(result); - } else { - logger.error(".......... Not indexing INVALID Display coordinates: [ {" + coords.get('d') + "} {" + coords.get('e') + "} {" + coords.get('f') + "} {" + coords.get('g') + "} ]"); } } } return geo_coordinates; } + /** + * Log coordinate indexing errors to external log file. + * + * @param Record record + * @param HashMap coords + * @param String error message + */ + public static void logErrorMessage(Record record, HashMap coords, String message) { + // Initialize error logging variables + String msgError = message; + String recNum = "Not available"; + ControlField recID = (ControlField) record.getVariableField("001"); + if (recID != null) { + recNum = recID.getData().trim(); + } + String coordinates = "Coordinates: {" + coords.get('d') + "} {" + coords.get('e') + "} {" + coords.get('f') + "} {" + coords.get('g') + "}"; + + String logPath = getLogPath(); + String logFilename = datePrefix + "_CoordinateErrors.txt"; + String outPath = logPath + "/" + logFilename; + + // Output Error message + logger.error("Not indexing INVALID coordinates for Record ID: " + recNum); + logger.error("... " + msgError); + + if (logPath != null) { + logger.error("... Check coordinate error log: " + outPath); + // Log ID and error message and coordinates in error file + try { + PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(outPath, true))); + out.println(logFilename + "\t" + recNum + "\t" + msgError + "\t" + coordinates); + out.close(); + } catch (IOException e) { + System.out.println("io exception occured"); + e.printStackTrace(); + } + } else { + // output error that log file cannot be created + logger.error("..... No coordinate error log. Check vufind.properties settings..."); + } + + } + + /** + * Get path for coordinate error log file. + * + * @return String logPath + */ + public static String getLogPath() { + // Get coordinate error log path setting + String coordLogPath = PropertyUtils.getProperty(vufindConfigs, "coordinate.log.path"); + + //If coordinate.log.path doesn't exist or is not set, try some other places + if (coordLogPath == null) { + if (vufindLocal != null) { + File dir = new File(vufindLocal + "/import"); + if (dir.exists()) { + coordLogPath = vufindLocal + "/import"; + } + } else if (vufindLocal == null && vufindHome != null) { + File dir = new File(vufindHome + "/import"); + if (dir.exists()) { + coordLogPath = vufindHome + "/import"; + } + } else { + coordLogPath = null; + } + } + return coordLogPath; + } + /** * Get all coordinate values from list034 * @@ -164,9 +255,8 @@ public class GeoTools if (coords.containsKey('d') && coords.containsKey('e') && coords.containsKey('f') && coords.containsKey('g')) { return true; } - ControlField recID = (ControlField) record.getVariableField("001"); - String recNum = recID.getData(); - logger.error("Record ID: " + recNum.trim() + " - Coordinate values contain null values."); + String msgError = "Coordinate values contain null values."; + logErrorMessage(record, coords, msgError); return false; } @@ -211,8 +301,7 @@ public class GeoTools coordinate = Double.parseDouble(PMDmatcher.group(1)); return coordinate; } else { - logger.error("Decimal Degree Coordinate Conversion Error: Poorly formed coordinate: [" + coordinateStr + "] ... Returning null value ... "); - return null; + return Double.NaN; } } @@ -235,7 +324,7 @@ public class GeoTools } return coordinate; } - return null; + return Double.NaN; } /** @@ -280,9 +369,9 @@ public class GeoTools */ public boolean validateLines(Record record, Double west, Double east, Double north, Double south) { if ((!west.equals(east) && north.equals(south)) && (north == 90 || south == -90)) { - ControlField recID = (ControlField) record.getVariableField("001"); - String recNum = recID.getData(); - logger.error("Record ID: " + recNum.trim() + " - Coordinates form a line at the pole"); + String msgError = "Coordinates form a line at the pole"; + HashMap<Character, String> coords = buildCoordinateHashMap(west, east, north, south); + logErrorMessage(record, coords, msgError); return false; } return true; @@ -296,10 +385,10 @@ public class GeoTools * @return boolean */ public boolean validateValues(Record record, Double west, Double east, Double north, Double south) { - if (west == null || east == null || north == null || south == null) { - ControlField recID = (ControlField) record.getVariableField("001"); - String recNum = recID.getData(); - logger.error("Record ID: " + recNum.trim() + " - Decimal Degree coordinates contain null values: [ {" + west + "} {" + east + "} {" + north + "} {" + south + "} ]"); + if (west.isNaN() || east.isNaN() || north.isNaN() || south.isNaN()) { + String msgError = "Decimal Degree coordinates contain invalid values"; + HashMap<Character, String> coords = buildCoordinateHashMap(west, east, north, south); + logErrorMessage(record, coords, msgError); return false; } return true; @@ -313,16 +402,12 @@ public class GeoTools * @return boolean */ public boolean validateExtent(Record record, Double west, Double east, Double north, Double south) { - if (west > 180.0 || west < -180.0 || east > 180.0 || east < -180.0) { - ControlField recID = (ControlField) record.getVariableField("001"); - String recNum = recID.getData(); - logger.error("Record ID: " + recNum.trim() + " - Coordinates exceed map extent."); - return false; - } - if (north > 90.0 || north < -90.0 || south > 90.0 || south < -90.0) { - ControlField recID = (ControlField) record.getVariableField("001"); - String recNum = recID.getData(); - logger.error("Record ID: " + recNum.trim() + " - Coordinates exceed map extent."); + if (west > 180.0 || west < -180.0 || east > 180.0 || east < -180.0 + || north > 90.0 || north < -90.0 || south > 90.0 || south < -90.0 + ) { + String msgError = "Coordinates exceed map extent."; + HashMap<Character, String> coords = buildCoordinateHashMap(west, east, north, south); + logErrorMessage(record, coords, msgError); return false; } return true; @@ -337,9 +422,9 @@ public class GeoTools */ public boolean validateNorthSouth(Record record, Double north, Double south) { if (north < south) { - ControlField recID = (ControlField) record.getVariableField("001"); - String recNum = recID.getData(); - logger.error("Record ID: " + recNum.trim() + " - North < South."); + String msgError = "North < South."; + HashMap<Character, String> coords = buildCoordinateHashMap(Double.NaN, Double.NaN, north, south); + logErrorMessage(record, coords, msgError); return false; } return true; @@ -363,9 +448,9 @@ public class GeoTools } // Check again if (east < west) { - ControlField recID = (ControlField) record.getVariableField("001"); - String recNum = recID.getData(); - logger.error("Record ID: " + recNum.trim() + " - East < West."); + String msgError = "East < West."; + HashMap<Character, String> coords = buildCoordinateHashMap(west, east, Double.NaN, Double.NaN); + logErrorMessage(record, coords, msgError); return false; } } @@ -386,19 +471,34 @@ public class GeoTools //Check for South Pole coordinate distance if ((north == -90 || south == -90) && (distNS > 0 && distNS < 0.167)) { - ControlField recID = (ControlField) record.getVariableField("001"); - String recNum = recID.getData(); - logger.error("Record ID: " + recNum.trim() + " - Coordinates < 0.167 degrees from South Pole. Coordinate Distance: "+distNS); + String msgError = "Coordinates < 0.167 degrees from South Pole. Coordinate Distance: "+distNS; + HashMap<Character, String> coords = buildCoordinateHashMap(west, east, north, south); + logErrorMessage(record, coords, msgError); return false; } //Check for East-West coordinate distance if ((west == 0 || east == 0) && (distEW > -2 && distEW <0)) { - ControlField recID = (ControlField) record.getVariableField("001"); - String recNum = recID.getData(); - logger.error("Record ID: " + recNum.trim() + " - Coordinates within 2 degrees of Prime Meridian. Coordinate Distance: "+distEW); + String msgError = "Coordinates within 2 degrees of Prime Meridian. Coordinate Distance: "+distEW; + HashMap<Character, String> coords = buildCoordinateHashMap(west, east, north, south); + logErrorMessage(record, coords, msgError); return false; } return true; } + + /** + * Build coordinate hash map for logging. + * + * @param Double west, east, north, south + * @return HashMap coords + */ + public HashMap buildCoordinateHashMap (Double west, Double east, Double north, Double south) { + HashMap<Character, String> coords = new HashMap(); + coords.put('d', Double.toString(west)); + coords.put('e', Double.toString(east)); + coords.put('f', Double.toString(north)); + coords.put('g', Double.toString(south)); + return coords; + } } diff --git a/import/vufind.properties b/import/vufind.properties index f7010139988b8e6a90806222890aa87bf12905d6..52985f62f92e878c5242027856c2d95f553097a2 100644 --- a/import/vufind.properties +++ b/import/vufind.properties @@ -6,3 +6,8 @@ # look in the VuFind 2.x-style directory path, but you can uncomment the line # below for compatibility with VuFind 1.x. #vufind.config.relative_path = web/conf + +# coordinate error log location - where to put coordinate indexing error logs +# if not set, the default path will be VUFIND_LOCAL_DIR/import, and failing that, +# VUFIND_HOME/import. +#coordinate.log.path = /usr/local/vufind/import