Commit c4b0ed61 authored by Niels Erik G. Nielsen's avatar Niels Erik G. Nielsen
Browse files

Roll back changes to MarcXMLToJson

  .. as it fails for static OAI-PMH files (which we only need for dev/testing)

  After rollback it works for

   Live OAI-PMH feed (tested with Millersville's server)
   Static OAI-PMH file
   ftp harvest of .gz file (tested with New School's server)
   Static xml file from Seton Hall

  If there are other scenarios not tried here but for which it would
  fail, we would have to revisit of course.
parent 5dc7a388
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.indexdata.masterkey.localindices.util;
import java.io.IOException;
......@@ -23,28 +28,37 @@ public class MarcXMLToJson {
JSONObject marcJson = new JSONObject();
JSONArray fields = new JSONArray();
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
documentBuilderFactory.setNamespaceAware(true);
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
Document document = documentBuilder.parse(new InputSource(new StringReader(marcXML)));
Element root = document.getDocumentElement();
String namespace = "http://www.loc.gov/MARC21/slim";
Element record = null;
if(root.getTagName().endsWith("OAI-PMH")) { // probably a static OAI-PMH file
Element listRecords = (Element)root.getElementsByTagNameNS(namespace, "ListRecords").item(0);
Element topRecord = (Element)listRecords.getElementsByTagNameNS(namespace, "record").item(0);
Element metadata = (Element)topRecord.getElementsByTagNameNS(namespace, "metadata").item(0);
record = (Element) metadata.getElementsByTagNameNS(namespace, "record").item(0);
} else if (root.getTagName().endsWith("record")) {
//record = (Element) root.getElementsByTagName("record").item(0);
record = root;
} else if (root.getTagName().endsWith("collection")) {
record = (Element) root.getElementsByTagNameNS(namespace, "record").item(0);
if(root.getTagName().equals("OAI-PMH")) { // probably a static OAI-PMH file
Element listRecords = (Element)root.getElementsByTagName("ListRecords").item(0);
Element topRecord = (Element)listRecords.getElementsByTagName("record").item(0);
Element metadata = (Element)topRecord.getElementsByTagName("metadata").item(0);
record = (Element) metadata.getElementsByTagName("record").item(0);
} else if (root.getTagName().equals("record")) {
NodeList recordsEmbeddedInRecord = root.getElementsByTagName("record");
if (recordsEmbeddedInRecord != null && recordsEmbeddedInRecord.getLength()==1) {
// e.g. a MARC record embeddded in OAI-PMH record
record = (Element) recordsEmbeddedInRecord.item(0);
} else {
record = root;
}
} else if (root.getTagName().equals("collection")) {
NodeList records = root.getElementsByTagName("record");
if (records != null && records.getLength()==1) {
record = (Element) records.item(0);
}
if (records != null && records.getLength()>1) {
throw new IOException("MARC to XML converter received collection with multiple records. Cannot process multiple.");
}
}
if(record == null) {
throw new IOException("No record element found for root element " + root.getTagName());
throw new IOException("No record element found");
}
Node childNode = record.getFirstChild();
Element childElement;
Element childElement;
while(childNode != null) {
if(childNode.getNodeType() != record.getNodeType())
{
......@@ -53,14 +67,14 @@ public class MarcXMLToJson {
}
childElement = (Element)childNode;
String textContent = childElement.getTextContent();
if(childElement.getTagName().endsWith("leader")) {
if(childElement.getTagName().equals("leader")) {
marcJson.put("leader", textContent);
} else if(childElement.getTagName().endsWith("controlfield")) {
} else if(childElement.getTagName().equals("controlfield")) {
JSONObject field = new JSONObject();
String marcTag = childElement.getAttribute("tag");
field.put(marcTag, textContent);
fields.add(field);
} else if(childElement.getTagName().endsWith("datafield")) {
} else if(childElement.getTagName().equals("datafield")) {
JSONObject field = new JSONObject();
JSONObject fieldContent = new JSONObject();
String marcTag = childElement.getAttribute("tag");
......@@ -72,7 +86,7 @@ public class MarcXMLToJson {
}
JSONArray subfields = new JSONArray();
fieldContent.put("subfields", subfields);
NodeList nodeList = childElement.getElementsByTagNameNS(namespace, "subfield");
NodeList nodeList = childElement.getElementsByTagName("subfield");
for(int i = 0; i < nodeList.getLength(); i++) {
Element subField = (Element) nodeList.item(i);
String code = subField.getAttribute("code");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment