The Gitlab instance will be restarted on Monday April 28th at 2AM. There will be a short interruption of service.

Skip to content
Snippets Groups Projects
Commit 7e6cb9e6 authored by Demian Katz's avatar Demian Katz Committed by GitHub
Browse files

Consume Tika stderr output to avoid crash. (#1366)

- Resolves VUFIND-1330.
parent 905f83c3
No related merge requests found
...@@ -206,7 +206,7 @@ public class FullTextTools ...@@ -206,7 +206,7 @@ public class FullTextTools
//System.out.println("Loading fulltext from " + url + ". Please wait ..."); //System.out.println("Loading fulltext from " + url + ". Please wait ...");
try { try {
Process p = Runtime.getRuntime().exec(cmd); Process p = Runtime.getRuntime().exec(cmd);
// Debugging output // Debugging output
/* /*
BufferedReader stdInput = new BufferedReader(new BufferedReader stdInput = new BufferedReader(new
...@@ -216,7 +216,7 @@ public class FullTextTools ...@@ -216,7 +216,7 @@ public class FullTextTools
System.out.println(s); System.out.println(s);
} }
*/ */
// Wait for Aperture to finish // Wait for Aperture to finish
p.waitFor(); p.waitFor();
} catch (Throwable e) { } catch (Throwable e) {
...@@ -248,6 +248,28 @@ public class FullTextTools ...@@ -248,6 +248,28 @@ public class FullTextTools
return plainText; return plainText;
} }
class ErrorStreamHandler extends Thread {
InputStream stdErr;
ErrorStreamHandler(InputStream stdErr) {
this.stdErr = stdErr;
}
public void run()
{
try {
InputStreamReader isr = new InputStreamReader(stdErr, "UTF8");
BufferedReader br = new BufferedReader(isr);
String line = null;
while ((line = br.readLine()) != null) {
logger.debug(line);
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
/** /**
* Harvest the contents of a document file (PDF, Word, etc.) using Tika. * Harvest the contents of a document file (PDF, Word, etc.) using Tika.
* This method will only work if Tika is properly configured in the fulltext.ini * This method will only work if Tika is properly configured in the fulltext.ini
...@@ -258,16 +280,17 @@ public class FullTextTools ...@@ -258,16 +280,17 @@ public class FullTextTools
* @return the full-text * @return the full-text
*/ */
public String harvestWithTika(String url, String scraperPath) { public String harvestWithTika(String url, String scraperPath) {
// Construct the command
String cmd = "java -jar " + scraperPath + " -t -eUTF8 " + url;
StringBuilder stringBuilder= new StringBuilder(); StringBuilder stringBuilder= new StringBuilder();
// Call our scraper // Call our scraper
//System.out.println("Loading fulltext from " + url + ". Please wait ..."); //System.out.println("Loading fulltext from " + url + ". Please wait ...");
try { try {
Process p = Runtime.getRuntime().exec(cmd); ProcessBuilder pb = new ProcessBuilder(
"java", "-jar", scraperPath, "-t", "-eutf8", url
);
Process p = pb.start();
ErrorStreamHandler esh = new ErrorStreamHandler(p.getErrorStream());
esh.start();
BufferedReader stdInput = new BufferedReader(new BufferedReader stdInput = new BufferedReader(new
InputStreamReader(p.getInputStream(), "UTF8")); InputStreamReader(p.getInputStream(), "UTF8"));
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment