Skip to content
Snippets Groups Projects
Commit e51cc9c7 authored by Demian Katz
Browse files

Minor improvements/cleanup for web indexer.

parent 031b8570
No related merge requests found
...@@ -59,7 +59,7 @@ class VuFindSitemap extends VuFind ...@@ -59,7 +59,7 @@ class VuFindSitemap extends VuFind
} }
// Extract and decode the full text from the XML: // Extract and decode the full text from the XML:
$xml = file_get_contents($xmlFile); $xml = str_replace(chr(0), ' ', file_get_contents($xmlFile));
@unlink($xmlFile); @unlink($xmlFile);
preg_match('/<plainTextContent[^>]*>([^<]*)</ms', $xml, $matches); preg_match('/<plainTextContent[^>]*>([^<]*)</ms', $xml, $matches);
$final = isset($matches[1]) ? $final = isset($matches[1]) ?
...@@ -138,6 +138,9 @@ class VuFindSitemap extends VuFind ...@@ -138,6 +138,9 @@ class VuFindSitemap extends VuFind
/** /**
* Extract key metadata from HTML. * Extract key metadata from HTML.
* *
* NOTE: This method uses some non-standard meta tags; it is intended as an
* example that can be overridden/extended to support local practices.
*
* @param string $html HTML content. * @param string $html HTML content.
* *
* @return array * @return array
...@@ -164,7 +167,6 @@ class VuFindSitemap extends VuFind ...@@ -164,7 +167,6 @@ class VuFindSitemap extends VuFind
// Extract the use count from the HTML: // Extract the use count from the HTML:
preg_match_all('/<meta name="useCount" content="([^"]*)"/ms', $html, $matches); preg_match_all('/<meta name="useCount" content="([^"]*)"/ms', $html, $matches);
$linkTypes = array();
$useCount = isset($matches[1][0]) ? $matches[1][0] : 1; $useCount = isset($matches[1][0]) ? $matches[1][0] : 1;
return array( return array(
...@@ -208,6 +210,19 @@ class VuFindSitemap extends VuFind ...@@ -208,6 +210,19 @@ class VuFindSitemap extends VuFind
* @access public * @access public
*/ */
public static function getDocument($url) public static function getDocument($url)
{
// Turn the array into XML:
return static::arrayToSolrXml(static::getDocumentFieldArray($url));
}
/**
* Support method for getDocument() -- retrieve associative array of field data.
*
* @param string $url URL of file to retrieve.
*
* @return array
*/
protected static function getDocumentFieldArray($url)
{ {
$parser = static::getParser(); $parser = static::getParser();
if ($parser == 'None') { if ($parser == 'None') {
...@@ -251,7 +266,6 @@ class VuFindSitemap extends VuFind ...@@ -251,7 +266,6 @@ class VuFindSitemap extends VuFind
$fields['url'] = $url; $fields['url'] = $url;
$fields['last_indexed'] = date('Y-m-d\TH:i:s\Z'); $fields['last_indexed'] = date('Y-m-d\TH:i:s\Z');
// Turn the array into XML: return $fields;
return static::arrayToSolrXml($fields);
} }
} }
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment