Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Fedora.php 14.34 KiB
<?php
/**
 * VuDL to Fedora connection class (defines some methods to talk to Fedora)
 *
 * PHP version 5
 *
 * Copyright (C) Villanova University 2010.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * @category VuFind2
 * @package  Controller
 * @author   Chris Hallberg <challber@villanova.edu>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     http://vufind.org/wiki/
 */
namespace VuDL\Connection;
use VuFindHttp\HttpServiceInterface,
    VuFindSearch\ParamBag;

/**
 * VuDL-Fedora connection class
 *
 * @category VuFind2
 * @package  Controller
 * @author   Chris Hallberg <challber@villanova.edu>
 * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
 * @link     http://vufind.org/wiki/
 */
class Fedora extends AbstractBase
{
    /**
     * Datastream listings already fetched, keyed by record id
     *
     * @var array
     */
    protected $datastreams = array();

    /**
     * Get Fedora Base URL.
     *
     * @return string|null Configured Fedora url_base, or null when it is not set
     */
    public function getBase()
    {
        return isset($this->config->Fedora->url_base)
            ? $this->config->Fedora->url_base
            : null;
    }

    /**
     * Returns an array of classes for this object
     *
     * The classes are every "vudl-system:" resource referenced from the
     * record's RELS-EXT datastream.
     *
     * @param string $id record id
     *
     * @return array
     */
    public function getClasses($id)
    {
        $data = $this->getDatastreamContent($id, 'RELS-EXT');
        $matches = array();
        preg_match_all(
            '/rdf:resource="info:fedora\/vudl-system:([^"]+)/',
            $data,
            $matches
        );
        return $matches[1];
    }

    /**
     * Returns the datastream listing of a record (requested with format=xml).
     *
     * The raw response is cached per record id for the lifetime of this object.
     *
     * @param string $id  Record id
     * @param bool   $xml Return data as SimpleXMLElement?
     *
     * @return string|\SimpleXMLElement
     */
    public function getDatastreams($id, $xml = false)
    {
        if (!isset($this->datastreams[$id])) {
            // getDatastreamContent() already appends '/datastreams' when
            // $justStream is true, so only the query string is passed here
            // (passing '/datastreams?format=xml' duplicated the path segment).
            $this->datastreams[$id] = $this->getDatastreamContent(
                $id,
                '?format=xml',
                true
            );
        }
        return $xml
            ? simplexml_load_string($this->datastreams[$id])
            : $this->datastreams[$id];
    }

    /**
     * Return the content of a datastream.
     *
     * @param string $id         Record id
     * @param string $stream     Name of stream to retrieve
     * @param bool   $justStream Do not append /content and return from url as is
     *
     * @return string|false Response body, or false when the request fails
     * (file_get_contents() semantics)
     */
    public function getDatastreamContent($id, $stream, $justStream = false)
    {
        $prefix = $this->getBase() . $id . '/datastreams';
        $url = $justStream
            ? $prefix . $stream
            : $prefix . '/' . $stream . '/content';
        return file_get_contents($url);
    }

    /**
     * Return the headers of a datastream.
     *
     * @param string $id     Record id
     * @param string $stream Name of stream to retrieve
     *
     * @return array|false Header lines as returned by get_headers(), or false
     * on failure
     */
    public function getDatastreamHeaders($id, $stream)
    {
        return get_headers(
            $this->getBase() . $id . '/datastreams/' . $stream . '/content'
        );
    }

    /**
     * Get details for the sidebar on a record.
     *
     * Parses the DC datastream into a map of field name => value. When a field
     * occurs more than once, the last occurrence wins.
     *
     * @param string $id     ID to retrieve
     * @param bool   $format Send result through formatDetails?
     *
     * @return array|mixed Field map, or whatever formatDetails() returns when
     * $format is true
     */
    public function getDetails($id, $format = false)
    {
        $dc = array();
        preg_match_all(
            '/<[^\/]*dc:([^ >]+)>([^<]+)/',
            $this->getDatastreamContent($id, 'DC'),
            $dc
        );
        $details = array();
        foreach ($dc[2] as $i => $detail) {
            $details[$dc[1][$i]] = $detail;
        }
        // formatDetails() is not defined in this class (presumably inherited).
        return $format ? $this->formatDetails($details) : $details;
    }

    /**
     * Get an HTTP client
     *
     * Uses the injected HTTP service when one is available, otherwise falls
     * back to a plain Zend client.
     *
     * @param string $url URL for client to access
     *
     * @return \Zend\Http\Client
     */
    public function getHttpClient($url)
    {
        if ($this->httpService) {
            return $this->httpService->createClient($url);
        }
        return new \Zend\Http\Client($url);
    }

    /**
     * Get an item's label
     *
     * NOTE(review): $id is interpolated into the query unescaped; confirm that
     * callers only pass validated record ids.
     *
     * @param string $id Record's id
     *
     * @return string|null Label, or null when the query returned no rows
     */
    public function getLabel($id)
    {
        $query = 'select $memberTitle from <#ri> '
            . 'where $member <dc:identifier> \''. $id .'\' '
            . 'and $member <fedora-model:label> $memberTitle';
        $response = $this->query($query);
        $list = explode("\n", $response->getBody());
        // Line 0 is the CSV header row; line 1 is the first result row.
        return isset($list[1]) ? $list[1] : null;
    }

    /**
     * Tuple call to return and parse a list of members...
     *
     * @param string $root ...for this id
     *
     * @return array of member ids in the order the query returned them
     */
    public function getMemberList($root)
    {
        $query = 'select $memberPID $memberTitle from <#ri> '
            . 'where $member <fedora-rels-ext:isMemberOf> <info:fedora/' .$root. '> '
            . 'and $member <fedora-model:label> $memberTitle '
            . 'and $member <dc:identifier> $memberPID';
        $response = $this->query($query);
        return $this->extractMemberIds($response->getBody());
    }

    /**
     * Pull the first column (member id) out of every data row of a CSV
     * query response, skipping the header row and empty lines.
     *
     * @param string $csv Raw CSV response body
     *
     * @return array of ids
     */
    protected function extractMemberIds($csv)
    {
        $ids = array();
        foreach (array_slice(explode("\n", $csv), 1) as $line) {
            if (empty($line)) {
                continue;
            }
            list($id) = explode(',', $line, 2);
            $ids[] = $id;
        }
        return $ids;
    }

    /**
     * Get the last modified date of a record from the Fedora query service
     *
     * NOTE(review): $id is interpolated into the query unescaped; confirm that
     * callers only pass validated record ids.
     *
     * @param string $id ID to look up
     *
     * @return string|null Date as returned by Fedora, or null when the query
     * returned no rows
     */
    public function getModDate($id)
    {
        $query = 'select $lastModDate from <#ri> '
            . 'where $member '
            . '<info:fedora/fedora-system:def/view#lastModifiedDate> '
            . '$lastModDate '
            . 'and $member <dc:identifier> \''. $id .'\'';
        $response = $this->query($query);
        $list = explode("\n", $response->getBody());
        // Line 0 is the CSV header row; line 1 is the first result row.
        return isset($list[1]) ? $list[1] : null;
    }

    /**
     * Returns the ids of the members of a record in display order.
     *
     * Members are ordered by their sequence relationship when every member
     * carries a sequence value of the form "<root>#<number>"; otherwise the
     * list falls back to ordering by title.
     *
     * @param string $root record id
     *
     * @return array of ids
     */
    public function getOrderedMembers($root)
    {
        $query = 'select $memberPID $memberTitle $sequence $member from <#ri> '
            . 'where $member <fedora-rels-ext:isMemberOf> <info:fedora/'.$root.'> '
            . 'and $member <http://vudl.org/relationships#sequence> $sequence '
            . 'and $member <fedora-model:label> $memberTitle '
            . 'and $member <dc:identifier> $memberPID';
        $response = $this->query($query);
        $list = explode("\n", $response->getBody());
        // Header + at least one data row + trailing line
        if (count($list) > 2) {
            $items = array();
            $sequenced = true;
            foreach (array_slice($list, 1) as $line) {
                if (empty($line)) {
                    continue;
                }
                // Parse as CSV rather than splitting on ',' so that a quoted
                // title containing commas does not shift the sequence column.
                $fields = str_getcsv($line, ',', '"', '\\');
                $seqParts = isset($fields[2])
                    ? explode('#', $fields[2])
                    : array();
                // A sequence that is malformed or belongs to another parent
                // means we cannot trust sequence order for this root.
                if (count($seqParts) < 2 || $seqParts[0] != $root) {
                    $sequenced = false;
                    break;
                }
                $items[] = array(
                    'seq' => $seqParts[1],
                    'id' => $fields[0]
                );
            }
            if ($sequenced) {
                usort(
                    $items,
                    function ($a, $b) {
                        return intval($a['seq']) - intval($b['seq']);
                    }
                );
                return array_map(
                    function ($op) {
                        return $op['id'];
                    },
                    $items
                );
            }
        }
        // No sequence? Title sort.
        $query = 'select $memberPID $memberTitle from <#ri> '
            . 'where $member <fedora-rels-ext:isMemberOf> <info:fedora/' .$root. '> '
            . 'and $member <fedora-model:label> $memberTitle '
            . 'and $member <dc:identifier> $memberPID '
            . 'order by $memberTitle';
        $response = $this->query($query);
        return $this->extractMemberIds($response->getBody());
    }

    /**
     * Tuple call to return and parse a list of parents...
     *
     * Results are cached per id in $this->parentLists (not declared in this
     * class; presumably provided by the parent class).
     *
     * @param string $id ...for this id
     *
     * @return array of parents in order from top-down
     */
    public function getParentList($id)
    {
        if (isset($this->parentLists[$id])) {
            return $this->parentLists[$id];
        }
        // Walk to get all parents to root
        $query = 'select $child $parent $parentTitle from <#ri> '
                . 'where walk ('
                        . '<info:fedora/' .$id. '> '
                        . '<fedora-rels-ext:isMemberOf> '
                        . '$parent '
                    . 'and $child <fedora-rels-ext:isMemberOf> $parent) '
                . 'and $parent <fedora-model:label> $parentTitle';
        // Parse out relationships
        $response = $this->query($query);
        $list = explode("\n", trim($response->getBody(), "\n"));
        $tree = array();
        // Skip the CSV header row
        foreach (array_slice($list, 1) as $line) {
            // Drop the first 12 characters of the child and parent values
            // (presumably the "info:fedora/" URI prefix, which is 12 long).
            $parts = explode(',', substr($line, 12), 3);
            if (count($parts) < 3) {
                // Malformed row; nothing usable to add to the tree
                continue;
            }
            list($child, $parent, $title) = $parts;
            $parent = substr($parent, 12);
            if (!isset($tree[$parent])) {
                $tree[$parent] = array(
                    'children' => array(),
                    'title' => $title
                );
            }
            $tree[$parent]['children'][] = $child;
        }
        // traceParents() is not defined in this class (presumably inherited).
        $ret = $this->traceParents($tree, $id);
        // Store in cache
        $this->parentLists[$id] = $ret;
        return $ret;
    }

    /**
     * Get Fedora Query URL.
     *
     * @return string|null Configured Fedora query_url, or null when it is
     * not set
     */
    public function getQueryURL()
    {
        return isset($this->config->Fedora->query_url)
            ? $this->config->Fedora->query_url
            : null;
    }

    /**
     * Get technical information (size, type and rendered markup) for a record
     *
     * @param object        $record   Record data; read with array access
     * ('id', 'ocr-dirty', 'master-md')
     * @param View\Renderer $renderer View renderer to get techinfo template
     *
     * @return array|false false when no record is given; otherwise an array
     * that may contain 'size' and 'type' (when the record has 'master-md')
     * and 'div' (rendered template, when a renderer is given)
     */
    public function getTechInfo($record = null, $renderer = null)
    {
        if ($record == null) {
            return false;
        }
        $ret = array();
        // OCR
        if (isset($record['ocr-dirty'])) {
            $record['ocr-dirty'] = $this->getDatastreamContent(
                $record['id'],
                'OCR-DIRTY'
            );
        }
        // Technical Information
        if (isset($record['master-md'])) {
            $record['techinfo'] = $this->getDatastreamContent(
                $record['id'],
                'MASTER-MD'
            );
            $info = $this->getSizeAndTypeInfo($record['techinfo']);
            // The helper returns an empty array when no size is present;
            // keep both keys in the result but avoid undefined-index notices.
            $ret['size'] = isset($info['size']) ? $info['size'] : null;
            $ret['type'] = isset($info['type']) ? $info['type'] : null;
        }
        if ($renderer != null) {
            $ret['div'] = $renderer
                ->render('vudl/techinfo.phtml', array('record'=>$record));
        }
        return $ret;
    }

    /**
     * Get size/type information out of the technical metadata.
     *
     * @param string $techInfo Technical metadata
     *
     * @return array Empty when no <size> element is found; otherwise
     * 'size' (human-readable, e.g. "1.5 MB") and 'type' (mimetype attribute
     * value, or null when none is found)
     */
    protected function getSizeAndTypeInfo($techInfo)
    {
        $data = $type = array();
        preg_match('/<size[^>]*>([^<]*)/', $techInfo, $data);
        preg_match('/mimetype="([^"]*)/', $techInfo, $type);
        if (count($data) < 2) {
            return array();
        }
        $bytes = intval($data[1]);
        $sizes = array('bytes','KB','MB');
        $lastIndex = count($sizes) - 1;
        $sizeIndex = 0;
        // >= so that exactly 1024 of a unit is promoted to the next unit
        while ($sizeIndex < $lastIndex && $bytes >= 1024) {
            $bytes /= 1024;
            $sizeIndex++;
        }
        return array(
            'size' => round($bytes, 1) . ' ' . $sizes[$sizeIndex],
            'type' => isset($type[1]) ? $type[1] : null
        );
    }

    /**
     * Get copyright URL and compare it to special cases from VuDL.ini
     *
     * @param string $id          record id
     * @param array  $setLicenses ids are strings to match urls to,
     *  the values are abbreviations. Parsed in details.phtml later.
     *
     * @return array|null null when the LICENSE datastream is unavailable;
     * otherwise array(license url, matched abbreviation or false)
     */
    public function getCopyright($id, $setLicenses)
    {
        $headers = $this->getDatastreamHeaders($id, 'LICENSE');
        // get_headers() returns false on failure; treat that like a 404.
        if (!is_array($headers) || !isset($headers[0])
            || strpos($headers[0], '404') !== false
        ) {
            return null;
        }
        $xml = $this->getDatastreamContent($id, 'LICENSE');
        $matches = array();
        if (!preg_match('/xlink:href="(.*?)"/', $xml, $matches)) {
            // No link found: keep the historical "unknown license" shape.
            return array(null, false);
        }
        $license = $matches[1];
        foreach ($setLicenses as $tell => $value) {
            // Compare against false explicitly: a match at offset 0 is valid.
            if (strpos($license, (string) $tell) !== false) {
                return array($license, $value);
            }
        }
        return array($license, false);
    }

    /**
     * Consolidation of Zend Client calls
     *
     * Sends an authenticated POST to the Fedora query URL asking for CSV
     * tuples in iTQL.
     *
     * @param string $query   Query for call
     * @param array  $options Additional POST parameters; these override the
     * defaults on key collision
     *
     * @return \Zend\Http\Response
     */
    protected function query($query, $options = array())
    {
        $defaults = array(
            'type'  => 'tuples',
            'flush' => false,
            'lang'  => 'itql',
            'format'=> 'CSV',
            'query' => $query
        );
        $data = array_replace($defaults, $options);
        $client = $this->getHttpClient($this->getQueryURL());
        $client->setMethod('POST');
        $client->setAuth(
            $this->config->Fedora->adminUser, $this->config->Fedora->adminPass
        );
        $client->setParameterPost($data);
        return $client->send();
    }
}