From 2fe79719f8b83ebc4701a1c0fb4ca232de9b2aea Mon Sep 17 00:00:00 2001
From: Demian Katz <demian.katz@villanova.edu>
Date: Wed, 23 Jan 2013 09:42:58 -0500
Subject: [PATCH] Resolving VUFIND-739 (Wikipedia circular redirect causes
 infinite loop).

---
 .../src/VuFind/Recommend/AuthorInfo.php       | 62 +++++++++++++++----
 1 file changed, 51 insertions(+), 11 deletions(-)

diff --git a/module/VuFind/src/VuFind/Recommend/AuthorInfo.php b/module/VuFind/src/VuFind/Recommend/AuthorInfo.php
index de0e938429c..e0ee8f14543 100644
--- a/module/VuFind/src/VuFind/Recommend/AuthorInfo.php
+++ b/module/VuFind/src/VuFind/Recommend/AuthorInfo.php
@@ -86,6 +86,13 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface
      */
     protected $useViaf = false;
 
+    /**
+     * Wikipedia pages already retrieved (page/author name => true)
+     *
+     * @var array
+     */
+    protected $pagesRetrieved = array();
+
     /**
      * Constructor
      *
@@ -179,6 +186,23 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface
         $this->searchObject = $results;
     }
 
+    /**
+     * Report whether a Wikipedia page was fetched earlier by this object; a
+     * page not seen before is recorded so that later calls report it as seen.
+     *
+     * @param string $author Author being retrieved
+     *
+     * @return bool
+     */
+    protected function alreadyRetrieved($author)
+    {
+        $seen = isset($this->pagesRetrieved[$author]);
+        if (!$seen) {
+            $this->pagesRetrieved[$author] = true;
+        }
+        return $seen;
+    }
+
     /**
      * Returns info from Wikipedia to the view
      *
@@ -218,6 +242,12 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface
      */
     protected function getWikipedia($author)
     {
+        // Don't retrieve the same page multiple times; this indicates a loop
+        // that needs to be broken!
+        if ($this->alreadyRetrieved($author)) {
+            return array();
+        }
+
         // Get information from Wikipedia API
         $uri = 'http://' . $this->lang . '.wikipedia.org/w/api.php' .
                '?action=query&prop=revisions&rvprop=content&format=php' .
@@ -248,18 +278,28 @@ class AuthorInfo implements RecommendInterface, TranslatorAwareInterface
             return null;
         }
 
-        // Get the default page
-        $body = array_shift($body['query']['pages']);
-        $info = array('name' => $body['title'], 'wiki_lang' => $this->lang);
-
-        // Get the latest revision
-        $body = array_shift($body['revisions']);
-        // Check for redirection
-        $as_lines = explode("\n", $body['*']);
-        if (stristr($as_lines[0], '#REDIRECT')) {
-            preg_match('/\[\[(.*)\]\]/', $as_lines[0], $matches);
-            return $this->getWikipedia($matches[1]);
+        // Loop through the pages and find the first that isn't a redirect:
+        foreach ($body['query']['pages'] as $page) {
+            $info['name'] = $page['title'];
+
+            // Get the latest revision
+            $page = array_shift($page['revisions']);
+            // Check for redirection
+            $as_lines = explode("\n", $page['*']);
+            if (stristr($as_lines[0], '#REDIRECT')) {
+                preg_match('/\[\[(.*)\]\]/', $as_lines[0], $matches);
+                $redirectTo = $matches[1];
+            } else {
+                $redirectTo = false;
+                break;
+            }
+        }
+
+        // Recurse if we only found redirects:
+        if ($redirectTo) {
+            return $this->getWikipedia($redirectTo);
         }
+        $body = $page;
 
         /* Infobox */
 
-- 
GitLab