From a73182fe1d2cd63f5ba863d17f592915e0008d02 Mon Sep 17 00:00:00 2001
From: Demian Katz <demian.katz@villanova.edu>
Date: Tue, 15 Jan 2013 13:40:06 -0500
Subject: [PATCH] Added database encoding conversion to upgrade script
 (resolving VUFIND-294).

---
 module/VuFind/config/module.config.php        |   3 +-
 .../VuFind/Controller/Plugin/DbUpgrade.php    | 113 +++++++++++++++++-
 .../VuFind/Controller/UpgradeController.php   |  64 ++++++++++
 .../upgrade/getdbencodingpreference.phtml     |  26 ++++
 4 files changed, 204 insertions(+), 2 deletions(-)
 create mode 100644 themes/blueprint/templates/upgrade/getdbencodingpreference.phtml

diff --git a/module/VuFind/config/module.config.php b/module/VuFind/config/module.config.php
index 5e82e314997..e1d4563a06a 100644
--- a/module/VuFind/config/module.config.php
+++ b/module/VuFind/config/module.config.php
@@ -601,7 +601,8 @@ $staticRoutes = array(
     'Tag/Home',
     'Upgrade/Home', 'Upgrade/FixAnonymousTags', 'Upgrade/FixConfig',
     'Upgrade/FixDatabase', 'Upgrade/FixMetadata', 'Upgrade/GetDBCredentials',
-    'Upgrade/GetSourceDir', 'Upgrade/Reset', 'Upgrade/ShowSQL',
+    'Upgrade/GetDbEncodingPreference', 'Upgrade/GetSourceDir', 'Upgrade/Reset',
+    'Upgrade/ShowSQL',
     'VuDL/Browse', 'VuDL/DSRecord', 'VuDL/Record',
     'Worldcat/Advanced', 'Worldcat/Home', 'Worldcat/Search'
 );
diff --git a/module/VuFind/src/VuFind/Controller/Plugin/DbUpgrade.php b/module/VuFind/src/VuFind/Controller/Plugin/DbUpgrade.php
index 3d24e46aa7c..0ca24c584df 100644
--- a/module/VuFind/src/VuFind/Controller/Plugin/DbUpgrade.php
+++ b/module/VuFind/src/VuFind/Controller/Plugin/DbUpgrade.php
@@ -111,10 +111,11 @@ class DbUpgrade extends AbstractPlugin
     public function query($sql, $logsql)
     {
         if ($logsql) {
-            return $sql . ";\n";
+            return rtrim($sql, ';') . ";\n"; // avoid a doubled ';' terminator
         } else {
             $this->getAdapter()->query($sql, DbAdapter::QUERY_MODE_EXECUTE);
         }
+        return ''; // executed directly, so there is no SQL text to hand back
     }
 
     /**
@@ -148,6 +149,116 @@ class DbUpgrade extends AbstractPlugin
         return array_keys($this->getTableInfo());
     }
 
+    /**
+     * Support method for getEncodingProblems() -- find latin1 columns in a table
+     *
+     * @param string $table Table to check
+     *
+     * @throws \Exception
+     * @return array Matching SHOW FULL COLUMNS rows (as arrays) keyed by field
+     */
+    protected function getEncodingProblemsForTable($table)
+    {
+        $columns = $this->getAdapter()->query(
+            "SHOW FULL COLUMNS FROM `{$table}`", DbAdapter::QUERY_MODE_EXECUTE
+        );
+        // Keep only the columns whose collation name starts with "latin1":
+        $problems = array();
+        foreach ($columns as $column) {
+            $prefix = strtolower(substr($column->Collation, 0, 6));
+            if ($prefix === 'latin1') {
+                $problems[$column->Field] = (array)$column;
+            }
+        }
+        return $problems;
+    }
+
+    /**
+     * Find tables using a latin1 default collation, with their latin1 columns.
+     *
+     * @throws \Exception
+     * @return array Table name => result of getEncodingProblemsForTable()
+     */
+    public function getEncodingProblems()
+    {
+        $tables = $this->getAdapter()
+            ->query("SHOW TABLE STATUS", DbAdapter::QUERY_MODE_EXECUTE);
+
+        // Only tables whose collation name starts with "latin1" are inspected;
+        // each one maps to the details of its affected columns:
+        $problems = array();
+        foreach ($tables as $table) {
+            $prefix = strtolower(substr($table->Collation, 0, 6));
+            if ($prefix === 'latin1') {
+                $name = $table->Name;
+                $problems[$name] = $this->getEncodingProblemsForTable($name);
+            }
+        }
+        return $problems;
+    }
+
+    /**
+     * Fix encoding problems based on the output of getEncodingProblems().
+     *
+     * @param array $tables Output of getEncodingProblems()
+     * @param bool  $logsql Should we return the SQL as a string rather than
+     * execute it?
+     *
+     * @throws \Exception   On a column type with no known binary equivalent
+     * @return string       SQL if $logsql is true, empty string otherwise
+     */
+    public function fixEncodingProblems($tables, $logsql = false)
+    {
+        $newCollation = "utf8_general_ci";
+        $sqlcommands = '';
+        $platform = $this->getAdapter()->getPlatform();
+        // Binary stand-ins used for the intermediate conversion step:
+        $binaryTypes = array(
+            'tinytext' => 'tinyblob', 'text' => 'blob',
+            'mediumtext' => 'mediumblob', 'longtext' => 'longblob',
+            'varchar' => 'varbinary'
+        );
+
+        // Database conversion routines inspired by:
+        //     https://github.com/nicjansma/mysql-convert-latin1-to-utf8
+        foreach ($tables as $table => $columns) {
+            foreach ($columns as $column => $details) {
+                $oldType = $details['Type'];
+                $parts = explode('(', $oldType, 2);
+                if (!isset($binaryTypes[$parts[0]])) {
+                    throw new \Exception('Unexpected column type: ' . $parts[0]);
+                }
+                // Carry over the length specifier when there is one:
+                $newType = $binaryTypes[$parts[0]]
+                    . (isset($parts[1]) ? '(' . $parts[1] : '');
+
+                // Nullability and default must survive both ALTER statements:
+                $notNull = strtoupper($details['Null']) == 'NO';
+                $attrs = $notNull ? ' NOT NULL' : '';
+                if (null !== $details['Default']) {
+                    // mysql_real_escape_string() needs an ext/mysql link that
+                    // is never opened here, so quote via the adapter platform:
+                    $attrs .= ' DEFAULT '
+                        . $platform->quoteValue($details['Default']);
+                }
+
+                // Change to binary equivalent:
+                $sql = "ALTER TABLE `$table` MODIFY `$column` $newType$attrs;";
+                $sqlcommands .= $this->query($sql, $logsql);
+
+                // Change back to appropriate character data with fixed encoding:
+                $sql = "ALTER TABLE `$table` MODIFY `$column` $oldType"
+                    . " COLLATE $newCollation$attrs;";
+                $sqlcommands .= $this->query($sql, $logsql);
+            }
+
+            // Adjust default table collation:
+            $sql = "ALTER TABLE `$table` DEFAULT COLLATE $newCollation;";
+            $sqlcommands .= $this->query($sql, $logsql);
+        }
+        return $sqlcommands;
+    }
+
     /**
      * Get information on all columns in a table, keyed by column name.
      *
diff --git a/module/VuFind/src/VuFind/Controller/UpgradeController.php b/module/VuFind/src/VuFind/Controller/UpgradeController.php
index f82b12a05c5..053dd5cad6b 100644
--- a/module/VuFind/src/VuFind/Controller/UpgradeController.php
+++ b/module/VuFind/src/VuFind/Controller/UpgradeController.php
@@ -189,6 +189,24 @@ class UpgradeController extends AbstractBase
             && isset($this->session->dbRootPass);
     }
 
+    /**
+     * Record the database character set in config.ini (Database / charset).
+     *
+     * @param string $charset Encoding setting to use.
+     *
+     * @throws \Exception If the config writer reports that saving failed.
+     * @return void
+     */
+    protected function setDbEncodingConfiguration($charset)
+    {
+        $iniPath = ConfigReader::getLocalConfigPath('config.ini', null, true);
+        $iniWriter = new \VuFind\Config\Writer($iniPath);
+        $iniWriter->set('Database', 'charset', $charset);
+        if ($iniWriter->save()) {
+            return;
+        }
+        throw new \Exception('Problem writing DB encoding to config.ini');
+    }
+
     /**
      * Upgrade the database.
      *
@@ -261,6 +279,34 @@ class UpgradeController extends AbstractBase
                     ->updateModifiedColumns($modifiedCols, $this->logsql);
             }
 
+            // Check for encoding problems.
+            $encProblems = $this->dbUpgrade()->getEncodingProblems();
+            if (!empty($encProblems)) {
+                if (!isset($this->session->dbChangeEncoding)) {
+                    return $this->forwardTo('Upgrade', 'GetDbEncodingPreference');
+                }
+
+                if ($this->session->dbChangeEncoding) {
+                    // Only manipulate DB if we're not in logging mode:
+                    if (!$this->logsql) {
+                        if (!$this->hasDatabaseRootCredentials()) {
+                            return $this->forwardTo('Upgrade', 'GetDbCredentials');
+                        }
+                        $this->dbUpgrade()->setAdapter($this->getRootDbAdapter());
+                        $this->session->warnings->append(
+                            "Modified encoding settings in table(s): "
+                            . implode(', ', array_keys($encProblems))
+                        );
+                    }
+                    $sql .= $this->dbUpgrade()
+                        ->fixEncodingProblems($encProblems, $this->logsql);
+                    $this->setDbEncodingConfiguration('utf8');
+                } else {
+                    // User has requested that we skip encoding conversion:
+                    $this->setDbEncodingConfiguration('latin1');
+                }
+            }
+
             // Don't keep DB credentials in session longer than necessary:
             unset($this->session->dbRootUser);
             unset($this->session->dbRootPass);
@@ -338,6 +384,24 @@ class UpgradeController extends AbstractBase
         return $this->createViewModel(array('dbrootuser' => $dbrootuser));
     }
 
+    /**
+     * Ask the user how to handle encoding problems and record the answer.
+     *
+     * @return mixed
+     */
+    public function getdbencodingpreferenceAction()
+    {
+        $choice = $this->params()->fromPost('encodingaction', '');
+        $convert = ($choice == 'Change');
+        if (!$convert && $choice != 'Keep') {
+            // No recognized answer was posted: show the prompt.
+            return $this->createViewModel();
+        }
+        // Remember the decision in the session and resume the database fix:
+        $this->session->dbChangeEncoding = $convert;
+        return $this->forwardTo('Upgrade', 'FixDatabase');
+    }
+
     /**
      * Prompt the user about fixing anonymous tags.
      *
diff --git a/themes/blueprint/templates/upgrade/getdbencodingpreference.phtml b/themes/blueprint/templates/upgrade/getdbencodingpreference.phtml
new file mode 100644
index 00000000000..0b329f2a164
--- /dev/null
+++ b/themes/blueprint/templates/upgrade/getdbencodingpreference.phtml
@@ -0,0 +1,26 @@
+<?php
+    // Page title shown for this upgrade step:
+    $this->headTitle($this->translate('Upgrade VuFind'));
+
+    // Breadcrumb trail consists of the (escaped) upgrade heading only:
+    $this->layout()->breadcrumbs = '<em>' . $this->transEsc('Upgrade VuFind') . '</em>';
+?>
+<h1><?=$this->transEsc('Upgrade VuFind')?></h1>
+<?=$this->flashmessages()?>
+
+<p>Your current VuFind database is encoded in Latin-1 format.  This may cause incorrect sorting and
+display of records containing characters outside of the basic ASCII character set.</p>
+
+<p>It is <b>STRONGLY RECOMMENDED</b> that you convert your database to UTF-8.  However, this will
+prevent older versions of VuFind from reading the database correctly.</p>
+
+<p>If you need to maintain backward compatibility with 1.x, choose "Keep."  You can return to this
+upgrade tool later to perform UTF-8 conversion.</p>
+
+<p>If backward compatibility is not necessary, choose "Change" now.
+(You should make a backup first if you have not already!)</p>
+
+<form method="post" action="<?=$this->url('upgrade-getdbencodingpreference')?>">
+  <input type="submit" name="encodingaction" value="Change" /> encoding to UTF-8<br />
+  <input type="submit" name="encodingaction" value="Keep" /> Latin-1 encoding
+</form>
\ No newline at end of file
-- 
GitLab