Current File : /home/tradevaly/www/phpmy/libraries/classes/Plugins/Import/ImportCsv.php
<?php
/**
 * CSV import plugin for phpMyAdmin
 *
 * @todo       add an option for handling NULL values
 */

declare(strict_types=1);

namespace PhpMyAdmin\Plugins\Import;

use PhpMyAdmin\File;
use PhpMyAdmin\Html\Generator;
use PhpMyAdmin\Message;
use PhpMyAdmin\Properties\Options\Groups\OptionsPropertyRootGroup;
use PhpMyAdmin\Properties\Options\Items\BoolPropertyItem;
use PhpMyAdmin\Properties\Options\Items\NumberPropertyItem;
use PhpMyAdmin\Properties\Options\Items\TextPropertyItem;
use PhpMyAdmin\Properties\Plugins\ImportPluginProperties;
use PhpMyAdmin\Util;

use function __;
use function array_shift;
use function array_splice;
use function basename;
use function count;
use function mb_strlen;
use function mb_strtolower;
use function mb_substr;
use function preg_grep;
use function preg_replace;
use function preg_split;
use function rtrim;
use function str_contains;
use function strlen;
use function strtr;
use function trim;

/**
 * Handles the import for the CSV format
 */
class ImportCsv extends AbstractImportCsv
{
    /**
     * Whether to analyze tables
     *
     * @var bool
     */
    private $analyze;

    /**
     * @psalm-return non-empty-lowercase-string
     */
    public function getName(): string
    {
        return 'csv';
    }

    protected function setProperties(): ImportPluginProperties
    {
        $this->setAnalyze(false);

        if ($GLOBALS['plugin_param'] !== 'table') {
            $this->setAnalyze(true);
        }

        $importPluginProperties = new ImportPluginProperties();
        $importPluginProperties->setText('CSV');
        $importPluginProperties->setExtension('csv');
        $importPluginProperties->setOptionsText(__('Options'));

        // create the root group that will be the options field for
        // $importPluginProperties
        // this will be shown as "Format specific options"
        $importSpecificOptions = new OptionsPropertyRootGroup('Format Specific Options');

        $generalOptions = $this->getGeneralOptions();

        if ($GLOBALS['plugin_param'] !== 'table') {
            $leaf = new TextPropertyItem(
                'new_tbl_name',
                __(
                    'Name of the new table (optional):'
                )
            );
            $generalOptions->addProperty($leaf);

            if ($GLOBALS['plugin_param'] === 'server') {
                $leaf = new TextPropertyItem(
                    'new_db_name',
                    __(
                        'Name of the new database (optional):'
                    )
                );
                $generalOptions->addProperty($leaf);
            }

            $leaf = new NumberPropertyItem(
                'partial_import',
                __(
                    'Import these many number of rows (optional):'
                )
            );
            $generalOptions->addProperty($leaf);

            $leaf = new BoolPropertyItem(
                'col_names',
                __(
                    'The first line of the file contains the table column names'
                    . ' <i>(if this is unchecked, the first line will become part'
                    . ' of the data)</i>'
                )
            );
            $generalOptions->addProperty($leaf);
        } else {
            $leaf = new NumberPropertyItem(
                'partial_import',
                __(
                    'Import these many number of rows (optional):'
                )
            );
            $generalOptions->addProperty($leaf);

            $hint = new Message(
                __(
                    'If the data in each row of the file is not'
                    . ' in the same order as in the database, list the corresponding'
                    . ' column names here. Column names must be separated by commas'
                    . ' and not enclosed in quotations.'
                )
            );
            $leaf = new TextPropertyItem(
                'columns',
                __('Column names:') . ' ' . Generator::showHint($hint->getMessage())
            );
            $generalOptions->addProperty($leaf);
        }

        $leaf = new BoolPropertyItem(
            'ignore',
            __('Do not abort on INSERT error')
        );
        $generalOptions->addProperty($leaf);

        // add the main group to the root group
        $importSpecificOptions->addProperty($generalOptions);

        // set the options for the import plugin property item
        $importPluginProperties->setOptions($importSpecificOptions);

        return $importPluginProperties;
    }

    /**
     * Handles the whole import logic
     *
     * @param array $sql_data 2-element array with sql data
     */
    public function doImport(?File $importHandle = null, array &$sql_data = []): void
    {
        global $error, $message, $dbi;
        global $db, $table, $csv_terminated, $csv_enclosed, $csv_escaped,
               $csv_new_line, $csv_columns, $errorUrl;
        // $csv_replace and $csv_ignore should have been here,
        // but we use directly from $_POST
        global $timeout_passed, $finished;

        $replacements = [
            '\\n' => "\n",
            '\\t' => "\t",
            '\\r' => "\r",
        ];
        $csv_terminated = strtr($csv_terminated, $replacements);
        $csv_enclosed = strtr($csv_enclosed, $replacements);
        $csv_escaped = strtr($csv_escaped, $replacements);
        $csv_new_line = strtr($csv_new_line, $replacements);

        [$error, $message] = $this->buildErrorsForParams(
            $csv_terminated,
            $csv_enclosed,
            $csv_escaped,
            $csv_new_line,
            (string) $errorUrl
        );

        [$sql_template, $required_fields, $fields] = $this->getSqlTemplateAndRequiredFields($db, $table, $csv_columns);

        // Defaults for parser
        $i = 0;
        $len = 0;
        $lastlen = null;
        $line = 1;
        $lasti = -1;
        $values = [];
        $csv_finish = false;
        $max_lines = 0; // defaults to 0 (get all the lines)

        /**
         * If we get a negative value, probably someone changed min value
         * attribute in DOM or there is an integer overflow, whatever be
         * the case, get all the lines.
         */
        if (isset($_REQUEST['csv_partial_import']) && $_REQUEST['csv_partial_import'] > 0) {
            $max_lines = $_REQUEST['csv_partial_import'];
        }

        $max_lines_constraint = $max_lines + 1;
        // if the first row has to be counted as column names, include one more row in the max lines
        if (isset($_REQUEST['csv_col_names'])) {
            $max_lines_constraint++;
        }

        $tempRow = [];
        $rows = [];
        $col_names = [];
        $tables = [];

        $buffer = '';
        $col_count = 0;
        $max_cols = 0;
        $csv_terminated_len = mb_strlen($csv_terminated);
        while (! ($finished && $i >= $len) && ! $error && ! $timeout_passed) {
            $data = $this->import->getNextChunk($importHandle);
            if ($data === false) {
                // subtract data we didn't handle yet and stop processing
                $GLOBALS['offset'] -= strlen($buffer);
                break;
            }

            if ($data !== true) {
                // Append new data to buffer
                $buffer .= $data;
                unset($data);

                // Force a trailing new line at EOF to prevent parsing problems
                if ($finished && $buffer) {
                    $finalch = mb_substr($buffer, -1);
                    if ($csv_new_line === 'auto' && $finalch != "\r" && $finalch != "\n") {
                        $buffer .= "\n";
                    } elseif ($csv_new_line !== 'auto' && $finalch != $csv_new_line) {
                        $buffer .= $csv_new_line;
                    }
                }

                // Do not parse string when we're not at the end
                // and don't have new line inside
                if (
                    ($csv_new_line === 'auto'
                    && ! str_contains($buffer, "\r")
                    && ! str_contains($buffer, "\n"))
                    || ($csv_new_line !== 'auto'
                    && ! str_contains($buffer, $csv_new_line))
                ) {
                    continue;
                }
            }

            // Current length of our buffer
            $len = mb_strlen($buffer);
            // Currently parsed char

            $ch = mb_substr($buffer, $i, 1);
            if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
                $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                $i += $csv_terminated_len - 1;
            }

            while ($i < $len) {
                // Deadlock protection
                if ($lasti == $i && $lastlen == $len) {
                    $message = Message::error(
                        __('Invalid format of CSV input on line %d.')
                    );
                    $message->addParam($line);
                    $error = true;
                    break;
                }

                $lasti = $i;
                $lastlen = $len;

                // This can happen with auto EOL and \r at the end of buffer
                if (! $csv_finish) {
                    // Grab empty field
                    if ($ch == $csv_terminated) {
                        if ($i == $len - 1) {
                            break;
                        }

                        $values[] = '';
                        $i++;
                        $ch = mb_substr($buffer, $i, 1);
                        if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
                            $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                            $i += $csv_terminated_len - 1;
                        }

                        continue;
                    }

                    // Grab one field
                    $fallbacki = $i;
                    if ($ch == $csv_enclosed) {
                        if ($i == $len - 1) {
                            break;
                        }

                        $need_end = true;
                        $i++;
                        $ch = mb_substr($buffer, $i, 1);
                        if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
                            $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                            $i += $csv_terminated_len - 1;
                        }
                    } else {
                        $need_end = false;
                    }

                    $fail = false;
                    $value = '';
                    while (
                        ($need_end
                            && ($ch != $csv_enclosed
                                || $csv_enclosed == $csv_escaped))
                        || (! $need_end
                            && ! ($ch == $csv_terminated
                                || $ch == $csv_new_line
                                || ($csv_new_line === 'auto'
                                    && ($ch == "\r" || $ch == "\n"))))
                    ) {
                        if ($ch == $csv_escaped) {
                            if ($i == $len - 1) {
                                $fail = true;
                                break;
                            }

                            $i++;
                            $ch = mb_substr($buffer, $i, 1);
                            if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
                                $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                                $i += $csv_terminated_len - 1;
                            }

                            if (
                                $csv_enclosed == $csv_escaped
                                && ($ch == $csv_terminated
                                || $ch == $csv_new_line
                                || ($csv_new_line === 'auto'
                                && ($ch == "\r" || $ch == "\n")))
                            ) {
                                break;
                            }
                        }

                        $value .= $ch;
                        if ($i == $len - 1) {
                            if (! $finished) {
                                $fail = true;
                            }

                            break;
                        }

                        $i++;
                        $ch = mb_substr($buffer, $i, 1);
                        if ($csv_terminated_len <= 1 || $ch != $csv_terminated[0]) {
                            continue;
                        }

                        $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                        $i += $csv_terminated_len - 1;
                    }

                    // unquoted NULL string
                    if ($need_end === false && $value === 'NULL') {
                        $value = null;
                    }

                    if ($fail) {
                        $i = $fallbacki;
                        $ch = mb_substr($buffer, $i, 1);
                        if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
                            $i += $csv_terminated_len - 1;
                        }

                        break;
                    }

                    // Need to strip trailing enclosing char?
                    if ($need_end && $ch == $csv_enclosed) {
                        if ($finished && $i == $len - 1) {
                            $ch = null;
                        } elseif ($i == $len - 1) {
                            $i = $fallbacki;
                            $ch = mb_substr($buffer, $i, 1);
                            if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
                                $i += $csv_terminated_len - 1;
                            }

                            break;
                        } else {
                            $i++;
                            $ch = mb_substr($buffer, $i, 1);
                            if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
                                $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                                $i += $csv_terminated_len - 1;
                            }
                        }
                    }

                    // Are we at the end?
                    if (
                        $ch == $csv_new_line
                        || ($csv_new_line === 'auto' && ($ch == "\r" || $ch == "\n"))
                        || ($finished && $i == $len - 1)
                    ) {
                        $csv_finish = true;
                    }

                    // Go to next char
                    if ($ch == $csv_terminated) {
                        if ($i == $len - 1) {
                            $i = $fallbacki;
                            $ch = mb_substr($buffer, $i, 1);
                            if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
                                $i += $csv_terminated_len - 1;
                            }

                            break;
                        }

                        $i++;
                        $ch = mb_substr($buffer, $i, 1);
                        if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
                            $ch = $this->readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len);
                            $i += $csv_terminated_len - 1;
                        }
                    }

                    // If everything went okay, store value
                    $values[] = $value;
                }

                // End of line
                if (
                    ! $csv_finish
                    && $ch != $csv_new_line
                    && ($csv_new_line !== 'auto' || ($ch != "\r" && $ch != "\n"))
                ) {
                    continue;
                }

                if ($csv_new_line === 'auto' && $ch == "\r") { // Handle "\r\n"
                    if ($i >= ($len - 2) && ! $finished) {
                        break; // We need more data to decide new line
                    }

                    if (mb_substr($buffer, $i + 1, 1) == "\n") {
                        $i++;
                    }
                }

                // We didn't parse value till the end of line, so there was
                // empty one
                if (! $csv_finish) {
                    $values[] = '';
                }

                if ($this->getAnalyze()) {
                    foreach ($values as $val) {
                        $tempRow[] = $val;
                        ++$col_count;
                    }

                    if ($col_count > $max_cols) {
                        $max_cols = $col_count;
                    }

                    $col_count = 0;

                    $rows[] = $tempRow;
                    $tempRow = [];
                } else {
                    // Do we have correct count of values?
                    if (count($values) != $required_fields) {
                        // Hack for excel
                        if ($values[count($values) - 1] !== ';') {
                            $message = Message::error(
                                __(
                                    'Invalid column count in CSV input on line %d.'
                                )
                            );
                            $message->addParam($line);
                            $error = true;
                            break;
                        }

                        unset($values[count($values) - 1]);
                    }

                    $first = true;
                    $sql = $sql_template;
                    foreach ($values as $val) {
                        if (! $first) {
                            $sql .= ', ';
                        }

                        if ($val === null) {
                            $sql .= 'NULL';
                        } else {
                            $sql .= '\''
                                . $dbi->escapeString($val)
                                . '\'';
                        }

                        $first = false;
                    }

                    $sql .= ')';
                    if (isset($_POST['csv_replace'])) {
                        $sql .= ' ON DUPLICATE KEY UPDATE ';
                        foreach ($fields as $field) {
                            $fieldName = Util::backquote($field['Field']);
                            $sql .= $fieldName . ' = VALUES(' . $fieldName
                                . '), ';
                        }

                        $sql = rtrim($sql, ', ');
                    }

                    /**
                     * @todo maybe we could add original line to verbose
                     * SQL in comment
                     */
                    $this->import->runQuery($sql, $sql, $sql_data);
                }

                $line++;
                $csv_finish = false;
                $values = [];
                $buffer = mb_substr($buffer, $i + 1);
                $len = mb_strlen($buffer);
                $i = 0;
                $lasti = -1;
                $ch = mb_substr($buffer, 0, 1);
                if ($max_lines > 0 && $line == $max_lines_constraint) {
                    $finished = 1;
                    break;
                }
            }

            if ($max_lines > 0 && $line == $max_lines_constraint) {
                $finished = 1;
                break;
            }
        }

        if ($this->getAnalyze()) {
            /* Fill out all rows */
            $num_rows = count($rows);
            for ($i = 0; $i < $num_rows; ++$i) {
                for ($j = count($rows[$i]); $j < $max_cols; ++$j) {
                    $rows[$i][] = 'NULL';
                }
            }

            $col_names = $this->getColumnNames($col_names, $max_cols, $rows);

            /* Remove the first row if it contains the column names */
            if (isset($_REQUEST['csv_col_names'])) {
                array_shift($rows);
            }

            $tbl_name = $this->getTableNameFromImport((string) $db);

            $tables[] = [
                $tbl_name,
                $col_names,
                $rows,
            ];

            /* Obtain the best-fit MySQL types for each column */
            $analyses = [];
            $analyses[] = $this->import->analyzeTable($tables[0]);

            /**
             * string $db_name (no backquotes)
             *
             * array $table = array(table_name, array() column_names, array()() rows)
             * array $tables = array of "$table"s
             *
             * array $analysis = array(array() column_types, array() column_sizes)
             * array $analyses = array of "$analysis"s
             *
             * array $create = array of SQL strings
             *
             * array $options = an associative array of options
             */

            /* Set database name to the currently selected one, if applicable,
             * Otherwise, check if user provided the database name in the request,
             * if not, set the default name
             */
            if (isset($_REQUEST['csv_new_db_name']) && strlen($_REQUEST['csv_new_db_name']) > 0) {
                $newDb = $_REQUEST['csv_new_db_name'];
            } else {
                $result = $dbi->fetchResult('SHOW DATABASES');

                $newDb = 'CSV_DB ' . (count($result) + 1);
            }

            [$db_name, $options] = $this->getDbnameAndOptions($db, $newDb);

            /* Non-applicable parameters */
            $create = null;

            /* Created and execute necessary SQL statements from data */
            $this->import->buildSql($db_name, $tables, $analyses, $create, $options, $sql_data);

            unset($tables, $analyses);
        }

        // Commit any possible data in buffers
        $this->import->runQuery('', '', $sql_data);

        if (count($values) == 0 || $error !== false) {
            return;
        }

        $message = Message::error(
            __('Invalid format of CSV input on line %d.')
        );
        $message->addParam($line);
        $error = true;
    }

    private function buildErrorsForParams(
        string $csvTerminated,
        string $csvEnclosed,
        string $csvEscaped,
        string $csvNewLine,
        string $errUrl
    ): array {
        global $error, $message;

        $param_error = false;
        if (strlen($csvTerminated) === 0) {
            $message = Message::error(
                __('Invalid parameter for CSV import: %s')
            );
            $message->addParam(__('Columns terminated with'));
            $error = true;
            $param_error = true;
            // The default dialog of MS Excel when generating a CSV produces a
            // semi-colon-separated file with no chance of specifying the
            // enclosing character. Thus, users who want to import this file
            // tend to remove the enclosing character on the Import dialog.
            // I could not find a test case where having no enclosing characters
            // confuses this script.
            // But the parser won't work correctly with strings so we allow just
            // one character.
        } elseif (mb_strlen($csvEnclosed) > 1) {
            $message = Message::error(
                __('Invalid parameter for CSV import: %s')
            );
            $message->addParam(__('Columns enclosed with'));
            $error = true;
            $param_error = true;
            // I could not find a test case where having no escaping characters
            // confuses this script.
            // But the parser won't work correctly with strings so we allow just
            // one character.
        } elseif (mb_strlen($csvEscaped) > 1) {
            $message = Message::error(
                __('Invalid parameter for CSV import: %s')
            );
            $message->addParam(__('Columns escaped with'));
            $error = true;
            $param_error = true;
        } elseif (mb_strlen($csvNewLine) != 1 && $csvNewLine !== 'auto') {
            $message = Message::error(
                __('Invalid parameter for CSV import: %s')
            );
            $message->addParam(__('Lines terminated with'));
            $error = true;
            $param_error = true;
        }

        // If there is an error in the parameters entered,
        // indicate that immediately.
        if ($param_error) {
            Generator::mysqlDie(
                $message->getMessage(),
                '',
                false,
                $errUrl
            );
        }

        return [$error, $message];
    }

    private function getTableNameFromImport(string $databaseName): string
    {
        global $import_file_name, $dbi;

        $importFileName = basename($import_file_name, '.csv');
        $importFileName = mb_strtolower($importFileName);
        $importFileName = (string) preg_replace('/[^a-zA-Z0-9_]/', '_', $importFileName);

        // get new table name, if user didn't provide one, set the default name
        if (isset($_REQUEST['csv_new_tbl_name']) && strlen($_REQUEST['csv_new_tbl_name']) > 0) {
            return $_REQUEST['csv_new_tbl_name'];
        }

        if (mb_strlen($databaseName)) {
            $result = $dbi->fetchResult('SHOW TABLES');

            // logic to get table name from filename
            // if no table then use filename as table name
            if (count($result) === 0) {
                return $importFileName;
            }

            // check to see if {filename} as table exist
            $nameArray = preg_grep('/' . $importFileName . '/isU', $result);
            // if no use filename as table name
            if ($nameArray === false || count($nameArray) === 0) {
                return $importFileName;
            }

            // check if {filename}_ as table exist
            $nameArray = preg_grep('/' . $importFileName . '_/isU', $result);
            if ($nameArray === false) {
                return $importFileName;
            }

            return $importFileName . '_' . (count($nameArray) + 1);
        }

        return $importFileName;
    }

    private function getColumnNames(array $columnNames, int $maxCols, array $rows): array
    {
        if (isset($_REQUEST['csv_col_names'])) {
            $columnNames = array_splice($rows, 0, 1);
            $columnNames = $columnNames[0];
            // MySQL column names can't end with a space character.
            foreach ($columnNames as $key => $col_name) {
                $columnNames[$key] = rtrim($col_name);
            }
        }

        if ((isset($columnNames) && count($columnNames) != $maxCols) || ! isset($columnNames)) {
            // Fill out column names
            for ($i = 0; $i < $maxCols; ++$i) {
                $columnNames[] = 'COL ' . ($i + 1);
            }
        }

        return $columnNames;
    }

    private function getSqlTemplateAndRequiredFields(
        ?string $db,
        ?string $table,
        ?string $csvColumns
    ): array {
        global $dbi, $error, $message;

        $requiredFields = 0;
        $sqlTemplate = '';
        $fields = [];
        if (! $this->getAnalyze() && $db !== null && $table !== null) {
            $sqlTemplate = 'INSERT';
            if (isset($_POST['csv_ignore'])) {
                $sqlTemplate .= ' IGNORE';
            }

            $sqlTemplate .= ' INTO ' . Util::backquote($table);

            $tmp_fields = $dbi->getColumns($db, $table);

            if (empty($csvColumns)) {
                $fields = $tmp_fields;
            } else {
                $sqlTemplate .= ' (';
                $fields = [];
                $tmp = preg_split('/,( ?)/', $csvColumns);
                if ($tmp === false) {
                    $tmp = [];
                }

                foreach ($tmp as $val) {
                    if (count($fields) > 0) {
                        $sqlTemplate .= ', ';
                    }

                    /* Trim also `, if user already included backquoted fields */
                    $val = trim($val, " \t\r\n\0\x0B`");
                    $found = false;
                    foreach ($tmp_fields as $field) {
                        if ($field['Field'] == $val) {
                            $found = true;
                            break;
                        }
                    }

                    if (! $found) {
                        $message = Message::error(
                            __(
                                'Invalid column (%s) specified! Ensure that columns'
                                . ' names are spelled correctly, separated by commas'
                                . ', and not enclosed in quotes.'
                            )
                        );
                        $message->addParam($val);
                        $error = true;
                        break;
                    }

                    if (isset($field)) {
                        $fields[] = $field;
                    }

                    $sqlTemplate .= Util::backquote($val);
                }

                $sqlTemplate .= ') ';
            }

            $requiredFields = count($fields);

            $sqlTemplate .= ' VALUES (';
        }

        return [$sqlTemplate, $requiredFields, $fields];
    }

    /**
     * Read the expected column_separated_with String of length
     * $csv_terminated_len from the $buffer
     * into variable $ch and return the read string $ch
     *
     * @param string $buffer             The original string buffer read from
     *                                   csv file
     * @param string $ch                 Partially read "column Separated with"
     *                                   string, also used to return after
     *                                   reading length equal $csv_terminated_len
     * @param int    $i                  Current read counter of buffer string
     * @param int    $csv_terminated_len The length of "column separated with"
     *                                   String
     *
     * @return string
     */
    public function readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len)
    {
        for ($j = 0; $j < $csv_terminated_len - 1; $j++) {
            $i++;
            $ch .= mb_substr($buffer, $i, 1);
        }

        return $ch;
    }

    /* ~~~~~~~~~~~~~~~~~~~~ Getters and Setters ~~~~~~~~~~~~~~~~~~~~ */

    /**
     * Returns true if the table should be analyzed, false otherwise
     */
    private function getAnalyze(): bool
    {
        return $this->analyze;
    }

    /**
     * Sets to true if the table should be analyzed, false otherwise
     *
     * @param bool $analyze status
     */
    private function setAnalyze($analyze): void
    {
        $this->analyze = $analyze;
    }
}