From ae5bba92397939658a8b722af586a431c7b2d753 Mon Sep 17 00:00:00 2001 From: Paul Hachmang Date: Mon, 23 Sep 2013 11:22:56 +0200 Subject: [PATCH 01/38] [+FEAT] Implement Seekable Iterator --- SpreadsheetReader.php | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/SpreadsheetReader.php b/SpreadsheetReader.php index 31a8af4..6b6dbee 100644 --- a/SpreadsheetReader.php +++ b/SpreadsheetReader.php @@ -5,7 +5,7 @@ * @version 0.5.6 * @author Martins Pilsetnieks */ - class SpreadsheetReader implements Iterator, Countable + class SpreadsheetReader implements SeekableIterator, Countable { const TYPE_XLSX = 'XLSX'; const TYPE_XLS = 'XLS'; @@ -294,5 +294,38 @@ public function count() } return 0; } + + + /** + * @param int $position + * + * @return null + * @throws OutOfBoundsException + */ + public function seek($position) { + if (! $this->Handle) { + return null; + }; + + if ($position != $this->Handle->key()) { + if (0 == $position) { + $this->rewind(); + return; + } elseif ($position > 0) { + if ($this->Handle->key() === null || $position < $this->Handle->key()) { + $this->rewind(); + } + + while ($nodeStr = $this->Handle->next()) { + if ($this->Handle->key() == $position) { + return; + } + } + } + throw new OutOfBoundsException(Mage::helper('importexport')->__('Invalid seek position')); + } + + return null; + } } ?> From 8ec9ce24a0437004a90c9390b00a5e5c414f170c Mon Sep 17 00:00:00 2001 From: karlis-i Date: Fri, 18 Mar 2016 12:50:25 +0200 Subject: [PATCH 02/38] Fixed LibreOffice date import Dates from xls files created with LibreOffice were turned into invalid values, so here's a fix for that From 3a844ec243de7f1ce476a1bed12ffb1a45237937 Mon Sep 17 00:00:00 2001 From: karlis-i Date: Fri, 18 Mar 2016 12:53:02 +0200 Subject: [PATCH 03/38] Fixed LibreOffice date import Dates from xls files created with LibreOffice were turned into invalid values, so here's a fix for that From 237ad9edd072173da15e89e17266d8e67191c74d Mon Sep 
17 00:00:00 2001 From: karlis-i Date: Fri, 18 Mar 2016 13:05:42 +0200 Subject: [PATCH 04/38] Fixed LibreOffice date import Dates from xls files created with LibreOffice were turned into invalid values, so here's a fix for that --- php-excel-reader/excel_reader2.php | 138 +++++++++++++++-------------- 1 file changed, 73 insertions(+), 65 deletions(-) diff --git a/php-excel-reader/excel_reader2.php b/php-excel-reader/excel_reader2.php index 75351b7..1995090 100644 --- a/php-excel-reader/excel_reader2.php +++ b/php-excel-reader/excel_reader2.php @@ -77,7 +77,7 @@ function GetInt4d($data, $pos) { function gmgetdate($ts = null){ $k = array('seconds','minutes','hours','mday','wday','mon','year','yday','weekday','month',0); return(array_comb($k,explode(":",gmdate('s:i:G:j:w:n:Y:z:l:F:U',is_null($ts)?time():$ts)))); - } + } // Added for PHP4 compatibility function array_comb($array1, $array2) { @@ -321,7 +321,7 @@ function myHex($d) { if ($d < 16) return "0" . dechex($d); return dechex($d); } - + function dumpHexData($data, $pos, $length) { $info = ""; for ($i = 0; $i <= $length; $i++) { @@ -394,7 +394,7 @@ function colcount($sheet=0) { } function colwidth($col,$sheet=0) { // Col width is actually the width of the number 0. So we have to estimate and come close - return $this->colInfo[$sheet][$col]['width']/9142*200; + return $this->colInfo[$sheet][$col]['width']/9142*200; } function colhidden($col,$sheet=0) { return !!$this->colInfo[$sheet][$col]['hidden']; @@ -405,7 +405,7 @@ function rowheight($row,$sheet=0) { function rowhidden($row,$sheet=0) { return !!$this->rowInfo[$sheet][$row]['hidden']; } - + // GET THE CSS FOR FORMATTING // ========================== function style($row,$col,$sheet=0,$properties='') { @@ -467,10 +467,10 @@ function style($row,$col,$sheet=0,$properties='') { if ($bRight!="" && $bRightCol!="") { $css .= "border-right-color:" . $bRightCol .";"; } if ($bTop!="" && $bTopCol!="") { $css .= "border-top-color:" . $bTopCol . 
";"; } if ($bBottom!="" && $bBottomCol!="") { $css .= "border-bottom-color:" . $bBottomCol .";"; } - + return $css; } - + // FORMAT PROPERTIES // ================= function format($row,$col,$sheet=0) { @@ -482,7 +482,7 @@ function formatIndex($row,$col,$sheet=0) { function formatColor($row,$col,$sheet=0) { return $this->info($row,$col,'formatColor',$sheet); } - + // CELL (XF) PROPERTIES // ==================== function xfRecord($row,$col,$sheet=0) { @@ -581,7 +581,7 @@ function height($row,$col,$sheet=0) { function font($row,$col,$sheet=0) { return $this->fontProperty($row,$col,$sheet,'font'); } - + // DUMP AN HTML TABLE OF THE ENTIRE XLS DATA // ========================================= function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel') { @@ -600,7 +600,7 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel } $out .= "\n"; } - + $out .= "\n"; for($row=1;$row<=$this->rowcount($sheet);$row++) { $rowheight = $this->rowheight($row,$sheet); @@ -631,8 +631,8 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel $out .= "\n\t\t 1?" colspan=$colspan":"") . ($rowspan > 1?" rowspan=$rowspan":"") . 
">"; $val = $this->val($row,$col,$sheet); if ($val=='') { $val=" "; } - else { - $val = htmlentities($val); + else { + $val = htmlentities($val); $link = $this->hyperlink($row,$col,$sheet); if ($link!='') { $val = "$val"; @@ -647,7 +647,7 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel $out .= ""; return $out; } - + // -------------- // END PUBLIC API @@ -658,7 +658,7 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel var $xfRecords = array(); var $colInfo = array(); var $rowInfo = array(); - + var $sst = array(); var $sheets = array(); @@ -807,36 +807,36 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel 0x0B => "Thin dash-dot-dotted", 0x0C => "Medium dash-dot-dotted", 0x0D => "Slanted medium dash-dotted" - ); + ); var $lineStylesCss = array( - "Thin" => "1px solid", - "Medium" => "2px solid", - "Dashed" => "1px dashed", - "Dotted" => "1px dotted", - "Thick" => "3px solid", - "Double" => "double", - "Hair" => "1px solid", - "Medium dashed" => "2px dashed", - "Thin dash-dotted" => "1px dashed", - "Medium dash-dotted" => "2px dashed", - "Thin dash-dot-dotted" => "1px dashed", - "Medium dash-dot-dotted" => "2px dashed", - "Slanted medium dash-dotte" => "2px dashed" + "Thin" => "1px solid", + "Medium" => "2px solid", + "Dashed" => "1px dashed", + "Dotted" => "1px dotted", + "Thick" => "3px solid", + "Double" => "double", + "Hair" => "1px solid", + "Medium dashed" => "2px dashed", + "Thin dash-dotted" => "1px dashed", + "Medium dash-dotted" => "2px dashed", + "Thin dash-dot-dotted" => "1px dashed", + "Medium dash-dot-dotted" => "2px dashed", + "Slanted medium dash-dotte" => "2px dashed" ); - + function read16bitstring($data, $start) { $len = 0; while (ord($data[$start + $len]) + ord($data[$start + $len + 1]) > 0) $len++; return substr($data, $start, $len); } - + // ADDED by Matt Kruse for better formatting function _format_value($format,$num,$f) { // 49==TEXT format // 
http://code.google.com/p/php-excel-reader/issues/detail?id=7 - if ( (!$f && $format=="%s") || ($f==49) || ($format=="GENERAL") ) { - return array('string'=>$num, 'formatColor'=>null); + if ( (!$f && $format=="%s") || ($f==49) || ($format=="GENERAL") ) { + return array('string'=>$num, 'formatColor'=>null); } // Custom pattern can be POSITIVE;NEGATIVE;ZERO @@ -860,13 +860,13 @@ function _format_value($format,$num,$f) { $color = strtolower($matches[1]); $pattern = preg_replace($color_regex,"",$pattern); } - + // In Excel formats, "_" is used to add spacing, which we can't do in HTML $pattern = preg_replace("/_./","",$pattern); - + // Some non-number characters are escaped with \, which we don't need $pattern = preg_replace("/\\\/","",$pattern); - + // Some non-number strings are quoted, so we'll get rid of the quotes $pattern = preg_replace("/\"/","",$pattern); @@ -901,6 +901,11 @@ function _format_value($format,$num,$f) { $pattern = preg_replace($number_regex, $formatted, $pattern); } + // prevent changing of big integers to '@' + if ($pattern === '@') { + $pattern = strval($num); + } + return array( 'string'=>$pattern, 'formatColor'=>$color @@ -915,7 +920,7 @@ function _format_value($format,$num,$f) { function Spreadsheet_Excel_Reader($file='',$store_extended_info=true,$outputEncoding='') { $this->_ole = new OLERead(); $this->setUTFEncoder('iconv'); - if ($outputEncoding != '') { + if ($outputEncoding != '') { $this->setOutputEncoding($outputEncoding); } for ($i=1; $i<245; $i++) { @@ -1163,7 +1168,7 @@ function _parse() { $font = substr($data, $pos+20, $numchars); } else { $font = substr($data, $pos+20, $numchars*2); - $font = $this->_encodeUTF16($font); + $font = $this->_encodeUTF16($font); } $this->fontRecords[] = array( 'height' => $height / 20, @@ -1216,14 +1221,14 @@ function _parse() { $xf['borderRight'] = $this->lineStyles[($border & 0xF0) >> 4]; $xf['borderTop'] = $this->lineStyles[($border & 0xF00) >> 8]; $xf['borderBottom'] = $this->lineStyles[($border & 
0xF000) >> 12]; - + $xf['borderLeftColor'] = ($border & 0x7F0000) >> 16; $xf['borderRightColor'] = ($border & 0x3F800000) >> 23; $border = (ord($data[$pos+18]) | ord($data[$pos+19]) << 8); $xf['borderTopColor'] = ($border & 0x7F); $xf['borderBottomColor'] = ($border & 0x3F80) >> 7; - + if (array_key_exists($indexCode, $this->dateFormats)) { $xf['type'] = 'date'; $xf['format'] = $this->dateFormats[$indexCode]; @@ -1244,21 +1249,28 @@ function _parse() { if (preg_match("/[^hmsday\/\-:\s\\\,AMP]/i", $tmp) == 0) { // found day and time format $isdate = TRUE; $formatstr = $tmp; - $formatstr = str_replace(array('AM/PM','mmmm','mmm'), array('a','F','M'), $formatstr); - // m/mm are used for both minutes and months - oh SNAP! - // This mess tries to fix for that. - // 'm' == minutes only if following h/hh or preceding s/ss - $formatstr = preg_replace("/(h:?)mm?/","$1i", $formatstr); - $formatstr = preg_replace("/mm?(:?s)/","i$1", $formatstr); - // A single 'm' = n in PHP - $formatstr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatstr); - $formatstr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatstr); - // else it's months - $formatstr = str_replace('mm', 'm', $formatstr); - // Convert single 'd' to 'j' - $formatstr = preg_replace("/(^|[^d])d([^d]|$)/", '$1j$2', $formatstr); - $formatstr = str_replace(array('dddd','ddd','dd','yyyy','yy','hh','h'), array('l','D','d','Y','y','H','g'), $formatstr); - $formatstr = preg_replace("/ss?/", 's', $formatstr); + if ($formatstr === 'YYYY/MM/DD') { + // LibreOffice turns this pattern into invalid dates: + // 2015201520152015/OctOct/WedWed + // here we fix it + $formatstr = 'Y/m/d'; + } else { + $formatstr = str_replace(array('AM/PM','mmmm','mmm'), array('a','F','M'), $formatstr); + // m/mm are used for both minutes and months - oh SNAP! + // This mess tries to fix for that. 
+ // 'm' == minutes only if following h/hh or preceding s/ss + $formatstr = preg_replace("/(h:?)mm?/","$1i", $formatstr); + $formatstr = preg_replace("/mm?(:?s)/","i$1", $formatstr); + // A single 'm' = n in PHP + $formatstr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatstr); + $formatstr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatstr); + // else it's months + $formatstr = str_replace('mm', 'm', $formatstr); + // Convert single 'd' to 'j' + $formatstr = preg_replace("/(^|[^d])d([^d]|$)/", '$1j$2', $formatstr); + $formatstr = str_replace(array('dddd','ddd','dd','yyyy','yy','hh','h'), array('l','D','d','Y','y','H','g'), $formatstr); + $formatstr = preg_replace("/ss?/", 's', $formatstr); + } } } } @@ -1553,24 +1565,24 @@ function _parsesheet($spos) { } $linkdata['desc'] = $udesc; $linkdata['link'] = $this->_encodeUTF16($ulink); - for ($r=$row; $r<=$row2; $r++) { + for ($r=$row; $r<=$row2; $r++) { for ($c=$column; $c<=$column2; $c++) { $this->sheets[$this->sn]['cellsInfo'][$r+1][$c+1]['hyperlink'] = $linkdata; } } break; case SPREADSHEET_EXCEL_READER_TYPE_DEFCOLWIDTH: - $this->defaultColWidth = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; + $this->defaultColWidth = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; break; case SPREADSHEET_EXCEL_READER_TYPE_STANDARDWIDTH: - $this->standardColWidth = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; + $this->standardColWidth = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; break; case SPREADSHEET_EXCEL_READER_TYPE_COLINFO: $colfrom = ord($data[$spos+0]) | ord($data[$spos+1]) << 8; $colto = ord($data[$spos+2]) | ord($data[$spos+3]) << 8; - $cw = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; - $cxf = ord($data[$spos+6]) | ord($data[$spos+7]) << 8; - $co = ord($data[$spos+8]); + $cw = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; + $cxf = ord($data[$spos+6]) | ord($data[$spos+7]) << 8; + $co = ord($data[$spos+8]); for ($coli = $colfrom; $coli <= $colto; $coli++) { $this->colInfo[$this->sn][$coli+1] = 
Array('width' => $cw, 'xf' => $cxf, 'hidden' => ($co & 0x01), 'collapsed' => ($co & 0x1000) >> 12); } @@ -1714,12 +1726,8 @@ function _GetIEEE754($rknum) { function _encodeUTF16($string) { $result = $string; if ($this->_defaultEncoding){ - switch ($this->_encoderFunction){ - case 'iconv' : $result = iconv('UTF-16LE', $this->_defaultEncoding, $string); - break; - case 'mb_convert_encoding' : $result = mb_convert_encoding($string, $this->_defaultEncoding, 'UTF-16LE' ); - break; - } + // iconv changed to mb_convert_encoding + $result = mb_convert_encoding($string, $this->_defaultEncoding, 'UTF-16LE' ); } return $result; } From f48d859fc8d683de51a16cdbacdb9606f8353ee8 Mon Sep 17 00:00:00 2001 From: karlis-i Date: Fri, 18 Mar 2016 13:23:16 +0200 Subject: [PATCH 05/38] XLS date fix (proper commit) Added a fix for recognising dates in XLS files created by LibreOffice --- CHANGELOG.md | 22 +++++++++++++--------- composer.json | 15 ++++++++------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30a09a9..eea5f41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### v.0.5.12 2016-03-18 + +- Added a fix for recognising dates in XLS files created by LibreOffice + ### v.0.5.11 2015-04-30 - Added a special case for cells formatted as text in XLSX. Previously leading zeros would get truncated if a text cell contained only numbers. @@ -50,10 +54,10 @@ Currently only decimal number values are converted to PHP's floats. ### v.0.5.1 2013-06-27 -- Fixed file type choice when using mime-types (previously there were problems with +- Fixed file type choice when using mime-types (previously there were problems with XLSX and ODS mime-types) (Thanks to [incratec](https://github.com/incratec)) -- Fixed an error in XLSX iterator where `current()` would advance the iterator forward +- Fixed an error in XLSX iterator where `current()` would advance the iterator forward with each call. 
(Thanks to [osuwariboy](https://github.com/osuwariboy)) ### v.0.5.0 2013-06-17 @@ -62,19 +66,19 @@ with each call. (Thanks to [osuwariboy](https://github.com/osuwariboy)) - The `Sheets()` method lets you retrieve a list of all sheets present in the file. - `ChangeSheet($Index)` method changes the sheet in the reader to the one specified. -- Previously temporary files that were extracted, were deleted after the SpreadsheetReader -was destroyed but the empty directories remained. Now those are cleaned up as well. +- Previously temporary files that were extracted, were deleted after the SpreadsheetReader +was destroyed but the empty directories remained. Now those are cleaned up as well. ### v.0.4.3 2013-06-14 -- Bugfix for shared string caching in XLSX files. When the shared string count was larger -than the caching limit, instead of them being read from file, empty strings were returned. +- Bugfix for shared string caching in XLSX files. When the shared string count was larger +than the caching limit, instead of them being read from file, empty strings were returned. ### v.0.4.2 2013-06-02 -- XLS file reading relies on the external Spreadsheet_Excel_Reader class which, by default, -reads additional information about cells like fonts, styles, etc. Now that is disabled -to save some memory since the style data is unnecessary anyway. +- XLS file reading relies on the external Spreadsheet_Excel_Reader class which, by default, +reads additional information about cells like fonts, styles, etc. Now that is disabled +to save some memory since the style data is unnecessary anyway. (Thanks to [ChALkeR](https://github.com/ChALkeR) for the tip.) 
Martins Pilsetnieks \ No newline at end of file diff --git a/composer.json b/composer.json index 0e64a78..5bd5b4a 100644 --- a/composer.json +++ b/composer.json @@ -1,10 +1,10 @@ { - "name": "nuovo/spreadsheet-reader", + "name": "karlis-i/spreadsheet-reader", "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", "keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], - "homepage": "https://github.com/nuovo/spreadsheet-reader", - "version": "0.5.11", - "time": "2015-04-30", + "homepage": "https://github.com/karlis-i/spreadsheet-reader", + "version": "0.5.12", + "time": "2016-03-18", "type": "library", "license": ["MIT"], "authors": [ @@ -12,11 +12,12 @@ "name": "Martins Pilsetnieks", "email": "pilsetnieks@gmail.com", "homepage": "http://www.nuovo.lv/" + }, + { + "name": "karlis-i", + "email": "karlis.im@gmail.com" } ], - "support": { - "email": "spreadsheet-reader@nuovo.lv" - }, "require": { "php": ">= 5.3.0", "ext-zip": "*" From adc4f60ba6e9bb90351e97415481c3348743f1ad Mon Sep 17 00:00:00 2001 From: karlis-i Date: Fri, 18 Mar 2016 15:30:55 +0200 Subject: [PATCH 06/38] LibreOffice date pattern fix --- php-excel-reader/excel_reader2.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/php-excel-reader/excel_reader2.php b/php-excel-reader/excel_reader2.php index 1995090..af9253c 100644 --- a/php-excel-reader/excel_reader2.php +++ b/php-excel-reader/excel_reader2.php @@ -1249,11 +1249,11 @@ function _parse() { if (preg_match("/[^hmsday\/\-:\s\\\,AMP]/i", $tmp) == 0) { // found day and time format $isdate = TRUE; $formatstr = $tmp; - if ($formatstr === 'YYYY/MM/DD') { + if ($formatstr === 'YYYY/MM/DD' || $formatstr === 'YYYY\-MM\-DD') { // LibreOffice turns this pattern into invalid dates: // 2015201520152015/OctOct/WedWed // here we fix it - $formatstr = 'Y/m/d'; + $formatstr = 'Y-m-d'; } else { $formatstr = str_replace(array('AM/PM','mmmm','mmm'), array('a','F','M'), $formatstr); // 
m/mm are used for both minutes and months - oh SNAP! @@ -1742,4 +1742,4 @@ function _GetInt4d($data, $pos) { } -?> \ No newline at end of file +?> From 6350aebed6bf50263d99af0a73f4378df471ce01 Mon Sep 17 00:00:00 2001 From: karlis-i Date: Fri, 18 Mar 2016 16:35:25 +0200 Subject: [PATCH 07/38] Fixed LibreOffice date import Another try at committing correctly --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 5bd5b4a..5dc032c 100644 --- a/composer.json +++ b/composer.json @@ -3,7 +3,7 @@ "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", "keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], "homepage": "https://github.com/karlis-i/spreadsheet-reader", - "version": "0.5.12", + "version": "0.5.14", "time": "2016-03-18", "type": "library", "license": ["MIT"], From 1e9b801b49044bffb5d17b06b2efd21a8c8addc2 Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 1 Apr 2016 14:54:54 +0200 Subject: [PATCH 08/38] SAF-58: Fixed bug where XLSX reader could not be found Warning: include(SpreadsheetReader/XLSX.php): failed to open stream: No such file or directory in /Users/username/Sites/sitename/lib/Varien/Autoload.php on line 93 --- SpreadsheetReader.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpreadsheetReader.php b/SpreadsheetReader.php index 6b6dbee..067b25e 100644 --- a/SpreadsheetReader.php +++ b/SpreadsheetReader.php @@ -204,7 +204,7 @@ private static function Load($Type) throw new Exception('SpreadsheetReader: Invalid type ('.$Type.')'); } - if (!class_exists('SpreadsheetReader_'.$Type)) + if (!class_exists('SpreadsheetReader_'.$Type, false)) { require(dirname(__FILE__).DIRECTORY_SEPARATOR.'SpreadsheetReader_'.$Type.'.php'); } From ee26fe8c68913129f21424c101afcdab92b57595 Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 1 Apr 2016 15:06:52 +0200 Subject: [PATCH 09/38] SAF-58: Unlimited shared string cache. 
Fixed bug where first XSLX worksheet was inaccessible Original bug report of XLSX bug: https://github.com/nuovo/spreadsheet-reader/issues/59#issue-24686316 --- SpreadsheetReader_XLSX.php | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 581a87a..be04b4d 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -20,7 +20,7 @@ class SpreadsheetReader_XLSX implements Iterator, Countable * With large shared string caches there are huge performance gains, however a lot of memory could be used which * can be a problem, especially on shared hosting. */ - const SHARED_STRING_CACHE_LIMIT = 50000; + const SHARED_STRING_CACHE_LIMIT = null; private $Options = array( 'TempDir' => '', @@ -367,16 +367,14 @@ public function Sheets() $this -> Sheets = array(); foreach ($this -> WorkbookXML -> sheets -> sheet as $Index => $Sheet) { - $Attributes = $Sheet -> attributes('r', true); + $Attributes = $Sheet -> attributes(); foreach ($Attributes as $Name => $Value) { - if ($Name == 'id') - { - $SheetID = (int)str_replace('rId', '', (string)$Value); + if ($Name == 'sheetId') { + $SheetID = (int)$Value; break; } } - $this -> Sheets[$SheetID] = (string)$Sheet['name']; } ksort($this -> Sheets); From 7bc115e527f27698017a2da38f9141399adf1194 Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 1 Apr 2016 16:43:33 +0200 Subject: [PATCH 10/38] Fixed bug where first XSLX worksheet was inaccessible (again) Original bug report of XLSX bug: https://github.com/nuovo/spreadsheet-reader/issues/59#issue-24686316 --- SpreadsheetReader_XLSX.php | 97 ++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index be04b4d..64917ef 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -104,6 +104,7 @@ class SpreadsheetReader_XLSX implements Iterator, Countable private $SSForwarded = false; 
private static $BuiltinFormats = array( + 0 => '', 1 => '0', 2 => '0.00', 3 => '#,##0', @@ -243,7 +244,7 @@ public function __construct($Filepath, array $Options = null) } $Sheets = $this -> Sheets(); - + foreach ($this -> Sheets as $Index => $Name) { if ($Zip -> locateName('xl/worksheets/sheet'.$Index.'.xml') !== false) @@ -263,7 +264,8 @@ public function __construct($Filepath, array $Options = null) { foreach ($this -> StylesXML -> cellXfs -> xf as $Index => $XF) { - if ($XF -> attributes() -> applyNumberFormat) + // Format #0 is a special case - it is the "General" format that is applied regardless of applyNumberFormat + if ($XF -> attributes() -> applyNumberFormat || (0 == (int)$XF -> attributes() -> numFmtId)) { $FormatId = (int)$XF -> attributes() -> numFmtId; // If format ID >= 164, it is a custom format and should be read from styleSheet\numFmts @@ -271,7 +273,8 @@ public function __construct($Filepath, array $Options = null) } else { - $this -> Styles[] = false; + // 0 for "General" format + $this -> Styles[] = 0; } } } @@ -367,15 +370,29 @@ public function Sheets() $this -> Sheets = array(); foreach ($this -> WorkbookXML -> sheets -> sheet as $Index => $Sheet) { + $AttributesWithPrefix = $Sheet -> attributes('r', true); $Attributes = $Sheet -> attributes(); + + $rId = 0; + $sheetId = 0; + + foreach ($AttributesWithPrefix as $Name => $Value) + { + if ($Name == 'id') + { + $rId = (int)str_replace('rId', '', (string)$Value); + break; + } + } foreach ($Attributes as $Name => $Value) { if ($Name == 'sheetId') { - $SheetID = (int)$Value; + $sheetId = (int)$Value; break; } } - $this -> Sheets[$SheetID] = (string)$Sheet['name']; + + $this -> Sheets[min($rId, $sheetId)] = (string)$Sheet['name']; } ksort($this -> Sheets); } @@ -404,6 +421,7 @@ public function ChangeSheet($Index) if ($RealSheetIndex !== false && is_readable($TempWorksheetPath)) { $this -> WorksheetPath = $TempWorksheetPath; + $this -> rewind(); return true; } @@ -610,7 +628,7 @@ private function 
FormatValue($Value, $Index) return $Value; } - if (!empty($this -> Styles[$Index])) + if (isset($this -> Styles[$Index]) && ($this -> Styles[$Index] !== false)) { $Index = $this -> Styles[$Index]; } @@ -619,6 +637,12 @@ private function FormatValue($Value, $Index) return $Value; } + // A special case for the "General" format + if ($Index == 0) + { + return $this -> GeneralFormat($Value); + } + $Format = array(); if (isset($this -> ParsedFormatCache[$Index])) @@ -778,8 +802,12 @@ private function FormatValue($Value, $Index) // Applying format to value if ($Format) { + if ($Format['Code'] == '@') + { + return (string)$Value; + } // Percentages - if ($Format['Type'] == 'Percentage') + elseif ($Format['Type'] == 'Percentage') { if ($Format['Code'] === '0%') { @@ -875,7 +903,7 @@ private function FormatValue($Value, $Index) // Scaling $Value = $Value / $Format['Scale']; - if ($Format['MinWidth'] && $Format['Decimals']) + if (!empty($Format['MinWidth']) && $Format['Decimals']) { if ($Format['Thousands']) { @@ -903,6 +931,23 @@ private function FormatValue($Value, $Index) return $Value; } + /** + * Attempts to approximate Excel's "general" format. + * + * @param mixed Value + * + * @return mixed Result + */ + public function GeneralFormat($Value) + { + // Numeric format + if (is_numeric($Value)) + { + $Value = (float)$Value; + } + return $Value; + } + // !Iterator interface methods /** * Rewind the Iterator to the first element. @@ -910,25 +955,24 @@ private function FormatValue($Value, $Index) */ public function rewind() { - if ($this -> Index > 0 || !($this -> Worksheet instanceof XMLReader)) - { - // If the worksheet was already iterated, XML file is reopened. 
- // Otherwise it should be at the beginning anyway - if ($this -> Worksheet instanceof XMLReader) - { - $this -> Worksheet -> close(); - } - else - { - $this -> Worksheet = new XMLReader; - } + // Removed the check whether $this -> Index == 0 otherwise ChangeSheet doesn't work properly - $this -> Worksheet -> open($this -> WorksheetPath); - $this -> Valid = true; - - $this -> RowOpen = false; + // If the worksheet was already iterated, XML file is reopened. + // Otherwise it should be at the beginning anyway + if ($this -> Worksheet instanceof XMLReader) + { + $this -> Worksheet -> close(); } + else + { + $this -> Worksheet = new XMLReader; + } + + $this -> Worksheet -> open($this -> WorksheetPath); + $this -> Valid = true; + $this -> RowOpen = false; + $this -> CurrentRow = false; $this -> Index = 0; } @@ -1045,6 +1089,7 @@ public function next() break; // Cell value case 'v': + case 'is': if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT) { continue; @@ -1062,6 +1107,10 @@ public function next() { $Value = $this -> FormatValue($Value, $StyleId); } + elseif ($Value) + { + $Value = $this -> GeneralFormat($Value); + } $this -> CurrentRow[$Index] = $Value; break; From 9185a3d5c8df63d6d3bec5de9d0cfb5be7f976e8 Mon Sep 17 00:00:00 2001 From: Martins Pilsetnieks Date: Sat, 31 Jan 2015 14:32:41 +0200 Subject: [PATCH 11/38] Excel's "General" format (v.0.5.7) # Conflicts: # SpreadsheetReader_XLSX.php --- .gitignore | 3 +- CHANGELOG.md | 5 ++ SpreadsheetReader.php | 2 +- SpreadsheetReader_XLSX.php | 100 ++++++++++--------------------------- 4 files changed, 35 insertions(+), 75 deletions(-) diff --git a/.gitignore b/.gitignore index 8c558d3..9b670cb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .DS_Store -test/* \ No newline at end of file +test +materials \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f524d89..629363b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +### v.0.5.7 2013-10-29 + +- Attempt to 
replicate Excel's "General" format in XLSX files that is applied to otherwise unformatted cells. +Currently only decimal number values are converted to PHP's floats. + ### v.0.5.6 2013-09-04 - Fix for formulas being returned along with values in XLSX files. (Thanks to [marktag](https://github.com/marktag)) diff --git a/SpreadsheetReader.php b/SpreadsheetReader.php index 067b25e..6acbe3c 100644 --- a/SpreadsheetReader.php +++ b/SpreadsheetReader.php @@ -2,7 +2,7 @@ /** * Main class for spreadsheet reading * - * @version 0.5.6 + * @version 0.5.7 * @author Martins Pilsetnieks */ class SpreadsheetReader implements SeekableIterator, Countable diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 64917ef..0781c71 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -20,7 +20,7 @@ class SpreadsheetReader_XLSX implements Iterator, Countable * With large shared string caches there are huge performance gains, however a lot of memory could be used which * can be a problem, especially on shared hosting. 
*/ - const SHARED_STRING_CACHE_LIMIT = null; + const SHARED_STRING_CACHE_LIMIT = 50000; private $Options = array( 'TempDir' => '', @@ -104,7 +104,6 @@ class SpreadsheetReader_XLSX implements Iterator, Countable private $SSForwarded = false; private static $BuiltinFormats = array( - 0 => '', 1 => '0', 2 => '0.00', 3 => '#,##0', @@ -244,7 +243,7 @@ public function __construct($Filepath, array $Options = null) } $Sheets = $this -> Sheets(); - + foreach ($this -> Sheets as $Index => $Name) { if ($Zip -> locateName('xl/worksheets/sheet'.$Index.'.xml') !== false) @@ -264,8 +263,7 @@ public function __construct($Filepath, array $Options = null) { foreach ($this -> StylesXML -> cellXfs -> xf as $Index => $XF) { - // Format #0 is a special case - it is the "General" format that is applied regardless of applyNumberFormat - if ($XF -> attributes() -> applyNumberFormat || (0 == (int)$XF -> attributes() -> numFmtId)) + if ($XF -> attributes() -> applyNumberFormat) { $FormatId = (int)$XF -> attributes() -> numFmtId; // If format ID >= 164, it is a custom format and should be read from styleSheet\numFmts @@ -273,8 +271,7 @@ public function __construct($Filepath, array $Options = null) } else { - // 0 for "General" format - $this -> Styles[] = 0; + $this -> Styles[] = false; } } } @@ -370,29 +367,17 @@ public function Sheets() $this -> Sheets = array(); foreach ($this -> WorkbookXML -> sheets -> sheet as $Index => $Sheet) { - $AttributesWithPrefix = $Sheet -> attributes('r', true); - $Attributes = $Sheet -> attributes(); - - $rId = 0; - $sheetId = 0; - - foreach ($AttributesWithPrefix as $Name => $Value) + $Attributes = $Sheet -> attributes('r', true); + foreach ($Attributes as $Name => $Value) { if ($Name == 'id') { - $rId = (int)str_replace('rId', '', (string)$Value); - break; - } - } - foreach ($Attributes as $Name => $Value) - { - if ($Name == 'sheetId') { - $sheetId = (int)$Value; + $SheetID = (int)str_replace('rId', '', (string)$Value); break; } } - $this -> Sheets[min($rId, 
$sheetId)] = (string)$Sheet['name']; + $this -> Sheets[$SheetID] = (string)$Sheet['name']; } ksort($this -> Sheets); } @@ -421,7 +406,6 @@ public function ChangeSheet($Index) if ($RealSheetIndex !== false && is_readable($TempWorksheetPath)) { $this -> WorksheetPath = $TempWorksheetPath; - $this -> rewind(); return true; } @@ -628,7 +612,7 @@ private function FormatValue($Value, $Index) return $Value; } - if (isset($this -> Styles[$Index]) && ($this -> Styles[$Index] !== false)) + if (!empty($this -> Styles[$Index])) { $Index = $this -> Styles[$Index]; } @@ -637,12 +621,6 @@ private function FormatValue($Value, $Index) return $Value; } - // A special case for the "General" format - if ($Index == 0) - { - return $this -> GeneralFormat($Value); - } - $Format = array(); if (isset($this -> ParsedFormatCache[$Index])) @@ -802,12 +780,8 @@ private function FormatValue($Value, $Index) // Applying format to value if ($Format) { - if ($Format['Code'] == '@') - { - return (string)$Value; - } // Percentages - elseif ($Format['Type'] == 'Percentage') + if ($Format['Type'] == 'Percentage') { if ($Format['Code'] === '0%') { @@ -903,7 +877,7 @@ private function FormatValue($Value, $Index) // Scaling $Value = $Value / $Format['Scale']; - if (!empty($Format['MinWidth']) && $Format['Decimals']) + if ($Format['MinWidth'] && $Format['Decimals']) { if ($Format['Thousands']) { @@ -931,23 +905,6 @@ private function FormatValue($Value, $Index) return $Value; } - /** - * Attempts to approximate Excel's "general" format. - * - * @param mixed Value - * - * @return mixed Result - */ - public function GeneralFormat($Value) - { - // Numeric format - if (is_numeric($Value)) - { - $Value = (float)$Value; - } - return $Value; - } - // !Iterator interface methods /** * Rewind the Iterator to the first element. 
@@ -955,24 +912,25 @@ public function GeneralFormat($Value) */ public function rewind() { - // Removed the check whether $this -> Index == 0 otherwise ChangeSheet doesn't work properly - - // If the worksheet was already iterated, XML file is reopened. - // Otherwise it should be at the beginning anyway - if ($this -> Worksheet instanceof XMLReader) + if ($this -> Index > 0 || !($this -> Worksheet instanceof XMLReader)) { - $this -> Worksheet -> close(); - } - else - { - $this -> Worksheet = new XMLReader; - } + // If the worksheet was already iterated, XML file is reopened. + // Otherwise it should be at the beginning anyway + if ($this -> Worksheet instanceof XMLReader) + { + $this -> Worksheet -> close(); + } + else + { + $this -> Worksheet = new XMLReader; + } - $this -> Worksheet -> open($this -> WorksheetPath); + $this -> Worksheet -> open($this -> WorksheetPath); + $this -> Valid = true; + + $this -> RowOpen = false; + } - $this -> Valid = true; - $this -> RowOpen = false; - $this -> CurrentRow = false; $this -> Index = 0; } @@ -1089,7 +1047,6 @@ public function next() break; // Cell value case 'v': - case 'is': if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT) { continue; @@ -1108,9 +1065,6 @@ public function next() $Value = $this -> FormatValue($Value, $StyleId); } elseif ($Value) - { - $Value = $this -> GeneralFormat($Value); - } $this -> CurrentRow[$Index] = $Value; break; From 84249771d814042b1352e418678646b15c5fcda5 Mon Sep 17 00:00:00 2001 From: Martins Pilsetnieks Date: Sat, 31 Jan 2015 14:33:25 +0200 Subject: [PATCH 12/38] Excel's "General" format (v.0.5.7) --- SpreadsheetReader_XLSX.php | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 0781c71..20ec41e 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -104,6 +104,7 @@ class SpreadsheetReader_XLSX implements Iterator, Countable private 
$SSForwarded = false; private static $BuiltinFormats = array( + 0 => '', 1 => '0', 2 => '0.00', 3 => '#,##0', @@ -263,7 +264,8 @@ public function __construct($Filepath, array $Options = null) { foreach ($this -> StylesXML -> cellXfs -> xf as $Index => $XF) { - if ($XF -> attributes() -> applyNumberFormat) + // Format #0 is a special case - it is the "General" format that is applied regardless of applyNumberFormat + if ($XF -> attributes() -> applyNumberFormat || (0 == (int)$XF -> attributes() -> numFmtId)) { $FormatId = (int)$XF -> attributes() -> numFmtId; // If format ID >= 164, it is a custom format and should be read from styleSheet\numFmts @@ -271,7 +273,8 @@ public function __construct($Filepath, array $Options = null) } else { - $this -> Styles[] = false; + // 0 for "General" format + $this -> Styles[] = 0; } } } @@ -611,8 +614,8 @@ private function FormatValue($Value, $Index) { return $Value; } - - if (!empty($this -> Styles[$Index])) + + if (isset($this -> Styles[$Index]) && ($this -> Styles[$Index] !== false)) { $Index = $this -> Styles[$Index]; } @@ -621,6 +624,12 @@ private function FormatValue($Value, $Index) return $Value; } + // A special case for the "General" format + if ($Index == 0) + { + return $this -> GeneralFormat($Value); + } + $Format = array(); if (isset($this -> ParsedFormatCache[$Index])) @@ -905,6 +914,23 @@ private function FormatValue($Value, $Index) return $Value; } + /** + * Attempts to approximate Excel's "general" format. + * + * @param mixed Value + * + * @return mixed Result + */ + public function GeneralFormat($Value) + { + // Numeric format + if (is_numeric($Value)) + { + $Value = (float)$Value; + } + return $Value; + } + // !Iterator interface methods /** * Rewind the Iterator to the first element. 
@@ -1065,6 +1091,9 @@ public function next() $Value = $this -> FormatValue($Value, $StyleId); } elseif ($Value) + { + $Value = $this -> GeneralFormat($Value); + } $this -> CurrentRow[$Index] = $Value; break; From 3bee7dd4715c8d88bdc34e384de78635b01e92e0 Mon Sep 17 00:00:00 2001 From: Martins Pilsetnieks Date: Sat, 31 Jan 2015 17:32:24 +0200 Subject: [PATCH 13/38] v.0.5.8. # Conflicts: # SpreadsheetReader.php --- CHANGELOG.md | 9 +++++++ LICENSE.md | 2 +- SpreadsheetReader.php | 51 ++++++++++++-------------------------- SpreadsheetReader_XLSX.php | 35 +++++++++++++------------- composer.json | 4 +-- 5 files changed, 46 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 629363b..24cd8d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +### v.0.5.8 2015-01-31 + +- [Issue #50](https://github.com/nuovo/spreadsheet-reader/issues/50): Fixed an XLSX rewind issue. (Thanks to [osuwariboy](https://github.com/osuwariboy)) +- [Issue #52](https://github.com/nuovo/spreadsheet-reader/issues/52), [#53](https://github.com/nuovo/spreadsheet-reader/issues/53): Apache POI compatibility for XLSX. (Thanks to [dimapashkov](https://github.com/dimapashkov)) +- [Issue #61](https://github.com/nuovo/spreadsheet-reader/issues/61): Autoload fix in the main class. (Thanks to [i-bash](https://github.com/i-bash)) +- [Issue #60](https://github.com/nuovo/spreadsheet-reader/issues/60), [#69](https://github.com/nuovo/spreadsheet-reader/issues/69), [#72](https://github.com/nuovo/spreadsheet-reader/issues/72): Fixed an issue where XLSX ChangeSheet may not work. (Thanks to [jtresponse](https://github.com/jtresponse), [osuwariboy](https://github.com/osuwariboy)) +- [Issue #70](https://github.com/nuovo/spreadsheet-reader/issues/70): Added a check for constructor parameter correctness. + + ### v.0.5.7 2013-10-29 - Attempt to replicate Excel's "General" format in XLSX files that is applied to otherwise unformatted cells. 
diff --git a/LICENSE.md b/LICENSE.md index fe7bc99..b30deee 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ ### spreadsheet-reader is licensed under the MIT License -Copyright (C) 2012-2013 Nuovo +Copyright (C) 2012-2015 Martins Pilsetnieks Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/SpreadsheetReader.php b/SpreadsheetReader.php index 6acbe3c..afe2055 100644 --- a/SpreadsheetReader.php +++ b/SpreadsheetReader.php @@ -2,10 +2,10 @@ /** * Main class for spreadsheet reading * - * @version 0.5.7 + * @version 0.5.8 * @author Martins Pilsetnieks */ - class SpreadsheetReader implements SeekableIterator, Countable + class SpreadsheetReader implements Iterator, Countable { const TYPE_XLSX = 'XLSX'; const TYPE_XLS = 'XLS'; @@ -51,6 +51,18 @@ public function __construct($Filepath, $OriginalFilename = false, $MimeType = fa date_default_timezone_set($DefaultTZ); } + // Checking the other parameters for correctness + + // This should be a check for string but we're lenient + if (!empty($OriginalFilename) && !is_scalar($OriginalFilename)) + { + throw new Exception('SpreadsheetReader: Original file (2nd parameter) path is not a string or a scalar value.'); + } + if (!empty($MimeType) && !is_scalar($MimeType)) + { + throw new Exception('SpreadsheetReader: Mime type (3nd parameter) path is not a string or a scalar value.'); + } + // 1. Determine type if (!$OriginalFilename) { @@ -204,6 +216,8 @@ private static function Load($Type) throw new Exception('SpreadsheetReader: Invalid type ('.$Type.')'); } + // 2nd parameter is to prevent autoloading for the class. + // If autoload works, the require line is unnecessary, if it doesn't, it ends badly. 
if (!class_exists('SpreadsheetReader_'.$Type, false)) { require(dirname(__FILE__).DIRECTORY_SEPARATOR.'SpreadsheetReader_'.$Type.'.php'); @@ -294,38 +308,5 @@ public function count() } return 0; } - - - /** - * @param int $position - * - * @return null - * @throws OutOfBoundsException - */ - public function seek($position) { - if (! $this->Handle) { - return null; - }; - - if ($position != $this->Handle->key()) { - if (0 == $position) { - $this->rewind(); - return; - } elseif ($position > 0) { - if ($this->Handle->key() === null || $position < $this->Handle->key()) { - $this->rewind(); - } - - while ($nodeStr = $this->Handle->next()) { - if ($this->Handle->key() == $position) { - return; - } - } - } - throw new OutOfBoundsException(Mage::helper('importexport')->__('Invalid seek position')); - } - - return null; - } } ?> diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 20ec41e..9ea6168 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -244,7 +244,7 @@ public function __construct($Filepath, array $Options = null) } $Sheets = $this -> Sheets(); - + foreach ($this -> Sheets as $Index => $Name) { if ($Zip -> locateName('xl/worksheets/sheet'.$Index.'.xml') !== false) @@ -409,6 +409,7 @@ public function ChangeSheet($Index) if ($RealSheetIndex !== false && is_readable($TempWorksheetPath)) { $this -> WorksheetPath = $TempWorksheetPath; + $this -> rewind(); return true; } @@ -938,25 +939,24 @@ public function GeneralFormat($Value) */ public function rewind() { - if ($this -> Index > 0 || !($this -> Worksheet instanceof XMLReader)) - { - // If the worksheet was already iterated, XML file is reopened. 
- // Otherwise it should be at the beginning anyway - if ($this -> Worksheet instanceof XMLReader) - { - $this -> Worksheet -> close(); - } - else - { - $this -> Worksheet = new XMLReader; - } - - $this -> Worksheet -> open($this -> WorksheetPath); - $this -> Valid = true; + // Removed the check whether $this -> Index == 0 otherwise ChangeSheet doesn't work properly - $this -> RowOpen = false; + // If the worksheet was already iterated, XML file is reopened. + // Otherwise it should be at the beginning anyway + if ($this -> Worksheet instanceof XMLReader) + { + $this -> Worksheet -> close(); } + else + { + $this -> Worksheet = new XMLReader; + } + + $this -> Worksheet -> open($this -> WorksheetPath); + $this -> Valid = true; + $this -> RowOpen = false; + $this -> CurrentRow = false; $this -> Index = 0; } @@ -1073,6 +1073,7 @@ public function next() break; // Cell value case 'v': + case 'is': if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT) { continue; diff --git a/composer.json b/composer.json index 3ed23dd..8ed70aa 100644 --- a/composer.json +++ b/composer.json @@ -3,8 +3,8 @@ "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", "keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], "homepage": "https://github.com/nuovo/spreadsheet-reader", - "version": "0.5.6", - "time": "2013-09-04", + "version": "0.5.8", + "time": "2015-01-31", "type": "library", "license": ["MIT"], "authors": [ From e1b2e4011cb11a5ed188caec692edc7cf5915b19 Mon Sep 17 00:00:00 2001 From: Martins Pilsetnieks Date: Sat, 31 Jan 2015 17:43:03 +0200 Subject: [PATCH 14/38] v.0.5.8. 
--- SpreadsheetReader_ODS.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SpreadsheetReader_ODS.php b/SpreadsheetReader_ODS.php index d48bbbd..40d97c4 100644 --- a/SpreadsheetReader_ODS.php +++ b/SpreadsheetReader_ODS.php @@ -25,6 +25,8 @@ class SpreadsheetReader_ODS implements Iterator, Countable */ private $Sheets = false; + private $CurrentRow = false; + /** * @var int Number of the sheet we're currently reading */ From 4a28865482ffc0ffa48ca86dfcaa29f0a41e31b0 Mon Sep 17 00:00:00 2001 From: pascal Date: Fri, 17 Apr 2015 17:24:09 +0200 Subject: [PATCH 15/38] avoid undefined offset error --- SpreadsheetReader_XLSX.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 9ea6168..0adebb7 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -887,7 +887,7 @@ private function FormatValue($Value, $Index) // Scaling $Value = $Value / $Format['Scale']; - if ($Format['MinWidth'] && $Format['Decimals']) + if (!empty($Format['MinWidth']) && $Format['Decimals']) { if ($Format['Thousands']) { From 3880f98fb849d2cddc4a92ecc01d7261a60bfdbb Mon Sep 17 00:00:00 2001 From: Martins Pilsetnieks Date: Sat, 18 Apr 2015 18:40:14 +0300 Subject: [PATCH 16/38] v.0.5.9 --- CHANGELOG.md | 4 ++++ SpreadsheetReader.php | 2 +- composer.json | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24cd8d1..3c8a3eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### v.0.5.9 2015-04-18 + +- [Pull request #85](https://github.com/nuovo/spreadsheet-reader/pull/85): Fixed an index check. (Thanks to [pa-m](https://github.com/pa-m)). + ### v.0.5.8 2015-01-31 - [Issue #50](https://github.com/nuovo/spreadsheet-reader/issues/50): Fixed an XLSX rewind issue. 
(Thanks to [osuwariboy](https://github.com/osuwariboy)) diff --git a/SpreadsheetReader.php b/SpreadsheetReader.php index afe2055..903ff79 100644 --- a/SpreadsheetReader.php +++ b/SpreadsheetReader.php @@ -2,7 +2,7 @@ /** * Main class for spreadsheet reading * - * @version 0.5.8 + * @version 0.5.9 * @author Martins Pilsetnieks */ class SpreadsheetReader implements Iterator, Countable diff --git a/composer.json b/composer.json index 8ed70aa..9f4f0ce 100644 --- a/composer.json +++ b/composer.json @@ -3,8 +3,8 @@ "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", "keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], "homepage": "https://github.com/nuovo/spreadsheet-reader", - "version": "0.5.8", - "time": "2015-01-31", + "version": "0.5.9", + "time": "2015-04-18", "type": "library", "license": ["MIT"], "authors": [ From a982e9c5574c1e280b59d77a450b5ee9fd06ed12 Mon Sep 17 00:00:00 2001 From: Martins Pilsetnieks Date: Sat, 18 Apr 2015 19:32:01 +0300 Subject: [PATCH 17/38] v.0.5.10. - Implemented SeekableIterator. Thanks to [paales](https://github.com/paales) for suggestion ([Issue #54](https://github.com/nuovo/spreadsheet-reader/issues/54) and [Pull request #55](https://github.com/nuovo/spreadsheet-reader/pull/55)). - Fixed a bug in CSV and ODS reading where reading position 0 multiple times in a row would result in internal pointer being advanced and reading the next line. (E.g. reading row #0 three times would result in rows #0, #1, and #2.). This could have happened on multiple calls to `current()` while in #0 position, or calls to `seek(0)` and `current()`. 
--- CHANGELOG.md | 5 +++++ SpreadsheetReader.php | 40 +++++++++++++++++++++++++++++++++++++-- SpreadsheetReader_CSV.php | 7 +++++-- SpreadsheetReader_ODS.php | 6 ++++-- composer.json | 2 +- 5 files changed, 53 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c8a3eb..9237f66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +### v.0.5.10 2015-04-18 + +- Implemented SeekableIterator. Thanks to [paales](https://github.com/paales) for suggestion ([Issue #54](https://github.com/nuovo/spreadsheet-reader/issues/54) and [Pull request #55](https://github.com/nuovo/spreadsheet-reader/pull/55)). +- Fixed a bug in CSV and ODS reading where reading position 0 multiple times in a row would result in internal pointer being advanced and reading the next line. (E.g. reading row #0 three times would result in rows #0, #1, and #2.). This could have happened on multiple calls to `current()` while in #0 position, or calls to `seek(0)` and `current()`. + ### v.0.5.9 2015-04-18 - [Pull request #85](https://github.com/nuovo/spreadsheet-reader/pull/85): Fixed an index check. (Thanks to [pa-m](https://github.com/pa-m)). diff --git a/SpreadsheetReader.php b/SpreadsheetReader.php index 903ff79..b019f8f 100644 --- a/SpreadsheetReader.php +++ b/SpreadsheetReader.php @@ -2,10 +2,10 @@ /** * Main class for spreadsheet reading * - * @version 0.5.9 + * @version 0.5.10 * @author Martins Pilsetnieks */ - class SpreadsheetReader implements Iterator, Countable + class SpreadsheetReader implements SeekableIterator, Countable { const TYPE_XLSX = 'XLSX'; const TYPE_XLS = 'XLS'; @@ -308,5 +308,41 @@ public function count() } return 0; } + + /** + * Method for SeekableIterator interface. Takes a posiiton and traverses the file to that position + * The value can be retrieved with a `current()` call afterwards. 
+ * + * @param int Position in file + */ + public function seek($Position) + { + if (!$this -> Handle) + { + throw new OutOfBoundsException('SpreadsheetReader: No file opened'); + } + + $CurrentIndex = $this -> Handle -> key(); + + if ($CurrentIndex != $Position) + { + if ($Position < $CurrentIndex || is_null($CurrentIndex) || $Position == 0) + { + $this -> rewind(); + } + + while ($this -> Handle -> valid() && ($Position > $this -> Handle -> key())) + { + $this -> Handle -> next(); + } + + if (!$this -> Handle -> valid()) + { + throw new OutOfBoundsException('SpreadsheetError: Position '.$Position.' not found'); + } + } + + return null; + } } ?> diff --git a/SpreadsheetReader_CSV.php b/SpreadsheetReader_CSV.php index 109f2e5..1cae82b 100644 --- a/SpreadsheetReader_CSV.php +++ b/SpreadsheetReader_CSV.php @@ -26,7 +26,7 @@ class SpreadsheetReader_CSV implements Iterator, Countable private $Index = 0; - private $CurrentRow = array(); + private $CurrentRow = null; /** * @param string Path to file @@ -159,6 +159,7 @@ public function ChangeSheet($Index) public function rewind() { fseek($this -> Handle, $this -> BOMLength); + $this -> CurrentRow = null; $this -> Index = 0; } @@ -170,7 +171,7 @@ public function rewind() */ public function current() { - if ($this -> Index == 0) + if ($this -> Index == 0 && is_null($this -> CurrentRow)) { $this -> next(); $this -> Index--; @@ -184,6 +185,8 @@ public function current() */ public function next() { + $this -> CurrentRow = array(); + // Finding the place the next line starts for UTF-16 encoded files // Line breaks could be 0x0D 0x00 0x0A 0x00 and PHP could split lines on the // first or the second linebreak leaving unnecessary \0 characters that mess up diff --git a/SpreadsheetReader_ODS.php b/SpreadsheetReader_ODS.php index 40d97c4..b12d9e7 100644 --- a/SpreadsheetReader_ODS.php +++ b/SpreadsheetReader_ODS.php @@ -25,7 +25,7 @@ class SpreadsheetReader_ODS implements Iterator, Countable */ private $Sheets = false; - private 
$CurrentRow = false; + private $CurrentRow = null; /** * @var int Number of the sheet we're currently reading @@ -169,6 +169,8 @@ public function rewind() $this -> TableOpen = false; $this -> RowOpen = false; + + $this -> CurrentRow = null; } $this -> Index = 0; @@ -182,7 +184,7 @@ public function rewind() */ public function current() { - if ($this -> Index == 0) + if ($this -> Index == 0 && is_null($this -> CurrentRow)) { $this -> next(); $this -> Index--; diff --git a/composer.json b/composer.json index 9f4f0ce..71dea71 100644 --- a/composer.json +++ b/composer.json @@ -3,7 +3,7 @@ "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", "keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], "homepage": "https://github.com/nuovo/spreadsheet-reader", - "version": "0.5.9", + "version": "0.5.10", "time": "2015-04-18", "type": "library", "license": ["MIT"], From 812bde0576bfc931e91d13a4eeea9f8cc34ef2d8 Mon Sep 17 00:00:00 2001 From: Martins Pilsetnieks Date: Thu, 30 Apr 2015 11:54:58 +0300 Subject: [PATCH 18/38] v.0.5.11: XLSX text cells --- CHANGELOG.md | 4 ++++ SpreadsheetReader_XLSX.php | 8 ++++++-- composer.json | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9237f66..30a09a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### v.0.5.11 2015-04-30 + +- Added a special case for cells formatted as text in XLSX. Previously leading zeros would get truncated if a text cell contained only numbers. + ### v.0.5.10 2015-04-18 - Implemented SeekableIterator. Thanks to [paales](https://github.com/paales) for suggestion ([Issue #54](https://github.com/nuovo/spreadsheet-reader/issues/54) and [Pull request #55](https://github.com/nuovo/spreadsheet-reader/pull/55)). 
diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 0adebb7..9cf8d12 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -615,7 +615,7 @@ private function FormatValue($Value, $Index) { return $Value; } - + if (isset($this -> Styles[$Index]) && ($this -> Styles[$Index] !== false)) { $Index = $this -> Styles[$Index]; @@ -790,8 +790,12 @@ private function FormatValue($Value, $Index) // Applying format to value if ($Format) { + if ($Format['Code'] == '@') + { + return (string)$Value; + } // Percentages - if ($Format['Type'] == 'Percentage') + elseif ($Format['Type'] == 'Percentage') { if ($Format['Code'] === '0%') { diff --git a/composer.json b/composer.json index 71dea71..0e64a78 100644 --- a/composer.json +++ b/composer.json @@ -3,8 +3,8 @@ "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", "keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], "homepage": "https://github.com/nuovo/spreadsheet-reader", - "version": "0.5.10", - "time": "2015-04-18", + "version": "0.5.11", + "time": "2015-04-30", "type": "library", "license": ["MIT"], "authors": [ From 8842207a89aeabf9888b2e3bf913766e65d70ca1 Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 4 Apr 2016 10:22:58 +0200 Subject: [PATCH 19/38] Fixed bug where first XSLX worksheet was inaccessible, after updating spreadsheetreader (again) Updated from https://github.com/nuovo/spreadsheet-reader/ Original bug report of XLSX bug: https://github.com/nuovo/spreadsheet-reader/issues/59#issue-24686316 --- SpreadsheetReader_XLSX.php | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 9cf8d12..a2f5eee 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -370,17 +370,29 @@ public function Sheets() $this -> Sheets = array(); foreach ($this -> WorkbookXML -> sheets -> sheet as $Index => $Sheet) { - 
$Attributes = $Sheet -> attributes('r', true); - foreach ($Attributes as $Name => $Value) + $AttributesWithPrefix = $Sheet -> attributes('r', true); + $Attributes = $Sheet -> attributes(); + + $rId = 0; + $sheetId = 0; + + foreach ($AttributesWithPrefix as $Name => $Value) { if ($Name == 'id') { - $SheetID = (int)str_replace('rId', '', (string)$Value); + $rId = (int)str_replace('rId', '', (string)$Value); + break; + } + } + foreach ($Attributes as $Name => $Value) + { + if ($Name == 'sheetId') { + $sheetId = (int)$Value; break; } } - $this -> Sheets[$SheetID] = (string)$Sheet['name']; + $this -> Sheets[min($rId, $sheetId)] = (string)$Sheet['name']; } ksort($this -> Sheets); } @@ -1093,7 +1105,7 @@ public function next() // Format value if necessary if ($Value !== '' && $StyleId && isset($this -> Styles[$StyleId])) { - $Value = $this -> FormatValue($Value, $StyleId); +// $Value = $this -> FormatValue($Value, $StyleId); } elseif ($Value) { From ee473f14e6dfe6ba8d1e6d3681e0127542c8cd59 Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 4 Apr 2016 10:35:20 +0200 Subject: [PATCH 20/38] SAF-58: Unlimited shared string cache. --- SpreadsheetReader_XLSX.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index a2f5eee..4e7f171 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -20,7 +20,7 @@ class SpreadsheetReader_XLSX implements Iterator, Countable * With large shared string caches there are huge performance gains, however a lot of memory could be used which * can be a problem, especially on shared hosting. 
*/ - const SHARED_STRING_CACHE_LIMIT = 50000; + const SHARED_STRING_CACHE_LIMIT = null; private $Options = array( 'TempDir' => '', From d5e6cd96cf1ad40ea676ad1325b505c1126905eb Mon Sep 17 00:00:00 2001 From: Paul Hachmang Date: Mon, 4 Apr 2016 11:28:29 +0200 Subject: [PATCH 21/38] SAF-58: Solve PHP7 Deprecation warnings --- php-excel-reader/excel_reader2.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/php-excel-reader/excel_reader2.php b/php-excel-reader/excel_reader2.php index 75351b7..6d38762 100644 --- a/php-excel-reader/excel_reader2.php +++ b/php-excel-reader/excel_reader2.php @@ -94,7 +94,7 @@ function v($data,$pos) { class OLERead { var $data = ''; - function OLERead(){ } + function __construct(){ } function read($sFileName){ // check if file exist and is readable (Darko Miljanovic) @@ -912,7 +912,7 @@ function _format_value($format,$num,$f) { * * Some basic initialisation */ - function Spreadsheet_Excel_Reader($file='',$store_extended_info=true,$outputEncoding='') { + function __construct($file='',$store_extended_info=true,$outputEncoding='') { $this->_ole = new OLERead(); $this->setUTFEncoder('iconv'); if ($outputEncoding != '') { From 1c983ab5e42fb24354680f12a578e705c6b6039f Mon Sep 17 00:00:00 2001 From: karlis-i Date: Thu, 2 Jun 2016 12:26:59 +0300 Subject: [PATCH 22/38] Another LibreOffice XLS fix --- php-excel-reader/excel_reader2.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/php-excel-reader/excel_reader2.php b/php-excel-reader/excel_reader2.php index af9253c..76211fb 100644 --- a/php-excel-reader/excel_reader2.php +++ b/php-excel-reader/excel_reader2.php @@ -835,7 +835,7 @@ function read16bitstring($data, $start) { function _format_value($format,$num,$f) { // 49==TEXT format // http://code.google.com/p/php-excel-reader/issues/detail?id=7 - if ( (!$f && $format=="%s") || ($f==49) || ($format=="GENERAL") ) { + if ( (!$f && $format=="%s") || ($f==49) || ($format=="GENERAL") || ($format == "General") ) { 
return array('string'=>$num, 'formatColor'=>null); } From 1eb524d639c4911f4eb8c8bc687866cdf981ddf3 Mon Sep 17 00:00:00 2001 From: karlis-i Date: Thu, 2 Jun 2016 12:47:43 +0300 Subject: [PATCH 23/38] Update composer.json --- composer.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index 5dc032c..155c9bb 100644 --- a/composer.json +++ b/composer.json @@ -3,8 +3,8 @@ "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", "keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], "homepage": "https://github.com/karlis-i/spreadsheet-reader", - "version": "0.5.14", - "time": "2016-03-18", + "version": "0.5.15", + "time": "2016-06-02", "type": "library", "license": ["MIT"], "authors": [ From 577a35a22de8cdc635c936c95d81e7c7a54ec935 Mon Sep 17 00:00:00 2001 From: karlis-i Date: Thu, 2 Jun 2016 12:48:49 +0300 Subject: [PATCH 24/38] Update composer.json --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 155c9bb..caf1a2f 100644 --- a/composer.json +++ b/composer.json @@ -3,7 +3,7 @@ "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", "keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], "homepage": "https://github.com/karlis-i/spreadsheet-reader", - "version": "0.5.15", + "version": "0.5.16", "time": "2016-06-02", "type": "library", "license": ["MIT"], From 7fe9ff5db07ed1bb2ff4544b3b1ba6ec5872f2d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B5nis=20Ormisson?= Date: Wed, 8 Jun 2016 20:21:57 +0300 Subject: [PATCH 25/38] Get Sheet Id in various formats Get sheet id with any format string wrapper around it eg box/spout writes xslx where sheet id is formatted as rIdSheet{n} not rId{0} --- SpreadsheetReader_XLSX.php | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/SpreadsheetReader_XLSX.php 
b/SpreadsheetReader_XLSX.php index 9cf8d12..7bcc264 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -375,8 +375,14 @@ public function Sheets() { if ($Name == 'id') { - $SheetID = (int)str_replace('rId', '', (string)$Value); - break; + //$SheetID = (int)str_replace('rId', '', (string)$Value); + /** + * Get sheet id with any format string wrapper around it + * eg box/spout writes xslx where sheet id is + * formatted as rIdSheet{n} not rId{0} + */ + $SheetID= intval(preg_replace('/[^0-9]+/', '', $Value), 10); + break; } } From 90e9414b1b33c21c8cf361903881db0b1be7b90c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B5nis=20Ormisson?= Date: Fri, 25 Nov 2016 22:13:44 +0200 Subject: [PATCH 26/38] test --- SpreadsheetReader_XLSX.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 7bcc264..463d7a9 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -376,13 +376,13 @@ public function Sheets() if ($Name == 'id') { //$SheetID = (int)str_replace('rId', '', (string)$Value); - /** - * Get sheet id with any format string wrapper around it - * eg box/spout writes xslx where sheet id is - * formatted as rIdSheet{n} not rId{0} - */ - $SheetID= intval(preg_replace('/[^0-9]+/', '', $Value), 10); - break; + /** + * Get sheet id with any format string wrapper around it + * eg box/spout writes xslx where sheet id is + * formatted as rIdSheet{n} not rId{0} + */ + $SheetID= intval(preg_replace('/[^0-9]+/', '', $Value), 10); + break; } } From ecd25710b753a816109f543c37311092874658fa Mon Sep 17 00:00:00 2001 From: Starchenko Alex Date: Wed, 10 Jan 2018 07:25:25 +0200 Subject: [PATCH 27/38] Fixed read for general format. If cell have general format, in her value written 'General'. After fix in cell we will see real value. 
--- php-excel-reader/excel_reader2.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/php-excel-reader/excel_reader2.php b/php-excel-reader/excel_reader2.php index 75351b7..ded661c 100644 --- a/php-excel-reader/excel_reader2.php +++ b/php-excel-reader/excel_reader2.php @@ -835,7 +835,7 @@ function read16bitstring($data, $start) { function _format_value($format,$num,$f) { // 49==TEXT format // http://code.google.com/p/php-excel-reader/issues/detail?id=7 - if ( (!$f && $format=="%s") || ($f==49) || ($format=="GENERAL") ) { + if ( (!$f && $format=="%s") || ($f==49) || (strtoupper($format)=="GENERAL") ) { return array('string'=>$num, 'formatColor'=>null); } From e7c0887324118f1d2768389d38e9a029862d2914 Mon Sep 17 00:00:00 2001 From: fujaru Date: Fri, 9 Mar 2018 18:49:11 +0700 Subject: [PATCH 28/38] merge pull request --- CHANGELOG.md | 24 ++--- SpreadsheetReader_XLSX.php | 4 + composer.json | 15 ++-- php-excel-reader/excel_reader2.php | 140 +++++++++++++++-------------- 4 files changed, 100 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30a09a9..591ca8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### v.0.5.12 2016-03-18 + +- Added a fix for recognising dates in XLS files created by LibreOffice + ### v.0.5.11 2015-04-30 - Added a special case for cells formatted as text in XLSX. Previously leading zeros would get truncated if a text cell contained only numbers. @@ -50,10 +54,10 @@ Currently only decimal number values are converted to PHP's floats. ### v.0.5.1 2013-06-27 -- Fixed file type choice when using mime-types (previously there were problems with +- Fixed file type choice when using mime-types (previously there were problems with XLSX and ODS mime-types) (Thanks to [incratec](https://github.com/incratec)) -- Fixed an error in XLSX iterator where `current()` would advance the iterator forward +- Fixed an error in XLSX iterator where `current()` would advance the iterator forward with each call. 
(Thanks to [osuwariboy](https://github.com/osuwariboy)) ### v.0.5.0 2013-06-17 @@ -62,19 +66,19 @@ with each call. (Thanks to [osuwariboy](https://github.com/osuwariboy)) - The `Sheets()` method lets you retrieve a list of all sheets present in the file. - `ChangeSheet($Index)` method changes the sheet in the reader to the one specified. -- Previously temporary files that were extracted, were deleted after the SpreadsheetReader -was destroyed but the empty directories remained. Now those are cleaned up as well. +- Previously temporary files that were extracted, were deleted after the SpreadsheetReader +was destroyed but the empty directories remained. Now those are cleaned up as well. ### v.0.4.3 2013-06-14 -- Bugfix for shared string caching in XLSX files. When the shared string count was larger -than the caching limit, instead of them being read from file, empty strings were returned. +- Bugfix for shared string caching in XLSX files. When the shared string count was larger +than the caching limit, instead of them being read from file, empty strings were returned. ### v.0.4.2 2013-06-02 -- XLS file reading relies on the external Spreadsheet_Excel_Reader class which, by default, -reads additional information about cells like fonts, styles, etc. Now that is disabled -to save some memory since the style data is unnecessary anyway. +- XLS file reading relies on the external Spreadsheet_Excel_Reader class which, by default, +reads additional information about cells like fonts, styles, etc. Now that is disabled +to save some memory since the style data is unnecessary anyway. (Thanks to [ChALkeR](https://github.com/ChALkeR) for the tip.) 
-Martins Pilsetnieks \ No newline at end of file +Martins Pilsetnieks diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 4e7f171..7371e73 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -1111,6 +1111,10 @@ public function next() { $Value = $this -> GeneralFormat($Value); } + elseif ($Value) + { + $Value = $this -> GeneralFormat($Value); + } $this -> CurrentRow[$Index] = $Value; break; diff --git a/composer.json b/composer.json index 0e64a78..caf1a2f 100644 --- a/composer.json +++ b/composer.json @@ -1,10 +1,10 @@ { - "name": "nuovo/spreadsheet-reader", + "name": "karlis-i/spreadsheet-reader", "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", "keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], - "homepage": "https://github.com/nuovo/spreadsheet-reader", - "version": "0.5.11", - "time": "2015-04-30", + "homepage": "https://github.com/karlis-i/spreadsheet-reader", + "version": "0.5.16", + "time": "2016-06-02", "type": "library", "license": ["MIT"], "authors": [ @@ -12,11 +12,12 @@ "name": "Martins Pilsetnieks", "email": "pilsetnieks@gmail.com", "homepage": "http://www.nuovo.lv/" + }, + { + "name": "karlis-i", + "email": "karlis.im@gmail.com" } ], - "support": { - "email": "spreadsheet-reader@nuovo.lv" - }, "require": { "php": ">= 5.3.0", "ext-zip": "*" diff --git a/php-excel-reader/excel_reader2.php b/php-excel-reader/excel_reader2.php index 6d38762..0486a2f 100644 --- a/php-excel-reader/excel_reader2.php +++ b/php-excel-reader/excel_reader2.php @@ -77,7 +77,7 @@ function GetInt4d($data, $pos) { function gmgetdate($ts = null){ $k = array('seconds','minutes','hours','mday','wday','mon','year','yday','weekday','month',0); return(array_comb($k,explode(":",gmdate('s:i:G:j:w:n:Y:z:l:F:U',is_null($ts)?time():$ts)))); - } + } // Added for PHP4 compatibility function array_comb($array1, $array2) { @@ -321,7 +321,7 @@ function myHex($d) { if ($d < 16) 
return "0" . dechex($d); return dechex($d); } - + function dumpHexData($data, $pos, $length) { $info = ""; for ($i = 0; $i <= $length; $i++) { @@ -394,7 +394,7 @@ function colcount($sheet=0) { } function colwidth($col,$sheet=0) { // Col width is actually the width of the number 0. So we have to estimate and come close - return $this->colInfo[$sheet][$col]['width']/9142*200; + return $this->colInfo[$sheet][$col]['width']/9142*200; } function colhidden($col,$sheet=0) { return !!$this->colInfo[$sheet][$col]['hidden']; @@ -405,7 +405,7 @@ function rowheight($row,$sheet=0) { function rowhidden($row,$sheet=0) { return !!$this->rowInfo[$sheet][$row]['hidden']; } - + // GET THE CSS FOR FORMATTING // ========================== function style($row,$col,$sheet=0,$properties='') { @@ -467,10 +467,10 @@ function style($row,$col,$sheet=0,$properties='') { if ($bRight!="" && $bRightCol!="") { $css .= "border-right-color:" . $bRightCol .";"; } if ($bTop!="" && $bTopCol!="") { $css .= "border-top-color:" . $bTopCol . ";"; } if ($bBottom!="" && $bBottomCol!="") { $css .= "border-bottom-color:" . 
$bBottomCol .";"; } - + return $css; } - + // FORMAT PROPERTIES // ================= function format($row,$col,$sheet=0) { @@ -482,7 +482,7 @@ function formatIndex($row,$col,$sheet=0) { function formatColor($row,$col,$sheet=0) { return $this->info($row,$col,'formatColor',$sheet); } - + // CELL (XF) PROPERTIES // ==================== function xfRecord($row,$col,$sheet=0) { @@ -581,7 +581,7 @@ function height($row,$col,$sheet=0) { function font($row,$col,$sheet=0) { return $this->fontProperty($row,$col,$sheet,'font'); } - + // DUMP AN HTML TABLE OF THE ENTIRE XLS DATA // ========================================= function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel') { @@ -600,7 +600,7 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel } $out .= "\n"; } - + $out .= "\n"; for($row=1;$row<=$this->rowcount($sheet);$row++) { $rowheight = $this->rowheight($row,$sheet); @@ -631,8 +631,8 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel $out .= "\n\t\t 1?" colspan=$colspan":"") . ($rowspan > 1?" rowspan=$rowspan":"") . 
">"; $val = $this->val($row,$col,$sheet); if ($val=='') { $val=" "; } - else { - $val = htmlentities($val); + else { + $val = htmlentities($val); $link = $this->hyperlink($row,$col,$sheet); if ($link!='') { $val = "$val"; @@ -647,7 +647,7 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel $out .= ""; return $out; } - + // -------------- // END PUBLIC API @@ -658,7 +658,7 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel var $xfRecords = array(); var $colInfo = array(); var $rowInfo = array(); - + var $sst = array(); var $sheets = array(); @@ -807,36 +807,36 @@ function dump($row_numbers=false,$col_letters=false,$sheet=0,$table_class='excel 0x0B => "Thin dash-dot-dotted", 0x0C => "Medium dash-dot-dotted", 0x0D => "Slanted medium dash-dotted" - ); + ); var $lineStylesCss = array( - "Thin" => "1px solid", - "Medium" => "2px solid", - "Dashed" => "1px dashed", - "Dotted" => "1px dotted", - "Thick" => "3px solid", - "Double" => "double", - "Hair" => "1px solid", - "Medium dashed" => "2px dashed", - "Thin dash-dotted" => "1px dashed", - "Medium dash-dotted" => "2px dashed", - "Thin dash-dot-dotted" => "1px dashed", - "Medium dash-dot-dotted" => "2px dashed", - "Slanted medium dash-dotte" => "2px dashed" + "Thin" => "1px solid", + "Medium" => "2px solid", + "Dashed" => "1px dashed", + "Dotted" => "1px dotted", + "Thick" => "3px solid", + "Double" => "double", + "Hair" => "1px solid", + "Medium dashed" => "2px dashed", + "Thin dash-dotted" => "1px dashed", + "Medium dash-dotted" => "2px dashed", + "Thin dash-dot-dotted" => "1px dashed", + "Medium dash-dot-dotted" => "2px dashed", + "Slanted medium dash-dotte" => "2px dashed" ); - + function read16bitstring($data, $start) { $len = 0; while (ord($data[$start + $len]) + ord($data[$start + $len + 1]) > 0) $len++; return substr($data, $start, $len); } - + // ADDED by Matt Kruse for better formatting function _format_value($format,$num,$f) { // 49==TEXT format // 
http://code.google.com/p/php-excel-reader/issues/detail?id=7 - if ( (!$f && $format=="%s") || ($f==49) || ($format=="GENERAL") ) { - return array('string'=>$num, 'formatColor'=>null); + if ( (!$f && $format=="%s") || ($f==49) || ($format=="GENERAL") || ($format == "General") ) { + return array('string'=>$num, 'formatColor'=>null); } // Custom pattern can be POSITIVE;NEGATIVE;ZERO @@ -860,13 +860,13 @@ function _format_value($format,$num,$f) { $color = strtolower($matches[1]); $pattern = preg_replace($color_regex,"",$pattern); } - + // In Excel formats, "_" is used to add spacing, which we can't do in HTML $pattern = preg_replace("/_./","",$pattern); - + // Some non-number characters are escaped with \, which we don't need $pattern = preg_replace("/\\\/","",$pattern); - + // Some non-number strings are quoted, so we'll get rid of the quotes $pattern = preg_replace("/\"/","",$pattern); @@ -901,6 +901,11 @@ function _format_value($format,$num,$f) { $pattern = preg_replace($number_regex, $formatted, $pattern); } + // prevent changing of big integers to '@' + if ($pattern === '@') { + $pattern = strval($num); + } + return array( 'string'=>$pattern, 'formatColor'=>$color @@ -915,7 +920,7 @@ function _format_value($format,$num,$f) { function __construct($file='',$store_extended_info=true,$outputEncoding='') { $this->_ole = new OLERead(); $this->setUTFEncoder('iconv'); - if ($outputEncoding != '') { + if ($outputEncoding != '') { $this->setOutputEncoding($outputEncoding); } for ($i=1; $i<245; $i++) { @@ -1163,7 +1168,7 @@ function _parse() { $font = substr($data, $pos+20, $numchars); } else { $font = substr($data, $pos+20, $numchars*2); - $font = $this->_encodeUTF16($font); + $font = $this->_encodeUTF16($font); } $this->fontRecords[] = array( 'height' => $height / 20, @@ -1216,14 +1221,14 @@ function _parse() { $xf['borderRight'] = $this->lineStyles[($border & 0xF0) >> 4]; $xf['borderTop'] = $this->lineStyles[($border & 0xF00) >> 8]; $xf['borderBottom'] = 
$this->lineStyles[($border & 0xF000) >> 12]; - + $xf['borderLeftColor'] = ($border & 0x7F0000) >> 16; $xf['borderRightColor'] = ($border & 0x3F800000) >> 23; $border = (ord($data[$pos+18]) | ord($data[$pos+19]) << 8); $xf['borderTopColor'] = ($border & 0x7F); $xf['borderBottomColor'] = ($border & 0x3F80) >> 7; - + if (array_key_exists($indexCode, $this->dateFormats)) { $xf['type'] = 'date'; $xf['format'] = $this->dateFormats[$indexCode]; @@ -1244,21 +1249,28 @@ function _parse() { if (preg_match("/[^hmsday\/\-:\s\\\,AMP]/i", $tmp) == 0) { // found day and time format $isdate = TRUE; $formatstr = $tmp; - $formatstr = str_replace(array('AM/PM','mmmm','mmm'), array('a','F','M'), $formatstr); - // m/mm are used for both minutes and months - oh SNAP! - // This mess tries to fix for that. - // 'm' == minutes only if following h/hh or preceding s/ss - $formatstr = preg_replace("/(h:?)mm?/","$1i", $formatstr); - $formatstr = preg_replace("/mm?(:?s)/","i$1", $formatstr); - // A single 'm' = n in PHP - $formatstr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatstr); - $formatstr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatstr); - // else it's months - $formatstr = str_replace('mm', 'm', $formatstr); - // Convert single 'd' to 'j' - $formatstr = preg_replace("/(^|[^d])d([^d]|$)/", '$1j$2', $formatstr); - $formatstr = str_replace(array('dddd','ddd','dd','yyyy','yy','hh','h'), array('l','D','d','Y','y','H','g'), $formatstr); - $formatstr = preg_replace("/ss?/", 's', $formatstr); + if ($formatstr === 'YYYY/MM/DD' || $formatstr === 'YYYY\-MM\-DD') { + // LibreOffice turns this pattern into invalid dates: + // 2015201520152015/OctOct/WedWed + // here we fix it + $formatstr = 'Y-m-d'; + } else { + $formatstr = str_replace(array('AM/PM','mmmm','mmm'), array('a','F','M'), $formatstr); + // m/mm are used for both minutes and months - oh SNAP! + // This mess tries to fix for that. 
+ // 'm' == minutes only if following h/hh or preceding s/ss + $formatstr = preg_replace("/(h:?)mm?/","$1i", $formatstr); + $formatstr = preg_replace("/mm?(:?s)/","i$1", $formatstr); + // A single 'm' = n in PHP + $formatstr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatstr); + $formatstr = preg_replace("/(^|[^m])m([^m]|$)/", '$1n$2', $formatstr); + // else it's months + $formatstr = str_replace('mm', 'm', $formatstr); + // Convert single 'd' to 'j' + $formatstr = preg_replace("/(^|[^d])d([^d]|$)/", '$1j$2', $formatstr); + $formatstr = str_replace(array('dddd','ddd','dd','yyyy','yy','hh','h'), array('l','D','d','Y','y','H','g'), $formatstr); + $formatstr = preg_replace("/ss?/", 's', $formatstr); + } } } } @@ -1553,24 +1565,24 @@ function _parsesheet($spos) { } $linkdata['desc'] = $udesc; $linkdata['link'] = $this->_encodeUTF16($ulink); - for ($r=$row; $r<=$row2; $r++) { + for ($r=$row; $r<=$row2; $r++) { for ($c=$column; $c<=$column2; $c++) { $this->sheets[$this->sn]['cellsInfo'][$r+1][$c+1]['hyperlink'] = $linkdata; } } break; case SPREADSHEET_EXCEL_READER_TYPE_DEFCOLWIDTH: - $this->defaultColWidth = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; + $this->defaultColWidth = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; break; case SPREADSHEET_EXCEL_READER_TYPE_STANDARDWIDTH: - $this->standardColWidth = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; + $this->standardColWidth = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; break; case SPREADSHEET_EXCEL_READER_TYPE_COLINFO: $colfrom = ord($data[$spos+0]) | ord($data[$spos+1]) << 8; $colto = ord($data[$spos+2]) | ord($data[$spos+3]) << 8; - $cw = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; - $cxf = ord($data[$spos+6]) | ord($data[$spos+7]) << 8; - $co = ord($data[$spos+8]); + $cw = ord($data[$spos+4]) | ord($data[$spos+5]) << 8; + $cxf = ord($data[$spos+6]) | ord($data[$spos+7]) << 8; + $co = ord($data[$spos+8]); for ($coli = $colfrom; $coli <= $colto; $coli++) { $this->colInfo[$this->sn][$coli+1] = 
Array('width' => $cw, 'xf' => $cxf, 'hidden' => ($co & 0x01), 'collapsed' => ($co & 0x1000) >> 12); } @@ -1714,12 +1726,8 @@ function _GetIEEE754($rknum) { function _encodeUTF16($string) { $result = $string; if ($this->_defaultEncoding){ - switch ($this->_encoderFunction){ - case 'iconv' : $result = iconv('UTF-16LE', $this->_defaultEncoding, $string); - break; - case 'mb_convert_encoding' : $result = mb_convert_encoding($string, $this->_defaultEncoding, 'UTF-16LE' ); - break; - } + // iconv changed to mb_convert_encoding + $result = mb_convert_encoding($string, $this->_defaultEncoding, 'UTF-16LE' ); } return $result; } @@ -1734,4 +1742,4 @@ function _GetInt4d($data, $pos) { } -?> \ No newline at end of file +?> From 0b7479a7f8881f344a58b432cf3d72e30e75eb4c Mon Sep 17 00:00:00 2001 From: fujaru Date: Fri, 9 Mar 2018 20:24:46 +0700 Subject: [PATCH 29/38] Enabled FormatValue (number, date, etc) for xlsx --- SpreadsheetReader_XLSX.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 7371e73..35edda9 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -1105,7 +1105,7 @@ public function next() // Format value if necessary if ($Value !== '' && $StyleId && isset($this -> Styles[$StyleId])) { -// $Value = $this -> FormatValue($Value, $StyleId); + $Value = $this -> FormatValue($Value, $StyleId); } elseif ($Value) { From aa687f1c82b1d386ac6f63f344ed0cf4fc40bc52 Mon Sep 17 00:00:00 2001 From: Tonis Ormisson Date: Thu, 9 Jan 2020 16:36:15 +0200 Subject: [PATCH 30/38] fix composer changes --- composer.json | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/composer.json b/composer.json index caf1a2f..a0e38e2 100644 --- a/composer.json +++ b/composer.json @@ -1,10 +1,8 @@ { - "name": "karlis-i/spreadsheet-reader", + "name": "nuovo/spreadsheet-reader", "description": "Spreadsheet reader library for Excel, OpenOffice and structured text files", 
"keywords": ["spreadsheet", "xls", "xlsx", "ods", "csv", "excel", "openoffice"], - "homepage": "https://github.com/karlis-i/spreadsheet-reader", - "version": "0.5.16", - "time": "2016-06-02", + "homepage": "https://github.com/nuovo/spreadsheet-reader", "type": "library", "license": ["MIT"], "authors": [ @@ -12,12 +10,11 @@ "name": "Martins Pilsetnieks", "email": "pilsetnieks@gmail.com", "homepage": "http://www.nuovo.lv/" - }, - { - "name": "karlis-i", - "email": "karlis.im@gmail.com" } ], + "support": { + "email": "spreadsheet-reader@nuovo.lv" + }, "require": { "php": ">= 5.3.0", "ext-zip": "*" From ec3e81d5612c903272c22aa0c89df5ce2c9db3a2 Mon Sep 17 00:00:00 2001 From: Bo Date: Thu, 16 Apr 2020 14:44:14 +0800 Subject: [PATCH 31/38] Fix use of `continue` statement under `switch case` --- SpreadsheetReader_XLSX.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index c5e9f2b..098a9e5 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -465,7 +465,7 @@ private function PrepareSharedStringCache() case 't': if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT) { - continue; + break; } $CacheValue .= $this -> SharedStrings -> readString(); break; @@ -568,7 +568,7 @@ private function GetSharedString($Index) $this -> SharedStrings -> next('si'); $this -> SSForwarded = true; $this -> SharedStringIndex++; - continue; + break; } else { @@ -590,7 +590,7 @@ private function GetSharedString($Index) case 't': if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT) { - continue; + break; } $Value .= $this -> SharedStrings -> readString(); break; @@ -1058,7 +1058,7 @@ public function next() // If it is a closing tag, skip it if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT) { - continue; + break; } $StyleId = (int)$this -> Worksheet -> getAttribute('s'); @@ -1092,7 +1092,7 @@ public function next() case 'is': if ($this -> Worksheet -> 
nodeType == XMLReader::END_ELEMENT) { - continue; + break; } $Value = $this -> Worksheet -> readString(); From 54753a41ad6ab04c05456582f74a120b8cfb8b68 Mon Sep 17 00:00:00 2001 From: jibo Date: Thu, 23 Jul 2020 11:16:44 +0800 Subject: [PATCH 32/38] Fix currency format --- SpreadsheetReader_XLSX.php | 2 +- php-excel-reader/excel_reader2.php | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 098a9e5..065e391 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -922,7 +922,7 @@ private function FormatValue($Value, $Index) // Currency/Accounting if ($Format['Currency']) { - $Value = preg_replace('', $Format['Currency'], $Value); + $Value = preg_replace('/\[.+\]/', $Format['Currency'], $Value); } } diff --git a/php-excel-reader/excel_reader2.php b/php-excel-reader/excel_reader2.php index 0d53393..991e163 100644 --- a/php-excel-reader/excel_reader2.php +++ b/php-excel-reader/excel_reader2.php @@ -906,6 +906,10 @@ function _format_value($format,$num,$f) { $pattern = strval($num); } + if (preg_match('/\$(?<currency>.*)-/', $pattern, $matches)) { + $pattern = preg_replace('/\[.+\]/', $matches['currency'], $pattern); + } + return array( 'string'=>$pattern, 'formatColor'=>$color From d0b32946a9b2954d7d2b51b41e53b6c483c154cb Mon Sep 17 00:00:00 2001 From: jibo Date: Tue, 1 Dec 2020 15:58:07 +0800 Subject: [PATCH 33/38] Don't format value as float if starts with 0 --- SpreadsheetReader_XLSX.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index 065e391..dcab552 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -941,7 +941,7 @@ private function FormatValue($Value, $Index) public function GeneralFormat($Value) { // Numeric format - if (is_numeric($Value)) + if (is_numeric($Value) && $Value[0] != 0) { $Value = (float)$Value; } From c30ba56ba02c62efb1a56a8bcd9dc535d59a7b5f Mon Sep 17
00:00:00 2001 From: Tonis Ormisson Date: Tue, 27 Apr 2021 08:41:05 +0300 Subject: [PATCH 34/38] ignore .idea --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9b670cb..bac498d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .DS_Store test -materials \ No newline at end of file +materials +.idea From a6420d49dc0e42714b2cfc92e27763b242d4df85 Mon Sep 17 00:00:00 2001 From: Pavel Kuznetsov Date: Mon, 4 Jul 2022 17:49:49 +0300 Subject: [PATCH 35/38] Get options before creating handle --- SpreadsheetReader.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/SpreadsheetReader.php b/SpreadsheetReader.php index b019f8f..cbc288d 100644 --- a/SpreadsheetReader.php +++ b/SpreadsheetReader.php @@ -13,7 +13,7 @@ class SpreadsheetReader implements SeekableIterator, Countable const TYPE_ODS = 'ODS'; private $Options = array( - 'Delimiter' => '', + 'Delimiter' => ';', 'Enclosure' => '"' ); @@ -37,7 +37,7 @@ class SpreadsheetReader implements SeekableIterator, Countable * @param string Original filename (in case of an uploaded file), used to determine file type, optional * @param string MIME type from an upload, used to determine file type, optional */ - public function __construct($Filepath, $OriginalFilename = false, $MimeType = false) + public function __construct($Filepath, $OriginalFilename = false, $MimeType = false, $Options = array()) { if (!is_readable($Filepath)) { @@ -158,12 +158,15 @@ public function __construct($Filepath, $OriginalFilename = false, $MimeType = fa } } + // Get options before creating handle + $this -> Options = array_merge($this -> Options, $Options); + // 2. 
Create handle switch ($this -> Type) { case self::TYPE_XLSX: self::Load(self::TYPE_XLSX); - $this -> Handle = new SpreadsheetReader_XLSX($Filepath); + $this -> Handle = new SpreadsheetReader_XLSX($Filepath, $this -> Options); break; case self::TYPE_CSV: self::Load(self::TYPE_CSV); From e4d56f07e3d55368fe6ce09f8177ffb2bbb79908 Mon Sep 17 00:00:00 2001 From: Pavel Kuznetsov Date: Mon, 4 Jul 2022 18:07:41 +0300 Subject: [PATCH 36/38] Example for options to readme --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 6b1a451..1edac3c 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,15 @@ Example: If a sheet is changed to the same that is currently open, the position in the file still reverts to the beginning, so as to conform to the same behavior as when changed to a different sheet. +Passing options: + + "/tmp" + ); + $Reader = new SpreadsheetReader('example.xlsx', false, false, $Options); + ?> + ### Testing From the command line: From bdfc94636c6fb96a68b501442a967aa3b5d0d50f Mon Sep 17 00:00:00 2001 From: Pavel Kuznetsov Date: Fri, 22 Jul 2022 15:06:40 +0300 Subject: [PATCH 37/38] XLSX: format value without Decimals --- SpreadsheetReader_XLSX.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SpreadsheetReader_XLSX.php b/SpreadsheetReader_XLSX.php index dcab552..230349c 100644 --- a/SpreadsheetReader_XLSX.php +++ b/SpreadsheetReader_XLSX.php @@ -903,7 +903,7 @@ private function FormatValue($Value, $Index) // Scaling $Value = $Value / $Format['Scale']; - if (!empty($Format['MinWidth']) && $Format['Decimals']) + if (!empty($Format['MinWidth'])) { if ($Format['Thousands']) { From 3be2499cb7c734bde13118a58a9b146a3f50f139 Mon Sep 17 00:00:00 2001 From: Pavel Kuznetsov Date: Wed, 17 Aug 2022 16:35:49 +0300 Subject: [PATCH 38/38] Apply encoding from Options for CSV files --- SpreadsheetReader_CSV.php | 66 +++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git 
a/SpreadsheetReader_CSV.php b/SpreadsheetReader_CSV.php index 1cae82b..77e8986 100644 --- a/SpreadsheetReader_CSV.php +++ b/SpreadsheetReader_CSV.php @@ -11,7 +11,8 @@ class SpreadsheetReader_CSV implements Iterator, Countable */ private $Options = array( 'Delimiter' => ';', - 'Enclosure' => '"' + 'Enclosure' => '"', + 'Encoding' => 'auto' ); private $Encoding = 'UTF-8'; @@ -49,6 +50,43 @@ public function __construct($Filepath, array $Options = null) $this -> Options = array_merge($this -> Options, $Options); $this -> Handle = fopen($Filepath, 'r'); + if( $this -> Options['Encoding'] == "auto" ) + { + $this -> AutoDetectEncoding(); + } + else{ + $this -> Encoding = $this -> Options['Encoding']; + } + + // Checking for the delimiter if it should be determined automatically + if (!$this -> Options['Delimiter']) + { + // fgetcsv needs single-byte separators + $Semicolon = ';'; + $Tab = "\t"; + $Comma = ','; + + // Reading the first row and checking if a specific separator character + // has more columns than others (it means that most likely that is the delimiter). + $SemicolonCount = count(fgetcsv($this -> Handle, null, $Semicolon)); + fseek($this -> Handle, $this -> BOMLength); + $TabCount = count(fgetcsv($this -> Handle, null, $Tab)); + fseek($this -> Handle, $this -> BOMLength); + $CommaCount = count(fgetcsv($this -> Handle, null, $Comma)); + fseek($this -> Handle, $this -> BOMLength); + + $Delimiter = $Semicolon; + if ($TabCount > $SemicolonCount || $CommaCount > $SemicolonCount) + { + $Delimiter = $CommaCount > $TabCount ? 
$Comma : $Tab; + } + + $this -> Options['Delimiter'] = $Delimiter; + } + } + + private function AutoDetectEncoding() + { // Checking the file for byte-order mark to determine encoding $BOM16 = bin2hex(fread($this -> Handle, 2)); if ($BOM16 == 'fffe') @@ -95,32 +133,6 @@ public function __construct($Filepath, array $Options = null) { fseek($this -> Handle, $this -> BOMLength); } - - // Checking for the delimiter if it should be determined automatically - if (!$this -> Options['Delimiter']) - { - // fgetcsv needs single-byte separators - $Semicolon = ';'; - $Tab = "\t"; - $Comma = ','; - - // Reading the first row and checking if a specific separator character - // has more columns than others (it means that most likely that is the delimiter). - $SemicolonCount = count(fgetcsv($this -> Handle, null, $Semicolon)); - fseek($this -> Handle, $this -> BOMLength); - $TabCount = count(fgetcsv($this -> Handle, null, $Tab)); - fseek($this -> Handle, $this -> BOMLength); - $CommaCount = count(fgetcsv($this -> Handle, null, $Comma)); - fseek($this -> Handle, $this -> BOMLength); - - $Delimiter = $Semicolon; - if ($TabCount > $SemicolonCount || $CommaCount > $SemicolonCount) - { - $Delimiter = $CommaCount > $TabCount ? $Comma : $Tab; - } - - $this -> Options['Delimiter'] = $Delimiter; - } } /**