Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Spout implementation #1

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,17 @@
"docs": "http://docs.portphp.org"
},
"require": {
"php": ">=5.4.0"
"php": ">=5.5.0",
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should require at least php 5.6, because portphp/portphp requires at least php 5.6.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

"portphp/portphp": "^1.0",
"box/spout": "^2.7"
},
"autoload": {
"psr-4": {
"Port\\Spout\\": "src/"
}
},
"require-dev": {
"phpunit/phpunit": "^4.0",
"phpspec/phpspec": "^2.1"
"phpunit/phpunit": "^4.0"
},
"autoload-dev": {
"psr-4": {
Expand Down
245 changes: 245 additions & 0 deletions src/SpoutReader.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
<?php

namespace Port\Spout;

use Box\Spout\Common\Type;
use Box\Spout\Reader\ReaderFactory;
use OutOfBoundsException;
use Port\Exception\ReaderException;
use Port\Reader\CountableReader;
use SeekableIterator;


/**
* Reads Excel files with the help of Spout
*
* Spout must be installed.
*
* @link http://opensource.box.com/spout/
* @link https://github.com/box/spout
*/
class SpoutReader implements CountableReader, SeekableIterator
{
/**
* @var \Box\Spout\Reader\XLSX\Sheet
*/
protected $sheet;

/**
* @var integer
*/
protected $headerRowNumber;

/**
* @var array
*/
protected $columnHeaders;

/**
* Total number of rows
*
* @var integer
*/
protected $count;

/**
* @param \SplFileObject $file Excel file
* @param int $headerRowNumber Optional number of header row
* @param int $activeSheet Index of active sheet to read from
* @param bool $shouldPreserveEmptyRows Sets whether empty rows should be returned or skipped
*
* @throws ReaderException
*/
public function __construct(\SplFileObject $file, $headerRowNumber = null, $activeSheet = null, $shouldPreserveEmptyRows = true)
{
$reader = $this->createReaderForFile($file, $shouldPreserveEmptyRows);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather inject the ReaderInterface directly.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed: the user must call the ReaderInterface::open method before using the SpoutReader.

Copy link
Author

@aaa2000 aaa2000 Mar 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this fix, the SpoutReader is compatible with CSV, XLSX and ODS.

However, the count method throws an exception for the ODS format, because its RowIterator cannot be rewound more than once. See https://github.com/box/spout/blob/3681a3421a868ab9a65da156c554f756541f452b/src/Spout/Reader/ODS/RowIterator.php#L101

Maybe I will not implement the CountableReader interface.


$activeSheet = null === $activeSheet ? 0 : (int) $activeSheet;
foreach ($reader->getSheetIterator() as $sheet) {
if ($sheet->getIndex() === $activeSheet) {
break;
}
}

if (!$reader->getSheetIterator()->valid()) {
throw new ReaderException(sprintf('Sheet at index %d is not found', $activeSheet));
}
$this->sheet = $reader->getSheetIterator()->current();

if (null !== $headerRowNumber) {
$this->setHeaderRowNumber($headerRowNumber);
}
}

/**
 * Create a Spout XLSX reader and open the given file with it
 *
 * @param \SplFileObject $file                    File whose path name is handed to the Spout reader
 * @param bool           $shouldPreserveEmptyRows Forwarded to the reader's setShouldPreserveEmptyRows()
 *
 * @return \Box\Spout\Reader\XLSX\Reader The reader, already opened on the file
 */
private function createReaderForFile(\SplFileObject $file, $shouldPreserveEmptyRows)
{
    $path = $file->getPathname();

    /** @var \Box\Spout\Reader\XLSX\Reader $xlsxReader */
    $xlsxReader = ReaderFactory::create(Type::XLSX);
    // The empty-row option is configured before the file is opened.
    $xlsxReader->setShouldPreserveEmptyRows($shouldPreserveEmptyRows);
    $xlsxReader->open($path);

    return $xlsxReader;
}

/**
 * Return the row the sheet's row iterator currently points at
 *
 * Without column headers the row is returned as-is. With column headers
 * the row is returned as an associative array keyed by those headers,
 * provided the row has exactly as many cells as there are headers;
 * otherwise null is returned for that row.
 *
 * @return array|null
 */
public function current()
{
    $cells = $this->sheet->getRowIterator()->current();

    // No headers known: hand the raw row back.
    if (empty($this->columnHeaders)) {
        return $cells;
    }

    // Headers and cells must line up one-to-one to be combined.
    if (count($this->columnHeaders) !== count($cells)) {
        return null;
    }

    return array_combine(array_values($this->columnHeaders), $cells);
}

/**
* Get column headers
*
* Returns the headers previously stored by setColumnHeaders() or
* setHeaderRowNumber(). The property has no default value, so null is
* returned when neither has been called.
*
* @return array|null
*/
public function getColumnHeaders()
{
return $this->columnHeaders;
}

/**
* Set column headers
*
* The given values replace any headers stored earlier. current() uses them
* as array keys for every row whose cell count equals the number of headers.
*
* @param array $columnHeaders Header names, one per column
*/
public function setColumnHeaders(array $columnHeaders)
{
$this->columnHeaders = $columnHeaders;
}

/**
 * Rewind the row iterator
 *
 * If a header row has been set, the iterator is positioned just below the
 * header row, so the header itself is skipped when iterating over the
 * rows. When the sheet holds no row after the header, the iterator is
 * left in an invalid state (valid() returns false) and iteration yields
 * nothing.
 *
 * @throws OutOfBoundsException If the header row itself cannot be found
 */
public function rewind()
{
    if (null === $this->headerRowNumber) {
        $this->sheet->getRowIterator()->rewind();

        return;
    }

    // seekIndex() walks the rows with foreach, which rewinds the iterator
    // on its own; a separate rewind() here would only rewind twice.
    // The header row number is zero-based, seekIndex() is one-based.
    $this->seekIndex($this->headerRowNumber + 1);

    // Step past the header, exactly as setHeaderRowNumber() does. Seeking
    // straight to the first data row instead would raise an
    // OutOfBoundsException for a sheet that has a header but no data.
    $this->sheet->getRowIterator()->next();
}

/**
* Set header row number
*
* @param integer $rowNumber Number of the row that contains column header names
*/
public function setHeaderRowNumber($rowNumber)
{
$rowNumber = (int) $rowNumber;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather use a static type hint for this method.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would require PHP 7.

$this->seekIndex($rowNumber + 1);
$this->columnHeaders = $this->sheet->getRowIterator()->current();
$this->headerRowNumber = $rowNumber;
$this->sheet->getRowIterator()->next();
}

/**
* Advance to the next row
*
* Delegates directly to next() on the row iterator of the selected sheet;
* no header handling is applied here.
*/
public function next()
{
$this->sheet->getRowIterator()->next();
}

/**
* Check whether the current position is valid
*
* Returns whatever valid() of the selected sheet's row iterator reports.
*
* @return bool
*/
public function valid()
{
return $this->sheet->getRowIterator()->valid();
}

/**
* Return the key of the current row
*
* The key is taken unchanged from the selected sheet's row iterator; it is
* not adjusted for a header row. seekIndex() compares these keys against
* one-based row indexes.
*
* @return mixed
*/
public function key()
{
return $this->sheet->getRowIterator()->key();
}

/**
 * Seek to a data row by its zero-based position
 *
 * Position 0 is the first row of the sheet, or the first row below the
 * header row when a header row number has been set.
 *
 * @param int $position Zero-based position among the data rows
 *
 * @throws OutOfBoundsException If no row exists at the resulting index
 */
public function seek($position)
{
    // Rows at and above the header (zero-based number) are not addressable.
    $headerOffset = null === $this->headerRowNumber ? 0 : $this->headerRowNumber + 1;

    // "+ 1" converts the zero-based position into a one-based row index.
    $this->seekIndex($position + 1 + $headerOffset);
}

/**
 * Position the sheet's row iterator on the row with the given index
 *
 * The rows are walked from the start (foreach rewinds the iterator) until
 * a row whose key is identical to the requested one-based index is
 * reached; the iterator is left pointing at that row.
 *
 * @param int $index One-based row index
 *
 * @throws OutOfBoundsException If the rows run out before the index is found
 */
private function seekIndex($index)
{
    foreach ($this->sheet->getRowIterator() as $currentIndex => $unusedRow) {
        if ($currentIndex !== $index) {
            continue;
        }

        // Leaving the loop here keeps the iterator on the matching row.
        return;
    }

    throw new OutOfBoundsException(sprintf('Row number %d is out of range', $index));
}

/**
 * Count the data rows of the sheet
 *
 * The total number of rows is determined once by walking the whole sheet
 * and is then cached in $this->count; the header row and every row above
 * it are subtracted on each call, so a header row number set later is
 * still taken into account.
 *
 * Counting has to run the shared row iterator to its end. If the iterator
 * pointed at a row beforehand, that position is restored afterwards so
 * that calling count() during an iteration does not end the iteration.
 *
 * @return int Number of rows, excluding the header row and the rows above it
 */
public function count()
{
    if (null === $this->count) {
        $rowIterator = $this->sheet->getRowIterator();

        // Remember the caller's position. A key below 1 is taken to mean
        // the iterator has not been started yet, because seekIndex() works
        // with one-based indexes (NOTE(review): confirm against Spout's
        // RowIterator).
        $resumeIndex = $rowIterator->valid() ? $rowIterator->key() : null;

        $this->count = iterator_count($rowIterator);

        if (null !== $resumeIndex && $resumeIndex > 0) {
            try {
                $this->seekIndex($resumeIndex);
            } catch (OutOfBoundsException $e) {
                // Best effort only: if the row cannot be found again the
                // iterator stays exhausted, as it did before positions
                // were restored at all.
            }
        }
    }

    if (null === $this->headerRowNumber) {
        return $this->count;
    }

    return $this->count - ($this->headerRowNumber + 1);
}

/**
* Get a row
*
* @param integer $number
*
* @return array
*/
public function getRow($number)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the use case for this method?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

{
$this->seek($number);

return $this->current();
}
}
46 changes: 46 additions & 0 deletions src/SpoutReaderFactory.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

namespace Port\Spout;

use Port\Reader\ReaderFactory;

/**
 * Factory that creates SpoutReader instances sharing one set of options
 *
 * The options given to the constructor are stored and passed unchanged to
 * every reader returned by getReader().
 */
class SpoutReaderFactory implements ReaderFactory
{
    /**
     * Header row number handed to each reader (null when none was given)
     *
     * @var integer
     */
    protected $headerRowNumber;

    /**
     * Index of the sheet each reader reads from (null when none was given)
     *
     * @var integer
     */
    protected $activeSheet;

    /**
     * Empty-row setting handed to each reader
     *
     * @var bool
     */
    private $shouldPreserveEmptyRows;

    /**
     * @param integer $headerRowNumber         Optional number of the header row
     * @param integer $activeSheet             Optional index of the sheet to read from
     * @param bool    $shouldPreserveEmptyRows Empty-row setting passed on to SpoutReader
     */
    public function __construct($headerRowNumber = null, $activeSheet = null, $shouldPreserveEmptyRows = true)
    {
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
        $this->activeSheet = $activeSheet;
        $this->headerRowNumber = $headerRowNumber;
    }

    /**
     * Create a reader for the given file using the stored options
     *
     * @param \SplFileObject $file File to read
     *
     * @return \Port\Spout\SpoutReader
     */
    public function getReader(\SplFileObject $file)
    {
        $reader = new SpoutReader(
            $file,
            $this->headerRowNumber,
            $this->activeSheet,
            $this->shouldPreserveEmptyRows
        );

        return $reader;
    }
}
Loading