class PicaNormReader extends Reader
{
    /**
     * Octet that terminates a record.
     *
     * @var string
     */
    const RECORD_SEPARATOR = "\x1d";

    /**
     * Octet that terminates a field.
     *
     * @var string
     */
    const FIELD_SEPARATOR = "\x1e";

    /**
     * Octet that introduces a subfield.
     *
     * @var string
     */
    const SUBFIELD_SEPARATOR = "\x1f";

    /**
     * Input stream.
     *
     * @var resource|null
     */
    private $stream;

    /**
     * Read-buffer.
     *
     * @var string|null
     */
    private $buffer;

    /**
     * Read-buffer size.
     *
     * @var integer
     */
    private $bufferSize = 0;

    /**
     * Position in read-buffer.
     *
     * @var integer
     */
    private $bufferPosition = 0;

    /**
     * Regular expression to split a field.
     *
     * Captures the tag (1), the optional two-digit occurrence (3) and the
     * subfield data following the first subfield separator (4).
     *
     * @var string
     */
    private $fieldRegexp = "|^([012][0-9]{2}[A-Z@])(/([0-9]{2}))? \x1f(.+)$|uD";

    /**
     * Constructor.
     *
     * @return void
     */
    public function __construct ()
    {}

    /**
     * Open the reader with input stream.
     *
     * A string argument is wrapped in a data: stream so that strings and
     * streams are read by the same code path.
     *
     * @throws InvalidArgumentException Invalid stream type
     * @throws InvalidArgumentException Argument neither string nor stream
     *
     * @param  resource|string $stream
     * @return void
     */
    public function open ($stream)
    {
        if (is_string($stream)) {
            $stream = fopen('data://text/plain;base64,' . base64_encode($stream), 'rb');
        }
        if (!is_resource($stream)) {
            throw new InvalidArgumentException(sprintf('Invalid type of argument: resource|string, %s', gettype($stream)));
        }
        $meta = stream_get_meta_data($stream);
        if ($meta['stream_type'] !== 'STDIO' && $meta['stream_type'] !== 'RFC2397') {
            throw new InvalidArgumentException(sprintf('Invalid stream type: STDIO|RFC2397, %s', $meta['stream_type']));
        }
        $this->buffer = null;
        $this->stream = $stream;
        $this->bufferSize = 0;
        $this->bufferPosition = 0;
    }

    /**
     * Close reader.
     *
     * Closes the input stream and resets the read-buffer. Calling close()
     * repeatedly, or before open(), is a no-op.
     *
     * @return void
     */
    public function close ()
    {
        if (is_resource($this->stream)) {
            fclose($this->stream);
        }
        // Drop the handle so a second close() does not touch a closed resource.
        $this->stream = null;
        $this->buffer = null;
        $this->bufferSize = 0;
        $this->bufferPosition = 0;
    }

    /**
     * Return next record from input stream.
     *
     * Reads fields up to the next record separator or the end of the input
     * and consumes the separator, so that consecutive calls return
     * consecutive records. Records without any field are skipped.
     *
     * @throws \RuntimeException Reader is not open, read error or malformed field
     *
     * @return array|false Array with key 'fields', or false if the input is exhausted
     */
    protected function next ()
    {
        while (!$this->feof()) {
            $record = array();
            while (!$this->feof() && $this->peek() !== self::RECORD_SEPARATOR) {
                $field = $this->field();
                if ($field) {
                    $record['fields'] []= $field;
                }
            }
            if (!$this->feof()) {
                // Swallow record separator, otherwise the following call
                // would start on it and report an empty record.
                $this->getc();
            }
            if (!empty($record)) {
                return $record;
            }
        }
        return false;
    }

    ///

    /**
     * Return Pica+ field.
     *
     * Reads up to the next field separator (which is consumed), the next
     * record separator (which is left in place for next()) or the end of the
     * input, and splits what was read into tag, occurrence and subfields.
     *
     * @throws \RuntimeException Data does not match the field pattern
     *
     * @return array|false Array with keys 'tag', 'occurrence' and 'subfields',
     *                     or false if the input is exhausted
     */
    private function field ()
    {
        if ($this->feof()) {
            return false;
        }

        $line = '';
        while (!$this->feof()) {
            $octet = $this->peek();
            if ($octet === self::RECORD_SEPARATOR) {
                // A field never extends into the following record.
                break;
            }
            $this->getc();
            if ($octet === self::FIELD_SEPARATOR) {
                break;
            }
            $line .= $octet;
        }

        $matches = array();
        if (!preg_match($this->fieldRegexp, $line, $matches)) {
            throw new \RuntimeException(sprintf('Unexpected data in input stream: %s', $line));
        }
        $subfields = array_map(array($this, 'splitSubfield'), explode(self::SUBFIELD_SEPARATOR, $matches[4]));
        return array(
            'tag'        => $matches[1],
            // Group 3 is the empty string when the field has no occurrence.
            'occurrence' => $matches[3] !== '' ? $matches[3] : null,
            'subfields'  => $subfields
        );
    }

    /**
     * Split subfields into array structures.
     *
     * The first octet is the subfield code, the remainder the value.
     *
     * @throws \RuntimeException Subfield is empty
     *
     * @param  string $subfield
     * @return array Array with keys 'code' and 'value'
     */
    private function splitSubfield ($subfield)
    {
        if ($subfield === '') {
            throw new \RuntimeException('Unexpected data in input stream: empty subfield');
        }
        // Cast because substr() returns false instead of '' for a code-only
        // subfield on older PHP versions.
        return array('code' => $subfield[0], 'value' => (string)substr($subfield, 1));
    }

    /**
     * Return next octet without moving pointer.
     *
     * @return string|null
     */
    private function peek ()
    {
        return $this->getc(true);
    }

    /**
     * Return next octet.
     *
     * If argument is true, the internal pointer is not moved after reading
     * the octet.
     *
     * @throws \RuntimeException Reader is not open or read error
     *
     * @param  boolean $peek
     * @return string|null Next octet, or null if the input is exhausted
     */
    private function getc ($peek = false)
    {
        if (!$this->fillBuffer()) {
            return null;
        }
        $octet = $this->buffer[$this->bufferPosition];
        if (!$peek) {
            $this->bufferPosition++;
        }
        return $octet;
    }

    /**
     * Return true if input stream and read-buffer exhausted.
     *
     * @throws \RuntimeException Reader is not open or read error
     *
     * @return boolean
     */
    private function feof ()
    {
        return !$this->fillBuffer();
    }

    /**
     * Make sure the read-buffer holds at least one unread octet.
     *
     * Refills the buffer from the input stream once the buffered octets are
     * consumed. A read that yields no data is treated as end of input: the
     * stream's EOF flag may only be set by such a read, so checking it before
     * reading is not sufficient.
     *
     * @throws \RuntimeException Reader is not open
     * @throws \RuntimeException Error reading input stream
     *
     * @return boolean True if an unread octet is available
     */
    private function fillBuffer ()
    {
        if ($this->bufferPosition < $this->bufferSize) {
            return true;
        }
        if (!is_resource($this->stream)) {
            throw new \RuntimeException('Reader is not open');
        }
        if (feof($this->stream)) {
            return false;
        }
        $buffer = fread($this->stream, 4096);
        if ($buffer === false) {
            throw new \RuntimeException('Error reading input stream');
        }
        $this->buffer = $buffer;
        $this->bufferSize = strlen($buffer);
        $this->bufferPosition = 0;
        return $this->bufferSize > 0;
    }
}
class PicaNormReaderTest extends PHPUnit_FrameWork_TestCase
{
    /**
     * Reading string input made of two fields joined by a field separator
     * yields a title record.
     */
    public function testReadStringData ()
    {
        $picaReader = new PicaNormReader();
        $picaReader->open("003@ \x1f0test\x1e002@ \x1f0Aau");

        $firstRecord = $picaReader->read();

        $this->assertInstanceOf('HAB\Pica\Record\TitleRecord', $firstRecord);
        $picaReader->close();
    }
}