From 5ffc3fc0bf175fdb99580a94a6c7af26c509edd6 Mon Sep 17 00:00:00 2001
From: David Maus <maus@hab.de>
Date: Mon, 14 Jan 2013 14:22:16 +0100
Subject: [PATCH] Implement experimental Reader for normalized Pica+

---
 src/HAB/Pica/Reader/PicaNormReader.php        | 244 ++++++++++++++++++
 .../HAB/Pica/Reader/PicaNormReaderTest.php    |  42 +++
 2 files changed, 286 insertions(+)
 create mode 100644 src/HAB/Pica/Reader/PicaNormReader.php
 create mode 100644 tests/src/HAB/Pica/Reader/PicaNormReaderTest.php

diff --git a/src/HAB/Pica/Reader/PicaNormReader.php b/src/HAB/Pica/Reader/PicaNormReader.php
new file mode 100644
index 0000000..9fdffd3
--- /dev/null
+++ b/src/HAB/Pica/Reader/PicaNormReader.php
@@ -0,0 +1,244 @@
+<?php
+
+/**
+ * Reader for normalized Pica+ records.
+ *
+ * @see http://www.gbv.de/wikis/cls/PICA%2B#Normalisiertes_PICA.2B
+ *
+ * This file is part of PicaReader.
+ *
+ * PicaReader is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PicaReader is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PicaReader.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * @author    David Maus <maus@hab.de>
+ * @copyright Copyright (c) 2013 by Herzog August Bibliothek Wolfenbüttel
+ * @license   http://www.gnu.org/licenses/gpl.txt GNU General Public License v3
+ */
+
+namespace HAB\Pica\Reader;
+
+use InvalidArgumentException;
+
+class PicaNormReader extends Reader
+{
+    /**
+     * Separators of records, fields, and subfields.
+     *
+     * @var string
+     */
+    const RECORD_SEPARATOR = "\x1d";
+    const FIELD_SEPARATOR  = "\x1e";
+    const SUBFIELD_SEPARATOR = "\x1f";
+
+    /**
+     * Input stream, null after the reader was closed.
+     *
+     * @var resource|null
+     */
+    private $stream;
+
+    /**
+     * Read-buffer.
+     *
+     * @var string
+     */
+    private $buffer;
+
+    /**
+     * Read-buffer size.
+     *
+     * @var integer
+     */
+    private $bufferSize;
+
+    /**
+     * Position in read-buffer.
+     *
+     * @var integer
+     */
+    private $bufferPosition;
+
+    /**
+     * Regular expression to split a field into tag, occurrence, and subfields.
+     *
+     * @var string
+     */
+    private $fieldRegexp = "|^([012][0-9]{2}[A-Z@])(/([0-9]{2}))? \x1f(.+)$|uD";
+
+    /**
+     * Constructor.
+     */
+    public function __construct ()
+    {}
+
+    /**
+     * Open the reader with input stream or a string of record data.
+     *
+     * @throws InvalidArgumentException Invalid stream type
+     * @throws InvalidArgumentException Argument neither string nor stream
+     *
+     * @param  resource|string $stream
+     * @return void
+     */
+    public function open ($stream)
+    {
+        if (is_string($stream)) {
+            $stream = fopen('data://text/plain;base64,' . base64_encode($stream), 'rb');
+        }
+        if (!is_resource($stream)) {
+            throw new InvalidArgumentException(sprintf('Invalid type of argument: resource|string, %s', gettype($stream)));
+        }
+        $meta = stream_get_meta_data($stream);
+        if ($meta['stream_type'] !== 'STDIO' && $meta['stream_type'] !== 'RFC2397') {
+            throw new InvalidArgumentException(sprintf('Invalid stream type: STDIO|RFC2397, %s', $meta['stream_type']));
+        }
+        $this->buffer         = null;
+        $this->stream         = $stream;
+        $this->bufferSize     = 0;
+        $this->bufferPosition = 0;
+    }
+
+    /**
+     * Close reader; calling it more than once is harmless.
+     *
+     * @return void
+     */
+    public function close ()
+    {
+        if (is_resource($this->stream)) {
+            fclose($this->stream);
+        }
+        $this->stream = null;
+    }
+
+    /**
+     * Return next record from input stream.
+     *
+     * @return array|false Record data, false if no field could be read
+     */
+    protected function next ()
+    {
+        $record = array();
+        while (!$this->feof() && $this->peek() !== self::RECORD_SEPARATOR) {
+            $field = $this->field();
+            if ($field) {
+                $record['fields'] []= $field;
+            }
+        }
+        if ($this->peek() === self::RECORD_SEPARATOR) {
+            // Swallow record separator so the next call starts at the following record
+            $this->getc();
+        }
+        return empty($record) ? false : $record;
+    }
+
+    /**
+     * Return Pica+ field.
+     *
+     * @throws \RuntimeException Field does not match the expected layout
+     * @return array|false Field data, false if input is exhausted
+     */
+    private function field ()
+    {
+        if ($this->feof()) {
+            return false;
+        }
+        // A record separator ends the field as well, it is never part of field data
+        $delimiters = array(self::FIELD_SEPARATOR, self::RECORD_SEPARATOR);
+        $line = '';
+        while (!$this->feof() && !in_array($this->peek(), $delimiters, true)) {
+            $octet = $this->getc();
+            if ($octet !== null) {
+                $line .= $octet;
+            }
+        }
+        if ($this->peek() === self::FIELD_SEPARATOR) {
+            // Swallow field separator
+            $this->getc();
+        }
+        if (!preg_match($this->fieldRegexp, $line, $matches)) {
+            throw new \RuntimeException(sprintf('Unexpected data in input stream: %s', $line));
+        }
+        $subfields = array_map(array($this, 'splitSubfield'), explode(self::SUBFIELD_SEPARATOR, $matches[4]));
+        return array(
+            'tag' => $matches[1],
+            'occurrence' => $matches[3] ?: null,
+            'subfields' => $subfields
+        );
+    }
+
+    /**
+     * Split a subfield into its code (first octet) and value (remainder).
+     *
+     * @param  string $subfield
+     * @return array
+     */
+    private function splitSubfield ($subfield)
+    {
+        return array('code' => $subfield[0], 'value' => substr($subfield, 1));
+    }
+
+    /**
+     * Return next octet without moving pointer.
+     *
+     * @return string|null
+     */
+    private function peek ()
+    {
+        return $this->getc(true);
+    }
+
+    /**
+     * Return next octet.
+     *
+     * If argument is true, the internal pointer is not moved after reading the octet.
+     *
+     * @throws \RuntimeException Error reading input stream
+     * @param  boolean $peek
+     * @return string|null Octet, null if input is exhausted
+     */
+    private function getc ($peek = false)
+    {
+        if ($this->feof()) {
+            return null;
+        }
+        if ($this->bufferPosition == $this->bufferSize) {
+            $buffer = fread($this->stream, 4096);
+            if ($buffer === false) {
+                throw new \RuntimeException('Error reading input stream');
+            }
+            $this->bufferPosition = 0;
+            $this->bufferSize = strlen($buffer);
+            $this->buffer = $buffer;
+            if ($this->bufferSize === 0) {
+                // Empty read: input is exhausted although feof() did not report it yet
+                return null;
+            }
+        }
+        $octet = $this->buffer[$this->bufferPosition];
+        if (!$peek) {
+            $this->bufferPosition++;
+        }
+        return $octet;
+    }
+
+    /**
+     * Return true if no stream is open or input stream and read-buffer exhausted.
+     *
+     * @return boolean
+     */
+    private function feof ()
+    {
+        return !is_resource($this->stream) || (feof($this->stream) && ($this->bufferPosition == $this->bufferSize));
+    }
+}
\ No newline at end of file
diff --git a/tests/src/HAB/Pica/Reader/PicaNormReaderTest.php b/tests/src/HAB/Pica/Reader/PicaNormReaderTest.php
new file mode 100644
index 0000000..1fb13f3
--- /dev/null
+++ b/tests/src/HAB/Pica/Reader/PicaNormReaderTest.php
@@ -0,0 +1,42 @@
+<?php
+
+/**
+ * Unit test for the PicaNormReader class.
+ *
+ * This file is part of PicaReader.
+ *
+ * PicaReader is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PicaReader is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PicaReader.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * @package   PicaReader
+ * @author    David Maus <maus@hab.de>
+ * @copyright Copyright (c) 2012, 2013 by Herzog August Bibliothek Wolfenbüttel
+ * @license   http://www.gnu.org/licenses/gpl.html GNU General Public License v3
+ */
+
+namespace HAB\Pica\Reader;
+
+use PHPUnit_FrameWork_TestCase;
+
+class PicaNormReaderTest extends PHPUnit_FrameWork_TestCase
+{
+    public function testReadStringData ()
+    {
+        // Two fields joined by the field separator (\x1e), no trailing separator
+        $input  = "003@ \x1f0test\x1e002@ \x1f0Aau";
+        $reader = new PicaNormReader();
+        $reader->open($input);
+        $this->assertInstanceOf('HAB\Pica\Record\TitleRecord', $reader->read());
+        $reader->close();
+    }
+}
-- 
GitLab