Skip to content
Snippets Groups Projects
Commit 4076f763 authored by David Maus's avatar David Maus
Browse files

Factor out PicaPlain parsing

parent 914c9321
No related branches found
No related tags found
No related merge requests found
<?php
/**
* The PicaPlainParser class file.
*
* This file is part of PicaReader.
*
* PicaReader is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* PicaReader is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with PicaReader. If not, see <http://www.gnu.org/licenses/>.
*
* @package PicaReader
* @author David Maus <maus@hab.de>
* @copyright Copyright (c) 2012 by Herzog August Bibliothek Wolfenbüttel
* @license http://www.gnu.org/licenses/gpl.html GNU General Public License v3
*/
namespace HAB\Pica\Parser;
/**
* Parser for Pica+ records encoded in PicaPlain.
*
* @package PicaReader
* @author David Maus <maus@hab.de>
* @copyright Copyright (c) 2012 by Herzog August Bibliothek Wolfenbüttel
* @license http://www.gnu.org/licenses/gpl.html GNU General Public License v3
*/
class PicaPlainParser
{
/**
* Return array representation of the field encoded in a line.
*
* @throws \RuntimeException Invalid characters in line
* @param string $line PicaPlain record line
* @return array Array representation of the encoded field
*/
public static function parseField ($line) {
$field = array('subfields' => array());
$match = array();
if (preg_match('#^([012][0-9]{2}[A-Z@])(/([0-9]{2}))? (\$.*)$#Du', $line, $match)) {
$field = array('tag' => $match[1],
'occurrence' => $match[3] ?: null,
'subfields' => self::parseSubfields($match[4]));;
} else {
throw new \RuntimeException("Invalid characters in PicaPlain record at line: {$line}");
}
return $field;
}
/**
* Return array of array representations of the subfields encode in argument.
*
* @param string $str Encoded subfields
* @return array Array representions of the encoded subfields
*/
public static function parseSubfields ($str) {
$subfields = array();
$subfield = null;
$pos = 0;
$max = strlen($str);
$state = '$';
do {
switch ($state) {
case '$':
if (is_array($subfield)) {
$subfields []= $subfield;
$subfield = array();
}
$pos += 1;
$state = 'code';
break;
case 'code':
$subfield['code'] = $str[$pos];
$subfield['value'] = '';
$pos += 1;
$state = 'value';
break;
case 'value':
$next = strpos($str, '$', $pos);
if ($next === false) {
$subfield['value'] .= substr($str, $pos);
$pos = $max;
} else {
$subfield['value'] .= substr($str, $pos, ($next - $pos));
$pos = $next;
if (isset($str[$pos + 1]) && $str[$pos + 1] === '$') {
$subfield['value'] .= '$';
$pos += 2;
} else {
$state = '$';
}
}
break;
}
} while ($pos < $max);
$subfields []= $subfield;
return $subfields;
}
}
\ No newline at end of file
......@@ -26,6 +26,8 @@
namespace HAB\Pica\Reader;
use HAB\Pica\Parser\PicaPlainParser;
/**
* Reader for Pica+ records encoded in PicaPlain.
*
......@@ -67,83 +69,13 @@ class PicaPlainReader extends Reader {
$record = array('fields' => array());
do {
$line = current($this->_data);
$record['fields'] []= $this->readField($line);
$record['fields'] []= PicaPlainParser::parseField($line);
} while (next($this->_data));
next($this->_data);
}
return $record;
}
/**
* Return array representation of the field encoded in a line.
*
* @throws \RuntimeException Invalid characters in line
* @param string $line PicaPlain record line
* @return array Array representation of the encoded field
*/
protected function readField ($line) {
$field = array('subfields' => array());
$match = array();
if (preg_match('#^([012][0-9]{2}[A-Z@])(/([0-9]{2}))? (\$.*)$#Du', $line, $match)) {
$field = array('tag' => $match[1],
'occurrence' => $match[3] ?: null,
'subfields' => $this->parseSubfields($match[4]));;
} else {
throw new \RuntimeException("Invalid characters in PicaPlain record near line {$this->getCurrentLineNumber()}");
}
return $field;
}
/**
* Return array of array representations of the subfields encode in argument.
*
* @param string $str Encoded subfields
* @return array Array representions of the encoded subfields
*/
protected function parseSubfields ($str) {
$subfields = array();
$subfield = null;
$pos = 0;
$max = strlen($str);
$state = '$';
do {
switch ($state) {
case '$':
if (is_array($subfield)) {
$subfields []= $subfield;
$subfield = array();
}
$pos += 1;
$state = 'code';
break;
case 'code':
$subfield['code'] = $str[$pos];
$subfield['value'] = '';
$pos += 1;
$state = 'value';
break;
case 'value':
$next = strpos($str, '$', $pos);
if ($next === false) {
$subfield['value'] .= substr($str, $pos);
$pos = $max;
} else {
$subfield['value'] .= substr($str, $pos, ($next - $pos));
$pos = $next;
if (isset($str[$pos + 1]) && $str[$pos + 1] === '$') {
$subfield['value'] .= '$';
$pos += 2;
} else {
$state = '$';
}
}
break;
}
} while ($pos < $max);
$subfields []= $subfield;
return $subfields;
}
/**
* Close the reader.
*
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment