Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
dmj
PicaReader
Commits
4076f763
Commit
4076f763
authored
Mar 23, 2012
by
David Maus
Browse files
Factor out PicaPlain parsing
parent
914c9321
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/php/HAB/Pica/Parser/PicaPlainParser.php
0 → 100644
View file @
4076f763
<?php
/**
* The PicaPlainParser class file.
*
* This file is part of PicaReader.
*
* PicaReader is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* PicaReader is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with PicaReader. If not, see <http://www.gnu.org/licenses/>.
*
* @package PicaReader
* @author David Maus <maus@hab.de>
* @copyright Copyright (c) 2012 by Herzog August Bibliothek Wolfenbüttel
* @license http://www.gnu.org/licenses/gpl.html GNU General Public License v3
*/
namespace
HAB\Pica\Parser
;
/**
* Parser for Pica+ records encoded in PicaPlain.
*
* @package PicaReader
* @author David Maus <maus@hab.de>
* @copyright Copyright (c) 2012 by Herzog August Bibliothek Wolfenbüttel
* @license http://www.gnu.org/licenses/gpl.html GNU General Public License v3
*/
class PicaPlainParser
{

    /**
     * Return array representation of the field encoded in a line.
     *
     * A valid line consists of a four character tag (a digit 0-2, two more
     * digits, an uppercase letter or "@"), an optional occurrence ("/"
     * followed by two digits), a single space and the subfield data, which
     * must start with the subfield delimiter "$".
     *
     * The returned array has the keys 'tag', 'occurrence' (NULL if the line
     * carries no occurrence) and 'subfields' (see parseSubfields()).
     *
     * @throws \RuntimeException Invalid characters in line
     * @param  string $line PicaPlain record line
     * @return array Array representation of the encoded field
     */
    public static function parseField($line)
    {
        $match = array();
        // preg_match() yields 0 on a mismatch and FALSE on an error (e.g.
        // malformed UTF-8 with the `u' modifier); both are rejected here.
        if (!preg_match('#^([012][0-9]{2}[A-Z@])(/([0-9]{2}))? (\$.*)$#Du', $line, $match)) {
            throw new \RuntimeException("Invalid characters in PicaPlain record at line: {$line}");
        }
        return array(
            'tag'        => $match[1],
            // The occurrence group is the empty string when absent; map it to NULL.
            'occurrence' => $match[3] ?: null,
            'subfields'  => self::parseSubfields($match[4])
        );
    }

    /**
     * Return array of array representations of the subfields encoded in argument.
     *
     * The argument is expected to start with the subfield delimiter "$".
     * Each subfield consists of the delimiter, a one byte subfield code and
     * the subfield value. A literal "$" inside a value is written as "$$".
     *
     * Only complete subfields are returned: an empty argument, a lone
     * delimiter or a dangling delimiter at the end of the argument does not
     * produce an entry. Previously these inputs yielded a NULL or an empty
     * array entry in the result.
     *
     * @param  string $str Encoded subfields
     * @return array Array representations of the encoded subfields, each
     *               with the keys 'code' and 'value'
     */
    public static function parseSubfields($str)
    {
        $subfields = array();
        $max = strlen($str);
        // Position 0 holds the delimiter that introduces the first subfield.
        $pos = 1;
        while ($pos < $max) {
            $subfield = array('code' => $str[$pos], 'value' => '');
            $pos += 1;
            // Collect the value up to the next unescaped delimiter.
            while ($pos < $max) {
                $next = strpos($str, '$', $pos);
                if ($next === false) {
                    // No further delimiter: the remainder is the value.
                    $subfield['value'] .= substr($str, $pos);
                    $pos = $max;
                    break;
                }
                $subfield['value'] .= substr($str, $pos, $next - $pos);
                if (isset($str[$next + 1]) && $str[$next + 1] === '$') {
                    // Doubled delimiter: an escaped literal "$" in the value.
                    $subfield['value'] .= '$';
                    $pos = $next + 2;
                } else {
                    // Single delimiter: step over it, the next subfield starts.
                    $pos = $next + 1;
                    break;
                }
            }
            $subfields[] = $subfield;
        }
        return $subfields;
    }
}
\ No newline at end of file
src/php/HAB/Pica/Reader/PicaPlainReader.php
View file @
4076f763
...
...
@@ -26,6 +26,8 @@
namespace
HAB\Pica\Reader
;
use
HAB\Pica\Parser\PicaPlainParser
;
/**
* Reader for Pica+ records encoded in PicaPlain.
*
...
...
@@ -67,83 +69,13 @@ class PicaPlainReader extends Reader {
$record
=
array
(
'fields'
=>
array
());
do
{
$line
=
current
(
$this
->
_data
);
$record
[
'fields'
]
[]
=
$this
->
read
Field
(
$line
);
$record
[
'fields'
]
[]
=
PicaPlainParser
::
parse
Field
(
$line
);
}
while
(
next
(
$this
->
_data
));
next
(
$this
->
_data
);
}
return
$record
;
}
/**
* Return array representation of the field encoded in a line.
*
* @throws \RuntimeException Invalid characters in line
* @param string $line PicaPlain record line
* @return array Array representation of the encoded field
*/
protected function readField($line)
{
    // Capture groups: 1 = tag, 3 = two-digit occurrence (optional),
    // 4 = subfield data starting with the "$" delimiter.
    $groups = array();
    $matched = preg_match('#^([012][0-9]{2}[A-Z@])(/([0-9]{2}))? (\$.*)$#Du', $line, $groups);
    if (!$matched) {
        // Covers both a mismatch (0) and a PCRE error (FALSE).
        throw new \RuntimeException("Invalid characters in PicaPlain record near line {$this->getCurrentLineNumber()}");
    }

    // An absent occurrence is captured as the empty string; report it as NULL.
    $occurrence = $groups[3] ?: null;
    $subfields = $this->parseSubfields($groups[4]);

    return array(
        'tag'        => $groups[1],
        'occurrence' => $occurrence,
        'subfields'  => $subfields
    );
}
/**
* Return array of array representations of the subfields encoded in argument.
*
* @param string $str Encoded subfields
* @return array Array representations of the encoded subfields
*/
protected function parseSubfields($str)
{
    // The argument is expected to start with the subfield delimiter "$".
    // Each subfield is the delimiter, a one byte code and the value; a
    // literal "$" inside a value is written as "$$".
    //
    // Only complete subfields are returned: an empty argument, a lone
    // delimiter or a dangling delimiter at the end does not produce an
    // entry (previously these yielded a NULL or an empty array entry).
    $subfields = array();
    $max = strlen($str);
    // Position 0 holds the delimiter that introduces the first subfield.
    $pos = 1;
    while ($pos < $max) {
        $subfield = array('code' => $str[$pos], 'value' => '');
        $pos += 1;
        // Collect the value up to the next unescaped delimiter.
        while ($pos < $max) {
            $next = strpos($str, '$', $pos);
            if ($next === false) {
                // No further delimiter: the remainder is the value.
                $subfield['value'] .= substr($str, $pos);
                $pos = $max;
                break;
            }
            $subfield['value'] .= substr($str, $pos, $next - $pos);
            if (isset($str[$next + 1]) && $str[$next + 1] === '$') {
                // Doubled delimiter: an escaped literal "$" in the value.
                $subfield['value'] .= '$';
                $pos = $next + 2;
            } else {
                // Single delimiter: step over it, the next subfield starts.
                $pos = $next + 1;
                break;
            }
        }
        $subfields[] = $subfield;
    }
    return $subfields;
}
/**
* Close the reader.
*
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment