Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
dmj
PicaReader
Commits
5ffc3fc0
Commit
5ffc3fc0
authored
Jan 14, 2013
by
David Maus
Browse files
Implement experimental Reader for normalized Pica+
parent
5c879dc7
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/HAB/Pica/Reader/PicaNormReader.php
0 → 100644
View file @
5ffc3fc0
<?php
/**
* Reader for normalized Pica+ records.
*
* @see http://www.gbv.de/wikis/cls/PICA%2B#Normalisiertes_PICA.2B
*
* This file is part of PicaReader.
*
* PicaReader is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* PicaReader is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with PicaReader. If not, see <http://www.gnu.org/licenses/>.
*
* @author David Maus <maus@hab.de>
* @copyright Copyright (c) 2013 by Herzog August Bibliothek Wolfenbüttel
* @license http://www.gnu.org/licenses/gpl.txt GNU General Public License v3
*/
namespace
HAB\Pica\Reader
;
use InvalidArgumentException;
use RuntimeException;
class PicaNormReader extends Reader
{
    /**
     * Octet that ends a record.
     *
     * @var string
     */
    const RECORD_SEPARATOR = "\x1d";

    /**
     * Octet that ends a field.
     *
     * @var string
     */
    const FIELD_SEPARATOR = "\x1e";

    /**
     * Octet that introduces a subfield.
     *
     * @var string
     */
    const SUBFIELD_SEPARATOR = "\x1f";

    /**
     * Input stream, or null while the reader is not open.
     *
     * @var resource|null
     */
    private $stream;

    /**
     * Read-buffer holding the most recently read chunk of the stream.
     *
     * @var string|null
     */
    private $buffer;

    /**
     * Number of octets in the read-buffer.
     *
     * @var integer
     */
    private $bufferSize = 0;

    /**
     * Position of the next unread octet in the read-buffer.
     *
     * @var integer
     */
    private $bufferPosition = 0;

    /**
     * Regular expression to split a field.
     *
     * Capture groups: 1 = tag (a digit 0-2, two digits, an uppercase letter
     * or "@"), 3 = optional two-digit occurrence following a slash,
     * 4 = everything after the first subfield separator.
     *
     * @var string
     */
    private $fieldRegexp = "|^([012][0-9]{2}[A-Z@])(/([0-9]{2}))?\x1f(.+)$|uD";

    /**
     * Constructor.
     *
     * @return void
     */
    public function __construct()
    {
    }

    /**
     * Open the reader with input stream.
     *
     * A string argument is wrapped in a base64-encoded data: URI stream so
     * that strings and streams share the same reading code. Any state of the
     * read-buffer left over from a previous input is discarded.
     *
     * @throws InvalidArgumentException Invalid stream type
     * @throws InvalidArgumentException Argument neither string nor stream
     *
     * @param  resource|string $stream
     * @return void
     */
    public function open($stream)
    {
        if (is_string($stream)) {
            $stream = fopen('data://text/plain;base64,' . base64_encode($stream), 'rb');
        }
        if (!is_resource($stream)) {
            throw new InvalidArgumentException(
                sprintf('Invalid type of argument: resource|string, %s', gettype($stream))
            );
        }

        $meta = stream_get_meta_data($stream);
        if ($meta['stream_type'] !== 'STDIO' && $meta['stream_type'] !== 'RFC2397') {
            // The message names the same stream types the condition tests.
            throw new InvalidArgumentException(
                sprintf('Invalid stream type: STDIO|RFC2397, %s', $meta['stream_type'])
            );
        }

        $this->stream         = $stream;
        $this->buffer         = null;
        $this->bufferSize     = 0;
        $this->bufferPosition = 0;
    }

    /**
     * Close reader.
     *
     * Closes the input stream if one is open and resets the reader, so that
     * calling this method repeatedly, or reading after it, does not touch an
     * already closed resource.
     *
     * @return void
     */
    public function close()
    {
        if (is_resource($this->stream)) {
            fclose($this->stream);
        }
        $this->stream         = null;
        $this->buffer         = null;
        $this->bufferSize     = 0;
        $this->bufferPosition = 0;
    }

    /**
     * Return next record from input stream.
     *
     * Collects fields until the record separator or the end of the input is
     * reached. The record separator is consumed, so the following call
     * starts at the first field of the next record.
     *
     * @throws RuntimeException Unexpected data or read error in input stream
     *
     * @return array|false Array with key 'fields', or false if no field
     *                     could be read
     */
    protected function next()
    {
        if ($this->feof()) {
            return false;
        }

        $record = array();
        while (!$this->feof() && $this->peek() !== self::RECORD_SEPARATOR) {
            $field = $this->field();
            if ($field) {
                $record['fields'][] = $field;
            }
        }

        // Swallow record separator; leaving it in place would make every
        // subsequent call stop immediately with an empty record.
        if (!$this->feof() && $this->peek() === self::RECORD_SEPARATOR) {
            $this->getc();
        }

        return empty($record) ? false : $record;
    }

    /**
     * Return Pica+ field.
     *
     * Reads octets up to the next field separator or the end of the input
     * and splits them into tag, occurrence and subfields.
     *
     * @throws RuntimeException Field does not match the expected layout
     * @throws RuntimeException Error reading input stream
     *
     * @return array|false Array with keys 'tag', 'occurrence' and
     *                     'subfields', or false if the input is exhausted
     */
    private function field()
    {
        if ($this->feof()) {
            return false;
        }

        $line = '';
        while (!$this->feof() && $this->peek() !== self::FIELD_SEPARATOR) {
            $octet = $this->getc();
            if ($octet !== null) {
                $line .= $octet;
            }
        }
        if (!$this->feof()) {
            // Swallow field separator
            $this->getc();
        }

        $matches = array();
        if (!preg_match($this->fieldRegexp, $line, $matches)) {
            throw new RuntimeException(
                sprintf('Unexpected data in input stream: %s', $line)
            );
        }

        // Group 4 starts right after the first subfield separator, hence
        // every piece begins with the subfield code.
        $subfields = array_map(
            array($this, 'splitSubfield'),
            explode(self::SUBFIELD_SEPARATOR, $matches[4])
        );

        return array(
            'tag'        => $matches[1],
            // Group 3 is the empty string if the field has no occurrence
            'occurrence' => $matches[3] ?: null,
            'subfields'  => $subfields
        );
    }

    /**
     * Split subfields into array structures.
     *
     * The first octet of the argument is the subfield code, the remainder is
     * the subfield value.
     *
     * @param  string $subfield
     * @return array  Array with keys 'code' and 'value'
     */
    private function splitSubfield($subfield)
    {
        return array(
            'code'  => $subfield[0],
            'value' => substr($subfield, 1)
        );
    }

    /**
     * Return next octet without moving pointer.
     *
     * @throws RuntimeException Error reading input stream
     *
     * @return string|null
     */
    private function peek()
    {
        return $this->getc(true);
    }

    /**
     * Return next octet.
     *
     * If argument is true, the internal pointer is not moved after reading
     * the octet. The read-buffer is refilled from the input stream in chunks
     * of up to 4096 octets whenever it is used up.
     *
     * @throws RuntimeException Error reading input stream
     *
     * @param  boolean $peek
     * @return string|null Next octet, or null if no octet is available
     */
    private function getc($peek = false)
    {
        if ($this->feof()) {
            return null;
        }

        if ($this->bufferPosition === $this->bufferSize) {
            $chunk = fread($this->stream, 4096);
            if ($chunk === false) {
                throw new RuntimeException('Error reading input stream');
            }
            $this->buffer         = $chunk;
            $this->bufferSize     = strlen($chunk);
            $this->bufferPosition = 0;
            if ($this->bufferSize === 0) {
                // Nothing was read, e.g. the end of the input was detected
                // only by this read. Indexing the empty buffer would raise
                // an error and move the position past the buffer size.
                return null;
            }
        }

        $octet = $this->buffer[$this->bufferPosition];
        if (!$peek) {
            $this->bufferPosition++;
        }
        return $octet;
    }

    /**
     * Return true if input stream and read-buffer exhausted.
     *
     * A reader without an open input stream is reported as exhausted.
     *
     * @return boolean
     */
    private function feof()
    {
        if (!is_resource($this->stream)) {
            return true;
        }
        return feof($this->stream) && $this->bufferPosition === $this->bufferSize;
    }
}
\ No newline at end of file
tests/src/HAB/Pica/Reader/PicaNormReaderTest.php
0 → 100644
View file @
5ffc3fc0
<?php
/**
* Unit test for the PicaNormReader class.
*
* This file is part of PicaReader.
*
* PicaReader is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* PicaReader is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with PicaReader. If not, see <http://www.gnu.org/licenses/>.
*
* @package PicaReader
* @author David Maus <maus@hab.de>
* @copyright Copyright (c) 2012, 2013 by Herzog August Bibliothek Wolfenbüttel
* @license http://www.gnu.org/licenses/gpl.html GNU General Public License v3
*/
namespace
HAB\Pica\Reader
;
use
PHPUnit_FrameWork_TestCase
;
class PicaNormReaderTest extends PHPUnit_FrameWork_TestCase
{
    /**
     * Open the reader on string input and check the type of the first
     * record it returns.
     *
     * The input consists of two fields, 003@ and 002@, divided by a field
     * separator; each field carries a single subfield with code "0".
     *
     * @return void
     */
    public function testReadStringData()
    {
        $input = "003@\x1f0test\x1e002@\x1f0Aau";

        $picaReader = new PicaNormReader();
        $picaReader->open($input);

        $this->assertInstanceOf('HAB\Pica\Record\TitleRecord', $picaReader->read());

        $picaReader->close();
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment