Skip to content

Commit

Permalink
Issue number: Changes for 2.4 release
Browse files Browse the repository at this point in the history
Submitted by:  Robert Haschart
  • Loading branch information
haschart committed Sep 26, 2008
1 parent 1d3bf47 commit 1dba2aa
Show file tree
Hide file tree
Showing 25 changed files with 2,957 additions and 265 deletions.
2 changes: 1 addition & 1 deletion build.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ src.dir=src
build.dir=build
dist.dir=dist
apidoc.dir=apidoc
version=2.3.2
version=2.4
project.name=marc4j
18 changes: 15 additions & 3 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,22 @@
<mkdir dir="${build.dir}" />
</target>

<target name="codetablegen" unless="codegen.notrequired">
<java fork="true" classpath="${build.dir}" classname="org.marc4j.converter.impl.CodeTable" output="${src.dir}/org/marc4j/converter/impl/CodeTableGenerated.java" />
</target>

<target name="compile" depends="prepare">
<javac srcdir="${src.dir}" destdir="${build.dir}">
<classpath refid="classpath" />
</javac>
<javac srcdir="${src.dir}" destdir="${build.dir}" includes="**/*.java" excludes="**/CodeTableGenerated.java" debug="true" debuglevel="source,lines,vars">
<classpath refid="classpath" />
</javac>
<uptodate property="codegen.notrequired" targetfile="${src.dir}/org/marc4j/converter/impl/CodeTableGenerated.java" >
<srcfiles dir= "${src.dir}" includes="**/*.xml,**/CodeTable.java"/>
</uptodate>
<antcall target="codetablegen"/>

<javac srcdir="${src.dir}" destdir="${build.dir}" includes="**/CodeTableGenerated.java" debug="false">
<classpath refid="classpath" />
</javac>
<copy todir="${build.dir}">
<fileset dir="${src.dir}">
<include name="**/*.properties" />
Expand Down
27 changes: 26 additions & 1 deletion changes.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,34 @@
Changes to MARC4J 2.4

MARC4J 2.4 is a minor release providing some bug fixes and some new functionality.

- Added MarcPermissiveStreamReader which is more capable of reading records that contain structural or
encoding errors, and is capable of translating the records to UTF-8 as they are read.
- Added ErrorHandler which is used for tracking and reporting structural or encoding errors
encountered by the MarcPermissiveStreamReader.
- Added MarcDirStreamReader which iterates over all of the MARC record files in a given directory.
- Modified MarcStreamReader so that if an exception is thrown for an error in one record you can
choose to catch the exception, discard the erroneous record and continue reading from the input file.
- Modified AnselToUnicode to fix some problems that would occur when trying to handle Chinese characters,
to fix an infinite loop problem that would occur sometimes when extraneous characters appear within a
MARC8 character set escape sequence, and made many changes to support the MarcPermissiveStreamReader
to report and try to recover from encoding errors in the records being read.
- Modified CodeTable (which is used by AnselToUnicode) so that rather than reading and parsing a large
XML file to create the hash tables for mapping MARC8 to Unicode at runtime, the parsing is done once
at compile time, and a class that handles the mapping directly via switch statements is automatically
generated.
- Made minor changes to the MarcXmlReader so that if an exception occurs in the MarcXmlParserThread that
it starts, the exception is passed to the MarcXmlReader rather than simply hanging the parser thread.
- Added PermissiveReaderExample which demonstrates how to use the MarcPermissiveStreamReader to examine and/or
validate records for structural or encoding errors.


Changes to MARC4J 2.3.1

MARC4J 2.3.1 is a minor release with some encoding fixes

- Fixed encoding bug in MarcStreamReader: now sets ISO8859_1 as default as alternative for MARC-8 and
UNIMARC encoding alternative. For MARC 21 the ledare is checked: space is ISO 8859_1 and a is UTF-8.
UNIMARC encoding alternative. For MARC 21 the leader is checked: space is ISO 8859_1 and a is UTF-8.
When an encoding is provided in the MarcStreamReader constructor, this encoding overrides
the default encoding and the leader encoding value.
- MarcXmlDriver: when converting from MARC-8 to UTF-8 character coding scheme in leader (pos. 9) is set to 'a'.
Expand Down
205 changes: 205 additions & 0 deletions src/org/marc4j/ErrorHandler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
// $Id: ErrorHandler.java,v 1.6 2008/09/26 21:17:42 haschart Exp $
/**
* Copyright (C) 2004 Bas Peters
*
* This file is part of MARC4J
*
* MARC4J is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* MARC4J is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with MARC4J; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
package org.marc4j;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/**
* Defines and describes errors encountered in the processing of a given MARC record.
* Used in conjunction with the MarcPermissiveStreamReader class.
*
* @author Robert Haschart
* @version $Revision: 1.6 $
*/
public class ErrorHandler {

    // Severity levels. When the handler tracks the maximum severity seen,
    // a numerically larger level outranks a smaller one.
    public static final int FATAL = 5;
    public static final int MAJOR_ERROR = 4;
    public static final int MINOR_ERROR = 3;
    public static final int ERROR_TYPO = 2;
    public static final int WARNING = 1;
    public static final int INFO = 0;

    /** Recorded errors; stays {@code null} until the first error is added. */
    private List<Object> errors;
    /** Record id, field and subfield used by {@link #addError(int, String)}. */
    private String curRecordID;
    private String curField;
    private String curSubfield;
    /** True while at least one error was recorded with the placeholder id "unknown". */
    boolean hasMissingID;
    /** Highest severity passed to {@code addError} since construction or the last reset. */
    int maxSeverity;

    /**
     * One recorded error: the record id, field and subfield it was reported
     * against, plus its severity level and message text.
     */
    public class Error {
        private String curRecordID;
        private String curField;
        private String curSubfield;
        private int severity;
        private String message;

        /**
         * @param recordID id of the record the error belongs to
         * @param field    field the error was reported against
         * @param subfield subfield the error was reported against
         * @param severity one of the severity constants of the enclosing class
         * @param message  description of the error
         */
        public Error(String recordID, String field, String subfield, int severity, String message) {
            this.curRecordID = recordID;
            this.curField = field;
            this.curSubfield = subfield;
            this.severity = severity;
            this.message = message;
        }

        /**
         * Formats the error as
         * {@code <severity label> : <message> --- [ <field> : <subfield> ]}.
         * The record id is not part of the text.
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            // append(String) renders a null reference as "null", exactly as
            // string concatenation does.
            text.append(getSeverityMsg(severity));
            text.append(" : ").append(message);
            text.append(" --- [ ").append(curField);
            text.append(" : ").append(curSubfield);
            text.append(" ]");
            return text.toString();
        }

        public void setCurRecordID(String curRecordID) {
            this.curRecordID = curRecordID;
        }

        public String getCurRecordID() {
            return curRecordID;
        }

        public int getSeverity() {
            return severity;
        }

        public void setSeverity(int severity) {
            this.severity = severity;
        }
    }

    /** Creates a handler with no recorded errors and a maximum severity of {@link #INFO}. */
    public ErrorHandler() {
        maxSeverity = INFO;
        hasMissingID = false;
        errors = null;
    }

    /**
     * Maps a severity level to its display label.
     *
     * @param severity a severity level
     * @return the label for the level, or {@code null} when the value is not
     *         one of the severity constants
     */
    public String getSeverityMsg(int severity) {
        String label;
        switch (severity) {
            case FATAL:
                label = "FATAL ";
                break;
            case MAJOR_ERROR:
                label = "Major Error ";
                break;
            case MINOR_ERROR:
                label = "Minor Error ";
                break;
            case ERROR_TYPO:
                label = "Typo ";
                break;
            case WARNING:
                label = "Warning ";
                break;
            case INFO:
                label = "Info ";
                break;
            default:
                label = null;
                break;
        }
        return label;
    }

    /**
     * @return {@code true} when at least one error is recorded and the maximum
     *         severity seen is above {@link #INFO}
     */
    public boolean hasErrors() {
        if (errors == null || errors.isEmpty()) {
            return false;
        }
        return maxSeverity > INFO;
    }

    /** @return the highest severity recorded since construction or the last {@link #reset()} */
    public int getMaxSeverity() {
        return maxSeverity;
    }

    /**
     * @return the internal list of recorded {@link Error} objects, or
     *         {@code null} when nothing has been recorded
     */
    public List<Object> getErrors() {
        return (errors != null && !errors.isEmpty()) ? errors : null;
    }

    /**
     * Drops all recorded errors and sets the maximum severity back to
     * {@link #INFO}. The current record id, field and subfield are kept.
     */
    public void reset() {
        maxSeverity = INFO;
        errors = null;
    }

    /**
     * Records an error against an explicit record id, field and subfield.
     * An id of "unknown" marks the handler as holding errors without a real
     * id; the next call that carries a different id (or a later
     * {@link #setRecordID(String)}) hands that id to the earlier entries.
     *
     * @param id       record id, or the placeholder "unknown"
     * @param field    field the error was reported against
     * @param subfield subfield the error was reported against
     * @param severity one of the severity constants
     * @param message  description of the error
     */
    public void addError(String id, String field, String subfield, int severity, String message) {
        if (errors == null) {
            errors = new LinkedList<Object>();
            hasMissingID = false;
        }
        if ("unknown".equals(id)) {
            hasMissingID = true;
        } else if (hasMissingID) {
            setRecordIDForAll(id);
        }
        errors.add(new Error(id, field, subfield, severity, message));
        maxSeverity = Math.max(maxSeverity, severity);
    }

    /**
     * Records an error against the handler's current record id, field and
     * subfield.
     *
     * @param severity one of the severity constants
     * @param message  description of the error
     */
    public void addError(int severity, String message) {
        addError(curRecordID, curField, curSubfield, severity, message);
    }

    public String getRecordID() {
        return curRecordID;
    }

    /**
     * Gives {@code id} to every recorded error whose id is {@code null} or
     * "unknown", then clears the missing-id flag. Does nothing when
     * {@code id} is {@code null}.
     */
    private void setRecordIDForAll(String id) {
        if (id == null) {
            return;
        }
        for (Object entry : errors) {
            Error recorded = (Error) entry;
            String existing = recorded.getCurRecordID();
            if (existing == null || existing.equals("unknown")) {
                recorded.setCurRecordID(id);
            }
        }
        hasMissingID = false;
    }

    /**
     * Sets the current record id and, when earlier errors were recorded under
     * the placeholder id, passes the new id on to them.
     *
     * @param recordID the id of the record now being processed
     */
    public void setRecordID(String recordID) {
        curRecordID = recordID;
        if (errors != null && hasMissingID) {
            setRecordIDForAll(recordID);
        }
    }

    public String getField() {
        return curField;
    }

    public void setField(String curField) {
        this.curField = curField;
    }

    public String getCurSubfield() {
        return curSubfield;
    }

    public void setCurSubfield(String curSubfield) {
        this.curSubfield = curSubfield;
    }
}
Loading

0 comments on commit 1dba2aa

Please sign in to comment.