From 1dba2aab97bbc4ada2e23b7205882c58afd7825a Mon Sep 17 00:00:00 2001
From: haschart <haschart>
Date: Fri, 26 Sep 2008 21:17:42 +0000
Subject: [PATCH] Issue number:  Changes for 2.4 release Submitted by:  Robert
 Haschart

---
 build.properties                              |    2 +-
 build.xml                                     |   18 +-
 changes.txt                                   |   27 +-
 src/org/marc4j/ErrorHandler.java              |  205 +++
 src/org/marc4j/MarcDirStreamReader.java       |  145 ++
 .../marc4j/MarcPermissiveStreamReader.java    | 1523 +++++++++++++++++
 src/org/marc4j/MarcStreamReader.java          |  311 ++--
 src/org/marc4j/MarcXmlParserThread.java       |   11 +-
 src/org/marc4j/RecordStack.java               |   20 +-
 src/org/marc4j/converter/CharConverter.java   |   27 +-
 .../marc4j/converter/impl/AnselToUnicode.java |  375 +++-
 src/org/marc4j/converter/impl/CodeTable.java  |  168 +-
 .../converter/impl/CodeTableHandler.java      |   26 +-
 .../converter/impl/CodeTableInterface.java    |    7 +
 .../converter/impl/Iso5426ToUnicode.java      |    9 +-
 .../converter/impl/Iso6937ToUnicode.java      |    9 +-
 .../marc4j/converter/impl/UnicodeToAnsel.java |    9 +-
 .../converter/impl/UnicodeToIso5426.java      |    9 +-
 .../converter/impl/UnicodeToIso6937.java      |    9 +-
 src/org/marc4j/marc/impl/RecordImpl.java      |    8 +-
 src/org/marc4j/marc/impl/Verifier.java        |   16 +-
 .../samples/HandleExceptionExample.java       |   57 +
 .../samples/PermissiveReaderExample.java      |  229 +++
 .../marc4j/samples/resources/diacritic4.mrc   |    1 +
 src/org/marc4j/samples/resources/error.mrc    |    1 +
 25 files changed, 2957 insertions(+), 265 deletions(-)
 create mode 100644 src/org/marc4j/ErrorHandler.java
 create mode 100644 src/org/marc4j/MarcDirStreamReader.java
 create mode 100644 src/org/marc4j/MarcPermissiveStreamReader.java
 create mode 100644 src/org/marc4j/converter/impl/CodeTableInterface.java
 create mode 100644 src/org/marc4j/samples/HandleExceptionExample.java
 create mode 100644 src/org/marc4j/samples/PermissiveReaderExample.java
 create mode 100644 src/org/marc4j/samples/resources/diacritic4.mrc
 create mode 100644 src/org/marc4j/samples/resources/error.mrc
diff --git a/build.properties b/build.properties
index 5d6f38bf..2c4c6096 100644
--- a/build.properties
+++ b/build.properties
@@ -3,5 +3,5 @@ src.dir=src
 build.dir=build
 dist.dir=dist
 apidoc.dir=apidoc
-version=2.3.2
+version=2.4
 project.name=marc4j
\ No newline at end of file
diff --git a/build.xml b/build.xml
index c619e3fc..22995475 100644
--- a/build.xml
+++ b/build.xml
@@ -27,10 +27,22 @@
 		<mkdir dir="${build.dir}" />
 	</target>
 
+	<target name="codetablegen"  unless="codegen.notrequired">
+		<java fork="true" classpath="${build.dir}" classname="org.marc4j.converter.impl.CodeTable" output="${src.dir}/org/marc4j/converter/impl/CodeTableGenerated.java" />
+	</target>
+	
 	<target name="compile" depends="prepare">
-		<javac srcdir="${src.dir}" destdir="${build.dir}">
-			<classpath refid="classpath" />
-		</javac>
+        <javac srcdir="${src.dir}" destdir="${build.dir}" includes="**/*.java" excludes="**/CodeTableGenerated.java" debug="true" debuglevel="source,lines,vars">
+            <classpath refid="classpath" />
+        </javac>
+		<uptodate property="codegen.notrequired" targetfile="${src.dir}/org/marc4j/converter/impl/CodeTableGenerated.java" >
+		    <srcfiles dir= "${src.dir}" includes="**/*.xml,**/CodeTable.java"/>
+		</uptodate>
+		<antcall target="codetablegen"/>
+	    
+        <javac srcdir="${src.dir}" destdir="${build.dir}" includes="**/CodeTableGenerated.java" debug="false">
+            <classpath refid="classpath" />
+        </javac>
 		<copy todir="${build.dir}">
 			<fileset dir="${src.dir}">
 				<include name="**/*.properties" />
diff --git a/changes.txt b/changes.txt
index 1f567c7b..9a672dd3 100644
--- a/changes.txt
+++ b/changes.txt
@@ -1,9 +1,34 @@
+Changes to MARC4J 2.4
+
+MARC4J 2.4 is a minor release providing some bug fixes and some new functionality.
+
+- Added MarcPermissiveStreamReader which is more capable of reading records that contain structural or
+  encoding errors, and is capable of translating the records to UTF-8 as they are read.
+- Added ErrorHandler which is used for tracking and reporting structural or encoding errors 
+  encountered by the MarcPermissiveStreamReader.
+- Added MarcDirStreamReader which iterates over all of the MARC record files in a given directory.
+- Modified MarcStreamReader so that if an exception is thrown for an error in one record you can
+  choose to catch the exception, discard the erroneous record and continue reading from the input file.
+- Modified AnselToUnicode to fix some problems that would occur when trying to handle Chinese characters,
+  to fix an infinite loop problem that would occur sometimes when extraneous characters appear within a
+  MARC8 character set escape sequence, and made many changes to support the MarcPermissiveStreamReader
+  to report and try to recover from encoding errors in the records being read.
+- Modified CodeTable (which is used by AnselToUnicode) so that rather than reading and parsing a large 
+  XML file to create the hash tables for mapping MARC8 to Unicode at runtime, the parsing is done once
+  at compile time, and a class that handles the mapping directly via switch statements is automatically 
+  generated.
+- Made minor changes to the MarcXmlReader so that if an exception occurs in the MarcXmlParserThread that
+  it starts, the exception is passed to the MarcXmlReader rather than simply hanging the parser thread.
+- Added PermissiveReaderExample which demonstrates how to use the MarcPermissiveStreamReader to examine and/or
+  validate records for structural or encoding errors.  
+  
+
 Changes to MARC4J 2.3.1
 
 MARC4J 2.3.1 is a minor release with some encoding fixes
 
 - Fixed encoding bug in MarcStreamReader: now sets ISO8859_1 as default as alternative for MARC-8 and 
-  UNIMARC encoding alternative. For MARC 21 the ledare is checked: space is ISO 8859_1 and a is UTF-8.
+  UNIMARC encoding alternative. For MARC 21 the leader is checked: space is ISO 8859_1 and a is UTF-8.
   When an encoding is provided in the MarcStreamReader constructor, this encoding overrides 
   the default encoding and the leader encoding value.
 - MarcXmlDriver: when converting from MARC-8 to UTF-8 character coding scheme in leader (pos. 9) is set to 'a'.
diff --git a/src/org/marc4j/ErrorHandler.java b/src/org/marc4j/ErrorHandler.java
new file mode 100644
index 00000000..42041075
--- /dev/null
+++ b/src/org/marc4j/ErrorHandler.java
@@ -0,0 +1,205 @@
+// $Id: ErrorHandler.java,v 1.6 2008/09/26 21:17:42 haschart Exp $
+/**
+ * Copyright (C) 2004 Bas Peters
+ *
+ * This file is part of MARC4J
+ *
+ * MARC4J is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public 
+ * License as published by the Free Software Foundation; either 
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * MARC4J is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public 
+ * License along with MARC4J; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+package org.marc4j;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Defines and describes errors encountered in the processing of a given MARC record.
+ * Used in conjunction with the MarcPermissiveStreamReader class.
+ *
+ * @author Robert Haschart
+ * @version $Revision: 1.6 $
+ */
+public class ErrorHandler {
+
+    public final static int FATAL = 5;
+    public final static int MAJOR_ERROR = 4;
+    public final static int MINOR_ERROR = 3;
+    public final static int ERROR_TYPO = 2;
+    public final static int WARNING = 1;
+    public final static int INFO = 0;
+    
+    private List<Object> errors;
+    private String curRecordID;
+    private String curField;
+    private String curSubfield;
+    boolean hasMissingID;
+    int maxSeverity;
+    
+    public class Error {
+        private String curRecordID;
+        private String curField;
+        private String curSubfield;
+        private int severity;
+        private String message;
+        
+        public Error(String recordID, String field, String subfield, int severity, String message)
+        {
+            curRecordID = recordID;
+            curField = field;
+            curSubfield = subfield;
+            this.severity = severity;
+            this.message = message;
+        }
+        
+        public String toString()
+        {
+            String severityMsg = getSeverityMsg(severity);
+            String ret = severityMsg +" : " + message + " --- [ " + curField + " : " + curSubfield  + " ]" ;
+            return(ret);
+        }
+
+        public void setCurRecordID(String curRecordID)
+        {
+            this.curRecordID = curRecordID;
+        }
+        
+        public String getCurRecordID()
+        {
+            return(curRecordID);
+        }
+
+        public int getSeverity()
+        {
+            return severity;
+        }
+
+        public void setSeverity(int severity)
+        {
+            this.severity = severity;
+        }
+    }
+    
+    public ErrorHandler() 
+    {
+        errors = null;
+        hasMissingID = false;
+        maxSeverity = INFO;
+    }
+
+    public String getSeverityMsg(int severity)
+    {
+        switch (severity) {
+            case FATAL:                 return("FATAL       ");
+            case MAJOR_ERROR:          return("Major Error ");
+            case MINOR_ERROR:          return("Minor Error ");
+            case ERROR_TYPO:            return("Typo        ");
+            case WARNING:               return("Warning     ");
+            case INFO:                  return("Info        ");
+        }
+        return(null);
+    }
+
+    public boolean hasErrors()
+    {
+        return (errors != null && errors.size() > 0 && maxSeverity > INFO);
+    }
+    
+    public int getMaxSeverity()
+    {
+        return (maxSeverity);
+    }
+    
+    public List<Object> getErrors()
+    {
+        if (errors == null || errors.size() == 0) return null;        
+        return(errors);
+    }
+    
+    public void reset()
+    {
+        errors = null;
+        maxSeverity = INFO;
+    }
+    
+    public void addError(String id, String field, String subfield, int severity, String message)
+    {
+        if (errors == null) 
+        {
+            errors = new LinkedList<Object>();
+            hasMissingID = false;
+        }
+        if (id != null && id.equals("unknown"))  hasMissingID = true;
+        else if (hasMissingID)  
+        {
+            setRecordIDForAll(id);
+        }
+        errors.add(new Error(id, field, subfield, severity, message));
+        if (severity > maxSeverity)   maxSeverity = severity; 
+    }
+    
+    public void addError(int severity, String message)
+    {
+        addError(curRecordID, curField, curSubfield, severity, message);
+    }
+
+    public String getRecordID()
+    {
+        return curRecordID;
+    }
+
+    private void setRecordIDForAll(String id)
+    {
+        if (id != null)
+        { 
+            Iterator<Object> iter = errors.iterator();       
+            while (iter.hasNext())
+            {
+                Error err = (Error)(iter.next());
+                if (err.getCurRecordID() == null || err.getCurRecordID().equals("unknown"))
+                {
+                    err.setCurRecordID(id);
+                }
+            }
+            hasMissingID = false;
+        }
+    }
+    
+    public void setRecordID(String recordID)
+    {
+        curRecordID = recordID;
+        if (hasMissingID && errors != null) setRecordIDForAll(recordID);
+    }
+
+    public String getField()
+    {
+        return curField;
+    }
+
+    public void setField(String curField)
+    {
+        this.curField = curField;
+    }
+
+    public String getCurSubfield()
+    {
+        return curSubfield;
+    }
+
+    public void setCurSubfield(String curSubfield)
+    {
+        this.curSubfield = curSubfield;
+    }
+}
diff --git a/src/org/marc4j/MarcDirStreamReader.java b/src/org/marc4j/MarcDirStreamReader.java
new file mode 100644
index 00000000..d8c02723
--- /dev/null
+++ b/src/org/marc4j/MarcDirStreamReader.java
@@ -0,0 +1,145 @@
+// $Id: MarcDirStreamReader.java,v 1.1 2008/09/26 21:17:42 haschart Exp $
+/**
+ * Copyright (C) 2004 Bas Peters
+ *
+ * This file is part of MARC4J
+ *
+ * MARC4J is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public 
+ * License as published by the Free Software Foundation; either 
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * MARC4J is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public 
+ * License along with MARC4J; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+package org.marc4j;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FilenameFilter;
+
+import org.marc4j.MarcPermissiveStreamReader;
+import org.marc4j.MarcReader;
+import org.marc4j.marc.Record;
+
+/**
+ * 
+ * @author Robert Haschart
+ * @version $Id: MarcDirStreamReader.java,v 1.1 2008/09/26 21:17:42 haschart Exp $
+ *
+ */
+public class MarcDirStreamReader implements MarcReader
+{
+    File list[];
+    MarcReader curFileReader;
+    int curFileNum;
+    boolean permissive;
+    boolean convertToUTF8;
+    String defaultEncoding;
+    
+    public MarcDirStreamReader(String dirName)
+    {
+        File dir = new File(dirName);
+        init(dir, false, false, null);
+    }
+    
+    public MarcDirStreamReader(File dir)
+    {
+        init(dir, false, false, null);
+    }
+
+    public MarcDirStreamReader(String dirName, boolean permissive, boolean convertToUTF8)
+    {
+        File dir = new File(dirName);
+        init(dir, permissive, convertToUTF8, null);
+    }
+    
+    public MarcDirStreamReader(File dir, boolean permissive, boolean convertToUTF8)
+    {
+        init(dir, permissive, convertToUTF8, null);
+    }
+
+    public MarcDirStreamReader(String dirName, boolean permissive, boolean convertToUTF8, String defaultEncoding)
+    {
+        File dir = new File(dirName);
+        init(dir, permissive, convertToUTF8, defaultEncoding);
+    }
+    
+    public MarcDirStreamReader(File dir, boolean permissive, boolean convertToUTF8, String defaultEncoding)
+    {
+        init(dir, permissive, convertToUTF8, defaultEncoding);
+    }
+
+    private void init(File dir, boolean permissive, boolean convertToUTF8, String defaultEncoding)
+    {
+        FilenameFilter filter = new FilenameFilter()
+        {
+            public boolean accept(File dir, String name)
+            {
+                return(name.endsWith("mrc"));
+            }
+        };
+        this.permissive = permissive;
+        this.convertToUTF8 = convertToUTF8;
+        list = dir.listFiles(filter);
+        java.util.Arrays.sort(list);
+        curFileNum = 0;
+        curFileReader = null;
+        this.defaultEncoding = defaultEncoding;
+    }
+    
+    public boolean hasNext()
+    {
+        if (curFileReader == null || curFileReader.hasNext() == false)
+        {
+            nextFile();
+        }
+        return (curFileReader == null ? false : curFileReader.hasNext());
+    }
+
+    private void nextFile()
+    {
+        if (curFileNum != list.length)
+        {
+            try
+            {
+                System.err.println("Switching to input file: "+ list[curFileNum]);
+                if (defaultEncoding != null)
+                {
+                    curFileReader = new MarcPermissiveStreamReader(new FileInputStream(list[curFileNum++]), permissive, convertToUTF8, defaultEncoding);
+                }
+                else
+                {
+                    curFileReader = new MarcPermissiveStreamReader(new FileInputStream(list[curFileNum++]), permissive, convertToUTF8);
+                }
+            }
+            catch (FileNotFoundException e)
+            {
+                nextFile();
+            }
+        }
+        else 
+        {
+            curFileReader = null;
+        }
+    }
+
+    public Record next()
+    {
+        if (curFileReader == null || curFileReader.hasNext() == false)
+        {
+            nextFile();
+        }
+        return (curFileReader == null ? null : curFileReader.next());
+    }
+
+}
diff --git a/src/org/marc4j/MarcPermissiveStreamReader.java b/src/org/marc4j/MarcPermissiveStreamReader.java
new file mode 100644
index 00000000..de76846d
--- /dev/null
+++ b/src/org/marc4j/MarcPermissiveStreamReader.java
@@ -0,0 +1,1523 @@
+/**
+ * Copyright (C) 2004 Bas Peters
+ *
+ * This file is part of MARC4J
+ *
+ * MARC4J is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public 
+ * License as published by the Free Software Foundation; either 
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * MARC4J is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public 
+ * License along with MARC4J; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+package org.marc4j;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.marc4j.Constants;
+import org.marc4j.MarcException;
+import org.marc4j.MarcReader;
+import org.marc4j.converter.CharConverter;
+import org.marc4j.converter.impl.AnselToUnicode;
+import org.marc4j.converter.impl.Iso5426ToUnicode;
+import org.marc4j.marc.ControlField;
+import org.marc4j.marc.DataField;
+import org.marc4j.marc.Leader;
+import org.marc4j.marc.MarcFactory;
+import org.marc4j.marc.Record;
+import org.marc4j.marc.Subfield;
+import org.marc4j.marc.VariableField;
+import org.marc4j.marc.impl.Verifier;
+
+import com.ibm.icu.text.Normalizer;
+
+/**
+ * An iterator over a collection of MARC records in ISO 2709 format, that is designed
+ * to be able to handle MARC records that have errors in their structure or their encoding.
+ * If the permissive flag is set in the call to the constructor, or if an ErrorHandler object
+ * is passed in as a parameter to the constructor, this reader will do its best to detect 
+ * and recover from a number of structural or encoding errors that can occur in a MARC record.
+ * Note that if this reader is not set to read permissively, it will operate pretty much
+ * identically to the MarcStreamReader class.
+ * 
+ * Note that no attempt is made to validate the contents of the record at a semantic level.
+ * This reader does not know and does not care whether the record has a 245 field, or if the
+ * 008 field is the right length, but if the record claims to be UTF-8 or MARC8 encoded and 
+ * you are seeing gibberish in the output, or if the reader is throwing an exception in trying
+ * to read a record, then this reader may be able to produce a usable record from the bad 
+ * data you have.
+ * 
+ * The ability to directly translate the record to UTF-8 as it is being read in is useful in
+ * cases where the UTF-8 version of the record will be used directly by the program that is
+ * reading the MARC data, for instance if the marc records are to be indexed into a SOLR search
+ * engine.  Previously the MARC record could only be translated to UTF-8 as it was being written 
+ * out via a MarcStreamWriter or a MarcXmlWriter.
+ * 
+ * <p>
+ * Example usage:
+ * 
+ * <pre>
+ * InputStream input = new FileInputStream(&quot;file.mrc&quot;);
+ * MarcReader reader = new MarcPermissiveStreamReader(input, true, false);
+ * while (reader.hasNext()) {
+ *     Record record = reader.next();
+ *     // Process record
+ * }
+ * </pre>
+ * 
+ * <p>
+ * Check the {@link org.marc4j.marc}&nbsp;package for examples about the use of
+ * the {@link org.marc4j.marc.Record}&nbsp;object model.
+ * Check the file org.marc4j.samples.PermissiveReaderExample.java for an
+ * example about using the MarcPermissiveStreamReader.
+ * </p>
+ * 
+ * <p>
+ * When no encoding is given as a constructor argument the parser tries to
+ * resolve the encoding by looking at the character coding scheme (leader
+ * position 9) in MARC21 records. For UNIMARC records this position is not
+ * defined.   If the reader is operating in permissive mode and no encoding 
+ * is given as a constructor argument the reader will look at the leader,
+ * and also at the data of the record to determine to the best of its ability 
+ * what character encoding scheme has been used to encode the data in a 
+ * particular MARC record.
+ *   
+ * </p>
+ * 
+ * @author Robert Haschart
+ * @version $Revision: 1.1 $
+ * 
+ */
+public class MarcPermissiveStreamReader implements MarcReader {
+
+    private DataInputStream input = null;
+
+    private Record record;
+
+    private MarcFactory factory;
+
+    private String encoding = "ISO8859_1";
+
+    // This represents the expected encoding of the data when a 
+    // MARC record does not have an 'a' in character 9 of the leader.
+    private String defaultEncoding = "ISO8859_1";
+
+    private boolean convertToUTF8 = false;
+   
+    private boolean permissive = false;
+    
+    private CharConverter converterAnsel = null;
+
+    private CharConverter converterUnimarc = null;
+    
+    // These are used to algorithmically determine what encoding scheme was 
+    // used to encode the data in the Marc record
+    private String conversionCheck1 = null;    
+    private String conversionCheck2 = null;
+    private String conversionCheck3 = null;
+
+    private ErrorHandler errors;
+       
+    /**
+     * Constructs an instance with the specified input stream with possible additional functionality
+     * being enabled by setting permissive and/or convertToUTF8 to true.
+     * 
+     * If permissive and convertToUTF8 are both set to false, it functions almost identically to the
+     * MarcStreamReader class.
+     */
+    public MarcPermissiveStreamReader(InputStream input, boolean permissive, boolean convertToUTF8) {
+        this.permissive = permissive;
+        this.input = new DataInputStream(new BufferedInputStream(input));
+        factory = MarcFactory.newInstance();
+        this.convertToUTF8 = convertToUTF8;
+        errors = null;
+        if (permissive) 
+        {
+            errors = new ErrorHandler();
+            defaultEncoding = "BESTGUESS";
+        }
+    }
+    
+    /**
+     * Constructs an instance with the specified input stream with possible additional functionality
+     * being enabled by passing in an ErrorHandler object and/or setting convertToUTF8 to true.
+     * 
+     * If errors is null and convertToUTF8 is set to false, it functions almost identically to the
+     * MarcStreamReader class.
+     * 
+     * If an ErrorHandler object is passed in, that object will be used to log and track any errors 
+     * in the records as the records are decoded.  After the next() function returns, you can query 
+     * to determine whether any errors were detected in the decoding process.
+     * 
+     * See the  file org.marc4j.samples.PermissiveReaderExample.java to see how this can be done.
+     */     
+    public MarcPermissiveStreamReader(InputStream input, ErrorHandler errors, boolean convertToUTF8 ) 
+    {
+        if (errors != null) 
+        {
+            permissive = true;
+            defaultEncoding = "BESTGUESS";
+        }
+        this.input = new DataInputStream(new BufferedInputStream(input));
+        factory = MarcFactory.newInstance();
+        this.convertToUTF8 = convertToUTF8;
+        this.errors = errors;
+    }
+    
+    /**
+     * Constructs an instance with the specified input stream with possible additional functionality
+     * being enabled by setting permissive and/or convertToUTF8 to true.
+     * 
+     * If permissive and convertToUTF8 are both set to false, it functions almost identically to the
+     * MarcStreamReader class.
+     * 
+     * The parameter defaultEncoding is used to specify the character encoding that is used in the records
+     * that will be read from the input stream.   If permissive is set to true, you can specify "BESTGUESS"
+     * as the default encoding, and the reader will attempt to determine the character encoding used in the 
+     * records being read from the input stream.   This is especially useful if you are working with records 
+     * downloaded from an external source and the encoding is either unknown or the encoding is different from
+     * what the records claim to be.
+     */
+    public MarcPermissiveStreamReader(InputStream input, boolean permissive, boolean convertToUTF8, String defaultEncoding) 
+    {
+        this.permissive = permissive;
+        this.input = new DataInputStream(new BufferedInputStream(input));
+        factory = MarcFactory.newInstance();
+        this.convertToUTF8 = convertToUTF8;
+        this.defaultEncoding = defaultEncoding;
+        errors = null;
+        if (permissive) errors = new ErrorHandler();
+    }
+    
+    /**
+     * Constructs an instance with the specified input stream with possible additional functionality
+     * being enabled by setting permissive and/or convertToUTF8 to true.
+     * 
+     * If errors and convertToUTF8 are both set to false, it functions almost identically to the
+     * MarcStreamReader class.
+     * 
+     * The parameter defaultEncoding is used to specify the character encoding that is used in the records
+     * that will be read from the input stream.   If permissive is set to true, you can specify "BESTGUESS"
+     * as the default encoding, and the reader will attempt to determine the character encoding used in the 
+     * records being read from the input stream.   This is especially useful if you are working with records 
+     * downloaded from an external source and the encoding is either unknown or the encoding is different from
+     * what the records claim to be.
+     * 
+     * If an ErrorHandler object is passed in, that object will be used to log and track any errors 
+     * in the records as the records are decoded.  After the next() function returns, you can query 
+     * to determine whether any errors were detected in the decoding process.
+     * 
+     * See the  file org.marc4j.samples.PermissiveReaderExample.java to see how this can be done.
+     */          
+    public MarcPermissiveStreamReader(InputStream input, ErrorHandler errors, boolean convertToUTF8, String defaultEncoding) 
+    {
+        this.permissive = true;
+        this.input = new DataInputStream(new BufferedInputStream(input));
+        factory = MarcFactory.newInstance();
+        this.convertToUTF8 = convertToUTF8;
+        this.defaultEncoding = defaultEncoding;
+        this.errors = errors;
+    }
+    
+    /**
+     * Returns true if the iteration has more records, false otherwise.
+     */
+    public boolean hasNext() 
+    {
+        try {
+            if (input.available() == 0)
+                return false;
+        } catch (IOException e) {
+            throw new MarcException(e.getMessage(), e);
+        }
+        return true;
+    }
+
+    /**
+     * Returns the next record in the iteration.
+     * 
+     * @return Record - the record object
+     */
+    public Record next() 
+    {
+        record = factory.newRecord();
+        if (errors != null) errors.reset();
+        
+        try {
+            byte[] byteArray = new byte[24];
+            input.readFully(byteArray);
+
+            int recordLength = parseRecordLength(byteArray);
+            byte[] recordBuf = new byte[recordLength - 24];
+            if (permissive) 
+            {
+                input.mark(recordLength * 2);
+                input.readFully(recordBuf);
+                if (recordBuf[recordBuf.length-1] != Constants.RT)
+                {
+                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.ERROR_TYPO, 
+                                    "Record terminator character not found at end of record length");
+                    recordBuf = rereadPermissively(input, recordBuf, recordLength);
+                    recordLength = recordBuf.length + 24;
+                }
+            }
+            else
+            {
+                input.readFully(recordBuf);
+            }
+            String tmp = new String(recordBuf);
+            parseRecord(record, byteArray, recordBuf, recordLength);
+
+            if (this.convertToUTF8)
+            {
+                Leader l = record.getLeader();
+                l.setCharCodingScheme('a');
+                record.setLeader(l);
+            }
+            return(record);
+        }
+        catch (EOFException e) {
+            throw new MarcException("Premature end of file encountered", e);
+        } 
+        catch (IOException e) {
+            throw new MarcException("an error occured reading input", e);
+        }   
+    }
+    
+    private byte[] rereadPermissively(DataInputStream input, byte[] recordBuf, int recordLength) throws IOException  // Re-reads the record body when the record terminator is not where the stated record length puts it; returns the buffer to parse.
+    {  // Every branch that re-reads calls input.reset(). NOTE(review): the matching mark() is outside this method -- confirm where it is placed and that its read limit covers the look-ahead below.
+        int loc = arrayContainsAt(recordBuf, Constants.RT);  // index of the first record terminator already in the buffer, or -1
+        if (loc != -1)  // stated record length is too long
+        {
+            errors.addError("unknown", "n/a", "n/a", ErrorHandler.ERROR_TYPO, 
+                            "Record terminator appears before stated record length, using shorter record");
+            recordLength = loc + 24;  // only the local copy of the parameter changes; the caller's value is untouched
+            input.reset();
+            recordBuf = new byte[recordLength - 24];  // NOTE(review): length equals the terminator's index in the old buffer -- confirm the terminator still ends up inside the new buffer
+            input.readFully(recordBuf);
+        }
+        else  // stated record length is too short, read ahead
+        {
+            loc = recordLength - 24;  // start counting from the stated body length
+            int c = 0;
+            do 
+            {
+                c = input.read();
+                loc++;  // counts every read, including one that returns -1
+            } while (loc < recordLength + 100 && c != Constants.RT && c != -1);  // stop at a terminator, at end of stream, or once loc reaches recordLength + 100
+ 
+            if (c == Constants.RT)
+            {
+                errors.addError("unknown", "n/a", "n/a", ErrorHandler.ERROR_TYPO, 
+                                "Record terminator appears after stated record length, reading extra bytes");
+                recordLength = loc + 24;
+                input.reset();
+                recordBuf = new byte[recordLength - 24];  // loc bytes: the stated body plus the bytes read ahead
+                input.readFully(recordBuf);
+            }
+            else if (c == -1)
+            {
+                errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                "No Record terminator found, end of file reached, Terminator appended");
+                recordLength = loc + 24;
+                input.reset();
+                recordBuf = new byte[recordLength - 24 + 1];  // one extra slot, overwritten with the terminator below
+                input.readFully(recordBuf);  // NOTE(review): readFully throws EOFException if the stream holds fewer bytes than the buffer, and loc also counted the read that returned -1 -- confirm this branch can complete
+                recordBuf[recordBuf.length-1] = Constants.RT;  
+            }
+            else
+            {
+                errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL, 
+                                "No Record terminator found within 100 byts of stated location, giving up.");  // only an error is recorded; the buffer is returned unchanged and the stream is not reset
+            }
+        }
+        return(recordBuf);
+    }
+        
+    private void parseRecord(Record record, byte[] byteArray, byte[] recordBuf, int recordLength)
+    {
+        Leader ldr;
+        ldr = factory.newLeader();
+        ldr.setRecordLength(recordLength);
+        int directoryLength=0;
+        // These variables are used when the permissive reader is trying to make its best guess 
+        // as to what character encoding is actually used in the record being processed.
+        conversionCheck1 = "";
+        conversionCheck2 = "";
+        conversionCheck3 = "";
+        
+        try {                
+            parseLeader(ldr, byteArray);
+            directoryLength = ldr.getBaseAddressOfData() - (24 + 1);
+        } 
+        catch (IOException e) {
+            throw new MarcException("error parsing leader with data: "
+                    + new String(byteArray), e);
+        } 
+        catch (MarcException e) {
+            if (permissive)
+            {
+                if (recordBuf[recordBuf.length-1] == Constants.RT && recordBuf[recordBuf.length-2] == Constants.FT)
+                {
+                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.WARNING, 
+                                    "Error parsing leader, trying to re-read leader either shorter or longer");
+                    // make an attempt to recover record.
+                    int offset = 0;
+                    while (offset < recordBuf.length)
+                    {
+                        if (recordBuf[offset] == Constants.FT)
+                        {
+                            break;
+                        }
+                        offset++;
+                    }
+                    if (offset % 12 == 1)
+                    {
+                        // move one byte from body to leader, make new leader, and try again
+                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                        "Leader appears to be too short, moving one byte from record body to leader, and trying again");
+                        byte oldBody[] = recordBuf;
+                        recordBuf = new byte[oldBody.length-1];
+                        System.arraycopy(oldBody, 1, recordBuf, 0, oldBody.length-1);
+                        directoryLength = offset-1;
+                        ldr.setIndicatorCount(2);
+                        ldr.setSubfieldCodeLength(2);
+                        ldr.setImplDefined1((""+(char)byteArray[7]+" ").toCharArray());
+                        ldr.setImplDefined2((""+(char)byteArray[18]+(char)byteArray[19]+(char)byteArray[20]).toCharArray());
+                        ldr.setEntryMap("4500".toCharArray());
+                        if (byteArray[10] == (byte)' ' || byteArray[10] == (byte)'a') // if its ' ' or 'a'
+                        {
+                            ldr.setCharCodingScheme((char)byteArray[10]);
+                        }
+                    }
+                    else if (offset % 12 == 11) 
+                    {
+                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                        "Leader appears to be too long, moving one byte from leader to record body, and trying again");
+                        byte oldBody[] = recordBuf;
+                        recordBuf = new byte[oldBody.length+1];
+                        System.arraycopy(oldBody, 0, recordBuf, 1, oldBody.length);
+                        recordBuf[0] = (byte)'0';
+                        directoryLength = offset+1;
+                        ldr.setIndicatorCount(2);
+                        ldr.setSubfieldCodeLength(2);
+                        ldr.setImplDefined1((""+(char)byteArray[7]+" ").toCharArray());
+                        ldr.setImplDefined2((""+(char)byteArray[16]+(char)byteArray[17]+(char)byteArray[18]).toCharArray());
+                        ldr.setEntryMap("4500".toCharArray());
+                        if (byteArray[8] == (byte)' ' || byteArray[8] == (byte)'a') // if its ' ' or 'a'
+                        {
+                            ldr.setCharCodingScheme((char)byteArray[10]);
+                        }
+                        if (byteArray[10] == (byte)' ' || byteArray[10] == (byte)'a') // if its ' ' or 'a'
+                        {
+                            ldr.setCharCodingScheme((char)byteArray[10]);
+                        }
+                    }
+                    else
+                    {
+                        throw new MarcException("error parsing leader with data: "
+                                + new String(byteArray), e);
+                    }
+                }
+            }
+            else
+            {
+                throw new MarcException("error parsing leader with data: "
+                        + new String(byteArray), e);
+            }
+        }
+        char tmp[] = ldr.getEntryMap();
+        if (permissive && !(""+ tmp[0]+tmp[1]+tmp[2]+tmp[3]).equals("4500"))
+        {
+            if (tmp[0] >= '0' && tmp[0] <= '9' && 
+                    tmp[1] >= '0' && tmp[1] <= '9' && 
+                    tmp[2] >= '0' && tmp[2] <= '9' && 
+                    tmp[3] >= '0' && tmp[3] <= '9')
+            {
+                errors.addError("unknown", "n/a", "n/a", ErrorHandler.WARNING, 
+                            "Unusual character found at end of leader [ "+tmp[0]+tmp[1]+tmp[2]+tmp[3]+" ]");
+            }
+            else
+            {
+                errors.addError("unknown", "n/a", "n/a", ErrorHandler.ERROR_TYPO, 
+                                "Erroneous character found at end of leader [ "+tmp[0]+tmp[1]+tmp[2]+tmp[3]+" ]; changing them to the standard \"4500\"");
+                ldr.setEntryMap("4500".toCharArray());
+            }
+        }
+
+        // if MARC 21 then check encoding
+        switch (ldr.getCharCodingScheme()) {
+        case 'a':
+            encoding = "UTF8";
+            break;
+        case ' ':
+            if (convertToUTF8)
+                encoding = defaultEncoding;
+            else 
+                encoding = "ISO8859_1";
+            break;
+        default: 
+            if (convertToUTF8)
+                encoding = defaultEncoding;
+            else 
+                encoding = "ISO8859_1";
+            break;
+
+        }
+        String utfCheck;
+        if (encoding.equalsIgnoreCase("BESTGUESS"))
+        {
+            try
+            {
+                String marc8EscSeqCheck = new String(recordBuf, "ISO-8859-1");
+                //  If record has MARC8 character set selection strings, it must be MARC8 encoded
+                if (marc8EscSeqCheck.split("\\e[-(,)$bsp]", 2).length > 1)
+                {
+                    encoding = "MARC8";
+                }
+                else
+                {
+                    boolean hasHighBitChars = false;
+                    for (int i = 0; i < recordBuf.length; i++)
+                    {
+                        if (recordBuf[i] < 0) // the high bit is set
+                        {
+                            hasHighBitChars = true; 
+                            break;
+                        }
+                    }
+                    if (!hasHighBitChars)
+                    {
+                        encoding = "ISO8859_1";  //  You can choose any encoding you want here, the results will be the same.
+                    }
+                    else
+                    {
+                        utfCheck = new String(recordBuf, "UTF-8");
+                        byte byteCheck[] = utfCheck.getBytes("UTF-8");
+                        encoding = "UTF8";  
+                        if (recordBuf.length == byteCheck.length)
+                        {
+                            for (int i = 0; i < recordBuf.length; i++)
+                            {
+                                if (byteCheck[i] != recordBuf[i])
+                                {
+                                    encoding = "MARC8-Maybe";
+                                    break;
+                                }
+                            }
+                        }
+                        else 
+                        {
+                            encoding = "MARC8-Maybe";
+                        }
+                    }
+                }
+            }
+            catch (UnsupportedEncodingException e)
+            {
+                // TODO Auto-generated catch block
+                e.printStackTrace();
+            }
+        }
+        else if (permissive && encoding.equals("UTF8"))
+        {
+            try
+            {
+                utfCheck = new String(recordBuf, "UTF-8");
+                byte byteCheck[] = utfCheck.getBytes("UTF-8");
+                if (recordBuf.length != byteCheck.length)
+                {
+                    boolean foundESC = false;
+                    for (int i = 0; i < recordBuf.length; i++)
+                    {
+                        if (recordBuf[i] == 0x1B)
+                        {
+                            errors.addError("unknown", "n/a", "n/a", ErrorHandler.ERROR_TYPO, 
+                                            "Record claims to be UTF-8, but its not. Its probably MARC8.");
+                            encoding = "MARC8-Maybe";
+                            foundESC = true;
+                            break;
+                        }
+                        if (byteCheck[i] != recordBuf[i])
+                        {
+                            encoding = "MARC8-Maybe";
+                        }
+                        
+                    }
+                    if (!foundESC)
+                    {
+                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.ERROR_TYPO, 
+                                "Record claims to be UTF-8, but its not. It may be MARC8, or maybe UNIMARC, or maybe raw ISO-8859-1 ");
+                    }
+                }
+                if (utfCheck.contains("a$1!"))
+                {
+                    encoding = "MARC8-Broken";
+                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                "Record claims to be UTF-8, but its not. It seems to be MARC8-encoded but with missing escape codes.");
+                }
+            }
+            catch (UnsupportedEncodingException e)
+            {
+                // TODO Auto-generated catch block
+                e.printStackTrace();
+            }
+        }
+        else if (permissive && !encoding.equals("UTF8"))
+        {
+            try
+            {
+                utfCheck = new String(recordBuf, "UTF-8");
+                byte byteCheck[] = utfCheck.getBytes("UTF-8");
+                if (recordBuf.length == byteCheck.length)
+                {
+	                for (int i = 0; i < recordBuf.length; i++)
+	                {
+	                    // need to check for byte < 0 to see if the high bit is set, because Java doesn't have unsigned types.
+	                    if (recordBuf[i] < 0x00 || byteCheck[i] != recordBuf[i])
+	                    {
+	                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                            "Record claims not to be UTF-8, but it seems to be.");
+	                        encoding = "UTF8-Maybe";
+	                        break;
+	                    }
+	                }
+                }
+             }
+            catch (UnsupportedEncodingException e)
+            {
+                // TODO Auto-generated catch block
+                e.printStackTrace();
+            }
+        }
+        record.setLeader(ldr);
+        
+        boolean discardOneAtStartOfDirectory = false;
+        boolean discardOneSomewhereInDirectory = false;
+        
+        if ((directoryLength % 12) != 0)
+        {
+            if (permissive && directoryLength % 12 == 11 && recordBuf[1] != (byte)'0') 
+            {
+                errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                "Directory length is not a multiple of 12 bytes long.  Prepending a zero and trying to continue.");
+                byte oldBody[] = recordBuf;
+                recordBuf = new byte[oldBody.length+1];
+                System.arraycopy(oldBody, 0, recordBuf, 1, oldBody.length);
+                recordBuf[0] = (byte)'0';
+                directoryLength = directoryLength+1;
+            }
+            else
+            {
+                if (permissive && directoryLength % 12 == 1 && recordBuf[1] == (byte)'0' && recordBuf[2] == (byte)'0') 
+                {
+                    discardOneAtStartOfDirectory = true;
+                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                    "Directory length is not a multiple of 12 bytes long. Discarding byte from start of directory and trying to continue.");
+                }
+                else if (permissive && directoryLength % 12 == 1 && recordLength > 10000 && recordBuf[0] == (byte)'0' && 
+                         recordBuf[1] == (byte)'0' && recordBuf[2] > (byte)'0' && recordBuf[2] <= (byte)'9')
+                {
+                    discardOneSomewhereInDirectory = true;
+                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                    "Directory length is not a multiple of 12 bytes long.  Will look for oversized field and try to work around it.");
+                }                
+                else 
+                {
+                    if (errors != null)                
+                    {    
+                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL, 
+                                "Directory length is not a multiple of 12 bytes long. Unable to continue.");
+                    }
+                    throw new MarcException("Directory length is not a multiple of 12 bytes long. Unable to continue.");
+                }
+            }
+        }
+        DataInputStream inputrec = new DataInputStream(new ByteArrayInputStream(recordBuf));
+        int size = directoryLength / 12;
+
+        String[] tags = new String[size];
+        int[] lengths = new int[size];
+
+        byte[] tag = new byte[3];
+        byte[] length = new byte[4];
+        byte[] start = new byte[5];
+
+        String tmpStr;
+        try {
+            if (discardOneAtStartOfDirectory)  inputrec.read();
+            int totalOffset = 0;
+            for (int i = 0; i < size; i++) 
+            {
+                inputrec.readFully(tag);                
+                tmpStr = new String(tag);
+                tags[i] = tmpStr;
+    
+                boolean proceedNormally = true;
+                if (discardOneSomewhereInDirectory)
+                {
+                    byte lenCheck[] = new byte[10];
+                    inputrec.mark(20);
+                    inputrec.readFully(lenCheck);                
+                    if (byteCompare(lenCheck, 4, 5, totalOffset)) // proceed normally
+                    {
+                        proceedNormally = true;
+                    }
+                    else if (byteCompare(lenCheck, 5, 5, totalOffset)) // field length is 5 bytes!  Bad Marc record, proceed normally
+                    {
+                        discardOneSomewhereInDirectory = false;
+                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                        "Field is longer than 9999 bytes.  Writing this record out will result in a bad record.");
+                        proceedNormally = false;
+                    }
+                    else
+                    {
+                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL, 
+                                        "Unable to reconcile problems in directory. Unable to continue.");                    
+                        throw new MarcException("Directory length is not a multiple of 12 bytes long. Unable to continue.");
+                    }
+                    inputrec.reset();
+                }
+                if (proceedNormally)
+                {
+                    inputrec.readFully(length);
+                    tmpStr = new String(length);
+                    lengths[i] = Integer.parseInt(tmpStr);
+    
+                    inputrec.readFully(start);
+                }
+                else // length is 5 bytes long 
+                {
+                    inputrec.readFully(start);
+                    tmpStr = new String(start);
+                    lengths[i] = Integer.parseInt(tmpStr);
+    
+                    inputrec.readFully(start);                    
+                }
+                totalOffset += lengths[i];
+            }
+            
+            // If we still haven't found the extra byte, throw out the last byte and try to continue;
+            if (discardOneSomewhereInDirectory)  inputrec.read();
+    
+            if (inputrec.read() != Constants.FT)
+            {
+                errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL, 
+                                "Expected field terminator at end of directory. Unable to continue.");
+                throw new MarcException("expected field terminator at end of directory");
+            }
+            
+            int numBadLengths = 0;
+            
+            int totalLength = 0;
+            for (int i = 0; i < size; i++) 
+            {
+                int fieldLength = getFieldLength(inputrec);
+                if (fieldLength+1 != lengths[i] && permissive)
+                {
+                    if (numBadLengths < 3 && (totalLength + fieldLength < recordLength + 26))
+                    {
+                        numBadLengths++;
+                        lengths[i] = fieldLength+1;
+                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.MINOR_ERROR, 
+                                        "Field length found in record different from length stated in the directory.");
+                        if (fieldLength+1 > 9999)
+                        {
+                            errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR, 
+                                        "Field length is greater than 9999, record cannot be represented as a binary Marc record.");
+                        }
+                    }
+                }
+                totalLength += lengths[i];
+                if (isControlField(tags[i])) 
+                {
+                    byteArray = new byte[lengths[i] - 1];
+                    inputrec.readFully(byteArray);
+    
+                    if (inputrec.read() != Constants.FT)
+                    {
+                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL, 
+                                        "Expected field terminator at end of field. Unable to continue.");
+                        throw new MarcException("expected field terminator at end of field");
+                    }
+    
+                    ControlField field = factory.newControlField();
+                    field.setTag(tags[i]);
+                    field.setData(getDataAsString(byteArray));
+                    record.addVariableField(field);
+    
+                } 
+                else 
+                {
+                    byteArray = new byte[lengths[i]];
+                    inputrec.readFully(byteArray);
+                    try {
+                        record.addVariableField(parseDataField(tags[i], byteArray));
+                    } catch (IOException e) {
+                        throw new MarcException(
+                                "error parsing data field for tag: " + tags[i]
+                                        + " with data: "
+                                        + new String(byteArray), e);
+                    }
+                }
+            }
+            
+            // We've determined that although the record says it is UTF-8, it is not. 
+            // Here we make an attempt to determine the actual encoding of the data in the record.
+            if (permissive && conversionCheck1.length() > 1 && 
+                    conversionCheck2.length() > 1 && conversionCheck3.length() > 1)
+            {
+                guessAndSelectCorrectNonUTF8Encoding();
+            }
+            if (inputrec.read() != Constants.RT)
+            {
+                errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL, 
+                                "Expected record terminator at end of record. Unable to continue.");
+                throw new MarcException("expected record terminator");
+            } 
+        }
+        catch (IOException e)
+        {
+            errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL, 
+                            "Error reading from data file. Unable to continue.");
+            throw new MarcException("an error occured reading input", e);            
+        }
+    }
+
+    private boolean byteCompare(byte[] lenCheck, int offset, int length, int totalOffset)  // True when the 'length' bytes of lenCheck starting at 'offset' are the ASCII decimal digits of totalOffset, zero-padded on the left.
+    {
+        int divisor = 1;
+        for (int i = offset + length - 1; i >= offset; i-- , divisor *= 10)  // walk right to left, from the least significant digit
+        {
+            if (((totalOffset / divisor) % 10) + '0' != lenCheck[i])  // expected digit character versus the byte actually present
+            {
+                return(false);
+            }
+        }
+        return true;
+    }
+
+    private boolean isControlField(String tag)  // Delegates to Verifier.isControlField, returning false instead of failing when that call throws NumberFormatException.
+    {
+        boolean isControl = false;
+        try {
+            isControl = Verifier.isControlField(tag);
+        }
+        catch (NumberFormatException nfe)  // caught in both modes; only permissive mode records an error
+        {
+            if (permissive) 
+            {
+                errors.addError(record.getControlNumber(), tag, "n/a", ErrorHandler.ERROR_TYPO, 
+                                "Field tag contains non-numeric characters (" + tag + ").");
+                isControl = false;
+            }
+        }
+        return isControl;
+    }
+
+    private void guessAndSelectCorrectNonUTF8Encoding()  // Chooses one of three candidate translations (0 = MARC8, 1 = Unimarc, 2 = ISO-8859-1) and rewrites every subfield that still carries all candidates.
+    {
+        int defaultPart = 0;  // fallback choice: MARC8, or Unimarc when the record has no 245 field
+        if (record.getVariableField("245") == null)  defaultPart = 1;
+        int partToUse = 0;
+        int l1 = conversionCheck1.length();
+        int l2 = conversionCheck2.length();
+        int l3 = conversionCheck3.length();
+        int tst;
+
+        if (l1 < l3 && l2 == l3 && defaultPart == 0)  // the tests below are tried in order; the first one that matches decides
+        {
+            errors.addError(ErrorHandler.INFO, "MARC8 translation shorter than ISO-8859-1, choosing MARC8.");
+            partToUse = 0;
+        }
+        else if (l2 < l1-2 && l2 < l3-2 )             
+        {
+            errors.addError(ErrorHandler.INFO, "Unimarc translation shortest, choosing it.");
+            partToUse = 1;
+        }
+        else if ((tst = onlyOneStartsWithUpperCase(conversionCheck1, conversionCheck2, conversionCheck3)) != -1)  // this branch records no INFO message
+        {
+            partToUse = tst;
+        }
+        else if (l2 < l1 && l2 < l3 )             
+        {
+            errors.addError(ErrorHandler.INFO, "Unimarc translation shortest, choosing it.");
+            partToUse = 1;
+        }
+        else if (conversionCheck2.equals(conversionCheck3) && !conversionCheck1.trim().contains(" "))
+        {
+            errors.addError(ErrorHandler.INFO, "Unimarc and ISO-8859-1 translations identical, choosing ISO-8859-1.");
+            partToUse = 2;
+        }
+        else if (!specialCharIsBetweenLetters(conversionCheck1))
+        {
+            errors.addError(ErrorHandler.INFO, "To few letters in translations, choosing "+(defaultPart == 0 ? "MARC8" : "Unimarc"));
+            partToUse = defaultPart;
+        }
+        else if (l2 == l1 && l2 == l3)
+        {
+            errors.addError(ErrorHandler.INFO, "All three version equal length. Choosing ISO-8859-1 ");
+            partToUse = 2;
+        }
+        else if (l2 == l3 && defaultPart == 1)
+        {
+            errors.addError(ErrorHandler.INFO, "Unimarc and ISO-8859-1 translations equal length, choosing ISO-8859-1.");
+            partToUse = 2;
+        }
+        else
+        {
+            errors.addError(ErrorHandler.INFO, "No Determination made, defaulting to "+ (defaultPart == 0 ? "MARC8" : "Unimarc") );
+            partToUse = defaultPart;
+        }
+        List<VariableField> fields = record.getVariableFields();
+        Iterator<VariableField> iter = fields.iterator();
+        while (iter.hasNext())
+        {
+            VariableField field = iter.next();
+            if (field instanceof DataField)  // only data fields have subfields to rewrite
+            {
+                DataField df = (DataField)field;
+                List<Subfield> subf = df.getSubfields();
+                Iterator<Subfield> sfiter = subf.iterator();
+                while (sfiter.hasNext())
+                {
+                    Subfield sf = sfiter.next();
+                    if (sf.getData().contains("%%@%%"))  // marker separating the candidate translations inside one subfield
+                    {
+                        String parts[] = sf.getData().split("%%@%%", 3);
+                        sf.setData(parts[partToUse]);  // NOTE(review): parts has only two elements when the marker occurs once, so partToUse == 2 would be out of range -- confirm the marker always occurs twice
+                    }
+                }
+            }
+        }                      
+    }
+        
+    private int onlyOneStartsWithUpperCase(String conversionCheck12, String conversionCheck22, String conversionCheck32)  // Compares the three candidates segment by segment (segments are separated by "|>") and reports which single candidate has a segment starting with an upper-case letter.
+    {  // Returns 0 for the first candidate, 2 for the third, and -1 when nothing is decided. Works on its arguments rather than on the fields of the same names.
+        if (conversionCheck12.length() == 0 || conversionCheck22.length() == 0 || conversionCheck32.length() == 0) return -1;
+        String check1Parts[] = conversionCheck12.trim().split("[|]>");
+        String check2Parts[] = conversionCheck22.trim().split("[|]>");
+        String check3Parts[] = conversionCheck32.trim().split("[|]>");
+        for (int i = 1; i < check1Parts.length && i < check2Parts.length  && i < check3Parts.length; i++)  // segment 0, the text before the first "|>", is not examined
+        {
+            boolean tst1 = check1Parts[i].length() > 0 && Character.isUpperCase(check1Parts[i].charAt(0));  // an empty segment counts as "not upper case" instead of throwing
+            boolean tst2 = check2Parts[i].length() > 0 && Character.isUpperCase(check2Parts[i].charAt(0));
+            boolean tst3 = check3Parts[i].length() > 0 && Character.isUpperCase(check3Parts[i].charAt(0));
+            if (tst1 && !tst2 && !tst3)  
+                return(0);
+            if (!tst1 && tst2 && !tst3)  
+                return(-1);  // NOTE(review): the second candidate winning alone yields -1 rather than 1 -- confirm whether this is intentional
+            if (!tst1 && !tst2 && tst3)  
+                return(2);
+        }
+        return -1;
+    }
+
+    private boolean specialCharIsBetweenLetters(String conversionCheck)  // True when the string has no character above 0x7f, or when some such character has a letter directly before or after it.
+    {
+        boolean bewteenLetters = true;  // stays true if no character above 0x7f is ever seen
+        for (int i = 0; i < conversionCheck.length(); i++)
+        {
+            int charCode = (int)(conversionCheck.charAt(i));
+            if (charCode > 0x7f)
+            {
+                bewteenLetters = false;  // provisional; flipped back below if a neighbour is a letter
+                if (i > 0 && Character.isLetter((int)(conversionCheck.charAt(i-1))) ||   // && binds tighter than ||, so this reads (has a previous letter) || (has a following letter)
+                   (i < conversionCheck.length()-1 && Character.isLetter((int)(conversionCheck.charAt(i+1)))))
+                {
+                    bewteenLetters = true;
+                    break;  // one qualifying character is enough
+                }
+            }                
+        }
+        return(bewteenLetters);
+    }
+
+    private int arrayContainsAt(byte[] byteArray, int ft)  // Returns the index of the first element equal to (byte)ft, or -1 when there is none.
+    {
+        for (int i = 0; i < byteArray.length; i++)
+        {
+            if (byteArray[i] == (byte)ft)  return(i);  // ft is narrowed to a byte before comparing
+        }
+        return(-1);
+    }
+
+    private DataField parseDataField(String tag, byte[] field)  throws IOException  // Builds a DataField from the raw bytes of one field: two indicator bytes followed by subfields, each introduced by Constants.US.
+    {  // Throws IOException when the data ends right after a subfield delimiter.
+        if (permissive)
+        {
+            errors.setRecordID(record.getControlNumber());  // give the error handler its context before any cleanup messages are recorded
+            errors.setField(tag); 
+            errors.setCurSubfield("n/a");
+            cleanupBadFieldSeperators(field);
+        }
+        ByteArrayInputStream bais = new ByteArrayInputStream(field);
+        char ind1 = (char) bais.read();
+        char ind2 = (char) bais.read();
+
+        DataField dataField = factory.newDataField();
+        dataField.setTag(tag);
+        dataField.setIndicator1(ind1);
+        dataField.setIndicator2(ind2);
+
+        int code;
+        int size;
+        int readByte;
+        byte[] data;
+        Subfield subfield;
+        while (true) {
+            readByte = bais.read();
+            if (readByte < 0)  // end of the field bytes
+                break;
+            switch (readByte) {  // bytes other than US and FT are skipped
+            case Constants.US:
+                code = bais.read();
+                if (code < 0)
+                    throw new IOException("unexpected end of data field");
+                if (code == Constants.FT)
+                    break;  // leaves the switch only; the enclosing loop keeps reading
+                size = getSubfieldLength(bais);  // presumably the number of bytes up to the next delimiter -- TODO confirm against getSubfieldLength
+                data = new byte[size];
+                bais.read(data);
+                subfield = factory.newSubfield();
+                if (permissive) errors.setCurSubfield("" + (char)code);
+                String dataAsString = getDataAsString(data);
+                if (permissive && code == Constants.US)  // two delimiters in a row
+                {
+                    code = data[0];  // NOTE(review): this would throw if size is 0 -- confirm getSubfieldLength cannot return 0 here
+                    dataAsString = dataAsString.substring(1);
+                    errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                    "Subfield tag is a subfield separator, using first character of field as subfield tag.");
+                }
+                subfield.setCode((char) code);
+                subfield.setData(dataAsString);
+                dataField.addSubfield(subfield);
+                break;
+            case Constants.FT:
+                break;
+            }
+        }
+        return dataField;
+    }
+    
+    static AnselToUnicode conv = null;  // shared converter, created on first use in cleanupBadFieldSeperators
+ 
+    private void cleanupBadFieldSeperators(byte[] field)
+    {
+        if (conv == null) conv = new AnselToUnicode(true);
+        boolean hasEsc = false;
+        boolean inMultiByte = false;
+        boolean justCleaned = false;
+        int mbOffset = 0;
+        
+        for (int i = 0 ; i < field.length-1; i++)
+        {
+            if (field[i] == 0x1B)
+            {   
+                hasEsc = true;
+                if ("(,)-'".indexOf((char)field[i+1]) != -1)
+                {
+                    inMultiByte = false;
+                }
+                else if (i + 2 < field.length && field[i+1] == '$' && field[i+2] == '1')
+                {
+                    inMultiByte = true;
+                    mbOffset = 3;
+                }
+                else if (i + 3 < field.length && (field[i+1] == '$' || field[i+2] == '$')&& ( field[i+2] == '1' || field[i+3] == '1'))
+                {
+                    inMultiByte = true;
+                    mbOffset = 4;
+                }
+
+            }
+            else if (inMultiByte && field[i] != 0x20)   mbOffset = ( mbOffset == 0) ? 2 : mbOffset - 1;
+            if (inMultiByte && mbOffset == 0 && i + 2 < field.length)
+            {
+                char c;
+                byte f1 = field[i];
+                byte f2 = field[i+1] == 0x20 ? field[i+2] : field[i+1];
+                byte f3 = (field[i+1] == 0x20 || field[i+2] == 0x20) ? field[i+3] : field[i+2];
+                c = conv.getMBChar(conv.makeMultibyte((char)((f1 == Constants.US) ? 0x7C : f1),
+                                                      (char)((f2 == Constants.US) ? 0x7C : f2),
+                                                      (char)((f3 == Constants.US) ? 0x7C : f3)));
+                if (c == 0 && !justCleaned) 
+                {
+                    errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                    "Bad Multibyte character found, reinterpreting data as non-multibyte data");
+                    inMultiByte = false; 
+                }
+                else if (c == 0 && justCleaned)
+                {
+                    c = conv.getMBChar(conv.makeMultibyte('!',(char)((f2 == Constants.US) ? 0x7C : f2),
+                                                          (char)((f3 == Constants.US) ? 0x7C : f3)));
+                    if (c == 0)
+                    {
+                        errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                        "Bad Multibyte character found, reinterpreting data as non-multibyte data");
+                        inMultiByte = false; 
+                    }                        
+                    else
+                    {
+                        errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                        "Character after restored vertical bar character makes bad multibyte character, changing it to \"!\"");
+                        field[i] = '!';
+                    }
+                }
+            }
+            justCleaned = false;
+            if (field[i] == Constants.US )
+            {
+                if (inMultiByte && mbOffset != 0)
+                {
+                    field[i] = 0x7C;
+                    errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                    "Subfield separator found in middle of a multibyte character, changing it to a vertical bar, and continuing");
+                    if (field[i+1] == '0')
+                    { 
+                        if (field[i+2] == '(' && field[i+3] == 'B' )  
+                        {
+                            field[i+1] = 0x1B;
+                            errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                            "Character after restored vertical bar character makes bad multibyte character, changing it to ESC");
+                        }
+                        else
+                        {
+                            field[i+1] = 0x21;
+                            errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                            "Character after restored vertical bar character makes bad multibyte character, changing it to \"!\"");
+                        }
+                    }
+                    justCleaned = true;
+                }
+                else if (hasEsc && !((field[i+1] >= 'a' && field[i+1] <= 'z') || (field[i+1] >= '0' && field[i+1] <= '9')))
+                {
+                    errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                    "Subfield separator followed by invalid subfield tag, changing separator to a vertical bar, and continuing");
+                    field[i] = 0x7C;
+                    justCleaned = true;
+                }
+                else if (hasEsc && i < field.length-3 && 
+                        (field[i+1] == '0' && field[i+2] == '('  && field[i+3] == 'B' ))
+                {
+                    errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                    "Subfield separator followed by invalid subfield tag, changing separator to a vertical bar, and continuing");
+                    field[i] = 0x7C;
+                    field[i+1] = 0x1B;
+                    justCleaned = true;
+                }
+                else if (hasEsc && (field[i+1] == '0' ))
+                {
+                    errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                    "Subfield separator followed by invalid subfield tag, changing separator to a vertical bar, and continuing");
+                    field[i] = 0x7C;
+                    field[i+1] = 0x21;
+                    justCleaned = true;
+                }
+                else if (field[i+1] == Constants.US && field[i+2] == Constants.US )
+                {
+                    errors.addError(ErrorHandler.MAJOR_ERROR, 
+                                    "Three consecutive subfield separators, changing first two to vertical bars.");
+                    field[i] = 0x7C;
+                    field[i+1] = 0x7C;
+                    justCleaned = true;
+                }
+            }
+        }
+    }
+
+    private int getFieldLength(DataInputStream bais) throws IOException  // counts the bytes before the next field terminator, then rewinds the stream
+    {
+        bais.mark(9999);  // remember the current position so the scan below can be undone
+        int bytesRead = 0;
+        while (true) {
+            switch (bais.read()) {
+             case Constants.FT:
+                bais.reset();  // rewind to the mark; the terminator itself is not counted
+                return bytesRead;
+            case -1:
+                bais.reset();
+                if (permissive)
+                {
+                    errors.addError(ErrorHandler.ERROR_TYPO, 
+                                    "Field not terminated trying to continue");
+                    return (bytesRead);  // permissive mode: report the count reached at end of stream
+                }
+                else
+                    throw new IOException("Field not terminated");
+            case Constants.US:
+            default:
+                bytesRead++;  // every other byte, subfield separators included, counts toward the length
+            }
+        }
+    }
+
+    private int getSubfieldLength(ByteArrayInputStream bais) throws IOException {  // counts the bytes before the next FT or US byte, then rewinds the stream
+        bais.mark(9999);  // remember the current position so the scan below can be undone
+        int bytesRead = 0;
+        while (true) {
+            switch (bais.read()) {
+            case Constants.FT:
+                bais.reset();  // field terminator ends the subfield; it is not counted
+                return bytesRead;
+            case Constants.US:
+                bais.reset();  // the next subfield separator ends the subfield; it is not counted
+                return bytesRead;
+            case -1:
+                bais.reset();
+                if (permissive)
+                {
+                    errors.addError(ErrorHandler.ERROR_TYPO, "Subfield not terminated trying to continue");
+                    return (bytesRead);  // permissive mode: report the count reached at end of stream
+                }
+                else
+                    throw new IOException("subfield not terminated");
+            default:
+                bytesRead++;
+            }
+        }
+    }
+
+    private int parseRecordLength(byte[] leaderData) throws IOException {  // parses the first five leader characters as the decimal record length
+        InputStreamReader isr = new InputStreamReader(new ByteArrayInputStream(
+                leaderData));  // NOTE(review): no charset is given, so the platform default decodes the bytes
+        int length = -1;
+        char[] tmp = new char[5];
+        isr.read(tmp);
+        try {
+            length = Integer.parseInt(new String(tmp));
+        } catch (NumberFormatException e) {
+            errors.addError(ErrorHandler.FATAL, 
+                            "Unable to parse record length, Unable to Continue");
+            throw new MarcException("unable to parse record length", e);  // recorded as FATAL above, then raised with the cause attached
+        }
+        return(length);
+    }
+    
+    private void parseLeader(Leader ldr, byte[] leaderData) throws IOException {  // fills ldr from the leader bytes, which are read sequentially
+        InputStreamReader isr = new InputStreamReader(new ByteArrayInputStream(
+                leaderData));
+        char[] tmp = new char[5];
+        isr.read(tmp);
+        //  The five characters just read are the record length; they are skipped because it has already been computed.
+        ldr.setRecordStatus((char) isr.read());
+        ldr.setTypeOfRecord((char) isr.read());
+        tmp = new char[2];
+        isr.read(tmp);
+        ldr.setImplDefined1(tmp);
+        ldr.setCharCodingScheme((char) isr.read());
+        char indicatorCount = (char) isr.read();  // kept raw here; parsed as a number after the reader is closed
+        char subfieldCodeLength = (char) isr.read();  // kept raw here; parsed as a number after the reader is closed
+        char baseAddr[] = new char[5];
+        isr.read(baseAddr);
+        tmp = new char[3];
+        isr.read(tmp);
+        ldr.setImplDefined2(tmp);
+        tmp = new char[4];
+        isr.read(tmp);
+        ldr.setEntryMap(tmp);
+        isr.close();
+        try {
+            ldr.setIndicatorCount(Integer.parseInt(String.valueOf(indicatorCount)));
+        } catch (NumberFormatException e) {
+            throw new MarcException("unable to parse indicator count", e);
+        }
+        try {
+            ldr.setSubfieldCodeLength(Integer.parseInt(String
+                    .valueOf(subfieldCodeLength)));
+        } catch (NumberFormatException e) {
+            throw new MarcException("unable to parse subfield code length", e);
+        }
+        try {
+            ldr.setBaseAddressOfData(Integer.parseInt(new String(baseAddr)));
+        } catch (NumberFormatException e) {
+            throw new MarcException("unable to parse base address of data", e);
+        }
+
+    }
+
+    private String getDataAsString(byte[] bytes)  // decodes bytes according to the current encoding name; throws MarcException for an unknown name
+    {
+        String dataElement = null;
+        if (encoding.equals("UTF-8") || encoding.equals("UTF8"))
+        {
+            try {
+                dataElement = new String(bytes, "UTF-8");
+            } 
+            catch (UnsupportedEncodingException e) {
+                throw new MarcException("unsupported encoding", e);
+            }
+        }
+        else if (encoding.equals("UTF8-Maybe"))  // currently decoded exactly like plain UTF-8
+        {
+            try {
+                dataElement = new String(bytes, "UTF-8");
+            } 
+            catch (UnsupportedEncodingException e) {
+                throw new MarcException("unsupported encoding", e);
+            }
+        }
+        else if (encoding.equals("MARC-8") || encoding.equals("MARC8"))
+        {
+            dataElement = getMarc8Conversion(bytes);
+        }
+        else if (encoding.equalsIgnoreCase("Unimarc") || encoding.equals("ISO5426") || encoding.equals("IS05426"))  // "IS05426" (digit zero) is the historical misspelling, still accepted
+        {
+            dataElement = getUnimarcConversion(bytes);
+        }
+        else if (encoding.equals("MARC8-Maybe"))  // decode three ways and compare the results
+        {
+            String dataElement1 = getMarc8Conversion(bytes);
+            String dataElement2 = getUnimarcConversion(bytes);
+            String dataElement3 = null;
+            try
+            {
+                dataElement3 = new String(bytes, "ISO-8859-1");
+            }
+            catch (UnsupportedEncodingException e)
+            {
+                // fail loudly, as the UTF-8 and ISO-8859-1 branches do, instead of comparing against null
+                throw new MarcException("unsupported encoding", e);
+            }
+            if (dataElement1.equals(dataElement2) && dataElement1.equals(dataElement3))
+            {
+                dataElement = dataElement1;  // all three decodings agree, so the choice does not matter
+            }
+            else 
+            {
+                conversionCheck1 = conversionCheck1 + "|>" + Normalizer.compose(dataElement1, false);
+                conversionCheck2 = conversionCheck2 + "|>" + dataElement2;
+                conversionCheck3 = conversionCheck3 + "|>" + dataElement3;
+                dataElement = dataElement1 + "%%@%%" + dataElement2 + "%%@%%" + dataElement3;  // return all three candidates, joined by the %%@%% marker
+            }            
+        }
+        else if (encoding.equals("MARC8-Broken"))  // repair the text as ISO-8859-1 first, then run the MARC-8 conversion
+        {
+            try
+            {
+                dataElement = new String(bytes, "ISO-8859-1");
+            }
+            catch (UnsupportedEncodingException e)
+            {
+                // fail loudly instead of dereferencing a null dataElement on the next statement
+                throw new MarcException("unsupported encoding", e);
+            }
+            String newdataElement = dataElement.replaceAll("&lt;", "<");
+            newdataElement = newdataElement.replaceAll("&gt;", ">");
+            newdataElement = newdataElement.replaceAll("&amp;", "&");
+            newdataElement = newdataElement.replaceAll("&apos;", "'");
+            newdataElement = newdataElement.replaceAll("&quot;", "\"");
+            if (!newdataElement.equals(dataElement))   
+            {
+                dataElement = newdataElement;
+                errors.addError(ErrorHandler.ERROR_TYPO, "Subfield contains escaped html character entities, un-escaping them. ");
+            }
+            String rep1 = ""+(char)0x1b+"\\$1$1";  // replacement text: ESC, a literal "$1", then the captured character
+            String rep2 = ""+(char)0x1b+"\\(B";  // replacement text: ESC followed by a literal "(B"
+            newdataElement = dataElement.replaceAll("\\$1(.)", rep1);
+            newdataElement = newdataElement.replaceAll("\\(B", rep2);
+            if (!newdataElement.equals(dataElement))   
+            {
+                dataElement = newdataElement;
+                errors.addError(ErrorHandler.MAJOR_ERROR, "Subfield seems to be missing MARC8 escape sequences, trying to restore them.");
+            }
+            try
+            {
+                dataElement = getMarc8Conversion(dataElement.getBytes("ISO-8859-1"));
+            }
+            catch (UnsupportedEncodingException e)
+            {
+                // fail loudly, consistent with the other branches, instead of returning unconverted text
+                throw new MarcException("unsupported encoding", e);
+            }
+
+        }
+        else if (encoding.equals("ISO-8859-1") || encoding.equals("ISO8859_1"))
+        {
+            try {
+                dataElement = new String(bytes, "ISO-8859-1");
+            } 
+            catch (UnsupportedEncodingException e) {
+                throw new MarcException("unsupported encoding", e);
+            }
+        }
+        else 
+        {
+            throw new MarcException("Unknown or unsupported Marc character encoding:" + encoding);           
+        }
+        if (errors != null && dataElement.matches("[^&]*&[a-z]*;.*"))  // only when an error handler exists and the text looks like it holds an entity
+        {
+            String newdataElement = dataElement.replaceAll("&lt;", "<");
+            newdataElement = newdataElement.replaceAll("&gt;", ">");
+            newdataElement = newdataElement.replaceAll("&amp;", "&");
+            newdataElement = newdataElement.replaceAll("&apos;", "'");
+            newdataElement = newdataElement.replaceAll("&quot;", "\"");
+            if (!newdataElement.equals(dataElement))   
+            {
+                dataElement = newdataElement;
+                errors.addError(ErrorHandler.ERROR_TYPO, "Subfield contains escaped html character entities, un-escaping them. ");
+            }
+        }
+        return dataElement;
+    }
+
+    private boolean byteArrayContains(byte[] bytes, byte[] seq)  // true when seq (which must be non-empty) occurs as a contiguous run in bytes
+    {
+        for ( int i = 0; i <= bytes.length - seq.length; i++)  // "<=" so a match ending on the last byte is also found
+        {
+            if (bytes[i] == seq[0])
+            {
+                for (int j = 0; j < seq.length; j++)
+                {
+                    if (bytes[i+j] != seq[j])
+                    {
+                        break;  // mismatch: abandon this start position
+                    }
+                    if (j == seq.length-1) return(true);  // every byte of seq matched
+                }
+            }
+        }
+        return(false);
+    }
+    
+    static byte badEsc[] = { (byte)('b'), (byte)('-'), 0x1b, (byte)('s') };  // the bytes 'b', '-', ESC, 's'; getMarc8Conversion searches its input for this run
+    static byte overbar[] = { (byte)(char)(0xaf) };  // the single byte 0xAF; getMarc8Conversion searches its input for it
+     
+    private String getMarc8Conversion(byte[] bytes)  // converts bytes with the Ansel converter; in permissive mode two known defects are repaired first
+    {
+        String dataElement = null;
+        if (converterAnsel == null) converterAnsel = new AnselToUnicode(errors);  // created lazily on first use
+        if (permissive && (byteArrayContains(bytes, badEsc) || byteArrayContains(bytes, overbar)))  
+        {
+            String newDataElement = null;
+            try
+            {
+                dataElement = new String(bytes, "ISO-8859-1");  // one char per byte, so the repairs below can work on text
+                newDataElement = dataElement.replaceAll("(\\e)b-\\es([psb])", "$1$2");  // ESC 'b' '-' ESC 's' + p/s/b collapses to ESC + that letter
+                if (!newDataElement.equals(dataElement))
+                {
+                    dataElement = newDataElement;
+                    errors.addError(ErrorHandler.ERROR_TYPO, "Subfield contains odd pattern of subscript or superscript escapes. ");
+                }
+                newDataElement = dataElement.replace((char)0xaf, (char)0xe5);
+                if (!newDataElement.equals(dataElement))
+                {
+                    dataElement = newDataElement;
+                    errors.addError(ErrorHandler.ERROR_TYPO, "Subfield contains 0xaf overbar character, changing it to proper MARC8 representation ");
+                }
+                dataElement = converterAnsel.convert(dataElement);                    
+            }
+            catch (UnsupportedEncodingException e)
+            {
+                // fail loudly instead of falling through to dataElement.matches() with a null string
+                throw new MarcException("unsupported encoding", e);
+            }
+        }
+        else 
+        {
+            dataElement = converterAnsel.convert(bytes);
+        }
+        if (permissive && dataElement.matches("[^&]*&#x[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f];.*"))  // text holds at least one &#xHHHH; reference
+        {
+            Pattern pattern = Pattern.compile("&#x([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]);"); 
+            Matcher matcher = pattern.matcher(dataElement);
+            StringBuffer newElement = new StringBuffer();
+            int prevEnd = 0;
+            while (matcher.find())
+            {
+                newElement.append(dataElement.substring(prevEnd, matcher.start()));  // copy the text between references unchanged
+                newElement.append(getChar(matcher.group(1)));  // replace the reference with the character it names
+                prevEnd = matcher.end();
+            }
+            newElement.append(dataElement.substring(prevEnd));
+            dataElement = newElement.toString();
+        }
+        return(dataElement);
+    }
+    
+    private String getUnimarcConversion(byte[] bytes)  // converts bytes with the ISO 5426 converter, then expands <U+HHHH> markers
+    {
+        if (converterUnimarc == null) converterUnimarc = new Iso5426ToUnicode();  // created lazily on first use
+        String dataElement = converterUnimarc.convert(bytes);
+        dataElement = dataElement.replaceAll("\u0088", "");  // drop every U+0088 character from the converted text
+        dataElement = dataElement.replaceAll("\u0089", "");  // drop every U+0089 character from the converted text
+//        for ( int i = 0 ; i < bytes.length; i++)
+//        {
+//            if (bytes[i] == -120 || bytes[i] == -119)
+//            {
+//                char tmp = (char)bytes[i]; 
+//                char temp2 = dataElement.charAt(0);
+//                char temp3 = dataElement.charAt(4);
+//                int tmpi = (int)tmp;
+//                int tmp2 = (int)temp2;
+//                int tmp3 = (int)temp3;
+//                i = i;
+//
+//            }
+//        }
+        if (dataElement.matches("[^<]*<U[+][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]>.*"))  // text holds at least one <U+HHHH> marker
+        {
+            Pattern pattern = Pattern.compile("<U[+]([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])>"); 
+            Matcher matcher = pattern.matcher(dataElement);
+            StringBuffer newElement = new StringBuffer();
+            int prevEnd = 0;
+            while (matcher.find())
+            {
+                newElement.append(dataElement.substring(prevEnd, matcher.start()));  // copy the text between markers unchanged
+                newElement.append(getChar(matcher.group(1)));  // replace the marker with the character it names
+                prevEnd = matcher.end();
+            }
+            newElement.append(dataElement.substring(prevEnd));
+            dataElement = newElement.toString();
+        }
+        return(dataElement);
+
+    }
+    
+    private String getChar(String charCodePoint)  // turns a hexadecimal code string into a one-character String
+    {
+        int charNum = Integer.parseInt(charCodePoint, 16);  // throws NumberFormatException when the text is not valid hex
+        String result = ""+((char)charNum);  // the cast keeps only the low 16 bits; both callers in this file pass exactly four hex digits
+        return(result);
+    }
+
+    public boolean isPermissive()  // returns the current value of the permissive flag
+    {
+        return permissive;
+    }
+
+    public void setPermissive(boolean permissive)  // sets the permissive flag that the length and conversion helpers consult
+    {
+        this.permissive = permissive;        
+    }
+    
+    public boolean hasErrors()  // delegates to the error handler; NOTE(review): getDataAsString guards against errors == null, this does not
+    {
+        return(errors.hasErrors());
+    }
+
+    public List<Object> getErrors()  // delegates to the error handler; NOTE(review): getDataAsString guards against errors == null, this does not
+    {
+        return(errors.getErrors());
+    }
+
+}
\ No newline at end of file
diff --git a/src/org/marc4j/MarcStreamReader.java b/src/org/marc4j/MarcStreamReader.java
index 27d85dae..96215969 100644
--- a/src/org/marc4j/MarcStreamReader.java
+++ b/src/org/marc4j/MarcStreamReader.java
@@ -1,4 +1,4 @@
-// $Id: MarcStreamReader.java,v 1.10 2006/12/04 18:45:44 bpeters Exp $
+// $Id: MarcStreamReader.java,v 1.11 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2004 Bas Peters
  *
@@ -20,18 +20,27 @@
  */
 package org.marc4j;
 
+import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.UnsupportedEncodingException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
 
+import org.marc4j.converter.CharConverter;
+import org.marc4j.converter.impl.AnselToUnicode;
 import org.marc4j.marc.ControlField;
 import org.marc4j.marc.DataField;
 import org.marc4j.marc.Leader;
 import org.marc4j.marc.MarcFactory;
 import org.marc4j.marc.Record;
 import org.marc4j.marc.Subfield;
+import org.marc4j.marc.VariableField;
 import org.marc4j.marc.impl.Verifier;
 
 /**
@@ -61,12 +70,12 @@
  * </p>
  * 
  * @author Bas Peters
- * @version $Revision: 1.10 $
+ * @version $Revision: 1.11 $
  * 
  */
 public class MarcStreamReader implements MarcReader {
 
-    private InputStream input = null;
+    private DataInputStream input = null;
 
     private Record record;
 
@@ -75,8 +84,8 @@ public class MarcStreamReader implements MarcReader {
     private String encoding = "ISO8859_1";
 
     private boolean override = false;
-
-    private boolean hasNext = true;
+       
+    private CharConverter converterAnsel = null;
 
     /**
      * Constructs an instance with the specified input stream.
@@ -86,11 +95,10 @@ public MarcStreamReader(InputStream input) {
     }
 
     /**
-     * Constructs an instance with the specified input stream and character
-     * encoding.
+     * Constructs an instance with the specified input stream.
      */
     public MarcStreamReader(InputStream input, String encoding) {
-        this.input = input;
+        this.input = new DataInputStream(new BufferedInputStream(input));
         factory = MarcFactory.newInstance();
         if (encoding != null) {
             this.encoding = encoding;
@@ -116,117 +124,121 @@ public boolean hasNext() {
      * 
      * @return Record - the record object
      */
-    public Record next() {
-        Leader ldr;
-        int bytesRead = 0;
-
+    public Record next() 
+    {
         record = factory.newRecord();
 
         try {
 
             byte[] byteArray = new byte[24];
-            bytesRead = input.read(byteArray);
-
-            if (bytesRead == -1)
-                throw new MarcException("no data to read");
-
-            while (bytesRead != -1 && bytesRead != byteArray.length)
-                bytesRead += input.read(byteArray, bytesRead, byteArray.length
-                        - bytesRead);
-
-            try {
-                ldr = parseLeader(byteArray);
-            } catch (IOException e) {
-                throw new MarcException("error parsing leader with data: "
-                        + new String(byteArray), e);
-            }
-
-            // if MARC 21 then check encoding
-            switch (ldr.getCharCodingScheme()) {
-            case ' ':
-                if (!override)
-                    encoding = "ISO8859_1";
-                break;
-            case 'a':
-                if (!override)
-                    encoding = "UTF8";
-            }
-
-            record.setLeader(ldr);
+            input.readFully(byteArray);
 
-            int directoryLength = ldr.getBaseAddressOfData() - (24 + 1);
-            if ((directoryLength % 12) != 0)
-                throw new MarcException("invalid directory");
-            int size = directoryLength / 12;
-
-            String[] tags = new String[size];
-            int[] lengths = new int[size];
+            int recordLength = parseRecordLength(byteArray);
+            byte[] recordBuf = new byte[recordLength - 24];
+            input.readFully(recordBuf);
+            parseRecord(record, byteArray, recordBuf, recordLength);
+            return(record);
+        }
+        catch (EOFException e) {
+            throw new MarcException("Premature end of file encountered", e);
+        } 
+        catch (IOException e) {
+            throw new MarcException("an error occured reading input", e);
+        }   
+    }
+    
+    private void parseRecord(Record record, byte[] byteArray, byte[] recordBuf, int recordLength)
+    {
+        Leader ldr;
+        ldr = factory.newLeader();
+        ldr.setRecordLength(recordLength);
+        int directoryLength=0;
+        
+        try {                
+            parseLeader(ldr, byteArray);
+            directoryLength = ldr.getBaseAddressOfData() - (24 + 1);
+        } 
+        catch (IOException e) {
+            throw new MarcException("error parsing leader with data: "
+                    + new String(byteArray), e);
+        } 
+        catch (MarcException e) {
+            throw new MarcException("error parsing leader with data: "
+                    + new String(byteArray), e);
+        }
 
-            byte[] tag = new byte[3];
-            byte[] length = new byte[4];
-            byte[] start = new byte[5];
+        // if MARC 21 then check encoding
+        switch (ldr.getCharCodingScheme()) {
+        case ' ':
+            if (!override)
+                encoding = "ISO-8859-1";
+            break;
+        case 'a':
+            if (!override)
+                encoding = "UTF8";
+        }
+        record.setLeader(ldr);
+        
+        if ((directoryLength % 12) != 0)
+        {
+            throw new MarcException("invalid directory");
+        }
+        DataInputStream inputrec = new DataInputStream(new ByteArrayInputStream(recordBuf));
+        int size = directoryLength / 12;
 
-            String tmp;
+        String[] tags = new String[size];
+        int[] lengths = new int[size];
 
-            for (int i = 0; i < size; i++) {
-                bytesRead = input.read(tag);
+        byte[] tag = new byte[3];
+        byte[] length = new byte[4];
+        byte[] start = new byte[5];
 
-                while (bytesRead != -1 && bytesRead != tag.length)
-                    bytesRead += input.read(tag, bytesRead, tag.length
-                            - bytesRead);
+        String tmp;
 
+        try {
+            for (int i = 0; i < size; i++) 
+            {
+                inputrec.readFully(tag);                
                 tmp = new String(tag);
                 tags[i] = tmp;
-
-                bytesRead = input.read(length);
-
-                while (bytesRead != -1 && bytesRead != length.length)
-                    bytesRead += input.read(length, bytesRead, length.length
-                            - bytesRead);
-
+    
+                inputrec.readFully(length);
                 tmp = new String(length);
                 lengths[i] = Integer.parseInt(tmp);
-
-                bytesRead = input.read(start);
-
-                while (bytesRead != -1 && bytesRead != start.length)
-                    bytesRead += input.read(start, bytesRead, start.length
-                            - bytesRead);
+    
+                inputrec.readFully(start);
             }
-
-            if (input.read() != Constants.FT)
-                throw new MarcException(
-                        "expected field terminator at end of directory");
-
-            for (int i = 0; i < size; i++) {
-                if (Verifier.isControlField(tags[i])) {
+    
+            if (inputrec.read() != Constants.FT)
+            {
+                throw new MarcException("expected field terminator at end of directory");
+            }
+            
+            for (int i = 0; i < size; i++) 
+            {
+                int fieldLength = getFieldLength(inputrec);
+                if (Verifier.isControlField(tags[i])) 
+                {
                     byteArray = new byte[lengths[i] - 1];
-                    bytesRead = input.read(byteArray);
-
-                    while (bytesRead != -1 && bytesRead != byteArray.length)
-                        bytesRead += input.read(byteArray, bytesRead,
-                                byteArray.length - bytesRead);
-
-                    if (input.read() != Constants.FT)
-                        throw new MarcException(
-                                "expected field terminator at end of field");
-
+                    inputrec.readFully(byteArray);
+    
+                    if (inputrec.read() != Constants.FT)
+                    {
+                        throw new MarcException("expected field terminator at end of field");
+                    }
+    
                     ControlField field = factory.newControlField();
                     field.setTag(tags[i]);
                     field.setData(getDataAsString(byteArray));
                     record.addVariableField(field);
-
-                } else {
+                } 
+                else 
+                {
                     byteArray = new byte[lengths[i]];
-                    bytesRead = input.read(byteArray);
-
-                    while (bytesRead != -1 && bytesRead != byteArray.length)
-                        bytesRead += input.read(byteArray, bytesRead,
-                                byteArray.length - bytesRead);
-
+                    inputrec.readFully(byteArray);
+    
                     try {
-                        record.addVariableField(parseDataField(tags[i],
-                                byteArray));
+                        record.addVariableField(parseDataField(tags[i], byteArray));
                     } catch (IOException e) {
                         throw new MarcException(
                                 "error parsing data field for tag: " + tags[i]
@@ -235,14 +247,16 @@ record = factory.newRecord();
                     }
                 }
             }
-
-            if (input.read() != Constants.RT)
+            
+            if (inputrec.read() != Constants.RT)
+            {
                 throw new MarcException("expected record terminator");
-
-        } catch (IOException e) {
-            throw new MarcException("an error occured reading input", e);
+            } 
+        }
+        catch (IOException e)
+        {
+            throw new MarcException("an error occured reading input", e);            
         }
-        return record;
     }
 
     private DataField parseDataField(String tag, byte[] field)
@@ -286,6 +300,25 @@ private DataField parseDataField(String tag, byte[] field)
         }
         return dataField;
     }
+    
+    private int getFieldLength(DataInputStream bais) throws IOException  // counts the bytes before the next field terminator, then rewinds the stream
+    {
+        bais.mark(9999);  // remember the current position so the scan below can be undone
+        int bytesRead = 0;
+        while (true) {
+            switch (bais.read()) {
+             case Constants.FT:
+                bais.reset();  // rewind to the mark; the terminator itself is not counted
+                return bytesRead;
+            case -1:
+                bais.reset();
+                throw new IOException("Field not terminated");  // end of stream reached before any field terminator
+            case Constants.US:
+            default:
+                bytesRead++;  // every other byte, subfield separators included, counts toward the length
+            }
+        }
+    }
 
     private int getSubfieldLength(ByteArrayInputStream bais) throws IOException {
         bais.mark(9999);
@@ -305,63 +338,89 @@ private int getSubfieldLength(ByteArrayInputStream bais) throws IOException {
         }
     }
 
-    private Leader parseLeader(byte[] leaderData) throws IOException {
+    private int parseRecordLength(byte[] leaderData) throws IOException {  // reads up to five characters from leaderData and parses them as a decimal int
         InputStreamReader isr = new InputStreamReader(new ByteArrayInputStream(
                 leaderData));
-        Leader ldr = factory.newLeader();
+        int length = -1;  // placeholder; replaced below unless parsing throws
         char[] tmp = new char[5];
         isr.read(tmp);
         try {
-            ldr.setRecordLength(Integer.parseInt(new String(tmp)));
+            length = Integer.parseInt(new String(tmp));  // a non-numeric value surfaces as MarcException below
         } catch (NumberFormatException e) {
             throw new MarcException("unable to parse record length", e);
         }
+        return(length);
+    }
+    
+    private void parseLeader(Leader ldr, byte[] leaderData) throws IOException {  // fills ldr from leaderData; the record length is not set here
+        InputStreamReader isr = new InputStreamReader(new ByteArrayInputStream(
+                leaderData));
+        char[] tmp = new char[5];
+        isr.read(tmp);
+        // The five characters just read hold the record length; they are skipped because it has already been computed.
         ldr.setRecordStatus((char) isr.read());
         ldr.setTypeOfRecord((char) isr.read());
         tmp = new char[2];
         isr.read(tmp);
         ldr.setImplDefined1(tmp);
         ldr.setCharCodingScheme((char) isr.read());
+        char indicatorCount = (char) isr.read();  // the numeric fields are only captured here; they are parsed after the reader is closed
+        char subfieldCodeLength = (char) isr.read();
+        char baseAddr[] = new char[5];
+        isr.read(baseAddr);
+        tmp = new char[3];
+        isr.read(tmp);
+        ldr.setImplDefined2(tmp);
+        tmp = new char[4];
+        isr.read(tmp);
+        ldr.setEntryMap(tmp);
+        isr.close();  // all reads are finished before any of the parse steps below can throw
         try {
-            ldr.setIndicatorCount(Integer.parseInt(String.valueOf((char) isr
-                    .read())));
+            ldr.setIndicatorCount(Integer.parseInt(String.valueOf(indicatorCount)));
         } catch (NumberFormatException e) {
             throw new MarcException("unable to parse indicator count", e);
         }
         try {
             ldr.setSubfieldCodeLength(Integer.parseInt(String
-                    .valueOf((char) isr.read())));
+                    .valueOf(subfieldCodeLength)));
         } catch (NumberFormatException e) {
             throw new MarcException("unable to parse subfield code length", e);
         }
-        tmp = new char[5];
-        isr.read(tmp);
         try {
-            ldr.setBaseAddressOfData(Integer.parseInt(new String(tmp)));
+            ldr.setBaseAddressOfData(Integer.parseInt(new String(baseAddr)));
         } catch (NumberFormatException e) {
             throw new MarcException("unable to parse base address of data", e);
         }
-        tmp = new char[3];
-        isr.read(tmp);
-        ldr.setImplDefined2(tmp);
-        tmp = new char[4];
-        isr.read(tmp);
-        ldr.setEntryMap(tmp);
-        isr.close();
-        return ldr;
+
     }
 
-    private String getDataAsString(byte[] bytes) {
+    private String getDataAsString(byte[] bytes) 
+    {  // decodes bytes according to the encoding name held in the encoding field
         String dataElement = null;
-        if (encoding != null)
+        if (encoding.equals("UTF-8") || encoding.equals("UTF8"))
+        {
+            try {
+                dataElement = new String(bytes, "UTF8");
+            } 
+            catch (UnsupportedEncodingException e) {
+                throw new MarcException("unsupported encoding", e);
+            }
+        }
+        else if (encoding.equals("MARC-8") || encoding.equals("MARC8"))
+        {
+            if (converterAnsel == null) converterAnsel = new AnselToUnicode();  // converter is created on first use
+            dataElement = converterAnsel.convert(bytes);
+        }
+        else if (encoding.equals("ISO-8859-1") || encoding.equals("ISO8859_1"))
+        {
             try {
-                dataElement = new String(bytes, encoding);
-            } catch (UnsupportedEncodingException e) {
+                dataElement = new String(bytes, "ISO-8859-1");
+            } 
+            catch (UnsupportedEncodingException e) {
                 throw new MarcException("unsupported encoding", e);
             }
-        else
-            dataElement = new String(bytes);
+        }  // NOTE(review): any other encoding name leaves dataElement null, and a null encoding throws NullPointerException at the first equals() - confirm both are intended
         return dataElement;
     }
-
+    
 }
\ No newline at end of file
diff --git a/src/org/marc4j/MarcXmlParserThread.java b/src/org/marc4j/MarcXmlParserThread.java
index fc7c4ced..5c98b5f4 100644
--- a/src/org/marc4j/MarcXmlParserThread.java
+++ b/src/org/marc4j/MarcXmlParserThread.java
@@ -1,4 +1,4 @@
-// $Id: MarcXmlParserThread.java,v 1.2 2006/05/20 09:25:46 bpeters Exp $
+// $Id: MarcXmlParserThread.java,v 1.3 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2004 Bas Peters
  * 
@@ -30,7 +30,7 @@
  * MARCXML data.
  * 
  * @author Bas Peters
- * @version $Revision: 1.2 $
+ * @version $Revision: 1.3 $
  */
 public class MarcXmlParserThread extends Thread {
 
@@ -115,7 +115,12 @@ public void run() {
                 parser.parse(input);
             else
                 parser.parse(input, th);
-        } finally {
+        } 
+        catch (MarcException me)
+        {
+            queue.passException(me);
+        }
+        finally {
             queue.end();
         }
     }
diff --git a/src/org/marc4j/RecordStack.java b/src/org/marc4j/RecordStack.java
index a0a72699..97ff7773 100644
--- a/src/org/marc4j/RecordStack.java
+++ b/src/org/marc4j/RecordStack.java
@@ -1,4 +1,4 @@
-// $Id: RecordStack.java,v 1.1 2005/05/04 10:06:46 bpeters Exp $
+// $Id: RecordStack.java,v 1.2 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2004 Bas Peters
  *
@@ -31,12 +31,12 @@
  * <code>Record</code> objects created by <code>MarcXmlParser</code>.
  * 
  * @author Bas Peters
- * @version $Revision: 1.1 $
+ * @version $Revision: 1.2 $
  */
 public class RecordStack {
 
   private List list;
-
+  private RuntimeException re = null;
   private boolean eof = false;
 
   /**
@@ -77,6 +77,7 @@ public synchronized Record pop() {
       } catch (Exception e) {
       }
     }
+    if (re != null) throw(re);
     Record record = null;
     if (list.size() > 0)
       record = (Record) list.remove(0);
@@ -98,12 +99,23 @@ public synchronized boolean hasNext() {
       } catch (Exception e) {
       }
     }
-
+    if (re != null) throw(re);
     if (!isEmpty() || !eof)
       return true;
     return false;
   }
 
+  /**
+   * Stores the given exception and marks the stack as finished, then wakes every thread
+   * waiting on this object; pop() and hasNext() rethrow the stored exception.
+   * @param e the exception to hand over to the thread calling pop() or hasNext()
+   */
+  public synchronized void passException(RuntimeException e) {
+    this.eof = true;
+    this.re = e;
+    this.notifyAll();
+  }
+
   /**
    * Called when the end of the document is reached.
    *  
diff --git a/src/org/marc4j/converter/CharConverter.java b/src/org/marc4j/converter/CharConverter.java
index e04b94cb..74d4e2f7 100644
--- a/src/org/marc4j/converter/CharConverter.java
+++ b/src/org/marc4j/converter/CharConverter.java
@@ -1,4 +1,4 @@
-//$Id: CharConverter.java,v 1.1 2005/05/04 10:06:46 bpeters Exp $
+//$Id: CharConverter.java,v 1.2 2008/09/26 21:18:16 haschart Exp $
 /**
  * Copyright (C) 2005 Bas Peters
  *
@@ -25,9 +25,9 @@
  * Implement this class to create a character converter.
  * 
  * @author Bas Peters
- * @version $Revision: 1.1 $
+ * @version $Revision: 1.2 $
  */
-public interface CharConverter {
+public abstract class CharConverter {
 
   /**
    * Converts the dataElement and returns the result as a <code>String</code>
@@ -36,6 +36,25 @@ public interface CharConverter {
    * @param dataElement the data to convert
    * @return String the conversion result
    */
-  public String convert(String dataElement);
+    public abstract String convert(char[] dataElement);
+    
+    /** Maps each byte to the char with the same unsigned value (0-255) and converts the result. */
+    public String convert(byte[] data) 
+    {
+        char[] widened = new char[data.length];
+        for (int pos = 0; pos < widened.length; pos++)
+        {
+            widened[pos] = (char) (data[pos] & 0xFF);
+        }
+        return convert(widened);
+    }
+
+    /** Converts the characters of dataElement by delegating to convert(char[]). */
+    public String convert(String dataElement) 
+    {
+        char[] chars = dataElement.toCharArray();
+        return convert(chars);
+    }
+    
 
 }
\ No newline at end of file
diff --git a/src/org/marc4j/converter/impl/AnselToUnicode.java b/src/org/marc4j/converter/impl/AnselToUnicode.java
index 6a0b3754..c54b29b3 100644
--- a/src/org/marc4j/converter/impl/AnselToUnicode.java
+++ b/src/org/marc4j/converter/impl/AnselToUnicode.java
@@ -1,4 +1,4 @@
-// $Id: AnselToUnicode.java,v 1.3 2005/12/14 17:11:30 bpeters Exp $
+// $Id: AnselToUnicode.java,v 1.4 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2002 Bas Peters (mail@bpeters.com)
  *
@@ -21,8 +21,11 @@
 package org.marc4j.converter.impl;
 
 import java.io.InputStream;
+import java.lang.reflect.Constructor;
 import java.util.Vector;
 
+import org.marc4j.ErrorHandler;
+import org.marc4j.MarcException;
 import org.marc4j.converter.CharConverter;
 
 /**
@@ -37,9 +40,9 @@
  * 
  * @author Bas Peters
  * @author Corey Keith
- * @version $Revision: 1.3 $
+ * @version $Revision: 1.4 $
  */
-public class AnselToUnicode implements CharConverter {
+public class AnselToUnicode extends CharConverter {
 
     class Queue extends Vector {
 
@@ -101,20 +104,80 @@ public String toString() {
         }
     }
 
-    protected CodeTable ct;
+    protected CodeTableInterface ct;
 
     protected boolean loadedMultibyte = false;
 
+    protected ErrorHandler errorList = null;
     /**
      * Creates a new instance and loads the MARC4J supplied
      * conversion tables based on the official LC tables.
      *  
      */
-    public AnselToUnicode() {
-        this(AnselToUnicode.class
-                .getResourceAsStream("resources/codetablesnocjk.xml"));
+    public AnselToUnicode() 
+    {
+        ct = loadGeneratedTable(false);  // false: if the fallback path is taken, resources/codetablesnocjk.xml is loaded
     }
+    
+    /**
+     * Creates a new instance whose code table is obtained from loadGeneratedTable.
+     *
+     * @param loadMultibyte forwarded unchanged to loadGeneratedTable
+     */
+    public AnselToUnicode(boolean loadMultibyte) 
+    {
+        this.ct = this.loadGeneratedTable(loadMultibyte);
+    }
+    /**
+     * Creates a new instance that keeps the given error handler.
+     * The code table is obtained from loadGeneratedTable(false).
+     * @param errorList handler stored in the errorList field
+     */
+    public AnselToUnicode(ErrorHandler errorList) 
+    {
+        this.errorList = errorList;
+        this.ct = this.loadGeneratedTable(false);
+    }
+
+    /**
+     * Creates a new instance that keeps the given error handler.
+     * @param errorList     handler stored in the errorList field
+     * @param loadMultibyte forwarded unchanged to loadGeneratedTable
+     */
+    public AnselToUnicode(ErrorHandler errorList, boolean loadMultibyte) 
+    {
+        this.errorList = errorList;
+        this.ct = this.loadGeneratedTable(loadMultibyte);
+    }
+
 
+    /**
+     * Obtains the code table. The CodeTableGenerated class is instantiated by reflection;
+     * if that fails for any reason, a CodeTable is built from an XML resource instead.
+     *
+     * @param loadMultibyte on fallback, true selects codetables.xml, false codetablesnocjk.xml
+     * @return the code table to use for conversion
+     */
+    private CodeTableInterface loadGeneratedTable(boolean loadMultibyte) 
+    {
+        try
+        {
+            Class tableClass = Class.forName("org.marc4j.converter.impl.CodeTableGenerated");
+            CodeTableInterface table = (CodeTableInterface) tableClass.getConstructor().newInstance();
+            // with the flag set, checkMode will not call loadMultibyte() later
+            loadedMultibyte = true;
+            return table;
+        }
+        catch (Exception e)
+        {
+            // the generated class could not be loaded or instantiated: use the XML tables
+            String resource = loadMultibyte ? "resources/codetables.xml" : "resources/codetablesnocjk.xml";
+            CodeTableInterface table = new CodeTable(AnselToUnicode.class.getResourceAsStream(resource));
+            loadedMultibyte = loadMultibyte;
+            return table;
+        }
+    }
+    
     /**
      * Constructs an instance with the specified pathname.
      * 
@@ -144,7 +207,7 @@ public AnselToUnicode(InputStream in) {
     }
 
     /**
-     * Loads the entire maping (including multibyte characters) from the Library
+     * Loads the entire mapping (including multibyte characters) from the Library
      * of Congress.
      */
     private void loadMultibyte() {
@@ -153,58 +216,146 @@ private void loadMultibyte() {
     }
 
     private void checkMode(char[] data, CodeTracker cdt) {
-        while (cdt.offset < data.length && isEscape(data[cdt.offset])) {
-            switch (data[cdt.offset + 1]) {
-            case 0x28:
-            case 0x2c:
-                cdt.g0 = data[cdt.offset + 2];
-                cdt.offset += 3;
-                cdt.multibyte = false;
+        int extra = 0;   // count of spaces skipped directly after the escape character
+        int extra2 = 0;  // count of spaces skipped after the '$' of a multibyte escape
+        int extra3 = 0;  // never read anywhere in this method
+        while (cdt.offset + extra + extra2< data.length && isEscape(data[cdt.offset])) {  // NOTE(review): the bound covers offset+extra+extra2, but the switches below index one or two positions further
+            switch (data[cdt.offset + 1 + extra]) {
+            case 0x28:  // '('
+            case 0x2c:  // ','
+                set_cdt(cdt, 0, data, 2 + extra, false); 
                 break;
-            case 0x29:
-            case 0x2d:
-                cdt.g1 = data[cdt.offset + 2];
-                cdt.offset += 3;
-                cdt.multibyte = false;
+            case 0x29:  // ')'
+            case 0x2d:  // '-'
+                set_cdt(cdt, 1, data, 2 + extra, false); 
                 break;
-            case 0x24:
-                cdt.multibyte = true;
+            case 0x24:  // '$'
                 if (!loadedMultibyte) {
                     loadMultibyte();
                     loadedMultibyte = true;
                 }
-                switch (data[cdt.offset + 1]) {
-                case 0x29:
-                case 0x2d:
-                    cdt.g1 = data[cdt.offset + 3];
-                    cdt.offset += 4;
+                switch (data[cdt.offset + 2 + extra + extra2]) {
+                case 0x29:  // ')'
+                case 0x2d:  // '-'
+                    set_cdt(cdt, 1, data, 3 + extra + extra2, true); 
+                    break;
+                case 0x2c:  // ','
+                    set_cdt(cdt, 0, data, 3 + extra + extra2, true); 
                     break;
-                case 0x2c:
-                    cdt.g0 = data[cdt.offset + 3];
-                    cdt.offset += 4;
+                case 0x31:  // '1'
+                    cdt.g0 = data[cdt.offset + 2 + extra + extra2];  // same index as the switch operand, so g0 becomes 0x31
+                    cdt.offset += 3 + extra + extra2;
+                    cdt.multibyte = true;
                     break;
-                default:
-                    cdt.g0 = data[cdt.offset + 2];
-                    cdt.offset += 3;
+                case 0x20:  // ' ' 
+                    // space found in escape code: look ahead and try to proceed
+                    extra2++;  // unlike the outer 0x20 case, nothing is thrown here when errorList is null
+                    break;
+                default: 
+                    // unknown code character found: skip only the escape character and keep scanning
+                    cdt.offset += 1;
+                    if (errorList != null)
+                    {
+                        errorList.addError(ErrorHandler.MINOR_ERROR, "Unknown character set code found following escape character. Discarding escape character.");
+                    }
+                    else
+                    {
+                        throw new MarcException("Unknown character set code found following escape character.");
+                    }
                     break;
                 }
                 break;
-            case 0x67:
-            case 0x62:
-            case 0x70:
-                cdt.g0 = data[cdt.offset + 1];
-                cdt.offset += 2;
+            case 0x67:  // 'g'
+            case 0x62:  // 'b'
+            case 0x70:  // 'p'
+                cdt.g0 = data[cdt.offset + 1 + extra];
+                cdt.offset += 2 + extra;
                 cdt.multibyte = false;
                 break;
-            case 0x73:
+            case 0x73:  // 's'
                 cdt.g0 = 0x42;
-                cdt.offset += 2;
+                cdt.offset += 2 + extra;
                 cdt.multibyte = false;
                 break;
+            case 0x20:  // ' ' 
+                // space found in escape code: look ahead and try to proceed
+                if (errorList == null)
+                {
+                    throw new MarcException("Extraneous space character found within MARC8 character set escape sequence");
+                }
+                extra++;
+                break;
+            default: 
+                // unknown code character found: skip only the escape character and keep scanning
+                cdt.offset += 1;
+                if (errorList != null)
+                {
+                    errorList.addError(ErrorHandler.MINOR_ERROR, "Unknown character set code found following escape character. Discarding escape character.");
+                }
+                else
+                {
+                    throw new MarcException("Unknown character set code found following escape character.");
+                }
+                break;
             }
         }
+        if (errorList != null && ( extra != 0 || extra2 != 0))
+        {
+            errorList.addError(ErrorHandler.MINOR_ERROR, "" + (extra+extra2) + " extraneous space characters found within MARC8 character set escape sequence");
+        }
     }
 
+    private void set_cdt(CodeTracker cdt, int g0_or_g1, char[] data, int addnlOffset, boolean multibyte)
+    {
+        if (data[cdt.offset + addnlOffset] == '!' && data[cdt.offset + addnlOffset + 1] == 'E') 
+        {
+            addnlOffset++;
+        }
+        else if (data[cdt.offset + addnlOffset] == ' ') 
+        {
+            if (errorList != null)
+            {
+                errorList.addError(ErrorHandler.MINOR_ERROR, "Extraneous space character found within MARC8 character set escape sequence. Skipping over space.");
+            }           
+            else
+            {
+                throw new MarcException("Extraneous space character found within MARC8 character set escape sequence");
+            }
+            addnlOffset++;
+        }
+        else if ("(,)-$!".indexOf(data[cdt.offset + addnlOffset]) != -1) 
+        {
+            if (errorList != null)
+            {
+                errorList.addError(ErrorHandler.MINOR_ERROR, "Extraneaous intermediate character found following escape character. Discarding intermediate character.");
+            }           
+            else
+            {
+                throw new MarcException("Extraneaous intermediate character found following escape character.");
+            }
+            addnlOffset++;
+        }
+        if ("34BE1NQS2".indexOf(data[cdt.offset + addnlOffset]) == -1)
+        {
+            cdt.offset += 1;
+            cdt.multibyte = false;
+            if (errorList != null)
+            {
+                errorList.addError(ErrorHandler.MINOR_ERROR, "Unknown character set code found following escape character. Discarding escape character.");
+            }           
+            else
+            {
+                throw new MarcException("Unknown character set code found following escape character.");
+            }
+        }
+        else  // All is well, proceed normally
+        {
+            if (g0_or_g1 == 0) cdt.g0 = data[cdt.offset + addnlOffset];
+            else               cdt.g1 = data[cdt.offset + addnlOffset];
+            cdt.offset += 1 + addnlOffset;
+            cdt.multibyte = multibyte;
+        }
+    }
     /**
      * <p>
      * Converts MARC-8 data to UCS/Unicode.
@@ -214,9 +365,8 @@ private void checkMode(char[] data, CodeTracker cdt) {
      *            the MARC-8 data
      * @return String - the UCS/Unicode data
      */
-    public String convert(String dataElement) {
-        char[] data = null;
-        data = dataElement.toCharArray();
+    public String convert(char  data[]) 
+    {
         StringBuffer sb = new StringBuffer();
         int len = data.length;
 
@@ -232,14 +382,17 @@ public String convert(String dataElement) {
 
         Queue diacritics = new Queue();
 
-        while (cdt.offset < data.length) {
+        while (cdt.offset < data.length) 
+        {
             if (ct.isCombining(data[cdt.offset], cdt.g0, cdt.g1)
-                    && hasNext(cdt.offset, len)) {
+                    && hasNext(cdt.offset, len)) 
+            {
 
                 while (ct.isCombining(data[cdt.offset], cdt.g0, cdt.g1)
-                        && hasNext(cdt.offset, len)) {
-                    diacritics.put(new Character(getChar(data[cdt.offset],
-                            cdt.g0, cdt.g1)));
+                        && hasNext(cdt.offset, len)) 
+                {
+                    char c = getChar(data[cdt.offset], cdt.g0, cdt.g1);
+                    if (c != 0) diacritics.put(new Character(c));
                     cdt.offset++;
                     checkMode(data, cdt);
                 }
@@ -247,23 +400,122 @@ && hasNext(cdt.offset, len)) {
                 char c2 = getChar(data[cdt.offset], cdt.g0, cdt.g1);
                 cdt.offset++;
                 checkMode(data, cdt);
-                sb.append(c2);
+                if (c2 != 0) sb.append(c2);
 
-                while (!diacritics.isEmpty()) {
+                while (!diacritics.isEmpty()) 
+                {
                     char c1 = ((Character) diacritics.get()).charValue();
                     sb.append(c1);
                 }
 
-            } else if (cdt.multibyte) {
-                sb.append(ct.getChar(makeMultibyte(new String(data).substring(
-                        cdt.offset, cdt.offset + 4).toCharArray()), cdt.g0));
-                cdt.offset += 3;
-            } else {
-                sb.append(getChar(data[cdt.offset], cdt.g0, cdt.g1));
+            } 
+            else if (cdt.multibyte)
+            {
+                if (data[cdt.offset]== 0x20)
+                {
+                    // if a 0x20 byte occurs amidst a sequence of multibyte characters
+                    // skip over it and output a space.
+                    // Hmmm.  If the following line is present it seems to output two spaces 
+                    // when a space occurs in multibytes chars, without it one seems to be output.
+                    //    sb.append(getChar(data[cdt.offset], cdt.g0, cdt.g1));
+                    cdt.offset += 1;
+                }
+                else if (cdt.offset + 3 <= data.length && (errorList == null || data[cdt.offset+1]!= 0x20 && data[cdt.offset+2]!= 0x20)) 
+                {
+                    char c = getMBChar(makeMultibyte(data[cdt.offset], data[cdt.offset+1], data[cdt.offset+2]));
+                    if (errorList == null  || c != 0)
+                    { 
+                        sb.append(c);
+                        cdt.offset += 3;
+                    }
+                    else if (cdt.offset + 6 <= data.length && data[cdt.offset+4]!= 0x20 && data[cdt.offset+5]!= 0x20 &&
+                            getMBChar(makeMultibyte(data[cdt.offset+3], data[cdt.offset+4], data[cdt.offset+5])) != 0)
+                    {
+                        if (errorList != null)
+                        {
+                            errorList.addError(ErrorHandler.MAJOR_ERROR, "Erroneous MARC8 multibyte character, Discarding bad character and continuing reading Multibyte characters");
+                            sb.append("[?]");
+                            cdt.offset += 3;
+                        }
+                    }
+                    else if (cdt.offset + 4 <= data.length && data[cdt.offset] > 0x7f && 
+                            getMBChar(makeMultibyte(data[cdt.offset+1], data[cdt.offset+2], data[cdt.offset+3])) != 0)
+                    {
+                        if (errorList != null)
+                        {
+                            errorList.addError(ErrorHandler.MAJOR_ERROR, "Erroneous character in MARC8 multibyte character, Copying bad character and continuing reading Multibyte characters");
+                            sb.append(getChar(data[cdt.offset], 0x42, 0x45));
+                            cdt.offset += 1;
+                        }
+                    }
+                    else
+                    {
+                        if (errorList != null)
+                        {
+                            errorList.addError(ErrorHandler.MAJOR_ERROR, "Erroneous MARC8 multibyte character, inserting change to default character set");
+                        }
+                        cdt.multibyte = false;
+                        cdt.g0 = 0x42;
+                        cdt.g1 = 0x45;
+                    }
+                } 
+                else if (errorList != null && cdt.offset + 4 <= data.length && ( data[cdt.offset+1] == 0x20 || data[cdt.offset+2]== 0x20)) 
+                {
+                    int multiByte = makeMultibyte( data[cdt.offset], ((data[cdt.offset+1] != 0x20)? data[cdt.offset+1] : data[cdt.offset+2]),  data[cdt.offset+3]);
+                    char c = getMBChar(multiByte);
+                    if (c != 0) 
+                    {
+                        if (errorList != null)
+                        {
+                            errorList.addError(ErrorHandler.ERROR_TYPO, "Extraneous space found within MARC8 multibyte character");
+                        }
+                        sb.append(c);
+                        sb.append(' ');
+                        cdt.offset += 4;
+                    }
+                    else
+                    {
+                        if (errorList != null)
+                        {
+                            errorList.addError(ErrorHandler.MAJOR_ERROR, "Erroneous MARC8 multibyte character, inserting change to default character set");
+                        }
+                        cdt.multibyte = false;
+                        cdt.g0 = 0x42;
+                        cdt.g1 = 0x45;
+                    }
+                } 
+                else if (cdt.offset + 3 > data.length) 
+                {
+                    if (errorList != null)
+                    {
+                        errorList.addError(ErrorHandler.MAJOR_ERROR, "Partial MARC8 multibyte character, inserting change to default character set");
+                        cdt.multibyte = false;
+                        cdt.g0 = 0x42;
+                        cdt.g1 = 0x45;
+                    }
+                    // if a field ends with an incomplete encoding of a multibyte character
+                    // simply discard that final partial character.
+                    else 
+                    {
+                        cdt.offset += 3;
+                    }
+                } 
+            }
+            else 
+            {
+                char c = getChar(data[cdt.offset], cdt.g0, cdt.g1);
+                if (c != 0) sb.append(c);
+                else 
+                {
+                    String val = "0000"+Integer.toHexString((int)(data[cdt.offset]));
+                    sb.append("<U+"+ (val.substring(val.length()-4, val.length()))+ ">" );
+                }
                 cdt.offset += 1;
             }
             if (hasNext(cdt.offset, len))
+            {
                 checkMode(data, cdt);
+            }
         }
         return sb.toString();
     }
@@ -275,6 +527,15 @@ private int makeMultibyte(char[] data) {
         chars[2] = data[2];
         return chars[0] | chars[1] | chars[2];
     }
+    
+    /** Combines three chars into one int: (c1 shifted left 16) OR (c2 shifted left 8) OR c3. */
+    public int makeMultibyte(char c1, char c2, char c3) 
+    {
+        int high = c1 << 16;
+        int middle = c2 << 8;
+        int packed = high | middle | c3;
+        return packed;
+    }
 
     private char getChar(int ch, int g0, int g1) {
         if (ch <= 0x7E)
@@ -283,7 +544,7 @@ private char getChar(int ch, int g0, int g1) {
             return ct.getChar(ch, g1);
     }
 
-    private char getMBChar(int ch) {
+    public char getMBChar(int ch) {  // looks ch up in the code table using mode 0x31
         return ct.getChar(ch, 0x31);
     }
 
diff --git a/src/org/marc4j/converter/impl/CodeTable.java b/src/org/marc4j/converter/impl/CodeTable.java
index 586efd30..ed125ec4 100644
--- a/src/org/marc4j/converter/impl/CodeTable.java
+++ b/src/org/marc4j/converter/impl/CodeTable.java
@@ -1,4 +1,4 @@
-// $Id: CodeTable.java,v 1.2 2005/12/14 17:11:30 bpeters Exp $
+// $Id: CodeTable.java,v 1.3 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2002 Bas Peters
  *
@@ -23,8 +23,11 @@
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStream;
+import java.io.PrintStream;
 import java.net.URI;
-import java.util.Hashtable;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Vector;
 
 import javax.xml.parsers.SAXParser;
@@ -41,21 +44,21 @@
  * </p>
  * 
  * @author Corey Keith
- * @version $Revision: 1.2 $
+ * @version $Revision: 1.3 $
  *  
  */
-public class CodeTable {
-    protected static Hashtable charsets = null;
+public class CodeTable implements CodeTableInterface {
+    protected static HashMap charsets = null;
 
-    protected static Hashtable combining = null;
+    protected static HashMap combining = null;
 
     public boolean isCombining(int i, int g0, int g1) {
         if (i <= 0x7E) {
             Vector v = (Vector) combining.get(new Integer(g0));
-            return v.contains(new Integer(i));
+            return (v != null && v.contains(new Integer(i)));  // no entry for g0 is answered with false
         } else {
             Vector v = (Vector) combining.get(new Integer(g1));
-            return v.contains(new Integer(i));
+            return (v != null && v.contains(new Integer(i)));  // no entry for g1 is answered with false
         }
     }
 
@@ -63,7 +66,7 @@ public char getChar(int c, int mode) {
         if (c == 0x20)
             return (char) c;
         else {
-            Hashtable charset = (Hashtable) charsets.get(new Integer(mode));
+            HashMap charset = (HashMap) charsets.get(new Integer(mode));
 
             if (charset == null) {
 //                System.err.println("Hashtable not found: "
@@ -72,18 +75,13 @@ public char getChar(int c, int mode) {
             } else {
                 Character ch = (Character) charset.get(new Integer(c));
                 if (ch == null) {
-
-                    int newc;
-                    if (c < 0x80)
-                        newc = c + 0x80;
-                    else
-                        newc = c - 0x80;
+                    int newc = (c < 0x80) ? c + 0x80 : c - 0x80;
                     ch = (Character) charset.get(new Integer(newc));
                     if (ch == null) {
 //                        System.err.println("Character not found: "
 //                                + Integer.toHexString(c) + " in Code Table: "
 //                                + Integer.toHexString(mode));
-                        return (char) c;
+                        return (char) 0;
                     } else
                         return ch.charValue();
                 } else
@@ -92,6 +90,144 @@ public char getChar(int c, int mode) {
         }
     }
 
+    public void dumpTableAsSwitchStatement(PrintStream output)
+    {   // Prints Java source for a CodeTableGenerated class whose switch statements mirror the combining and charsets tables.
+        output.println("package org.marc4j.converter.impl;");
+        output.println("");
+        output.println("// Warning: This file is generated by running the main routine in the file CodeTable.java ");
+        output.println("// Warning: Do not edit this file, or all edits will be lost at the next build. ");
+        output.println("public class CodeTableGenerated implements CodeTableInterface {");
+        output.println("\tpublic boolean isCombining(int i, int g0, int g1) {");
+        output.println("\t\tswitch (i <= 0x7E ? g0 : g1) {");
+        Object combiningKeys[] = combining.keySet().toArray();
+        Arrays.sort(combiningKeys);
+        for (int combiningSel = 0; combiningSel < combiningKeys.length; combiningSel++)
+        {
+            Integer nextKey = (Integer)combiningKeys[combiningSel];
+            output.println("\t\t\tcase 0x"+Integer.toHexString(nextKey)+":");
+            Vector v = (Vector) combining.get(nextKey);
+            Iterator vIter = v.iterator();
+            if (vIter.hasNext())
+            {
+                output.println("\t\t\t\tswitch(i) {");
+                while (vIter.hasNext())
+                {
+                    Integer vVal = (Integer)vIter.next();
+                    output.println("\t\t\t\t\tcase 0x"+Integer.toHexString(vVal)+":");
+                }
+                output.println("\t\t\t\t\t\treturn(true);");
+                output.println("\t\t\t\t\tdefault:");
+                output.println("\t\t\t\t\t\treturn(false);");
+                output.println("\t\t\t\t}");
+            }
+            else
+            {
+                output.println("\t\t\t\treturn(false);");
+            }
+        }
+        output.println("\t\t\tdefault:");
+        output.println("\t\t\t\treturn(false);");
+        output.println("\t\t\t}");
+        output.println("\t}");
+        output.println("");
+        output.println("\tpublic char getChar(int c, int mode) {");
+        output.println("\t\tint code = getCharCode(c, mode);");
+        output.println("\t\tif (code == -1) return((char)0);");
+        output.println("\t\tif (code != 0) return((char)code);");
+        output.println("\t\tcode = getCharCode(c < 0x80 ? c + 0x80 : c - 0x80 , mode);");
+        output.println("\t\treturn((char)code);");
+        output.println("\t}");
+        output.println("");
+        output.println("\tprivate int getCharCode(int c, int mode) {");
+        output.println("\t\tif (c == 0x20) return  c;");
+        output.println("\t\tswitch (mode) {");
+        Object charsetsKeys[] = charsets.keySet().toArray();
+        Arrays.sort(charsetsKeys);
+        for (int charsetSel = 0; charsetSel < charsetsKeys.length; charsetSel++)
+        {
+            Integer nextKey = (Integer)charsetsKeys[charsetSel];
+            output.println("\t\t\tcase 0x"+Integer.toHexString(nextKey)+":");
+            if (nextKey.intValue() == 0x31)
+            {
+                output.println("\t\t\t\treturn(getMultiByteChar(c));");
+            }
+            else
+            {
+                HashMap map = (HashMap) charsets.get(nextKey);
+                Object keyArray[] = map.keySet().toArray();
+                Arrays.sort(keyArray);
+                output.println("\t\t\t\tswitch(c) {");
+                for (int sel = 0; sel < keyArray.length; sel++)
+                {
+                    Integer mKey = (Integer)keyArray[sel];
+                    Character c = (Character)map.get(mKey);
+                    if (c != null)
+                        output.println("\t\t\t\t\tcase 0x"+Integer.toHexString(mKey)+":  return(0x"+Integer.toHexString((int)c.charValue())+"); ");
+                    else
+                        output.println("\t\t\t\t\tcase 0x"+Integer.toHexString(mKey)+":  return(0); ");
+                }
+                output.println("\t\t\t\t\tdefault:  return(0);");
+                output.println("\t\t\t\t}");
+            }
+        }
+        output.println("\t\t\tdefault: return(-1);  // unknown charset specified ");
+        output.println("\t\t}");
+        output.println("\t}");
+        output.println("");
+        StringBuffer getMultiByteFunc = new StringBuffer();
+        getMultiByteFunc.append("\tpublic int getMultiByteChar(int c) {\n");
+        // Charset 0x31 is emitted separately: range helpers are printed now, the dispatcher is buffered.
+        HashMap map = (HashMap) charsets.get(new Integer(0x31));
+        Object keyArray[] = map.keySet().toArray();
+        Arrays.sort(keyArray);
+        // Each call prints one range-limited helper and appends its dispatch test to getMultiByteFunc.
+        dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x210000, 0x214fff);
+        dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x215000, 0x21ffff);
+        // (0x210000-0x21ffff is emitted as the two halves above, not as a single helper)
+        dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x220000, 0x22ffff);
+        dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x230000, 0x27ffff);
+        dumpPartialMultiByteTable(output, getMultiByteFunc, keyArray, map, 0x280000, 0x7f7fff);
+        // Codes outside every range above make the generated dispatcher return 0.
+        getMultiByteFunc.append("\t\treturn(0);\n");
+        getMultiByteFunc.append("\t}");
+        output.println(getMultiByteFunc.toString());
+        // Closing brace of the generated class.
+        output.println("}");
+
+    }
+
+    /**
+     * Prints one generated lookup method covering the keys startByte..endByte
+     * and appends the range test that dispatches to it onto buffer.
+     */
+    public void dumpPartialMultiByteTable(PrintStream output, StringBuffer buffer, Object keyArray[], HashMap map, int startByte, int endByte)
+    {
+        String lo = "0x"+Integer.toHexString(startByte);
+        String hi = "0x"+Integer.toHexString(endByte);
+        String methodName = "getMultiByteChar_"+lo+"_"+hi;
+        buffer.append("\t\tif (c >= "+lo+" && c <= "+hi+")  return ("+methodName+"(c));\n");
+        output.println("\tpublic char "+methodName+"(int c) {");
+        output.println("\t\tswitch(c) {");
+        for (int sel = 0; sel < keyArray.length; sel++)
+        {
+            Integer mKey = (Integer)keyArray[sel];
+            if (mKey < startByte || mKey > endByte) continue;   // key belongs to another range
+            Character mapped = (Character)map.get(mKey);
+            String value = (mapped != null) ? "0x"+Integer.toHexString((int)mapped.charValue()) : "0";
+            output.println("\t\t\tcase 0x"+Integer.toHexString(mKey)+":  return((char)"+value+"); ");
+        }
+        output.println("\t\t\tdefault: return((char)0);");
+        output.println("\t\t}");
+        output.println("\t}");
+        output.println("");
+    }
+
+    /** Loads resources/codetables.xml and prints the generated CodeTableGenerated source to standard output. */
+    public static void main(String args[])
+    {
+        new CodeTable(CodeTable.class.getResourceAsStream("resources/codetables.xml")).dumpTableAsSwitchStatement(System.out);
+    }
+    
     public CodeTable(InputStream byteStream) {
         try {
 
diff --git a/src/org/marc4j/converter/impl/CodeTableHandler.java b/src/org/marc4j/converter/impl/CodeTableHandler.java
index 3ea843dc..9a9a4321 100644
--- a/src/org/marc4j/converter/impl/CodeTableHandler.java
+++ b/src/org/marc4j/converter/impl/CodeTableHandler.java
@@ -1,4 +1,4 @@
-// $Id: CodeTableHandler.java,v 1.1 2005/05/04 10:06:46 bpeters Exp $
+// $Id: CodeTableHandler.java,v 1.2 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2002 Bas Peters
  *
@@ -22,7 +22,7 @@
 
 import java.io.File;
 import java.io.FileInputStream;
-import java.util.Hashtable;
+import java.util.HashMap;
 import java.util.Vector;
 
 import javax.xml.parsers.SAXParser;
@@ -41,17 +41,17 @@
  * builds a data structure to facilitate AnselToUnicode character conversion.
  * 
  * @author Corey Keith
- * @version $Revision: 1.1 $
+ * @version $Revision: 1.2 $
  * 
  * @see DefaultHandler
  */
 public class CodeTableHandler extends DefaultHandler {
 
-  private Hashtable sets;
+  private HashMap sets;
 
-  private Hashtable charset;
+  private HashMap charset;
 
-  private Hashtable combiningchars;
+  private HashMap combiningchars;
 
   /** Data element identifier */
   private Integer isocode;
@@ -75,11 +75,11 @@ public class CodeTableHandler extends DefaultHandler {
   /** Locator object */
   private Locator locator;
 
-  public Hashtable getCharSets() {
+  public HashMap getCharSets() {
     return sets;
   }
 
-  public Hashtable getCombiningChars() {
+  public HashMap getCombiningChars() {
     return combiningchars;
   }
 
@@ -98,14 +98,14 @@ public void setDocumentLocator(Locator locator) {
   public void startElement(String uri, String name, String qName,
       Attributes atts) throws SAXParseException {
     if (name.equals("characterSet")) {
-      charset = new Hashtable();
+      charset = new HashMap();
       isocode = Integer.valueOf(atts.getValue("ISOcode"), 16);
       combining = new Vector();
     } else if (name.equals("marc"))
       data = new StringBuffer();
     else if (name.equals("codeTables")) {
-      sets = new Hashtable();
-      combiningchars = new Hashtable();
+      sets = new HashMap();
+      combiningchars = new HashMap();
     } else if (name.equals("ucs"))
       data = new StringBuffer();
     else if (name.equals("alt"))
@@ -135,7 +135,7 @@ public void endElement(String uri, String name, String qName)
       if (data.length() > 0)
         ucs = new Character((char) Integer.parseInt(data.toString(), 16));
       else
-        useAlt = true;
+        ucs = null;
     } else if (name.equals("alt")) {
       if (useAlt && data.length() > 0) {
         ucs = new Character((char) Integer.parseInt(data.toString(), 16));
@@ -155,7 +155,7 @@ public void endElement(String uri, String name, String qName)
   }
 
   public static void main(String[] args) {
-    Hashtable charsets = null;
+    HashMap charsets = null;
 
     try {
 
diff --git a/src/org/marc4j/converter/impl/CodeTableInterface.java b/src/org/marc4j/converter/impl/CodeTableInterface.java
new file mode 100644
index 00000000..b8b31bed
--- /dev/null
+++ b/src/org/marc4j/converter/impl/CodeTableInterface.java
@@ -0,0 +1,7 @@
+package org.marc4j.converter.impl;
+
+/** Character lookup operations shared by CodeTable and the generated code table class. */
+public interface CodeTableInterface {
+    boolean isCombining(int i, int g0, int g1);
+    char getChar(int c, int mode);
+}
diff --git a/src/org/marc4j/converter/impl/Iso5426ToUnicode.java b/src/org/marc4j/converter/impl/Iso5426ToUnicode.java
index 371735a3..91974521 100644
--- a/src/org/marc4j/converter/impl/Iso5426ToUnicode.java
+++ b/src/org/marc4j/converter/impl/Iso5426ToUnicode.java
@@ -1,4 +1,4 @@
-// $Id: Iso5426ToUnicode.java,v 1.1 2005/05/04 10:06:46 bpeters Exp $
+// $Id: Iso5426ToUnicode.java,v 1.2 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2002 Bas  Peters  (mail@bpeters.com)
  * Copyright (C) 2002 Yves Pratter (ypratter@club-internet.fr)
@@ -30,9 +30,9 @@
  * 
  * @author Bas Peters
  * @author Yves Pratter
- * @version $Revision: 1.1 $
+ * @version $Revision: 1.2 $
  */
-public class Iso5426ToUnicode implements CharConverter {
+public class Iso5426ToUnicode extends CharConverter {
 
   /**
    * <p>
@@ -43,8 +43,7 @@ public class Iso5426ToUnicode implements CharConverter {
    *          the UNIMARC data
    * @return {@link String}- the UCS/Unicode data
    */
-  public String convert(String dataElement) {
-    char[] data = dataElement.toCharArray();
+  public String convert(char data[]) {
     StringBuffer sb = new StringBuffer();
 
     for (int i = 0; i < data.length; i++) {
diff --git a/src/org/marc4j/converter/impl/Iso6937ToUnicode.java b/src/org/marc4j/converter/impl/Iso6937ToUnicode.java
index 35e938b0..21c24ccd 100644
--- a/src/org/marc4j/converter/impl/Iso6937ToUnicode.java
+++ b/src/org/marc4j/converter/impl/Iso6937ToUnicode.java
@@ -1,4 +1,4 @@
-// $Id: Iso6937ToUnicode.java,v 1.1 2005/05/04 10:06:46 bpeters Exp $
+// $Id: Iso6937ToUnicode.java,v 1.2 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2002 Bas  Peters  (mail@bpeters.com)
  * Copyright (C) 2002 Yves Pratter (ypratter@club-internet.fr)
@@ -30,9 +30,9 @@
  * 
  * @author Bas Peters
  * @author Yves Pratter
- * @version $Revision: 1.1 $
+ * @version $Revision: 1.2 $
  */
-public class Iso6937ToUnicode implements CharConverter {
+public class Iso6937ToUnicode extends CharConverter {
 
   /**
    * <p>
@@ -43,8 +43,7 @@ public class Iso6937ToUnicode implements CharConverter {
    *          the ISO 6937 data
    * @return {@link String}- the UCS/Unicode data
    */
-  public String convert(String dataElement) {
-    char[] data = dataElement.toCharArray();
+  public String convert(char data[]) {
     StringBuffer sb = new StringBuffer();
 
     for (int i = 0; i < data.length; i++) {
diff --git a/src/org/marc4j/converter/impl/UnicodeToAnsel.java b/src/org/marc4j/converter/impl/UnicodeToAnsel.java
index 5831bde4..774ca8ad 100644
--- a/src/org/marc4j/converter/impl/UnicodeToAnsel.java
+++ b/src/org/marc4j/converter/impl/UnicodeToAnsel.java
@@ -1,4 +1,4 @@
-// $Id: UnicodeToAnsel.java,v 1.2 2005/11/28 16:50:22 bpeters Exp $
+// $Id: UnicodeToAnsel.java,v 1.3 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2002 Bas Peters (mail@bpeters.com)
  *
@@ -36,9 +36,9 @@
  * 
  * @author Bas Peters
  * @author Corey Keith
- * @version $Revision: 1.2 $
+ * @version $Revision: 1.3 $
  */
-public class UnicodeToAnsel implements CharConverter {
+public class UnicodeToAnsel extends CharConverter {
     protected ReverseCodeTable rct;
 
     static final char ESC = 0x1b;
@@ -97,8 +97,7 @@ public UnicodeToAnsel(InputStream in) {
      *            the UCS/Unicode data
      * @return String - the MARC-8 data
      */
-    public String convert(String dataElement) {
-        char[] data = dataElement.toCharArray();
+    public String convert(char data[]) {
         StringBuffer sb = new StringBuffer();
         CodeTableTracker ctt = new CodeTableTracker();
 
diff --git a/src/org/marc4j/converter/impl/UnicodeToIso5426.java b/src/org/marc4j/converter/impl/UnicodeToIso5426.java
index e2bfa958..ee139579 100644
--- a/src/org/marc4j/converter/impl/UnicodeToIso5426.java
+++ b/src/org/marc4j/converter/impl/UnicodeToIso5426.java
@@ -1,4 +1,4 @@
-// $Id: UnicodeToIso5426.java,v 1.1 2005/05/04 10:06:46 bpeters Exp $
+// $Id: UnicodeToIso5426.java,v 1.2 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2002 Bas Peters (mail@bpeters.com)
  *
@@ -29,9 +29,9 @@
  * 
  * @author Bas Peters
  * @author Yves Pratter
- * @version $Revision: 1.1 $
+ * @version $Revision: 1.2 $
  */
-public class UnicodeToIso5426 implements CharConverter {
+public class UnicodeToIso5426 extends CharConverter {
 
   /**
    * <p>
@@ -46,8 +46,7 @@ public class UnicodeToIso5426 implements CharConverter {
    *          the UCS/Unicode data
    * @return {@link String}- the UNIMARC (ISO 5426 charset) data
    */
-  public String convert(String dataElement) {
-    char[] data = dataElement.toCharArray();
+  public String convert(char data[]) {
     StringBuffer sb = new StringBuffer();
     for (int i = 0; i < data.length; i++) {
       char c = data[i];
diff --git a/src/org/marc4j/converter/impl/UnicodeToIso6937.java b/src/org/marc4j/converter/impl/UnicodeToIso6937.java
index a6f66eff..d6c5d8f0 100644
--- a/src/org/marc4j/converter/impl/UnicodeToIso6937.java
+++ b/src/org/marc4j/converter/impl/UnicodeToIso6937.java
@@ -1,4 +1,4 @@
-// $Id: UnicodeToIso6937.java,v 1.1 2005/05/04 10:06:46 bpeters Exp $
+// $Id: UnicodeToIso6937.java,v 1.2 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2002 Bas Peters (mail@bpeters.com)
  *
@@ -29,9 +29,9 @@
  * 
  * @author Bas Peters
  * @author Yves Pratter
- * @version $Revision: 1.1 $
+ * @version $Revision: 1.2 $
  */
-public class UnicodeToIso6937 implements CharConverter {
+public class UnicodeToIso6937 extends CharConverter {
 
   /**
    * <p>
@@ -46,8 +46,7 @@ public class UnicodeToIso6937 implements CharConverter {
    *          the UCS/Unicode data
    * @return {@link String}- the ISO 6937 data
    */
-  public String convert(String dataElement) {
-    char[] data = dataElement.toCharArray();
+  public String convert(char data[]) {
     StringBuffer sb = new StringBuffer();
     for (int i = 0; i < data.length; i++) {
       char c = data[i];
diff --git a/src/org/marc4j/marc/impl/RecordImpl.java b/src/org/marc4j/marc/impl/RecordImpl.java
index d6f59cc8..d5a32c5b 100644
--- a/src/org/marc4j/marc/impl/RecordImpl.java
+++ b/src/org/marc4j/marc/impl/RecordImpl.java
@@ -1,4 +1,4 @@
-// $Id: RecordImpl.java,v 1.4 2006/08/04 12:29:01 bpeters Exp $
+// $Id: RecordImpl.java,v 1.5 2008/09/26 21:17:43 haschart Exp $
 /**
  * Copyright (C) 2004 Bas Peters
  *
@@ -35,7 +35,7 @@
  * Represents a MARC record.
  * 
  * @author Bas Peters
- * @version $Revision: 1.4 $
+ * @version $Revision: 1.5 $
  */
 public class RecordImpl implements Record {
 
@@ -179,7 +179,9 @@ public List getVariableFields() {
     }
 
     public String getControlNumber() {
-        return new String(getControlNumberField().getData());
+        ControlField field = getControlNumberField();
+        if (field == null || field.getData() == null) return null;   // no 001 field, or field without data
+        return new String(field.getData());
     }
 
     public List getVariableFields(String[] tags) {
diff --git a/src/org/marc4j/marc/impl/Verifier.java b/src/org/marc4j/marc/impl/Verifier.java
index d9b6970d..2f3a7bbf 100644
--- a/src/org/marc4j/marc/impl/Verifier.java
+++ b/src/org/marc4j/marc/impl/Verifier.java
@@ -1,4 +1,4 @@
-// $Id: Verifier.java,v 1.1 2005/05/04 10:06:47 bpeters Exp $
+// $Id: Verifier.java,v 1.2 2008/09/26 21:17:42 haschart Exp $
 /**
  * Copyright (C) 2004 Bas Peters
  *
@@ -29,7 +29,7 @@
  * Handles MARC checks on tags, data elements and <code>Record</code> objects.
  * 
  * @author Bas Peters
- * @version $Revision: 1.1 $
+ * @version $Revision: 1.2 $
  */
 public class Verifier {
 
@@ -40,8 +40,8 @@ private Verifier() {
    * Returns true if the given <code>String</code> value identifies a tag for
    * a control field (001 through 009).
    */
-  public static boolean isControlField(String tag) throws NumberFormatException {
-    if (Integer.parseInt(tag) < 10)
+  public static boolean isControlField(String tag) {
+    if (tag.length() == 3 && tag.startsWith("00") && "0123456789".indexOf(tag.charAt(2)) >= 0) // exactly "000".."009"
       return true;
     return false;
   }
@@ -50,14 +50,12 @@ public static boolean isControlField(String tag) throws NumberFormatException {
    * Returns true if the given <code>String</code> value identifies a tag for
    * a control number field (001).
    */
-  public static boolean isControlNumberField(String tag)
-      throws NumberFormatException {
-    if (Integer.parseInt(tag) == 1)
+  public static boolean isControlNumberField(String tag) {
+    if (tag.equals("001")) // exact three-character match; "1" or "01" no longer qualify
       return true;
     return false;
   }
-
-  /**
+  /**
    * Returns true if the given <code>Collection</code> contains an instance of
    * a <code>ControlField</code> with a control number field tag (001).
    * 
diff --git a/src/org/marc4j/samples/HandleExceptionExample.java b/src/org/marc4j/samples/HandleExceptionExample.java
new file mode 100644
index 00000000..4bcae7aa
--- /dev/null
+++ b/src/org/marc4j/samples/HandleExceptionExample.java
@@ -0,0 +1,57 @@
+// $Id: HandleExceptionExample.java,v 1.1 2008/09/26 21:17:42 haschart Exp $
+/**
+ * Copyright (C) 2002-2006 Bas Peters
+ *
+ * This file is part of MARC4J
+ *
+ * MARC4J is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public 
+ * License as published by the Free Software Foundation; either 
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * MARC4J is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public 
+ * License along with MARC4J; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+package org.marc4j.samples;
+
+import java.io.InputStream;
+
+import org.marc4j.MarcException;
+import org.marc4j.MarcReader;
+import org.marc4j.MarcStreamReader;
+import org.marc4j.marc.Record;
+
+/**
+ * Reads MARC input.
+ * 
+ * @author Bas Peters
+ * @version $Revision: 1.1 $
+ */
+public class HandleExceptionExample {
+
+    /**
+     * Reads resources/error.mrc with MarcStreamReader and reports any
+     * MarcException raised along the way, including its message.
+     */
+    public static void main(String args[]) throws Exception {
+        InputStream input = HandleExceptionExample.class
+                .getResourceAsStream("resources/error.mrc");
+        try {
+            MarcReader reader = new MarcStreamReader(input);
+            while (reader.hasNext()) {
+                Record record = reader.next();
+                System.out.println(record.toString());
+            }
+        } catch (MarcException e) {
+            // Include the cause; the bare message alone hid what actually failed.
+            System.out.println("something went wrong man! " + e.getMessage());
+        }
+        System.out.println("damn!");
+    }
+}
\ No newline at end of file
diff --git a/src/org/marc4j/samples/PermissiveReaderExample.java b/src/org/marc4j/samples/PermissiveReaderExample.java
new file mode 100644
index 00000000..de167ba0
--- /dev/null
+++ b/src/org/marc4j/samples/PermissiveReaderExample.java
@@ -0,0 +1,229 @@
+package org.marc4j.samples;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Iterator;
+import java.util.List;
+
+import org.marc4j.ErrorHandler;
+import org.marc4j.MarcException;
+import org.marc4j.MarcPermissiveStreamReader;
+import org.marc4j.MarcReader;
+import org.marc4j.MarcStreamWriter;
+import org.marc4j.MarcWriter;
+import org.marc4j.marc.Record;
+
+public class PermissiveReaderExample
+{
+
+    /**
+     * This test program demonstrates the use of the MarcPermissiveStreamReader 
+     * to read Marc records, with the permissive setting turned on.  It also 
+     * demonstrates the capability of printing out the error messages that are
+     * generated when the MarcPermissiveStreamReader encounters records with 
+     * structural errors or encoding errors.
+     * 
+     *  When run in verbose mode, (by passing -v as the first parameter) the 
+     *  program will display the entire record highlighting the lines in the 
+     *  record that have errors that the permissive reader was able to detect 
+     *  and make an attempt at correcting.  Following that the program will 
+     *  list all of the errors that it found in the record.
+     *  
+     *  When run in verbose mode as described above, the program is useful for
+     *  validating records.
+     *  
+     *  Shown below is the output generated when the program is run on the file
+     *  error.mrc found in the resources sub-directory in the samples directory:
+     *  
+     *  Fatal Exception: error parsing data field for tag: 250 with data:    a1st ed.
+     *  Typo         : Record terminator character not found at end of record length --- [ n/a : n/a ]
+     *  Typo         : Record terminator appears after stated record length, reading extra bytes --- [ n/a : n/a ]
+     *  Minor Error  : Field length found in record different from length stated in the directory. --- [ n/a : n/a ]
+     *     LEADER 00715cam a2200205 a 4500
+     *     001 12883376
+     *     005 20030616111422.0
+     *     008 020805s2002    nyu    j      000 1 eng  
+     *     020   $a0786808772
+     *     020   $a0786816155 (pbk.)
+     *     040   $aDLC$cDLC$dDLC
+     *     100 1 $aChabon, Michael.
+     *     245 10$aSummerland /$cMichael Chabon.
+     *     250   $a1st ed.
+     *     260   $aNew York :$bMiramax Books/Hyperion Books for Children,$cc2002.
+     *     300   $a500 p. ;$c22 cm.
+     *     520   $aEthan Feld, the worst baseball player in the history of the game, finds himself recruited by a 100-year-old scout to help a band of fairies triumph over an ancient enemy.
+     *     650  1$aFantasy.
+     *     650  1$aBaseball$vFiction.
+     *     650  1$aMagic$vFiction.
+     */
+    public static void main(String[] args)
+    {
+        PrintStream out = System.out;
+        boolean verbose = Boolean.parseBoolean(System.getProperty("marc.verbose"));
+        // NOTE(review): same property as verbose; a separate one may have been intended -- confirm.
+        boolean veryverbose = Boolean.parseBoolean(System.getProperty("marc.verbose"));
+        if (args.length > 0 && args[0].equals("-v"))
+        {
+            verbose = true;
+            String newArgs[] = new String[args.length-1];
+            System.arraycopy(args, 1, newArgs, 0, args.length-1);
+            args = newArgs;
+        }
+        if (args.length > 0 && args[0].equals("-vv"))
+        {
+            verbose = true;
+            veryverbose = true;
+            String newArgs[] = new String[args.length-1];
+            System.arraycopy(args, 1, newArgs, 0, args.length-1);
+            args = newArgs;
+        }
+        if (args.length == 0)
+        {
+            // No input file left after option handling: explain instead of failing on args[0].
+            System.err.println("Usage: PermissiveReaderExample [-v|-vv] <marc-file> [<patched-records-file>]");
+            return;
+        }
+        File file = new File(args[0]);
+        MarcReader readerNormal = null;
+        MarcReader readerPermissive = null;
+        boolean to_utf_8 = true;
+        MarcWriter patchedRecs = null;
+        ErrorHandler errorHandler = new ErrorHandler();
+        try
+        {
+            readerNormal = new MarcPermissiveStreamReader(new FileInputStream(file), false, to_utf_8);
+            readerPermissive = new MarcPermissiveStreamReader(new FileInputStream(file), errorHandler, to_utf_8, "BESTGUESS");
+        }
+        catch (FileNotFoundException e)
+        {
+            // Without both readers the comparison loop below would hit a null reader.
+            System.err.println("Cannot open input file: " + e.getMessage());
+            return;
+        }
+        if (args.length > 1)
+        {
+            try
+            {
+                patchedRecs = new MarcStreamWriter(new FileOutputStream(new File(args[1])));
+            }
+            catch (FileNotFoundException e)
+            {
+                // Best effort: keep comparing, just without saving patched records.
+                System.err.println("Cannot open output file: " + e.getMessage());
+            }
+        }
+        while (readerNormal.hasNext() && readerPermissive.hasNext())
+        {
+            Record recNorm;
+            Record recPerm = readerPermissive.next();
+            String strPerm = recPerm.toString();
+            try {
+                recNorm = readerNormal.next();
+            }
+            catch (MarcException me)
+            {
+                if (verbose)
+                {
+                    out.println("Fatal Exception: "+ me.getMessage());
+                    dumpErrors(out, errorHandler);
+                    showDiffs(out, null, strPerm);
+                    out.println("-------------------------------------------------------------------------------------");
+                }
+                continue;
+            }
+            String strNorm = recNorm.toString();
+            if (!strNorm.equals(strPerm))
+            {
+                if (verbose)
+                {
+                    dumpErrors(out, errorHandler);
+                    showDiffs(out, strNorm, strPerm);
+                    out.println("-------------------------------------------------------------------------------------");
+                }
+                if (patchedRecs != null)
+                {
+                    patchedRecs.write(recPerm);
+                }
+            }
+            else if (errorHandler.hasErrors())
+            {
+                if (verbose)
+                {
+                    out.println("Results identical, but errors reported");
+                    dumpErrors(out, errorHandler);
+                    showDiffs(out, strNorm, strPerm);
+                    out.println("-------------------------------------------------------------------------------------");
+                }
+                if (patchedRecs != null)
+                {
+                    patchedRecs.write(recPerm);
+                }
+            }
+            else if (veryverbose)
+            {
+                showDiffs(out, strNorm, strPerm);
+            }
+        }
+        if (patchedRecs != null)
+        {
+            patchedRecs.close();   // release the patched-records output file
+        }
+    }
+
+    /**
+     * Prints a line-by-line comparison of two record dumps: unchanged lines
+     * are indented, changed lines appear as a " < " (normal) / " > "
+     * (permissive) pair. A null strNorm prints the permissive dump only.
+     * Dumps with different line counts are printed whole, one after the other.
+     */
+    public static void showDiffs(PrintStream out, String strNorm, String strPerm)
+    {
+        String permLines[] = strPerm.split("\n");
+        String normLines[] = (strNorm == null) ? permLines : strNorm.split("\n");
+        if (normLines.length != permLines.length)
+        {
+            // The lines cannot be paired up; previously nothing at all was printed here.
+            for (int i = 0; i < normLines.length; i++)
+                out.println(" < " + normLines[i]);
+            for (int i = 0; i < permLines.length; i++)
+                out.println(" > " + permLines[i]);
+            return;
+        }
+        for (int i = 0; i < normLines.length; i++)
+        {
+            if (normLines[i].equals(permLines[i]))
+            {
+                out.println("   " + normLines[i]);
+            }
+            else
+            {
+                out.println(" < " + normLines[i]);
+                out.println(" > " + permLines[i]);
+            }
+        }
+    }
+    
+    /**
+     * Prints each error held by the handler on its own line; prints nothing
+     * when the handler returns no error list.
+     */
+    public static void dumpErrors(PrintStream out, ErrorHandler errorHandler)
+    {
+        List<Object> errors = errorHandler.getErrors();
+        if (errors != null)
+        {
+            Iterator<Object> iter = errors.iterator();
+            while (iter.hasNext())
+            {
+                Object error = iter.next();
+                out.println(error.toString());
+            }
+        }
+    }
+}
diff --git a/src/org/marc4j/samples/resources/diacritic4.mrc b/src/org/marc4j/samples/resources/diacritic4.mrc
new file mode 100644
index 00000000..1972ded5
--- /dev/null
+++ b/src/org/marc4j/samples/resources/diacritic4.mrc
@@ -0,0 +1 @@
+03059cam  2200301 i 4500001001300000003000400013005001700017008004100034040001800075010001700093245005400110260002100164300002100185500019000206500019000396500015700586500018600743500019700929500023801126500022301364500020801587500016801795500020701963500020902170500016102379500014102540500007602681   77123332 DLC20051218154744.0981008b2001    ilu           000 0 eng    aDLCcDLCdDLC  a   77123332 00aOCLC diacritic and special character test record.  any :bny,c2001.  a100 p. ;c12 cm.  aVOYAGER COLUMN 0  (NEW):  Degree sign (�);  Phono Copyright mark (�);  Copyright mark (�);  Sharp (�);  Inverted Question mark (�);  Inverted Exclamation mark (�); Eszett (�); Euro (�).  aVOYAGER COLUMN 1:  Script L (�);  Polish L (�);  Scandanavian O (�);  D with Crossbar (�);  Icelandic Thorn (�);  AE Digraph (�); OE Digraph (�);  Miagkii Znak (�);  Dot at Midline (�).  aVOYAGER COLUMN 2:  Musical Flat (�);  Patent Mark (�);  Plus or Minus (�);  O Hook (�);  U Hook (�);  Alif (�);  alpha (gas);  Ayn (�);  Polish l (�).  aVOYAGER COLUMN 3:  Scandanavian o (�);  d with crossbar (�);  Icelandic Thorn (�);  ae Digraph (�);  oe Digraph (�);  Tverdii Znak (�);  Turkish i (�);  British Pound (�);  eth (�).  aVOYAGER COLUMN 4:  Dagger (DO NOT USE);  o Hook (�);  u Hook (�);  Beta (gbs);  Gamma (gcs);  Superscript 0 (p0s);  Superscript 1 (p1s);  Superscript 2 (p2s);  Superscript 3 (p3s).  aVOYAGER COLUMN 5:  Superscript 4 (p4s);  Superscript 5 (p5s);  Superscript 6 (p6s);  Superscript 7 (p7s);  Superscript 8 (p8s);  Superscript 9 (p9s);  Superscript + (p+s);  Superscript - (p-s);  Superscript ( (p(s).  aVOYAGER COLUMN 6:  Superscript ) (p)s);  Subscript 0 (b0s);  Subscript  1 (b1s);  Subscript 2 (b2s);  Subscript 3 (b3s);  Subscript 4 (b4s);  Subscript 5 (b5s);  Subscript 6 (b6s);  Subscript 7 (b7s).  
aVOYAGER COLUMN 7:  Subscript 8 (b8s);  Subscript 9 (b9s);  Subscript + (b+s);  Subscript - (b-s);  Subscript ( (b(s);  Subscript ) (b)s);  Pseudo Question Mark (�o);  Grave (�o);  Acute (�o).  aVOYAGER COLUMN 8:  Circumflex (�o);  Tilde (�o);  Macron (�o);  Breve (�o);  Superior Dot (�o);  Umlaut (�o);  Hacek (�o);  Circle Above (�o);  Ligature left (�o).  aVOYAGER COLUMN 9:  Ligature right (�o) ;  High Comma off center (�o);  Double Acute (�o);  Candrabindu (�o);  Cedilla (�o);  Right Hook (�o);  Dot Below (�o);  Double Dot Below (�o);  Circle Below (�o).  aVOYAGER COLUMN 10:  Double Underscore (�o);  Underscore (�o);  Left Hook (�o);  Right Cedilla (�o);  Upadhmaniya (�o);  Double Tilde 1st half (�o);  Double Tilde 2nd half (�o) ;  High Comma centered (�o).  aVOYAGER PC Keyboard:  Spacing Circumflex (^); Spacing Underscore (_); Spacing Grave (`); Open Curly Bracket ({); Close Curly Bracket (}); Spacing Tilde (~).  aStandard PC Keyboard:   1234567890-=   !@#$%^&*()_+   qwertyuiop[]\   QWERTYUIOP{}|   asdfghjkl;'  ASDFGHJKL:"   zxcvbnm,./   ZXCVBNM<>?  aDouble Tilde, 1st and 2nd halves (�o�o) ; Ligature, both halves (�o�o).
\ No newline at end of file
diff --git a/src/org/marc4j/samples/resources/error.mrc b/src/org/marc4j/samples/resources/error.mrc
new file mode 100644
index 00000000..cc8c4c94
--- /dev/null
+++ b/src/org/marc4j/samples/resources/error.mrc
@@ -0,0 +1 @@
+00714cam a2200205 a 45000010009000000050017000090080041000260200015000670200022000820400018001041000021001222450034001432500012001772600067001893000021002565200175002776500013004526500023004656500020004881288337620030616111422.0020805s2002    nyu    j      000 1 eng    a0786808772  a0786816155 (pbk.)  aDLCcDLCdDLC1 aChabon, Michael.10aSummerland /cMichael Chabon.   a1st ed.  aNew York :bMiramax Books/Hyperion Books for Children,cc2002.  a500 p. ;c22 cm.  aEthan Feld, the worst baseball player in the history of the game, finds himself recruited by a 100-year-old scout to help a band of fairies triumph over an ancient enemy. 1aFantasy. 1aBaseballvFiction. 1aMagicvFiction.
\ No newline at end of file