-
-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor to avoid archive difficulties in corpus
- Loading branch information
Showing
7 changed files
with
107 additions
and
79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,34 @@ | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.IO.Compression; | ||
using System.Linq; | ||
|
||
namespace SIL.Machine.Corpora | ||
{ | ||
public class ParatextBackupTermsCorpus : ParatextTermsCorpusBase | ||
public class ParatextBackupTermsCorpus : DictionaryTextCorpus | ||
{ | ||
private readonly ZipArchive _archive; | ||
|
||
public ParatextBackupTermsCorpus( | ||
ZipArchive archive, | ||
string fileName, | ||
IEnumerable<string> termCategories, | ||
bool useTermGlosses = true | ||
) | ||
{ | ||
_archive = archive; | ||
AddTexts(new ZipParatextProjectSettingsParser(archive).Parse(), termCategories, useTermGlosses); | ||
} | ||
|
||
protected override bool Exists(string fileName) | ||
{ | ||
return _archive.GetEntry(fileName) != null; | ||
} | ||
using (var archive = ZipFile.OpenRead(fileName)) | ||
{ | ||
ParatextProjectSettings settings = new ZipParatextProjectSettingsParser(archive).Parse(); | ||
IEnumerable<(string, IEnumerable<string>)> glosses = new ZipParatextTermsParser(archive).Parse( | ||
settings, | ||
termCategories, | ||
useTermGlosses | ||
); | ||
string textId = | ||
$"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; | ||
|
||
protected override Stream Open(string fileName) | ||
{ | ||
ZipArchiveEntry entry = _archive.GetEntry(fileName); | ||
if (entry == null) | ||
return null; | ||
return entry.Open(); | ||
IText text = new MemoryText( | ||
textId, | ||
glosses.Select(kvp => new TextRow(textId, kvp.Item1) { Segment = kvp.Item2.ToList() }) | ||
); | ||
AddText(text); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
using System.IO; | ||
using System.IO.Compression; | ||
|
||
namespace SIL.Machine.Corpora | ||
{ | ||
/// <summary> | ||
/// Terms parser whose file access (Exists/Open) is served from the entries of a ZipArchive. | ||
/// NOTE(review): the archive is only stored here, never disposed; presumably the caller owns it | ||
/// and must keep it open while parsing - confirm against ParatextTermsParserBase. | ||
/// </summary> | ||
public class ZipParatextTermsParser : ParatextTermsParserBase | ||
{ | ||
// Archive that Exists and Open look entries up in. | ||
private readonly ZipArchive _archive; | ||
|
||
/// <summary>Creates a parser that resolves file names against the given archive.</summary> | ||
/// <param name="archive">The zip archive to read entries from.</param> | ||
public ZipParatextTermsParser(ZipArchive archive) | ||
{ | ||
_archive = archive; | ||
} | ||
|
||
/// <summary>Returns true when the archive has an entry named <paramref name="fileName"/>.</summary> | ||
protected override bool Exists(string fileName) | ||
{ | ||
return _archive.GetEntry(fileName) != null; | ||
} | ||
|
||
/// <summary>Opens the archive entry named <paramref name="fileName"/>.</summary> | ||
/// <returns>The stream from the entry's Open(), or null when the archive has no such entry.</returns> | ||
protected override Stream Open(string fileName) | ||
{ | ||
ZipArchiveEntry entry = _archive.GetEntry(fileName); | ||
// A missing entry is reported as null rather than by throwing. | ||
if (entry == null) | ||
return null; | ||
return entry.Open(); | ||
} | ||
} | ||
} |
31 changes: 0 additions & 31 deletions
31
tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsCorpus.cs
This file was deleted.
Oops, something went wrong.
20 changes: 20 additions & 0 deletions
20
tests/SIL.Machine.Tests/Corpora/MemoryProjectTermsParser.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
using System.Text; | ||
|
||
namespace SIL.Machine.Corpora; | ||
|
||
/// <summary> | ||
/// Terms parser whose file access (Exists/Open) is served from an in-memory dictionary | ||
/// that maps a file name to that file's text contents. | ||
/// </summary> | ||
public class MemoryParatextTermsParser(IDictionary<string, string> files) : ParatextTermsParserBase | ||
{ | ||
/// <summary>The file-name-to-contents dictionary supplied to the constructor.</summary> | ||
public IDictionary<string, string> Files { get; } = files; | ||
|
||
/// <summary>Returns true when <see cref="Files"/> has a key equal to <paramref name="fileName"/>.</summary> | ||
protected override bool Exists(string fileName) | ||
{ | ||
return Files.ContainsKey(fileName); | ||
} | ||
|
||
/// <summary>Opens the contents stored under <paramref name="fileName"/> as a stream.</summary> | ||
/// <returns>A new MemoryStream over the UTF-8 bytes of the contents, or null when the key is absent.</returns> | ||
protected override Stream? Open(string fileName) | ||
{ | ||
// A missing key is reported as null rather than by throwing. | ||
if (!Files.TryGetValue(fileName, out string? contents)) | ||
return null; | ||
return new MemoryStream(Encoding.UTF8.GetBytes(contents)); | ||
} | ||
} |
26 changes: 26 additions & 0 deletions
26
tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpus.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
namespace SIL.Machine.Corpora; | ||
|
||
/// <summary> | ||
/// Dictionary text corpus holding a single text built from the terms that | ||
/// MemoryParatextTermsParser parses out of an in-memory set of files. | ||
/// </summary> | ||
public class ParatextProjectTermsCorpus : DictionaryTextCorpus | ||
{ | ||
/// <summary>Parses the terms and adds them to this corpus as one MemoryText.</summary> | ||
/// <param name="files">File name to file contents; handed to MemoryParatextTermsParser.</param> | ||
/// <param name="settings">Project settings passed to the parser and used to build the text id.</param> | ||
/// <param name="termCategories">Passed through unchanged to the parser's Parse call.</param> | ||
/// <param name="useTermGlosses">Passed through unchanged to the parser's Parse call.</param> | ||
public ParatextProjectTermsCorpus( | ||
IDictionary<string, string> files, | ||
ParatextProjectSettings settings, | ||
IEnumerable<string> termCategories, | ||
bool useTermGlosses = true | ||
) | ||
{ | ||
// Each parsed item is a (string, strings) tuple; presumably (term id, glosses) - confirm against the parser. | ||
IEnumerable<(string, IEnumerable<string>)> glosses = new MemoryParatextTermsParser(files).Parse( | ||
settings, | ||
termCategories, | ||
useTermGlosses | ||
); | ||
// The text id joins three biblical-terms settings with ':' separators. | ||
string textId = | ||
$"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; | ||
|
||
// One TextRow per parsed tuple: Item1 is the second TextRow constructor argument, | ||
// and Item2 is copied into a list for the row's Segment. | ||
IText text = new MemoryText( | ||
textId, | ||
glosses.Select(kvp => new TextRow(textId, kvp.Item1) { Segment = kvp.Item2.ToList() }) | ||
); | ||
AddText(text); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters