Fix: \n line breaks could not be read properly

dtlnor · Jan 16, 2023 · bd7251c · bd7251c
1 parent 64d57ea
commit bd7251c
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 10 deletions.
diff --git a/.gitignore b/.gitignore
@@ -25,6 +25,7 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+*.zip
 
 # PyInstaller
 #  Usually these files are written by a python script from a template

diff --git a/REMSG.py b/REMSG.py
@@ -419,7 +419,7 @@ def writeMSG(self) -> bytes:
             stringPoolSet.update([entry.langs[lang] for lang in self.languages])
             stringPoolSet.update([entry.attributes[idx] for idx in isStrAttrIdx])
 
-        strOffsetDict = helper.calcStrPoolOffsets(stringPoolSet) # remove duplicate and make sure \n is \r\n
+        strOffsetDict = helper.calcStrPoolOffsets(stringPoolSet) # not doing string processing here, as it will change the key.
         # debug use: keeps the input and output string pools the same
         # strOffsetDict = dict((v,k) for k,v in self.stringDict.items()) 
         wcharPool = b''.join(helper.toWcharBytes(x) for x in strOffsetDict.keys())

diff --git a/REMSGUtil.py b/REMSGUtil.py
@@ -53,6 +53,7 @@ def searchSameGuid(msg: MSG):
         else:
             print(str(entry.guid)+":"+entry.name)
 
+
 def searchGuid(msg: MSG, guid: uuid.UUID):
     """research use, print out the entry name with that guid"""
     for entry in msg.entrys:
@@ -188,13 +189,13 @@ def importCSV(msgObj: MSG, filename: str, version: int = None, langCount: int =
             attributes.append(value)
 
         entry.buildEntry(
-            fEntry[guididx],
-            int(fEntry[crcidx]),
-            fEntry[nameidx],
-            attributes,
-            [fEntry[i] for i in langidxs],
-            hash=mmh3.hash(key = fEntry[nameidx].encode('utf-16-le'), seed = -1, signed = False) if version > 15 else None,
-            index=i if version <= 15 else None)
+            guid = fEntry[guididx],
+            crc = int(fEntry[crcidx]),
+            name = fEntry[nameidx],
+            attributeValues = attributes,
+            langs = [helper.forceWindowsLineBreak(fEntry[i]) for i in langidxs],
+            hash = mmh3.hash(key = fEntry[nameidx].encode('utf-16-le'), seed = -1, signed = False) if version > 15 else None,
+            index = i if version <= 15 else None)
 
         # not gonna check, left it to user
         # if entry.guid in oldEntrys.keys():
@@ -325,7 +326,7 @@ def importJson(msgObj: MSG, filename: str):
             name=jEntry["name"],
             attributeValues=list([ readAttributeFromStr(next(iter(attr.values())), msg.attributeHeaders[i]["valueType"])
                                     for i, attr in enumerate(jEntry["attributes"])]),
-            langs=list([ content for content in jEntry["content"] ]),
+            langs=list([ helper.forceWindowsLineBreak(content) for content in jEntry["content"] ]),
             hash=mmh3.hash(key = jEntry["name"].encode('utf-16-le'), seed = -1, signed = False) if msg.version > 15 else None,
             index = jIndex if msg.version <= 15 else None
         )

diff --git a/REWString.py b/REWString.py
@@ -75,11 +75,16 @@ def wcharPool2StrPool(wcharPool: bytes) -> str:
     return stringPool
 
 
+def forceWindowsLineBreak(string: str) -> str:
+    """Force /r/n for every linebreak"""
+    return string.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "\r\n")
+
+
 def calcStrPoolOffsets(stringlist: list[str]) -> dict[str, int]:
     """build a offset dict with {string : offset}"""
     newDict = dict()
     sizeCount = 0
-    for string in sorted(set([s.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "\r\n") for s in stringlist])):
+    for string in sorted(set(stringlist)):
         # not adding the null terminator here; it will be done by toWcharBytes()
         newDict[string] = sizeCount
         sizeCount = sizeCount + len(string) * 2 + 2