From 6947c997203817f36d31a01f646855d04ad4e6fc Mon Sep 17 00:00:00 2001 From: Octavia Togami Date: Tue, 1 Oct 2024 20:14:12 -0700 Subject: [PATCH 1/3] Update wrapper --- gradle/wrapper/gradle-wrapper.jar | Bin 43504 -> 43583 bytes gradle/wrapper/gradle-wrapper.properties | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 2c3521197d7c4586c843d1d3e9090525f1898cde..a4b76b9530d66f5e68d973ea569d8e19de379189 100644 GIT binary patch delta 3990 zcmV;H4{7l5(*nQL0Kr1kzC=_KMxQY0|W5(lc#i zH*M1^P4B}|{x<+fkObwl)u#`$GxKKV&3pg*-y6R6txw)0qU|Clf9Uds3x{_-**c=7 z&*)~RHPM>Rw#Hi1R({;bX|7?J@w}DMF>dQQU2}9yj%iLjJ*KD6IEB2^n#gK7M~}6R zkH+)bc--JU^pV~7W=3{E*4|ZFpDpBa7;wh4_%;?XM-5ZgZNnVJ=vm!%a2CdQb?oTa z70>8rTb~M$5Tp!Se+4_OKWOB1LF+7gv~$$fGC95ToUM(I>vrd$>9|@h=O?eARj0MH zT4zo(M>`LWoYvE>pXvqG=d96D-4?VySz~=tPVNyD$XMshoTX(1ZLB5OU!I2OI{kb) zS8$B8Qm>wLT6diNnyJZC?yp{Kn67S{TCOt-!OonOK7$K)e-13U9GlnQXPAb&SJ0#3 z+vs~+4Qovv(%i8g$I#FCpCG^C4DdyQw3phJ(f#y*pvNDQCRZ~MvW<}fUs~PL=4??j zmhPyg<*I4RbTz|NHFE-DC7lf2=}-sGkE5e!RM%3ohM7_I^IF=?O{m*uUPH(V?gqyc(Rp?-Qu(3bBIL4Fz(v?=_Sh?LbK{nqZMD>#9D_hNhaV$0ef3@9V90|0u#|PUNTO>$F=qRhg1duaE z0`v~X3G{8RVT@kOa-pU+z8{JWyP6GF*u2e8eKr7a2t1fuqQy)@d|Qn(%YLZ62TWtoX@$nL}9?atE#Yw`rd(>cr0gY;dT9~^oL;u)zgHUvxc2I*b&ZkGM-iq=&(?kyO(3}=P! zRp=rErEyMT5UE9GjPHZ#T<`cnD)jyIL!8P{H@IU#`e8cAG5jMK zVyKw7--dAC;?-qEu*rMr$5@y535qZ6p(R#+fLA_)G~!wnT~~)|s`}&fA(s6xXN`9j zP#Fd3GBa#HeS{5&8p?%DKUyN^X9cYUc6vq}D_3xJ&d@=6j(6BZKPl?!k1?!`f3z&a zR4ZF60Mx7oBxLSxGuzA*Dy5n-d2K=+)6VMZh_0KetK|{e;E{8NJJ!)=_E~1uu=A=r zrn&gh)h*SFhsQJo!f+wKMIE;-EOaMSMB@aXRU(UcnJhZW^B^mgs|M9@5WF@s6B0p& zm#CTz)yiQCgURE{%hjxHcJ6G&>G9i`7MyftL!QQd5 z@RflRs?7)99?X`kHNt>W3l7YqscBpi*R2+fsgABor>KVOu(i(`03aytf2UA!&SC9v z!E}whj#^9~=XHMinFZ;6UOJjo=mmNaWkv~nC=qH9$s-8roGeyaW-E~SzZ3Gg>j zZ8}<320rg4=$`M0nxN!w(PtHUjeeU?MvYgWKZ6kkzABK;vMN0|U;X9abJleJA(xy<}5h5P(5 z{RzAFPvMnX2m0yH0Jn2Uo-p`daE|(O`YQiC#jB8;6bVIUf?SY(k$#C0`d6qT`>Xe0+0}Oj0=F&*D;PVe=Z<=0AGI<6$gYLwa#r` zm449x*fU;_+J>Mz!wa;T-wldoBB%&OEMJgtm#oaI60TSYCy7;+$5?q!zi5K`u66Wq zvg)Fx$s`V3Em{=OEY{3lmh_7|08ykS&U9w!kp@Ctuzqe1JFOGz6%i5}Kmm9>^=gih z?kRxqLA<3@e=}G4R_?phW{4DVr?`tPfyZSN@R=^;P;?!2bh~F1I|fB7P=V=9a6XU5 z<#0f>RS0O&rhc&nTRFOW7&QhevP0#>j0eq<1@D5yAlgMl5n&O9X|Vq}%RX}iNyRFF z7sX&u#6?E~bm~N|z&YikXC=I0E*8Z$v7PtWfjy)$e_Ez25fnR1Q=q1`;U!~U>|&YS zaOS8y!^ORmr2L4ik!IYR8@Dcx8MTC=(b4P6iE5CnrbI~7j7DmM8em$!da&D!6Xu)!vKPdLG z9f#)se|6=5yOCe)N6xDhPI!m81*dNe7u985zi%IVfOfJh69+#ag4ELzGne?o`eA`42K4T)h3S+s)5IT97%O>du- z0U54L8m4}rkRQ?QBfJ%DLssy^+a7Ajw;0&`NOTY4o;0-ivm9 zBz1C%nr_hQ)X)^QM6T1?=yeLkuG9Lf50(eH}`tFye;01&(p?8i+6h};VV-2B~qdxeC#=X z(JLlzy&fHkyi9Ksbcs~&r^%lh^2COldLz^H@X!s~mr9Dr6z!j+4?zkD@Ls7F8(t(f z9`U?P$Lmn*Y{K}aR4N&1N=?xtQ1%jqf1~pJyQ4SgBrEtR`j4lQuh7cqP49Em5cO=I zB(He2`iPN5M=Y0}h(IU$37ANTGx&|b-u1BYA*#dE(L-lptoOpo&th~E)_)y-`6kSH z3vvyVrcBwW^_XYReJ=JYd9OBQrzv;f2AQdZH#$Y{Y+Oa33M70XFI((fs;mB4e`<<{ ze4dv2B0V_?Ytsi>>g%qs*}oDGd5d(RNZ*6?7qNbdp7wP4T72=F&r?Ud#kZr8Ze5tB z_oNb7{G+(o2ajL$!69FW@jjPQ2a5C)m!MKKRirC$_VYIuVQCpf9rIms0GRDf)8AH${I`q^~5rjot@#3$2#zT2f`(N^P7Z;6(@EK$q*Jgif00I6*^ZGV+XB5uw*1R-@23yTw&WKD{s1;HTL;dO)%5i#`dc6b7;5@^{KU%N|A-$zsYw4)7LA{3`Zp>1 z-?K9_IE&z)dayUM)wd8K^29m-l$lFhi$zj0l!u~4;VGR6Y!?MAfBC^?QD53hy6VdD z@eUZIui}~L%#SmajaRq1J|#> z4m=o$vZ*34=ZWK2!QMNEcp2Lbc5N1q!lEDq(bz0b;WI9;e>l=CG9^n#ro`w>_0F$Q zfZ={2QyTkfByC&gy;x!r*NyXXbk=a%~~(#K?< zTke0HuF5{Q+~?@!KDXR|g+43$+;ab`^flS%miup_0OUTm=nIc%d5nLP)i308PIjl_YMF6cpQ__6&$n6it8K- z8PIjl_YMF6cpQ_!r)L8IivW`WdK8mBs6PXdjR2DYdK8nCs73=4j{uVadK8oNjwX|E wpAeHLsTu^*Y>Trk?aBtSQ(D-o$(D8Px^?ZI-PUB? z*1fv!{YdHme3Fc8%cR@*@zc5A_nq&2=R47Hp@$-JF4Fz*;SLw5}K^y>s-s;V!}b2i=5=M- zComP?ju>8Fe@=H@rlwe1l`J*6BTTo`9b$zjQ@HxrAhp0D#u?M~TxGC_!?ccCHCjt| zF*PgJf@kJB`|Ml}cmsyrAjO#Kjr^E5p29w+#>$C`Q|54BoDv$fQ9D?3n32P9LPMIzu?LjNqggOH=1@T{9bMn*u8(GI z!;MLTtFPHal^S>VcJdiYqX0VU|Rn@A}C1xOlxCribxes0~+n2 z6qDaIA2$?e`opx3_KW!rAgbpzU)gFdjAKXh|5w``#F0R|c)Y)Du0_Ihhz^S?k^pk% zP>9|pIDx)xHH^_~+aA=^$M!<8K~Hy(71nJGf6`HnjtS=4X4=Hk^O71oNia2V{HUCC zoN3RSBS?mZCLw;l4W4a+D8qc)XJS`pUJ5X-f^1ytxwr`@si$lAE?{4G|o; zO0l>`rr?;~c;{ZEFJ!!3=7=FdGJ?Q^xfNQh4A?i;IJ4}B+A?4olTK(fN++3CRBP97 ze~lG9h%oegkn)lpW-4F8o2`*WW0mZHwHez`ko@>U1_;EC_6ig|Drn@=DMV9YEUSCa zIf$kHei3(u#zm9I!Jf(4t`Vm1lltJ&lVHy(eIXE8sy9sUpmz%I_gA#8x^Zv8%w?r2 z{GdkX1SkzRIr>prRK@rqn9j2wG|rUvf6PJbbin=yy-TAXrguvzN8jL$hUrIXzr^s5 zVM?H4;eM-QeRFr06@ifV(ocvk?_)~N@1c2ien56UjWXid6W%6ievIh)>dk|rIs##^kY67ib8Kw%#-oVFaXG7$ERyA9(NSJUvWiOA5H(!{uOpcW zg&-?iqPhds%3%tFspHDqqr;A!e@B#iPQjHd=c>N1LoOEGRehVoPOdxJ>b6>yc#o#+ zl8s8!(|NMeqjsy@0x{8^j0d00SqRZjp{Kj)&4UHYGxG+z9b-)72I*&J70?+8e?p_@ z=>-(>l6z5vYlP~<2%DU02b!mA{7mS)NS_eLe=t)sm&+Pmk?asOEKlkPQ)EUvvfC=;4M&*|I!w}(@V_)eUKLA_t^%`o z0PM9LV|UKTLnk|?M3u!|f2S0?UqZsEIH9*NJS-8lzu;A6-rr-ot=dg9SASoluZUkFH$7X; zP=?kYX!K?JL-b~<#7wU;b;eS)O;@?h%sPPk{4xEBxb{!sm0AY|f9cNvx6>$3F!*0c z75H=dy8JvTyO8}g1w{$9T$p~5en}AeSLoCF>_RT9YPMpChUjl310o*$QocjbH& zbnwg#gssR#jDVN{uEi3n(PZ%PFZ|6J2 z5_rBf0-u>e4sFe0*Km49ATi7>Kn0f9!uc|rRMR1Dtt6m1LW8^>qFlo}h$@br=Rmpi z;mI&>OF64Be{dVeHI8utrh)v^wsZ0jii%x8UgZ8TC%K~@I(4E};GFW&(;WVov}3%H zH;IhRkfD^(vt^DjZz(MyHLZxv8}qzPc(%itBkBwf_fC~sDBgh<3XAv5cxxfF3<2U! z03Xe&z`is!JDHbe;mNmfkH+_LFE*I2^mdL@7(@9DfAcP6O04V-ko;Rpgp<%Cj5r8Z zd0`sXoIjV$j)--;jA6Zy^D5&5v$o^>e%>Q?9GLm{i~p^lAn!%ZtF$I~>39XVZxk0b zROh^Bk9cE0AJBLozZIEmy7xG(yHWGztvfnr0(2ro1%>zsGMS^EMu+S$r=_;9 zWwZkgf7Q7`H9sLf2Go^Xy6&h~a&%s2_T@_Csf19MntF$aVFiFkvE3_hUg(B@&Xw@YJ zpL$wNYf78=0c@!QU6_a$>CPiXT7QAGDM}7Z(0z#_ZA=fmLUj{2z7@Ypo71UDy8GHr z-&TLKf6a5WCf@Adle3VglBt4>Z>;xF}}-S~B7<(%B;Y z0QR55{z-buw>8ilNM3u6I+D$S%?)(p>=eBx-HpvZj{7c*_?K=d()*7q?93us}1dq%FAFYLsW8ZTQ_XZLh`P2*6(NgS}qGcfGXVWpwsp#Rs}IuKbk*`2}&) zI^Vsk6S&Q4@oYS?dJ`NwMVBs6f57+RxdqVub#PvMu?$=^OJy5xEl0<5SLsSRy%%a0 zi}Y#1-F3m;Ieh#Y12UgW?-R)|eX>ZuF-2cc!1>~NS|XSF-6In>zBoZg+ml!6%fk7U zw0LHcz8VQk(jOJ+Yu)|^|15ufl$KQd_1eUZZzj`aC%umU6F1&D5XVWce_wAe(qCSZ zpX-QF4e{EmEVN9~6%bR5U*UT{eMHfcUo`jw*u?4r2s_$`}U{?NjvEm(u&<>B|%mq$Q3weshxk z76<``8vh{+nX`@9CB6IE&z)I%IFjR^LH{s1p|eppv=x za(g_jLU|xjWMAn-V7th$f({|LG8zzIE0g?cyW;%Dmtv%C+0@xVxPE^ zyZzi9P%JAD6ynwHptuzP`Kox7*9h7XSMonCalv;Md0i9Vb-c*!f0ubfk?&T&T}AHh z4m8Bz{JllKcdNg?D^%a5MFQ;#1z|*}H^qHLzW)L}wp?2tY7RejtSh8<;Zw)QGJYUm z|MbTxyj*McKlStlT9I5XlSWtQGN&-LTr2XyNU+`490rg?LYLMRnz-@oKqT1hpCGqP zyRXt4=_Woj$%n5ee<3zhLF>5>`?m9a#xQH+Jk_+|RM8Vi;2*XbK- zEL6sCpaGPzP>k8f4Kh|##_imt#zJMB;ir|JrMPGW`rityK1vHXMLy18%qmMQAm4WZ zP)i30KR&5vs15)C+8dM66&$k~i|ZT;KR&5vs15)C+8dJ(sAmGPijyIz6_bsqKLSFH zlOd=TljEpH0>h4zA*dCTK&emy#FCRCs1=i^sZ9bFmXjf<6_X39E(XY)00000#N437 diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 09523c0..df97d72 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME From 4fc44fac6a0342fa171ac605327efb109055e3f5 Mon Sep 17 00:00:00 2001 From: Octavia Togami Date: Tue, 1 Oct 2024 20:14:20 -0700 Subject: [PATCH 2/3] Support JNBT string encoding as an option Fixes #9 --- gradle/libs.versions.toml | 1 + .../linbus/gui/javafx/MainSceneSetup.java | 95 ++++--- .../linbus/gui/javafx/NbtTreeView.java | 5 +- stream/build.gradle.kts | 1 + .../enginehub/linbus/stream/LinBinaryIO.java | 36 ++- .../linbus/stream/LinReadOptions.java | 102 +++++++ .../linbus/stream/impl/LinNbtReader.java | 129 ++++++++- .../JnbtCompatibilityIntegrationTest.java | 262 ++++++++++++++++++ .../stream/LinBinaryIOIntegrationTest.java | 153 +++++----- .../linbus/stream/StreamTestUtil.java | 6 +- version.txt | 2 +- 11 files changed, 677 insertions(+), 115 deletions(-) create mode 100644 stream/src/main/java/org/enginehub/linbus/stream/LinReadOptions.java create mode 100644 stream/src/test/java/org/enginehub/linbus/stream/JnbtCompatibilityIntegrationTest.java diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 1716b9d..fa5d655 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -13,6 +13,7 @@ ikonli-fontawesome5.module = "org.kordamp.ikonli:ikonli-fontawesome5-pack" junit-bom = "org.junit:junit-bom:5.10.3" junit-jupiter-api.module = "org.junit.jupiter:junit-jupiter-api" junit-jupiter-engine.module = "org.junit.jupiter:junit-jupiter-engine" +junit-jupiter-params.module = "org.junit.jupiter:junit-jupiter-params" truth = "com.google.truth:truth:1.4.4" [libraries.tinylog-api] diff --git a/gui/src/main/java/org/enginehub/linbus/gui/javafx/MainSceneSetup.java b/gui/src/main/java/org/enginehub/linbus/gui/javafx/MainSceneSetup.java index b90bffa..4ff8f50 100644 --- a/gui/src/main/java/org/enginehub/linbus/gui/javafx/MainSceneSetup.java +++ b/gui/src/main/java/org/enginehub/linbus/gui/javafx/MainSceneSetup.java @@ -47,12 +47,14 @@ import javafx.stage.Stage; import org.enginehub.linbus.gui.LinBusGui; import org.enginehub.linbus.gui.util.ErrorReporter; +import org.enginehub.linbus.stream.LinReadOptions; import org.jspecify.annotations.Nullable; import org.kordamp.ikonli.fontawesome5.FontAwesomeSolid; import org.kordamp.ikonli.javafx.FontIcon; import java.io.File; import java.io.IOException; +import java.io.UTFDataFormatException; import java.nio.file.Path; import java.util.Optional; import java.util.concurrent.ExecutorService; @@ -106,28 +108,7 @@ private MenuItem openFile(Stage stage, ExecutorService backgroundExecutor) { return; } Path path = file.toPath(); - backgroundExecutor.submit(new Task>() { - - @Override - protected TreeItem call() throws Exception { - return NbtTreeView.loadTreeItem(path); - } - - @Override - protected void succeeded() { - openPath.set(path); - TreeItem value = getValue(); - originalTag.set(value.getValue()); - treeTableView.setRoot(value); - } - - @Override - protected void failed() { - ErrorReporter.reportError( - ErrorReporter.Level.INFORM, "Failed to open file " + path, getException() - ); - } - }); + backgroundExecutor.submit(new LoadTreeItemTask(path, backgroundExecutor, false)); }); return openFile; } @@ -239,15 +220,6 @@ private Button moveEntryDown() { public final Scene mainScene; public MainSceneSetup(Stage stage, ExecutorService backgroundExecutor) { - openPath.addListener((__, oldPath, newPath) -> { - if (newPath != null) { - try { - treeTableView.setRoot(NbtTreeView.loadTreeItem(newPath)); - } catch (IOException e) { - ErrorReporter.reportError(ErrorReporter.Level.INFORM, "Failed to open file " + newPath, e); - } - } - }); ObservableValue rootTagEntry = treeTableView.rootProperty().flatMap(TreeItem::valueProperty); isModified = originalTag.isNotEqualTo( // Promote ObservableValue to ObjectBinding @@ -267,4 +239,65 @@ public MainSceneSetup(Stage stage, ExecutorService backgroundExecutor) { VBox.setVgrow(treeTableView, Priority.ALWAYS); mainScene = new Scene(mainPane, 900, 600); } + + private class LoadTreeItemTask extends Task> { + + private final Path path; + private final ExecutorService backgroundExecutor; + private final boolean tryingLegacyCompat; + + public LoadTreeItemTask(Path path, ExecutorService backgroundExecutor, boolean tryingLegacyCompat) { + this.path = path; + this.backgroundExecutor = backgroundExecutor; + this.tryingLegacyCompat = tryingLegacyCompat; + } + + @Override + protected TreeItem call() throws Exception { + LinReadOptions.Builder options = LinReadOptions.builder(); + if (tryingLegacyCompat) { + options.allowJnbtStringEncoding(true); + } + return NbtTreeView.loadTreeItem(path, options.build()); + } + + @Override + protected void succeeded() { + TreeItem value = getValue(); + openPath.set(path); + originalTag.set(value.getValue()); + treeTableView.setRoot(value); + } + + @Override + protected void failed() { + Throwable ex = getException(); + if (ex instanceof UTFDataFormatException && !tryingLegacyCompat) { + Alert alert = createUtfAlert(); + Optional result = alert.showAndWait(); + if (result.isPresent() && result.get() == ButtonType.YES) { + backgroundExecutor.submit(new LoadTreeItemTask(path, backgroundExecutor, true)); + return; + } + } + ErrorReporter.reportError( + ErrorReporter.Level.INFORM, "Failed to open file " + path, getException() + ); + } + + private static Alert createUtfAlert() { + Alert alert = new Alert(Alert.AlertType.WARNING); + alert.setTitle(LinBusGui.TITLE_BASE + " - Invalid File"); + alert.setHeaderText("File contained invalid modified UTF-8"); + alert.setContentText("The file you tried to open contained invalid modified UTF-8, " + + "but may be a legacy JNBT file. Would you like to try opening it with JNBT compatibility?\n" + + "Saving the file will convert it to standard NBT format."); + alert.getButtonTypes().setAll( + ButtonType.YES, + ButtonType.NO + ); + return alert; + } + } + } diff --git a/gui/src/main/java/org/enginehub/linbus/gui/javafx/NbtTreeView.java b/gui/src/main/java/org/enginehub/linbus/gui/javafx/NbtTreeView.java index a3e0538..3c4f02b 100644 --- a/gui/src/main/java/org/enginehub/linbus/gui/javafx/NbtTreeView.java +++ b/gui/src/main/java/org/enginehub/linbus/gui/javafx/NbtTreeView.java @@ -26,6 +26,7 @@ import javafx.scene.control.TreeTableRow; import javafx.scene.control.TreeTableView; import org.enginehub.linbus.stream.LinBinaryIO; +import org.enginehub.linbus.stream.LinReadOptions; import org.enginehub.linbus.tree.LinCompoundTag; import org.enginehub.linbus.tree.LinListTag; import org.enginehub.linbus.tree.LinRootEntry; @@ -97,10 +98,10 @@ private static TreeTableColumn createValueColumn() { return valueCol; } - public static TreeItem loadTreeItem(Path file) throws IOException { + public static TreeItem loadTreeItem(Path file, LinReadOptions options) throws IOException { LinRootEntry root; try (var dataInput = new DataInputStream(new GZIPInputStream(Files.newInputStream(file)))) { - root = LinBinaryIO.readUsing(dataInput, LinRootEntry::readFrom); + root = LinBinaryIO.readUsing(dataInput, options, LinRootEntry::readFrom); } assert root != null; return new TagEntryTreeItem(root.name(), root.value()); diff --git a/stream/build.gradle.kts b/stream/build.gradle.kts index a319030..e855881 100644 --- a/stream/build.gradle.kts +++ b/stream/build.gradle.kts @@ -12,6 +12,7 @@ dependencies { testImplementation(platform(libs.junit.bom)) testImplementation(libs.junit.jupiter.api) + testImplementation(libs.junit.jupiter.params) testRuntimeOnly(libs.junit.jupiter.engine) testImplementation(libs.truth) { diff --git a/stream/src/main/java/org/enginehub/linbus/stream/LinBinaryIO.java b/stream/src/main/java/org/enginehub/linbus/stream/LinBinaryIO.java index 4e4a4b2..556c6d3 100644 --- a/stream/src/main/java/org/enginehub/linbus/stream/LinBinaryIO.java +++ b/stream/src/main/java/org/enginehub/linbus/stream/LinBinaryIO.java @@ -50,7 +50,22 @@ public class LinBinaryIO { * @return the stream of NBT tokens */ public static LinStream read(DataInput input) { - return new LinNbtReader(input); + return read(input, LinReadOptions.builder().build()); + } + + /** + * Read a stream of NBT tokens from a {@link DataInput}. + * + *

+ * The input will not be closed by the iterator. The caller is responsible for managing the lifetime of the input. + *

+ * + * @param input the input to read from + * @param options the options for reading + * @return the stream of NBT tokens + */ + public static LinStream read(DataInput input, LinReadOptions options) { + return new LinNbtReader(input, options); } /** @@ -71,6 +86,25 @@ public static LinStream read(DataInput input) { return transform.apply(read(input)); } + /** + * Read a result using a stream of NBT tokens from a {@link DataInput}. + * + *

+ * The input will not be closed by this method. The caller is responsible for managing the lifetime of the input. + *

+ * + * @param input the input to read from + * @param options the options for reading + * @param transform the function to transform the stream of NBT tokens into the result + * @param the type of the result + * @return the result + * @throws IOException if an I/O error occurs ({@link UncheckedIOException} is unwrapped) + */ + public static R readUsing(DataInput input, LinReadOptions options, IOFunction transform) + throws IOException { + return transform.apply(read(input, options)); + } + /** * Write a stream of NBT tokens to a {@link DataOutput}. * diff --git a/stream/src/main/java/org/enginehub/linbus/stream/LinReadOptions.java b/stream/src/main/java/org/enginehub/linbus/stream/LinReadOptions.java new file mode 100644 index 0000000..07a886a --- /dev/null +++ b/stream/src/main/java/org/enginehub/linbus/stream/LinReadOptions.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) EngineHub + * Copyright (c) contributors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +package org.enginehub.linbus.stream; + + +/** + * Options for reading NBT streams. + */ +public final class LinReadOptions { + + /** + * Create a new builder. + * + * @return a new builder + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder for {@link LinReadOptions}. + */ + public static final class Builder { + private boolean allowJnbtStringEncoding = false; + + private Builder() { + } + + /** + * Set whether to allow the string encoding used by JNBT. It is not compliant with the NBT specification and + * uses normal UTF-8 encoding instead of the modified UTF-8 encoding of {@link java.io.DataInput}. + * + *

+ * Note that this option will force checking the bytes to select the correct encoding, which will be slower. + *

+ * + * @param allowJnbtStringEncoding whether to allow the string encoding used by JNBT + * @return this builder + */ + public Builder allowJnbtStringEncoding(boolean allowJnbtStringEncoding) { + this.allowJnbtStringEncoding = allowJnbtStringEncoding; + return this; + } + + /** + * Build the options. + * + * @return the options + */ + public LinReadOptions build() { + return new LinReadOptions(this); + } + + @Override + public String toString() { + return "LinReadOptions.Builder{" + + "allowJnbtStringEncoding=" + allowJnbtStringEncoding + + '}'; + } + } + + private final boolean allowJnbtStringEncoding; + + private LinReadOptions(Builder builder) { + this.allowJnbtStringEncoding = builder.allowJnbtStringEncoding; + } + + /** + * {@return whether to allow the string encoding used by JNBT} It is not compliant with the NBT specification and + * uses normal UTF-8 encoding instead of the modified UTF-8 encoding of {@link java.io.DataInput}. + * + *

+ * Note that this option will force checking the bytes to select the correct encoding, which will be slower. + *

+ */ + public boolean allowJnbtStringEncoding() { + return allowJnbtStringEncoding; + } + + @Override + public String toString() { + return "LinReadOptions{" + + "allowJnbtStringEncoding=" + allowJnbtStringEncoding + + '}'; + } +} diff --git a/stream/src/main/java/org/enginehub/linbus/stream/impl/LinNbtReader.java b/stream/src/main/java/org/enginehub/linbus/stream/impl/LinNbtReader.java index b4a7156..cfcf9b2 100644 --- a/stream/src/main/java/org/enginehub/linbus/stream/impl/LinNbtReader.java +++ b/stream/src/main/java/org/enginehub/linbus/stream/impl/LinNbtReader.java @@ -19,14 +19,19 @@ package org.enginehub.linbus.stream.impl; import org.enginehub.linbus.common.LinTagId; +import org.enginehub.linbus.stream.LinReadOptions; import org.enginehub.linbus.stream.LinStream; import org.enginehub.linbus.stream.exception.NbtParseException; import org.enginehub.linbus.stream.token.LinToken; import org.jspecify.annotations.Nullable; +import java.io.ByteArrayInputStream; import java.io.DataInput; +import java.io.DataInputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.StandardCharsets; import java.util.ArrayDeque; import java.util.Deque; import java.util.List; @@ -35,6 +40,72 @@ * Reads a stream of tokens from a {@link DataInput}. */ public class LinNbtReader implements LinStream { + + /** + * The start of a 2-byte null character in modified UTF-8. + */ + private static final byte TWO_BYTE_NULL_START = (byte) 0b1100_0000; + /** + * The end of a 2-byte null character in modified UTF-8. + */ + public static final byte TWO_BYTE_NULL_END = (byte) 0b10000000; + private static final int TOP_5_BITS = 0b1111_1000; + /** + * The start of a 4-byte character in UTF-8 (top 5 bits). + */ + private static final int FOUR_BYTE_START = 0b1111_0000; + /** + * The 3-byte start {@code 1110} plus {@code 1101}, the start of the surrogate indicator bits. + */ + private static final byte THREE_BYTE_SURROGATE_START = (byte) 0b1110_1101; + private static final int TOP_3_BITS = 0b1110_0000; + /** + * The continuation from {@link #THREE_BYTE_SURROGATE_START} for the surrogate indicator bits, with the + * {@code 10} bits for the second byte of a 3-byte character (top 3 bits). + */ + private static final int THREE_BYTE_SURROGATE_CONTINUATION = 0b1010_0000; + + private static StringEncoding getGuaranteedStringEncoding(byte[] bytes) { + // The differences between the modified UTF-8 format and the standard UTF-8 format are the following: + // The null byte '\u0000' is encoded in 2-byte format rather than 1-byte, so that the encoded strings never have embedded nulls. + // Only the 1-byte, 2-byte, and 3-byte formats are used. + // Supplementary characters are represented in the form of surrogate pairs. + + // However, the DataInputStream will accept a null-byte. + // So we can't use those as a definitive indicator of modified UTF-8 or not. + boolean sawTwoByteNullStart = false; + boolean sawThreeByteSurrogateStart = false; + for (byte b : bytes) { + if (b == TWO_BYTE_NULL_START) { + sawTwoByteNullStart = true; + } else if (sawTwoByteNullStart) { + if (b == TWO_BYTE_NULL_END) { + return StringEncoding.MODIFIED_UTF_8; + } else { + sawTwoByteNullStart = false; + } + } + + if ((b & TOP_5_BITS) == FOUR_BYTE_START) { + // 4-byte start + return StringEncoding.NORMAL_UTF_8; + } + + if (b == THREE_BYTE_SURROGATE_START) { + sawThreeByteSurrogateStart = true; + } else if (sawThreeByteSurrogateStart) { + if ((b & TOP_3_BITS) == THREE_BYTE_SURROGATE_CONTINUATION) { + // Assume this is a properly encoded surrogate, and that this is modified UTF-8 + // Any errors will be caught by the UTF-8 decoder. + return StringEncoding.MODIFIED_UTF_8; + } else { + sawThreeByteSurrogateStart = false; + } + } + } + return StringEncoding.UNKNOWN; + } + private sealed interface State { /** * We need to initialize and return the root name. @@ -88,20 +159,31 @@ record ReadLongArray(int remaining) implements State { } } + private enum StringEncoding { + MODIFIED_UTF_8, + NORMAL_UTF_8, + UNKNOWN, + } + private final DataInput input; /** * The state stack. We're currently on the one that's LAST. */ private final Deque stateStack; + private StringEncoding stringEncoding; /** * Creates a new reader. * * @param input the input to read from + * @param options the options to use when reading */ - public LinNbtReader(DataInput input) { + public LinNbtReader(DataInput input, LinReadOptions options) { this.input = input; this.stateStack = new ArrayDeque<>(List.of(new State.Initial())); + // We only need to check strings if we're allowing JNBT encoding. + this.stringEncoding = options.allowJnbtStringEncoding() + ? StringEncoding.UNKNOWN : StringEncoding.MODIFIED_UTF_8; } @Override @@ -114,7 +196,7 @@ public LinNbtReader(DataInput input) { throw new NbtParseException("NBT stream does not start with a compound tag"); } stateStack.addLast(new State.CompoundStart()); - yield new LinToken.Name(input.readUTF(), LinTagId.COMPOUND); + yield new LinToken.Name(readUtf(), LinTagId.COMPOUND); } case State.CompoundStart compoundStart -> { stateStack.addLast(new State.CompoundEntryName()); @@ -129,7 +211,7 @@ public LinNbtReader(DataInput input) { // After we read the value, we'll be back at reading the name. stateStack.addLast(new State.CompoundEntryName()); stateStack.addLast(new State.ReadValue(id)); - yield new LinToken.Name(input.readUTF(), id); + yield new LinToken.Name(readUtf(), id); } case State.ReadValue(LinTagId id) -> handleReadValue(id); case State.ReadByteArray(int remaining) -> { @@ -188,7 +270,7 @@ private LinToken handleReadValue(LinTagId id) throws IOException { stateStack.addLast(new State.ReadByteArray(size)); yield new LinToken.ByteArrayStart(size); } - case STRING -> new LinToken.String(input.readUTF()); + case STRING -> new LinToken.String(readUtf()); case LIST -> { var elementId = LinTagId.fromId(input.readUnsignedByte()); int size = input.readInt(); @@ -212,4 +294,43 @@ private LinToken handleReadValue(LinTagId id) throws IOException { case END -> throw new NbtParseException("Invalid id: " + id); }; } + + private String readUtf() throws IOException { + return switch (stringEncoding) { + case MODIFIED_UTF_8 -> input.readUTF(); + case NORMAL_UTF_8 -> { + int length = input.readUnsignedShort(); + byte[] bytes = new byte[length]; + input.readFully(bytes); + yield decodeNormalUtf8(bytes); + } + case UNKNOWN -> { + int length = input.readUnsignedShort(); + byte[] bytes = new byte[length]; + input.readFully(bytes); + StringEncoding knownEncoding = getGuaranteedStringEncoding(bytes); + yield switch (knownEncoding) { + case MODIFIED_UTF_8 -> { + stringEncoding = knownEncoding; + byte[] withLength = new byte[bytes.length + 2]; + withLength[0] = (byte) (length >> 8); + withLength[1] = (byte) length; + System.arraycopy(bytes, 0, withLength, 2, bytes.length); + yield new DataInputStream(new ByteArrayInputStream(withLength)).readUTF(); + } + case NORMAL_UTF_8 -> { + stringEncoding = knownEncoding; + yield decodeNormalUtf8(bytes); + } + // These are valid UTF-8 bytes that fit either encoding. Just read them as normal UTF-8, + // but don't change the encoding. + case UNKNOWN -> decodeNormalUtf8(bytes); + }; + } + }; + } + + private static String decodeNormalUtf8(byte[] bytes) throws CharacterCodingException { + return StandardCharsets.UTF_8.newDecoder().decode(ByteBuffer.wrap(bytes)).toString(); + } } diff --git a/stream/src/test/java/org/enginehub/linbus/stream/JnbtCompatibilityIntegrationTest.java b/stream/src/test/java/org/enginehub/linbus/stream/JnbtCompatibilityIntegrationTest.java new file mode 100644 index 0000000..5a49d55 --- /dev/null +++ b/stream/src/test/java/org/enginehub/linbus/stream/JnbtCompatibilityIntegrationTest.java @@ -0,0 +1,262 @@ +/* + * Copyright (c) EngineHub + * Copyright (c) contributors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +package org.enginehub.linbus.stream; + +import com.google.common.collect.ImmutableList; +import com.google.common.io.ByteArrayDataOutput; +import com.google.common.io.ByteStreams; +import org.enginehub.linbus.common.LinTagId; +import org.enginehub.linbus.stream.token.LinToken; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; +import org.junit.jupiter.params.provider.ValueSource; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.UTFDataFormatException; +import java.io.UncheckedIOException; +import java.nio.charset.MalformedInputException; +import java.nio.charset.StandardCharsets; + +import static com.google.common.truth.Truth.assertThat; +import static org.enginehub.linbus.stream.StreamTestUtil.convertNbtStream; +import static org.enginehub.linbus.stream.StreamTestUtil.streamFromIterator; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class JnbtCompatibilityIntegrationTest { + + private static final LinReadOptions OPTIONS = LinReadOptions.builder().allowJnbtStringEncoding(true).build(); + + private static final String NULL_BYTE_TEST_STRING = "Null: \0"; + private static final String TWO_BYTE_TEST_STRING = "2-byte: Ø"; + private static final String THREE_BYTE_TEST_STRING = "3-byte: ඞ"; + private static final String FOUR_BYTE_TEST_STRING = "4-byte: 🐲"; + private static final String FULL_UNICODE_TEST_STRING = NULL_BYTE_TEST_STRING + TWO_BYTE_TEST_STRING + + THREE_BYTE_TEST_STRING + FOUR_BYTE_TEST_STRING; + + @Test + void parsesNormalNbtWhenUsingFlag() throws IOException { + var tokens = convertNbtStream("all-types.nbt.gz", OPTIONS, s -> ImmutableList.copyOf(s.asIterator())); + assertThat(tokens).containsExactlyElementsIn(LinBinaryIOIntegrationTest.ALL_TYPES_TOKENS).inOrder(); + } + + @Test + void allDifferentUnicode() throws IOException { + byte[] bytes = createDifferentUnicodeJnbt(); + + // By default, this should not parse due to incorrect encoding + var uncheckedIoEx = assertThrows(UncheckedIOException.class, () -> ImmutableList.copyOf( + LinBinaryIO.read( + new DataInputStream(new ByteArrayInputStream(bytes)) + ).asIterator() + )); + assertThat(uncheckedIoEx.getCause()).isInstanceOf(UTFDataFormatException.class); + + // With the compatibility flag on, it should parse correctly + var tokens = ImmutableList.copyOf( + LinBinaryIO.read( + new DataInputStream(new ByteArrayInputStream(bytes)), + OPTIONS + ).asIterator() + ); + assertThat(tokens).containsExactly( + new LinToken.Name(FULL_UNICODE_TEST_STRING, LinTagId.COMPOUND), + new LinToken.CompoundStart(), + new LinToken.CompoundEnd() + ).inOrder(); + + var byteCollector = ByteStreams.newDataOutput(); + LinBinaryIO.write(byteCollector, streamFromIterator(tokens.iterator())); + // We use modified UTF-8 encoding, so the bytes should not be equal + assertThat(byteCollector.toByteArray()).isNotEqualTo(bytes); + } + + private byte[] createDifferentUnicodeJnbt() { + var byteCollector = ByteStreams.newDataOutput(); + byteCollector.write(LinTagId.COMPOUND.id()); + writeNormalUtf8(byteCollector, FULL_UNICODE_TEST_STRING); + byteCollector.write(LinTagId.END.id()); + return byteCollector.toByteArray(); + } + + @ParameterizedTest + @ValueSource(strings = {NULL_BYTE_TEST_STRING, FOUR_BYTE_TEST_STRING, FULL_UNICODE_TEST_STRING}) + void locksInModifiedUtf8IfPossible(String testStr) { + byte[] bytes = createDualModifiedFirst(testStr); + + // By default, this should not parse due to incorrect encoding + var uncheckedIoEx = assertThrows(UncheckedIOException.class, () -> ImmutableList.copyOf( + LinBinaryIO.read( + new DataInputStream(new ByteArrayInputStream(bytes)) + ).asIterator() + )); + assertThat(uncheckedIoEx.getCause()).isInstanceOf(UTFDataFormatException.class); + + // With the compatibility flag on, it should also not parse because we lock in the modified UTF-8 encoding + uncheckedIoEx = assertThrows(UncheckedIOException.class, () -> ImmutableList.copyOf( + LinBinaryIO.read( + new DataInputStream(new ByteArrayInputStream(bytes)), + OPTIONS + ).asIterator() + )); + assertThat(uncheckedIoEx.getCause()).isInstanceOf(UTFDataFormatException.class); + } + + private byte[] createDualModifiedFirst(String testStr) { + var byteCollector = ByteStreams.newDataOutput(); + byteCollector.write(LinTagId.COMPOUND.id()); + byteCollector.writeUTF(""); // Name + byteCollector.write(LinTagId.STRING.id()); + byteCollector.writeUTF("determiner"); // Name + byteCollector.writeUTF(testStr); + byteCollector.write(LinTagId.STRING.id()); + byteCollector.writeUTF("detector"); // Name + writeNormalUtf8(byteCollector, FULL_UNICODE_TEST_STRING); + byteCollector.write(LinTagId.END.id()); + return byteCollector.toByteArray(); + } + + // NULL_BYTE_TEST_STRING cannot lock in the encoding, as it is acceptable as input to modified UTF-8 + @ParameterizedTest + @ValueSource(strings = {FOUR_BYTE_TEST_STRING, FULL_UNICODE_TEST_STRING}) + void locksInNormalUtf8IfPossible(String testStr) { + byte[] bytes = createDualNormalFirst(testStr); + + // By default, this should not parse due to incorrect encoding + var uncheckedIoEx = assertThrows(UncheckedIOException.class, () -> ImmutableList.copyOf( + LinBinaryIO.read( + new DataInputStream(new ByteArrayInputStream(bytes)) + ).asIterator() + )); + assertThat(uncheckedIoEx.getCause()).isInstanceOf(UTFDataFormatException.class); + + // With the compatibility flag on, it should also not parse because we lock in the normal UTF-8 encoding + uncheckedIoEx = assertThrows(UncheckedIOException.class, () -> ImmutableList.copyOf( + LinBinaryIO.read( + new DataInputStream(new ByteArrayInputStream(bytes)), + OPTIONS + ).asIterator() + )); + assertThat(uncheckedIoEx.getCause()).isInstanceOf(MalformedInputException.class); + } + + private byte[] createDualNormalFirst(String testStr) { + var byteCollector = ByteStreams.newDataOutput(); + byteCollector.write(LinTagId.COMPOUND.id()); + byteCollector.writeUTF(""); // Name + byteCollector.write(LinTagId.STRING.id()); + byteCollector.writeUTF("determiner"); // Name + writeNormalUtf8(byteCollector, testStr); + byteCollector.write(LinTagId.STRING.id()); + byteCollector.writeUTF("detector"); // Name + byteCollector.writeUTF(FULL_UNICODE_TEST_STRING); + byteCollector.write(LinTagId.END.id()); + return byteCollector.toByteArray(); + } + + private enum StringEncoding { + NORMAL, MODIFIED + } + + @ParameterizedTest + @EnumSource(value = StringEncoding.class) + void doesNotLockInWhenUndetectable(StringEncoding encoding) { + byte[] bytes = createAmbiguousWithPostFullString(encoding); + + if (encoding != StringEncoding.MODIFIED) { + // By default, this should not parse due to incorrect encoding + var uncheckedIoEx = assertThrows(UncheckedIOException.class, () -> ImmutableList.copyOf( + LinBinaryIO.read( + new DataInputStream(new ByteArrayInputStream(bytes)) + ).asIterator() + )); + assertThat(uncheckedIoEx.getCause()).isInstanceOf(UTFDataFormatException.class); + } + + // With the compatibility flag on, it should parse because we couldn't lock in the encoding + var tokens = ImmutableList.copyOf( + LinBinaryIO.read( + new DataInputStream(new ByteArrayInputStream(bytes)), + LinReadOptions.builder().allowJnbtStringEncoding(true).build() + ).asIterator() + ); + assertThat(tokens).containsExactly( + new LinToken.Name("", LinTagId.COMPOUND), + new LinToken.CompoundStart(), + new LinToken.Name("null-normal", LinTagId.STRING), + new LinToken.String(NULL_BYTE_TEST_STRING), + new LinToken.Name("2byte-normal", LinTagId.STRING), + new LinToken.String(TWO_BYTE_TEST_STRING), + new LinToken.Name("2byte-modified", LinTagId.STRING), + new LinToken.String(TWO_BYTE_TEST_STRING), + new LinToken.Name("3byte-normal", LinTagId.STRING), + new LinToken.String(THREE_BYTE_TEST_STRING), + new LinToken.Name("3byte-modified", LinTagId.STRING), + new LinToken.String(THREE_BYTE_TEST_STRING), + new LinToken.Name("decider", LinTagId.STRING), + new LinToken.String(FULL_UNICODE_TEST_STRING), + new LinToken.CompoundEnd() + ).inOrder(); + } + + private byte[] createAmbiguousWithPostFullString(StringEncoding encoding) { + var byteCollector = ByteStreams.newDataOutput(); + byteCollector.write(LinTagId.COMPOUND.id()); + byteCollector.writeUTF(""); + + // All these strings encode the same in normal and modified UTF-8 + // So we can't lock in the encoding + + byteCollector.write(LinTagId.STRING.id()); + byteCollector.writeUTF("null-normal"); // Name + writeNormalUtf8(byteCollector, NULL_BYTE_TEST_STRING); + + // We don't bother writing the modified version of null, as it can determine the encoding + + byteCollector.write(LinTagId.STRING.id()); + byteCollector.writeUTF("2byte-normal"); // Name + writeNormalUtf8(byteCollector, TWO_BYTE_TEST_STRING); + byteCollector.write(LinTagId.STRING.id()); + byteCollector.writeUTF("2byte-modified"); // Name + byteCollector.writeUTF(TWO_BYTE_TEST_STRING); + byteCollector.write(LinTagId.STRING.id()); + byteCollector.writeUTF("3byte-normal"); // Name + writeNormalUtf8(byteCollector, THREE_BYTE_TEST_STRING); + byteCollector.write(LinTagId.STRING.id()); + byteCollector.writeUTF("3byte-modified"); // Name + byteCollector.writeUTF(THREE_BYTE_TEST_STRING); + byteCollector.write(LinTagId.STRING.id()); // String + byteCollector.writeUTF("decider"); // Name + switch (encoding) { + case NORMAL -> writeNormalUtf8(byteCollector, FULL_UNICODE_TEST_STRING); + case MODIFIED -> byteCollector.writeUTF(FULL_UNICODE_TEST_STRING); + default -> throw new AssertionError("Unknown encoding: " + encoding); + } + byteCollector.write(LinTagId.END.id()); + return byteCollector.toByteArray(); + } + + private static void writeNormalUtf8(ByteArrayDataOutput byteCollector, String str) { + byte[] stringContent = str.getBytes(StandardCharsets.UTF_8); + byteCollector.writeShort(stringContent.length); + byteCollector.write(stringContent, 0, stringContent.length); + } +} diff --git a/stream/src/test/java/org/enginehub/linbus/stream/LinBinaryIOIntegrationTest.java b/stream/src/test/java/org/enginehub/linbus/stream/LinBinaryIOIntegrationTest.java index eb7a751..112d02d 100644 --- a/stream/src/test/java/org/enginehub/linbus/stream/LinBinaryIOIntegrationTest.java +++ b/stream/src/test/java/org/enginehub/linbus/stream/LinBinaryIOIntegrationTest.java @@ -29,6 +29,7 @@ import java.nio.ByteBuffer; import java.nio.IntBuffer; import java.nio.LongBuffer; +import java.util.List; import static com.google.common.truth.Truth.assertThat; import static org.enginehub.linbus.stream.StreamTestUtil.convertNbtStream; @@ -114,85 +115,87 @@ void bigtest() throws IOException { assertThat(byteCollector.toByteArray()).isEqualTo(bytes); } + static final List ALL_TYPES_TOKENS = List.of( + new LinToken.Name("root", LinTagId.COMPOUND), + new LinToken.CompoundStart(), + new LinToken.Name("byte", LinTagId.BYTE), + new LinToken.Byte((byte) 1), + new LinToken.Name("short", LinTagId.SHORT), + new LinToken.Short((short) 127), + new LinToken.Name("int", LinTagId.INT), + new LinToken.Int(127), + new LinToken.Name("long", LinTagId.LONG), + new LinToken.Long(127), + new LinToken.Name("float", LinTagId.FLOAT), + new LinToken.Float(127), + new LinToken.Name("double", LinTagId.DOUBLE), + new LinToken.Double(127), + new LinToken.Name("string", LinTagId.STRING), + new LinToken.String("this is a string"), + new LinToken.Name("byteArray", LinTagId.BYTE_ARRAY), + new LinToken.ByteArrayStart(1), + new LinToken.ByteArrayContent(ByteBuffer.wrap(new byte[]{(byte) 1}).asReadOnlyBuffer()), + new LinToken.ByteArrayEnd(), + new LinToken.Name("intArray", LinTagId.INT_ARRAY), + new LinToken.IntArrayStart(1), + new LinToken.IntArrayContent(IntBuffer.wrap(new int[]{127}).asReadOnlyBuffer()), + new LinToken.IntArrayEnd(), + new LinToken.Name("longArray", LinTagId.LONG_ARRAY), + new LinToken.LongArrayStart(1), + new LinToken.LongArrayContent(LongBuffer.wrap(new long[]{127}).asReadOnlyBuffer()), + new LinToken.LongArrayEnd(), + new LinToken.Name("byteList", LinTagId.LIST), + new LinToken.ListStart(1, LinTagId.BYTE), + new LinToken.Byte((byte) 1), + new LinToken.ListEnd(), + new LinToken.Name("shortList", LinTagId.LIST), + new LinToken.ListStart(1, LinTagId.SHORT), + new LinToken.Short((short) 127), + new LinToken.ListEnd(), + new LinToken.Name("intList", LinTagId.LIST), + new LinToken.ListStart(1, LinTagId.INT), + new LinToken.Int(127), + new LinToken.ListEnd(), + new LinToken.Name("longList", LinTagId.LIST), + new LinToken.ListStart(1, LinTagId.LONG), + new LinToken.Long(127), + new LinToken.ListEnd(), + new LinToken.Name("floatList", LinTagId.LIST), + new LinToken.ListStart(1, LinTagId.FLOAT), + new LinToken.Float(127), + new LinToken.ListEnd(), + new LinToken.Name("doubleList", LinTagId.LIST), + new LinToken.ListStart(1, LinTagId.DOUBLE), + new LinToken.Double(127), + new LinToken.ListEnd(), + new LinToken.Name("compound1", LinTagId.COMPOUND), + new LinToken.CompoundStart(), + new LinToken.Name("compound2", LinTagId.COMPOUND), + new LinToken.CompoundStart(), + new LinToken.Name("compound3", LinTagId.COMPOUND), + new LinToken.CompoundStart(), + new LinToken.Name("list", LinTagId.LIST), + new LinToken.ListStart(2, LinTagId.COMPOUND), + new LinToken.CompoundStart(), + new LinToken.Name("key", LinTagId.STRING), + new LinToken.String("value"), + new LinToken.CompoundEnd(), + new LinToken.CompoundStart(), + new LinToken.Name("key", LinTagId.STRING), + new LinToken.String("value"), + new LinToken.CompoundEnd(), + new LinToken.ListEnd(), + new LinToken.CompoundEnd(), + new LinToken.CompoundEnd(), + new LinToken.CompoundEnd(), + new LinToken.CompoundEnd() + ); + @Test void allTypes() throws IOException { var bytes = loadResource("all-types.nbt.gz", InputStream::readAllBytes); var tokens = convertNbtStream("all-types.nbt.gz", s -> ImmutableList.copyOf(s.asIterator())); - assertThat(tokens).containsExactly( - new LinToken.Name("root", LinTagId.COMPOUND), - new LinToken.CompoundStart(), - new LinToken.Name("byte", LinTagId.BYTE), - new LinToken.Byte((byte) 1), - new LinToken.Name("short", LinTagId.SHORT), - new LinToken.Short((short) 127), - new LinToken.Name("int", LinTagId.INT), - new LinToken.Int(127), - new LinToken.Name("long", LinTagId.LONG), - new LinToken.Long(127), - new LinToken.Name("float", LinTagId.FLOAT), - new LinToken.Float(127), - new LinToken.Name("double", LinTagId.DOUBLE), - new LinToken.Double(127), - new LinToken.Name("string", LinTagId.STRING), - new LinToken.String("this is a string"), - new LinToken.Name("byteArray", LinTagId.BYTE_ARRAY), - new LinToken.ByteArrayStart(1), - new LinToken.ByteArrayContent(ByteBuffer.wrap(new byte[]{(byte) 1}).asReadOnlyBuffer()), - new LinToken.ByteArrayEnd(), - new LinToken.Name("intArray", LinTagId.INT_ARRAY), - new LinToken.IntArrayStart(1), - new LinToken.IntArrayContent(IntBuffer.wrap(new int[]{127}).asReadOnlyBuffer()), - new LinToken.IntArrayEnd(), - new LinToken.Name("longArray", LinTagId.LONG_ARRAY), - new LinToken.LongArrayStart(1), - new LinToken.LongArrayContent(LongBuffer.wrap(new long[]{127}).asReadOnlyBuffer()), - new LinToken.LongArrayEnd(), - new LinToken.Name("byteList", LinTagId.LIST), - new LinToken.ListStart(1, LinTagId.BYTE), - new LinToken.Byte((byte) 1), - new LinToken.ListEnd(), - new LinToken.Name("shortList", LinTagId.LIST), - new LinToken.ListStart(1, LinTagId.SHORT), - new LinToken.Short((short) 127), - new LinToken.ListEnd(), - new LinToken.Name("intList", LinTagId.LIST), - new LinToken.ListStart(1, LinTagId.INT), - new LinToken.Int(127), - new LinToken.ListEnd(), - new LinToken.Name("longList", LinTagId.LIST), - new LinToken.ListStart(1, LinTagId.LONG), - new LinToken.Long(127), - new LinToken.ListEnd(), - new LinToken.Name("floatList", LinTagId.LIST), - new LinToken.ListStart(1, LinTagId.FLOAT), - new LinToken.Float(127), - new LinToken.ListEnd(), - new LinToken.Name("doubleList", LinTagId.LIST), - new LinToken.ListStart(1, LinTagId.DOUBLE), - new LinToken.Double(127), - new LinToken.ListEnd(), - new LinToken.Name("compound1", LinTagId.COMPOUND), - new LinToken.CompoundStart(), - new LinToken.Name("compound2", LinTagId.COMPOUND), - new LinToken.CompoundStart(), - new LinToken.Name("compound3", LinTagId.COMPOUND), - new LinToken.CompoundStart(), - new LinToken.Name("list", LinTagId.LIST), - new LinToken.ListStart(2, LinTagId.COMPOUND), - new LinToken.CompoundStart(), - new LinToken.Name("key", LinTagId.STRING), - new LinToken.String("value"), - new LinToken.CompoundEnd(), - new LinToken.CompoundStart(), - new LinToken.Name("key", LinTagId.STRING), - new LinToken.String("value"), - new LinToken.CompoundEnd(), - new LinToken.ListEnd(), - new LinToken.CompoundEnd(), - new LinToken.CompoundEnd(), - new LinToken.CompoundEnd(), - new LinToken.CompoundEnd() - ).inOrder(); + assertThat(tokens).containsExactlyElementsIn(ALL_TYPES_TOKENS).inOrder(); var byteCollector = ByteStreams.newDataOutput(); LinBinaryIO.write(byteCollector, streamFromIterator(tokens.iterator())); diff --git a/stream/src/test/java/org/enginehub/linbus/stream/StreamTestUtil.java b/stream/src/test/java/org/enginehub/linbus/stream/StreamTestUtil.java index 780820e..fecce74 100644 --- a/stream/src/test/java/org/enginehub/linbus/stream/StreamTestUtil.java +++ b/stream/src/test/java/org/enginehub/linbus/stream/StreamTestUtil.java @@ -45,7 +45,11 @@ public static T loadResource(String name, ResourceLoader loader) throws I } public static T convertNbtStream(String name, Function converter) throws IOException { - return loadResource(name, stream -> converter.apply(LinBinaryIO.read(new DataInputStream(stream)))); + return convertNbtStream(name, LinReadOptions.builder().build(), converter); + } + + public static T convertNbtStream(String name, LinReadOptions options, Function converter) throws IOException { + return loadResource(name, stream -> converter.apply(LinBinaryIO.read(new DataInputStream(stream), options))); } public static LinStream streamFromIterator(Iterator tokens) { diff --git a/version.txt b/version.txt index 85ce25f..d144648 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.1.3-SNAPSHOT \ No newline at end of file +0.2.0-SNAPSHOT From 6e4c7663d4c68a821bb14f3a92fc204e46312ea5 Mon Sep 17 00:00:00 2001 From: Octavia Togami Date: Wed, 2 Oct 2024 00:04:58 -0700 Subject: [PATCH 3/3] Heavily improve performance of normal UTF-8 decoding --- .../linbus/stream/impl/LinNbtReader.java | 104 +++++++++++++++--- 1 file changed, 88 insertions(+), 16 deletions(-) diff --git a/stream/src/main/java/org/enginehub/linbus/stream/impl/LinNbtReader.java b/stream/src/main/java/org/enginehub/linbus/stream/impl/LinNbtReader.java index cfcf9b2..622f81b 100644 --- a/stream/src/main/java/org/enginehub/linbus/stream/impl/LinNbtReader.java +++ b/stream/src/main/java/org/enginehub/linbus/stream/impl/LinNbtReader.java @@ -30,7 +30,10 @@ import java.io.DataInputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; import java.nio.charset.StandardCharsets; import java.util.ArrayDeque; import java.util.Deque; @@ -65,7 +68,7 @@ public class LinNbtReader implements LinStream { */ private static final int THREE_BYTE_SURROGATE_CONTINUATION = 0b1010_0000; - private static StringEncoding getGuaranteedStringEncoding(byte[] bytes) { + private static StringEncoding getGuaranteedStringEncoding(ByteBuffer bytes) { // The differences between the modified UTF-8 format and the standard UTF-8 format are the following: // The null byte '\u0000' is encoded in 2-byte format rather than 1-byte, so that the encoded strings never have embedded nulls. // Only the 1-byte, 2-byte, and 3-byte formats are used. @@ -75,7 +78,8 @@ private static StringEncoding getGuaranteedStringEncoding(byte[] bytes) { // So we can't use those as a definitive indicator of modified UTF-8 or not. boolean sawTwoByteNullStart = false; boolean sawThreeByteSurrogateStart = false; - for (byte b : bytes) { + for (int i = 0; i < bytes.remaining(); i++) { + byte b = bytes.get(i); if (b == TWO_BYTE_NULL_START) { sawTwoByteNullStart = true; } else if (sawTwoByteNullStart) { @@ -165,12 +169,75 @@ private enum StringEncoding { UNKNOWN, } + private static final class NormalUtf8Decoder { + private final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder(); + // Default to some small allocation that is likely to cover most strings. + private ByteBuffer sourceBuffer = ByteBuffer.allocate(128); + private CharBuffer decodeBuffer = CharBuffer.allocate(128); + + void fill(DataInput input, int length) throws IOException { + ensureSourceBufferCapacity(length); + input.readFully(sourceBuffer.array(), 0, length); + sourceBuffer.limit(length); + } + + private void ensureSourceBufferCapacity(int requiredCapacity) { + if (sourceBuffer.capacity() < requiredCapacity) { + sourceBuffer = ByteBuffer.allocate(requiredCapacity); + } else { + sourceBuffer.clear(); + } + } + + private void ensureCharBufferCapacity(int requiredCapacity) { + if (decodeBuffer.capacity() < requiredCapacity) { + decodeBuffer = CharBuffer.allocate(requiredCapacity); + } else { + decodeBuffer.clear(); + } + } + + public String decode() throws CharacterCodingException { + int n = (int) (sourceBuffer.remaining() * decoder.averageCharsPerByte()); + ensureCharBufferCapacity(n); + + if ((n == 0) && (sourceBuffer.remaining() == 0)) + return ""; + decoder.reset(); + for (; ; ) { + CoderResult cr = sourceBuffer.hasRemaining() + ? decoder.decode(sourceBuffer, decodeBuffer, true) + : CoderResult.UNDERFLOW; + if (cr.isUnderflow()) { + cr = decoder.flush(decodeBuffer); + } + + if (cr.isUnderflow()) { + break; + } + if (cr.isOverflow()) { + // Ensure progress; n might be 0! + n += n / 2 + 1; + CharBuffer o = CharBuffer.allocate(n); + decodeBuffer.flip(); + o.put(decodeBuffer); + decodeBuffer = o; + continue; + } + cr.throwException(); + } + decodeBuffer.flip(); + return decodeBuffer.toString(); + } + } + private final DataInput input; /** * The state stack. We're currently on the one that's LAST. */ private final Deque stateStack; private StringEncoding stringEncoding; + private @Nullable NormalUtf8Decoder decoder; /** * Creates a new reader. @@ -295,42 +362,47 @@ private LinToken handleReadValue(LinTagId id) throws IOException { }; } + private NormalUtf8Decoder getNormalUtf8Decoder() { + NormalUtf8Decoder decoder = this.decoder; + if (decoder == null) { + decoder = new NormalUtf8Decoder(); + this.decoder = decoder; + } + return decoder; + } + private String readUtf() throws IOException { return switch (stringEncoding) { case MODIFIED_UTF_8 -> input.readUTF(); case NORMAL_UTF_8 -> { int length = input.readUnsignedShort(); - byte[] bytes = new byte[length]; - input.readFully(bytes); - yield decodeNormalUtf8(bytes); + NormalUtf8Decoder decoder = getNormalUtf8Decoder(); + decoder.fill(input, length); + yield decoder.decode(); } case UNKNOWN -> { int length = input.readUnsignedShort(); - byte[] bytes = new byte[length]; - input.readFully(bytes); - StringEncoding knownEncoding = getGuaranteedStringEncoding(bytes); + NormalUtf8Decoder decoder = getNormalUtf8Decoder(); + decoder.fill(input, length); + StringEncoding knownEncoding = getGuaranteedStringEncoding(decoder.sourceBuffer); yield switch (knownEncoding) { case MODIFIED_UTF_8 -> { stringEncoding = knownEncoding; - byte[] withLength = new byte[bytes.length + 2]; + byte[] withLength = new byte[length + 2]; withLength[0] = (byte) (length >> 8); withLength[1] = (byte) length; - System.arraycopy(bytes, 0, withLength, 2, bytes.length); + System.arraycopy(decoder.sourceBuffer.array(), 0, withLength, 2, length); yield new DataInputStream(new ByteArrayInputStream(withLength)).readUTF(); } case NORMAL_UTF_8 -> { stringEncoding = knownEncoding; - yield decodeNormalUtf8(bytes); + yield decoder.decode(); } // These are valid UTF-8 bytes that fit either encoding. Just read them as normal UTF-8, // but don't change the encoding. - case UNKNOWN -> decodeNormalUtf8(bytes); + case UNKNOWN -> decoder.decode(); }; } }; } - - private static String decodeNormalUtf8(byte[] bytes) throws CharacterCodingException { - return StandardCharsets.UTF_8.newDecoder().decode(ByteBuffer.wrap(bytes)).toString(); - } }