From 02a557a0e93c21b0164c8f407c722c8cbb59330d Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 21 Dec 2023 13:42:16 -0600 Subject: [PATCH] Arity-split String#encode! This led to a few other things getting split, plus a lambda form that avoids the temporary string carrier array. --- core/src/main/java/org/jruby/RubyString.java | 57 +++-- .../java/org/jruby/util/io/EncodingUtils.java | 232 ++++++++++++------ 2 files changed, 194 insertions(+), 95 deletions(-) diff --git a/core/src/main/java/org/jruby/RubyString.java b/core/src/main/java/org/jruby/RubyString.java index 29aee67554b6..36b78db833bf 100644 --- a/core/src/main/java/org/jruby/RubyString.java +++ b/core/src/main/java/org/jruby/RubyString.java @@ -6596,39 +6596,43 @@ public IRubyObject encoding(ThreadContext context) { return context.runtime.getEncodingService().getEncoding(value.getEncoding()); } - // TODO: re-split this - public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0) { - return encode_bang(context, new IRubyObject[]{arg0}); - } + @JRubyMethod(name = "encode!") + public IRubyObject encode_bang(ThreadContext context) { + modify19(); - public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1) { - return encode_bang(context, new IRubyObject[]{arg0,arg1}); + return EncodingUtils.strTranscode(context, this, RubyString::updateFromTranscode); } - public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { - return encode_bang(context, new IRubyObject[]{arg0,arg1,arg2}); + @JRubyMethod(name = "encode!") + public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0) { + modify19(); + + return EncodingUtils.strTranscode(context, arg0, this, RubyString::updateFromTranscode); } - @JRubyMethod(name = "encode!", optional = 3, checkArity = false) - public IRubyObject encode_bang(ThreadContext context, IRubyObject[] args) { - Arity.checkArgumentCount(context, args, 0, 3); + @JRubyMethod(name = "encode!") + public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1) { + modify19(); - IRubyObject[] newstr_p; - Encoding encindex; + return EncodingUtils.strTranscode(context, arg0, arg1, this, RubyString::updateFromTranscode); + } + @JRubyMethod(name = "encode!") + public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { modify19(); - newstr_p = new IRubyObject[]{this}; - encindex = EncodingUtils.strTranscode(context, args, newstr_p); + return EncodingUtils.strTranscode(context, arg0, arg1, arg2, this, RubyString::updateFromTranscode); + } - if (encindex == null) return this; - if (newstr_p[0] == this) { - setEncoding(encindex); - return this; + private static RubyString updateFromTranscode(ThreadContext context, RubyString self, Encoding encindex, RubyString newstr) { + if (encindex == null) return self; + if (newstr == self) { + self.setEncoding(encindex); + return self; } - replace(newstr_p[0]); - setEncoding(encindex); - return this; + self.replace(newstr); + self.setEncoding(encindex); + return self; } @JRubyMethod @@ -7244,4 +7248,13 @@ public RubyArray unpack(IRubyObject obj) { return Pack.unpack(getRuntime(), this.value, stringValue(obj).value); } + @Deprecated + public IRubyObject encode_bang(ThreadContext context, IRubyObject[] args) { + Arity.checkArgumentCount(context, args, 0, 2); + + modify19(); + + return EncodingUtils.strTranscode(context, args, this, RubyString::updateFromTranscode); + } + } diff --git a/core/src/main/java/org/jruby/util/io/EncodingUtils.java b/core/src/main/java/org/jruby/util/io/EncodingUtils.java index 24b61789fa2f..69be3b0af769 100644 --- a/core/src/main/java/org/jruby/util/io/EncodingUtils.java +++ b/core/src/main/java/org/jruby/util/io/EncodingUtils.java @@ -870,26 +870,26 @@ public static Encoding toEncodingIndex(ThreadContext context, IRubyObject enc) { } // encoded_dup - public static IRubyObject encodedDup(ThreadContext context, IRubyObject newstr, IRubyObject str, Encoding encindex) { - if (encindex == null) return str.dup(); + public static RubyString encodedDup(ThreadContext context, RubyString str, Encoding encindex, RubyString newstr) { + if (encindex == null) return (RubyString) str.dup(); if (newstr == str) { - newstr = str.dup(); + newstr = (RubyString) str.dup(); } else { // set to same superclass - ((RubyBasicObject)newstr).setMetaClass(str.getMetaClass()); + newstr.setMetaClass(str.getMetaClass()); } - ((RubyString)newstr).modify19(); - return strEncodeAssociate(context, newstr, encindex); + newstr.modify19(); + return strEncodeAssociate(newstr, encindex); } // str_encode_associate - public static IRubyObject strEncodeAssociate(ThreadContext context, IRubyObject str, Encoding encidx) { + public static RubyString strEncodeAssociate(RubyString str, Encoding encidx) { encAssociateIndex(str, encidx); if (encAsciicompat(encidx)) { - ((RubyString)str).scanForCodeRange(); + str.scanForCodeRange(); } else { - ((RubyString)str).setCodeRange(StringSupport.CR_VALID); + str.setCodeRange(StringSupport.CR_VALID); } return str; @@ -911,21 +911,25 @@ public static IRubyObject encAssociateIndex(IRubyObject obj, Encoding encidx) { } // str_encode - public static IRubyObject strEncode(ThreadContext context, IRubyObject str, IRubyObject... args) { - IRubyObject[] newstr_p = {str}; + public static IRubyObject strEncode(ThreadContext context, RubyString str) { + return strTranscode(context, str, EncodingUtils::encodedDup); + } + + public static IRubyObject strEncode(ThreadContext context, RubyString str, IRubyObject arg0) { + return strTranscode(context, arg0, str, EncodingUtils::encodedDup); + } - Encoding dencindex = strTranscode(context, args, newstr_p); + public static IRubyObject strEncode(ThreadContext context, RubyString str, IRubyObject arg0, IRubyObject arg1) { + return strTranscode(context, arg0, arg1, str, EncodingUtils::encodedDup); + } - return encodedDup(context, newstr_p[0], str, dencindex); + public static IRubyObject strEncode(ThreadContext context, RubyString str, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { + return strTranscode(context, arg0, arg1, arg2, str, EncodingUtils::encodedDup); } // rb_str_encode public static IRubyObject rbStrEncode(ThreadContext context, IRubyObject str, IRubyObject to, int ecflags, IRubyObject ecopt) { - IRubyObject[] newstr_p = {str}; - - Encoding dencindex = strTranscode0(context, 1, new IRubyObject[]{to}, newstr_p, ecflags, ecopt); - - return encodedDup(context, newstr_p[0], str, dencindex); + return strTranscode1(context, to, (RubyString) str, ecflags, ecopt, EncodingUtils::encodedDup); } // rb_str_encode @@ -973,76 +977,116 @@ protected static boolean noDecorators(int ecflags) { } // str_transcode - public static Encoding strTranscode(ThreadContext context, IRubyObject[] args, IRubyObject[] self_p) { - int ecflags = 0; - int argc = args.length; - IRubyObject[] ecopts_p = {context.nil}; + public static IRubyObject strTranscode(ThreadContext context, IRubyObject[] args, RubyString str, TranscodeResult result) { + switch (args.length) { + case 0: + return strTranscode(context, str, result); + case 1: + return strTranscode(context, args[0], str, result); + case 2: + return strTranscode(context, args[0], args[1], str, result); + default: + throw context.runtime.newArgumentError(args.length, 2); + } + } - if (args.length >= 1) { - IRubyObject tmp = TypeConverter.checkHashType(context.runtime, args[args.length - 1]); - if (!tmp.isNil()) { - argc--; - ecflags = econvPrepareOpts(context, tmp, ecopts_p); - } + public interface TranscodeResult { + RubyString apply(ThreadContext context, RubyString str, Encoding enc, RubyString newStr); + } + + public static IRubyObject strTranscode(ThreadContext context, RubyString str, TranscodeResult result) { + return strTranscode0(context, str, 0, context.nil, result); + } + + public static IRubyObject strTranscode(ThreadContext context, IRubyObject arg0, RubyString str, TranscodeResult result) { + return strTranscode1(context, arg0, str, 0, context.nil, result); + } + + public static IRubyObject strTranscode(ThreadContext context, IRubyObject arg0, IRubyObject arg1, RubyString str, TranscodeResult result) { + return strTranscode2(context, arg0, arg1, str, 0, context.nil, result); + } + + public static IRubyObject strTranscode(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, RubyString str, TranscodeResult result) { + return strTranscode3(context, arg0, arg1, arg2, str, 0, context.nil, result); + } + + private static IRubyObject strTranscode0(ThreadContext context, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result) { + IRubyObject arg1 = context.runtime.getEncodingService().getDefaultInternal(); + if (arg1 == null || arg1.isNil()) { + if (ecflags == 0) return null; + arg1 = objEncoding(context, str); } - return strTranscode0(context, argc, args, self_p, ecflags, ecopts_p[0]); + boolean explicitlyInvalidReplace = (ecflags & EConvFlags.INVALID_MASK) != 0; + + ecflags |= EConvFlags.INVALID_REPLACE | EConvFlags.UNDEF_REPLACE; + + return strTranscode(context, arg1, context.nil, str, ecflags, ecopts, result, explicitlyInvalidReplace); } - // str_transcode0 - public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObject[] args, IRubyObject[] self_p, int ecflags, IRubyObject ecopts) { - Ruby runtime = context.runtime; + private static IRubyObject strTranscode1(ThreadContext context, IRubyObject arg1, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result) { + IRubyObject tmp = TypeConverter.checkHashType(context.runtime, arg1); + if (!tmp.isNil()) { + IRubyObject[] ecopts_p = {context.nil}; + ecflags = econvPrepareOpts(context, tmp, ecopts_p); + return strTranscode0(context, str, ecflags, ecopts_p[0], result); + } - IRubyObject str = self_p[0]; - IRubyObject arg1, arg2; - Encoding[] senc_p = {null}, denc_p = {null}; - byte[][] sname_p = {null}, dname_p = {null}; - Encoding dencindex; - boolean explicitlyInvalidReplace = true; + return strTranscode(context, arg1, context.nil, str, ecflags, ecopts, result, true); + } - if (argc > 2) { - throw context.runtime.newArgumentError(args.length, 2); + private static IRubyObject strTranscode2(ThreadContext context, IRubyObject arg1, IRubyObject arg2, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result) { + IRubyObject tmp = TypeConverter.checkHashType(context.runtime, arg2); + if (!tmp.isNil()) { + IRubyObject[] ecopts_p = {context.nil}; + ecflags = econvPrepareOpts(context, tmp, ecopts_p); + return strTranscode1(context, arg1, str, ecflags, ecopts_p[0], result); } - if (argc == 0) { - arg1 = runtime.getEncodingService().getDefaultInternal(); - if (arg1 == null || arg1.isNil()) { - if (ecflags == 0) return null; - arg1 = objEncoding(context, str); - } - if ((ecflags & EConvFlags.INVALID_MASK) == 0) { - explicitlyInvalidReplace = false; - } - ecflags |= EConvFlags.INVALID_REPLACE | EConvFlags.UNDEF_REPLACE; - } else { - arg1 = args[0]; + return strTranscode(context, arg1, arg2, str, ecflags, ecopts, result, true); + } + + private static IRubyObject strTranscode3(ThreadContext context, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result) { + IRubyObject tmp = TypeConverter.checkHashType(context.runtime, arg3); + if (tmp.isNil()) { + throw context.runtime.newArgumentError(3, 0, 2); } - arg2 = argc <= 1 ? context.nil : args[1]; - dencindex = strTranscodeEncArgs(context, str, arg1, arg2, sname_p, senc_p, dname_p, denc_p); + IRubyObject[] ecopts_p = {context.nil}; + ecflags = econvPrepareOpts(context, tmp, ecopts_p); + return strTranscode2(context, arg1, arg2, str, ecflags, ecopts_p[0], result); + } + + private static RubyString strTranscode(ThreadContext context, IRubyObject arg1, IRubyObject arg2, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result, boolean explicitlyInvalidReplace) { + Ruby runtime = context.runtime; + + Encoding[] senc_p = {null}, denc_p = {null}; + byte[][] sname_p = {null}, dname_p = {null}; + Encoding dencindex = strTranscodeEncArgs(context, str, arg1, arg2, sname_p, senc_p, dname_p, denc_p); - IRubyObject dest; + RubyString dest; if (noDecorators(ecflags)) { + dest = str; if (senc_p[0] != null && senc_p[0] == denc_p[0]) { if ((ecflags & EConvFlags.INVALID_MASK) != 0 && explicitlyInvalidReplace) { IRubyObject rep = context.nil; if (!ecopts.isNil()) { - rep = ((RubyHash)ecopts).op_aref(context, runtime.newSymbol("replace")); + rep = ((RubyHash) ecopts).op_aref(context, runtime.newSymbol("replace")); } - dest = ((RubyString)str).encStrScrub(context, senc_p[0], rep, Block.NULL_BLOCK); + dest = (RubyString) str.encStrScrub(context, senc_p[0], rep, Block.NULL_BLOCK); if (dest.isNil()) dest = str; - self_p[0] = dest; - return dencindex; - } - return arg2.isNil() ? null : dencindex; - } else if (senc_p[0] != null && denc_p[0] != null && senc_p[0].isAsciiCompatible() && denc_p[0].isAsciiCompatible()) { - if (((RubyString)str).scanForCodeRange() == StringSupport.CR_7BIT) { - return dencindex; + } else if (arg2.isNil()){ + dencindex = null; } - } - if (encodingEqual(sname_p[0], dname_p[0])) { - return arg2.isNil() ? null : dencindex; + return result.apply(context, str, dencindex, dest); + } else if (senc_p[0] != null && denc_p[0] != null + && senc_p[0].isAsciiCompatible() && denc_p[0].isAsciiCompatible() + && str.scanForCodeRange() == StringSupport.CR_7BIT) { + return result.apply(context, str, dencindex, str); + } else if (encodingEqual(sname_p[0], dname_p[0])) { + if (arg2.isNil()) dencindex = null; + return result.apply(context, str, dencindex, str); } } else { if (encodingEqual(sname_p[0], dname_p[0])) { @@ -1051,12 +1095,12 @@ public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObjec } } - ByteList sp = ((RubyString)str).getByteList(); + ByteList sp = str.getByteList(); ByteList fromp = sp; - int slen = ((RubyString)str).size(); + int slen = str.size(); int blen = slen + 30; dest = RubyString.newStringLight(runtime, blen); - ByteList destp = ((RubyString)dest).getByteList(); + ByteList destp = dest.getByteList(); byte[] frompBytes = fromp.unsafeBytes(); byte[] destpBytes = destp.unsafeBytes(); @@ -1074,9 +1118,7 @@ public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObjec dencindex = defineDummyEncoding(context, dname_p[0]); } - self_p[0] = dest; - - return dencindex; + return result.apply(context, str, dencindex, dest); } // rb_obj_encoding @@ -2341,4 +2383,48 @@ public static Encoding ioStripBOM(RubyIO io) { return ioStripBOM(io.getRuntime().getCurrentContext(), io); } + @Deprecated + public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObject[] args, IRubyObject[] self_p, int ecflags, IRubyObject ecopts) { + Encoding[] enc_p = {null}; + TranscodeResult result = (ctx, str, enc, newStr) -> {enc_p[0] = enc; self_p[0] = newStr; return newStr;}; + switch (argc) { + case 0: + strTranscode0(context, (RubyString) self_p[0], ecflags, ecopts, result); + return enc_p[0]; + case 1: + strTranscode1(context, args[0], (RubyString) self_p[0], ecflags, ecopts, result); + return enc_p[0]; + case 2: + strTranscode2(context, args[0], args[1], (RubyString) self_p[0], ecflags, ecopts, result); + return enc_p[0]; + default: + throw context.runtime.newArgumentError(args.length, 2); + } + } + + @Deprecated + public static Encoding strTranscode(ThreadContext context, IRubyObject[] args, IRubyObject[] self_p) { + Encoding[] enc_p = {null}; + TranscodeResult result = (ctx, str, enc, newStr) -> {enc_p[0] = enc; self_p[0] = newStr; return newStr;}; + + strTranscode(context, args, (RubyString) self_p[0], result); + + return enc_p[0]; + } + + @Deprecated + public static IRubyObject strEncode(ThreadContext context, IRubyObject str, IRubyObject... args) { + return strTranscode(context, args, (RubyString) str, EncodingUtils::encodedDup); + } + + @Deprecated + public static IRubyObject encodedDup(ThreadContext context, IRubyObject newstr, IRubyObject str, Encoding encindex) { + return encodedDup(context, (RubyString) newstr, encindex, (RubyString) str); + } + + @Deprecated + public static IRubyObject strEncodeAssociate(ThreadContext context, IRubyObject str, Encoding encidx) { + return strEncodeAssociate((RubyString) str, encidx); + } + }