Skip to content

Commit

Permalink
Add a direct byte[] path for IO writes
Browse files Browse the repository at this point in the history
Several Java-based consumers of RubyIO write to it without having
a RubyString in hand, by wrapping incoming byte[] or ByteList.
This patch adds a write path that can accept unwrapped byte[] plus
encoding to reduce allocation and follow a fast path.

Users that hit this logic, mostly via IOOutputStream:

* The Psych ext when dumping to an IO
* The stdout and stderr streams provided by Ruby.getError/OutputStream
* Marshal when dumping to a target IO or IO-like
* GzipWriter when writing to a stream
* Anyone that calls to_outputstream on an IO

Part of work for jruby#6589
  • Loading branch information
headius committed Mar 2, 2021
1 parent 5434394 commit 236f7ba
Show file tree
Hide file tree
Showing 6 changed files with 236 additions and 72 deletions.
43 changes: 36 additions & 7 deletions core/src/main/java/org/jruby/RubyIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -346,12 +346,12 @@ public void write(int b) throws IOException {

@Override
public void write(byte[] b) throws IOException {
RubyIO.this.write(runtime.getCurrentContext(), RubyString.newStringNoCopy(runtime, b));
RubyIO.this.write(runtime.getCurrentContext(), b, 0, b.length, ASCIIEncoding.INSTANCE);
}

@Override
public void write(byte[] b, int off, int len) throws IOException {
RubyIO.this.write(runtime.getCurrentContext(), RubyString.newStringNoCopy(runtime, b, off, len));
RubyIO.this.write(runtime.getCurrentContext(), b, off, len, ASCIIEncoding.INSTANCE);
}

@Override
Expand Down Expand Up @@ -1441,7 +1441,7 @@ public IRubyObject write(ThreadContext context, IRubyObject[] args) {
return RubyFixnum.newFixnum(context.runtime, acc);
}

final IRubyObject write(ThreadContext context, int ch) {
public final IRubyObject write(ThreadContext context, int ch) {
RubyString str = RubyString.newStringShared(context.runtime, RubyInteger.singleCharByteList((byte) ch));
return write(context, str, false);
}
Expand All @@ -1455,14 +1455,14 @@ public IRubyObject write(ThreadContext context, IRubyObject str, boolean nosync)

RubyIO io = GetWriteIO();

str = str.asString();
RubyString string = str.asString();
tmp = TypeConverter.ioCheckIO(runtime, io);
if (tmp == context.nil) {
/* port is not IO, call write method for it. */
return sites(context).write.call(context, io, io, str);
return sites(context).write.call(context, io, io, string);
}
io = (RubyIO) tmp;
if (((RubyString) str).size() == 0) return RubyFixnum.zero(runtime);
if (string.size() == 0) return RubyFixnum.zero(runtime);

fptr = io.getOpenFileChecked();

Expand All @@ -1471,7 +1471,36 @@ public IRubyObject write(ThreadContext context, IRubyObject str, boolean nosync)
fptr = io.getOpenFileChecked();
fptr.checkWritable(context);

n = fptr.fwrite(context, str, nosync);
n = fptr.fwrite(context, string, nosync);
if (n == -1) throw runtime.newErrnoFromErrno(fptr.errno(), fptr.getPath());
} finally {
if (locked) fptr.unlock();
}

return RubyFixnum.newFixnum(runtime, n);
}

/**
 * Writes a region of a raw byte array to this IO (io_write_m with source bytes),
 * so callers holding only a {@code byte[]} do not have to wrap it in a RubyString.
 *
 * Simply forwards to the six-argument overload with {@code nosync} set to
 * {@code false}.
 *
 * @param context the current thread context
 * @param bytes array containing the data to write
 * @param start offset of the first byte to write
 * @param length number of bytes to write
 * @param encoding encoding associated with the supplied bytes
 * @return the result of the delegated write
 */
public IRubyObject write(ThreadContext context, byte[] bytes, int start, int length, Encoding encoding) {
    final boolean nosync = false;
    return write(context, bytes, start, length, encoding, nosync);
}

// io_write with source bytes
public IRubyObject write(ThreadContext context, byte[] bytes, int start, int length, Encoding encoding, boolean nosync) {
Ruby runtime = context.runtime;
OpenFile fptr;
long n;

if (length == 0) return RubyFixnum.zero(runtime);

fptr = getOpenFileChecked();

boolean locked = fptr.lock();
try {
fptr = getOpenFileChecked();
fptr.checkWritable(context);

n = fptr.fwrite(context, bytes, start, length, encoding, nosync);
if (n == -1) throw runtime.newErrnoFromErrno(fptr.errno(), fptr.getPath());
} finally {
if (locked) fptr.unlock();
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/java/org/jruby/RubyInteger.java
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ public IRubyObject succ(ThreadContext context) {
@Deprecated
public static final ByteList[] SINGLE_CHAR_BYTELISTS19 = SINGLE_CHAR_BYTELISTS;

static ByteList singleCharByteList(final byte index) {
public static ByteList singleCharByteList(final byte index) {
return SINGLE_CHAR_BYTELISTS[index & 0xFF];
}

Expand Down
14 changes: 14 additions & 0 deletions core/src/main/java/org/jruby/util/ByteList.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,20 @@ public ByteList(int size) {
realSize = 0;
}

/**
 * Creates a new, empty ByteList whose backing array is pre-allocated to the
 * requested size and whose encoding is set from the supplied value.
 *
 * See {@link #ByteList(int)}
 *
 * @param size number of bytes to preallocate for the backing array
 * @param enc encoding to set; it is passed through {@code safeEncoding} before being stored
 */
public ByteList(int size, Encoding enc) {
    this.bytes = new byte[size];
    // Nothing has been written yet, so the logical size starts at zero.
    this.realSize = 0;
    this.encoding = safeEncoding(enc);
}

/**
* Create a new instance of ByteList with the bytes supplied using the specified encoding.
*
Expand Down
12 changes: 4 additions & 8 deletions core/src/main/java/org/jruby/util/IOOutputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,12 @@ public IOOutputStream(final IRubyObject io, Encoding encoding) {
public void write(final int bite) throws IOException {
ThreadContext context = runtime.getCurrentContext();

RubyString str = RubyString.newStringLight(runtime, new ByteList(new byte[]{(byte) bite}, encoding, false));

RubyIO realIO = this.realIO;
if (realIO != null) {
realIO.write(context, str);
realIO.write(context, bite);
} else {
IRubyObject io = this.io;
writeAdapter.call(context, io, io, str);
writeAdapter.call(context, io, io, RubyString.newStringShared(runtime, new ByteList(new byte[]{(byte) bite}, encoding, false)));
}
}

Expand All @@ -131,14 +129,12 @@ public void write(final byte[] b) throws IOException {
public void write(final byte[] b,final int off, final int len) throws IOException {
ThreadContext context = runtime.getCurrentContext();

RubyString str = RubyString.newStringLight(runtime, new ByteList(b, off, len, encoding, false));

RubyIO realIO = this.realIO;
if (realIO != null) {
realIO.write(context, str);
realIO.write(context, b, off, len, encoding);
} else {
IRubyObject io = this.io;
writeAdapter.call(context, io, io, str);
writeAdapter.call(context, io, io, RubyString.newStringLight(runtime, new ByteList(b, off, len, encoding, false)));
}
}

Expand Down
87 changes: 66 additions & 21 deletions core/src/main/java/org/jruby/util/io/EncodingUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.jcodings.transcode.TranscoderDB;
import org.jcodings.transcode.Transcoding;
import org.jcodings.unicode.UnicodeEncoding;
import org.jcodings.util.CaseInsensitiveBytesHash;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyBasicObject;
Expand All @@ -30,7 +29,6 @@
import org.jruby.RubyFixnum;
import org.jruby.RubyHash;
import org.jruby.RubyIO;
import org.jruby.RubyInteger;
import org.jruby.RubyMethod;
import org.jruby.RubyNumeric;
import org.jruby.RubyProc;
Expand All @@ -47,7 +45,6 @@
import org.jruby.util.ByteListHolder;
import org.jruby.util.CodeRangeSupport;
import org.jruby.util.CodeRangeable;
import org.jruby.util.Sprintf;
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;

Expand Down Expand Up @@ -518,6 +515,11 @@ public static ByteList econvStrConvert(ThreadContext context, EConv ec, ByteList
return econvSubstrAppend(context, ec, src, null, flags);
}

// rb_econv_str_convert with source bytes
/**
 * Runs the given converter over a region of a raw byte array and returns the
 * output in a freshly allocated ByteList.
 *
 * The destination is created with an initial capacity of {@code length} and
 * tagged with {@code ec.destinationEncoding}; the actual work is done by the
 * byte-array form of {@code econvAppend}.
 *
 * @param context the current thread context
 * @param ec the converter to use
 * @param bytes array containing the source data
 * @param start offset of the first source byte
 * @param length number of source bytes
 * @param flags conversion flags handed on to {@code econvAppend}
 * @return the ByteList returned by {@code econvAppend}
 */
public static ByteList econvByteConvert(ThreadContext context, EConv ec, byte[] bytes, int start, int length, int flags) {
    final ByteList converted = new ByteList(length, ec.destinationEncoding);
    return econvAppend(context, ec, bytes, start, length, converted, flags);
}

// rb_econv_substr_append
public static ByteList econvSubstrAppend(ThreadContext context, EConv ec, ByteList src, ByteList dst, int flags) {
return econvAppend(context, ec, src, dst, flags);
Expand All @@ -527,23 +529,25 @@ public static ByteList econvSubstrAppend(ThreadContext context, EConv ec, ByteLi
/**
 * Converts the contents of {@code sByteList} with the given converter and
 * appends the output to {@code dst}, by unpacking the source ByteList and
 * delegating to the byte-array form of this method.
 *
 * @param context the current thread context
 * @param ec the converter to use
 * @param sByteList source bytes to convert
 * @param dst destination to append to; when {@code null} a new ByteList is
 *            allocated with the source length as capacity and
 *            {@code ec.destinationEncoding} as encoding
 * @param flags conversion flags handed on to the byte-array overload
 * @return the ByteList returned by the byte-array overload
 */
public static ByteList econvAppend(ThreadContext context, EConv ec, ByteList sByteList, ByteList dst, int flags) {
    final int sourceLength = sByteList.getRealSize();

    // Allocate a destination only when the caller did not supply one.
    final ByteList target = (dst != null) ? dst : new ByteList(sourceLength, ec.destinationEncoding);

    final byte[] sourceBytes = sByteList.unsafeBytes();
    final int sourceBegin = sByteList.begin();
    return econvAppend(context, ec, sourceBytes, sourceBegin, sourceLength, target, flags);
}

// rb_econv_append with source bytes
public static ByteList econvAppend(ThreadContext context, EConv ec, byte[] bytes, int start, int length, ByteList dst, int flags) {
Ptr sp = new Ptr(0);
int se;
int ds;
int ss = sByteList.getBegin();
int ss = start;
byte[] dBytes;
Ptr dp = new Ptr(0);
int de;
EConvResult res;
int maxOutput;

if (dst == null) {
dst = new ByteList(len);
if (ec.destinationEncoding != null) {
dst.setEncoding(ec.destinationEncoding);
}
}

if (ec.lastTranscoding != null) {
maxOutput = ec.lastTranscoding.transcoder.maxOutput;
} else {
Expand All @@ -552,22 +556,22 @@ public static ByteList econvAppend(ThreadContext context, EConv ec, ByteList sBy

do {
int dlen = dst.getRealSize();
if ((dst.getUnsafeBytes().length - dst.getBegin()) - dlen < len + maxOutput) {
long newCapa = dlen + len + maxOutput;
if ((dst.getUnsafeBytes().length - dst.getBegin()) - dlen < length + maxOutput) {
long newCapa = dlen + length + maxOutput;
if (Integer.MAX_VALUE < newCapa) {
throw context.runtime.newArgumentError("too long string");
}
dst.ensure((int)newCapa);
dst.setRealSize(dlen);
}
sp.p = ss;
se = sp.p + len;
se = sp.p + length;
dBytes = dst.getUnsafeBytes();
ds = dst.getBegin();
de = dBytes.length;
dp.p = ds += dlen;
res = ec.convert(sByteList.getUnsafeBytes(), sp, se, dBytes, dp, de, flags);
len -= sp.p - ss;
res = ec.convert(bytes, sp, se, dBytes, dp, de, flags);
length -= sp.p - ss;
ss = sp.p;
dst.setRealSize(dlen + (dp.p - ds));
EncodingUtils.econvCheckError(context, ec);
Expand Down Expand Up @@ -829,7 +833,7 @@ public static Encoding encGet(ThreadContext context, IRubyObject obj) {

// encoding_equal
/**
 * Compares two encoding names for equality, ignoring ASCII letter case.
 *
 * The comparison works directly on the bytes, so no intermediate
 * {@code String} objects are allocated. Only the ASCII letters A-Z are
 * case-folded; every other byte must match exactly.
 *
 * @param enc1 first encoding name, as bytes
 * @param enc2 second encoding name, as bytes
 * @return {@code true} if both names have the same length and the same bytes
 *         modulo ASCII case, {@code false} otherwise
 */
public static boolean encodingEqual(byte[] enc1, byte[] enc2) {
    if (enc1.length != enc2.length) {
        return false;
    }

    for (int i = 0; i < enc1.length; i++) {
        int a = enc1[i];
        int b = enc2[i];
        if (a == b) {
            continue;
        }
        // Fold ASCII upper-case letters to lower case before comparing again.
        if (a >= 'A' && a <= 'Z') {
            a += 'a' - 'A';
        }
        if (b >= 'A' && b <= 'Z') {
            b += 'a' - 'A';
        }
        if (a != b) {
            return false;
        }
    }

    return true;
}

// enc_arg
Expand Down Expand Up @@ -917,6 +921,50 @@ public static IRubyObject rbStrEncode(ThreadContext context, IRubyObject str, IR
return encodedDup(context, newstr_p[0], str, dencindex);
}

// rb_str_encode with source bytes
/**
 * Transcodes a region of a raw byte array from one encoding to another.
 *
 * @param context the current thread context
 * @param bytes array containing the source data
 * @param start offset of the first source byte
 * @param length number of source bytes
 * @param encoding encoding of the source bytes
 * @param cr code range of the source bytes; compared against {@code StringSupport.CR_7BIT}
 * @param to target encoding
 * @param ecflags conversion flags
 * @param ecopt conversion options, handed on to {@code transcodeLoop}
 * @return a new ByteList in the {@code to} encoding, or {@code null} when no
 *         decorators are requested and either both encodings are
 *         ASCII-compatible with a 7-bit source, or (when not both are
 *         ASCII-compatible) the two encoding names are equal
 * @throws org.jruby.exceptions.RaiseException an ArgumentError if the source
 *         region was not fully consumed by the conversion
 */
public static ByteList rbByteEncode(ThreadContext context, byte[] bytes, int start, int length, Encoding encoding, int cr, Encoding to, int ecflags, IRubyObject ecopt) {
    byte[] sname, dname;

    sname = encoding.getName();
    dname = to.getName();

    if (noDecorators(ecflags)) {
        if (encoding.isAsciiCompatible() && to.isAsciiCompatible()) {
            if (cr == StringSupport.CR_7BIT) {
                return null;
            }
        } else if (encodingEqual(sname, dname)) {
            return null;
        }
    } else if (encodingEqual(sname, dname)) {
        // Same encoding on both sides but decorators were requested: clear both names.
        // NOTE(review): presumably this makes transcodeLoop apply only the decorators - confirm.
        sname = NULL_BYTE_ARRAY;
        dname = NULL_BYTE_ARRAY;
    }

    int slen = length;
    // Initial destination capacity: source length plus 30 bytes of slack.
    int blen = slen + 30;
    ByteList dest = new ByteList(blen, to);

    Ptr fromPos = new Ptr(start);
    // Absolute index one past the last source byte; fromPos.p is absolute as well.
    int fromEnd = start + slen;
    int destBegin = dest.getBegin();
    transcodeLoop(context, bytes, fromPos, dest.unsafeBytes(), new Ptr(destBegin), fromEnd, destBegin + blen, dest, strTranscodingResize, sname, dname, ecflags, ecopt);

    if (fromPos.p != fromEnd) {
        // Remaining byte count must be measured from the absolute end, not from slen,
        // otherwise it is off by `start` whenever the region does not begin at index 0.
        throw context.runtime.newArgumentError("not fully converted, " + (fromEnd - fromPos.p) + " bytes left");
    }

    dest.setEncoding(to);

    return dest;
}

/**
 * Tells whether the given conversion flags request no decorators at all.
 *
 * @param ecflags conversion flags to inspect
 * @return {@code true} when none of the newline or XML (text, attribute
 *         content, attribute quote) decorator bits are set in {@code ecflags}
 */
protected static boolean noDecorators(int ecflags) {
    final int decoratorBits = EConvFlags.NEWLINE_DECORATOR_MASK
            | EConvFlags.XML_TEXT_DECORATOR
            | EConvFlags.XML_ATTR_CONTENT_DECORATOR
            | EConvFlags.XML_ATTR_QUOTE_DECORATOR;
    final int requested = ecflags & decoratorBits;
    return requested == 0;
}

// str_transcode
public static Encoding strTranscode(ThreadContext context, IRubyObject[] args, IRubyObject[] self_p) {
int ecflags = 0;
Expand Down Expand Up @@ -968,10 +1016,7 @@ public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObjec

IRubyObject dest;

if ((ecflags & (EConvFlags.NEWLINE_DECORATOR_MASK
| EConvFlags.XML_TEXT_DECORATOR
| EConvFlags.XML_ATTR_CONTENT_DECORATOR
| EConvFlags.XML_ATTR_QUOTE_DECORATOR)) == 0) {
if (noDecorators(ecflags)) {
if (senc_p[0] != null && senc_p[0] == denc_p[0]) {
if ((ecflags & EConvFlags.INVALID_MASK) != 0 && explicitlyInvalidReplace) {
IRubyObject rep = context.nil;
Expand Down
Loading

0 comments on commit 236f7ba

Please sign in to comment.