From 6ff94230aa19fb10ffd914f2bedf50910f3c98cb Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Tue, 3 Sep 2024 22:18:07 +0530 Subject: [PATCH] 8339494: Porting HalfFloatVector classes. --- .../share/classes/java/lang/Float16.java | 50 +- .../jdk/internal/vm/vector/VectorSupport.java | 1 + .../jdk/incubator/vector/AbstractSpecies.java | 32 +- .../jdk/incubator/vector/AbstractVector.java | 25 + .../jdk/incubator/vector/Byte128Vector.java | 10 +- .../jdk/incubator/vector/Byte256Vector.java | 10 +- .../jdk/incubator/vector/Byte512Vector.java | 10 +- .../jdk/incubator/vector/Byte64Vector.java | 10 +- .../jdk/incubator/vector/ByteMaxVector.java | 10 +- .../jdk/incubator/vector/ByteVector.java | 13 +- .../jdk/incubator/vector/Double128Vector.java | 10 +- .../jdk/incubator/vector/Double256Vector.java | 10 +- .../jdk/incubator/vector/Double512Vector.java | 10 +- .../jdk/incubator/vector/Double64Vector.java | 10 +- .../jdk/incubator/vector/DoubleMaxVector.java | 10 +- .../jdk/incubator/vector/DoubleVector.java | 16 +- .../jdk/incubator/vector/Float128Vector.java | 10 +- .../jdk/incubator/vector/Float256Vector.java | 10 +- .../jdk/incubator/vector/Float512Vector.java | 10 +- .../jdk/incubator/vector/Float64Vector.java | 10 +- .../jdk/incubator/vector/FloatMaxVector.java | 10 +- .../jdk/incubator/vector/FloatVector.java | 18 +- .../incubator/vector/Halffloat128Vector.java | 917 ++++ .../incubator/vector/Halffloat256Vector.java | 933 ++++ .../incubator/vector/Halffloat512Vector.java | 933 ++++ .../incubator/vector/Halffloat64Vector.java | 909 ++++ .../incubator/vector/HalffloatMaxVector.java | 902 ++++ .../jdk/incubator/vector/HalffloatVector.java | 3853 +++++++++++++++++ .../jdk/incubator/vector/Int128Vector.java | 10 +- .../jdk/incubator/vector/Int256Vector.java | 10 +- .../jdk/incubator/vector/Int512Vector.java | 10 +- .../jdk/incubator/vector/Int64Vector.java | 10 +- .../jdk/incubator/vector/IntMaxVector.java | 10 +- .../jdk/incubator/vector/IntVector.java | 13 +- 
.../jdk/incubator/vector/LaneType.java | 24 +- .../jdk/incubator/vector/Long128Vector.java | 10 +- .../jdk/incubator/vector/Long256Vector.java | 10 +- .../jdk/incubator/vector/Long512Vector.java | 10 +- .../jdk/incubator/vector/Long64Vector.java | 10 +- .../jdk/incubator/vector/LongMaxVector.java | 10 +- .../jdk/incubator/vector/LongVector.java | 13 +- .../jdk/incubator/vector/Short128Vector.java | 10 +- .../jdk/incubator/vector/Short256Vector.java | 10 +- .../jdk/incubator/vector/Short512Vector.java | 10 +- .../jdk/incubator/vector/Short64Vector.java | 10 +- .../jdk/incubator/vector/ShortMaxVector.java | 10 +- .../jdk/incubator/vector/ShortVector.java | 20 +- .../classes/jdk/incubator/vector/Vector.java | 13 + .../jdk/incubator/vector/VectorShape.java | 10 +- .../incubator/vector/X-Vector.java.template | 899 ++-- .../vector/X-VectorBits.java.template | 112 +- .../classes/jdk/incubator/vector/gen-src.sh | 50 +- .../incubator/vector/Short128VectorTests.java | 9 +- .../incubator/vector/Short256VectorTests.java | 9 +- .../incubator/vector/Short512VectorTests.java | 9 +- .../incubator/vector/Short64VectorTests.java | 9 +- .../incubator/vector/ShortMaxVectorTests.java | 9 +- .../vector/templates/Unit-header.template | 6 +- 58 files changed, 9438 insertions(+), 669 deletions(-) create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java diff --git a/src/java.base/share/classes/java/lang/Float16.java 
b/src/java.base/share/classes/java/lang/Float16.java index 9a326b49d9d..49fcc0bb661 100644 --- a/src/java.base/share/classes/java/lang/Float16.java +++ b/src/java.base/share/classes/java/lang/Float16.java @@ -73,6 +73,7 @@ // Enhanced Primitive Boxes described by JEP-402 (https://openjdk.org/jeps/402) @jdk.internal.MigratedValueClass @jdk.internal.ValueBased +@SuppressWarnings("serial") public final class Float16 extends Number implements Comparable { @@ -80,14 +81,11 @@ public final class Float16 private static final long serialVersionUID = 16; // Not needed for a value class? // Functionality for future consideration: - // float16ToShortBits that normalizes NaNs, c.f. floatToIntBits vs floatToRawIntBits // copysign // scalb // nextUp / nextDown // IEEEremainder / remainder operator remainder // signum - // valueOf(BigDecimal) -- main implementation could be package private in BigDecimal - /** * Returns a {@code Float16} instance wrapping IEEE 754 binary16 * encoded {@code short} value. @@ -281,11 +279,11 @@ public static Float16 valueOf(int value) { * @param value a {@code long} value. */ public static Float16 valueOf(long value) { - if (value < -65_504L) { + if (value <= -65_520L) { // -(Float16.MAX_VALUE + Float16.ulp(Float16.MAX_VALUE) / 2) return NEGATIVE_INFINITY; } else { - if (value > 65_504L) { - return NEGATIVE_INFINITY; + if (value >= 65_520L) { // Float16.MAX_VALUE + Float16.ulp(Float16.MAX_VALUE) / 2 + return POSITIVE_INFINITY; } // Remaining range of long, the integers in approx. +/- // 2^16, all fit in a float so the correct conversion can @@ -572,6 +570,45 @@ public double doubleValue() { return (double)floatValue(); } + /** + * Returns a representation of the specified floating-point value + * according to the IEEE 754 floating-point "binary16" bit + * layout. + * + *

Bit 15 (the bit that is selected by the mask + * {@code 0x8000}) represents the sign of the floating-point + * number. + * Bits 14-10 (the bits that are selected by the mask + * {@code 0x7C00}) represent the exponent. + * Bits 9-0 (the bits that are selected by the mask + * {@code 0x03FF}) represent the significand (sometimes called + * the mantissa) of the floating-point number. + * + *

If the argument is positive infinity, the result is + * {@code 0x7C00}. + * + *

If the argument is negative infinity, the result is + * {@code 0xFC00}. + * + *

If the argument is NaN, the result is {@code 0x7E00}. + * + *

In all cases, the result is a short that, when given to the + * {@link #shortBitsToFloat16(short)} method, will produce a floating-point + * value the same as the argument to {@code float16ToShortBits} + * (except all NaN values are collapsed to a single + * "canonical" NaN value). + * + * @param f16 an IEEE 754 binary16 floating-point number. + * @return the bits that represent the floating-point number. + */ + //@IntrinsicCandidate + public static short float16ToShortBits(Float16 f16) { + if (!isNaN(f16)) { + return float16ToRawShortBits(f16); + } + return 0x7E00; + } + // Skipping for now: // public int hashCode() // public static int hashCode(Float16 value) @@ -855,7 +892,6 @@ public static Float16 divide(Float16 dividend, Float16 divisor) { * * @param radicand the argument to have its square root taken * - * @see Math#sqrt(float) * @see Math#sqrt(double) */ // @IntrinsicCandidate diff --git a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java index ccfa006b102..eb30975997f 100644 --- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java +++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java @@ -139,6 +139,7 @@ public class VectorSupport { // BasicType codes, for primitives only: public static final int + T_FLOAT16 = 5, T_FLOAT = 6, T_DOUBLE = 7, T_BYTE = 8, diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java index 0ff4830ded5..c816c61e26e 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java @@ -406,18 +406,26 @@ final IllegalArgumentException badArrayBits(Object iv, Object iotaArray() { // Create an iota array. 
It's OK if this is really slow, // because it happens only once per species. - Object ia = Array.newInstance(laneType.elementType, - laneCount); - assert(ia.getClass() == laneType.arrayType); + Object ia = Array.newInstance(laneType.elementType, laneCount); checkValue(laneCount-1); // worst case - for (int i = 0; i < laneCount; i++) { - if ((byte)i == i) - Array.setByte(ia, i, (byte)i); - else if ((short)i == i) - Array.setShort(ia, i, (short)i); - else - Array.setInt(ia, i, i); - assert(Array.getDouble(ia, i) == i); + assert(ia.getClass() == laneType.arrayType); + if (elementType() == Float16.class) { + Float16 [] f16arr = (Float16[])ia; + for (int i = 0; i < laneCount; i++) { + // Note: All the numbers in the range [0:2049) are directly + // representable in FP16 format without the precision loss. + f16arr[i] = Float16.valueOf((float)i); + } + } else { + for (int i = 0; i < laneCount; i++) { + if ((byte)i == i) + Array.setByte(ia, i, (byte)i); + else if ((short)i == i) + Array.setShort(ia, i, (short)i); + else + Array.setInt(ia, i, i); + assert(Array.getDouble(ia, i) == i); + } } return ia; } @@ -615,6 +623,8 @@ AbstractSpecies computeSpecies(LaneType laneType, s = IntVector.species(shape); break; case LaneType.SK_LONG: s = LongVector.species(shape); break; + case LaneType.SK_FLOAT16: + s = HalffloatVector.species(shape); break; } if (s == null) { // NOTE: The result of this method is guaranteed to be diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java index 64d681e4aee..da612e97a1f 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java @@ -262,6 +262,15 @@ public DoubleVector reinterpretAsDoubles() { return (DoubleVector) asVectorRaw(LaneType.DOUBLE); } + /** + * {@inheritDoc} + */ + @Override + @ForceInline + public 
HalffloatVector reinterpretAsHalffloats() { + return (HalffloatVector) asVectorRaw(LaneType.FLOAT16); + } + /** * {@inheritDoc} */ @@ -533,6 +542,8 @@ AbstractVector defaultReinterpret(AbstractSpecies rsp) { return FloatVector.fromMemorySegment(rsp.check(float.class), ms, 0, bo, m.check(float.class)).check0(rsp); case LaneType.SK_DOUBLE: return DoubleVector.fromMemorySegment(rsp.check(double.class), ms, 0, bo, m.check(double.class)).check0(rsp); + case LaneType.SK_FLOAT16: + return HalffloatVector.fromMemorySegment(rsp.check(Float16.class), ms, 0, bo, m.check(Float16.class)).check0(rsp); default: throw new AssertionError(rsp.toString()); } @@ -595,6 +606,13 @@ AbstractVector defaultCast(AbstractSpecies dsp) { } return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp); } + case LaneType.SK_FLOAT16: { + Float16[] a = new Float16[rlength]; + for (int i = 0; i < limit; i++) { + a[i] = Float16.valueOf(lanes[i]); + } + return HalffloatVector.fromArray(dsp.check(Float16.class), a, 0).check0(dsp); + } default: break; } } else { @@ -645,6 +663,13 @@ AbstractVector defaultCast(AbstractSpecies dsp) { } return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp); } + case LaneType.SK_FLOAT16: { + Float16[] a = new Float16[rlength]; + for (int i = 0; i < limit; i++) { + a[i] = Float16.valueOf(lanes[i]); + } + return HalffloatVector.fromArray(dsp.check(Float16.class), a, 0).check0(dsp); + } default: break; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java index af60895899f..a99b419538a 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + byte res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Byte128Mask.class, (Byte128Mask) m); // specialized + byte res = super.reduceLanesTemplate(op, Byte128Mask.class, (Byte128Mask) m); // specialized + return (long) res; } @ForceInline @@ -682,7 +684,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Byte128Mask indexPartiallyInUpperRange(long offset, long limit) { return (Byte128Mask) VectorSupport.indexPartiallyInUpperRange( - Byte128Mask.class, byte.class, VLENGTH, offset, limit, + Byte128Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Byte128Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java index 1dcbbd26907..1fcb0359f14 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + byte res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Byte256Mask.class, (Byte256Mask) m); // specialized + byte res = super.reduceLanesTemplate(op, Byte256Mask.class, (Byte256Mask) m); // specialized + return (long) res; } @ForceInline @@ -714,7 +716,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Byte256Mask indexPartiallyInUpperRange(long offset, long limit) { return (Byte256Mask) VectorSupport.indexPartiallyInUpperRange( - Byte256Mask.class, byte.class, VLENGTH, offset, limit, + Byte256Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Byte256Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java index 9e99a1916a7..ae9950cfe43 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + byte res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Byte512Mask.class, (Byte512Mask) m); // specialized + byte res = super.reduceLanesTemplate(op, Byte512Mask.class, (Byte512Mask) m); // specialized + return (long) res; } @ForceInline @@ -778,7 +780,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Byte512Mask indexPartiallyInUpperRange(long offset, long limit) { return (Byte512Mask) VectorSupport.indexPartiallyInUpperRange( - Byte512Mask.class, byte.class, VLENGTH, offset, limit, + Byte512Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Byte512Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java index 85276b2eb19..f6760cd2ff1 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + byte res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Byte64Mask.class, (Byte64Mask) m); // specialized + byte res = super.reduceLanesTemplate(op, Byte64Mask.class, (Byte64Mask) m); // specialized + return (long) res; } @ForceInline @@ -666,7 +668,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Byte64Mask indexPartiallyInUpperRange(long offset, long limit) { return (Byte64Mask) VectorSupport.indexPartiallyInUpperRange( - Byte64Mask.class, byte.class, VLENGTH, offset, limit, + Byte64Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Byte64Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java index ff035f13294..a20ee7ec591 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + byte res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, ByteMaxMask.class, (ByteMaxMask) m); // specialized + byte res = super.reduceLanesTemplate(op, ByteMaxMask.class, (ByteMaxMask) m); // specialized + return (long) res; } @ForceInline @@ -652,7 +654,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ ByteMaxMask indexPartiallyInUpperRange(long offset, long limit) { return (ByteMaxMask) VectorSupport.indexPartiallyInUpperRange( - ByteMaxMask.class, byte.class, VLENGTH, offset, limit, + ByteMaxMask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (ByteMaxMask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java index 4fc8626754a..add6bbc1ce2 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java @@ -2220,8 +2220,7 @@ final ByteVector addIndexTemplate(int scale) { // instruction directly, load IOTA from memory // and multiply. ByteVector iota = s.iota(); - byte sc = (byte) scale_; - return v.add(sc == 1 ? iota : iota.mul(sc)); + return v.add(scale_ == 1 ? 
iota : iota.mul((byte)scale_)); }); } @@ -2284,7 +2283,8 @@ ByteVector sliceTemplate(int origin, Vector v1) { that.check(this); Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin)))); + byte pivotidx = (byte)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2314,7 +2314,8 @@ ByteVector slice(int origin, ByteVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin)))); + byte pivotidx = (byte)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2376,7 +2377,7 @@ ByteVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((byte)(origin)))); + broadcast((byte)(origin))); iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2948,7 +2949,7 @@ public final double[] toDoubleArray() { byte[] a = toArray(); double[] res = new double[a.length]; for (int i = 0; i < a.length; i++) { - res[i] = (double) a[i]; + res[i] = ((double) a[i]); } return res; } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java index 385fbba55a3..edf738ed3cf 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java +++ 
b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + double res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Double128Mask.class, (Double128Mask) m); // specialized + double res = super.reduceLanesTemplate(op, Double128Mask.class, (Double128Mask) m); // specialized + return (long) res; } @ForceInline @@ -643,7 +645,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Double128Mask indexPartiallyInUpperRange(long offset, long limit) { return (Double128Mask) VectorSupport.indexPartiallyInUpperRange( - Double128Mask.class, double.class, VLENGTH, offset, limit, + Double128Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Double128Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java index e73ada8a088..e6110fce2aa 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + double res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Double256Mask.class, (Double256Mask) m); // specialized + double res = super.reduceLanesTemplate(op, Double256Mask.class, (Double256Mask) m); // specialized + return (long) res; } @ForceInline @@ -647,7 +649,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Double256Mask indexPartiallyInUpperRange(long offset, long limit) { return (Double256Mask) VectorSupport.indexPartiallyInUpperRange( - Double256Mask.class, double.class, VLENGTH, offset, limit, + Double256Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Double256Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java index 5f239d2a527..5674cdae1d6 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + double res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Double512Mask.class, (Double512Mask) m); // specialized + double res = super.reduceLanesTemplate(op, Double512Mask.class, (Double512Mask) m); // specialized + return (long) res; } @ForceInline @@ -655,7 +657,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Double512Mask indexPartiallyInUpperRange(long offset, long limit) { return (Double512Mask) VectorSupport.indexPartiallyInUpperRange( - Double512Mask.class, double.class, VLENGTH, offset, limit, + Double512Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Double512Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java index cd5f14c47db..476c4757264 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + double res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Double64Mask.class, (Double64Mask) m); // specialized + double res = super.reduceLanesTemplate(op, Double64Mask.class, (Double64Mask) m); // specialized + return (long) res; } @ForceInline @@ -641,7 +643,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Double64Mask indexPartiallyInUpperRange(long offset, long limit) { return (Double64Mask) VectorSupport.indexPartiallyInUpperRange( - Double64Mask.class, double.class, VLENGTH, offset, limit, + Double64Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Double64Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java index 84b0b240ca5..cf615934454 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + double res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m); // specialized + double res = super.reduceLanesTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m); // specialized + return (long) res; } @ForceInline @@ -640,7 +642,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ DoubleMaxMask indexPartiallyInUpperRange(long offset, long limit) { return (DoubleMaxMask) VectorSupport.indexPartiallyInUpperRange( - DoubleMaxMask.class, double.class, VLENGTH, offset, limit, + DoubleMaxMask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (DoubleMaxMask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java index 59e67195732..8b876caece7 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java @@ -486,7 +486,7 @@ AbstractMask bTest(int cond, /*package-private*/ @ForceInline static long toBits(double e) { - return Double.doubleToRawLongBits(e); + return Double.doubleToRawLongBits(e); } /*package-private*/ @@ -1027,8 +1027,7 @@ opc, getClass(), maskClass, double.class, length(), private static TernaryOperation> ternaryOperations(int opc_) { switch (opc_) { - case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> - v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); + 
case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); default: return null; } } @@ -2062,8 +2061,7 @@ final DoubleVector addIndexTemplate(int scale) { // instruction directly, load IOTA from memory // and multiply. DoubleVector iota = s.iota(); - double sc = (double) scale_; - return v.add(sc == 1 ? iota : iota.mul(sc)); + return v.add(scale_ == 1 ? iota : iota.mul((double)scale_)); }); } @@ -2126,7 +2124,8 @@ DoubleVector sliceTemplate(int origin, Vector v1) { that.check(this); Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((double)(length() - origin)))); + double pivotidx = (double)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2156,7 +2155,8 @@ DoubleVector slice(int origin, DoubleVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((double)(length() - origin)))); + double pivotidx = (double)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2218,7 +2218,7 @@ DoubleVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((double)(origin)))); + broadcast((double)(origin))); iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java 
b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java index d6b66f77431..73ca6710625 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + float res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Float128Mask.class, (Float128Mask) m); // specialized + float res = super.reduceLanesTemplate(op, Float128Mask.class, (Float128Mask) m); // specialized + return (long) res; } @ForceInline @@ -647,7 +649,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Float128Mask indexPartiallyInUpperRange(long offset, long limit) { return (Float128Mask) VectorSupport.indexPartiallyInUpperRange( - Float128Mask.class, float.class, VLENGTH, offset, limit, + Float128Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Float128Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java index 38e5bee8a97..a4a252b2362 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java +++ 
b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + float res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Float256Mask.class, (Float256Mask) m); // specialized + float res = super.reduceLanesTemplate(op, Float256Mask.class, (Float256Mask) m); // specialized + return (long) res; } @ForceInline @@ -655,7 +657,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Float256Mask indexPartiallyInUpperRange(long offset, long limit) { return (Float256Mask) VectorSupport.indexPartiallyInUpperRange( - Float256Mask.class, float.class, VLENGTH, offset, limit, + Float256Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Float256Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java index 3a398976d98..da7db208936 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + float res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Float512Mask.class, (Float512Mask) m); // specialized + float res = super.reduceLanesTemplate(op, Float512Mask.class, (Float512Mask) m); // specialized + return (long) res; } @ForceInline @@ -671,7 +673,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Float512Mask indexPartiallyInUpperRange(long offset, long limit) { return (Float512Mask) VectorSupport.indexPartiallyInUpperRange( - Float512Mask.class, float.class, VLENGTH, offset, limit, + Float512Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Float512Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java index 867b3e284ae..abbdd83b8bd 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + float res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Float64Mask.class, (Float64Mask) m); // specialized + float res = super.reduceLanesTemplate(op, Float64Mask.class, (Float64Mask) m); // specialized + return (long) res; } @ForceInline @@ -643,7 +645,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Float64Mask indexPartiallyInUpperRange(long offset, long limit) { return (Float64Mask) VectorSupport.indexPartiallyInUpperRange( - Float64Mask.class, float.class, VLENGTH, offset, limit, + Float64Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Float64Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java index 242d405eafb..26f6afa64f0 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + float res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, FloatMaxMask.class, (FloatMaxMask) m); // specialized + float res = super.reduceLanesTemplate(op, FloatMaxMask.class, (FloatMaxMask) m); // specialized + return (long) res; } @ForceInline @@ -640,7 +642,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ FloatMaxMask indexPartiallyInUpperRange(long offset, long limit) { return (FloatMaxMask) VectorSupport.indexPartiallyInUpperRange( - FloatMaxMask.class, float.class, VLENGTH, offset, limit, + FloatMaxMask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (FloatMaxMask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java index 45427817e3d..08c9dbd763c 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java @@ -486,7 +486,7 @@ AbstractMask bTest(int cond, /*package-private*/ @ForceInline static long toBits(float e) { - return Float.floatToRawIntBits(e); + return Float.floatToRawIntBits(e); } /*package-private*/ @@ -1027,8 +1027,7 @@ opc, getClass(), maskClass, float.class, length(), private static TernaryOperation> ternaryOperations(int opc_) { switch (opc_) { - case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> - v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); + case VECTOR_OP_FMA: 
return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); default: return null; } } @@ -2074,8 +2073,7 @@ final FloatVector addIndexTemplate(int scale) { // instruction directly, load IOTA from memory // and multiply. FloatVector iota = s.iota(); - float sc = (float) scale_; - return v.add(sc == 1 ? iota : iota.mul(sc)); + return v.add(scale_ == 1 ? iota : iota.mul((float)scale_)); }); } @@ -2138,7 +2136,8 @@ FloatVector sliceTemplate(int origin, Vector v1) { that.check(this); Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin)))); + float pivotidx = (float)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2168,7 +2167,8 @@ FloatVector slice(int origin, FloatVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin)))); + float pivotidx = (float)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2230,7 +2230,7 @@ FloatVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((float)(origin)))); + broadcast((float)(origin))); iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2770,7 +2770,7 @@ public final double[] toDoubleArray() { float[] a = toArray(); double[] res = new double[a.length]; for (int i = 0; i < a.length; i++) { 
- res[i] = (double) a[i]; + res[i] = ((double) a[i]); } return res; } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java new file mode 100644 index 00000000000..2f53e6675d2 --- /dev/null +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java @@ -0,0 +1,917 @@ +/* + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package jdk.incubator.vector; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.IntUnaryOperator; + +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! -- // + +@SuppressWarnings("cast") // warning: redundant cast +final class Halffloat128Vector extends HalffloatVector { + static final HalffloatSpecies VSPECIES = + (HalffloatSpecies) HalffloatVector.SPECIES_128; + + static final VectorShape VSHAPE = + VSPECIES.vectorShape(); + + static final Class VCLASS = Halffloat128Vector.class; + + static final int VSIZE = VSPECIES.vectorBitSize(); + + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + + static final Class ETYPE = Float16.class; // used by the JVM + + Halffloat128Vector(Float16[] v) { + super(v); + } + + // For compatibility as Halffloat128Vector::new, + // stored into species.vectorFactory. + Halffloat128Vector(Object v) { + this((Float16[]) v); + } + + static final Halffloat128Vector ZERO = new Halffloat128Vector(new Float16[VLENGTH]); + static final Halffloat128Vector IOTA = new Halffloat128Vector(VSPECIES.iotaArray()); + + static { + // Warm up a few species caches. + // If we do this too much we will + // get NPEs from bootstrap circularity. + VSPECIES.dummyVector(); + VSPECIES.withLanes(LaneType.BYTE); + } + + // Specialized extractors + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractVector, rather than + // a megamorphic method. 
+ return VSPECIES; + } + + @ForceInline + @Override + public final Class elementType() { return Float16.class; } + + @ForceInline + @Override + public final int elementSize() { return Float16.SIZE; } + + @ForceInline + @Override + public final VectorShape shape() { return VSHAPE; } + + @ForceInline + @Override + public final int length() { return VLENGTH; } + + @ForceInline + @Override + public final int bitSize() { return VSIZE; } + + @ForceInline + @Override + public final int byteSize() { return VSIZE / Byte.SIZE; } + + /*package-private*/ + @ForceInline + final @Override + Float16[] vec() { + return (Float16[])getPayload(); + } + + // Virtualized constructors + + @Override + @ForceInline + public final Halffloat128Vector broadcast(Float16 e) { + return (Halffloat128Vector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + public final Halffloat128Vector broadcast(long e) { + return (Halffloat128Vector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + Halffloat128Mask maskFromArray(boolean[] bits) { + return new Halffloat128Mask(bits); + } + + @Override + @ForceInline + Halffloat128Shuffle iotaShuffle() { return Halffloat128Shuffle.IOTA; } + + @ForceInline + Halffloat128Shuffle iotaShuffle(int start, int step, boolean wrap) { + if (wrap) { + return (Halffloat128Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat128Shuffle.class, VSPECIES, VLENGTH, start, step, 1, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); + } else { + return (Halffloat128Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat128Shuffle.class, VSPECIES, VLENGTH, start, step, 0, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); + } + } + + @Override + @ForceInline + Halffloat128Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat128Shuffle(reorder); } + + @Override + @ForceInline + Halffloat128Shuffle shuffleFromArray(int[] indexes, int i) { return new 
Halffloat128Shuffle(indexes, i); } + + @Override + @ForceInline + Halffloat128Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat128Shuffle(fn); } + + // Make a vector of the same species but the given elements: + @ForceInline + final @Override + Halffloat128Vector vectorFactory(Float16[] vec) { + return new Halffloat128Vector(vec); + } + + @ForceInline + final @Override + Byte128Vector asByteVectorRaw() { + return (Byte128Vector) super.asByteVectorRawTemplate(); // specialize + } + + @ForceInline + final @Override + AbstractVector asVectorRaw(LaneType laneType) { + return super.asVectorRawTemplate(laneType); // specialize + } + + // Unary operator + + @ForceInline + final @Override + Halffloat128Vector uOp(FUnOp f) { + return (Halffloat128Vector) super.uOpTemplate(f); // specialize + } + + @ForceInline + final @Override + Halffloat128Vector uOp(VectorMask m, FUnOp f) { + return (Halffloat128Vector) + super.uOpTemplate((Halffloat128Mask)m, f); // specialize + } + + // Binary operator + + @ForceInline + final @Override + Halffloat128Vector bOp(Vector v, FBinOp f) { + return (Halffloat128Vector) super.bOpTemplate((Halffloat128Vector)v, f); // specialize + } + + @ForceInline + final @Override + Halffloat128Vector bOp(Vector v, + VectorMask m, FBinOp f) { + return (Halffloat128Vector) + super.bOpTemplate((Halffloat128Vector)v, (Halffloat128Mask)m, + f); // specialize + } + + // Ternary operator + + @ForceInline + final @Override + Halffloat128Vector tOp(Vector v1, Vector v2, FTriOp f) { + return (Halffloat128Vector) + super.tOpTemplate((Halffloat128Vector)v1, (Halffloat128Vector)v2, + f); // specialize + } + + @ForceInline + final @Override + Halffloat128Vector tOp(Vector v1, Vector v2, + VectorMask m, FTriOp f) { + return (Halffloat128Vector) + super.tOpTemplate((Halffloat128Vector)v1, (Halffloat128Vector)v2, + (Halffloat128Mask)m, f); // specialize + } + + @ForceInline + final @Override + Float16 rOp(Float16 v, VectorMask m, FBinOp f) { + return 
super.rOpTemplate(v, m, f); // specialize + } + + @Override + @ForceInline + public final + Vector convertShape(VectorOperators.Conversion conv, + VectorSpecies rsp, int part) { + return super.convertShapeTemplate(conv, rsp, part); // specialize + } + + @Override + @ForceInline + public final + Vector reinterpretShape(VectorSpecies toSpecies, int part) { + return super.reinterpretShapeTemplate(toSpecies, part); // specialize + } + + // Specialized algebraic operations: + + // The following definition forces a specialized version of this + // crucial method into the v-table of this class. A call to add() + // will inline to a call to lanewise(ADD,), at which point the JIT + // intrinsic will have the opcode of ADD, plus all the metadata + // for this particular class, enabling it to generate precise + // code. + // + // There is probably no benefit to the JIT to specialize the + // masked or broadcast versions of the lanewise method. + + @Override + @ForceInline + public Halffloat128Vector lanewise(Unary op) { + return (Halffloat128Vector) super.lanewiseTemplate(op); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector lanewise(Unary op, VectorMask m) { + return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector lanewise(Binary op, Vector v) { + return (Halffloat128Vector) super.lanewiseTemplate(op, v); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, v, (Halffloat128Mask) m); // specialize + } + + + /*package-private*/ + @Override + @ForceInline + public final + Halffloat128Vector + lanewise(Ternary op, Vector v1, Vector v2) { + return (Halffloat128Vector) super.lanewiseTemplate(op, v1, v2); // specialize + } + + @Override + @ForceInline + public final + Halffloat128Vector + 
lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, v1, v2, (Halffloat128Mask) m); // specialize + } + + @Override + @ForceInline + public final + Halffloat128Vector addIndex(int scale) { + return (Halffloat128Vector) super.addIndexTemplate(scale); // specialize + } + + // Type specific horizontal reductions + + @Override + @ForceInline + public final Float16 reduceLanes(VectorOperators.Associative op) { + return super.reduceLanesTemplate(op); // specialized + } + + @Override + @ForceInline + public final Float16 reduceLanes(VectorOperators.Associative op, + VectorMask m) { + return super.reduceLanesTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m); // specialized + } + + @Override + @ForceInline + public final long reduceLanesToLong(VectorOperators.Associative op) { + Float16 res = super.reduceLanesTemplate(op); // specialized + return res.longValue(); + } + + @Override + @ForceInline + public final long reduceLanesToLong(VectorOperators.Associative op, + VectorMask m) { + Float16 res = super.reduceLanesTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m); // specialized + return res.longValue(); + } + + @ForceInline + public VectorShuffle toShuffle() { + return super.toShuffleTemplate(Halffloat128Shuffle.class); // specialize + } + + // Specialized unary testing + + @Override + @ForceInline + public final Halffloat128Mask test(Test op) { + return super.testTemplate(Halffloat128Mask.class, op); // specialize + } + + @Override + @ForceInline + public final Halffloat128Mask test(Test op, VectorMask m) { + return super.testTemplate(Halffloat128Mask.class, op, (Halffloat128Mask) m); // specialize + } + + // Specialized comparisons + + @Override + @ForceInline + public final Halffloat128Mask compare(Comparison op, Vector v) { + return super.compareTemplate(Halffloat128Mask.class, op, v); // specialize + } + + @Override + @ForceInline + public final Halffloat128Mask 
compare(Comparison op, Float16 s) { + return super.compareTemplate(Halffloat128Mask.class, op, s); // specialize + } + + @Override + @ForceInline + public final Halffloat128Mask compare(Comparison op, long s) { + return super.compareTemplate(Halffloat128Mask.class, op, s); // specialize + } + + @Override + @ForceInline + public final Halffloat128Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Halffloat128Mask.class, op, v, (Halffloat128Mask) m); + } + + + @Override + @ForceInline + public Halffloat128Vector blend(Vector v, VectorMask m) { + return (Halffloat128Vector) + super.blendTemplate(Halffloat128Mask.class, + (Halffloat128Vector) v, + (Halffloat128Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector slice(int origin, Vector v) { + return (Halffloat128Vector) super.sliceTemplate(origin, v); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector slice(int origin) { + return (Halffloat128Vector) super.sliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector unslice(int origin, Vector w, int part) { + return (Halffloat128Vector) super.unsliceTemplate(origin, w, part); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector unslice(int origin, Vector w, int part, VectorMask m) { + return (Halffloat128Vector) + super.unsliceTemplate(Halffloat128Mask.class, + origin, w, part, + (Halffloat128Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector unslice(int origin) { + return (Halffloat128Vector) super.unsliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector rearrange(VectorShuffle s) { + return (Halffloat128Vector) + super.rearrangeTemplate(Halffloat128Shuffle.class, + (Halffloat128Shuffle) s); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector rearrange(VectorShuffle shuffle, + VectorMask m) { + return (Halffloat128Vector) 
+ super.rearrangeTemplate(Halffloat128Shuffle.class, + Halffloat128Mask.class, + (Halffloat128Shuffle) shuffle, + (Halffloat128Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector rearrange(VectorShuffle s, + Vector v) { + return (Halffloat128Vector) + super.rearrangeTemplate(Halffloat128Shuffle.class, + (Halffloat128Shuffle) s, + (Halffloat128Vector) v); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector compress(VectorMask m) { + return (Halffloat128Vector) + super.compressTemplate(Halffloat128Mask.class, + (Halffloat128Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector expand(VectorMask m) { + return (Halffloat128Vector) + super.expandTemplate(Halffloat128Mask.class, + (Halffloat128Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector selectFrom(Vector v) { + return (Halffloat128Vector) + super.selectFromTemplate((Halffloat128Vector) v); // specialize + } + + @Override + @ForceInline + public Halffloat128Vector selectFrom(Vector v, + VectorMask m) { + return (Halffloat128Vector) + super.selectFromTemplate((Halffloat128Vector) v, + (Halffloat128Mask) m); // specialize + } + + + @ForceInline + @Override + public Float16 lane(int i) { + short bits; + switch(i) { + case 0: bits = laneHelper(0); break; + case 1: bits = laneHelper(1); break; + case 2: bits = laneHelper(2); break; + case 3: bits = laneHelper(3); break; + case 4: bits = laneHelper(4); break; + case 5: bits = laneHelper(5); break; + case 6: bits = laneHelper(6); break; + case 7: bits = laneHelper(7); break; + default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH); + } + return Float16.shortBitsToFloat16(bits); + } + + public short laneHelper(int i) { + return (short) VectorSupport.extract( + VCLASS, ETYPE, VLENGTH, + this, i, + (vec, ix) -> { + Float16[] vecarr = vec.vec(); + return (long)Float16.float16ToShortBits(vecarr[ix]); 
+ }); + } + + @ForceInline + @Override + public Halffloat128Vector withLane(int i, Float16 e) { + switch(i) { + case 0: return withLaneHelper(0, e); + case 1: return withLaneHelper(1, e); + case 2: return withLaneHelper(2, e); + case 3: return withLaneHelper(3, e); + case 4: return withLaneHelper(4, e); + case 5: return withLaneHelper(5, e); + case 6: return withLaneHelper(6, e); + case 7: return withLaneHelper(7, e); + default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH); + } + } + + public Halffloat128Vector withLaneHelper(int i, Float16 e) { + return VectorSupport.insert( + VCLASS, ETYPE, VLENGTH, + this, i, (long)Float16.float16ToShortBits(e), + (v, ix, bits) -> { + Float16[] res = v.vec().clone(); + res[ix] = Float16.shortBitsToFloat16((short)bits); + return v.vectorFactory(res); + }); + } + + // Mask + + static final class Halffloat128Mask extends AbstractMask { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class ETYPE = Float16.class; // used by the JVM + + Halffloat128Mask(boolean[] bits) { + this(bits, 0); + } + + Halffloat128Mask(boolean[] bits, int offset) { + super(prepare(bits, offset)); + } + + Halffloat128Mask(boolean val) { + super(prepare(val)); + } + + private static boolean[] prepare(boolean[] bits, int offset) { + boolean[] newBits = new boolean[VSPECIES.laneCount()]; + for (int i = 0; i < newBits.length; i++) { + newBits[i] = bits[offset + i]; + } + return newBits; + } + + private static boolean[] prepare(boolean val) { + boolean[] bits = new boolean[VSPECIES.laneCount()]; + Arrays.fill(bits, val); + return bits; + } + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractMask, rather than + // a megamorphic method. 
+ return VSPECIES; + } + + @ForceInline + boolean[] getBits() { + return (boolean[])getPayload(); + } + + @Override + Halffloat128Mask uOp(MUnOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i]); + } + return new Halffloat128Mask(res); + } + + @Override + Halffloat128Mask bOp(VectorMask m, MBinOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + boolean[] mbits = ((Halffloat128Mask)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i], mbits[i]); + } + return new Halffloat128Mask(res); + } + + @ForceInline + @Override + public final + Halffloat128Vector toVector() { + return (Halffloat128Vector) super.toVectorTemplate(); // specialize + } + + /** + * Helper function for lane-wise mask conversions. + * This function kicks in after intrinsic failure. + */ + @ForceInline + private final + VectorMask defaultMaskCast(AbstractSpecies dsp) { + if (length() != dsp.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + boolean[] maskArray = toArray(); + return dsp.maskFactory(maskArray).check(dsp); + } + + @Override + @ForceInline + public VectorMask cast(VectorSpecies dsp) { + AbstractSpecies species = (AbstractSpecies) dsp; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); + } + + @Override + @ForceInline + /*package-private*/ + Halffloat128Mask indexPartiallyInUpperRange(long offset, long limit) { + return (Halffloat128Mask) VectorSupport.indexPartiallyInUpperRange( + Halffloat128Mask.class, ETYPE, VLENGTH, offset, limit, + (o, l) -> (Halffloat128Mask) 
TRUE_MASK.indexPartiallyInRange(o, l)); + } + + // Unary operations + + @Override + @ForceInline + public Halffloat128Mask not() { + return xor(maskAll(true)); + } + + @Override + @ForceInline + public Halffloat128Mask compress() { + return (Halffloat128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS, + Halffloat128Vector.class, Halffloat128Mask.class, ETYPE, VLENGTH, null, this, + (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, Float16.shortBitsToFloat16(Float.floatToFloat16(m1.trueCount())))); // compare against the count as a Float16 value, not its raw bits + } + + + // Binary operations + + @Override + @ForceInline + public Halffloat128Mask and(VectorMask mask) { + Objects.requireNonNull(mask); + Halffloat128Mask m = (Halffloat128Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat128Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); + } + + @Override + @ForceInline + public Halffloat128Mask or(VectorMask mask) { + Objects.requireNonNull(mask); + Halffloat128Mask m = (Halffloat128Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat128Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); + } + + @Override + @ForceInline + public Halffloat128Mask xor(VectorMask mask) { + Objects.requireNonNull(mask); + Halffloat128Mask m = (Halffloat128Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat128Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + } + + // Mask Query operations + + @Override + @ForceInline + public int trueCount() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat128Mask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); + } + + @Override + @ForceInline + public int firstTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat128Mask.class, short.class, VLENGTH, this, + (m) ->
firstTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public int lastTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat128Mask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat128Mask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); + } + + // laneIsSet + + @Override + @ForceInline + public boolean laneIsSet(int i) { + Objects.checkIndex(i, length()); + return VectorSupport.extract(Halffloat128Mask.class, Float16.class, VLENGTH, + this, i, (m, idx) -> (m.getBits()[idx] ? 1L : 0L)) == 1L; + } + + // Reductions + + @Override + @ForceInline + public boolean anyTrue() { + return VectorSupport.test(BT_ne, Halffloat128Mask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> anyTrueHelper(((Halffloat128Mask)m).getBits())); + } + + @Override + @ForceInline + public boolean allTrue() { + return VectorSupport.test(BT_overflow, Halffloat128Mask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> allTrueHelper(((Halffloat128Mask)m).getBits())); + } + + @ForceInline + /*package-private*/ + static Halffloat128Mask maskAll(boolean bit) { + return VectorSupport.fromBitsCoerced(Halffloat128Mask.class, short.class, VLENGTH, + (bit ? -1 : 0), MODE_BROADCAST, null, + (v, __) -> (v != 0 ? 
TRUE_MASK : FALSE_MASK)); + } + private static final Halffloat128Mask TRUE_MASK = new Halffloat128Mask(true); + private static final Halffloat128Mask FALSE_MASK = new Halffloat128Mask(false); + + } + + // Shuffle + + static final class Halffloat128Shuffle extends AbstractShuffle { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class ETYPE = Float16.class; // used by the JVM + + Halffloat128Shuffle(byte[] reorder) { + super(VLENGTH, reorder); + } + + public Halffloat128Shuffle(int[] reorder) { + super(VLENGTH, reorder); + } + + public Halffloat128Shuffle(int[] reorder, int i) { + super(VLENGTH, reorder, i); + } + + public Halffloat128Shuffle(IntUnaryOperator fn) { + super(VLENGTH, fn); + } + + @Override + public HalffloatSpecies vspecies() { + return VSPECIES; + } + + static { + // There must be enough bits in the shuffle lanes to encode + // VLENGTH valid indexes and VLENGTH exceptional ones. + assert(VLENGTH < Byte.MAX_VALUE); + assert(Byte.MIN_VALUE <= -VLENGTH); + } + static final Halffloat128Shuffle IOTA = new Halffloat128Shuffle(IDENTITY); + + @Override + @ForceInline + public Halffloat128Vector toVector() { + return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat128Shuffle.class, this, VLENGTH, + (s) -> ((Halffloat128Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + } + + @Override + @ForceInline + public VectorShuffle cast(VectorSpecies s) { + AbstractSpecies species = (AbstractSpecies) s; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorShuffle length and species length differ"); + int[] shuffleArray = toArray(); + return s.shuffleFromArray(shuffleArray, 0).check(s); + } + + @ForceInline + @Override + public Halffloat128Shuffle rearrange(VectorShuffle shuffle) { + Halffloat128Shuffle s = (Halffloat128Shuffle) shuffle; + byte[] reorder1 = reorder(); + byte[] reorder2 = s.reorder(); + byte[] r = new byte[reorder1.length]; + for (int i = 0; i < reorder1.length; i++) { + int 
ssi = reorder2[i]; + r[i] = reorder1[ssi]; // throws on exceptional index + } + return new Halffloat128Shuffle(r); + } + } + + // ================================================ + + // Specialized low-level memory operations. + + @ForceInline + @Override + final + HalffloatVector fromArray0(Float16[] a, int offset) { + return super.fromArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromArray0(Float16[] a, int offset, VectorMask m, int offsetInRange) { + return super.fromArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m, offsetInRange); // specialize + } + + + + + @ForceInline + @Override + final + HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) { + return super.fromMemorySegment0Template(ms, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask m, int offsetInRange) { + return super.fromMemorySegment0Template(Halffloat128Mask.class, ms, offset, (Halffloat128Mask) m, offsetInRange); // specialize + } + + @ForceInline + @Override + final + void intoArray0(Float16[] a, int offset) { + super.intoArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + void intoArray0(Float16[] a, int offset, VectorMask m) { + super.intoArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m); + } + + + + @ForceInline + @Override + final + void intoMemorySegment0(MemorySegment ms, long offset, VectorMask m) { + super.intoMemorySegment0Template(Halffloat128Mask.class, ms, offset, (Halffloat128Mask) m); + } + + + // End of specialized low-level memory operations. 
+ + // ================================================ + +} + diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java new file mode 100644 index 00000000000..c0b05cc3b53 --- /dev/null +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java @@ -0,0 +1,933 @@ +/* + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package jdk.incubator.vector; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.IntUnaryOperator; + +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! -- // + +@SuppressWarnings("cast") // warning: redundant cast +final class Halffloat256Vector extends HalffloatVector { + static final HalffloatSpecies VSPECIES = + (HalffloatSpecies) HalffloatVector.SPECIES_256; + + static final VectorShape VSHAPE = + VSPECIES.vectorShape(); + + static final Class VCLASS = Halffloat256Vector.class; + + static final int VSIZE = VSPECIES.vectorBitSize(); + + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + + static final Class ETYPE = Float16.class; // used by the JVM + + Halffloat256Vector(Float16[] v) { + super(v); + } + + // For compatibility as Halffloat256Vector::new, + // stored into species.vectorFactory. + Halffloat256Vector(Object v) { + this((Float16[]) v); + } + + static final Halffloat256Vector ZERO = new Halffloat256Vector(new Float16[VLENGTH]); + static final Halffloat256Vector IOTA = new Halffloat256Vector(VSPECIES.iotaArray()); + + static { + // Warm up a few species caches. + // If we do this too much we will + // get NPEs from bootstrap circularity. + VSPECIES.dummyVector(); + VSPECIES.withLanes(LaneType.BYTE); + } + + // Specialized extractors + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractVector, rather than + // a megamorphic method. 
+ return VSPECIES; + } + + @ForceInline + @Override + public final Class elementType() { return Float16.class; } + + @ForceInline + @Override + public final int elementSize() { return Float16.SIZE; } + + @ForceInline + @Override + public final VectorShape shape() { return VSHAPE; } + + @ForceInline + @Override + public final int length() { return VLENGTH; } + + @ForceInline + @Override + public final int bitSize() { return VSIZE; } + + @ForceInline + @Override + public final int byteSize() { return VSIZE / Byte.SIZE; } + + /*package-private*/ + @ForceInline + final @Override + Float16[] vec() { + return (Float16[])getPayload(); + } + + // Virtualized constructors + + @Override + @ForceInline + public final Halffloat256Vector broadcast(Float16 e) { + return (Halffloat256Vector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + public final Halffloat256Vector broadcast(long e) { + return (Halffloat256Vector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + Halffloat256Mask maskFromArray(boolean[] bits) { + return new Halffloat256Mask(bits); + } + + @Override + @ForceInline + Halffloat256Shuffle iotaShuffle() { return Halffloat256Shuffle.IOTA; } + + @ForceInline + Halffloat256Shuffle iotaShuffle(int start, int step, boolean wrap) { + if (wrap) { + return (Halffloat256Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat256Shuffle.class, VSPECIES, VLENGTH, start, step, 1, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); + } else { + return (Halffloat256Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat256Shuffle.class, VSPECIES, VLENGTH, start, step, 0, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); + } + } + + @Override + @ForceInline + Halffloat256Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat256Shuffle(reorder); } + + @Override + @ForceInline + Halffloat256Shuffle shuffleFromArray(int[] indexes, int i) { return new 
Halffloat256Shuffle(indexes, i); } + + @Override + @ForceInline + Halffloat256Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat256Shuffle(fn); } + + // Make a vector of the same species but the given elements: + @ForceInline + final @Override + Halffloat256Vector vectorFactory(Float16[] vec) { + return new Halffloat256Vector(vec); + } + + @ForceInline + final @Override + Byte256Vector asByteVectorRaw() { + return (Byte256Vector) super.asByteVectorRawTemplate(); // specialize + } + + @ForceInline + final @Override + AbstractVector asVectorRaw(LaneType laneType) { + return super.asVectorRawTemplate(laneType); // specialize + } + + // Unary operator + + @ForceInline + final @Override + Halffloat256Vector uOp(FUnOp f) { + return (Halffloat256Vector) super.uOpTemplate(f); // specialize + } + + @ForceInline + final @Override + Halffloat256Vector uOp(VectorMask m, FUnOp f) { + return (Halffloat256Vector) + super.uOpTemplate((Halffloat256Mask)m, f); // specialize + } + + // Binary operator + + @ForceInline + final @Override + Halffloat256Vector bOp(Vector v, FBinOp f) { + return (Halffloat256Vector) super.bOpTemplate((Halffloat256Vector)v, f); // specialize + } + + @ForceInline + final @Override + Halffloat256Vector bOp(Vector v, + VectorMask m, FBinOp f) { + return (Halffloat256Vector) + super.bOpTemplate((Halffloat256Vector)v, (Halffloat256Mask)m, + f); // specialize + } + + // Ternary operator + + @ForceInline + final @Override + Halffloat256Vector tOp(Vector v1, Vector v2, FTriOp f) { + return (Halffloat256Vector) + super.tOpTemplate((Halffloat256Vector)v1, (Halffloat256Vector)v2, + f); // specialize + } + + @ForceInline + final @Override + Halffloat256Vector tOp(Vector v1, Vector v2, + VectorMask m, FTriOp f) { + return (Halffloat256Vector) + super.tOpTemplate((Halffloat256Vector)v1, (Halffloat256Vector)v2, + (Halffloat256Mask)m, f); // specialize + } + + @ForceInline + final @Override + Float16 rOp(Float16 v, VectorMask m, FBinOp f) { + return 
super.rOpTemplate(v, m, f); // specialize + } + + @Override + @ForceInline + public final + Vector convertShape(VectorOperators.Conversion conv, + VectorSpecies rsp, int part) { + return super.convertShapeTemplate(conv, rsp, part); // specialize + } + + @Override + @ForceInline + public final + Vector reinterpretShape(VectorSpecies toSpecies, int part) { + return super.reinterpretShapeTemplate(toSpecies, part); // specialize + } + + // Specialized algebraic operations: + + // The following definition forces a specialized version of this + // crucial method into the v-table of this class. A call to add() + // will inline to a call to lanewise(ADD,), at which point the JIT + // intrinsic will have the opcode of ADD, plus all the metadata + // for this particular class, enabling it to generate precise + // code. + // + // There is probably no benefit to the JIT to specialize the + // masked or broadcast versions of the lanewise method. + + @Override + @ForceInline + public Halffloat256Vector lanewise(Unary op) { + return (Halffloat256Vector) super.lanewiseTemplate(op); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector lanewise(Unary op, VectorMask m) { + return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector lanewise(Binary op, Vector v) { + return (Halffloat256Vector) super.lanewiseTemplate(op, v); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector lanewise(Binary op, Vector v, VectorMask m) { + return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, v, (Halffloat256Mask) m); // specialize + } + + + /*package-private*/ + @Override + @ForceInline + public final + Halffloat256Vector + lanewise(Ternary op, Vector v1, Vector v2) { + return (Halffloat256Vector) super.lanewiseTemplate(op, v1, v2); // specialize + } + + @Override + @ForceInline + public final + Halffloat256Vector + 
lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, v1, v2, (Halffloat256Mask) m); // specialize + } + + @Override + @ForceInline + public final + Halffloat256Vector addIndex(int scale) { + return (Halffloat256Vector) super.addIndexTemplate(scale); // specialize + } + + // Type specific horizontal reductions + + @Override + @ForceInline + public final Float16 reduceLanes(VectorOperators.Associative op) { + return super.reduceLanesTemplate(op); // specialized + } + + @Override + @ForceInline + public final Float16 reduceLanes(VectorOperators.Associative op, + VectorMask m) { + return super.reduceLanesTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m); // specialized + } + + @Override + @ForceInline + public final long reduceLanesToLong(VectorOperators.Associative op) { + Float16 res = super.reduceLanesTemplate(op); // specialized + return res.longValue(); + } + + @Override + @ForceInline + public final long reduceLanesToLong(VectorOperators.Associative op, + VectorMask m) { + Float16 res = super.reduceLanesTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m); // specialized + return res.longValue(); + } + + @ForceInline + public VectorShuffle toShuffle() { + return super.toShuffleTemplate(Halffloat256Shuffle.class); // specialize + } + + // Specialized unary testing + + @Override + @ForceInline + public final Halffloat256Mask test(Test op) { + return super.testTemplate(Halffloat256Mask.class, op); // specialize + } + + @Override + @ForceInline + public final Halffloat256Mask test(Test op, VectorMask m) { + return super.testTemplate(Halffloat256Mask.class, op, (Halffloat256Mask) m); // specialize + } + + // Specialized comparisons + + @Override + @ForceInline + public final Halffloat256Mask compare(Comparison op, Vector v) { + return super.compareTemplate(Halffloat256Mask.class, op, v); // specialize + } + + @Override + @ForceInline + public final Halffloat256Mask 
compare(Comparison op, Float16 s) { + return super.compareTemplate(Halffloat256Mask.class, op, s); // specialize + } + + @Override + @ForceInline + public final Halffloat256Mask compare(Comparison op, long s) { + return super.compareTemplate(Halffloat256Mask.class, op, s); // specialize + } + + @Override + @ForceInline + public final Halffloat256Mask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(Halffloat256Mask.class, op, v, (Halffloat256Mask) m); + } + + + @Override + @ForceInline + public Halffloat256Vector blend(Vector v, VectorMask m) { + return (Halffloat256Vector) + super.blendTemplate(Halffloat256Mask.class, + (Halffloat256Vector) v, + (Halffloat256Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector slice(int origin, Vector v) { + return (Halffloat256Vector) super.sliceTemplate(origin, v); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector slice(int origin) { + return (Halffloat256Vector) super.sliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector unslice(int origin, Vector w, int part) { + return (Halffloat256Vector) super.unsliceTemplate(origin, w, part); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector unslice(int origin, Vector w, int part, VectorMask m) { + return (Halffloat256Vector) + super.unsliceTemplate(Halffloat256Mask.class, + origin, w, part, + (Halffloat256Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector unslice(int origin) { + return (Halffloat256Vector) super.unsliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector rearrange(VectorShuffle s) { + return (Halffloat256Vector) + super.rearrangeTemplate(Halffloat256Shuffle.class, + (Halffloat256Shuffle) s); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector rearrange(VectorShuffle shuffle, + VectorMask m) { + return (Halffloat256Vector) 
+ super.rearrangeTemplate(Halffloat256Shuffle.class, + Halffloat256Mask.class, + (Halffloat256Shuffle) shuffle, + (Halffloat256Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector rearrange(VectorShuffle s, + Vector v) { + return (Halffloat256Vector) + super.rearrangeTemplate(Halffloat256Shuffle.class, + (Halffloat256Shuffle) s, + (Halffloat256Vector) v); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector compress(VectorMask m) { + return (Halffloat256Vector) + super.compressTemplate(Halffloat256Mask.class, + (Halffloat256Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector expand(VectorMask m) { + return (Halffloat256Vector) + super.expandTemplate(Halffloat256Mask.class, + (Halffloat256Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector selectFrom(Vector v) { + return (Halffloat256Vector) + super.selectFromTemplate((Halffloat256Vector) v); // specialize + } + + @Override + @ForceInline + public Halffloat256Vector selectFrom(Vector v, + VectorMask m) { + return (Halffloat256Vector) + super.selectFromTemplate((Halffloat256Vector) v, + (Halffloat256Mask) m); // specialize + } + + + @ForceInline + @Override + public Float16 lane(int i) { + short bits; + switch(i) { + case 0: bits = laneHelper(0); break; + case 1: bits = laneHelper(1); break; + case 2: bits = laneHelper(2); break; + case 3: bits = laneHelper(3); break; + case 4: bits = laneHelper(4); break; + case 5: bits = laneHelper(5); break; + case 6: bits = laneHelper(6); break; + case 7: bits = laneHelper(7); break; + case 8: bits = laneHelper(8); break; + case 9: bits = laneHelper(9); break; + case 10: bits = laneHelper(10); break; + case 11: bits = laneHelper(11); break; + case 12: bits = laneHelper(12); break; + case 13: bits = laneHelper(13); break; + case 14: bits = laneHelper(14); break; + case 15: bits = laneHelper(15); break; + default: throw new IllegalArgumentException("Index " + i + 
" must be zero or positive, and less than " + VLENGTH); + } + return Float16.shortBitsToFloat16(bits); + } + + public short laneHelper(int i) { + return (short) VectorSupport.extract( + VCLASS, ETYPE, VLENGTH, + this, i, + (vec, ix) -> { + Float16[] vecarr = vec.vec(); + return (long)Float16.float16ToShortBits(vecarr[ix]); + }); + } + + @ForceInline + @Override + public Halffloat256Vector withLane(int i, Float16 e) { + switch(i) { + case 0: return withLaneHelper(0, e); + case 1: return withLaneHelper(1, e); + case 2: return withLaneHelper(2, e); + case 3: return withLaneHelper(3, e); + case 4: return withLaneHelper(4, e); + case 5: return withLaneHelper(5, e); + case 6: return withLaneHelper(6, e); + case 7: return withLaneHelper(7, e); + case 8: return withLaneHelper(8, e); + case 9: return withLaneHelper(9, e); + case 10: return withLaneHelper(10, e); + case 11: return withLaneHelper(11, e); + case 12: return withLaneHelper(12, e); + case 13: return withLaneHelper(13, e); + case 14: return withLaneHelper(14, e); + case 15: return withLaneHelper(15, e); + default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH); + } + } + + public Halffloat256Vector withLaneHelper(int i, Float16 e) { + return VectorSupport.insert( + VCLASS, ETYPE, VLENGTH, + this, i, (long)Float16.float16ToShortBits(e), + (v, ix, bits) -> { + Float16[] res = v.vec().clone(); + res[ix] = Float16.shortBitsToFloat16((short)bits); + return v.vectorFactory(res); + }); + } + + // Mask + + static final class Halffloat256Mask extends AbstractMask { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class ETYPE = Float16.class; // used by the JVM + + Halffloat256Mask(boolean[] bits) { + this(bits, 0); + } + + Halffloat256Mask(boolean[] bits, int offset) { + super(prepare(bits, offset)); + } + + Halffloat256Mask(boolean val) { + super(prepare(val)); + } + + private static boolean[] prepare(boolean[] bits, int offset) 
{ + boolean[] newBits = new boolean[VSPECIES.laneCount()]; + for (int i = 0; i < newBits.length; i++) { + newBits[i] = bits[offset + i]; + } + return newBits; + } + + private static boolean[] prepare(boolean val) { + boolean[] bits = new boolean[VSPECIES.laneCount()]; + Arrays.fill(bits, val); + return bits; + } + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractMask, rather than + // a megamorphic method. + return VSPECIES; + } + + @ForceInline + boolean[] getBits() { + return (boolean[])getPayload(); + } + + @Override + Halffloat256Mask uOp(MUnOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i]); + } + return new Halffloat256Mask(res); + } + + @Override + Halffloat256Mask bOp(VectorMask m, MBinOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + boolean[] mbits = ((Halffloat256Mask)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i], mbits[i]); + } + return new Halffloat256Mask(res); + } + + @ForceInline + @Override + public final + Halffloat256Vector toVector() { + return (Halffloat256Vector) super.toVectorTemplate(); // specialize + } + + /** + * Helper function for lane-wise mask conversions. + * This function kicks in after intrinsic failure. 
+ */ + @ForceInline + private final + VectorMask defaultMaskCast(AbstractSpecies dsp) { + if (length() != dsp.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + boolean[] maskArray = toArray(); + return dsp.maskFactory(maskArray).check(dsp); + } + + @Override + @ForceInline + public VectorMask cast(VectorSpecies dsp) { + AbstractSpecies species = (AbstractSpecies) dsp; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); + } + + @Override + @ForceInline + /*package-private*/ + Halffloat256Mask indexPartiallyInUpperRange(long offset, long limit) { + return (Halffloat256Mask) VectorSupport.indexPartiallyInUpperRange( + Halffloat256Mask.class, ETYPE, VLENGTH, offset, limit, + (o, l) -> (Halffloat256Mask) TRUE_MASK.indexPartiallyInRange(o, l)); + } + + // Unary operations + + @Override + @ForceInline + public Halffloat256Mask not() { + return xor(maskAll(true)); + } + + @Override + @ForceInline + public Halffloat256Mask compress() { + return (Halffloat256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS, + Halffloat256Vector.class, Halffloat256Mask.class, ETYPE, VLENGTH, null, this, + (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, Float16.shortBitsToFloat16(Float.floatToFloat16(m1.trueCount())))); // compare against the count as a Float16 value, not its raw bits + } + + + // Binary operations + + @Override + @ForceInline + public Halffloat256Mask and(VectorMask mask) { + Objects.requireNonNull(mask); + Halffloat256Mask m = (Halffloat256Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat256Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); + } + + @Override + @ForceInline + public Halffloat256Mask or(VectorMask mask) { +
Objects.requireNonNull(mask); + Halffloat256Mask m = (Halffloat256Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat256Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); + } + + @Override + @ForceInline + public Halffloat256Mask xor(VectorMask mask) { + Objects.requireNonNull(mask); + Halffloat256Mask m = (Halffloat256Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat256Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + } + + // Mask Query operations + + @Override + @ForceInline + public int trueCount() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat256Mask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); + } + + @Override + @ForceInline + public int firstTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat256Mask.class, short.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public int lastTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat256Mask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat256Mask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); + } + + // laneIsSet + + @Override + @ForceInline + public boolean laneIsSet(int i) { + Objects.checkIndex(i, length()); + return VectorSupport.extract(Halffloat256Mask.class, Float16.class, VLENGTH, + this, i, (m, idx) -> (m.getBits()[idx] ? 
1L : 0L)) == 1L; + } + + // Reductions + + @Override + @ForceInline + public boolean anyTrue() { + return VectorSupport.test(BT_ne, Halffloat256Mask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> anyTrueHelper(((Halffloat256Mask)m).getBits())); + } + + @Override + @ForceInline + public boolean allTrue() { + return VectorSupport.test(BT_overflow, Halffloat256Mask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> allTrueHelper(((Halffloat256Mask)m).getBits())); + } + + @ForceInline + /*package-private*/ + static Halffloat256Mask maskAll(boolean bit) { + return VectorSupport.fromBitsCoerced(Halffloat256Mask.class, short.class, VLENGTH, + (bit ? -1 : 0), MODE_BROADCAST, null, + (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK)); + } + private static final Halffloat256Mask TRUE_MASK = new Halffloat256Mask(true); + private static final Halffloat256Mask FALSE_MASK = new Halffloat256Mask(false); + + } + + // Shuffle + + static final class Halffloat256Shuffle extends AbstractShuffle { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class ETYPE = Float16.class; // used by the JVM + + Halffloat256Shuffle(byte[] reorder) { + super(VLENGTH, reorder); + } + + public Halffloat256Shuffle(int[] reorder) { + super(VLENGTH, reorder); + } + + public Halffloat256Shuffle(int[] reorder, int i) { + super(VLENGTH, reorder, i); + } + + public Halffloat256Shuffle(IntUnaryOperator fn) { + super(VLENGTH, fn); + } + + @Override + public HalffloatSpecies vspecies() { + return VSPECIES; + } + + static { + // There must be enough bits in the shuffle lanes to encode + // VLENGTH valid indexes and VLENGTH exceptional ones. 
+ assert(VLENGTH < Byte.MAX_VALUE); + assert(Byte.MIN_VALUE <= -VLENGTH); + } + static final Halffloat256Shuffle IOTA = new Halffloat256Shuffle(IDENTITY); + + @Override + @ForceInline + public Halffloat256Vector toVector() { + return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat256Shuffle.class, this, VLENGTH, + (s) -> ((Halffloat256Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + } + + @Override + @ForceInline + public VectorShuffle cast(VectorSpecies s) { + AbstractSpecies species = (AbstractSpecies) s; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorShuffle length and species length differ"); + int[] shuffleArray = toArray(); + return s.shuffleFromArray(shuffleArray, 0).check(s); + } + + @ForceInline + @Override + public Halffloat256Shuffle rearrange(VectorShuffle shuffle) { + Halffloat256Shuffle s = (Halffloat256Shuffle) shuffle; + byte[] reorder1 = reorder(); + byte[] reorder2 = s.reorder(); + byte[] r = new byte[reorder1.length]; + for (int i = 0; i < reorder1.length; i++) { + int ssi = reorder2[i]; + r[i] = reorder1[ssi]; // throws on exceptional index + } + return new Halffloat256Shuffle(r); + } + } + + // ================================================ + + // Specialized low-level memory operations. 
+    // Every method in this section is a one-line forward to the same-named *Template method of the superclass.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset) {
+        return super.fromArray0Template(a, offset); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset, VectorMask m, int offsetInRange) {
+        return super.fromArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m, offsetInRange); // specialize
+    }
+
+    // Masked variants pass Halffloat256Mask.class and cast the incoming
+    // mask to Halffloat256Mask before forwarding.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) {
+        return super.fromMemorySegment0Template(ms, offset); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask m, int offsetInRange) {
+        return super.fromMemorySegment0Template(Halffloat256Mask.class, ms, offset, (Halffloat256Mask) m, offsetInRange); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset) {
+        super.intoArray0Template(a, offset); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset, VectorMask m) {
+        super.intoArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m); // specialize
+    }
+
+    // Only the masked MemorySegment store is specialized in this class.
+
+    @ForceInline
+    @Override
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask m) {
+        super.intoMemorySegment0Template(Halffloat256Mask.class, ms, offset, (Halffloat256Mask) m); // specialize
+    }
+
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
+
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java
new file mode 100644
index 00000000000..1865119a030
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java
@@ -0,0 +1,933 @@
+/*
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.incubator.vector; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.IntUnaryOperator; + +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! 
-- //
+
+/**
+ * Concrete {@code HalffloatVector} for {@code HalffloatVector.SPECIES_512},
+ * with {@code Float16} lanes.
+ *
+ * Almost every method is a thin specialization: it forwards to the
+ * corresponding {@code *Template} method of the superclass, passing this
+ * class's mask/shuffle class literals and casting arguments and results to
+ * the 512-bit types. The nested {@code Halffloat512Mask} and
+ * {@code Halffloat512Shuffle} are the mask and shuffle types of this species.
+ */
+@SuppressWarnings("cast") // warning: redundant cast
+final class Halffloat512Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_512;
+
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();
+
+    static final Class VCLASS = Halffloat512Vector.class;
+
+    static final int VSIZE = VSPECIES.vectorBitSize();
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class ETYPE = Float16.class; // used by the JVM
+
+    Halffloat512Vector(Float16[] v) {
+        super(v);
+    }
+
+    // For compatibility as Halffloat512Vector::new,
+    // stored into species.vectorFactory.
+    Halffloat512Vector(Object v) {
+        this((Float16[]) v);
+    }
+
+    // NOTE(review): ZERO is backed by a freshly allocated Float16[]; confirm
+    // that its elements read as 0.0 rather than null for this element type.
+    static final Halffloat512Vector ZERO = new Halffloat512Vector(new Float16[VLENGTH]);
+    static final Halffloat512Vector IOTA = new Halffloat512Vector(VSPECIES.iotaArray());
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();
+        VSPECIES.withLanes(LaneType.BYTE);
+    }
+
+    // Specialized extractors
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE: This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    @ForceInline
+    @Override
+    public final Class elementType() { return Float16.class; }
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Float16.SIZE; }
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    Float16[] vec() {
+        return (Float16[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    @Override
+    @ForceInline
+    public final Halffloat512Vector broadcast(Float16 e) {
+        return (Halffloat512Vector) super.broadcastTemplate(e); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat512Vector broadcast(long e) {
+        return (Halffloat512Vector) super.broadcastTemplate(e); // specialize
+    }
+
+    @Override
+    @ForceInline
+    Halffloat512Mask maskFromArray(boolean[] bits) {
+        return new Halffloat512Mask(bits);
+    }
+
+    @Override
+    @ForceInline
+    Halffloat512Shuffle iotaShuffle() { return Halffloat512Shuffle.IOTA; }
+
+    @ForceInline
+    Halffloat512Shuffle iotaShuffle(int start, int step, boolean wrap) {
+        // The last integer argument is 1 for the wrapping form and 0 otherwise;
+        // the lambda is the Java fallback computing i*step + start per lane.
+        if (wrap) {
+            return (Halffloat512Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat512Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+        } else {
+            return (Halffloat512Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat512Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+        }
+    }
+
+    @Override
+    @ForceInline
+    Halffloat512Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat512Shuffle(reorder); }
+
+    @Override
+    @ForceInline
+    Halffloat512Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat512Shuffle(indexes, i); }
+
+    @Override
+    @ForceInline
+    Halffloat512Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat512Shuffle(fn); }
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    Halffloat512Vector vectorFactory(Float16[] vec) {
+        return new Halffloat512Vector(vec);
+    }
+
+    @ForceInline
+    final @Override
+    Byte512Vector asByteVectorRaw() {
+        return (Byte512Vector) super.asByteVectorRawTemplate(); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType); // specialize
+    }
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector uOp(FUnOp f) {
+        return (Halffloat512Vector) super.uOpTemplate(f); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector uOp(VectorMask m, FUnOp f) {
+        return (Halffloat512Vector)
+            super.uOpTemplate((Halffloat512Mask)m, f); // specialize
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector bOp(Vector v, FBinOp f) {
+        return (Halffloat512Vector) super.bOpTemplate((Halffloat512Vector)v, f); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector bOp(Vector v,
+                     VectorMask m, FBinOp f) {
+        return (Halffloat512Vector)
+            super.bOpTemplate((Halffloat512Vector)v, (Halffloat512Mask)m,
+                              f); // specialize
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector tOp(Vector v1, Vector v2, FTriOp f) {
+        return (Halffloat512Vector)
+            super.tOpTemplate((Halffloat512Vector)v1, (Halffloat512Vector)v2,
+                              f); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector tOp(Vector v1, Vector v2,
+                     VectorMask m, FTriOp f) {
+        return (Halffloat512Vector)
+            super.tOpTemplate((Halffloat512Vector)v1, (Halffloat512Vector)v2,
+                              (Halffloat512Mask)m, f); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Float16 rOp(Float16 v, VectorMask m, FBinOp f) {
+        return super.rOpTemplate(v, m, f); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Vector convertShape(VectorOperators.Conversion conv,
+                           VectorSpecies rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Vector reinterpretShape(VectorSpecies toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part); // specialize
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class. A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector lanewise(Unary op) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector lanewise(Unary op, VectorMask m) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector lanewise(Binary op, Vector v) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector lanewise(Binary op, Vector v, VectorMask m) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, v, (Halffloat512Mask) m); // specialize
+    }
+
+
+    /*package-private*/
+    @Override
+    @ForceInline
+    public final
+    Halffloat512Vector
+    lanewise(Ternary op, Vector v1, Vector v2) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, v1, v2); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat512Vector
+    lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, v1, v2, (Halffloat512Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat512Vector addIndex(int scale) {
+        return (Halffloat512Vector) super.addIndexTemplate(scale); // specialize
+    }
+
+    // Type specific horizontal reductions
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op); // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op,
+                                    VectorMask m) {
+        return super.reduceLanesTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m); // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        Float16 res = super.reduceLanesTemplate(op); // specialized
+        return res.longValue();
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask m) {
+        Float16 res = super.reduceLanesTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m); // specialized
+        return res.longValue();
+    }
+
+    @ForceInline
+    public VectorShuffle toShuffle() {
+        return super.toShuffleTemplate(Halffloat512Shuffle.class); // specialize
+    }
+
+    // Specialized unary testing
+
+    @Override
+    @ForceInline
+    public final Halffloat512Mask test(Test op) {
+        return super.testTemplate(Halffloat512Mask.class, op); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat512Mask test(Test op, VectorMask m) {
+        return super.testTemplate(Halffloat512Mask.class, op, (Halffloat512Mask) m); // specialize
+    }
+
+    // Specialized comparisons
+
+    @Override
+    @ForceInline
+    public final Halffloat512Mask compare(Comparison op, Vector v) {
+        return super.compareTemplate(Halffloat512Mask.class, op, v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat512Mask compare(Comparison op, Float16 s) {
+        return super.compareTemplate(Halffloat512Mask.class, op, s); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat512Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat512Mask.class, op, s); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat512Mask compare(Comparison op, Vector v, VectorMask m) {
+        return super.compareTemplate(Halffloat512Mask.class, op, v, (Halffloat512Mask) m);
+    }
+
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector blend(Vector v, VectorMask m) {
+        return (Halffloat512Vector)
+            super.blendTemplate(Halffloat512Mask.class,
+                                (Halffloat512Vector) v,
+                                (Halffloat512Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector slice(int origin, Vector v) {
+        return (Halffloat512Vector) super.sliceTemplate(origin, v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector slice(int origin) {
+        return (Halffloat512Vector) super.sliceTemplate(origin); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector unslice(int origin, Vector w, int part) {
+        return (Halffloat512Vector) super.unsliceTemplate(origin, w, part); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector unslice(int origin, Vector w, int part, VectorMask m) {
+        return (Halffloat512Vector)
+            super.unsliceTemplate(Halffloat512Mask.class,
+                                  origin, w, part,
+                                  (Halffloat512Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector unslice(int origin) {
+        return (Halffloat512Vector) super.unsliceTemplate(origin); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector rearrange(VectorShuffle s) {
+        return (Halffloat512Vector)
+            super.rearrangeTemplate(Halffloat512Shuffle.class,
+                                    (Halffloat512Shuffle) s); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector rearrange(VectorShuffle shuffle,
+                                  VectorMask m) {
+        return (Halffloat512Vector)
+            super.rearrangeTemplate(Halffloat512Shuffle.class,
+                                    Halffloat512Mask.class,
+                                    (Halffloat512Shuffle) shuffle,
+                                    (Halffloat512Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector rearrange(VectorShuffle s,
+                                  Vector v) {
+        return (Halffloat512Vector)
+            super.rearrangeTemplate(Halffloat512Shuffle.class,
+                                    (Halffloat512Shuffle) s,
+                                    (Halffloat512Vector) v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector compress(VectorMask m) {
+        return (Halffloat512Vector)
+            super.compressTemplate(Halffloat512Mask.class,
+                                   (Halffloat512Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector expand(VectorMask m) {
+        return (Halffloat512Vector)
+            super.expandTemplate(Halffloat512Mask.class,
+                                 (Halffloat512Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector selectFrom(Vector v) {
+        return (Halffloat512Vector)
+            super.selectFromTemplate((Halffloat512Vector) v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector selectFrom(Vector v,
+                                   VectorMask m) {
+        return (Halffloat512Vector)
+            super.selectFromTemplate((Halffloat512Vector) v,
+                                     (Halffloat512Mask) m); // specialize
+    }
+
+
+    // Builds the exception thrown for a lane index outside [0, VLENGTH).
+    private static IllegalArgumentException badLaneIndex(int i) {
+        return new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+    }
+
+    /**
+     * Returns the value of lane {@code i}.
+     *
+     * The switch hands a literal index to {@code laneHelper} for each lane
+     * (kept from the generated form; presumably so the extract intrinsic
+     * sees a constant lane index). Cases cover 0..31: a 512-bit vector of
+     * 16-bit lanes is expected to have 32 lanes, and the previous 0..15
+     * table rejected the upper half. The explicit range check keeps the
+     * method correct for whatever VLENGTH actually is.
+     *
+     * @param i lane index
+     * @return the lane value as a {@code Float16}
+     * @throws IllegalArgumentException if {@code i} is negative or not less
+     *         than {@code VLENGTH}
+     */
+    @ForceInline
+    @Override
+    public Float16 lane(int i) {
+        if (i < 0 || i >= VLENGTH) {
+            throw badLaneIndex(i);
+        }
+        short bits;
+        switch(i) {
+            case 0: bits = laneHelper(0); break;
+            case 1: bits = laneHelper(1); break;
+            case 2: bits = laneHelper(2); break;
+            case 3: bits = laneHelper(3); break;
+            case 4: bits = laneHelper(4); break;
+            case 5: bits = laneHelper(5); break;
+            case 6: bits = laneHelper(6); break;
+            case 7: bits = laneHelper(7); break;
+            case 8: bits = laneHelper(8); break;
+            case 9: bits = laneHelper(9); break;
+            case 10: bits = laneHelper(10); break;
+            case 11: bits = laneHelper(11); break;
+            case 12: bits = laneHelper(12); break;
+            case 13: bits = laneHelper(13); break;
+            case 14: bits = laneHelper(14); break;
+            case 15: bits = laneHelper(15); break;
+            case 16: bits = laneHelper(16); break;
+            case 17: bits = laneHelper(17); break;
+            case 18: bits = laneHelper(18); break;
+            case 19: bits = laneHelper(19); break;
+            case 20: bits = laneHelper(20); break;
+            case 21: bits = laneHelper(21); break;
+            case 22: bits = laneHelper(22); break;
+            case 23: bits = laneHelper(23); break;
+            case 24: bits = laneHelper(24); break;
+            case 25: bits = laneHelper(25); break;
+            case 26: bits = laneHelper(26); break;
+            case 27: bits = laneHelper(27); break;
+            case 28: bits = laneHelper(28); break;
+            case 29: bits = laneHelper(29); break;
+            case 30: bits = laneHelper(30); break;
+            case 31: bits = laneHelper(31); break;
+            default: throw badLaneIndex(i);
+        }
+        return Float16.shortBitsToFloat16(bits);
+    }
+
+    // Extracts lane i as raw 16-bit bits; the lambda is the Java fallback,
+    // which reads the backing Float16[] and converts with float16ToShortBits.
+    public short laneHelper(int i) {
+        return (short) VectorSupport.extract(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i,
+                                (vec, ix) -> {
+                                    Float16[] vecarr = vec.vec();
+                                    return (long)Float16.float16ToShortBits(vecarr[ix]);
+                                });
+    }
+
+    /**
+     * Returns a vector equal to this one except that lane {@code i} holds
+     * {@code e}. Uses the same literal-index dispatch and range check as
+     * {@code lane(int)}, and likewise covers lanes 0..31.
+     *
+     * @param i lane index
+     * @param e the value to place in lane {@code i}
+     * @return the updated vector
+     * @throws IllegalArgumentException if {@code i} is negative or not less
+     *         than {@code VLENGTH}
+     */
+    @ForceInline
+    @Override
+    public Halffloat512Vector withLane(int i, Float16 e) {
+        if (i < 0 || i >= VLENGTH) {
+            throw badLaneIndex(i);
+        }
+        switch(i) {
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            case 4: return withLaneHelper(4, e);
+            case 5: return withLaneHelper(5, e);
+            case 6: return withLaneHelper(6, e);
+            case 7: return withLaneHelper(7, e);
+            case 8: return withLaneHelper(8, e);
+            case 9: return withLaneHelper(9, e);
+            case 10: return withLaneHelper(10, e);
+            case 11: return withLaneHelper(11, e);
+            case 12: return withLaneHelper(12, e);
+            case 13: return withLaneHelper(13, e);
+            case 14: return withLaneHelper(14, e);
+            case 15: return withLaneHelper(15, e);
+            case 16: return withLaneHelper(16, e);
+            case 17: return withLaneHelper(17, e);
+            case 18: return withLaneHelper(18, e);
+            case 19: return withLaneHelper(19, e);
+            case 20: return withLaneHelper(20, e);
+            case 21: return withLaneHelper(21, e);
+            case 22: return withLaneHelper(22, e);
+            case 23: return withLaneHelper(23, e);
+            case 24: return withLaneHelper(24, e);
+            case 25: return withLaneHelper(25, e);
+            case 26: return withLaneHelper(26, e);
+            case 27: return withLaneHelper(27, e);
+            case 28: return withLaneHelper(28, e);
+            case 29: return withLaneHelper(29, e);
+            case 30: return withLaneHelper(30, e);
+            case 31: return withLaneHelper(31, e);
+            default: throw badLaneIndex(i);
+        }
+    }
+
+    // Inserts e (as raw 16-bit bits) into lane i; the lambda is the Java
+    // fallback, which clones the backing array and replaces one element.
+    public Halffloat512Vector withLaneHelper(int i, Float16 e) {
+        return VectorSupport.insert(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i, (long)Float16.float16ToShortBits(e),
+                                (v, ix, bits) -> {
+                                    Float16[] res = v.vec().clone();
+                                    res[ix] = Float16.shortBitsToFloat16((short)bits);
+                                    return v.vectorFactory(res);
+                                });
+    }
+
+    // Mask
+
+    static final class Halffloat512Mask extends AbstractMask {
+        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+        static final Class ETYPE = Float16.class; // used by the JVM
+
+        Halffloat512Mask(boolean[] bits) {
+            this(bits, 0);
+        }
+
+        Halffloat512Mask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));
+        }
+
+        Halffloat512Mask(boolean val) {
+            super(prepare(val));
+        }
+
+        // Copies laneCount() flags out of bits, starting at offset.
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];
+            }
+            return newBits;
+        }
+
+        // Builds a laneCount()-long array with every flag set to val.
+        private static boolean[] prepare(boolean val) {
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE: This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        @Override
+        Halffloat512Mask uOp(MUnOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat512Mask(res);
+        }
+
+        @Override
+        Halffloat512Mask bOp(VectorMask m, MBinOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat512Mask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat512Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat512Vector toVector() {
+            return (Halffloat512Vector) super.toVectorTemplate(); // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final
+        VectorMask defaultMaskCast(AbstractSpecies dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override
+        @ForceInline
+        public VectorMask cast(VectorSpecies dsp) {
+            AbstractSpecies species = (AbstractSpecies) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));
+        }
+
+        @Override
+        @ForceInline
+        /*package-private*/
+        Halffloat512Mask indexPartiallyInUpperRange(long offset, long limit) {
+            return (Halffloat512Mask) VectorSupport.indexPartiallyInUpperRange(
+                Halffloat512Mask.class, ETYPE, VLENGTH, offset, limit,
+                (o, l) -> (Halffloat512Mask) TRUE_MASK.indexPartiallyInRange(o, l));
+        }
+
+        // Unary operations
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask not() {
+            return xor(maskAll(true));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask compress() {
+            // Fallback: set the lanes whose iota value is below trueCount().
+            // The count is passed as an integral value so that it goes through
+            // compare(Comparison, long) and is converted numerically.
+            // NOTE(review): the generated form passed
+            // Float.floatToFloat16(m1.trueCount()), i.e. raw binary16 bits in a
+            // short; the numeric compare overloads visible in this class take
+            // Float16 or long, so those bits would have been compared as a
+            // number. Confirm against HalffloatVector and fix the template.
+            return (Halffloat512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
+                Halffloat512Vector.class, Halffloat512Mask.class, ETYPE, VLENGTH, null, this,
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
+        }
+
+
+        // Binary operations
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask and(VectorMask mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat512Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask or(VectorMask mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat512Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask xor(VectorMask mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat512Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // laneIsSet
+
+        @Override
+        @ForceInline
+        public boolean laneIsSet(int i) {
+            Objects.checkIndex(i, length());
+            return VectorSupport.extract(Halffloat512Mask.class, Float16.class, VLENGTH,
+                                         this, i, (m, idx) -> (m.getBits()[idx] ? 1L : 0L)) == 1L;
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat512Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((Halffloat512Mask)m).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat512Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((Halffloat512Mask)m).getBits()));
+        }
+
+        @ForceInline
+        /*package-private*/
+        static Halffloat512Mask maskAll(boolean bit) {
+            return VectorSupport.fromBitsCoerced(Halffloat512Mask.class, short.class, VLENGTH,
+                                                 (bit ? -1 : 0), MODE_BROADCAST, null,
+                                                 (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
+        }
+        private static final Halffloat512Mask TRUE_MASK = new Halffloat512Mask(true);
+        private static final Halffloat512Mask FALSE_MASK = new Halffloat512Mask(false);
+
+    }
+
+    // Shuffle
+
+    static final class Halffloat512Shuffle extends AbstractShuffle {
+        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+        static final Class ETYPE = Float16.class; // used by the JVM
+
+        Halffloat512Shuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat512Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat512Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat512Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat512Shuffle IOTA = new Halffloat512Shuffle(IDENTITY);
+
+        @Override
+        @ForceInline
+        public Halffloat512Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat512Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat512Vector)(((AbstractShuffle)(s)).toVectorTemplate())));
+        }
+
+        @Override
+        @ForceInline
+        public VectorShuffle cast(VectorSpecies s) {
+            AbstractSpecies species = (AbstractSpecies) s;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();
+            return s.shuffleFromArray(shuffleArray, 0).check(s);
+        }
+
+        @ForceInline
+        @Override
+        public Halffloat512Shuffle rearrange(VectorShuffle shuffle) {
+            Halffloat512Shuffle s = (Halffloat512Shuffle) shuffle;
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi]; // throws on exceptional index
+            }
+            return new Halffloat512Shuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset) {
+        return super.fromArray0Template(a, offset); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset, VectorMask m, int offsetInRange) {
+        return super.fromArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m, offsetInRange); // specialize
+    }
+
+
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) {
+        return super.fromMemorySegment0Template(ms, offset); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask m, int offsetInRange) {
+        return super.fromMemorySegment0Template(Halffloat512Mask.class, ms, offset, (Halffloat512Mask) m, offsetInRange); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset) {
+        super.intoArray0Template(a, offset); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset, VectorMask m) {
+        super.intoArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m);
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask m) {
+        super.intoMemorySegment0Template(Halffloat512Mask.class, ms, offset, (Halffloat512Mask) m);
+    }
+
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
+
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java
new file mode 100644
index 00000000000..05e0c10d4c5
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java
@@ -0,0 +1,909 @@
+/*
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.incubator.vector; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.IntUnaryOperator; + +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! 
-- //
+
+// Halffloat64Vector is the concrete HalffloatVector subclass bound to
+// HalffloatVector.SPECIES_64, with Float16 as its lane type.  Almost every
+// method below narrows its argument/return types to this class (or to the
+// nested Halffloat64Mask / Halffloat64Shuffle) and forwards to the matching
+// ...Template method of the superclass.
+@SuppressWarnings("cast") // warning: redundant cast
+final class Halffloat64Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_64;
+
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();
+
+    static final Class VCLASS = Halffloat64Vector.class;
+
+    static final int VSIZE = VSPECIES.vectorBitSize();
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class ETYPE = Float16.class; // used by the JVM
+
+    // Wraps the given lane array as the vector payload.
+    Halffloat64Vector(Float16[] v) {
+        super(v);
+    }
+
+    // For compatibility as Halffloat64Vector::new,
+    // stored into species.vectorFactory.
+    Halffloat64Vector(Object v) {
+        this((Float16[]) v);
+    }
+
+    // NOTE(review): new Float16[VLENGTH] is an array of references; unless the
+    // runtime zero-initialises Float16 arrays, ZERO's lanes are null rather
+    // than Float16 zero -- confirm before relying on ZERO in Java-side lambdas.
+    static final Halffloat64Vector ZERO = new Halffloat64Vector(new Float16[VLENGTH]);
+    static final Halffloat64Vector IOTA = new Halffloat64Vector(VSPECIES.iotaArray());
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();
+        VSPECIES.withLanes(LaneType.BYTE);
+    }
+
+    // Specialized extractors
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE: This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    @ForceInline
+    @Override
+    public final Class elementType() { return Float16.class; }
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Float16.SIZE; }
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    Float16[] vec() {
+        return (Float16[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    @Override
+    @ForceInline
+    public final Halffloat64Vector broadcast(Float16 e) {
+        return (Halffloat64Vector) super.broadcastTemplate(e); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Vector broadcast(long e) {
+        return (Halffloat64Vector) super.broadcastTemplate(e); // specialize
+    }
+
+    @Override
+    @ForceInline
+    Halffloat64Mask maskFromArray(boolean[] bits) {
+        return new Halffloat64Mask(bits);
+    }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle iotaShuffle() { return Halffloat64Shuffle.IOTA; }
+
+    // Builds the shuffle whose lane i holds i*step + start; when wrap is
+    // true the lambda passes each index through VectorIntrinsics.wrapToRange.
+    @ForceInline
+    Halffloat64Shuffle iotaShuffle(int start, int step, boolean wrap) {
+        if (wrap) {
+            return (Halffloat64Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat64Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+        } else {
+            return (Halffloat64Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat64Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+        }
+    }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat64Shuffle(reorder); }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat64Shuffle(indexes, i); }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat64Shuffle(fn); }
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    Halffloat64Vector vectorFactory(Float16[] vec) {
+        return new Halffloat64Vector(vec);
+    }
+
+    @ForceInline
+    final @Override
+    Byte64Vector asByteVectorRaw() {
+        return (Byte64Vector) super.asByteVectorRawTemplate(); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType); // specialize
+    }
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector uOp(FUnOp f) {
+        return (Halffloat64Vector) super.uOpTemplate(f); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector uOp(VectorMask m, FUnOp f) {
+        return (Halffloat64Vector)
+            super.uOpTemplate((Halffloat64Mask)m, f); // specialize
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector bOp(Vector v, FBinOp f) {
+        return (Halffloat64Vector) super.bOpTemplate((Halffloat64Vector)v, f); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector bOp(Vector v,
+                     VectorMask m, FBinOp f) {
+        return (Halffloat64Vector)
+            super.bOpTemplate((Halffloat64Vector)v, (Halffloat64Mask)m,
+                              f); // specialize
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector tOp(Vector v1, Vector v2, FTriOp f) {
+        return (Halffloat64Vector)
+            super.tOpTemplate((Halffloat64Vector)v1, (Halffloat64Vector)v2,
+                              f); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector tOp(Vector v1, Vector v2,
+                     VectorMask m, FTriOp f) {
+        return (Halffloat64Vector)
+            super.tOpTemplate((Halffloat64Vector)v1, (Halffloat64Vector)v2,
+                              (Halffloat64Mask)m, f); // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Float16 rOp(Float16 v, VectorMask m, FBinOp f) {
+        return super.rOpTemplate(v, m, f); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Vector convertShape(VectorOperators.Conversion conv,
+                        VectorSpecies rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Vector reinterpretShape(VectorSpecies toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part); // specialize
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class. A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Unary op) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Unary op, VectorMask m) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Binary op, Vector v) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Binary op, Vector v, VectorMask m) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, v, (Halffloat64Mask) m); // specialize
+    }
+
+
+    /*package-private*/
+    @Override
+    @ForceInline
+    public final
+    Halffloat64Vector
+    lanewise(Ternary op, Vector v1, Vector v2) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, v1, v2); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat64Vector
+    lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, v1, v2, (Halffloat64Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat64Vector addIndex(int scale) {
+        return (Halffloat64Vector) super.addIndexTemplate(scale); // specialize
+    }
+
+    // Type specific horizontal reductions
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op); // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op,
+                                    VectorMask m) {
+        return super.reduceLanesTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m); // specialized
+    }
+
+    // The *ToLong variants reduce to a Float16 first and then convert the
+    // result with Float16.longValue().
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        Float16 res = super.reduceLanesTemplate(op); // specialized
+        return res.longValue();
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask m) {
+        Float16 res = super.reduceLanesTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m); // specialized
+        return res.longValue();
+    }
+
+    @ForceInline
+    public VectorShuffle toShuffle() {
+        return super.toShuffleTemplate(Halffloat64Shuffle.class); // specialize
+    }
+
+    // Specialized unary testing
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask test(Test op) {
+        return super.testTemplate(Halffloat64Mask.class, op); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask test(Test op, VectorMask m) {
+        return super.testTemplate(Halffloat64Mask.class, op, (Halffloat64Mask) m); // specialize
+    }
+
+    // Specialized comparisons
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, Vector v) {
+        return super.compareTemplate(Halffloat64Mask.class, op, v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, Float16 s) {
+        return super.compareTemplate(Halffloat64Mask.class, op, s); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat64Mask.class, op, s); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, Vector v, VectorMask m) {
+        return super.compareTemplate(Halffloat64Mask.class, op, v, (Halffloat64Mask) m);
+    }
+
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector blend(Vector v, VectorMask m) {
+        return (Halffloat64Vector)
+            super.blendTemplate(Halffloat64Mask.class,
+                                (Halffloat64Vector) v,
+                                (Halffloat64Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector slice(int origin, Vector v) {
+        return (Halffloat64Vector) super.sliceTemplate(origin, v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector slice(int origin) {
+        return (Halffloat64Vector) super.sliceTemplate(origin); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector unslice(int origin, Vector w, int part) {
+        return (Halffloat64Vector) super.unsliceTemplate(origin, w, part); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector unslice(int origin, Vector w, int part, VectorMask m) {
+        return (Halffloat64Vector)
+            super.unsliceTemplate(Halffloat64Mask.class,
+                                  origin, w, part,
+                                  (Halffloat64Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector unslice(int origin) {
+        return (Halffloat64Vector) super.unsliceTemplate(origin); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector rearrange(VectorShuffle s) {
+        return (Halffloat64Vector)
+            super.rearrangeTemplate(Halffloat64Shuffle.class,
+                                    (Halffloat64Shuffle) s); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector rearrange(VectorShuffle shuffle,
+                                  VectorMask m) {
+        return (Halffloat64Vector)
+            super.rearrangeTemplate(Halffloat64Shuffle.class,
+                                    Halffloat64Mask.class,
+                                    (Halffloat64Shuffle) shuffle,
+                                    (Halffloat64Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector rearrange(VectorShuffle s,
+                                  Vector v) {
+        return (Halffloat64Vector)
+            super.rearrangeTemplate(Halffloat64Shuffle.class,
+                                    (Halffloat64Shuffle) s,
+                                    (Halffloat64Vector) v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector compress(VectorMask m) {
+        return (Halffloat64Vector)
+            super.compressTemplate(Halffloat64Mask.class,
+                                   (Halffloat64Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector expand(VectorMask m) {
+        return (Halffloat64Vector)
+            super.expandTemplate(Halffloat64Mask.class,
+                                 (Halffloat64Mask) m); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector selectFrom(Vector v) {
+        return (Halffloat64Vector)
+            super.selectFromTemplate((Halffloat64Vector) v); // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector selectFrom(Vector v,
+                                   VectorMask m) {
+        return (Halffloat64Vector)
+            super.selectFromTemplate((Halffloat64Vector) v,
+                                     (Halffloat64Mask) m); // specialize
+    }
+
+
+    // Returns lane i as a Float16.  The switch hands laneHelper a literal
+    // index for each accepted value (0..3); any other index raises
+    // IllegalArgumentException.  The lane travels as raw short bits and is
+    // decoded with Float16.shortBitsToFloat16.
+    @ForceInline
+    @Override
+    public Float16 lane(int i) {
+        short bits;
+        switch(i) {
+            case 0: bits = laneHelper(0); break;
+            case 1: bits = laneHelper(1); break;
+            case 2: bits = laneHelper(2); break;
+            case 3: bits = laneHelper(3); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return Float16.shortBitsToFloat16(bits);
+    }
+
+    // Extracts lane i as raw bits through VectorSupport.extract; the lambda
+    // reads the payload array and widens the Float16 bit pattern to long.
+    public short laneHelper(int i) {
+        return (short) VectorSupport.extract(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i,
+                                (vec, ix) -> {
+                                    Float16[] vecarr = vec.vec();
+                                    return (long)Float16.float16ToShortBits(vecarr[ix]);
+                                });
+    }
+
+    // Returns a vector equal to this one except that lane i holds e.
+    // Accepts the same index range as lane(int) and throws
+    // IllegalArgumentException otherwise.
+    @ForceInline
+    @Override
+    public Halffloat64Vector withLane(int i, Float16 e) {
+        switch(i) {
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+    }
+
+    // Inserts e (passed as raw bits) through VectorSupport.insert; the lambda
+    // clones the payload array, so the receiver's array is left untouched.
+    public Halffloat64Vector withLaneHelper(int i, Float16 e) {
+        return VectorSupport.insert(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i, (long)Float16.float16ToShortBits(e),
+                                (v, ix, bits) -> {
+                                    Float16[] res = v.vec().clone();
+                                    res[ix] = Float16.shortBitsToFloat16((short)bits);
+                                    return v.vectorFactory(res);
+                                });
+    }
+
+    // Mask
+
+    // Mask type paired with Halffloat64Vector; its payload is a boolean[]
+    // holding VSPECIES.laneCount() flags.
+    static final class Halffloat64Mask extends AbstractMask {
+        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+        static final Class ETYPE = Float16.class; // used by the JVM
+
+        Halffloat64Mask(boolean[] bits) {
+            this(bits, 0);
+        }
+
+        Halffloat64Mask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));
+        }
+
+        Halffloat64Mask(boolean val) {
+            super(prepare(val));
+        }
+
+        // Copies laneCount() flags out of bits, starting at offset, into a
+        // fresh array so the mask never aliases the caller's array.
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];
+            }
+            return newBits;
+        }
+
+        // Builds a laneCount()-long array with every flag set to val.
+        private static boolean[] prepare(boolean val) {
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE: This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        // Applies f to (index, flag) for every lane and wraps the result.
+        @Override
+        Halffloat64Mask uOp(MUnOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat64Mask(res);
+        }
+
+        // Applies f to (index, this flag, m's flag) for every lane.
+        @Override
+        Halffloat64Mask bOp(VectorMask m, MBinOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat64Mask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat64Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat64Vector toVector() {
+            return (Halffloat64Vector) super.toVectorTemplate(); // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final
+        VectorMask defaultMaskCast(AbstractSpecies dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        // Converts this mask to the mask type of dsp; throws
+        // IllegalArgumentException when the lane counts differ.
+        @Override
+        @ForceInline
+        public VectorMask cast(VectorSpecies dsp) {
+            AbstractSpecies species = (AbstractSpecies) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));
+        }
+
+        @Override
+        @ForceInline
+        /*package-private*/
+        Halffloat64Mask indexPartiallyInUpperRange(long offset, long limit) {
+            return (Halffloat64Mask) VectorSupport.indexPartiallyInUpperRange(
+                Halffloat64Mask.class, ETYPE, VLENGTH, offset, limit,
+                (o, l) -> (Halffloat64Mask) TRUE_MASK.indexPartiallyInRange(o, l));
+        }
+
+        // Unary operations
+
+        // Negation is expressed as xor with the all-true mask.
+        @Override
+        @ForceInline
+        public Halffloat64Mask not() {
+            return xor(maskAll(true));
+        }
+
+        // The lambda handed to VectorSupport.compressExpandOp marks every lane
+        // whose iota value is below trueCount().  The count is materialised as
+        // a Float16 so that compare(Comparison, Float16) is selected: the bare
+        // short returned by Float.floatToFloat16 is a binary16 bit pattern
+        // (1.0f encodes as 0x3C00, i.e. 15360) and, among the compare
+        // overloads declared in this class, could only widen to
+        // compare(Comparison, long), where it would be treated as a number.
+        // NOTE(review): confirm HalffloatVector declares no compare overload
+        // taking raw short bits.
+        @Override
+        @ForceInline
+        public Halffloat64Mask compress() {
+            return (Halffloat64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
+                Halffloat64Vector.class, Halffloat64Mask.class, ETYPE, VLENGTH, null, this,
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, Float16.shortBitsToFloat16(Float.floatToFloat16(m1.trueCount()))));
+        }
+
+
+        // Binary operations
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask and(VectorMask mask) {
+            Objects.requireNonNull(mask);
+            Halffloat64Mask m = (Halffloat64Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat64Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask or(VectorMask mask) {
+            Objects.requireNonNull(mask);
+            Halffloat64Mask m = (Halffloat64Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat64Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask xor(VectorMask mask) {
+            Objects.requireNonNull(mask);
+            Halffloat64Mask m = (Halffloat64Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat64Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat64Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat64Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat64Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        // Throws UnsupportedOperationException when the mask has more lanes
+        // than a long has bits.
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat64Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // laneIsSet
+
+        // Bounds-checks i with Objects.checkIndex before extracting the flag.
+        @Override
+        @ForceInline
+        public boolean laneIsSet(int i) {
+            Objects.checkIndex(i, length());
+            return VectorSupport.extract(Halffloat64Mask.class, Float16.class, VLENGTH,
+                                         this, i, (m, idx) -> (m.getBits()[idx] ? 1L : 0L)) == 1L;
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat64Mask.class, short.class, VLENGTH,
+                                      this, vspecies().maskAll(true),
+                                      (m, __) -> anyTrueHelper(((Halffloat64Mask)m).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat64Mask.class, short.class, VLENGTH,
+                                      this, vspecies().maskAll(true),
+                                      (m, __) -> allTrueHelper(((Halffloat64Mask)m).getBits()));
+        }
+
+        // The lambda returns one of the two shared constants below instead of
+        // allocating a new mask.
+        @ForceInline
+        /*package-private*/
+        static Halffloat64Mask maskAll(boolean bit) {
+            return VectorSupport.fromBitsCoerced(Halffloat64Mask.class, short.class, VLENGTH,
+                                                 (bit ? -1 : 0), MODE_BROADCAST, null,
+                                                 (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
+        }
+        private static final Halffloat64Mask TRUE_MASK = new Halffloat64Mask(true);
+        private static final Halffloat64Mask FALSE_MASK = new Halffloat64Mask(false);
+
+    }
+
+    // Shuffle
+
+    // Shuffle type paired with Halffloat64Vector; lane indexes are kept in the
+    // byte[] returned by reorder().
+    static final class Halffloat64Shuffle extends AbstractShuffle {
+        static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+        static final Class ETYPE = Float16.class; // used by the JVM
+
+        Halffloat64Shuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat64Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat64Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat64Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat64Shuffle IOTA = new Halffloat64Shuffle(IDENTITY);
+
+        @Override
+        @ForceInline
+        public Halffloat64Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat64Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat64Vector)(((AbstractShuffle)(s)).toVectorTemplate())));
+        }
+
+        // Re-creates this shuffle for species s from its int[] form; throws
+        // IllegalArgumentException when the lane counts differ.
+        @Override
+        @ForceInline
+        public VectorShuffle cast(VectorSpecies s) {
+            AbstractSpecies species = (AbstractSpecies) s;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();
+            return s.shuffleFromArray(shuffleArray, 0).check(s);
+        }
+
+        // Composes two shuffles: result lane i is this shuffle's entry at the
+        // index stored in lane i of the argument.
+        @ForceInline
+        @Override
+        public Halffloat64Shuffle rearrange(VectorShuffle shuffle) {
+            Halffloat64Shuffle s = (Halffloat64Shuffle) shuffle;
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi]; // throws on exceptional index
+            }
+            return new Halffloat64Shuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset) {
+        return super.fromArray0Template(a, offset); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset, VectorMask m, int offsetInRange) {
+        return super.fromArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m, offsetInRange); // specialize
+    }
+
+
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) {
+        return super.fromMemorySegment0Template(ms, offset); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask m, int offsetInRange) {
+        return super.fromMemorySegment0Template(Halffloat64Mask.class, ms, offset, (Halffloat64Mask) m, offsetInRange); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset) {
+        super.intoArray0Template(a, offset); // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset, VectorMask m) {
+        super.intoArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m);
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask m) {
+        super.intoMemorySegment0Template(Halffloat64Mask.class, ms, offset, (Halffloat64Mask) m);
+    }
+
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
+
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java
new file mode 100644
index 00000000000..3cc4059fa9d
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java
@@ -0,0 +1,902 @@
+/*
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */ +package jdk.incubator.vector; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.IntUnaryOperator; + +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! -- // + +@SuppressWarnings("cast") // warning: redundant cast +final class HalffloatMaxVector extends HalffloatVector { + static final HalffloatSpecies VSPECIES = + (HalffloatSpecies) HalffloatVector.SPECIES_MAX; + + static final VectorShape VSHAPE = + VSPECIES.vectorShape(); + + static final Class VCLASS = HalffloatMaxVector.class; + + static final int VSIZE = VSPECIES.vectorBitSize(); + + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + + static final Class ETYPE = Float16.class; // used by the JVM + + HalffloatMaxVector(Float16[] v) { + super(v); + } + + // For compatibility as HalffloatMaxVector::new, + // stored into species.vectorFactory. + HalffloatMaxVector(Object v) { + this((Float16[]) v); + } + + static final HalffloatMaxVector ZERO = new HalffloatMaxVector(new Float16[VLENGTH]); + static final HalffloatMaxVector IOTA = new HalffloatMaxVector(VSPECIES.iotaArray()); + + static { + // Warm up a few species caches. + // If we do this too much we will + // get NPEs from bootstrap circularity. + VSPECIES.dummyVector(); + VSPECIES.withLanes(LaneType.BYTE); + } + + // Specialized extractors + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractVector, rather than + // a megamorphic method. 
+ return VSPECIES; + } + + @ForceInline + @Override + public final Class elementType() { return Float16.class; } + + @ForceInline + @Override + public final int elementSize() { return Float16.SIZE; } + + @ForceInline + @Override + public final VectorShape shape() { return VSHAPE; } + + @ForceInline + @Override + public final int length() { return VLENGTH; } + + @ForceInline + @Override + public final int bitSize() { return VSIZE; } + + @ForceInline + @Override + public final int byteSize() { return VSIZE / Byte.SIZE; } + + /*package-private*/ + @ForceInline + final @Override + Float16[] vec() { + return (Float16[])getPayload(); + } + + // Virtualized constructors + + @Override + @ForceInline + public final HalffloatMaxVector broadcast(Float16 e) { + return (HalffloatMaxVector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + public final HalffloatMaxVector broadcast(long e) { + return (HalffloatMaxVector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + HalffloatMaxMask maskFromArray(boolean[] bits) { + return new HalffloatMaxMask(bits); + } + + @Override + @ForceInline + HalffloatMaxShuffle iotaShuffle() { return HalffloatMaxShuffle.IOTA; } + + @ForceInline + HalffloatMaxShuffle iotaShuffle(int start, int step, boolean wrap) { + if (wrap) { + return (HalffloatMaxShuffle)VectorSupport.shuffleIota(ETYPE, HalffloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); + } else { + return (HalffloatMaxShuffle)VectorSupport.shuffleIota(ETYPE, HalffloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); + } + } + + @Override + @ForceInline + HalffloatMaxShuffle shuffleFromBytes(byte[] reorder) { return new HalffloatMaxShuffle(reorder); } + + @Override + @ForceInline + HalffloatMaxShuffle shuffleFromArray(int[] indexes, int i) { return new 
HalffloatMaxShuffle(indexes, i); } + + @Override + @ForceInline + HalffloatMaxShuffle shuffleFromOp(IntUnaryOperator fn) { return new HalffloatMaxShuffle(fn); } + + // Make a vector of the same species but the given elements: + @ForceInline + final @Override + HalffloatMaxVector vectorFactory(Float16[] vec) { + return new HalffloatMaxVector(vec); + } + + @ForceInline + final @Override + ByteMaxVector asByteVectorRaw() { + return (ByteMaxVector) super.asByteVectorRawTemplate(); // specialize + } + + @ForceInline + final @Override + AbstractVector asVectorRaw(LaneType laneType) { + return super.asVectorRawTemplate(laneType); // specialize + } + + // Unary operator + + @ForceInline + final @Override + HalffloatMaxVector uOp(FUnOp f) { + return (HalffloatMaxVector) super.uOpTemplate(f); // specialize + } + + @ForceInline + final @Override + HalffloatMaxVector uOp(VectorMask m, FUnOp f) { + return (HalffloatMaxVector) + super.uOpTemplate((HalffloatMaxMask)m, f); // specialize + } + + // Binary operator + + @ForceInline + final @Override + HalffloatMaxVector bOp(Vector v, FBinOp f) { + return (HalffloatMaxVector) super.bOpTemplate((HalffloatMaxVector)v, f); // specialize + } + + @ForceInline + final @Override + HalffloatMaxVector bOp(Vector v, + VectorMask m, FBinOp f) { + return (HalffloatMaxVector) + super.bOpTemplate((HalffloatMaxVector)v, (HalffloatMaxMask)m, + f); // specialize + } + + // Ternary operator + + @ForceInline + final @Override + HalffloatMaxVector tOp(Vector v1, Vector v2, FTriOp f) { + return (HalffloatMaxVector) + super.tOpTemplate((HalffloatMaxVector)v1, (HalffloatMaxVector)v2, + f); // specialize + } + + @ForceInline + final @Override + HalffloatMaxVector tOp(Vector v1, Vector v2, + VectorMask m, FTriOp f) { + return (HalffloatMaxVector) + super.tOpTemplate((HalffloatMaxVector)v1, (HalffloatMaxVector)v2, + (HalffloatMaxMask)m, f); // specialize + } + + @ForceInline + final @Override + Float16 rOp(Float16 v, VectorMask m, FBinOp f) { + return 
super.rOpTemplate(v, m, f); // specialize + } + + @Override + @ForceInline + public final + Vector convertShape(VectorOperators.Conversion conv, + VectorSpecies rsp, int part) { + return super.convertShapeTemplate(conv, rsp, part); // specialize + } + + @Override + @ForceInline + public final + Vector reinterpretShape(VectorSpecies toSpecies, int part) { + return super.reinterpretShapeTemplate(toSpecies, part); // specialize + } + + // Specialized algebraic operations: + + // The following definition forces a specialized version of this + // crucial method into the v-table of this class. A call to add() + // will inline to a call to lanewise(ADD,), at which point the JIT + // intrinsic will have the opcode of ADD, plus all the metadata + // for this particular class, enabling it to generate precise + // code. + // + // There is probably no benefit to the JIT to specialize the + // masked or broadcast versions of the lanewise method. + + @Override + @ForceInline + public HalffloatMaxVector lanewise(Unary op) { + return (HalffloatMaxVector) super.lanewiseTemplate(op); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector lanewise(Unary op, VectorMask m) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector lanewise(Binary op, Vector v) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, v); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector lanewise(Binary op, Vector v, VectorMask m) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, v, (HalffloatMaxMask) m); // specialize + } + + + /*package-private*/ + @Override + @ForceInline + public final + HalffloatMaxVector + lanewise(Ternary op, Vector v1, Vector v2) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize + } + + @Override + @ForceInline + public final + HalffloatMaxVector + 
lanewise(Ternary op, Vector v1, Vector v2, VectorMask m) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, v1, v2, (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public final + HalffloatMaxVector addIndex(int scale) { + return (HalffloatMaxVector) super.addIndexTemplate(scale); // specialize + } + + // Type specific horizontal reductions + + @Override + @ForceInline + public final Float16 reduceLanes(VectorOperators.Associative op) { + return super.reduceLanesTemplate(op); // specialized + } + + @Override + @ForceInline + public final Float16 reduceLanes(VectorOperators.Associative op, + VectorMask m) { + return super.reduceLanesTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m); // specialized + } + + @Override + @ForceInline + public final long reduceLanesToLong(VectorOperators.Associative op) { + Float16 res = super.reduceLanesTemplate(op); // specialized + return res.longValue(); + } + + @Override + @ForceInline + public final long reduceLanesToLong(VectorOperators.Associative op, + VectorMask m) { + Float16 res = super.reduceLanesTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m); // specialized + return res.longValue(); + } + + @ForceInline + public VectorShuffle toShuffle() { + return super.toShuffleTemplate(HalffloatMaxShuffle.class); // specialize + } + + // Specialized unary testing + + @Override + @ForceInline + public final HalffloatMaxMask test(Test op) { + return super.testTemplate(HalffloatMaxMask.class, op); // specialize + } + + @Override + @ForceInline + public final HalffloatMaxMask test(Test op, VectorMask m) { + return super.testTemplate(HalffloatMaxMask.class, op, (HalffloatMaxMask) m); // specialize + } + + // Specialized comparisons + + @Override + @ForceInline + public final HalffloatMaxMask compare(Comparison op, Vector v) { + return super.compareTemplate(HalffloatMaxMask.class, op, v); // specialize + } + + @Override + @ForceInline + public final HalffloatMaxMask 
compare(Comparison op, Float16 s) { + return super.compareTemplate(HalffloatMaxMask.class, op, s); // specialize + } + + @Override + @ForceInline + public final HalffloatMaxMask compare(Comparison op, long s) { + return super.compareTemplate(HalffloatMaxMask.class, op, s); // specialize + } + + @Override + @ForceInline + public final HalffloatMaxMask compare(Comparison op, Vector v, VectorMask m) { + return super.compareTemplate(HalffloatMaxMask.class, op, v, (HalffloatMaxMask) m); + } + + + @Override + @ForceInline + public HalffloatMaxVector blend(Vector v, VectorMask m) { + return (HalffloatMaxVector) + super.blendTemplate(HalffloatMaxMask.class, + (HalffloatMaxVector) v, + (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector slice(int origin, Vector v) { + return (HalffloatMaxVector) super.sliceTemplate(origin, v); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector slice(int origin) { + return (HalffloatMaxVector) super.sliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector unslice(int origin, Vector w, int part) { + return (HalffloatMaxVector) super.unsliceTemplate(origin, w, part); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector unslice(int origin, Vector w, int part, VectorMask m) { + return (HalffloatMaxVector) + super.unsliceTemplate(HalffloatMaxMask.class, + origin, w, part, + (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector unslice(int origin) { + return (HalffloatMaxVector) super.unsliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector rearrange(VectorShuffle s) { + return (HalffloatMaxVector) + super.rearrangeTemplate(HalffloatMaxShuffle.class, + (HalffloatMaxShuffle) s); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector rearrange(VectorShuffle shuffle, + VectorMask m) { + return (HalffloatMaxVector) 
+ super.rearrangeTemplate(HalffloatMaxShuffle.class, + HalffloatMaxMask.class, + (HalffloatMaxShuffle) shuffle, + (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector rearrange(VectorShuffle s, + Vector v) { + return (HalffloatMaxVector) + super.rearrangeTemplate(HalffloatMaxShuffle.class, + (HalffloatMaxShuffle) s, + (HalffloatMaxVector) v); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector compress(VectorMask m) { + return (HalffloatMaxVector) + super.compressTemplate(HalffloatMaxMask.class, + (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector expand(VectorMask m) { + return (HalffloatMaxVector) + super.expandTemplate(HalffloatMaxMask.class, + (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector selectFrom(Vector v) { + return (HalffloatMaxVector) + super.selectFromTemplate((HalffloatMaxVector) v); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector selectFrom(Vector v, + VectorMask m) { + return (HalffloatMaxVector) + super.selectFromTemplate((HalffloatMaxVector) v, + (HalffloatMaxMask) m); // specialize + } + + + @ForceInline + @Override + public Float16 lane(int i) { + if (i < 0 || i >= VLENGTH) { + throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH); + } + short bits = laneHelper(i); + return Float16.shortBitsToFloat16(bits); + } + + public short laneHelper(int i) { + return (short) VectorSupport.extract( + VCLASS, ETYPE, VLENGTH, + this, i, + (vec, ix) -> { + Float16[] vecarr = vec.vec(); + return (long)Float16.float16ToShortBits(vecarr[ix]); + }); + } + + @ForceInline + @Override + public HalffloatMaxVector withLane(int i, Float16 e) { + if (i < 0 || i >= VLENGTH) { + throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH); + } + return withLaneHelper(i, e); + } + + public 
HalffloatMaxVector withLaneHelper(int i, Float16 e) { + return VectorSupport.insert( + VCLASS, ETYPE, VLENGTH, + this, i, (long)Float16.float16ToShortBits(e), + (v, ix, bits) -> { + Float16[] res = v.vec().clone(); + res[ix] = Float16.shortBitsToFloat16((short)bits); + return v.vectorFactory(res); + }); + } + + // Mask + + static final class HalffloatMaxMask extends AbstractMask { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class ETYPE = Float16.class; // used by the JVM + + HalffloatMaxMask(boolean[] bits) { + this(bits, 0); + } + + HalffloatMaxMask(boolean[] bits, int offset) { + super(prepare(bits, offset)); + } + + HalffloatMaxMask(boolean val) { + super(prepare(val)); + } + + private static boolean[] prepare(boolean[] bits, int offset) { + boolean[] newBits = new boolean[VSPECIES.laneCount()]; + for (int i = 0; i < newBits.length; i++) { + newBits[i] = bits[offset + i]; + } + return newBits; + } + + private static boolean[] prepare(boolean val) { + boolean[] bits = new boolean[VSPECIES.laneCount()]; + Arrays.fill(bits, val); + return bits; + } + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractMask, rather than + // a megamorphic method. 
+ return VSPECIES; + } + + @ForceInline + boolean[] getBits() { + return (boolean[])getPayload(); + } + + @Override + HalffloatMaxMask uOp(MUnOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i]); + } + return new HalffloatMaxMask(res); + } + + @Override + HalffloatMaxMask bOp(VectorMask m, MBinOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + boolean[] mbits = ((HalffloatMaxMask)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i], mbits[i]); + } + return new HalffloatMaxMask(res); + } + + @ForceInline + @Override + public final + HalffloatMaxVector toVector() { + return (HalffloatMaxVector) super.toVectorTemplate(); // specialize + } + + /** + * Helper function for lane-wise mask conversions. + * This function kicks in after intrinsic failure. + */ + @ForceInline + private final + VectorMask defaultMaskCast(AbstractSpecies dsp) { + if (length() != dsp.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + boolean[] maskArray = toArray(); + return dsp.maskFactory(maskArray).check(dsp); + } + + @Override + @ForceInline + public VectorMask cast(VectorSpecies dsp) { + AbstractSpecies species = (AbstractSpecies) dsp; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); + } + + @Override + @ForceInline + /*package-private*/ + HalffloatMaxMask indexPartiallyInUpperRange(long offset, long limit) { + return (HalffloatMaxMask) VectorSupport.indexPartiallyInUpperRange( + HalffloatMaxMask.class, ETYPE, VLENGTH, offset, limit, + (o, l) -> (HalffloatMaxMask) 
TRUE_MASK.indexPartiallyInRange(o, l));
+        }
+
+        // Unary operations
+
+        /** Lane-wise complement, computed as this mask XOR an all-true mask. */
+        @Override
+        @ForceInline
+        public HalffloatMaxMask not() {
+            return xor(maskAll(true));
+        }
+
+        /**
+         * Mask compression.  The Java fallback sets lane {@code i} when the
+         * species' iota lane {@code i} compares less than {@code trueCount()}.
+         */
+        @Override
+        @ForceInline
+        public HalffloatMaxMask compress() {
+            return (HalffloatMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
+                HalffloatMaxVector.class, HalffloatMaxMask.class, ETYPE, VLENGTH, null, this,
+                // The count goes to compare(Comparison,long) as a numeric value.
+                // Converting it with Float.floatToFloat16 first would hand that
+                // numeric overload the binary16 bit pattern (0x3C00 == 15360 for
+                // a count of 1) instead of the count itself.
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
+        }
+
+
+        // Binary operations
+
+        /** Lane-wise AND with {@code mask}; the fallback applies {@code a & b} per lane. */
+        @Override
+        @ForceInline
+        public HalffloatMaxMask and(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, HalffloatMaxMask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        /** Lane-wise OR with {@code mask}; the fallback applies {@code a | b} per lane. */
+        @Override
+        @ForceInline
+        public HalffloatMaxMask or(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, HalffloatMaxMask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        /** Lane-wise XOR with {@code mask}; the fallback applies {@code a ^ b} per lane. */
+        @Override
+        @ForceInline
+        public HalffloatMaxMask xor(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, HalffloatMaxMask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> 
firstTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public int lastTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, HalffloatMaxMask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, HalffloatMaxMask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); + } + + // laneIsSet + + @Override + @ForceInline + public boolean laneIsSet(int i) { + Objects.checkIndex(i, length()); + return VectorSupport.extract(HalffloatMaxMask.class, Float16.class, VLENGTH, + this, i, (m, idx) -> (m.getBits()[idx] ? 1L : 0L)) == 1L; + } + + // Reductions + + @Override + @ForceInline + public boolean anyTrue() { + return VectorSupport.test(BT_ne, HalffloatMaxMask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> anyTrueHelper(((HalffloatMaxMask)m).getBits())); + } + + @Override + @ForceInline + public boolean allTrue() { + return VectorSupport.test(BT_overflow, HalffloatMaxMask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> allTrueHelper(((HalffloatMaxMask)m).getBits())); + } + + @ForceInline + /*package-private*/ + static HalffloatMaxMask maskAll(boolean bit) { + return VectorSupport.fromBitsCoerced(HalffloatMaxMask.class, short.class, VLENGTH, + (bit ? -1 : 0), MODE_BROADCAST, null, + (v, __) -> (v != 0 ? 
TRUE_MASK : FALSE_MASK)); + } + private static final HalffloatMaxMask TRUE_MASK = new HalffloatMaxMask(true); + private static final HalffloatMaxMask FALSE_MASK = new HalffloatMaxMask(false); + + } + + // Shuffle + + static final class HalffloatMaxShuffle extends AbstractShuffle { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class ETYPE = Float16.class; // used by the JVM + + HalffloatMaxShuffle(byte[] reorder) { + super(VLENGTH, reorder); + } + + public HalffloatMaxShuffle(int[] reorder) { + super(VLENGTH, reorder); + } + + public HalffloatMaxShuffle(int[] reorder, int i) { + super(VLENGTH, reorder, i); + } + + public HalffloatMaxShuffle(IntUnaryOperator fn) { + super(VLENGTH, fn); + } + + @Override + public HalffloatSpecies vspecies() { + return VSPECIES; + } + + static { + // There must be enough bits in the shuffle lanes to encode + // VLENGTH valid indexes and VLENGTH exceptional ones. + assert(VLENGTH < Byte.MAX_VALUE); + assert(Byte.MIN_VALUE <= -VLENGTH); + } + static final HalffloatMaxShuffle IOTA = new HalffloatMaxShuffle(IDENTITY); + + @Override + @ForceInline + public HalffloatMaxVector toVector() { + return VectorSupport.shuffleToVector(VCLASS, ETYPE, HalffloatMaxShuffle.class, this, VLENGTH, + (s) -> ((HalffloatMaxVector)(((AbstractShuffle)(s)).toVectorTemplate()))); + } + + @Override + @ForceInline + public VectorShuffle cast(VectorSpecies s) { + AbstractSpecies species = (AbstractSpecies) s; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorShuffle length and species length differ"); + int[] shuffleArray = toArray(); + return s.shuffleFromArray(shuffleArray, 0).check(s); + } + + @ForceInline + @Override + public HalffloatMaxShuffle rearrange(VectorShuffle shuffle) { + HalffloatMaxShuffle s = (HalffloatMaxShuffle) shuffle; + byte[] reorder1 = reorder(); + byte[] reorder2 = s.reorder(); + byte[] r = new byte[reorder1.length]; + for (int i = 0; i < reorder1.length; i++) { + int 
ssi = reorder2[i]; + r[i] = reorder1[ssi]; // throws on exceptional index + } + return new HalffloatMaxShuffle(r); + } + } + + // ================================================ + + // Specialized low-level memory operations. + + @ForceInline + @Override + final + HalffloatVector fromArray0(Float16[] a, int offset) { + return super.fromArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromArray0(Float16[] a, int offset, VectorMask m, int offsetInRange) { + return super.fromArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m, offsetInRange); // specialize + } + + + + + @ForceInline + @Override + final + HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) { + return super.fromMemorySegment0Template(ms, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask m, int offsetInRange) { + return super.fromMemorySegment0Template(HalffloatMaxMask.class, ms, offset, (HalffloatMaxMask) m, offsetInRange); // specialize + } + + @ForceInline + @Override + final + void intoArray0(Float16[] a, int offset) { + super.intoArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + void intoArray0(Float16[] a, int offset, VectorMask m) { + super.intoArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m); + } + + + + @ForceInline + @Override + final + void intoMemorySegment0(MemorySegment ms, long offset, VectorMask m) { + super.intoMemorySegment0Template(HalffloatMaxMask.class, ms, offset, (HalffloatMaxMask) m); + } + + + // End of specialized low-level memory operations. 
+ + // ================================================ + +} + diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java new file mode 100644 index 00000000000..beedb1d8390 --- /dev/null +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java @@ -0,0 +1,3853 @@ +/* + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package jdk.incubator.vector; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.Function; + +import jdk.internal.foreign.AbstractMemorySegmentImpl; +import jdk.internal.misc.ScopedMemoryAccess; +import jdk.internal.misc.Unsafe; +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; +import static jdk.incubator.vector.VectorIntrinsics.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! -- // + +/** + * A specialized {@link Vector} representing an ordered immutable sequence of + * {@code Float16} values. + */ +@SuppressWarnings("cast") // warning: redundant cast +public abstract class HalffloatVector extends AbstractVector { + + HalffloatVector(Float16[] vec) { + super(vec); + } + + static final int FORBID_OPCODE_KIND = VO_NOFP; + + static final ValueLayout.OfShort ELEMENT_LAYOUT = ValueLayout.JAVA_SHORT.withByteAlignment(1); + + @ForceInline + static int opCode(Operator op) { + return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND); + } + @ForceInline + static int opCode(Operator op, int requireKind) { + requireKind |= VO_OPCODE_VALID; + return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND); + } + @ForceInline + static boolean opKind(Operator op, int bit) { + return VectorOperators.opKind(op, bit); + } + + // Virtualized factories and operators, + // coded with portable definitions. + // These are all @ForceInline in case + // they need to be used performantly. 
+    // The various shape-specific subclasses
+    // also specialize them by wrapping
+    // them in a call like this:
+    //    return (Byte128Vector)
+    //       super.bOp((Byte128Vector) o);
+    // The purpose of that is to forcibly inline
+    // the generic definition from this file
+    // into a sharply type- and size-specific
+    // wrapper in the subclass file, so that
+    // the JIT can specialize the code.
+    // The code is only inlined and expanded
+    // if it gets hot.  Think of it as a cheap
+    // and lazy version of C++ templates.
+
+    // Virtualized getter
+
+    /*package-private*/
+    abstract Float16[] vec();
+
+    // Virtualized constructors
+
+    /**
+     * Build a vector directly using my own constructor.
+     * It is an error if the array is aliased elsewhere.
+     */
+    /*package-private*/
+    abstract HalffloatVector vectorFactory(Float16[] vec);
+
+    /**
+     * Build a mask directly using my species.
+     * It is an error if the array is aliased elsewhere.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    AbstractMask<Float16> maskFactory(boolean[] bits) {
+        return vspecies().maskFactory(bits);
+    }
+
+    // Constant loader (takes dummy as vector arg)
+    interface FVOp {
+        Float16 apply(int i);
+    }
+
+    /**
+     * Builds a vector whose lane {@code i} is {@code f.apply(i)}.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    HalffloatVector vOp(FVOp f) {
+        Float16[] res = new Float16[length()];
+        for (int i = 0; i < res.length; i++) {
+            res[i] = f.apply(i);
+        }
+        return vectorFactory(res);
+    }
+
+    /**
+     * Builds a vector whose lane {@code i} is {@code f.apply(i)} where the
+     * mask is set, and zero elsewhere.  {@code f} is only invoked for set lanes.
+     */
+    @ForceInline
+    final
+    HalffloatVector vOp(VectorMask<Float16> m, FVOp f) {
+        Float16[] res = new Float16[length()];
+        boolean[] mbits = ((AbstractMask<Float16>)m).getBits();
+        // Float16[] is a reference array: lanes left unassigned would be
+        // null rather than zero, so unset lanes are filled explicitly.
+        Float16 zero = Float16.valueOf(0.0f);
+        for (int i = 0; i < res.length; i++) {
+            res[i] = mbits[i] ? f.apply(i) : zero;
+        }
+        return vectorFactory(res);
+    }
+
+    // Unary operator
+
+    /*package-private*/
+    interface FUnOp {
+        Float16 apply(int i, Float16 a);
+    }
+
+    /*package-private*/
+    abstract
+    HalffloatVector uOp(FUnOp f);
+    @ForceInline
+    final
+    HalffloatVector uOpTemplate(FUnOp f) {
+        Float16[] vec = vec();
+        Float16[] res = new Float16[length()];
+        for (int i = 0; i < 
res.length; i++) { + res[i] = f.apply(i, vec[i]); + } + return vectorFactory(res); + } + + /*package-private*/ + abstract + HalffloatVector uOp(VectorMask m, + FUnOp f); + @ForceInline + final + HalffloatVector uOpTemplate(VectorMask m, + FUnOp f) { + if (m == null) { + return uOpTemplate(f); + } + Float16[] vec = vec(); + Float16[] res = new Float16[length()]; + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i]; + } + return vectorFactory(res); + } + + // Binary operator + + /*package-private*/ + interface FBinOp { + Float16 apply(int i, Float16 a, Float16 b); + } + + /*package-private*/ + abstract + HalffloatVector bOp(Vector o, + FBinOp f); + @ForceInline + final + HalffloatVector bOpTemplate(Vector o, + FBinOp f) { + Float16[] res = new Float16[length()]; + Float16[] vec1 = this.vec(); + Float16[] vec2 = ((HalffloatVector)o).vec(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, vec1[i], vec2[i]); + } + return vectorFactory(res); + } + + /*package-private*/ + abstract + HalffloatVector bOp(Vector o, + VectorMask m, + FBinOp f); + @ForceInline + final + HalffloatVector bOpTemplate(Vector o, + VectorMask m, + FBinOp f) { + if (m == null) { + return bOpTemplate(o, f); + } + Float16[] res = new Float16[length()]; + Float16[] vec1 = this.vec(); + Float16[] vec2 = ((HalffloatVector)o).vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = mbits[i] ? 
f.apply(i, vec1[i], vec2[i]) : vec1[i]; + } + return vectorFactory(res); + } + + // Ternary operator + + /*package-private*/ + interface FTriOp { + Float16 apply(int i, Float16 a, Float16 b, Float16 c); + } + + /*package-private*/ + abstract + HalffloatVector tOp(Vector o1, + Vector o2, + FTriOp f); + @ForceInline + final + HalffloatVector tOpTemplate(Vector o1, + Vector o2, + FTriOp f) { + Float16[] res = new Float16[length()]; + Float16[] vec1 = this.vec(); + Float16[] vec2 = ((HalffloatVector)o1).vec(); + Float16[] vec3 = ((HalffloatVector)o2).vec(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]); + } + return vectorFactory(res); + } + + /*package-private*/ + abstract + HalffloatVector tOp(Vector o1, + Vector o2, + VectorMask m, + FTriOp f); + @ForceInline + final + HalffloatVector tOpTemplate(Vector o1, + Vector o2, + VectorMask m, + FTriOp f) { + if (m == null) { + return tOpTemplate(o1, o2, f); + } + Float16[] res = new Float16[length()]; + Float16[] vec1 = this.vec(); + Float16[] vec2 = ((HalffloatVector)o1).vec(); + Float16[] vec3 = ((HalffloatVector)o2).vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i]; + } + return vectorFactory(res); + } + + // Reduction operator + + /*package-private*/ + abstract + Float16 rOp(Float16 v, VectorMask m, FBinOp f); + + @ForceInline + final + Float16 rOpTemplate(Float16 v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + Float16[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? 
f.apply(i, v, vec[i]) : v;
+        }
+        return v;
+    }
+
+    /**
+     * Unmasked reduction fallback: folds every lane into {@code v}
+     * with {@code f}, in lane order.
+     */
+    @ForceInline
+    final
+    Float16 rOpTemplate(Float16 v, FBinOp f) {
+        Float16[] vec = vec();
+        for (int i = 0; i < vec.length; i++) {
+            v = f.apply(i, v, vec[i]);
+        }
+        return v;
+    }
+
+    // Memory reference
+
+    /*package-private*/
+    interface FLdOp<M> {
+        Float16 apply(M memory, int offset, int i);
+    }
+
+    /**
+     * Load fallback: lane {@code i} is {@code f.apply(memory, offset, i)}.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    <M> HalffloatVector ldOp(M memory, int offset,
+                             FLdOp<M> f) {
+        //dummy; no vec = vec();
+        Float16[] res = new Float16[length()];
+        for (int i = 0; i < res.length; i++) {
+            res[i] = f.apply(memory, offset, i);
+        }
+        return vectorFactory(res);
+    }
+
+    /**
+     * Masked load fallback: {@code f} is invoked only for set lanes;
+     * unset lanes are zero.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    <M> HalffloatVector ldOp(M memory, int offset,
+                             VectorMask<Float16> m,
+                             FLdOp<M> f) {
+        //Float16[] vec = vec();
+        Float16[] res = new Float16[length()];
+        boolean[] mbits = ((AbstractMask<Float16>)m).getBits();
+        // Reference array: fill unset lanes so they are zero, not null.
+        Float16 zero = Float16.valueOf(0.0f);
+        for (int i = 0; i < res.length; i++) {
+            res[i] = mbits[i] ? f.apply(memory, offset, i) : zero;
+        }
+        return vectorFactory(res);
+    }
+
+    /*package-private*/
+    interface FLdLongOp {
+        Float16 apply(MemorySegment memory, long offset, int i);
+    }
+
+    /**
+     * Memory-segment load fallback: lane {@code i} is
+     * {@code f.apply(memory, offset, i)}.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    HalffloatVector ldLongOp(MemorySegment memory, long offset,
+                             FLdLongOp f) {
+        //dummy; no vec = vec();
+        Float16[] res = new Float16[length()];
+        for (int i = 0; i < res.length; i++) {
+            res[i] = f.apply(memory, offset, i);
+        }
+        return vectorFactory(res);
+    }
+
+    /**
+     * Masked memory-segment load fallback: {@code f} is invoked only for
+     * set lanes; unset lanes are zero.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    HalffloatVector ldLongOp(MemorySegment memory, long offset,
+                             VectorMask<Float16> m,
+                             FLdLongOp f) {
+        //Float16[] vec = vec();
+        Float16[] res = new Float16[length()];
+        boolean[] mbits = ((AbstractMask<Float16>)m).getBits();
+        // Reference array: fill unset lanes so they are zero, not null.
+        Float16 zero = Float16.valueOf(0.0f);
+        for (int i = 0; i < res.length; i++) {
+            res[i] = mbits[i] ? f.apply(memory, offset, i) : zero;
+        }
+        return vectorFactory(res);
+    }
+
+    /**
+     * Reads element {@code i} (2 bytes per element) starting at byte offset
+     * {@code o} and reinterprets the stored 16 bits as a {@code Float16},
+     * matching {@link #fromBits(long)}.
+     */
+    static Float16 memorySegmentGet(MemorySegment ms, long o, int i) {
+        // The segment holds binary16 bit patterns, not integral values,
+        // so the short must be reinterpreted rather than numerically converted.
+        return Float16.shortBitsToFloat16(ms.get(ELEMENT_LAYOUT, o + i * 2L));
+    }
+
+    interface FStOp<M> {
+        void apply(M memory, int offset, int i, Float16 a);
+    }
+
+    /**
+     * Store fallback: calls {@code f} once per lane, in lane order.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    <M> void stOp(M memory, int offset,
+                  FStOp<M> f) {
+        Float16[] vec = vec();
+        for (int i = 0; i < vec.length; i++) {
+            f.apply(memory, offset, i, vec[i]);
+        }
+    }
+
+    /**
+     * Masked store fallback: calls {@code f} only for lanes set in {@code m}.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    <M> void stOp(M memory, int offset,
+                  VectorMask<Float16> m,
+                  FStOp<M> f) {
+        Float16[] vec = vec();
+        boolean[] mbits = ((AbstractMask<Float16>)m).getBits();
+        for (int i = 0; i < vec.length; i++) {
+            if (mbits[i]) {
+                f.apply(memory, offset, i, vec[i]);
+            }
+        }
+    }
+
+    interface FStLongOp {
+        void apply(MemorySegment memory, long offset, int i, Float16 a);
+    }
+
+    /**
+     * Memory-segment store fallback: calls {@code f} once per lane.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    void stLongOp(MemorySegment memory, long offset,
+                  FStLongOp f) {
+        Float16[] vec = vec();
+        for (int i = 0; i < vec.length; i++) {
+            f.apply(memory, offset, i, vec[i]);
+        }
+    }
+
+    /**
+     * Masked memory-segment store fallback: calls {@code f} only for
+     * lanes set in {@code m}.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    void stLongOp(MemorySegment memory, long offset,
+                  VectorMask<Float16> m,
+                  FStLongOp f) {
+        Float16[] vec = vec();
+        boolean[] mbits = ((AbstractMask<Float16>)m).getBits();
+        for (int i = 0; i < vec.length; i++) {
+            if (mbits[i]) {
+                f.apply(memory, offset, i, vec[i]);
+            }
+        }
+    }
+
+    /**
+     * Writes the raw 16-bit pattern of {@code e} as element {@code i}
+     * (2 bytes per element) starting at byte offset {@code o},
+     * matching {@link #toBits(Float16)}.
+     */
+    static void memorySegmentSet(MemorySegment ms, long o, int i, Float16 e) {
+        // Number.shortValue() is a numeric narrowing (1.5 -> 1); the segment
+        // must receive the binary16 encoding so that memorySegmentGet round-trips.
+        ms.set(ELEMENT_LAYOUT, o + i * 2L, Float16.float16ToRawShortBits(e));
+    }
+
+    // Binary test
+
+    /*package-private*/
+    interface FBinTest {
+        boolean apply(int cond, int i, Float16 a, Float16 b);
+    }
+
+    /**
+     * Comparison fallback: mask lane {@code i} is
+     * {@code f.apply(cond, i, this[i], o[i])}.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    AbstractMask<Float16> bTest(int cond,
+                                Vector<Float16> o,
+                                FBinTest f) {
+        Float16[] vec1 = vec();
+        Float16[] vec2 = ((HalffloatVector)o).vec();
+        boolean[] bits = new boolean[length()];
+        for (int i = 0; i < length(); i++){
+            bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
+        }
+        return maskFactory(bits);
+    }
+
+
+    /*package-private*/
+    @Override
+    abstract HalffloatSpecies vspecies();
+
+    /*package-private*/
+    @ForceInline
+    static long toBits(Float16 e) {
+        return Float16.float16ToRawShortBits(e);
+    }
+
+ /*package-private*/ + @ForceInline + static Float16 fromBits(long bits) { + return Float16.shortBitsToFloat16((short)bits); + } + + static HalffloatVector expandHelper(Vector v, VectorMask m) { + VectorSpecies vsp = m.vectorSpecies(); + HalffloatVector r = (HalffloatVector) vsp.zero(); + HalffloatVector vi = (HalffloatVector) v; + if (m.allTrue()) { + return vi; + } + for (int i = 0, j = 0; i < vsp.length(); i++) { + if (m.laneIsSet(i)) { + r = r.withLane(i, vi.lane(j++)); + } + } + return r; + } + + static HalffloatVector compressHelper(Vector v, VectorMask m) { + VectorSpecies vsp = m.vectorSpecies(); + HalffloatVector r = (HalffloatVector) vsp.zero(); + HalffloatVector vi = (HalffloatVector) v; + if (m.allTrue()) { + return vi; + } + for (int i = 0, j = 0; i < vsp.length(); i++) { + if (m.laneIsSet(i)) { + r = r.withLane(j++, vi.lane(i)); + } + } + return r; + } + + // Static factories (other than memory operations) + + // Note: A surprising behavior in javadoc + // sometimes makes a lone /** {@inheritDoc} */ + // comment drop the method altogether, + // apparently if the method mentions an + // parameter or return type of Vector + // instead of Vector as originally specified. + // Adding an empty HTML fragment appears to + // nudge javadoc into providing the desired + // inherited documentation. We use the HTML + // comment for this. + + /** + * Returns a vector of the given species + * where all lane elements are set to + * zero, the default primitive value. 
+ * + * @param species species of the desired zero vector + * @return a zero vector + */ + @ForceInline + public static HalffloatVector zero(VectorSpecies species) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return VectorSupport.fromBitsCoerced(vsp.vectorType(), Float16.class, species.length(), + toBits(Float16.valueOf(0.0f)), MODE_BROADCAST, vsp, + ((bits_, s_) -> s_.rvOp(i -> bits_))); + } + + /** + * Returns a vector of the same species as this one + * where all lane elements are set to + * the primitive value {@code e}. + * + * The contents of the current vector are discarded; + * only the species is relevant to this operation. + * + *

This method returns the value of this expression: + * {@code HalffloatVector.broadcast(this.species(), e)}. + * + * @apiNote + * Unlike the similar method named {@code broadcast()} + * in the supertype {@code Vector}, this method does not + * need to validate its argument, and cannot throw + * {@code IllegalArgumentException}. This method is + * therefore preferable to the supertype method. + * + * @param e the value to broadcast + * @return a vector where all lane elements are set to + * the primitive value {@code e} + * @see #broadcast(VectorSpecies,long) + * @see Vector#broadcast(long) + * @see VectorSpecies#broadcast(long) + */ + public abstract HalffloatVector broadcast(Float16 e); + + /** + * Returns a vector of the given species + * where all lane elements are set to + * the primitive value {@code e}. + * + * @param species species of the desired vector + * @param e the value to broadcast + * @return a vector where all lane elements are set to + * the primitive value {@code e} + * @see #broadcast(long) + * @see Vector#broadcast(long) + * @see VectorSpecies#broadcast(long) + */ + @ForceInline + public static HalffloatVector broadcast(VectorSpecies species, Float16 e) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.broadcast(e); + } + + /*package-private*/ + @ForceInline + final HalffloatVector broadcastTemplate(Float16 e) { + HalffloatSpecies vsp = vspecies(); + return vsp.broadcast(e); + } + + /** + * {@inheritDoc} + * @apiNote + * When working with vector subtypes like {@code HalffloatVector}, + * {@linkplain #broadcast(Float16) the more strongly typed method} + * is typically selected. It can be explicitly selected + * using a cast: {@code v.broadcast((Float16)e)}. + * The two expressions will produce numerically identical results. + */ + @Override + public abstract HalffloatVector broadcast(long e); + + /** + * Returns a vector of the given species + * where all lane elements are set to + * the primitive value {@code e}. 
+ * + * The {@code long} value must be accurately representable + * by the {@code ETYPE} of the vector species, so that + * {@code e==(long)(ETYPE)e}. + * + * @param species species of the desired vector + * @param e the value to broadcast + * @return a vector where all lane elements are set to + * the primitive value {@code e} + * @throws IllegalArgumentException + * if the given {@code long} value cannot + * be represented by the vector's {@code ETYPE} + * @see #broadcast(VectorSpecies,Float16) + * @see VectorSpecies#checkValue(long) + */ + @ForceInline + public static HalffloatVector broadcast(VectorSpecies species, long e) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.broadcast(e); + } + + /*package-private*/ + @ForceInline + final HalffloatVector broadcastTemplate(long e) { + return vspecies().broadcast(e); + } + + // Unary lanewise support + + /** + * {@inheritDoc} + */ + public abstract + HalffloatVector lanewise(VectorOperators.Unary op); + + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Unary op) { + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0)); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), null, Float16.class, length(), + this, null, + UN_IMPL.find(op, opc, HalffloatVector::unaryOperations)); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector lanewise(VectorOperators.Unary op, + VectorMask m); + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Unary op, + Class> maskClass, + VectorMask m) { + m.check(maskClass, this); + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0, m)); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), maskClass, Float16.class, length(), + this, m, + UN_IMPL.find(op, opc, HalffloatVector::unaryOperations)); + } + + private static final + ImplCache>> + UN_IMPL = new 
ImplCache<>(Unary.class, HalffloatVector.class); + + private static UnaryOperation> unaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_NEG: return (v0, m) -> + v0.uOp(m, (i, a) -> (Float16) Float16.valueOf(-a.floatValue())); + case VECTOR_OP_ABS: return (v0, m) -> + v0.uOp(m, (i, a) -> (Float16) Float16.abs(a)); + case VECTOR_OP_SIN: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.sin(a.floatValue()))); + case VECTOR_OP_COS: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.cos(a.floatValue()))); + case VECTOR_OP_TAN: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.tan(a.floatValue()))); + case VECTOR_OP_ASIN: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.asin(a.floatValue()))); + case VECTOR_OP_ACOS: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.acos(a.floatValue()))); + case VECTOR_OP_ATAN: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.atan(a.floatValue()))); + case VECTOR_OP_EXP: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.exp(a.floatValue()))); + case VECTOR_OP_LOG: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.log(a.floatValue()))); + case VECTOR_OP_LOG10: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.log10(a.floatValue()))); + case VECTOR_OP_SQRT: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.sqrt(a.floatValue()))); + case VECTOR_OP_CBRT: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.cbrt(a.floatValue()))); + case VECTOR_OP_SINH: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.sinh(a.floatValue()))); + case VECTOR_OP_COSH: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.cosh(a.floatValue()))); + case VECTOR_OP_TANH: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.tanh(a.floatValue()))); + case VECTOR_OP_EXPM1: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.expm1(a.floatValue()))); + case VECTOR_OP_LOG1P: return (v0, m) -> + v0.uOp(m, (i, 
a) -> Float16.valueOf(Math.log1p(a.floatValue()))); + default: return null; + } + } + + // Binary lanewise support + + /** + * {@inheritDoc} + * @see #lanewise(VectorOperators.Binary,Float16) + * @see #lanewise(VectorOperators.Binary,Float16,VectorMask) + */ + @Override + public abstract + HalffloatVector lanewise(VectorOperators.Binary op, + Vector v); + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Binary op, + Vector v) { + HalffloatVector that = (HalffloatVector) v; + that.check(this); + + if (opKind(op, VO_SPECIAL )) { + if (op == FIRST_NONZERO) { + VectorMask mask + = this.viewAsIntegralLanes().compare(EQ, (short) 0); + return this.blend(that, mask.cast(vspecies())); + } + } + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), null, Float16.class, length(), + this, that, null, + BIN_IMPL.find(op, opc, HalffloatVector::binaryOperations)); + } + + /** + * {@inheritDoc} + * @see #lanewise(VectorOperators.Binary,Float16,VectorMask) + */ + @Override + public abstract + HalffloatVector lanewise(VectorOperators.Binary op, + Vector v, + VectorMask m); + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Binary op, + Class> maskClass, + Vector v, VectorMask m) { + HalffloatVector that = (HalffloatVector) v; + that.check(this); + m.check(maskClass, this); + + if (opKind(op, VO_SPECIAL )) { + if (op == FIRST_NONZERO) { + ShortVector bits = this.viewAsIntegralLanes(); + VectorMask mask + = bits.compare(EQ, (short) 0, m.cast(bits.vspecies())); + return this.blend(that, mask.cast(vspecies())); + } + } + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), maskClass, Float16.class, length(), + this, that, m, + BIN_IMPL.find(op, opc, HalffloatVector::binaryOperations)); + } + + private static final + ImplCache>> + BIN_IMPL = new ImplCache<>(Binary.class, HalffloatVector.class); + + private static BinaryOperation> binaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: 
return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.add(a, b)); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.subtract(a, b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.multiply(a, b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.divide(a, b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.min(a, b)); + case VECTOR_OP_ATAN2: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.valueOf(Math.atan2(a.floatValue(), b.floatValue()))); + case VECTOR_OP_POW: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.valueOf(Math.pow(a.floatValue(), b.floatValue()))); + case VECTOR_OP_HYPOT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.valueOf(Math.hypot(a.floatValue(), b.floatValue()))); + default: return null; + } + } + + // FIXME: Maybe all of the public final methods in this file (the + // simple ones that just call lanewise) should be pushed down to + // the X-VectorBits template. They can't optimize properly at + // this level, and must rely on inlining. Does it work? + // (If it works, of course keep the code here.) + + /** + * Combines the lane values of this vector + * with the value of a broadcast scalar. + * + * This is a lane-wise binary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, this.broadcast(e))}. 
+ * + * @param op the operation used to process lane values + * @param e the input scalar + * @return the result of applying the operation lane-wise + * to the two input vectors + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,Float16,VectorMask) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Binary op, + Float16 e) { + return lanewise(op, broadcast(e)); + } + + /** + * Combines the lane values of this vector + * with the value of a broadcast scalar, + * with selection of lane elements controlled by a mask. + * + * This is a masked lane-wise binary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, this.broadcast(e), m)}. + * + * @param op the operation used to process lane values + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of applying the operation lane-wise + * to the input vector and the scalar + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) + * @see #lanewise(VectorOperators.Binary,Float16) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Binary op, + Float16 e, + VectorMask m) { + return lanewise(op, broadcast(e), m); + } + + /** + * {@inheritDoc} + * @apiNote + * When working with vector subtypes like {@code HalffloatVector}, + * {@linkplain #lanewise(VectorOperators.Binary,Float16) + * the more strongly typed method} + * is typically selected. It can be explicitly selected + * using a cast: {@code v.lanewise(op,(Float16)e)}. + * The two expressions will produce numerically identical results. 
+ */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Binary op, + long e) { + Float16 e1 = Float16.valueOf(e); + if (e1.longValue() != e) { + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1); + } + + /** + * {@inheritDoc} + * @apiNote + * When working with vector subtypes like {@code HalffloatVector}, + * {@linkplain #lanewise(VectorOperators.Binary,Float16,VectorMask) + * the more strongly typed method} + * is typically selected. It can be explicitly selected + * using a cast: {@code v.lanewise(op,(Float16)e,m)}. + * The two expressions will produce numerically identical results. + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Binary op, + long e, VectorMask m) { + Float16 e1 = Float16.valueOf(e); + if (e1.longValue() != e) { + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1, m); + } + + + // Ternary lanewise support + + // Ternary operators come in eight variations: + // lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2]) + // lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask) + + // It is annoying to support all of these variations of masking + // and broadcast, but it would be more surprising not to continue + // the obvious pattern started by unary and binary. 
+
+    /**
+     * {@inheritDoc}
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector)
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Ternary op,
+                             Vector<Float16> v1,
+                             Vector<Float16> v2);
+
+    /*
+     * Common body for the unmasked ternary lanewise operation.
+     *
+     * Both operands are cast to HalffloatVector (so a ClassCastException
+     * results if either is some other Vector implementation) and each is
+     * passed through check(this) before the operation is handed to
+     * VectorSupport.ternaryOp with a null mask class and a null mask.
+     * The final argument is the implementation looked up in TERN_IMPL
+     * via ternaryOperations().
+     *
+     * NOTE(review): presumably invoked from the shape-specific subclasses'
+     * lanewise(Ternary, Vector, Vector) overrides -- confirm there.
+     */
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Ternary op,
+                                     Vector<Float16> v1,
+                                     Vector<Float16> v2) {
+        HalffloatVector that = (HalffloatVector) v1;
+        HalffloatVector tother = (HalffloatVector) v2;
+        // It's a word: https://www.dictionary.com/browse/tother
+        // See also Chapter 11 of Dickens, Our Mutual Friend:
+        // "Totherest Governor," replied Mr Riderhood...
+        that.check(this);
+        tother.check(this);
+        int opc = opCode(op);
+        return VectorSupport.ternaryOp(
+            opc, getClass(), null, Float16.class, length(),
+            this, that, tother, null,
+            TERN_IMPL.find(op, opc, HalffloatVector::ternaryOperations));
+    }
+
+    /**
+     * {@inheritDoc}
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector,VectorMask)
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Ternary op,
+                             Vector<Float16> v1,
+                             Vector<Float16> v2,
+                             VectorMask<Float16> m);
+
+    /*
+     * Common body for the masked ternary lanewise operation.
+     *
+     * Identical to the unmasked template except that the mask is first
+     * passed through m.check(maskClass, this), and both maskClass and m
+     * are forwarded to VectorSupport.ternaryOp in place of the nulls.
+     *
+     * NOTE(review): presumably invoked from the shape-specific subclasses'
+     * lanewise(Ternary, Vector, Vector, VectorMask) overrides, each
+     * supplying its own concrete mask class -- confirm there.
+     */
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Ternary op,
+                                     Class<? extends VectorMask<Float16>> maskClass,
+                                     Vector<Float16> v1,
+                                     Vector<Float16> v2,
+                                     VectorMask<Float16> m) {
+        HalffloatVector that = (HalffloatVector) v1;
+        HalffloatVector tother = (HalffloatVector) v2;
+        // It's a word: https://www.dictionary.com/browse/tother
+        // See also Chapter 11 of Dickens, Our Mutual Friend:
+        // "Totherest Governor," replied Mr Riderhood...
+        that.check(this);
+        tother.check(this);
+        m.check(maskClass, this);
+
+        int opc = opCode(op);
+        return VectorSupport.ternaryOp(
+            opc, getClass(), maskClass, Float16.class, length(),
+            this, that, tother, m,
+            TERN_IMPL.find(op, opc, HalffloatVector::ternaryOperations));
+    }
+
+    // Lookup of ternary implementations, keyed by operator; entries are
+    // produced by ternaryOperations(int) through TERN_IMPL.find(...).
+    private static final
+    ImplCache<Ternary, TernaryOperation<HalffloatVector, VectorMask<Float16>>>
+        TERN_IMPL = new ImplCache<>(Ternary.class, HalffloatVector.class);
+
+    // Maps a ternary opcode to its lane-wise implementation.
+    // VECTOR_OP_FMA applies Float16.fma(a, b, c) to each lane triple via
+    // tOp; every other opcode yields null.
+    private static TernaryOperation<HalffloatVector, VectorMask<Float16>> ternaryOperations(int opc_) {
+        switch (opc_) {
+            case VECTOR_OP_FMA: return (v0, v1_, v2_, m) ->
+                    v0.tOp(v1_, v2_, m, (i, a, b, c) -> Float16.fma(a, b, c));
+            default: return null;
+        }
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of two broadcast scalars.
+     * <p>
+     * This is a lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the first input scalar
+     * @param e2 the second input scalar
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalars
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
+                             Float16 e1,
+                             Float16 e2) {
+        return lanewise(op, broadcast(e1), broadcast(e2));
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of two broadcast scalars,
+     * with selection of lane elements controlled by a mask.
+     * <p>
+     * This is a masked lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the first input scalar
+     * @param e2 the second input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalars
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
+                             Float16 e1,
+                             Float16 e2,
+                             VectorMask<Float16> m) {
+        return lanewise(op, broadcast(e1), broadcast(e2), m);
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of another vector and a broadcast scalar.
+     * <p>
+     * This is a lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, v1, this.broadcast(e2))}.
+     *
+     * @param op the operation used to combine lane values
+     * @param v1 the other input vector
+     * @param e2 the input scalar
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
+                             Vector<Float16> v1,
+                             Float16 e2) {
+        return lanewise(op, v1, broadcast(e2));
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of another vector and a broadcast scalar,
+     * with selection of lane elements controlled by a mask.
+     * <p>
+     * This is a masked lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param v1 the other input vector
+     * @param e2 the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
+                             Vector<Float16> v1,
+                             Float16 e2,
+                             VectorMask<Float16> m) {
+        return lanewise(op, v1, broadcast(e2), m);
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of another vector and a broadcast scalar.
+     * <p>
+     * This is a lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), v2)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the input scalar
+     * @param v2 the other input vector
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
+                             Float16 e1,
+                             Vector<Float16> v2) {
+        return lanewise(op, broadcast(e1), v2);
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of another vector and a broadcast scalar,
+     * with selection of lane elements controlled by a mask.
+     * <p>
+     * This is a masked lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the input scalar
+     * @param v2 the other input vector
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
+                             Float16 e1,
+                             Vector<Float16> v2,
+                             VectorMask<Float16> m) {
+        return lanewise(op, broadcast(e1), v2, m);
+    }
+
+    // (Thus endeth the Great and Mighty Ternary Ogdoad.)
+    // https://en.wikipedia.org/wiki/Ogdoad
+
+    /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
+    //
+    // These include masked and non-masked versions.
+    // This subclass adds broadcast (masked or not).
+ + /** + * {@inheritDoc} + * @see #add(Float16) + */ + @Override + @ForceInline + public final HalffloatVector add(Vector v) { + return lanewise(ADD, v); + } + + /** + * Adds this vector to the broadcast of an input scalar. + * + * This is a lane-wise binary operation which applies + * the primitive addition operation ({@code +}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16) + * lanewise}{@code (}{@link VectorOperators#ADD + * ADD}{@code , e)}. + * + * @param e the input scalar + * @return the result of adding each lane of this vector to the scalar + * @see #add(Vector) + * @see #broadcast(Float16) + * @see #add(Float16,VectorMask) + * @see VectorOperators#ADD + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,Float16) + */ + @ForceInline + public final + HalffloatVector add(Float16 e) { + return lanewise(ADD, e); + } + + /** + * {@inheritDoc} + * @see #add(Float16,VectorMask) + */ + @Override + @ForceInline + public final HalffloatVector add(Vector v, + VectorMask m) { + return lanewise(ADD, v, m); + } + + /** + * Adds this vector to the broadcast of an input scalar, + * selecting lane elements controlled by a mask. + * + * This is a masked lane-wise binary operation which applies + * the primitive addition operation ({@code +}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16,VectorMask) + * lanewise}{@code (}{@link VectorOperators#ADD + * ADD}{@code , s, m)}. 
+ * + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of adding each lane of this vector to the scalar + * @see #add(Vector,VectorMask) + * @see #broadcast(Float16) + * @see #add(Float16) + * @see VectorOperators#ADD + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,Float16) + */ + @ForceInline + public final HalffloatVector add(Float16 e, + VectorMask m) { + return lanewise(ADD, e, m); + } + + /** + * {@inheritDoc} + * @see #sub(Float16) + */ + @Override + @ForceInline + public final HalffloatVector sub(Vector v) { + return lanewise(SUB, v); + } + + /** + * Subtracts an input scalar from this vector. + * + * This is a masked lane-wise binary operation which applies + * the primitive subtraction operation ({@code -}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16) + * lanewise}{@code (}{@link VectorOperators#SUB + * SUB}{@code , e)}. + * + * @param e the input scalar + * @return the result of subtracting the scalar from each lane of this vector + * @see #sub(Vector) + * @see #broadcast(Float16) + * @see #sub(Float16,VectorMask) + * @see VectorOperators#SUB + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,Float16) + */ + @ForceInline + public final HalffloatVector sub(Float16 e) { + return lanewise(SUB, e); + } + + /** + * {@inheritDoc} + * @see #sub(Float16,VectorMask) + */ + @Override + @ForceInline + public final HalffloatVector sub(Vector v, + VectorMask m) { + return lanewise(SUB, v, m); + } + + /** + * Subtracts an input scalar from this vector + * under the control of a mask. + * + * This is a masked lane-wise binary operation which applies + * the primitive subtraction operation ({@code -}) to each lane. 
+ * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16,VectorMask) + * lanewise}{@code (}{@link VectorOperators#SUB + * SUB}{@code , s, m)}. + * + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of subtracting the scalar from each lane of this vector + * @see #sub(Vector,VectorMask) + * @see #broadcast(Float16) + * @see #sub(Float16) + * @see VectorOperators#SUB + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,Float16) + */ + @ForceInline + public final HalffloatVector sub(Float16 e, + VectorMask m) { + return lanewise(SUB, e, m); + } + + /** + * {@inheritDoc} + * @see #mul(Float16) + */ + @Override + @ForceInline + public final HalffloatVector mul(Vector v) { + return lanewise(MUL, v); + } + + /** + * Multiplies this vector by the broadcast of an input scalar. + * + * This is a lane-wise binary operation which applies + * the primitive multiplication operation ({@code *}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16) + * lanewise}{@code (}{@link VectorOperators#MUL + * MUL}{@code , e)}. + * + * @param e the input scalar + * @return the result of multiplying this vector by the given scalar + * @see #mul(Vector) + * @see #broadcast(Float16) + * @see #mul(Float16,VectorMask) + * @see VectorOperators#MUL + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,Float16) + */ + @ForceInline + public final HalffloatVector mul(Float16 e) { + return lanewise(MUL, e); + } + + /** + * {@inheritDoc} + * @see #mul(Float16,VectorMask) + */ + @Override + @ForceInline + public final HalffloatVector mul(Vector v, + VectorMask m) { + return lanewise(MUL, v, m); + } + + /** + * Multiplies this vector by the broadcast of an input scalar, + * selecting lane elements controlled by a mask. 
+ * + * This is a masked lane-wise binary operation which applies + * the primitive multiplication operation ({@code *}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16,VectorMask) + * lanewise}{@code (}{@link VectorOperators#MUL + * MUL}{@code , s, m)}. + * + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of muling each lane of this vector to the scalar + * @see #mul(Vector,VectorMask) + * @see #broadcast(Float16) + * @see #mul(Float16) + * @see VectorOperators#MUL + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,Float16) + */ + @ForceInline + public final HalffloatVector mul(Float16 e, + VectorMask m) { + return lanewise(MUL, e, m); + } + + /** + * {@inheritDoc} + * @apiNote Because the underlying scalar operator is an IEEE + * floating point number, division by zero in fact will + * not throw an exception, but will yield a signed + * infinity or NaN. + */ + @Override + @ForceInline + public final HalffloatVector div(Vector v) { + return lanewise(DIV, v); + } + + /** + * Divides this vector by the broadcast of an input scalar. + * + * This is a lane-wise binary operation which applies + * the primitive division operation ({@code /}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16) + * lanewise}{@code (}{@link VectorOperators#DIV + * DIV}{@code , e)}. + * + * @apiNote Because the underlying scalar operator is an IEEE + * floating point number, division by zero in fact will + * not throw an exception, but will yield a signed + * infinity or NaN. 
+ * + * @param e the input scalar + * @return the result of dividing each lane of this vector by the scalar + * @see #div(Vector) + * @see #broadcast(Float16) + * @see #div(Float16,VectorMask) + * @see VectorOperators#DIV + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,Float16) + */ + @ForceInline + public final HalffloatVector div(Float16 e) { + return lanewise(DIV, e); + } + + /** + * {@inheritDoc} + * @see #div(Float16,VectorMask) + * @apiNote Because the underlying scalar operator is an IEEE + * floating point number, division by zero in fact will + * not throw an exception, but will yield a signed + * infinity or NaN. + */ + @Override + @ForceInline + public final HalffloatVector div(Vector v, + VectorMask m) { + return lanewise(DIV, v, m); + } + + /** + * Divides this vector by the broadcast of an input scalar, + * selecting lane elements controlled by a mask. + * + * This is a masked lane-wise binary operation which applies + * the primitive division operation ({@code /}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16,VectorMask) + * lanewise}{@code (}{@link VectorOperators#DIV + * DIV}{@code , s, m)}. + * + * @apiNote Because the underlying scalar operator is an IEEE + * floating point number, division by zero in fact will + * not throw an exception, but will yield a signed + * infinity or NaN. 
+ * + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of dividing each lane of this vector by the scalar + * @see #div(Vector,VectorMask) + * @see #broadcast(Float16) + * @see #div(Float16) + * @see VectorOperators#DIV + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,Float16) + */ + @ForceInline + public final HalffloatVector div(Float16 e, + VectorMask m) { + return lanewise(DIV, e, m); + } + + /// END OF FULL-SERVICE BINARY METHODS + + /// SECOND-TIER BINARY METHODS + // + // There are no masked versions. + + /** + * {@inheritDoc} + * @apiNote + * For this method, floating point negative + * zero {@code -0.0} is treated as a value distinct from, and less + * than the default value (positive zero). + */ + @Override + @ForceInline + public final HalffloatVector min(Vector v) { + return lanewise(MIN, v); + } + + // FIXME: "broadcast of an input scalar" is really wordy. Reduce? + /** + * Computes the smaller of this vector and the broadcast of an input scalar. + * + * This is a lane-wise binary operation which applies the + * operation {@code Math.min()} to each pair of + * corresponding lane values. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16) + * lanewise}{@code (}{@link VectorOperators#MIN + * MIN}{@code , e)}. + * + * @param e the input scalar + * @return the result of multiplying this vector by the given scalar + * @see #min(Vector) + * @see #broadcast(Float16) + * @see VectorOperators#MIN + * @see #lanewise(VectorOperators.Binary,Float16,VectorMask) + * @apiNote + * For this method, floating point negative + * zero {@code -0.0} is treated as a value distinct from, and less + * than the default value (positive zero). 
+ */ + @ForceInline + public final HalffloatVector min(Float16 e) { + return lanewise(MIN, e); + } + + /** + * {@inheritDoc} + * @apiNote + * For this method, floating point negative + * zero {@code -0.0} is treated as a value distinct from, and less + * than the default value (positive zero). + */ + @Override + @ForceInline + public final HalffloatVector max(Vector v) { + return lanewise(MAX, v); + } + + /** + * Computes the larger of this vector and the broadcast of an input scalar. + * + * This is a lane-wise binary operation which applies the + * operation {@code Math.max()} to each pair of + * corresponding lane values. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Float16) + * lanewise}{@code (}{@link VectorOperators#MAX + * MAX}{@code , e)}. + * + * @param e the input scalar + * @return the result of multiplying this vector by the given scalar + * @see #max(Vector) + * @see #broadcast(Float16) + * @see VectorOperators#MAX + * @see #lanewise(VectorOperators.Binary,Float16,VectorMask) + * @apiNote + * For this method, floating point negative + * zero {@code -0.0} is treated as a value distinct from, and less + * than the default value (positive zero). + */ + @ForceInline + public final HalffloatVector max(Float16 e) { + return lanewise(MAX, e); + } + + + // common FP operator: pow + /** + * Raises this vector to the power of a second input vector. + * + * This is a lane-wise binary operation which applies an operation + * conforming to the specification of + * {@link Math#pow Math.pow(a,b)} + * to each pair of corresponding lane values. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Vector) + * lanewise}{@code (}{@link VectorOperators#POW + * POW}{@code , b)}. + * + *

+ * This is not a full-service named operation like + * {@link #add(Vector) add}. A masked version of + * this operation is not directly available + * but may be obtained via the masked version of + * {@code lanewise}. + * + * @param b a vector exponent by which to raise this vector + * @return the {@code b}-th power of this vector + * @see #pow(Float16) + * @see VectorOperators#POW + * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) + */ + @ForceInline + public final HalffloatVector pow(Vector b) { + return lanewise(POW, b); + } + + /** + * Raises this vector to a scalar power. + * + * This is a lane-wise binary operation which applies an operation + * conforming to the specification of + * {@link Math#pow Math.pow(a,b)} + * to each pair of corresponding lane values. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Vector) + * lanewise}{@code (}{@link VectorOperators#POW + * POW}{@code , b)}. + * + * @param b a scalar exponent by which to raise this vector + * @return the {@code b}-th power of this vector + * @see #pow(Vector) + * @see VectorOperators#POW + * @see #lanewise(VectorOperators.Binary,Float16,VectorMask) + */ + @ForceInline + public final HalffloatVector pow(Float16 b) { + return lanewise(POW, b); + } + + /// UNARY METHODS + + /** + * {@inheritDoc} + */ + @Override + @ForceInline + public final + HalffloatVector neg() { + return lanewise(NEG); + } + + /** + * {@inheritDoc} + */ + @Override + @ForceInline + public final + HalffloatVector abs() { + return lanewise(ABS); + } + + + + // sqrt + /** + * Computes the square root of this vector. + * + * This is a lane-wise unary operation which applies an operation + * conforming to the specification of + * {@link Math#sqrt Math.sqrt(a)} + * to each lane value. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Unary) + * lanewise}{@code (}{@link VectorOperators#SQRT + * SQRT}{@code )}. 
+ * + * @return the square root of this vector + * @see VectorOperators#SQRT + * @see #lanewise(VectorOperators.Unary,VectorMask) + */ + @ForceInline + public final HalffloatVector sqrt() { + return lanewise(SQRT); + } + + /// COMPARISONS + + /** + * {@inheritDoc} + */ + @Override + @ForceInline + public final + VectorMask eq(Vector v) { + return compare(EQ, v); + } + + /** + * Tests if this vector is equal to an input scalar. + * + * This is a lane-wise binary test operation which applies + * the primitive equals operation ({@code ==}) to each lane. + * The result is the same as {@code compare(VectorOperators.EQ, e)}. + * + * @param e the input scalar + * @return the result mask of testing if this vector + * is equal to {@code e} + * @see #compare(VectorOperators.Comparison,Float16) + */ + @ForceInline + public final + VectorMask eq(Float16 e) { + return compare(EQ, e); + } + + /** + * {@inheritDoc} + */ + @Override + @ForceInline + public final + VectorMask lt(Vector v) { + return compare(LT, v); + } + + /** + * Tests if this vector is less than an input scalar. + * + * This is a lane-wise binary test operation which applies + * the primitive less than operation ({@code <}) to each lane. + * The result is the same as {@code compare(VectorOperators.LT, e)}. 
+ * + * @param e the input scalar + * @return the mask result of testing if this vector + * is less than the input scalar + * @see #compare(VectorOperators.Comparison,Float16) + */ + @ForceInline + public final + VectorMask lt(Float16 e) { + return compare(LT, e); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + VectorMask test(VectorOperators.Test op); + + /*package-private*/ + @ForceInline + final + > + M testTemplate(Class maskType, Test op) { + HalffloatSpecies vsp = vspecies(); + if (opKind(op, VO_SPECIAL)) { + ShortVector bits = this.viewAsIntegralLanes(); + VectorMask m; + if (op == IS_DEFAULT) { + m = bits.compare(EQ, (short) 0); + } else if (op == IS_NEGATIVE) { + m = bits.compare(LT, (short) 0); + } + else if (op == IS_FINITE || + op == IS_NAN || + op == IS_INFINITE) { + // first kill the sign: + bits = bits.and(Short.MAX_VALUE); + // next find the bit pattern for infinity: + short infbits = (short) toBits(Float16.POSITIVE_INFINITY); + // now compare: + if (op == IS_FINITE) { + m = bits.compare(LT, infbits); + } else if (op == IS_NAN) { + m = bits.compare(GT, infbits); + } else { + m = bits.compare(EQ, infbits); + } + } + else { + throw new AssertionError(op); + } + return maskType.cast(m.cast(vsp)); + } + int opc = opCode(op); + throw new AssertionError(op); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + VectorMask test(VectorOperators.Test op, + VectorMask m); + + /*package-private*/ + @ForceInline + final + > + M testTemplate(Class maskType, Test op, M mask) { + HalffloatSpecies vsp = vspecies(); + mask.check(maskType, this); + if (opKind(op, VO_SPECIAL)) { + ShortVector bits = this.viewAsIntegralLanes(); + VectorMask m = mask.cast(ShortVector.species(shape())); + if (op == IS_DEFAULT) { + m = bits.compare(EQ, (short) 0, m); + } else if (op == IS_NEGATIVE) { + m = bits.compare(LT, (short) 0, m); + } + else if (op == IS_FINITE || + op == IS_NAN || + op == IS_INFINITE) { + // first kill the sign: + bits = 
bits.and(Short.MAX_VALUE); + // next find the bit pattern for infinity: + short infbits = (short) toBits(Float16.POSITIVE_INFINITY); + // now compare: + if (op == IS_FINITE) { + m = bits.compare(LT, infbits, m); + } else if (op == IS_NAN) { + m = bits.compare(GT, infbits, m); + } else { + m = bits.compare(EQ, infbits, m); + } + } + else { + throw new AssertionError(op); + } + return maskType.cast(m.cast(vsp)); + } + int opc = opCode(op); + throw new AssertionError(op); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + VectorMask compare(VectorOperators.Comparison op, Vector v); + + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Vector v) { + HalffloatVector that = (HalffloatVector) v; + that.check(this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, Float16.class, length(), + this, that, null, + (cond, v0, v1, m1) -> { + AbstractMask m + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) m; + return m2; + }); + } + + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Vector v, M m) { + HalffloatVector that = (HalffloatVector) v; + that.check(this); + m.check(maskType, this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, Float16.class, length(), + this, that, m, + (cond, v0, v1, m1) -> { + AbstractMask cmpM + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) cmpM.and(m1); + return m2; + }); + } + + /** + * Applies the comparison selected by {@code cond} to one pair of lane values. + * Called from the lambdas that the {@code compareTemplate} methods hand to + * {@code VectorSupport.compare}. + * Every case compares the {@code floatValue()} of the operands; applying + * {@code ==} or {@code !=} directly to the {@code Float16} operands would + * compare the objects rather than their numeric values. + * + * @param cond one of the {@code BT_*} condition codes + * @param a the left operand + * @param b the right operand + * @return the outcome of the selected comparison + * @throws AssertionError if {@code cond} is not one of the six codes handled here + */ + @ForceInline + private static boolean compareWithOp(int cond, Float16 a, Float16 b) { + return switch (cond) { + case BT_eq -> a.floatValue() == b.floatValue(); + case BT_ne -> a.floatValue() != b.floatValue(); + case BT_lt -> a.floatValue() < b.floatValue(); + case BT_le -> a.floatValue() <= b.floatValue(); + case BT_gt -> a.floatValue() > b.floatValue(); + case BT_ge -> a.floatValue() >= 
b.floatValue(); + default -> throw new AssertionError(); + }; + } + + /** + * Tests this vector by comparing it with an input scalar, + * according to the given comparison operation. + * + * This is a lane-wise binary test operation which applies + * the comparison operation to each lane. + *

+ * The result is the same as + * {@code compare(op, broadcast(species(), e))}. + * That is, the scalar may be regarded as broadcast to + * a vector of the same species, and then compared + * against the original vector, using the selected + * comparison operation. + * + * @param op the operation used to compare lane values + * @param e the input scalar + * @return the mask result of testing lane-wise if this vector + * compares to the input, according to the selected + * comparison operator + * @see HalffloatVector#compare(VectorOperators.Comparison,Vector) + * @see #eq(Float16) + * @see #lt(Float16) + */ + public abstract + VectorMask compare(Comparison op, Float16 e); + + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, Float16 e) { + return compareTemplate(maskType, op, broadcast(e)); + } + + /** + * Tests this vector by comparing it with an input scalar, + * according to the given comparison operation, + * in lanes selected by a mask. + * + * This is a masked lane-wise binary test operation which applies + * the comparison operation to each pair of corresponding lane values. + * + * The returned result is equal to the expression + * {@code compare(op,e).and(m)}. 
+ * + * @param op the operation used to compare lane values + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the mask result of testing lane-wise if this vector + * compares to the input, according to the selected + * comparison operator, + * and only in the lanes selected by the mask + * @see HalffloatVector#compare(VectorOperators.Comparison,Vector,VectorMask) + */ + @ForceInline + public final VectorMask compare(VectorOperators.Comparison op, + Float16 e, + VectorMask m) { + return compare(op, broadcast(e), m); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + VectorMask compare(Comparison op, long e); + + /*package-private*/ + @ForceInline + final + > + M compareTemplate(Class maskType, Comparison op, long e) { + return compareTemplate(maskType, op, broadcast(e)); + } + + /** + * {@inheritDoc} + */ + @Override + @ForceInline + public final + VectorMask compare(Comparison op, long e, VectorMask m) { + return compare(op, broadcast(e), m); + } + + + + /** + * {@inheritDoc} + */ + @Override public abstract + HalffloatVector blend(Vector v, VectorMask m); + + /*package-private*/ + @ForceInline + final + > + HalffloatVector + blendTemplate(Class maskType, HalffloatVector v, M m) { + v.check(this); + return VectorSupport.blend( + getClass(), maskType, Float16.class, length(), + this, v, m, + (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b)); + } + + /** + * {@inheritDoc} + */ + @Override public abstract HalffloatVector addIndex(int scale); + + /*package-private*/ + @ForceInline + final HalffloatVector addIndexTemplate(int scale) { + HalffloatSpecies vsp = vspecies(); + // make sure VLENGTH*scale doesn't overflow: + vsp.checkScale(scale); + return VectorSupport.indexVector( + getClass(), Float16.class, length(), + this, scale, vsp, + (v, scale_, s) + -> { + // If the platform doesn't support an INDEX + // instruction directly, load IOTA from memory + // and multiply. 
+ HalffloatVector iota = s.iota(); + return v.add(scale_ == 1 ? iota : iota.mul(Float16.valueOf(scale_))); + }); + } + + /** + * Replaces selected lanes of this vector with + * a scalar value + * under the control of a mask. + * + * This is a masked lane-wise binary operation which + * selects each lane value from one or the other input. + * + * The returned result is equal to the expression + * {@code blend(broadcast(e),m)}. + * + * @param e the input scalar, containing the replacement lane value + * @param m the mask controlling lane selection of the scalar + * @return the result of blending the lane elements of this vector with + * the scalar value + */ + @ForceInline + public final HalffloatVector blend(Float16 e, + VectorMask m) { + return blend(broadcast(e), m); + } + + /** + * Replaces selected lanes of this vector with + * a scalar value + * under the control of a mask. + * + * This is a masked lane-wise binary operation which + * selects each lane value from one or the other input. + * + * The returned result is equal to the expression + * {@code blend(broadcast(e),m)}. 
+ * + * @param e the input scalar, containing the replacement lane value + * @param m the mask controlling lane selection of the scalar + * @return the result of blending the lane elements of this vector with + * the scalar value + */ + @ForceInline + public final HalffloatVector blend(long e, + VectorMask m) { + return blend(broadcast(e), m); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector slice(int origin, Vector v1); + + /*package-private*/ + final + @ForceInline + HalffloatVector sliceTemplate(int origin, Vector v1) { + HalffloatVector that = (HalffloatVector) v1; + that.check(this); + Objects.checkIndex(origin, length() + 1); + VectorShuffle iota = iotaShuffle(); + Float16 pivotidx = Float16.valueOf(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); + iota = iotaShuffle(origin, 1, true); + return that.rearrange(iota).blend(this.rearrange(iota), blendMask); + } + + /** + * {@inheritDoc} + */ + @Override + @ForceInline + public final + HalffloatVector slice(int origin, + Vector w, + VectorMask m) { + return broadcast(0).blend(slice(origin, w), m); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector slice(int origin); + + /*package-private*/ + final + @ForceInline + HalffloatVector sliceTemplate(int origin) { + Objects.checkIndex(origin, length() + 1); + VectorShuffle iota = iotaShuffle(); + Float16 pivotidx = Float16.valueOf(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); + iota = iotaShuffle(origin, 1, true); + return vspecies().zero().blend(this.rearrange(iota), blendMask); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector unslice(int origin, Vector w, int part); + + /*package-private*/ + final + @ForceInline + HalffloatVector + unsliceTemplate(int origin, Vector w, int part) { + HalffloatVector that = (HalffloatVector) w; + that.check(this); 
+ Objects.checkIndex(origin, length() + 1); + VectorShuffle iota = iotaShuffle(); + VectorMask blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, + (broadcast(Float16.valueOf(origin)))); + iota = iotaShuffle(-origin, 1, true); + return that.blend(this.rearrange(iota), blendMask); + } + + /*package-private*/ + final + @ForceInline + > + HalffloatVector + unsliceTemplate(Class maskType, int origin, Vector w, int part, M m) { + HalffloatVector that = (HalffloatVector) w; + that.check(this); + HalffloatVector slice = that.sliceTemplate(origin, that); + slice = slice.blendTemplate(maskType, this, m); + return slice.unsliceTemplate(origin, w, part); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector unslice(int origin, Vector w, int part, VectorMask m); + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector unslice(int origin); + + /*package-private*/ + final + @ForceInline + HalffloatVector + unsliceTemplate(int origin) { + Objects.checkIndex(origin, length() + 1); + VectorShuffle iota = iotaShuffle(); + VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, + broadcast(Float16.valueOf(origin))); + iota = iotaShuffle(-origin, 1, true); + return vspecies().zero().blend(this.rearrange(iota), blendMask); + } + + private ArrayIndexOutOfBoundsException + wrongPartForSlice(int part) { + String msg = String.format("bad part number %d for slice operation", + part); + return new ArrayIndexOutOfBoundsException(msg); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector rearrange(VectorShuffle m); + + /*package-private*/ + @ForceInline + final + > + HalffloatVector rearrangeTemplate(Class shuffletype, S shuffle) { + shuffle.checkIndexes(); + return VectorSupport.rearrangeOp( + getClass(), shuffletype, null, Float16.class, length(), + this, shuffle, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return v1.lane(ei); + 
})); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector rearrange(VectorShuffle s, + VectorMask m); + + /*package-private*/ + @ForceInline + final + , M extends VectorMask> + HalffloatVector rearrangeTemplate(Class shuffletype, + Class masktype, + S shuffle, + M m) { + + m.check(masktype, this); + VectorMask valid = shuffle.laneIsValid(); + if (m.andNot(valid).anyTrue()) { + shuffle.checkIndexes(); + throw new AssertionError(); + } + return VectorSupport.rearrangeOp( + getClass(), shuffletype, masktype, Float16.class, length(), + this, shuffle, m, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return ei < 0 || !m_.laneIsSet(i) ? Float16.valueOf(0) : v1.lane(ei); + })); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector rearrange(VectorShuffle s, + Vector v); + + /*package-private*/ + @ForceInline + final + > + HalffloatVector rearrangeTemplate(Class shuffletype, + S shuffle, + HalffloatVector v) { + VectorMask valid = shuffle.laneIsValid(); + @SuppressWarnings("unchecked") + S ws = (S) shuffle.wrapIndexes(); + HalffloatVector r0 = + VectorSupport.rearrangeOp( + getClass(), shuffletype, null, Float16.class, length(), + this, ws, null, + (v0, s_, m_) -> v0.uOp((i, a) -> { + int ei = s_.laneSource(i); + return v0.lane(ei); + })); + HalffloatVector r1 = + VectorSupport.rearrangeOp( + getClass(), shuffletype, null, Float16.class, length(), + v, ws, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return v1.lane(ei); + })); + return r1.blend(r0, valid); + } + + @ForceInline + private final + VectorShuffle toShuffle0(HalffloatSpecies dsp) { + Float16[] a = toArray(); + int[] sa = new int[a.length]; + for (int i = 0; i < a.length; i++) { + sa[i] = a[i].intValue(); + } + return VectorShuffle.fromArray(dsp, sa, 0); + } + + /*package-private*/ + @ForceInline + final + VectorShuffle toShuffleTemplate(Class shuffleType) { + HalffloatSpecies vsp = vspecies(); + return 
VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + getClass(), Float16.class, length(), + shuffleType, byte.class, length(), + this, vsp, + HalffloatVector::toShuffle0); + } + + /** + * {@inheritDoc} + * @since 19 + */ + @Override + public abstract + HalffloatVector compress(VectorMask m); + + /*package-private*/ + @ForceInline + final + > + HalffloatVector compressTemplate(Class masktype, M m) { + m.check(masktype, this); + return (HalffloatVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype, + Float16.class, length(), this, m, + (v1, m1) -> compressHelper(v1, m1)); + } + + /** + * {@inheritDoc} + * @since 19 + */ + @Override + public abstract + HalffloatVector expand(VectorMask m); + + /*package-private*/ + @ForceInline + final + > + HalffloatVector expandTemplate(Class masktype, M m) { + m.check(masktype, this); + return (HalffloatVector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype, + Float16.class, length(), this, m, + (v1, m1) -> expandHelper(v1, m1)); + } + + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector selectFrom(Vector v); + + /*package-private*/ + @ForceInline + final HalffloatVector selectFromTemplate(HalffloatVector v) { + return v.rearrange(this.toShuffle()); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract + HalffloatVector selectFrom(Vector s, VectorMask m); + + /*package-private*/ + @ForceInline + final HalffloatVector selectFromTemplate(HalffloatVector v, + AbstractMask m) { + return v.rearrange(this.toShuffle(), m); + } + + /// Ternary operations + + + /** + * Multiplies this vector by a second input vector, and sums + * the result with a third. + * + * Extended precision is used for the intermediate result, + * avoiding possible loss of precision from rounding once + * for each of the two operations. 
+ * The result is numerically close to {@code this.mul(b).add(c)}, + * and is typically closer to the true mathematical result. + * + * This is a lane-wise ternary operation which applies an operation + * conforming to the specification of + * {@link Float16#fma(Float16,Float16,Float16) Float16.fma(a,b,c)} + * to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) + * lanewise}{@code (}{@link VectorOperators#FMA + * FMA}{@code , b, c)}. + * + * @param b the second input vector, supplying multiplier values + * @param c the third input vector, supplying addend values + * @return the product of this vector and the second input vector + * summed with the third input vector, using extended precision + * for the intermediate result + * @see #fma(Float16,Float16) + * @see VectorOperators#FMA + * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) + */ + @ForceInline + public final + HalffloatVector fma(Vector b, Vector c) { + return lanewise(FMA, b, c); + } + + /** + * Multiplies this vector by a scalar multiplier, and sums + * the result with a scalar addend. + * + * Extended precision is used for the intermediate result, + * avoiding possible loss of precision from rounding once + * for each of the two operations. + * The result is numerically close to {@code this.mul(b).add(c)}, + * and is typically closer to the true mathematical result. + * + * This is a lane-wise ternary operation which applies an operation + * conforming to the specification of + * {@link Float16#fma(Float16,Float16,Float16) Float16.fma(a,b,c)} + * to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Ternary,Float16,Float16) + * lanewise}{@code (}{@link VectorOperators#FMA + * FMA}{@code , b, c)}. 
+ * + * @param b the scalar multiplier + * @param c the scalar addend + * @return the product of this vector and the scalar multiplier + * summed with scalar addend, using extended precision + * for the intermediate result + * @see #fma(Vector,Vector) + * @see VectorOperators#FMA + * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask) + */ + @ForceInline + public final + HalffloatVector fma(Float16 b, Float16 c) { + return lanewise(FMA, b, c); + } + + // Don't bother with (Vector,Float16) and (Float16,Vector) overloadings. + + // Type specific horizontal reductions + + /** + * Returns a value accumulated from all the lanes of this vector. + * + * This is an associative cross-lane reduction operation which + * applies the specified operation to all the lane elements. + *

+ * A few reduction operations do not support arbitrary reordering + * of their operands, yet are included here because of their + * usefulness. + *

+ * + * @param op the operation used to combine lane values + * @return the accumulated result + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #reduceLanes(VectorOperators.Associative,VectorMask) + * @see #add(Vector) + * @see #mul(Vector) + * @see #min(Vector) + * @see #max(Vector) + * @see VectorOperators#FIRST_NONZERO + */ + public abstract Float16 reduceLanes(VectorOperators.Associative op); + + /** + * Returns a value accumulated from selected lanes of this vector, + * controlled by a mask. + * + * This is an associative cross-lane reduction operation which + * applies the specified operation to the selected lane elements. + *

+ * If no elements are selected, an operation-specific identity + * value is returned. + *

    + *
  • + * If the operation is + * {@code ADD} + * or {@code FIRST_NONZERO}, + * then the identity value is positive zero, the default {@code Float16} value. + *
  • + * If the operation is {@code MUL}, + * then the identity value is one. + *
  • + * If the operation is {@code MAX}, + * then the identity value is {@code Float16.NEGATIVE_INFINITY}. + *
  • + * If the operation is {@code MIN}, + * then the identity value is {@code Float16.POSITIVE_INFINITY}. + *
+ *

+ * A few reduction operations do not support arbitrary reordering + * of their operands, yet are included here because of their + * usefulness. + *

    + *
  • + * In the case of {@code FIRST_NONZERO}, the reduction returns + * the value from the lowest-numbered non-zero lane. + * (As with {@code MAX} and {@code MIN}, floating point negative + * zero {@code -0.0} is treated as a value distinct from + * the default value, positive zero. So a first-nonzero lane reduction + * might return {@code -0.0} even in the presence of non-zero + * lane values.) + *
  • + * In the case of {@code ADD} and {@code MUL}, the + * precise result will reflect the choice of an arbitrary order + * of operations, which may even vary over time. + * For further details see the section + * Operations on floating point vectors. + *
  • + * All other reduction operations are fully commutative and + * associative. The implementation can choose any order of + * processing, yet it will always produce the same result. + *
+ * + * @param op the operation used to combine lane values + * @param m the mask controlling lane selection + * @return the reduced result accumulated from the selected lane values + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #reduceLanes(VectorOperators.Associative) + */ + public abstract Float16 reduceLanes(VectorOperators.Associative op, + VectorMask m); + + /*package-private*/ + @ForceInline + final + Float16 reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, + VectorMask m) { + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + // FIXME: The JIT should handle this. + HalffloatVector v = broadcast(Float16.valueOf(0)).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, Float16.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, HalffloatVector::reductionOperations))); + } + + /*package-private*/ + @ForceInline + final + Float16 reduceLanesTemplate(VectorOperators.Associative op) { + if (op == FIRST_NONZERO) { + // FIXME: The JIT should handle this. + VectorMask thisNZ + = this.viewAsIntegralLanes().compare(NE, (short) 0); + int ft = thisNZ.firstTrue(); + return ft < length() ? 
this.lane(ft) : Float16.valueOf(0); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), null, Float16.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, HalffloatVector::reductionOperations))); + } + + private static final + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, HalffloatVector.class); + + /** + * Returns the lane-by-lane reduction that the {@code reduceLanesTemplate} + * methods pass to {@code VectorSupport.reductionCoerced} for the given opcode. + * Each {@code rOp} call is seeded with the identity value that the + * {@code reduceLanes} documentation lists for the operation: zero for ADD, + * one for MUL, {@code Float16.POSITIVE_INFINITY} for MIN and + * {@code Float16.NEGATIVE_INFINITY} for MAX. + * + * @param opc_ a {@code VECTOR_OP_*} opcode + * @return the reduction for {@code opc_}, or {@code null} if the opcode + * is not one of ADD, MUL, MIN or MAX + */ + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp(Float16.valueOf(0), m, (i, a, b) -> Float16.add(a, b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp(Float16.valueOf(1), m, (i, a, b) -> Float16.multiply(a, b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> Float16.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> Float16.max(a, b))); + default: return null; + } + } + + /* Seed for MAX reductions: negative infinity. */ + private static final Float16 MIN_OR_INF = Float16.NEGATIVE_INFINITY; + /* Seed for MIN reductions: positive infinity. */ + private static final Float16 MAX_OR_INF = Float16.POSITIVE_INFINITY; + + public @Override abstract long reduceLanesToLong(VectorOperators.Associative op); + public @Override abstract long reduceLanesToLong(VectorOperators.Associative op, + VectorMask m); + + // Type specific accessors + + /** + * Gets the lane element at lane index {@code i} + * + * @param i the lane index + * @return the lane element at lane index {@code i} + * @throws IllegalArgumentException if the index is out of range + * ({@code < 0 || >= length()}) + */ + public abstract Float16 lane(int i); + + /** + * Replaces the lane element of this vector at lane index {@code i} with + * value {@code e}. + * + * This is a cross-lane operation and behaves as if it returns the result + * of blending this vector with an input vector that is the result of + * broadcasting {@code e} and a mask that has only one lane set at lane + * index {@code i}. 
+ * + * @param i the lane index of the lane element to be replaced + * @param e the value to be placed + * @return the result of replacing the lane element of this vector at lane + * index {@code i} with value {@code e}. + * @throws IllegalArgumentException if the index is out of range + * ({@code < 0 || >= length()}) + */ + public abstract HalffloatVector withLane(int i, Float16 e); + + // Memory load operations + + /** + * Returns an array of type {@code Float16[]} + * containing all the lane values. + * The array length is the same as the vector length. + * The array elements are stored in lane order. + *

+ * This method behaves as if it stores + * this vector into an allocated array + * (using {@link #intoArray(Float16[], int) intoArray}) + * and returns the array as follows: + *

{@code
+     *   Float16[] a = new Float16[this.length()];
+     *   this.intoArray(a, 0);
+     *   return a;
+     * }
+ * + * @return an array containing the lane values of this vector + */ + @ForceInline + @Override + public final Float16[] toArray() { + Float16[] a = new Float16[vspecies().laneCount()]; + intoArray(a, 0); + return a; + } + + /** {@inheritDoc} + */ + @ForceInline + @Override + public final int[] toIntArray() { + Float16[] a = toArray(); + int[] res = new int[a.length]; + for (int i = 0; i < a.length; i++) { + Float16 e = a[i]; + res[i] = (int) HalffloatSpecies.toIntegralChecked(e, true); + } + return res; + } + + /** {@inheritDoc} + */ + @ForceInline + @Override + public final long[] toLongArray() { + Float16[] a = toArray(); + long[] res = new long[a.length]; + for (int i = 0; i < a.length; i++) { + // Value range of integral casted Float16 value is a proper subset of + // long value range. + // NOTE(review): toIntArray() above goes through HalffloatSpecies.toIntegralChecked(), + // this loop does not; confirm the unchecked longValue() conversion is intended. + res[i] = a[i].longValue(); + } + return res; + } + + /** {@inheritDoc} + * @implNote + * When this method is used on vectors + * of type {@code HalffloatVector}, + * there will be no loss of precision. + */ + @ForceInline + @Override + public final double[] toDoubleArray() { + Float16[] a = toArray(); + double[] res = new double[a.length]; + for (int i = 0; i < a.length; i++) { + res[i] = a[i].doubleValue(); + } + return res; + } + + /** + * Loads a vector from an array of type {@code Float16[]} + * starting at an offset. + * For each vector lane, where {@code N} is the vector lane index, the + * array element at index {@code offset + N} is placed into the + * resulting vector at lane index {@code N}. 
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array + * @return the vector loaded from an array + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + */ + @ForceInline + public static + HalffloatVector fromArray(VectorSpecies species, + Float16[] a, int offset) { + offset = checkFromIndexSize(offset, species.length(), a.length); + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.dummyVector().fromArray0(a, offset); + } + + /** + * Loads a vector from an array of type {@code Float16[]} + * starting at an offset and using a mask. + * Lanes where the mask is unset are filled with the default + * value of {@code Float16} (positive zero). + * For each vector lane, where {@code N} is the vector lane index, + * if the mask lane at index {@code N} is set then the array element at + * index {@code offset + N} is placed into the resulting vector at lane index + * {@code N}, otherwise the default element value is placed into the + * resulting vector at lane index {@code N}. 
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array + * @param m the mask controlling lane selection + * @return the vector loaded from an array + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + * where the mask is set + */ + @ForceInline + public static + HalffloatVector fromArray(VectorSpecies species, + Float16[] a, int offset, + VectorMask m) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + if (VectorIntrinsics.indexInRange(offset, vsp.length(), a.length)) { + return vsp.dummyVector().fromArray0(a, offset, m, OFFSET_IN_RANGE); + } + + checkMaskFromIndexSize(offset, vsp, m, 1, a.length); + return vsp.dummyVector().fromArray0(a, offset, m, OFFSET_OUT_OF_RANGE); + } + + /** + * Gathers a new vector composed of elements from an array of type + * {@code Float16[]}, + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an index map. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + *

+ * For each vector lane, where {@code N} is the vector lane index, + * the lane is loaded from the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]}. + * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array, may be negative if relative + * indexes in the index map compensate to produce a value within the + * array bounds + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @return the vector loaded from the indexed elements of the array + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public static + HalffloatVector fromArray(VectorSpecies species, + Float16[] a, int offset, + int[] indexMap, int mapOffset) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]); + } + + /** + * Gathers a new vector composed of elements from an array of type + * {@code Float16[]}, + * under the control of a mask, and + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an index map. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + *

+ * For each vector lane, where {@code N} is the vector lane index, + * if the lane is set in the mask, + * the lane is loaded from the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]]}. + * Unset lanes in the resulting vector are set to zero. + * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array, may be negative if relative + * indexes in the index map compensate to produce a value within the + * array bounds + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @param m the mask controlling lane selection + * @return the vector loaded from the indexed elements of the array + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * where the mask is set + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public static + HalffloatVector fromArray(VectorSpecies species, + Float16[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]); + } + + + + /** + * Loads a vector from a {@linkplain MemorySegment memory segment} + * starting at an offset into the memory segment. + * Bytes are composed into primitive lane elements according + * to the specified byte order. + * The vector is arranged into lanes according to + * memory ordering. + *

+ * This method behaves as if it returns the result of calling + * {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask) + * fromMemorySegment()} as follows: + *

{@code
+     * var m = species.maskAll(true);
+     * return fromMemorySegment(species, ms, offset, bo, m);
+     * }
+ * + * @param species species of desired vector + * @param ms the memory segment + * @param offset the offset into the memory segment + * @param bo the intended byte order + * @return a vector loaded from the memory segment + * @throws IndexOutOfBoundsException + * if {@code offset+N*2 < 0} + * or {@code offset+N*2 >= ms.byteSize()} + * for any lane {@code N} in the vector + * @throws IllegalStateException if the memory segment's session is not alive, + * or if access occurs from a thread other than the thread owning the session. + * @since 19 + */ + @ForceInline + public static + HalffloatVector fromMemorySegment(VectorSpecies species, + MemorySegment ms, long offset, + ByteOrder bo) { + offset = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize()); + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.dummyVector().fromMemorySegment0(ms, offset).maybeSwap(bo); + } + + /** + * Loads a vector from a {@linkplain MemorySegment memory segment} + * starting at an offset into the memory segment + * and using a mask. + * Lanes where the mask is unset are filled with the default + * value of {@code Float16} (positive zero). + * Bytes are composed into primitive lane elements according + * to the specified byte order. + * The vector is arranged into lanes according to + * memory ordering. + *

+ * The following pseudocode illustrates the behavior: + *

{@code
+     * var slice = ms.asSlice(offset);
+     * Float16[] ar = new Float16[species.length()];
+     * for (int n = 0; n < ar.length; n++) {
+     *     if (m.laneIsSet(n)) {
+     *         ar[n] = slice.getAtIndex(ValueLayout.JAVA_HALFFLOAT.withByteAlignment(1), n);
+     *     }
+     * }
+     * HalffloatVector r = HalffloatVector.fromArray(species, ar, 0);
+     * }
+ * @implNote + * This operation is likely to be more efficient if + * the specified byte order is the same as + * {@linkplain ByteOrder#nativeOrder() + * the platform native order}, + * since this method will not need to reorder + * the bytes of lane values. + * + * @param species species of desired vector + * @param ms the memory segment + * @param offset the offset into the memory segment + * @param bo the intended byte order + * @param m the mask controlling lane selection + * @return a vector loaded from the memory segment + * @throws IndexOutOfBoundsException + * if {@code offset+N*2 < 0} + * or {@code offset+N*2 >= ms.byteSize()} + * for any lane {@code N} in the vector + * where the mask is set + * @throws IllegalStateException if the memory segment's session is not alive, + * or if access occurs from a thread other than the thread owning the session. + * @since 19 + */ + @ForceInline + public static + HalffloatVector fromMemorySegment(VectorSpecies species, + MemorySegment ms, long offset, + ByteOrder bo, + VectorMask m) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + if (VectorIntrinsics.indexInRange(offset, vsp.vectorByteSize(), ms.byteSize())) { + return vsp.dummyVector().fromMemorySegment0(ms, offset, m, OFFSET_IN_RANGE).maybeSwap(bo); + } + + checkMaskFromIndexSize(offset, vsp, m, 2, ms.byteSize()); + return vsp.dummyVector().fromMemorySegment0(ms, offset, m, OFFSET_OUT_OF_RANGE).maybeSwap(bo); + } + + // Memory store operations + + /** + * Stores this vector into an array of type {@code Float16[]} + * starting at an offset. + *

+ * For each vector lane, where {@code N} is the vector lane index, + * the lane element at index {@code N} is stored into the array + * element {@code a[offset+N]}. + * + * @param a the array, of type {@code Float16[]} + * @param offset the offset into the array + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + */ + @ForceInline + public final + void intoArray(Float16[] a, int offset) { + offset = checkFromIndexSize(offset, length(), a.length); + HalffloatSpecies vsp = vspecies(); + VectorSupport.store( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), false, + this, + a, offset, + (arr, off, v) + -> v.stOp(arr, (int) off, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + + /** + * Stores this vector into an array of type {@code Float16[]} + * starting at offset and using a mask. + *

+ * For each vector lane, where {@code N} is the vector lane index, + * the lane element at index {@code N} is stored into the array + * element {@code a[offset+N]}. + * If the mask lane at {@code N} is unset then the corresponding + * array element {@code a[offset+N]} is left unchanged. + *

+ * Array range checking is done for lanes where the mask is set. + * Lanes where the mask is unset are not stored and do not need + * to correspond to legitimate elements of {@code a}. + * That is, unset lanes may correspond to array indexes less than + * zero or beyond the end of the array. + * + * @param a the array, of type {@code Float16[]} + * @param offset the offset into the array + * @param m the mask controlling lane storage + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + * where the mask is set + */ + @ForceInline + public final + void intoArray(Float16[] a, int offset, + VectorMask m) { + if (m.allTrue()) { + intoArray(a, offset); + } else { + HalffloatSpecies vsp = vspecies(); + if (!VectorIntrinsics.indexInRange(offset, vsp.length(), a.length)) { + checkMaskFromIndexSize(offset, vsp, m, 1, a.length); + } + intoArray0(a, offset, m); + } + } + + /** + * Scatters this vector into an array of type {@code Float16[]} + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an index map. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + *

+ * For each vector lane, where {@code N} is the vector lane index, + * the lane element at index {@code N} is stored into the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]]}. + * + * @param a the array + * @param offset an offset to combine with the index map offsets + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public final + void intoArray(Float16[] a, int offset, + int[] indexMap, int mapOffset) { + stOp(a, offset, + (arr, off, i, e) -> { + int j = indexMap[mapOffset + i]; + arr[off + j] = e; + }); + } + + /** + * Scatters this vector into an array of type {@code Float16[]}, + * under the control of a mask, and + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an index map. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + *

+ * For each vector lane, where {@code N} is the vector lane index, + * if the mask lane at index {@code N} is set then + * the lane element at index {@code N} is stored into the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]]}. + * + * @param a the array + * @param offset an offset to combine with the index map offsets + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @param m the mask + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * where the mask is set + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public final + void intoArray(Float16[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask m) { + stOp(a, offset, m, + (arr, off, i, e) -> { + int j = indexMap[mapOffset + i]; + arr[off + j] = e; + }); + } + + + + /** + * {@inheritDoc} + * @since 19 + */ + @Override + @ForceInline + public final + void intoMemorySegment(MemorySegment ms, long offset, + ByteOrder bo) { + if (ms.isReadOnly()) { + throw new UnsupportedOperationException("Attempt to write a read-only segment"); + } + + offset = checkFromIndexSize(offset, byteSize(), ms.byteSize()); + maybeSwap(bo).intoMemorySegment0(ms, offset); + } + + /** + * {@inheritDoc} + * @since 19 + */ + @Override + @ForceInline + public final + void intoMemorySegment(MemorySegment ms, long offset, + ByteOrder bo, + VectorMask m) { + if (m.allTrue()) { + intoMemorySegment(ms, offset, bo); + } else { + if (ms.isReadOnly()) { + throw new UnsupportedOperationException("Attempt to write a read-only segment"); + } + HalffloatSpecies vsp = vspecies(); + if (!VectorIntrinsics.indexInRange(offset, vsp.vectorByteSize(), ms.byteSize())) { + checkMaskFromIndexSize(offset, vsp, m, 2, 
ms.byteSize()); + } + maybeSwap(bo).intoMemorySegment0(ms, offset, m); + } + } + + // ================================================ + + // Low-level memory operations. + // + // Note that all of these operations *must* inline into a context + // where the exact species of the involved vector is a + // compile-time constant. Otherwise, the intrinsic generation + // will fail and performance will suffer. + // + // In many cases this is achieved by re-deriving a version of the + // method in each concrete subclass (per species). The re-derived + // method simply calls one of these generic methods, with exact + // parameters for the controlling metadata, which is either a + // typed vector or constant species instance. + + // Unchecked loading operations in native byte order. + // Caller is responsible for applying index checks, masking, and + // byte swapping. + + /*package-private*/ + abstract + HalffloatVector fromArray0(Float16[] a, int offset); + @ForceInline + final + HalffloatVector fromArray0Template(Float16[] a, int offset) { + HalffloatSpecies vsp = vspecies(); + return VectorSupport.load( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), false, + a, offset, vsp, + (arr, off, s) -> s.ldOp(arr, (int) off, + (arr_, off_, i) -> arr_[off_ + i])); + } + + /*package-private*/ + abstract + HalffloatVector fromArray0(Float16[] a, int offset, VectorMask m, int offsetInRange); + @ForceInline + final + > + HalffloatVector fromArray0Template(Class maskClass, Float16[] a, int offset, M m, int offsetInRange) { + m.check(species()); + HalffloatSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), false, m, offsetInRange, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm, + (arr_, off_, i) -> arr_[off_ + i])); + } + + + + + abstract + HalffloatVector fromMemorySegment0(MemorySegment bb, long offset); + @ForceInline + 
final + HalffloatVector fromMemorySegment0Template(MemorySegment ms, long offset) { + HalffloatSpecies vsp = vspecies(); + return ScopedMemoryAccess.loadFromMemorySegment( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + (AbstractMemorySegmentImpl) ms, offset, vsp, + (msp, off, s) -> { + return s.ldLongOp((MemorySegment) msp, off, HalffloatVector::memorySegmentGet); + }); + } + + abstract + HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask m, int offsetInRange); + @ForceInline + final + > + HalffloatVector fromMemorySegment0Template(Class maskClass, MemorySegment ms, long offset, M m, int offsetInRange) { + HalffloatSpecies vsp = vspecies(); + m.check(vsp); + return ScopedMemoryAccess.loadFromMemorySegmentMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + (AbstractMemorySegmentImpl) ms, offset, m, vsp, offsetInRange, + (msp, off, s, vm) -> { + return s.ldLongOp((MemorySegment) msp, off, vm, HalffloatVector::memorySegmentGet); + }); + } + + // Unchecked storing operations in native byte order. + // Caller is responsible for applying index checks, masking, and + // byte swapping. 
+
+    // Per-species hook for the unmasked array store; each concrete vector
+    // class supplies the body (presumably by delegating to the template below).
+    abstract
+    void intoArray0(Float16[] a, int offset);
+
+    // Generic body for the unmasked array store. No index check is made here;
+    // the offset is passed straight to VectorSupport.store together with a
+    // lane-by-lane stOp lambda (presumably the non-intrinsic fallback path).
+    @ForceInline
+    final
+    void intoArray0Template(Float16[] a, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.store(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset), false,
+            this, a, offset,
+            (arr, off, v)
+            -> v.stOp(arr, (int) off,
+                      (arr_, off_, i, e) -> arr_[off_+i] = e));
+    }
+
+    // Per-species hook for the masked array store.
+    abstract
+    void intoArray0(Float16[] a, int offset, VectorMask<Float16> m);
+
+    // Generic body for the masked array store. maskClass is the concrete mask
+    // class handed through to VectorSupport.storeMasked; the mask is first
+    // checked against this vector's species.
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    void intoArray0Template(Class<M> maskClass, Float16[] a, int offset, M m) {
+        m.check(species());
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.storeMasked(
+            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset), false,
+            this, m, a, offset,
+            (arr, off, v, vm)
+            -> v.stOp(arr, (int) off, vm,
+                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
+    }
+
+
+
+    // Unmasked memory-segment store. No range or read-only check is made here;
+    // the segment is cast to AbstractMemorySegmentImpl and handed to
+    // ScopedMemoryAccess, with a lane-by-lane stLongOp lambda as last argument.
+    @ForceInline
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset) {
+        HalffloatSpecies vsp = vspecies();
+        ScopedMemoryAccess.storeIntoMemorySegment(
+                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+                this,
+                (AbstractMemorySegmentImpl) ms, offset,
+                (msp, off, v) -> {
+                    v.stLongOp((MemorySegment) msp, off, HalffloatVector::memorySegmentSet);
+                });
+    }
+
+    // Per-species hook for the masked memory-segment store.
+    abstract
+    void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Float16> m);
+
+    // Generic body for the masked memory-segment store. The mask is checked
+    // against this vector's species before the store is issued.
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
+        HalffloatSpecies vsp = vspecies();
+        m.check(vsp);
+        ScopedMemoryAccess.storeIntoMemorySegmentMasked(
+                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+                this, m,
+                (AbstractMemorySegmentImpl) ms, offset,
+                (msp, off, v, vm) -> {
+                    v.stLongOp((MemorySegment) msp, off, vm, HalffloatVector::memorySegmentSet);
+                });
+    }
+
+
+    // End of low-level memory operations.
+ + private static + void checkMaskFromIndexSize(int offset, + HalffloatSpecies vsp, + VectorMask m, + int scale, + int limit) { + ((AbstractMask)m) + .checkIndexByLane(offset, limit, vsp.iota(), scale); + } + + private static + void checkMaskFromIndexSize(long offset, + HalffloatSpecies vsp, + VectorMask m, + int scale, + long limit) { + ((AbstractMask)m) + .checkIndexByLane(offset, limit, vsp.iota(), scale); + } + + @ForceInline + private void conditionalStoreNYI(int offset, + HalffloatSpecies vsp, + VectorMask m, + int scale, + int limit) { + if (offset < 0 || offset + vsp.laneCount() * scale > limit) { + String msg = + String.format("unimplemented: store @%d in [0..%d), %s in %s", + offset, limit, m, vsp); + throw new AssertionError(msg); + } + } + + /*package-private*/ + @Override + @ForceInline + final + HalffloatVector maybeSwap(ByteOrder bo) { + if (bo != NATIVE_ENDIAN) { + return this.reinterpretAsBytes() + .rearrange(swapBytesShuffle()) + .reinterpretAsHalffloats(); + } + return this; + } + + static final int ARRAY_SHIFT = + 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_OBJECT_INDEX_SCALE); + static final long ARRAY_BASE = + Unsafe.ARRAY_OBJECT_BASE_OFFSET; + + @ForceInline + static long arrayAddress(Float16[] a, int index) { + return ARRAY_BASE + (((long)index) << ARRAY_SHIFT); + } + + + + @ForceInline + static long byteArrayAddress(byte[] a, int index) { + return Unsafe.ARRAY_BYTE_BASE_OFFSET + index; + } + + // ================================================ + + /// Reinterpreting view methods: + // lanewise reinterpret: viewAsXVector() + // keep shape, redraw lanes: reinterpretAsEs() + + /** + * {@inheritDoc} + */ + @ForceInline + @Override + public final ByteVector reinterpretAsBytes() { + // Going to ByteVector, pay close attention to byte order. 
+ assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN); + return asByteVectorRaw(); + //return asByteVectorRaw().rearrange(swapBytesShuffle()); + } + + /** + * {@inheritDoc} + */ + @ForceInline + @Override + public final ShortVector viewAsIntegralLanes() { + LaneType ilt = LaneType.FLOAT16.asIntegral(); + return (ShortVector) asVectorRaw(ilt); + } + + /** + * {@inheritDoc} + * + * @implNote This method always throws + * {@code UnsupportedOperationException}, because there is no floating + * point type of the same size as {@code Float16}. The return type + * of this method is arbitrarily designated as + * {@code Vector}. Future versions of this API may change the return + * type if additional floating point types become available. + */ + @ForceInline + @Override + public final + HalffloatVector + viewAsFloatingLanes() { + return this; + } + + // ================================================ + + /// Object methods: toString, equals, hashCode + // + // Object methods are defined as if via Arrays.toString, etc., + // is applied to the array of elements. Two equal vectors + // are required to have equal species and equal lane values. + + /** + * Returns a string representation of this vector, of the form + * {@code "[0,1,2...]"}, reporting the lane values of this vector, + * in lane order. + * + * The string is produced as if by a call to {@link + * java.util.Arrays#toString(Float16[]) Arrays.toString()}, + * as appropriate to the {@code Float16} array returned by + * {@link #toArray this.toArray()}. 
+ * + * @return a string of the form {@code "[0,1,2...]"} + * reporting the lane values of this vector + */ + @Override + @ForceInline + public final + String toString() { + // now that toArray is strongly typed, we can define this + return Arrays.toString(toArray()); + } + + /** + * {@inheritDoc} + */ + @Override + @ForceInline + public final + boolean equals(Object obj) { + if (obj instanceof Vector) { + Vector that = (Vector) obj; + if (this.species().equals(that.species())) { + return this.eq(that.check(this.species())).allTrue(); + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + @ForceInline + public final + int hashCode() { + // now that toArray is strongly typed, we can define this + return Objects.hash(species(), Arrays.hashCode(toArray())); + } + + // ================================================ + + // Species + + /** + * Class representing {@link HalffloatVector}'s of the same {@link VectorShape VectorShape}. + */ + /*package-private*/ + static final class HalffloatSpecies extends AbstractSpecies { + private HalffloatSpecies(VectorShape shape, + Class vectorType, + Class> maskType, + Function vectorFactory) { + super(shape, LaneType.of(Float16.class), + vectorType, maskType, + vectorFactory); + assert(this.elementSize() == Float16.SIZE); + } + + // Specializing overrides: + + @Override + @ForceInline + public final Class elementType() { + return Float16.class; + } + + @Override + @ForceInline + final Class genericElementType() { + return Float16.class; + } + + @SuppressWarnings("unchecked") + @Override + @ForceInline + public final Class vectorType() { + return (Class) vectorType; + } + + @Override + @ForceInline + public final long checkValue(long e) { + longToElementBits(e); // only for exception + return e; + } + + /*package-private*/ + @Override + @ForceInline + final HalffloatVector broadcastBits(long bits) { + return (HalffloatVector) + VectorSupport.fromBitsCoerced( + vectorType, Float16.class, laneCount, + bits, 
MODE_BROADCAST, this, + (bits_, s_) -> s_.rvOp(i -> bits_)); + } + + /*package-private*/ + @ForceInline + final HalffloatVector broadcast(Float16 e) { + return broadcastBits(toBits(e)); + } + + @Override + @ForceInline + public final HalffloatVector broadcast(long e) { + return broadcastBits(longToElementBits(e)); + } + + /*package-private*/ + final @Override + @ForceInline + long longToElementBits(long value) { + // Do the conversion, and then test it for failure. + Float16 e = Float16.valueOf(value); + if (e.longValue() != value) { + throw badElementBits(value, e); + } + return toBits(e); + } + + /*package-private*/ + @ForceInline + static long toIntegralChecked(Float16 e, boolean convertToInt) { + long value = convertToInt ? e.intValue() : e.longValue(); + if (value != e.longValue()) { + throw badArrayBits(e, convertToInt, value); + } + return value; + } + + /* this non-public one is for internal conversions */ + @Override + @ForceInline + final HalffloatVector fromIntValues(int[] values) { + VectorIntrinsics.requireLength(values.length, laneCount); + Float16[] va = new Float16[laneCount()]; + for (int i = 0; i < va.length; i++) { + int lv = values[i]; + Float16 v = Float16.valueOf(lv); + va[i] = v; + if ( v.intValue() != lv) { + throw badElementBits(lv, v); + } + } + return dummyVector().fromArray0(va, 0); + } + + // Virtual constructors + + @ForceInline + @Override final + public HalffloatVector fromArray(Object a, int offset) { + // User entry point + // Defer only to the equivalent method on the vector class, using the same inputs + return HalffloatVector + .fromArray(this, (Float16[]) a, offset); + } + + @ForceInline + @Override final + public HalffloatVector fromMemorySegment(MemorySegment ms, long offset, ByteOrder bo) { + // User entry point + // Defer only to the equivalent method on the vector class, using the same inputs + return HalffloatVector + .fromMemorySegment(this, ms, offset, bo); + } + + @ForceInline + @Override final + HalffloatVector 
dummyVector() { + return (HalffloatVector) super.dummyVector(); + } + + /*package-private*/ + final @Override + @ForceInline + HalffloatVector rvOp(RVOp f) { + Float16[] res = new Float16[laneCount()]; + for (int i = 0; i < res.length; i++) { + short bits = (short) f.apply(i); + res[i] = fromBits(bits); + } + return dummyVector().vectorFactory(res); + } + + HalffloatVector vOp(FVOp f) { + Float16[] res = new Float16[laneCount()]; + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i); + } + return dummyVector().vectorFactory(res); + } + + HalffloatVector vOp(VectorMask m, FVOp f) { + Float16[] res = new Float16[laneCount()]; + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < res.length; i++) { + if (mbits[i]) { + res[i] = f.apply(i); + } + } + return dummyVector().vectorFactory(res); + } + + /*package-private*/ + @ForceInline + HalffloatVector ldOp(M memory, int offset, + FLdOp f) { + return dummyVector().ldOp(memory, offset, f); + } + + /*package-private*/ + @ForceInline + HalffloatVector ldOp(M memory, int offset, + VectorMask m, + FLdOp f) { + return dummyVector().ldOp(memory, offset, m, f); + } + + /*package-private*/ + @ForceInline + HalffloatVector ldLongOp(MemorySegment memory, long offset, + FLdLongOp f) { + return dummyVector().ldLongOp(memory, offset, f); + } + + /*package-private*/ + @ForceInline + HalffloatVector ldLongOp(MemorySegment memory, long offset, + VectorMask m, + FLdLongOp f) { + return dummyVector().ldLongOp(memory, offset, m, f); + } + + /*package-private*/ + @ForceInline + void stOp(M memory, int offset, FStOp f) { + dummyVector().stOp(memory, offset, f); + } + + /*package-private*/ + @ForceInline + void stOp(M memory, int offset, + AbstractMask m, + FStOp f) { + dummyVector().stOp(memory, offset, m, f); + } + + /*package-private*/ + @ForceInline + void stLongOp(MemorySegment memory, long offset, FStLongOp f) { + dummyVector().stLongOp(memory, offset, f); + } + + /*package-private*/ + @ForceInline + void 
stLongOp(MemorySegment memory, long offset, + AbstractMask m, + FStLongOp f) { + dummyVector().stLongOp(memory, offset, m, f); + } + + // N.B. Make sure these constant vectors and + // masks load up correctly into registers. + // + // Also, see if we can avoid all that switching. + // Could we cache both vectors and both masks in + // this species object? + + // Zero and iota vector access + @Override + @ForceInline + public final HalffloatVector zero() { + if ((Class) vectorType() == HalffloatMaxVector.class) + return HalffloatMaxVector.ZERO; + switch (vectorBitSize()) { + case 64: return Halffloat64Vector.ZERO; + case 128: return Halffloat128Vector.ZERO; + case 256: return Halffloat256Vector.ZERO; + case 512: return Halffloat512Vector.ZERO; + } + throw new AssertionError(); + } + + @Override + @ForceInline + public final HalffloatVector iota() { + if ((Class) vectorType() == HalffloatMaxVector.class) + return HalffloatMaxVector.IOTA; + switch (vectorBitSize()) { + case 64: return Halffloat64Vector.IOTA; + case 128: return Halffloat128Vector.IOTA; + case 256: return Halffloat256Vector.IOTA; + case 512: return Halffloat512Vector.IOTA; + } + throw new AssertionError(); + } + + // Mask access + @Override + @ForceInline + public final VectorMask maskAll(boolean bit) { + if ((Class) vectorType() == HalffloatMaxVector.class) + return HalffloatMaxVector.HalffloatMaxMask.maskAll(bit); + switch (vectorBitSize()) { + case 64: return Halffloat64Vector.Halffloat64Mask.maskAll(bit); + case 128: return Halffloat128Vector.Halffloat128Mask.maskAll(bit); + case 256: return Halffloat256Vector.Halffloat256Mask.maskAll(bit); + case 512: return Halffloat512Vector.Halffloat512Mask.maskAll(bit); + } + throw new AssertionError(); + } + } + + /** + * Finds a species for an element type of {@code Float16} and shape. 
+ * + * @param s the shape + * @return a species for an element type of {@code Float16} and shape + * @throws IllegalArgumentException if no such species exists for the shape + */ + static HalffloatSpecies species(VectorShape s) { + Objects.requireNonNull(s); + switch (s.switchKey) { + case VectorShape.SK_64_BIT: return (HalffloatSpecies) SPECIES_64; + case VectorShape.SK_128_BIT: return (HalffloatSpecies) SPECIES_128; + case VectorShape.SK_256_BIT: return (HalffloatSpecies) SPECIES_256; + case VectorShape.SK_512_BIT: return (HalffloatSpecies) SPECIES_512; + case VectorShape.SK_Max_BIT: return (HalffloatSpecies) SPECIES_MAX; + default: throw new IllegalArgumentException("Bad shape: " + s); + } + } + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */ + public static final VectorSpecies SPECIES_64 + = new HalffloatSpecies(VectorShape.S_64_BIT, + Halffloat64Vector.class, + Halffloat64Vector.Halffloat64Mask.class, + Halffloat64Vector::new); + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ + public static final VectorSpecies SPECIES_128 + = new HalffloatSpecies(VectorShape.S_128_BIT, + Halffloat128Vector.class, + Halffloat128Vector.Halffloat128Mask.class, + Halffloat128Vector::new); + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */ + public static final VectorSpecies SPECIES_256 + = new HalffloatSpecies(VectorShape.S_256_BIT, + Halffloat256Vector.class, + Halffloat256Vector.Halffloat256Mask.class, + Halffloat256Vector::new); + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. 
*/ + public static final VectorSpecies SPECIES_512 + = new HalffloatSpecies(VectorShape.S_512_BIT, + Halffloat512Vector.class, + Halffloat512Vector.Halffloat512Mask.class, + Halffloat512Vector::new); + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */ + public static final VectorSpecies SPECIES_MAX + = new HalffloatSpecies(VectorShape.S_Max_BIT, + HalffloatMaxVector.class, + HalffloatMaxVector.HalffloatMaxMask.class, + HalffloatMaxVector::new); + + /** + * Preferred species for {@link HalffloatVector}s. + * A preferred species is a species of maximal bit-size for the platform. + */ + public static final VectorSpecies SPECIES_PREFERRED + = (HalffloatSpecies) VectorSpecies.ofPreferred(Float16.class); +} + diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java index 0c83b037454..0af77b15e6d 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + int res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Int128Mask.class, (Int128Mask) m); // specialized + int res = super.reduceLanesTemplate(op, Int128Mask.class, (Int128Mask) m); // specialized + return (long) res; } @ForceInline @@ -658,7 +660,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Int128Mask indexPartiallyInUpperRange(long offset, long limit) { return (Int128Mask) VectorSupport.indexPartiallyInUpperRange( - Int128Mask.class, int.class, VLENGTH, offset, limit, + Int128Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Int128Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java index abb10696ba4..61f66403845 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + int res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Int256Mask.class, (Int256Mask) m); // specialized + int res = super.reduceLanesTemplate(op, Int256Mask.class, (Int256Mask) m); // specialized + return (long) res; } @ForceInline @@ -666,7 +668,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Int256Mask indexPartiallyInUpperRange(long offset, long limit) { return (Int256Mask) VectorSupport.indexPartiallyInUpperRange( - Int256Mask.class, int.class, VLENGTH, offset, limit, + Int256Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Int256Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java index 7b6435e4c0a..1c94bd86112 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + int res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Int512Mask.class, (Int512Mask) m); // specialized + int res = super.reduceLanesTemplate(op, Int512Mask.class, (Int512Mask) m); // specialized + return (long) res; } @ForceInline @@ -682,7 +684,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Int512Mask indexPartiallyInUpperRange(long offset, long limit) { return (Int512Mask) VectorSupport.indexPartiallyInUpperRange( - Int512Mask.class, int.class, VLENGTH, offset, limit, + Int512Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Int512Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java index 491010be90e..0b06265b0c1 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + int res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Int64Mask.class, (Int64Mask) m); // specialized + int res = super.reduceLanesTemplate(op, Int64Mask.class, (Int64Mask) m); // specialized + return (long) res; } @ForceInline @@ -654,7 +656,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Int64Mask indexPartiallyInUpperRange(long offset, long limit) { return (Int64Mask) VectorSupport.indexPartiallyInUpperRange( - Int64Mask.class, int.class, VLENGTH, offset, limit, + Int64Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Int64Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java index e20829f7c4f..331e6507c88 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + int res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, IntMaxMask.class, (IntMaxMask) m); // specialized + int res = super.reduceLanesTemplate(op, IntMaxMask.class, (IntMaxMask) m); // specialized + return (long) res; } @ForceInline @@ -652,7 +654,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ IntMaxMask indexPartiallyInUpperRange(long offset, long limit) { return (IntMaxMask) VectorSupport.indexPartiallyInUpperRange( - IntMaxMask.class, int.class, VLENGTH, offset, limit, + IntMaxMask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (IntMaxMask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java index 3317e25e73e..2fdd7c4baf3 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java @@ -2205,8 +2205,7 @@ final IntVector addIndexTemplate(int scale) { // instruction directly, load IOTA from memory // and multiply. IntVector iota = s.iota(); - int sc = (int) scale_; - return v.add(sc == 1 ? iota : iota.mul(sc)); + return v.add(scale_ == 1 ? 
iota : iota.mul((int)scale_)); }); } @@ -2269,7 +2268,8 @@ IntVector sliceTemplate(int origin, Vector v1) { that.check(this); Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((int)(length() - origin)))); + int pivotidx = (int)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2299,7 +2299,8 @@ IntVector slice(int origin, IntVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((int)(length() - origin)))); + int pivotidx = (int)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2361,7 +2362,7 @@ IntVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((int)(origin)))); + broadcast((int)(origin))); iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2926,7 +2927,7 @@ public final double[] toDoubleArray() { int[] a = toArray(); double[] res = new double[a.length]; for (int i = 0; i < a.length; i++) { - res[i] = (double) a[i]; + res[i] = ((double) a[i]); } return res; } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java index 53fa773555f..efece4bff1c 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java +++ 
b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java @@ -40,7 +40,8 @@ enum LaneType { BYTE(byte.class, Byte.class, byte[].class, 'I', -1, Byte.SIZE, T_BYTE), SHORT(short.class, Short.class, short[].class, 'I', -1, Short.SIZE, T_SHORT), INT(int.class, Integer.class, int[].class, 'I', -1, Integer.SIZE, T_INT), - LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, T_LONG); + LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, T_LONG), + FLOAT16(Float16.class, Float16.class, Float16[].class, 'F', 11, Float16.SIZE, T_FLOAT16); LaneType(Class elementType, Class genericElementType, @@ -65,14 +66,14 @@ enum LaneType { // int:128 or int:4 or float:16, report the size in the // printName. If we do unsigned or vector or bit lane types, // report that condition also. - this.typeChar = genericElementType.getSimpleName().charAt(0); - assert("FDBSIL".indexOf(typeChar) == ordinal()) : this; + this.typeChar = genericElementType.getSimpleName().charAt(getElementTypeIndex(elementType)); + assert("FDBSILo".indexOf(typeChar) == ordinal()) : this; // Same as in JVMS, org.objectweb.asm.Opcodes, etc.: this.basicType = basicType; assert(basicType == ( (elementSizeLog2 - /*lg(Byte.SIZE)*/ 3) | (elementKind == 'F' ? 4 : 8))) : this; - assert("....zcFDBSILoav..".charAt(basicType) == typeChar); + assert("....zoFDBSILSoav..".charAt(basicType) == typeChar); } final Class elementType; @@ -108,13 +109,21 @@ LaneType asFloating() { return asFloating; } + static int getElementTypeIndex(Class elementType) { + if (elementType == java.lang.Float16.class) { + return 2; + } else { + return 0; + } + } + /** Decode a class mirror for an element type into an enum. */ @ForceInline static LaneType of(Class elementType) { // The following two lines are expected to // constant fold in the JIT, if the argument // is constant and this method is inlined. 
- int c0 = elementType.getName().charAt(0); + int c0 = elementType.getSimpleName().charAt(getElementTypeIndex(elementType)); LaneType type = ENUM_FROM_C0[c0 & C0_MASK]; // This line can short-circuit if a valid // elementType constant was passed: @@ -184,7 +193,8 @@ RuntimeException badElementType(Class elementType, Object expected) { SK_SHORT = 4, SK_INT = 5, SK_LONG = 6, - SK_LIMIT = 7; + SK_FLOAT16 = 7, + SK_LIMIT = 8; /*package-private*/ @ForceInline @@ -225,7 +235,7 @@ static LaneType ofBasicType(int bt) { } catch (ReflectiveOperationException ex) { throw new AssertionError(ex); } - int c0 = value.elementType.getName().charAt(0); + int c0 = value.elementType.getSimpleName().charAt(getElementTypeIndex(value.elementType)); c0 &= C0_MASK; assert(valuesByC0[c0] == null); valuesByC0[c0] = value; diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java index cf552c23a80..aa10a6cac92 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + long res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Long128Mask.class, (Long128Mask) m); // specialized + long res = super.reduceLanesTemplate(op, Long128Mask.class, (Long128Mask) m); // specialized + return (long) res; } @ForceInline @@ -644,7 +646,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Long128Mask indexPartiallyInUpperRange(long offset, long limit) { return (Long128Mask) VectorSupport.indexPartiallyInUpperRange( - Long128Mask.class, long.class, VLENGTH, offset, limit, + Long128Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Long128Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java index ffa1029bdd8..28826cddfc9 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + long res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Long256Mask.class, (Long256Mask) m); // specialized + long res = super.reduceLanesTemplate(op, Long256Mask.class, (Long256Mask) m); // specialized + return (long) res; } @ForceInline @@ -648,7 +650,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Long256Mask indexPartiallyInUpperRange(long offset, long limit) { return (Long256Mask) VectorSupport.indexPartiallyInUpperRange( - Long256Mask.class, long.class, VLENGTH, offset, limit, + Long256Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Long256Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java index aea8fe0fe6c..11483f64497 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + long res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Long512Mask.class, (Long512Mask) m); // specialized + long res = super.reduceLanesTemplate(op, Long512Mask.class, (Long512Mask) m); // specialized + return (long) res; } @ForceInline @@ -656,7 +658,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Long512Mask indexPartiallyInUpperRange(long offset, long limit) { return (Long512Mask) VectorSupport.indexPartiallyInUpperRange( - Long512Mask.class, long.class, VLENGTH, offset, limit, + Long512Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Long512Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java index ffb07535d65..bd33341a8ec 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + long res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Long64Mask.class, (Long64Mask) m); // specialized + long res = super.reduceLanesTemplate(op, Long64Mask.class, (Long64Mask) m); // specialized + return (long) res; } @ForceInline @@ -642,7 +644,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Long64Mask indexPartiallyInUpperRange(long offset, long limit) { return (Long64Mask) VectorSupport.indexPartiallyInUpperRange( - Long64Mask.class, long.class, VLENGTH, offset, limit, + Long64Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Long64Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java index e4197cb7f2e..1fef8e276f0 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + long res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, LongMaxMask.class, (LongMaxMask) m); // specialized + long res = super.reduceLanesTemplate(op, LongMaxMask.class, (LongMaxMask) m); // specialized + return (long) res; } @ForceInline @@ -642,7 +644,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ LongMaxMask indexPartiallyInUpperRange(long offset, long limit) { return (LongMaxMask) VectorSupport.indexPartiallyInUpperRange( - LongMaxMask.class, long.class, VLENGTH, offset, limit, + LongMaxMask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (LongMaxMask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java index 9dd3f2eb136..64f51bd333d 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java @@ -2092,8 +2092,7 @@ final LongVector addIndexTemplate(int scale) { // instruction directly, load IOTA from memory // and multiply. LongVector iota = s.iota(); - long sc = (long) scale_; - return v.add(sc == 1 ? iota : iota.mul(sc)); + return v.add(scale_ == 1 ? 
iota : iota.mul((long)scale_)); }); } @@ -2135,7 +2134,8 @@ LongVector sliceTemplate(int origin, Vector v1) { that.check(this); Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((long)(length() - origin)))); + long pivotidx = (long)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2165,7 +2165,8 @@ LongVector slice(int origin, LongVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((long)(length() - origin)))); + long pivotidx = (long)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2227,7 +2228,7 @@ LongVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((long)(origin)))); + broadcast((long)(origin))); iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2787,7 +2788,7 @@ public final double[] toDoubleArray() { long[] a = toArray(); double[] res = new double[a.length]; for (int i = 0; i < a.length; i++) { - res[i] = (double) a[i]; + res[i] = ((double) a[i]); } return res; } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java index 8ae0638e4f3..3c2b673a3bb 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java +++ 
b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + short res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Short128Mask.class, (Short128Mask) m); // specialized + short res = super.reduceLanesTemplate(op, Short128Mask.class, (Short128Mask) m); // specialized + return (long) res; } @ForceInline @@ -666,7 +668,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Short128Mask indexPartiallyInUpperRange(long offset, long limit) { return (Short128Mask) VectorSupport.indexPartiallyInUpperRange( - Short128Mask.class, short.class, VLENGTH, offset, limit, + Short128Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Short128Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java index cd9d8ceb887..32db9a3a292 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + short res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Short256Mask.class, (Short256Mask) m); // specialized + short res = super.reduceLanesTemplate(op, Short256Mask.class, (Short256Mask) m); // specialized + return (long) res; } @ForceInline @@ -682,7 +684,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Short256Mask indexPartiallyInUpperRange(long offset, long limit) { return (Short256Mask) VectorSupport.indexPartiallyInUpperRange( - Short256Mask.class, short.class, VLENGTH, offset, limit, + Short256Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Short256Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java index 2a959a8181c..cc419227f91 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + short res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Short512Mask.class, (Short512Mask) m); // specialized + short res = super.reduceLanesTemplate(op, Short512Mask.class, (Short512Mask) m); // specialized + return (long) res; } @ForceInline @@ -714,7 +716,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Short512Mask indexPartiallyInUpperRange(long offset, long limit) { return (Short512Mask) VectorSupport.indexPartiallyInUpperRange( - Short512Mask.class, short.class, VLENGTH, offset, limit, + Short512Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Short512Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java index 6090e9cf0d1..b11dc59187c 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + short res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, Short64Mask.class, (Short64Mask) m); // specialized + short res = super.reduceLanesTemplate(op, Short64Mask.class, (Short64Mask) m); // specialized + return (long) res; } @ForceInline @@ -658,7 +660,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ Short64Mask indexPartiallyInUpperRange(long offset, long limit) { return (Short64Mask) VectorSupport.indexPartiallyInUpperRange( - Short64Mask.class, short.class, VLENGTH, offset, limit, + Short64Mask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (Short64Mask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java index d451cd4443f..80b4276cc8b 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op, @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + short res = super.reduceLanesTemplate(op); // specialized + return (long) res; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, ShortMaxMask.class, (ShortMaxMask) m); // specialized + short res = super.reduceLanesTemplate(op, ShortMaxMask.class, (ShortMaxMask) m); // specialized + return (long) res; } @ForceInline @@ -652,7 +654,7 @@ public VectorMask cast(VectorSpecies dsp) { /*package-private*/ ShortMaxMask indexPartiallyInUpperRange(long offset, long limit) { return (ShortMaxMask) VectorSupport.indexPartiallyInUpperRange( - ShortMaxMask.class, short.class, VLENGTH, offset, limit, + ShortMaxMask.class, ETYPE, VLENGTH, offset, limit, (o, l) -> (ShortMaxMask) TRUE_MASK.indexPartiallyInRange(o, l)); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java index 84f542f07ff..c5adce31ad8 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java @@ -2221,8 +2221,7 @@ final ShortVector addIndexTemplate(int scale) { // instruction directly, load IOTA from memory // and multiply. ShortVector iota = s.iota(); - short sc = (short) scale_; - return v.add(sc == 1 ? iota : iota.mul(sc)); + return v.add(scale_ == 1 ? 
iota : iota.mul((short)scale_)); }); } @@ -2285,7 +2284,8 @@ ShortVector sliceTemplate(int origin, Vector v1) { that.check(this); Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin)))); + short pivotidx = (short)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2315,7 +2315,8 @@ ShortVector slice(int origin, ShortVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin)))); + short pivotidx = (short)(length() - origin); + VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2377,7 +2378,7 @@ ShortVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle iota = iotaShuffle(); VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((short)(origin)))); + broadcast((short)(origin))); iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2949,7 +2950,7 @@ public final double[] toDoubleArray() { short[] a = toArray(); double[] res = new double[a.length]; for (int i = 0; i < a.length; i++) { - res[i] = (double) a[i]; + res[i] = ((double) a[i]); } return res; } @@ -4011,11 +4012,10 @@ public final ShortVector viewAsIntegralLanes() { @ForceInline @Override public final - Vector + HalffloatVector viewAsFloatingLanes() { - LaneType flt = LaneType.SHORT.asFloating(); - // asFloating() will throw UnsupportedOperationException for the unsupported type short - throw new 
AssertionError("Cannot reach here"); + LaneType flt = LaneType.FLOAT16.asFloating(); + return (HalffloatVector) asVectorRaw(flt); } // ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java index d34ac79e7c3..45aa8e22a4f 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java @@ -3027,6 +3027,19 @@ public abstract VectorMask compare(VectorOperators.Comparison op, */ public abstract DoubleVector reinterpretAsDoubles(); + /** + * Reinterprets this vector as a vector of the same shape + * and contents but a lane type of {@code Float16}, + * where the lanes are assembled from successive bytes + * according to little-endian order. + * It is a convenience method for the expression + * {@code reinterpretShape(species().withLanes(Float16.class))}. + * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}. + * + * @return a {@code HalffloatVector} with the same shape and information content + */ + public abstract HalffloatVector reinterpretAsHalffloats(); + /** * Views this vector as a vector of the same shape, length, and * contents, but a lane type that is not a floating-point type. 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java index 1ffbcef821a..622cb548b76 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java @@ -237,10 +237,18 @@ public static VectorShape preferredShape() { return computePreferredShape(); } + private static Class getEffectiveLaneType(Class elementType) { + if (elementType == Float16.class) { + return short.class; + } else { + return elementType; + } + } + private static VectorShape computePreferredShape() { int prefBitSize = Integer.MAX_VALUE; for (LaneType type : LaneType.values()) { - Class etype = type.elementType; + Class etype = getEffectiveLaneType(type.elementType); prefBitSize = Math.min(prefBitSize, getMaxVectorBitSize(etype)); } // If these assertions fail, we must reconsider our API portability assumptions. diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template index ad878268404..03459b9a8b6 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template @@ -46,12 +46,12 @@ import static jdk.incubator.vector.VectorOperators.*; /** * A specialized {@link Vector} representing an ordered immutable sequence of - * {@code $type$} values. + * {@code $elemtype$} values. 
*/ @SuppressWarnings("cast") // warning: redundant cast public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { - $abstractvectortype$($type$[] vec) { + $abstractvectortype$($elemtype$[] vec) { super(vec); } @@ -61,7 +61,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { static final int FORBID_OPCODE_KIND = VO_ONLYFP; #end[FP] - static final ValueLayout.Of$Type$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withByteAlignment(1); + static final ValueLayout.Of{#if[FP16]?Short:$Elemtype$} ELEMENT_LAYOUT = ValueLayout.JAVA_{#if[FP16]?SHORT:$TYPE$}.withByteAlignment(1); @ForceInline static int opCode(Operator op) { @@ -98,7 +98,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // Virtualized getter /*package-private*/ - abstract $type$[] vec(); + abstract $elemtype$[] vec(); // Virtualized constructors @@ -107,7 +107,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * It is an error if the array is aliased elsewhere. */ /*package-private*/ - abstract $abstractvectortype$ vectorFactory($type$[] vec); + abstract $abstractvectortype$ vectorFactory($elemtype$[] vec); /** * Build a mask directly using my species. 
@@ -122,14 +122,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // Constant loader (takes dummy as vector arg) interface FVOp { - $type$ apply(int i); + $elemtype$ apply(int i); } /*package-private*/ @ForceInline final $abstractvectortype$ vOp(FVOp f) { - $type$[] res = new $type$[length()]; + $elemtype$[] res = new $elemtype$[length()]; for (int i = 0; i < res.length; i++) { res[i] = f.apply(i); } @@ -139,7 +139,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline final $abstractvectortype$ vOp(VectorMask<$Boxtype$> m, FVOp f) { - $type$[] res = new $type$[length()]; + $elemtype$[] res = new $elemtype$[length()]; boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < res.length; i++) { if (mbits[i]) { @@ -153,7 +153,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ interface FUnOp { - $type$ apply(int i, $type$ a); + $elemtype$ apply(int i, $elemtype$ a); } /*package-private*/ @@ -162,8 +162,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline final $abstractvectortype$ uOpTemplate(FUnOp f) { - $type$[] vec = vec(); - $type$[] res = new $type$[length()]; + $elemtype$[] vec = vec(); + $elemtype$[] res = new $elemtype$[length()]; for (int i = 0; i < res.length; i++) { res[i] = f.apply(i, vec[i]); } @@ -181,8 +181,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { if (m == null) { return uOpTemplate(f); } - $type$[] vec = vec(); - $type$[] res = new $type$[length()]; + $elemtype$[] vec = vec(); + $elemtype$[] res = new $elemtype$[length()]; boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < res.length; i++) { res[i] = mbits[i] ? 
f.apply(i, vec[i]) : vec[i]; @@ -194,7 +194,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ interface FBinOp { - $type$ apply(int i, $type$ a, $type$ b); + $elemtype$ apply(int i, $elemtype$ a, $elemtype$ b); } /*package-private*/ @@ -205,9 +205,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { final $abstractvectortype$ bOpTemplate(Vector<$Boxtype$> o, FBinOp f) { - $type$[] res = new $type$[length()]; - $type$[] vec1 = this.vec(); - $type$[] vec2 = (($abstractvectortype$)o).vec(); + $elemtype$[] res = new $elemtype$[length()]; + $elemtype$[] vec1 = this.vec(); + $elemtype$[] vec2 = (($abstractvectortype$)o).vec(); for (int i = 0; i < res.length; i++) { res[i] = f.apply(i, vec1[i], vec2[i]); } @@ -227,9 +227,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { if (m == null) { return bOpTemplate(o, f); } - $type$[] res = new $type$[length()]; - $type$[] vec1 = this.vec(); - $type$[] vec2 = (($abstractvectortype$)o).vec(); + $elemtype$[] res = new $elemtype$[length()]; + $elemtype$[] vec1 = this.vec(); + $elemtype$[] vec2 = (($abstractvectortype$)o).vec(); boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < res.length; i++) { res[i] = mbits[i] ? 
f.apply(i, vec1[i], vec2[i]) : vec1[i]; @@ -241,7 +241,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ interface FTriOp { - $type$ apply(int i, $type$ a, $type$ b, $type$ c); + $elemtype$ apply(int i, $elemtype$ a, $elemtype$ b, $elemtype$ c); } /*package-private*/ @@ -254,10 +254,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ tOpTemplate(Vector<$Boxtype$> o1, Vector<$Boxtype$> o2, FTriOp f) { - $type$[] res = new $type$[length()]; - $type$[] vec1 = this.vec(); - $type$[] vec2 = (($abstractvectortype$)o1).vec(); - $type$[] vec3 = (($abstractvectortype$)o2).vec(); + $elemtype$[] res = new $elemtype$[length()]; + $elemtype$[] vec1 = this.vec(); + $elemtype$[] vec2 = (($abstractvectortype$)o1).vec(); + $elemtype$[] vec3 = (($abstractvectortype$)o2).vec(); for (int i = 0; i < res.length; i++) { res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]); } @@ -279,10 +279,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { if (m == null) { return tOpTemplate(o1, o2, f); } - $type$[] res = new $type$[length()]; - $type$[] vec1 = this.vec(); - $type$[] vec2 = (($abstractvectortype$)o1).vec(); - $type$[] vec3 = (($abstractvectortype$)o2).vec(); + $elemtype$[] res = new $elemtype$[length()]; + $elemtype$[] vec1 = this.vec(); + $elemtype$[] vec2 = (($abstractvectortype$)o1).vec(); + $elemtype$[] vec3 = (($abstractvectortype$)o2).vec(); boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < res.length; i++) { res[i] = mbits[i] ? 
f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i]; @@ -294,15 +294,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ abstract - $type$ rOp($type$ v, VectorMask<$Boxtype$> m, FBinOp f); + $elemtype$ rOp($elemtype$ v, VectorMask<$Boxtype$> m, FBinOp f); @ForceInline final - $type$ rOpTemplate($type$ v, VectorMask<$Boxtype$> m, FBinOp f) { + $elemtype$ rOpTemplate($elemtype$ v, VectorMask<$Boxtype$> m, FBinOp f) { if (m == null) { return rOpTemplate(v, f); } - $type$[] vec = vec(); + $elemtype$[] vec = vec(); boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < vec.length; i++) { v = mbits[i] ? f.apply(i, v, vec[i]) : v; @@ -312,8 +312,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline final - $type$ rOpTemplate($type$ v, FBinOp f) { - $type$[] vec = vec(); + $elemtype$ rOpTemplate($elemtype$ v, FBinOp f) { + $elemtype$[] vec = vec(); for (int i = 0; i < vec.length; i++) { v = f.apply(i, v, vec[i]); } @@ -324,7 +324,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ interface FLdOp { - $type$ apply(M memory, int offset, int i); + $elemtype$ apply(M memory, int offset, int i); } /*package-private*/ @@ -333,7 +333,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ ldOp(M memory, int offset, FLdOp f) { //dummy; no vec = vec(); - $type$[] res = new $type$[length()]; + $elemtype$[] res = new $elemtype$[length()]; for (int i = 0; i < res.length; i++) { res[i] = f.apply(memory, offset, i); } @@ -346,8 +346,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ ldOp(M memory, int offset, VectorMask<$Boxtype$> m, FLdOp f) { - //$type$[] vec = vec(); - $type$[] res = new $type$[length()]; + //$elemtype$[] vec = vec(); + $elemtype$[] res = new $elemtype$[length()]; boolean[] mbits = 
((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < res.length; i++) { if (mbits[i]) { @@ -359,7 +359,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ interface FLdLongOp { - $type$ apply(MemorySegment memory, long offset, int i); + $elemtype$ apply(MemorySegment memory, long offset, int i); } /*package-private*/ @@ -368,7 +368,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ ldLongOp(MemorySegment memory, long offset, FLdLongOp f) { //dummy; no vec = vec(); - $type$[] res = new $type$[length()]; + $elemtype$[] res = new $elemtype$[length()]; for (int i = 0; i < res.length; i++) { res[i] = f.apply(memory, offset, i); } @@ -381,8 +381,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ ldLongOp(MemorySegment memory, long offset, VectorMask<$Boxtype$> m, FLdLongOp f) { - //$type$[] vec = vec(); - $type$[] res = new $type$[length()]; + //$elemtype$[] vec = vec(); + $elemtype$[] res = new $elemtype$[length()]; boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < res.length; i++) { if (mbits[i]) { @@ -392,12 +392,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { return vectorFactory(res); } - static $type$ memorySegmentGet(MemorySegment ms, long o, int i) { - return ms.get(ELEMENT_LAYOUT, o + i * $sizeInBytes$L); + static $elemtype$ memorySegmentGet(MemorySegment ms, long o, int i) { + return {#if[FP16]?Float16.valueOf(ms.get(ELEMENT_LAYOUT, o + i * $sizeInBytes$L)):ms.get(ELEMENT_LAYOUT, o + i * $sizeInBytes$L)}; } interface FStOp { - void apply(M memory, int offset, int i, $type$ a); + void apply(M memory, int offset, int i, $elemtype$ a); } /*package-private*/ @@ -405,7 +405,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { final void stOp(M memory, int offset, FStOp f) { - $type$[] vec = vec(); + 
$elemtype$[] vec = vec(); for (int i = 0; i < vec.length; i++) { f.apply(memory, offset, i, vec[i]); } @@ -417,7 +417,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { void stOp(M memory, int offset, VectorMask<$Boxtype$> m, FStOp f) { - $type$[] vec = vec(); + $elemtype$[] vec = vec(); boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < vec.length; i++) { if (mbits[i]) { @@ -427,7 +427,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } interface FStLongOp { - void apply(MemorySegment memory, long offset, int i, $type$ a); + void apply(MemorySegment memory, long offset, int i, $elemtype$ a); } /*package-private*/ @@ -435,7 +435,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { final void stLongOp(MemorySegment memory, long offset, FStLongOp f) { - $type$[] vec = vec(); + $elemtype$[] vec = vec(); for (int i = 0; i < vec.length; i++) { f.apply(memory, offset, i, vec[i]); } @@ -447,7 +447,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { void stLongOp(MemorySegment memory, long offset, VectorMask<$Boxtype$> m, FStLongOp f) { - $type$[] vec = vec(); + $elemtype$[] vec = vec(); boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < vec.length; i++) { if (mbits[i]) { @@ -456,15 +456,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } } - static void memorySegmentSet(MemorySegment ms, long o, int i, $type$ e) { - ms.set(ELEMENT_LAYOUT, o + i * $sizeInBytes$L, e); + static void memorySegmentSet(MemorySegment ms, long o, int i, $elemtype$ e) { + ms.set(ELEMENT_LAYOUT, o + i * $sizeInBytes$L, {#if[FP16]?e.shortValue():e}); } // Binary test /*package-private*/ interface FBinTest { - boolean apply(int cond, int i, $type$ a, $type$ b); + boolean apply(int cond, int i, $elemtype$ a, $elemtype$ b); } /*package-private*/ @@ -473,8 +473,8 @@ public abstract class 
$abstractvectortype$ extends AbstractVector<$Boxtype$> { AbstractMask<$Boxtype$> bTest(int cond, Vector<$Boxtype$> o, FBinTest f) { - $type$[] vec1 = vec(); - $type$[] vec2 = (($abstractvectortype$)o).vec(); + $elemtype$[] vec1 = vec(); + $elemtype$[] vec2 = (($abstractvectortype$)o).vec(); boolean[] bits = new boolean[length()]; for (int i = 0; i < length(); i++){ bits[i] = f.apply(cond, i, vec1[i], vec2[i]); @@ -485,21 +485,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #if[BITWISE] /*package-private*/ @ForceInline - static $type$ rotateLeft($type$ a, int n) { + static $elemtype$ rotateLeft($elemtype$ a, int n) { #if[intOrLong] return $Boxtype$.rotateLeft(a, n); #else[intOrLong] - return ($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))); + return ($elemtype$)((((($elemtype$)a) & $Boxtype$.toUnsignedInt(($elemtype$)-1)) << (n & $Boxtype$.SIZE-1)) | (((($elemtype$)a) & $Boxtype$.toUnsignedInt(($elemtype$)-1)) >>> ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))); #end[intOrLong] } /*package-private*/ @ForceInline - static $type$ rotateRight($type$ a, int n) { + static $elemtype$ rotateRight($elemtype$ a, int n) { #if[intOrLong] return $Boxtype$.rotateRight(a, n); #else[intOrLong] - return ($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))); + return ($elemtype$)((((($elemtype$)a) & $Boxtype$.toUnsignedInt(($elemtype$)-1)) >>> (n & $Boxtype$.SIZE-1)) | (((($elemtype$)a) & $Boxtype$.toUnsignedInt(($elemtype$)-1)) << ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))); #end[intOrLong] } #end[BITWISE] @@ -510,14 +510,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ @ForceInline - static long toBits($type$ e) { - 
return {#if[FP]? $Type$.$type$ToRaw$Bitstype$Bits(e): e}; + static long toBits($elemtype$ e) { + return {#if[FP]?$Elemtype$.$fptype$ToRaw$Bitstype$Bits(e): e}; } /*package-private*/ @ForceInline - static $type$ fromBits(long bits) { - return {#if[FP]?$Type$.$bitstype$BitsTo$Type$}(($bitstype$)bits); + static $elemtype$ fromBits(long bits) { + return {#if[FP]?$Elemtype$.$bitstype$BitsTo$Fptype$}(($bitstype$)bits); } static $abstractvectortype$ expandHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) { @@ -575,11 +575,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { public static $abstractvectortype$ zero(VectorSpecies<$Boxtype$> species) { $Type$Species vsp = ($Type$Species) species; #if[FP] - return VectorSupport.fromBitsCoerced(vsp.vectorType(), $type$.class, species.length(), - toBits(0.0f), MODE_BROADCAST, vsp, + return VectorSupport.fromBitsCoerced(vsp.vectorType(), $elemtype$.class, species.length(), + toBits({#if[FP16]?Float16.valueOf(0.0f):0.0f}), MODE_BROADCAST, vsp, ((bits_, s_) -> s_.rvOp(i -> bits_))); #else[FP] - return VectorSupport.fromBitsCoerced(vsp.vectorType(), $type$.class, species.length(), + return VectorSupport.fromBitsCoerced(vsp.vectorType(), $elemtype$.class, species.length(), 0, MODE_BROADCAST, vsp, ((bits_, s_) -> s_.rvOp(i -> bits_))); #end[FP] @@ -610,7 +610,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @see Vector#broadcast(long) * @see VectorSpecies#broadcast(long) */ - public abstract $abstractvectortype$ broadcast($type$ e); + public abstract $abstractvectortype$ broadcast($elemtype$ e); /** * Returns a vector of the given species @@ -626,14 +626,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @see VectorSpecies#broadcast(long) */ @ForceInline - public static $abstractvectortype$ broadcast(VectorSpecies<$Boxtype$> species, $type$ e) { + public static $abstractvectortype$ broadcast(VectorSpecies<$Boxtype$> species, 
$elemtype$ e) { $Type$Species vsp = ($Type$Species) species; return vsp.broadcast(e); } /*package-private*/ @ForceInline - final $abstractvectortype$ broadcastTemplate($type$ e) { + final $abstractvectortype$ broadcastTemplate($elemtype$ e) { $Type$Species vsp = vspecies(); return vsp.broadcast(e); } @@ -643,9 +643,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * {@inheritDoc} * @apiNote * When working with vector subtypes like {@code $abstractvectortype$}, - * {@linkplain #broadcast($type$) the more strongly typed method} + * {@linkplain #broadcast($elemtype$) the more strongly typed method} * is typically selected. It can be explicitly selected - * using a cast: {@code v.broadcast(($type$)e)}. + * using a cast: {@code v.broadcast(($elemtype$)e)}. * The two expressions will produce numerically identical results. */ @Override @@ -667,7 +667,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws IllegalArgumentException * if the given {@code long} value cannot * be represented by the vector's {@code ETYPE} - * @see #broadcast(VectorSpecies,$type$) + * @see #broadcast(VectorSpecies,$elemtype$) * @see VectorSpecies#checkValue(long) */ @ForceInline @@ -706,7 +706,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), null, $type$.class, length(), + opc, getClass(), null, $elemtype$.class, length(), this, null, UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations)); } @@ -736,7 +736,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), maskClass, $elemtype$.class, length(), this, m, UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations)); } @@ -748,26 +748,26 @@ public abstract class $abstractvectortype$ extends 
AbstractVector<$Boxtype$> { private static UnaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> unaryOperations(int opc_) { switch (opc_) { case VECTOR_OP_NEG: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) -a); + v0.uOp(m, (i, a) -> ($elemtype$) {#if[FP16]?Float16.valueOf(-a.floatValue()):-a}); case VECTOR_OP_ABS: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.abs(a)); + v0.uOp(m, (i, a) -> ($elemtype$) {#if[FP16]?Float16.abs(a):Math.abs(a)}); #if[!FP] #if[intOrLong] case VECTOR_OP_BIT_COUNT: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) $Boxtype$.bitCount(a)); + v0.uOp(m, (i, a) -> ($elemtype$) $Boxtype$.bitCount(a)); case VECTOR_OP_TZ_COUNT: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfTrailingZeros(a)); + v0.uOp(m, (i, a) -> ($elemtype$) $Boxtype$.numberOfTrailingZeros(a)); case VECTOR_OP_LZ_COUNT: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfLeadingZeros(a)); + v0.uOp(m, (i, a) -> ($elemtype$) $Boxtype$.numberOfLeadingZeros(a)); case VECTOR_OP_REVERSE: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverse(a)); + v0.uOp(m, (i, a) -> ($elemtype$) $Boxtype$.reverse(a)); #else[intOrLong] case VECTOR_OP_BIT_COUNT: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) bitCount(a)); + v0.uOp(m, (i, a) -> ($elemtype$) bitCount(a)); case VECTOR_OP_TZ_COUNT: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) numberOfTrailingZeros(a)); + v0.uOp(m, (i, a) -> ($elemtype$) numberOfTrailingZeros(a)); case VECTOR_OP_LZ_COUNT: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) numberOfLeadingZeros(a)); + v0.uOp(m, (i, a) -> ($elemtype$) numberOfLeadingZeros(a)); case VECTOR_OP_REVERSE: return (v0, m) -> v0.uOp(m, (i, a) -> reverse(a)); #end[intOrLong] @@ -777,43 +777,78 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { v0.uOp(m, (i, a) -> a); #else[byte] case VECTOR_OP_REVERSE_BYTES: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverseBytes(a)); + v0.uOp(m, (i, a) 
-> ($elemtype$) $Boxtype$.reverseBytes(a)); #end[byte] #end[BITWISE] #end[!FP] #if[FP] +#if[!FP16] case VECTOR_OP_SIN: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.sin(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.sin(a)); case VECTOR_OP_COS: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.cos(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.cos(a)); case VECTOR_OP_TAN: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.tan(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.tan(a)); case VECTOR_OP_ASIN: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.asin(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.asin(a)); case VECTOR_OP_ACOS: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.acos(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.acos(a)); case VECTOR_OP_ATAN: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.atan(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.atan(a)); case VECTOR_OP_EXP: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.exp(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.exp(a)); case VECTOR_OP_LOG: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.log(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.log(a)); case VECTOR_OP_LOG10: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.log10(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.log10(a)); case VECTOR_OP_SQRT: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.sqrt(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.sqrt(a)); case VECTOR_OP_CBRT: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.cbrt(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.cbrt(a)); case VECTOR_OP_SINH: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.sinh(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.sinh(a)); case VECTOR_OP_COSH: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.cosh(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.cosh(a)); case VECTOR_OP_TANH: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.tanh(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.tanh(a)); case VECTOR_OP_EXPM1: 
return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.expm1(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.expm1(a)); case VECTOR_OP_LOG1P: return (v0, m) -> - v0.uOp(m, (i, a) -> ($type$) Math.log1p(a)); + v0.uOp(m, (i, a) -> ($elemtype$) Math.log1p(a)); +#else[!FP16] + case VECTOR_OP_SIN: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.sin(a.floatValue()))); + case VECTOR_OP_COS: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.cos(a.floatValue()))); + case VECTOR_OP_TAN: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.tan(a.floatValue()))); + case VECTOR_OP_ASIN: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.asin(a.floatValue()))); + case VECTOR_OP_ACOS: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.acos(a.floatValue()))); + case VECTOR_OP_ATAN: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.atan(a.floatValue()))); + case VECTOR_OP_EXP: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.exp(a.floatValue()))); + case VECTOR_OP_LOG: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.log(a.floatValue()))); + case VECTOR_OP_LOG10: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.log10(a.floatValue()))); + case VECTOR_OP_SQRT: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.sqrt(a.floatValue()))); + case VECTOR_OP_CBRT: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.cbrt(a.floatValue()))); + case VECTOR_OP_SINH: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.sinh(a.floatValue()))); + case VECTOR_OP_COSH: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.cosh(a.floatValue()))); + case VECTOR_OP_TANH: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.tanh(a.floatValue()))); + case VECTOR_OP_EXPM1: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.expm1(a.floatValue()))); + case VECTOR_OP_LOG1P: return (v0, m) -> + v0.uOp(m, (i, a) -> Float16.valueOf(Math.log1p(a.floatValue()))); 
+#end[!FP16] #end[FP] default: return null; } @@ -823,8 +858,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /** * {@inheritDoc} - * @see #lanewise(VectorOperators.Binary,$type$) - * @see #lanewise(VectorOperators.Binary,$type$,VectorMask) + * @see #lanewise(VectorOperators.Binary,$elemtype$) + * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) */ @Override public abstract @@ -856,7 +891,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { that = that.lanewise(NOT); op = AND; } else if (op == DIV) { - VectorMask<$Boxtype$> eqz = that.eq(($type$) 0); + VectorMask<$Boxtype$> eqz = that.eq(($elemtype$) 0); if (eqz.anyTrue()) { throw that.divZeroException(); } @@ -866,14 +901,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), null, $type$.class, length(), + opc, getClass(), null, $elemtype$.class, length(), this, that, null, BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations)); } /** * {@inheritDoc} - * @see #lanewise(VectorOperators.Binary,$type$,VectorMask) + * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) */ @Override public abstract @@ -898,7 +933,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { return this.blend(that, mask.cast(vspecies())); #else[FP] VectorMask<$Boxtype$> mask - = this.compare(EQ, ($type$) 0, m); + = this.compare(EQ, ($elemtype$) 0, m); return this.blend(that, mask); #end[FP] } @@ -915,7 +950,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { that = that.lanewise(NOT); op = AND; } else if (op == DIV) { - VectorMask<$Boxtype$> eqz = that.eq(($type$)0); + VectorMask<$Boxtype$> eqz = that.eq(($elemtype$)0); if (eqz.and(m).anyTrue()) { throw that.divZeroException(); } @@ -927,7 +962,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { int opc = 
opCode(op); return VectorSupport.binaryOp( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), maskClass, $elemtype$.class, length(), this, that, m, BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations)); } @@ -938,31 +973,46 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { private static BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> binaryOperations(int opc_) { switch (opc_) { +#if[FP16] + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.add(a, b)); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.subtract(a, b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.multiply(a, b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.divide(a, b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.min(a, b)); +#else[FP16] case VECTOR_OP_ADD: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$)(a + b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a + b)); case VECTOR_OP_SUB: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$)(a - b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a - b)); case VECTOR_OP_MUL: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$)(a * b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a * b)); case VECTOR_OP_DIV: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$)(a / b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a / b)); case VECTOR_OP_MAX: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.max(a, b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)Math.max(a, b)); case VECTOR_OP_MIN: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.min(a, b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)Math.min(a, b)); +#end[FP16] #if[BITWISE] case VECTOR_OP_AND: 
return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$)(a & b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a & b)); case VECTOR_OP_OR: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$)(a | b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a | b)); case VECTOR_OP_XOR: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$)(a ^ b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a ^ b)); case VECTOR_OP_LSHIFT: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, n) -> ($type$)(a << n)); + v0.bOp(v1, vm, (i, a, n) -> ($elemtype$)(a << n)); case VECTOR_OP_RSHIFT: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, n) -> ($type$)(a >> n)); + v0.bOp(v1, vm, (i, a, n) -> ($elemtype$)(a >> n)); case VECTOR_OP_URSHIFT: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, n) -> ($type$)((a & LSHR_SETUP_MASK) >>> n)); + v0.bOp(v1, vm, (i, a, n) -> ($elemtype$)((a & LSHR_SETUP_MASK) >>> n)); case VECTOR_OP_LROTATE: return (v0, v1, vm) -> v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n)); case VECTOR_OP_RROTATE: return (v0, v1, vm) -> @@ -975,14 +1025,23 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[intOrLong] #end[BITWISE] #if[FP] +#if[!FP16] case VECTOR_OP_OR: return (v0, v1, vm) -> v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b))); case VECTOR_OP_ATAN2: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.atan2(a, b)); + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$) Math.atan2(a, b)); + case VECTOR_OP_POW: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$) Math.pow(a, b)); + case VECTOR_OP_HYPOT: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($elemtype$) Math.hypot(a, b)); +#else[!FP16] + case VECTOR_OP_ATAN2: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Float16.valueOf(Math.atan2(a.floatValue(), b.floatValue()))); case VECTOR_OP_POW: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.pow(a, b)); + v0.bOp(v1, vm, (i, a, b) -> Float16.valueOf(Math.pow(a.floatValue(), 
b.floatValue()))); case VECTOR_OP_HYPOT: return (v0, v1, vm) -> - v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.hypot(a, b)); + v0.bOp(v1, vm, (i, a, b) -> Float16.valueOf(Math.hypot(a.floatValue(), b.floatValue()))); +#end[!FP16] #end[FP] default: return null; } @@ -1010,18 +1069,18 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws UnsupportedOperationException if this vector does * not support the requested operation * @see #lanewise(VectorOperators.Binary,Vector) - * @see #lanewise(VectorOperators.Binary,$type$,VectorMask) + * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) */ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Binary op, - $type$ e) { + $elemtype$ e) { #if[BITWISE] - if (opKind(op, VO_SHIFT) && ($type$)(int)e == e) { + if (opKind(op, VO_SHIFT) && ($elemtype$)(int)e == e) { return lanewiseShift(op, (int) e); } if (op == AND_NOT) { - op = AND; e = ($type$) ~e; + op = AND; e = ($elemtype$) ~e; } #end[BITWISE] return lanewise(op, broadcast(e)); @@ -1045,19 +1104,19 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws UnsupportedOperationException if this vector does * not support the requested operation * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) - * @see #lanewise(VectorOperators.Binary,$type$) + * @see #lanewise(VectorOperators.Binary,$elemtype$) */ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Binary op, - $type$ e, + $elemtype$ e, VectorMask<$Boxtype$> m) { #if[BITWISE] - if (opKind(op, VO_SHIFT) && ($type$)(int)e == e) { + if (opKind(op, VO_SHIFT) && ($elemtype$)(int)e == e) { return lanewiseShift(op, (int) e, m); } if (op == AND_NOT) { - op = AND; e = ($type$) ~e; + op = AND; e = ($elemtype$) ~e; } #end[BITWISE] return lanewise(op, broadcast(e), m); @@ -1068,23 +1127,23 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * {@inheritDoc} * @apiNote * When working 
with vector subtypes like {@code $abstractvectortype$}, - * {@linkplain #lanewise(VectorOperators.Binary,$type$) + * {@linkplain #lanewise(VectorOperators.Binary,$elemtype$) * the more strongly typed method} * is typically selected. It can be explicitly selected - * using a cast: {@code v.lanewise(op,($type$)e)}. + * using a cast: {@code v.lanewise(op,($elemtype$)e)}. * The two expressions will produce numerically identical results. */ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Binary op, long e) { - $type$ e1 = ($type$) e; + $elemtype$ e1 = {#if[FP16]?Float16.valueOf(e):($elemtype$) e}; #if[BITWISE] if ((long)e1 != e // allow shift ops to clip down their int parameters && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { #else[BITWISE] - if ((long)e1 != e) { + if ({#if[FP16]?e1.longValue():(long)e1} != e) { #end[BITWISE] vspecies().checkValue(e); // for exception } @@ -1095,23 +1154,23 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * {@inheritDoc} * @apiNote * When working with vector subtypes like {@code $abstractvectortype$}, - * {@linkplain #lanewise(VectorOperators.Binary,$type$,VectorMask) + * {@linkplain #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) * the more strongly typed method} * is typically selected. It can be explicitly selected - * using a cast: {@code v.lanewise(op,($type$)e,m)}. + * using a cast: {@code v.lanewise(op,($elemtype$)e,m)}. * The two expressions will produce numerically identical results. 
*/ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Binary op, long e, VectorMask<$Boxtype$> m) { - $type$ e1 = ($type$) e; + $elemtype$ e1 = {#if[FP16]?Float16.valueOf(e):($elemtype$) e}; #if[BITWISE] if ((long)e1 != e // allow shift ops to clip down their int parameters && !(opKind(op, VO_SHIFT) && (int)e1 == e)) { #else[BITWISE] - if ((long)e1 != e) { + if ({#if[FP16]?e1.longValue():(long)e1} != e) { #end[BITWISE] vspecies().checkValue(e); // for exception } @@ -1134,7 +1193,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { e &= SHIFT_MASK; int opc = opCode(op); return VectorSupport.broadcastInt( - opc, getClass(), null, $type$.class, length(), + opc, getClass(), null, $elemtype$.class, length(), this, e, null, BIN_INT_IMPL.find(op, opc, $abstractvectortype$::broadcastIntOperations)); } @@ -1155,7 +1214,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { e &= SHIFT_MASK; int opc = opCode(op); return VectorSupport.broadcastInt( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), maskClass, $elemtype$.class, length(), this, e, m, BIN_INT_IMPL.find(op, opc, $abstractvectortype$::broadcastIntOperations)); } @@ -1167,11 +1226,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { private static VectorBroadcastIntOp<$abstractvectortype$, VectorMask<$Boxtype$>> broadcastIntOperations(int opc_) { switch (opc_) { case VECTOR_OP_LSHIFT: return (v, n, m) -> - v.uOp(m, (i, a) -> ($type$)(a << n)); + v.uOp(m, (i, a) -> ($elemtype$)(a << n)); case VECTOR_OP_RSHIFT: return (v, n, m) -> - v.uOp(m, (i, a) -> ($type$)(a >> n)); + v.uOp(m, (i, a) -> ($elemtype$)(a >> n)); case VECTOR_OP_URSHIFT: return (v, n, m) -> - v.uOp(m, (i, a) -> ($type$)((a & LSHR_SETUP_MASK) >>> n)); + v.uOp(m, (i, a) -> ($elemtype$)((a & LSHR_SETUP_MASK) >>> n)); case VECTOR_OP_LROTATE: return (v, n, m) -> v.uOp(m, (i, a) -> rotateLeft(a, (int)n)); case 
VECTOR_OP_RROTATE: return (v, n, m) -> @@ -1190,7 +1249,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // Also simulate >>> on sub-word variables with a mask. private static final int LSHR_SETUP_MASK = ((1 << $Boxtype$.SIZE) - 1); #else[byteOrShort] - private static final $type$ LSHR_SETUP_MASK = -1; + private static final $elemtype$ LSHR_SETUP_MASK = -1; #end[byteOrShort] #end[BITWISE] @@ -1206,12 +1265,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /** * {@inheritDoc} - * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask) - * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask) - * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask) - * @see #lanewise(VectorOperators.Ternary,$type$,$type$) - * @see #lanewise(VectorOperators.Ternary,Vector,$type$) - * @see #lanewise(VectorOperators.Ternary,$type$,Vector) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector,VectorMask) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$) + * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector) */ @Override public abstract @@ -1239,16 +1298,16 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[BITWISE] int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), null, $type$.class, length(), + opc, getClass(), null, $elemtype$.class, length(), this, that, tother, null, TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations)); } /** * {@inheritDoc} - * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask) - * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask) - * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask) + * @see 
#lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector,VectorMask) */ @Override public abstract @@ -1281,7 +1340,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[BITWISE] int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), maskClass, $elemtype$.class, length(), this, that, tother, m, TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations)); } @@ -1293,8 +1352,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { private static TernaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> ternaryOperations(int opc_) { switch (opc_) { #if[FP] - case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> - v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); + case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> {#if[FP16]?Float16.fma(a, b, c):Math.fma(a, b, c)}); #end[FP] default: return null; } @@ -1317,13 +1375,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws UnsupportedOperationException if this vector does * not support the requested operation * @see #lanewise(VectorOperators.Ternary,Vector,Vector) - * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask) */ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,e2) - $type$ e1, - $type$ e2) { + $elemtype$ e1, + $elemtype$ e2) { return lanewise(op, broadcast(e1), broadcast(e2)); } @@ -1346,13 +1404,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws UnsupportedOperationException if this vector does * not support the requested operation * @see 
#lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) - * @see #lanewise(VectorOperators.Ternary,$type$,$type$) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$) */ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,e2,m) - $type$ e1, - $type$ e2, + $elemtype$ e1, + $elemtype$ e2, VectorMask<$Boxtype$> m) { return lanewise(op, broadcast(e1), broadcast(e2), m); } @@ -1373,14 +1431,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * to the input vectors and the scalar * @throws UnsupportedOperationException if this vector does * not support the requested operation - * @see #lanewise(VectorOperators.Ternary,$type$,$type$) - * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$) + * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$,VectorMask) */ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,v1,e2) Vector<$Boxtype$> v1, - $type$ e2) { + $elemtype$ e2) { return lanewise(op, v1, broadcast(e2)); } @@ -1403,14 +1461,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws UnsupportedOperationException if this vector does * not support the requested operation * @see #lanewise(VectorOperators.Ternary,Vector,Vector) - * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask) - * @see #lanewise(VectorOperators.Ternary,Vector,$type$) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$) */ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,v1,e2,m) Vector<$Boxtype$> v1, - $type$ e2, + $elemtype$ e2, VectorMask<$Boxtype$> m) { return lanewise(op, v1, broadcast(e2), m); } @@ -1432,12 +1490,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws 
UnsupportedOperationException if this vector does * not support the requested operation * @see #lanewise(VectorOperators.Ternary,Vector,Vector) - * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector,VectorMask) */ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,v2) - $type$ e1, + $elemtype$ e1, Vector<$Boxtype$> v2) { return lanewise(op, broadcast(e1), v2); } @@ -1461,12 +1519,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws UnsupportedOperationException if this vector does * not support the requested operation * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) - * @see #lanewise(VectorOperators.Ternary,$type$,Vector) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector) */ @ForceInline public final $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,v2,m) - $type$ e1, + $elemtype$ e1, Vector<$Boxtype$> v2, VectorMask<$Boxtype$> m) { return lanewise(op, broadcast(e1), v2, m); @@ -1482,7 +1540,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /** * {@inheritDoc} - * @see #add($type$) + * @see #add($elemtype$) */ @Override @ForceInline @@ -1497,28 +1555,28 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * the primitive addition operation ({@code +}) to each lane. * * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$) + * {@link #lanewise(VectorOperators.Binary,$elemtype$) * lanewise}{@code (}{@link VectorOperators#ADD * ADD}{@code , e)}. 
* * @param e the input scalar * @return the result of adding each lane of this vector to the scalar * @see #add(Vector) - * @see #broadcast($type$) - * @see #add($type$,VectorMask) + * @see #broadcast($elemtype$) + * @see #add($elemtype$,VectorMask) * @see VectorOperators#ADD * @see #lanewise(VectorOperators.Binary,Vector) - * @see #lanewise(VectorOperators.Binary,$type$) + * @see #lanewise(VectorOperators.Binary,$elemtype$) */ @ForceInline public final - $abstractvectortype$ add($type$ e) { + $abstractvectortype$ add($elemtype$ e) { return lanewise(ADD, e); } /** * {@inheritDoc} - * @see #add($type$,VectorMask) + * @see #add($elemtype$,VectorMask) */ @Override @ForceInline @@ -1535,7 +1593,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * the primitive addition operation ({@code +}) to each lane. * * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask) + * {@link #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) * lanewise}{@code (}{@link VectorOperators#ADD * ADD}{@code , s, m)}. 
* @@ -1543,21 +1601,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @param m the mask controlling lane selection * @return the result of adding each lane of this vector to the scalar * @see #add(Vector,VectorMask) - * @see #broadcast($type$) - * @see #add($type$) + * @see #broadcast($elemtype$) + * @see #add($elemtype$) * @see VectorOperators#ADD * @see #lanewise(VectorOperators.Binary,Vector) - * @see #lanewise(VectorOperators.Binary,$type$) + * @see #lanewise(VectorOperators.Binary,$elemtype$) */ @ForceInline - public final $abstractvectortype$ add($type$ e, + public final $abstractvectortype$ add($elemtype$ e, VectorMask<$Boxtype$> m) { return lanewise(ADD, e, m); } /** * {@inheritDoc} - * @see #sub($type$) + * @see #sub($elemtype$) */ @Override @ForceInline @@ -1572,27 +1630,27 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * the primitive subtraction operation ({@code -}) to each lane. * * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$) + * {@link #lanewise(VectorOperators.Binary,$elemtype$) * lanewise}{@code (}{@link VectorOperators#SUB * SUB}{@code , e)}. 
* * @param e the input scalar * @return the result of subtracting the scalar from each lane of this vector * @see #sub(Vector) - * @see #broadcast($type$) - * @see #sub($type$,VectorMask) + * @see #broadcast($elemtype$) + * @see #sub($elemtype$,VectorMask) * @see VectorOperators#SUB * @see #lanewise(VectorOperators.Binary,Vector) - * @see #lanewise(VectorOperators.Binary,$type$) + * @see #lanewise(VectorOperators.Binary,$elemtype$) */ @ForceInline - public final $abstractvectortype$ sub($type$ e) { + public final $abstractvectortype$ sub($elemtype$ e) { return lanewise(SUB, e); } /** * {@inheritDoc} - * @see #sub($type$,VectorMask) + * @see #sub($elemtype$,VectorMask) */ @Override @ForceInline @@ -1609,7 +1667,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * the primitive subtraction operation ({@code -}) to each lane. * * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask) + * {@link #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) * lanewise}{@code (}{@link VectorOperators#SUB * SUB}{@code , s, m)}. 
* @@ -1617,21 +1675,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @param m the mask controlling lane selection * @return the result of subtracting the scalar from each lane of this vector * @see #sub(Vector,VectorMask) - * @see #broadcast($type$) - * @see #sub($type$) + * @see #broadcast($elemtype$) + * @see #sub($elemtype$) * @see VectorOperators#SUB * @see #lanewise(VectorOperators.Binary,Vector) - * @see #lanewise(VectorOperators.Binary,$type$) + * @see #lanewise(VectorOperators.Binary,$elemtype$) */ @ForceInline - public final $abstractvectortype$ sub($type$ e, + public final $abstractvectortype$ sub($elemtype$ e, VectorMask<$Boxtype$> m) { return lanewise(SUB, e, m); } /** * {@inheritDoc} - * @see #mul($type$) + * @see #mul($elemtype$) */ @Override @ForceInline @@ -1646,27 +1704,27 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * the primitive multiplication operation ({@code *}) to each lane. * * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$) + * {@link #lanewise(VectorOperators.Binary,$elemtype$) * lanewise}{@code (}{@link VectorOperators#MUL * MUL}{@code , e)}. 
* * @param e the input scalar * @return the result of multiplying this vector by the given scalar * @see #mul(Vector) - * @see #broadcast($type$) - * @see #mul($type$,VectorMask) + * @see #broadcast($elemtype$) + * @see #mul($elemtype$,VectorMask) * @see VectorOperators#MUL * @see #lanewise(VectorOperators.Binary,Vector) - * @see #lanewise(VectorOperators.Binary,$type$) + * @see #lanewise(VectorOperators.Binary,$elemtype$) */ @ForceInline - public final $abstractvectortype$ mul($type$ e) { + public final $abstractvectortype$ mul($elemtype$ e) { return lanewise(MUL, e); } /** * {@inheritDoc} - * @see #mul($type$,VectorMask) + * @see #mul($elemtype$,VectorMask) */ @Override @ForceInline @@ -1683,7 +1741,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * the primitive multiplication operation ({@code *}) to each lane. * * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask) + * {@link #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) * lanewise}{@code (}{@link VectorOperators#MUL * MUL}{@code , s, m)}. * @@ -1691,14 +1749,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @param m the mask controlling lane selection * @return the result of muling each lane of this vector to the scalar * @see #mul(Vector,VectorMask) - * @see #broadcast($type$) - * @see #mul($type$) + * @see #broadcast($elemtype$) + * @see #mul($elemtype$) * @see VectorOperators#MUL * @see #lanewise(VectorOperators.Binary,Vector) - * @see #lanewise(VectorOperators.Binary,$type$) + * @see #lanewise(VectorOperators.Binary,$elemtype$) */ @ForceInline - public final $abstractvectortype$ mul($type$ e, + public final $abstractvectortype$ mul($elemtype$ e, VectorMask<$Boxtype$> m) { return lanewise(MUL, e, m); } @@ -1728,7 +1786,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * the primitive division operation ({@code /}) to each lane. 
* * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$) + * {@link #lanewise(VectorOperators.Binary,$elemtype$) * lanewise}{@code (}{@link VectorOperators#DIV * DIV}{@code , e)}. * @@ -1745,20 +1803,20 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @param e the input scalar * @return the result of dividing each lane of this vector by the scalar * @see #div(Vector) - * @see #broadcast($type$) - * @see #div($type$,VectorMask) + * @see #broadcast($elemtype$) + * @see #div($elemtype$,VectorMask) * @see VectorOperators#DIV * @see #lanewise(VectorOperators.Binary,Vector) - * @see #lanewise(VectorOperators.Binary,$type$) + * @see #lanewise(VectorOperators.Binary,$elemtype$) */ @ForceInline - public final $abstractvectortype$ div($type$ e) { + public final $abstractvectortype$ div($elemtype$ e) { return lanewise(DIV, e); } /** * {@inheritDoc} - * @see #div($type$,VectorMask) + * @see #div($elemtype$,VectorMask) #if[FP] * @apiNote Because the underlying scalar operator is an IEEE * floating point number, division by zero in fact will @@ -1784,7 +1842,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * the primitive division operation ({@code /}) to each lane. * * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask) + * {@link #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) * lanewise}{@code (}{@link VectorOperators#DIV * DIV}{@code , s, m)}. 
* @@ -1802,14 +1860,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @param m the mask controlling lane selection * @return the result of dividing each lane of this vector by the scalar * @see #div(Vector,VectorMask) - * @see #broadcast($type$) - * @see #div($type$) + * @see #broadcast($elemtype$) + * @see #div($elemtype$) * @see VectorOperators#DIV * @see #lanewise(VectorOperators.Binary,Vector) - * @see #lanewise(VectorOperators.Binary,$type$) + * @see #lanewise(VectorOperators.Binary,$elemtype$) */ @ForceInline - public final $abstractvectortype$ div($type$ e, + public final $abstractvectortype$ div($elemtype$ e, VectorMask<$Boxtype$> m) { return lanewise(DIV, e, m); } @@ -1844,16 +1902,16 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * corresponding lane values. * * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$) + * {@link #lanewise(VectorOperators.Binary,$elemtype$) * lanewise}{@code (}{@link VectorOperators#MIN * MIN}{@code , e)}. * * @param e the input scalar * @return the result of multiplying this vector by the given scalar * @see #min(Vector) - * @see #broadcast($type$) + * @see #broadcast($elemtype$) * @see VectorOperators#MIN - * @see #lanewise(VectorOperators.Binary,$type$,VectorMask) + * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) #if[FP] * @apiNote * For this method, floating point negative @@ -1862,7 +1920,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[FP] */ @ForceInline - public final $abstractvectortype$ min($type$ e) { + public final $abstractvectortype$ min($elemtype$ e) { return lanewise(MIN, e); } @@ -1889,16 +1947,16 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * corresponding lane values. 
* * This method is also equivalent to the expression - * {@link #lanewise(VectorOperators.Binary,$type$) + * {@link #lanewise(VectorOperators.Binary,$elemtype$) * lanewise}{@code (}{@link VectorOperators#MAX * MAX}{@code , e)}. * * @param e the input scalar * @return the result of multiplying this vector by the given scalar * @see #max(Vector) - * @see #broadcast($type$) + * @see #broadcast($elemtype$) * @see VectorOperators#MAX - * @see #lanewise(VectorOperators.Binary,$type$,VectorMask) + * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) #if[FP] * @apiNote * For this method, floating point negative @@ -1907,7 +1965,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[FP] */ @ForceInline - public final $abstractvectortype$ max($type$ e) { + public final $abstractvectortype$ max($elemtype$ e) { return lanewise(MAX, e); } @@ -1935,7 +1993,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * * @param v a second input vector * @return the bitwise {@code &} of this vector and the second input vector - * @see #and($type$) + * @see #and($elemtype$) * @see #or(Vector) * @see #not() * @see VectorOperators#AND @@ -1966,7 +2024,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) */ @ForceInline - public final $abstractvectortype$ and($type$ e) { + public final $abstractvectortype$ and($elemtype$ e) { return lanewise(AND, e); } @@ -1992,7 +2050,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * * @param v a second input vector * @return the bitwise {@code |} of this vector and the second input vector - * @see #or($type$) + * @see #or($elemtype$) * @see #and(Vector) * @see #not() * @see VectorOperators#OR @@ -2023,7 +2081,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) */ 
@ForceInline - public final $abstractvectortype$ or($type$ e) { + public final $abstractvectortype$ or($elemtype$ e) { return lanewise(OR, e); } @@ -2059,7 +2117,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * * @param b a vector exponent by which to raise this vector * @return the {@code b}-th power of this vector - * @see #pow($type$) + * @see #pow($elemtype$) * @see VectorOperators#POW * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) */ @@ -2091,10 +2149,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @return the {@code b}-th power of this vector * @see #pow(Vector) * @see VectorOperators#POW - * @see #lanewise(VectorOperators.Binary,$type$,VectorMask) + * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask) */ @ForceInline - public final $abstractvectortype$ pow($type$ b) { + public final $abstractvectortype$ pow($elemtype$ b) { return lanewise(POW, b); } #end[FP] @@ -2123,7 +2181,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #if[!FP] #if[!intOrLong] - static int bitCount($type$ a) { + static int bitCount($elemtype$ a) { #if[short] return Integer.bitCount((int)a & 0xFFFF); #else[short] @@ -2134,7 +2192,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[!FP] #if[!FP] #if[!intOrLong] - static int numberOfTrailingZeros($type$ a) { + static int numberOfTrailingZeros($elemtype$ a) { #if[short] return a != 0 ? Integer.numberOfTrailingZeros(a) : 16; #else[short] @@ -2145,7 +2203,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[!FP] #if[!FP] #if[!intOrLong] - static int numberOfLeadingZeros($type$ a) { + static int numberOfLeadingZeros($elemtype$ a) { #if[short] return a >= 0 ? 
Integer.numberOfLeadingZeros(a) - 16 : 0; #else[short] @@ -2153,18 +2211,18 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[short] } - static $type$ reverse($type$ a) { + static $elemtype$ reverse($elemtype$ a) { if (a == 0 || a == -1) return a; #if[short] - $type$ b = rotateLeft(a, 8); - b = ($type$) (((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1)); - b = ($type$) (((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2)); - b = ($type$) (((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4)); + $elemtype$ b = rotateLeft(a, 8); + b = ($elemtype$) (((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1)); + b = ($elemtype$) (((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2)); + b = ($elemtype$) (((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4)); #else[short] - $type$ b = rotateLeft(a, 4); - b = ($type$) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1)); - b = ($type$) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2)); + $elemtype$ b = rotateLeft(a, 4); + b = ($elemtype$) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1)); + b = ($elemtype$) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2)); #end[short] return b; } @@ -2257,11 +2315,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @param e the input scalar * @return the result mask of testing if this vector * is equal to {@code e} - * @see #compare(VectorOperators.Comparison,$type$) + * @see #compare(VectorOperators.Comparison,$elemtype$) */ @ForceInline public final - VectorMask<$Boxtype$> eq($type$ e) { + VectorMask<$Boxtype$> eq($elemtype$ e) { return compare(EQ, e); } @@ -2285,11 +2343,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @param e the input scalar * @return the mask result of testing if this vector * is less than the input scalar - * @see #compare(VectorOperators.Comparison,$type$) + * @see #compare(VectorOperators.Comparison,$elemtype$) */ @ForceInline public final - VectorMask<$Boxtype$> lt($type$ e) { + VectorMask<$Boxtype$> lt($elemtype$ e) { return compare(LT, e); } @@ 
-2413,7 +2471,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { that.check(this); int opc = opCode(op); return VectorSupport.compare( - opc, getClass(), maskType, $type$.class, length(), + opc, getClass(), maskType, $elemtype$.class, length(), this, that, null, (cond, v0, v1, m1) -> { AbstractMask<$Boxtype$> m @@ -2435,7 +2493,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { m.check(maskType, this); int opc = opCode(op); return VectorSupport.compare( - opc, getClass(), maskType, $type$.class, length(), + opc, getClass(), maskType, $elemtype$.class, length(), this, that, m, (cond, v0, v1, m1) -> { AbstractMask<$Boxtype$> cmpM @@ -2448,14 +2506,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } @ForceInline - private static boolean compareWithOp(int cond, $type$ a, $type$ b) { + private static boolean compareWithOp(int cond, $elemtype$ a, $elemtype$ b) { return switch (cond) { - case BT_eq -> a == b; - case BT_ne -> a != b; - case BT_lt -> a < b; - case BT_le -> a <= b; - case BT_gt -> a > b; - case BT_ge -> a >= b; + case BT_eq -> {#if[FP16]?a.floatValue() == b.floatValue():a == b}; + case BT_ne -> {#if[FP16]?a.floatValue() != b.floatValue():a != b}; + case BT_lt -> {#if[FP16]?a.floatValue() < b.floatValue():a < b}; + case BT_le -> {#if[FP16]?a.floatValue() <= b.floatValue():a <= b}; + case BT_gt -> {#if[FP16]?a.floatValue() > b.floatValue():a > b}; + case BT_ge -> {#if[FP16]?a.floatValue() >= b.floatValue():a >= b}; #if[!FP] case BT_ult -> $Boxtype$.compareUnsigned(a, b) < 0; case BT_ule -> $Boxtype$.compareUnsigned(a, b) <= 0; @@ -2486,17 +2544,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * compares to the input, according to the selected * comparison operator * @see $abstractvectortype$#compare(VectorOperators.Comparison,Vector) - * @see #eq($type$) - * @see #lt($type$) + * @see #eq($elemtype$) + * @see #lt($elemtype$) */ public abstract - VectorMask<$Boxtype$> compare(Comparison op, $type$ e); + VectorMask<$Boxtype$> compare(Comparison op, $elemtype$ e); 
/*package-private*/ @ForceInline final > - M compareTemplate(Class maskType, Comparison op, $type$ e) { + M compareTemplate(Class maskType, Comparison op, $elemtype$ e) { return compareTemplate(maskType, op, broadcast(e)); } @@ -2522,7 +2580,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { */ @ForceInline public final VectorMask<$Boxtype$> compare(VectorOperators.Comparison op, - $type$ e, + $elemtype$ e, VectorMask<$Boxtype$> m) { return compare(op, broadcast(e), m); } @@ -2570,7 +2628,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { blendTemplate(Class maskType, $abstractvectortype$ v, M m) { v.check(this); return VectorSupport.blend( - getClass(), maskType, $type$.class, length(), + getClass(), maskType, $elemtype$.class, length(), this, v, m, (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b)); } @@ -2587,7 +2645,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // make sure VLENGTH*scale doesn't overflow: vsp.checkScale(scale); return VectorSupport.indexVector( - getClass(), $type$.class, length(), + getClass(), $elemtype$.class, length(), this, scale, vsp, (v, scale_, s) -> { @@ -2595,8 +2653,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // instruction directly, load IOTA from memory // and multiply. $abstractvectortype$ iota = s.iota(); - $type$ sc = ($type$) scale_; - return v.add(sc == 1 ? iota : iota.mul(sc)); + return v.add(scale_ == 1 ? 
iota : iota.mul({#if[FP16]?Float16.valueOf(scale_):($elemtype$)scale_})); }); } @@ -2617,7 +2674,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * the scalar value */ @ForceInline - public final $abstractvectortype$ blend($type$ e, + public final $abstractvectortype$ blend($elemtype$ e, VectorMask<$Boxtype$> m) { return blend(broadcast(e), m); } @@ -2661,7 +2718,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { that.check(this); Objects.checkIndex(origin, length() + 1); VectorShuffle<$Boxtype$> iota = iotaShuffle(); - VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast(($type$)(length() - origin)))); + $elemtype$ pivotidx = {#if[FP16]?Float16.valueOf(length() - origin):($elemtype$)(length() - origin)}; + VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2691,7 +2749,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); VectorShuffle<$Boxtype$> iota = iotaShuffle(); - VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast(($type$)(length() - origin)))); + $elemtype$ pivotidx = {#if[FP16]?Float16.valueOf(length() - origin):($elemtype$)(length() - origin)}; + VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx)); iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2713,7 +2772,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { Objects.checkIndex(origin, length() + 1); VectorShuffle<$Boxtype$> iota = iotaShuffle(); VectorMask<$Boxtype$> blendMask = iota.toVector().compare((part == 0) ? 
VectorOperators.GE : VectorOperators.LT, - (broadcast(($type$)(origin)))); + (broadcast({#if[FP16]?Float16.valueOf(origin):($elemtype$)(origin)}))); iota = iotaShuffle(-origin, 1, true); return that.blend(this.rearrange(iota), blendMask); } @@ -2753,7 +2812,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { Objects.checkIndex(origin, length() + 1); VectorShuffle<$Boxtype$> iota = iotaShuffle(); VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast(($type$)(origin)))); + broadcast({#if[FP16]?Float16.valueOf(origin):($elemtype$)(origin)})); iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2779,7 +2838,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ rearrangeTemplate(Class shuffletype, S shuffle) { shuffle.checkIndexes(); return VectorSupport.rearrangeOp( - getClass(), shuffletype, null, $type$.class, length(), + getClass(), shuffletype, null, $elemtype$.class, length(), this, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); @@ -2811,11 +2870,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { throw new AssertionError(); } return VectorSupport.rearrangeOp( - getClass(), shuffletype, masktype, $type$.class, length(), + getClass(), shuffletype, masktype, $elemtype$.class, length(), this, shuffle, m, (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); - return ei < 0 || !m_.laneIsSet(i) ? 0 : v1.lane(ei); + return ei < 0 || !m_.laneIsSet(i) ? 
{#if[FP16]?Float16.valueOf(0):0} : v1.lane(ei); })); } @@ -2839,7 +2898,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { S ws = (S) shuffle.wrapIndexes(); $abstractvectortype$ r0 = VectorSupport.rearrangeOp( - getClass(), shuffletype, null, $type$.class, length(), + getClass(), shuffletype, null, $elemtype$.class, length(), this, ws, null, (v0, s_, m_) -> v0.uOp((i, a) -> { int ei = s_.laneSource(i); @@ -2847,7 +2906,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { })); $abstractvectortype$ r1 = VectorSupport.rearrangeOp( - getClass(), shuffletype, null, $type$.class, length(), + getClass(), shuffletype, null, $elemtype$.class, length(), v, ws, null, (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); @@ -2859,10 +2918,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline private final VectorShuffle<$Boxtype$> toShuffle0($Type$Species dsp) { - $type$[] a = toArray(); + $elemtype$[] a = toArray(); int[] sa = new int[a.length]; for (int i = 0; i < a.length; i++) { - sa[i] = (int) a[i]; + sa[i] = {#if[FP16]?a[i].intValue():(int) a[i]}; } return VectorShuffle.fromArray(dsp, sa, 0); } @@ -2873,7 +2932,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { VectorShuffle<$Boxtype$> toShuffleTemplate(Class shuffleType) { $Type$Species vsp = vspecies(); return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, - getClass(), $type$.class, length(), + getClass(), $elemtype$.class, length(), shuffleType, byte.class, length(), this, vsp, $Type$Vector::toShuffle0); @@ -2894,7 +2953,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $Type$Vector compressTemplate(Class masktype, M m) { m.check(masktype, this); return ($Type$Vector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype, - $type$.class, length(), this, m, + $elemtype$.class, length(), this, m, (v1, 
m1) -> compressHelper(v1, m1)); } @@ -2913,7 +2972,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $Type$Vector expandTemplate(Class masktype, M m) { m.check(masktype, this); return ($Type$Vector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype, - $type$.class, length(), this, m, + $elemtype$.class, length(), this, m, (v1, m1) -> expandHelper(v1, m1)); } @@ -2965,9 +3024,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @param mask a bitwise mask to enable blending of the input bits * @return the bitwise blend of the given bits into the current vector, * under control of the bitwise mask - * @see #bitwiseBlend($type$,$type$) - * @see #bitwiseBlend($type$,Vector) - * @see #bitwiseBlend(Vector,$type$) + * @see #bitwiseBlend($elemtype$,$elemtype$) + * @see #bitwiseBlend($elemtype$,Vector) + * @see #bitwiseBlend(Vector,$elemtype$) * @see VectorOperators#BITWISE_BLEND * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) */ @@ -2996,11 +3055,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * under control of the bitwise mask * @see #bitwiseBlend(Vector,Vector) * @see VectorOperators#BITWISE_BLEND - * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask) */ @ForceInline public final - $abstractvectortype$ bitwiseBlend($type$ bits, $type$ mask) { + $abstractvectortype$ bitwiseBlend($elemtype$ bits, $elemtype$ mask) { return lanewise(BITWISE_BLEND, bits, mask); } @@ -3023,11 +3082,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * under control of the bitwise mask * @see #bitwiseBlend(Vector,Vector) * @see VectorOperators#BITWISE_BLEND - * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector,VectorMask) */ @ForceInline public 
final - $abstractvectortype$ bitwiseBlend($type$ bits, Vector<$Boxtype$> mask) { + $abstractvectortype$ bitwiseBlend($elemtype$ bits, Vector<$Boxtype$> mask) { return lanewise(BITWISE_BLEND, bits, mask); } @@ -3050,11 +3109,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * under control of the bitwise mask * @see #bitwiseBlend(Vector,Vector) * @see VectorOperators#BITWISE_BLEND - * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$,VectorMask) */ @ForceInline public final - $abstractvectortype$ bitwiseBlend(Vector<$Boxtype$> bits, $type$ mask) { + $abstractvectortype$ bitwiseBlend(Vector<$Boxtype$> bits, $elemtype$ mask) { return lanewise(BITWISE_BLEND, bits, mask); } #end[BITWISE] @@ -3072,7 +3131,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * * This is a lane-wise ternary operation which applies an operation * conforming to the specification of - * {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)} + * {@link {#if[FP16]?Float16:Math}#fma($elemtype$,$elemtype$,$elemtype$) {#if[FP16]?Float16:Math}.fma(a,b,c)} * to each lane. 
#if[intOrFloat] * The operation is adapted to cast the operands and the result, @@ -3091,7 +3150,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @return the product of this vector and the second input vector * summed with the third input vector, using extended precision * for the intermediate result - * @see #fma($type$,$type$) + * @see #fma($elemtype$,$elemtype$) * @see VectorOperators#FMA * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) */ @@ -3113,7 +3172,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * * This is a lane-wise ternary operation which applies an operation * conforming to the specification of - * {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)} + * {@link {#if[FP16]?Float16:Math}#fma($elemtype$,$elemtype$,$elemtype$) {#if[FP16]?Float16:Math}.fma(a,b,c)} * to each lane. #if[intOrFloat] * The operation is adapted to cast the operands and the result, @@ -3134,15 +3193,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * for the intermediate result * @see #fma(Vector,Vector) * @see VectorOperators#FMA - * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask) + * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask) */ @ForceInline public final - $abstractvectortype$ fma($type$ b, $type$ c) { + $abstractvectortype$ fma($elemtype$ b, $elemtype$ c) { return lanewise(FMA, b, c); } - // Don't bother with (Vector,$type$) and ($type$,Vector) overloadings. + // Don't bother with (Vector,$elemtype$) and ($elemtype$,Vector) overloadings. 
#end[FP] // Type specific horizontal reductions @@ -3195,7 +3254,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[BITWISE] * @see VectorOperators#FIRST_NONZERO */ - public abstract $type$ reduceLanes(VectorOperators.Associative op); + public abstract $elemtype$ reduceLanes(VectorOperators.Associative op); /** * Returns a value accumulated from selected lanes of this vector, @@ -3215,7 +3274,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * {@code ADD} #end[BITWISE] * or {@code FIRST_NONZERO}, - * then the identity value is {#if[FP]?positive }zero, the default {@code $type$} value. + * then the identity value is {#if[FP]?positive }zero, the default {@code $elemtype$} value. *

  • * If the operation is {@code MUL}, * then the identity value is one. @@ -3273,24 +3332,24 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * not support the requested operation * @see #reduceLanes(VectorOperators.Associative) */ - public abstract $type$ reduceLanes(VectorOperators.Associative op, + public abstract $elemtype$ reduceLanes(VectorOperators.Associative op, VectorMask<$Boxtype$> m); /*package-private*/ @ForceInline final - $type$ reduceLanesTemplate(VectorOperators.Associative op, + $elemtype$ reduceLanesTemplate(VectorOperators.Associative op, Class> maskClass, VectorMask<$Boxtype$> m) { m.check(maskClass, this); if (op == FIRST_NONZERO) { // FIXME: The JIT should handle this. - $abstractvectortype$ v = broadcast(($type$) 0).blend(this, m); + $abstractvectortype$ v = broadcast({#if[FP16]?Float16.valueOf(0):($elemtype$) 0}).blend(this, m); return v.reduceLanesTemplate(op); } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), maskClass, $elemtype$.class, length(), this, m, REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations))); } @@ -3298,17 +3357,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ @ForceInline final - $type$ reduceLanesTemplate(VectorOperators.Associative op) { + $elemtype$ reduceLanesTemplate(VectorOperators.Associative op) { if (op == FIRST_NONZERO) { // FIXME: The JIT should handle this. VectorMask<$Boxbitstype$> thisNZ = this.viewAsIntegralLanes().compare(NE, ($bitstype$) 0); int ft = thisNZ.firstTrue(); - return ft < length() ? this.lane(ft) : ($type$) 0; + return ft < length() ? 
this.lane(ft) : {#if[FP16]?Float16.valueOf(0):($elemtype$) 0}; } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), null, $type$.class, length(), + opc, getClass(), null, $elemtype$.class, length(), this, null, REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations))); } @@ -3319,32 +3378,43 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { private static ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>> reductionOperations(int opc_) { switch (opc_) { +#if[FP16] case VECTOR_OP_ADD: return (v, m) -> - toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b))); + toBits(v.rOp(Float16.valueOf(0), m, (i, a, b) -> Float16.add(a, b))); case VECTOR_OP_MUL: return (v, m) -> - toBits(v.rOp(($type$)1, m, (i, a, b) -> ($type$)(a * b))); + toBits(v.rOp(Float16.valueOf(1), m, (i, a, b) -> Float16.multiply(a, b))); case VECTOR_OP_MIN: return (v, m) -> - toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b))); + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> Float16.min(a, b))); case VECTOR_OP_MAX: return (v, m) -> - toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b))); + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> Float16.max(a, b))); +#else[FP16] + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp(($elemtype$)0, m, (i, a, b) -> ($elemtype$)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp(($elemtype$)1, m, (i, a, b) -> ($elemtype$)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($elemtype$) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($elemtype$) Math.max(a, b))); +#end[FP16] #if[BITWISE] case VECTOR_OP_AND: return (v, m) -> - toBits(v.rOp(($type$)-1, m, (i, a, b) -> ($type$)(a & b))); + toBits(v.rOp(($elemtype$)-1, m, (i, a, b) -> ($elemtype$)(a & b))); case VECTOR_OP_OR: return (v, m) -> - toBits(v.rOp(($type$)0, m, (i, a, b) -> 
($type$)(a | b))); + toBits(v.rOp(($elemtype$)0, m, (i, a, b) -> ($elemtype$)(a | b))); case VECTOR_OP_XOR: return (v, m) -> - toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a ^ b))); + toBits(v.rOp(($elemtype$)0, m, (i, a, b) -> ($elemtype$)(a ^ b))); #end[BITWISE] default: return null; } } #if[FP] - private static final $type$ MIN_OR_INF = $Boxtype$.NEGATIVE_INFINITY; - private static final $type$ MAX_OR_INF = $Boxtype$.POSITIVE_INFINITY; + private static final $elemtype$ MIN_OR_INF = $Boxtype$.NEGATIVE_INFINITY; + private static final $elemtype$ MAX_OR_INF = $Boxtype$.POSITIVE_INFINITY; #else[FP] - private static final $type$ MIN_OR_INF = $Boxtype$.MIN_VALUE; - private static final $type$ MAX_OR_INF = $Boxtype$.MAX_VALUE; + private static final $elemtype$ MIN_OR_INF = $Boxtype$.MIN_VALUE; + private static final $elemtype$ MAX_OR_INF = $Boxtype$.MAX_VALUE; #end[FP] public @Override abstract long reduceLanesToLong(VectorOperators.Associative op); @@ -3361,7 +3431,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws IllegalArgumentException if the index is out of range * ({@code < 0 || >= length()}) */ - public abstract $type$ lane(int i); + public abstract $elemtype$ lane(int i); /** * Replaces the lane element of this vector at lane index {@code i} with @@ -3379,22 +3449,22 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * @throws IllegalArgumentException if the index is out of range * ({@code < 0 || >= length()}) */ - public abstract $abstractvectortype$ withLane(int i, $type$ e); + public abstract $abstractvectortype$ withLane(int i, $elemtype$ e); // Memory load operations /** - * Returns an array of type {@code $type$[]} + * Returns an array of type {@code $elemtype$[]} * containing all the lane values. * The array length is the same as the vector length. * The array elements are stored in lane order. *

    * This method behaves as if it stores * this vector into an allocated array - * (using {@link #intoArray($type$[], int) intoArray}) + * (using {@link #intoArray($elemtype$[], int) intoArray}) * and returns the array as follows: *

    {@code
    -     *   $type$[] a = new $type$[this.length()];
    +     *   $elemtype$[] a = new $elemtype$[this.length()];
          *   this.intoArray(a, 0);
          *   return a;
          * }
    @@ -3403,8 +3473,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { */ @ForceInline @Override - public final $type$[] toArray() { - $type$[] a = new $type$[vspecies().laneCount()]; + public final $elemtype$[] toArray() { + $elemtype$[] a = new $elemtype$[vspecies().laneCount()]; intoArray(a, 0); return a; } @@ -3438,10 +3508,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline @Override public final int[] toIntArray() { - $type$[] a = toArray(); + $elemtype$[] a = toArray(); int[] res = new int[a.length]; for (int i = 0; i < a.length; i++) { - $type$ e = a[i]; + $elemtype$ e = a[i]; res[i] = (int) $Type$Species.toIntegralChecked(e, true); } return res; @@ -3475,11 +3545,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline @Override public final long[] toLongArray() { - $type$[] a = toArray(); + $elemtype$[] a = toArray(); long[] res = new long[a.length]; for (int i = 0; i < a.length; i++) { - $type$ e = a[i]; +#if[FP16] + // Value range of integral casted Float16 value is a proper subset of + // long value range. + res[i] = a[i].longValue(); +#else[FP16] + $elemtype$ e = a[i]; res[i] = $Type$Species.toIntegralChecked(e, false); +#end[FP16] } return res; } @@ -3516,17 +3592,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline @Override public final double[] toDoubleArray() { - $type$[] a = toArray(); + $elemtype$[] a = toArray(); double[] res = new double[a.length]; for (int i = 0; i < a.length; i++) { - res[i] = (double) a[i]; + res[i] = {#if[FP16]?a[i].doubleValue():((double) a[i])}; } return res; } #end[double] /** - * Loads a vector from an array of type {@code $type$[]} + * Loads a vector from an array of type {@code $elemtype$[]} * starting at an offset. 
* For each vector lane, where {@code N} is the vector lane index, the * array element at index {@code offset + N} is placed into the @@ -3543,17 +3619,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline public static $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species, - $type$[] a, int offset) { + $elemtype$[] a, int offset) { offset = checkFromIndexSize(offset, species.length(), a.length); $Type$Species vsp = ($Type$Species) species; return vsp.dummyVector().fromArray0(a, offset); } /** - * Loads a vector from an array of type {@code $type$[]} + * Loads a vector from an array of type {@code $elemtype$[]} * starting at an offset and using a mask. * Lanes where the mask is unset are filled with the default - * value of {@code $type$} ({#if[FP]?positive }zero). + * value of {@code $elemtype$} ({#if[FP]?positive }zero). * For each vector lane, where {@code N} is the vector lane index, * if the mask lane at index {@code N} is set then the array element at * index {@code offset + N} is placed into the resulting vector at lane index @@ -3573,7 +3649,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline public static $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species, - $type$[] a, int offset, + $elemtype$[] a, int offset, VectorMask<$Boxtype$> m) { $Type$Species vsp = ($Type$Species) species; if (VectorIntrinsics.indexInRange(offset, vsp.length(), a.length)) { @@ -3586,7 +3662,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /** * Gathers a new vector composed of elements from an array of type - * {@code $type$[]}, + * {@code $elemtype$[]}, * using indexes obtained by adding a fixed {@code offset} to a * series of secondary offsets from an index map. 
* The index map is a contiguous sequence of {@code VLENGTH} @@ -3619,7 +3695,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline public static $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species, - $type$[] a, int offset, + $elemtype$[] a, int offset, int[] indexMap, int mapOffset) { $Type$Species vsp = ($Type$Species) species; return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]); @@ -3628,7 +3704,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline public static $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species, - $type$[] a, int offset, + $elemtype$[] a, int offset, int[] indexMap, int mapOffset) { $Type$Species vsp = ($Type$Species) species; IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); @@ -3669,7 +3745,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { vix = VectorIntrinsics.checkIndex(vix, a.length); return VectorSupport.loadWithMap( - vectorType, null, $type$.class, vsp.laneCount(), + vectorType, null, $elemtype$.class, vsp.laneCount(), isp.vectorType(), a, ARRAY_BASE, vix, null, a, offset, indexMap, mapOffset, vsp, @@ -3680,7 +3756,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /** * Gathers a new vector composed of elements from an array of type - * {@code $type$[]}, + * {@code $elemtype$[]}, * under the control of a mask, and * using indexes obtained by adding a fixed {@code offset} to a * series of secondary offsets from an index map. 
@@ -3718,7 +3794,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline public static $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species, - $type$[] a, int offset, + $elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { $Type$Species vsp = ($Type$Species) species; @@ -3728,7 +3804,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline public static $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species, - $type$[] a, int offset, + $elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { if (m.allTrue()) { @@ -3771,7 +3847,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * Loads a vector from an array of type {@code char[]} * starting at an offset and using a mask. * Lanes where the mask is unset are filled with the default - * value of {@code $type$} ({#if[FP]?positive }zero). + * value of {@code $elemtype$} ({#if[FP]?positive }zero). * For each vector lane, where {@code N} is the vector lane index, * if the mask lane at index {@code N} is set then the array element at * index {@code offset + N} @@ -3926,7 +4002,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * Loads a vector from an array of type {@code boolean[]} * starting at an offset and using a mask. * Lanes where the mask is unset are filled with the default - * value of {@code $type$} ({#if[FP]?positive }zero). + * value of {@code $elemtype$} ({#if[FP]?positive }zero). * For each vector lane, where {@code N} is the vector lane index, * if the mask lane at index {@code N} is set then the array element at * index {@code offset + N} @@ -4096,7 +4172,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * starting at an offset into the memory segment * and using a mask. 
* Lanes where the mask is unset are filled with the default - * value of {@code $type$} ({#if[FP]?positive }zero). + * value of {@code $elemtype$} ({#if[FP]?positive }zero). * Bytes are composed into primitive lane elements according * to the specified byte order. * The vector is arranged into lanes according to @@ -4105,7 +4181,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * The following pseudocode illustrates the behavior: *
    {@code
          * var slice = ms.asSlice(offset);
    -     * $type$[] ar = new $type$[species.length()];
    +     * $elemtype$[] ar = new $elemtype$[species.length()];
          * for (int n = 0; n < ar.length; n++) {
          *     if (m.laneIsSet(n)) {
          *         ar[n] = slice.getAtIndex(ValuaLayout.JAVA_$TYPE$.withByteAlignment(1), n);
    @@ -4158,14 +4234,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         // Memory store operations
     
         /**
    -     * Stores this vector into an array of type {@code $type$[]}
    +     * Stores this vector into an array of type {@code $elemtype$[]}
          * starting at an offset.
          * 

    * For each vector lane, where {@code N} is the vector lane index, * the lane element at index {@code N} is stored into the array * element {@code a[offset+N]}. * - * @param a the array, of type {@code $type$[]} + * @param a the array, of type {@code $elemtype$[]} * @param offset the offset into the array * @throws IndexOutOfBoundsException * if {@code offset+N < 0} or {@code offset+N >= a.length} @@ -4173,7 +4249,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { */ @ForceInline public final - void intoArray($type$[] a, int offset) { + void intoArray($elemtype$[] a, int offset) { offset = checkFromIndexSize(offset, length(), a.length); $Type$Species vsp = vspecies(); VectorSupport.store( @@ -4187,7 +4263,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } /** - * Stores this vector into an array of type {@code $type$[]} + * Stores this vector into an array of type {@code $elemtype$[]} * starting at offset and using a mask. *

    * For each vector lane, where {@code N} is the vector lane index, @@ -4202,7 +4278,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * That is, unset lanes may correspond to array indexes less than * zero or beyond the end of the array. * - * @param a the array, of type {@code $type$[]} + * @param a the array, of type {@code $elemtype$[]} * @param offset the offset into the array * @param m the mask controlling lane storage * @throws IndexOutOfBoundsException @@ -4212,7 +4288,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { */ @ForceInline public final - void intoArray($type$[] a, int offset, + void intoArray($elemtype$[] a, int offset, VectorMask<$Boxtype$> m) { if (m.allTrue()) { intoArray(a, offset); @@ -4226,7 +4302,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } /** - * Scatters this vector into an array of type {@code $type$[]} + * Scatters this vector into an array of type {@code $elemtype$[]} * using indexes obtained by adding a fixed {@code offset} to a * series of secondary offsets from an index map. 
* The index map is a contiguous sequence of {@code VLENGTH} @@ -4254,7 +4330,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #if[byteOrShort] @ForceInline public final - void intoArray($type$[] a, int offset, + void intoArray($elemtype$[] a, int offset, int[] indexMap, int mapOffset) { stOp(a, offset, (arr, off, i, e) -> { @@ -4265,7 +4341,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #else[byteOrShort] @ForceInline public final - void intoArray($type$[] a, int offset, + void intoArray($elemtype$[] a, int offset, int[] indexMap, int mapOffset) { $Type$Species vsp = vspecies(); IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); @@ -4318,7 +4394,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[byteOrShort] /** - * Scatters this vector into an array of type {@code $type$[]}, + * Scatters this vector into an array of type {@code $elemtype$[]}, * under the control of a mask, and * using indexes obtained by adding a fixed {@code offset} to a * series of secondary offsets from an index map. 
@@ -4350,7 +4426,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #if[byteOrShort] @ForceInline public final - void intoArray($type$[] a, int offset, + void intoArray($elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { stOp(a, offset, m, @@ -4362,7 +4438,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #else[byteOrShort] @ForceInline public final - void intoArray($type$[] a, int offset, + void intoArray($elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { if (m.allTrue()) { @@ -4763,10 +4839,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ abstract - $abstractvectortype$ fromArray0($type$[] a, int offset); + $abstractvectortype$ fromArray0($elemtype$[] a, int offset); @ForceInline final - $abstractvectortype$ fromArray0Template($type$[] a, int offset) { + $abstractvectortype$ fromArray0Template($elemtype$[] a, int offset) { $Type$Species vsp = vspecies(); return VectorSupport.load( vsp.vectorType(), vsp.elementType(), vsp.laneCount(), @@ -4778,11 +4854,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ abstract - $abstractvectortype$ fromArray0($type$[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange); + $abstractvectortype$ fromArray0($elemtype$[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange); @ForceInline final > - $abstractvectortype$ fromArray0Template(Class maskClass, $type$[] a, int offset, M m, int offsetInRange) { + $abstractvectortype$ fromArray0Template(Class maskClass, $elemtype$[] a, int offset, M m, int offsetInRange) { m.check(species()); $Type$Species vsp = vspecies(); return VectorSupport.loadMasked( @@ -4796,13 +4872,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #if[!byteOrShort] /*package-private*/ abstract - $abstractvectortype$ 
fromArray0($type$[] a, int offset, + $abstractvectortype$ fromArray0($elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m); @ForceInline final > - $abstractvectortype$ fromArray0Template(Class maskClass, $type$[] a, int offset, + $abstractvectortype$ fromArray0Template(Class maskClass, $elemtype$[] a, int offset, int[] indexMap, int mapOffset, M m) { $Type$Species vsp = vspecies(); IntVector.IntSpecies isp = IntVector.species(vsp.indexShape()); @@ -4845,7 +4921,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { vix = VectorIntrinsics.checkIndex(vix, a.length); return VectorSupport.loadWithMap( - vectorType, maskClass, $type$.class, vsp.laneCount(), + vectorType, maskClass, $elemtype$.class, vsp.laneCount(), isp.vectorType(), a, ARRAY_BASE, vix, m, a, offset, indexMap, mapOffset, vsp, @@ -4957,10 +5033,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // byte swapping. abstract - void intoArray0($type$[] a, int offset); + void intoArray0($elemtype$[] a, int offset); @ForceInline final - void intoArray0Template($type$[] a, int offset) { + void intoArray0Template($elemtype$[] a, int offset) { $Type$Species vsp = vspecies(); VectorSupport.store( vsp.vectorType(), vsp.elementType(), vsp.laneCount(), @@ -4972,11 +5048,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } abstract - void intoArray0($type$[] a, int offset, VectorMask<$Boxtype$> m); + void intoArray0($elemtype$[] a, int offset, VectorMask<$Boxtype$> m); @ForceInline final > - void intoArray0Template(Class maskClass, $type$[] a, int offset, M m) { + void intoArray0Template(Class maskClass, $elemtype$[] a, int offset, M m) { m.check(species()); $Type$Species vsp = vspecies(); VectorSupport.storeMasked( @@ -4990,13 +5066,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #if[!byteOrShort] abstract - void intoArray0($type$[] a, int offset, + void 
intoArray0($elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m); @ForceInline final > - void intoArray0Template(Class maskClass, $type$[] a, int offset, + void intoArray0Template(Class maskClass, $elemtype$[] a, int offset, int[] indexMap, int mapOffset, M m) { m.check(species()); $Type$Species vsp = vspecies(); @@ -5172,12 +5248,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } static final int ARRAY_SHIFT = - 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_$TYPE$_INDEX_SCALE); + 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_{#if[FP16]?OBJECT:$TYPE$}_INDEX_SCALE); static final long ARRAY_BASE = - Unsafe.ARRAY_$TYPE$_BASE_OFFSET; + Unsafe.ARRAY_{#if[FP16]?OBJECT:$TYPE$}_BASE_OFFSET; @ForceInline - static long arrayAddress($type$[] a, int index) { + static long arrayAddress($elemtype$[] a, int index) { return ARRAY_BASE + (((long)index) << ARRAY_SHIFT); } @@ -5241,7 +5317,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #if[BITWISE] return this; #else[BITWISE] - LaneType ilt = LaneType.$TYPE$.asIntegral(); + LaneType ilt = LaneType.{#if[FP16]?FLOAT16:$TYPE$}.asIntegral(); return ($Bitstype$Vector) asVectorRaw(ilt); #end[BITWISE] } @@ -5252,7 +5328,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * * @implNote This method always throws * {@code UnsupportedOperationException}, because there is no floating - * point type of the same size as {@code $type$}. The return type + * point type of the same size as {@code $elemtype$}. The return type * of this method is arbitrarily designated as * {@code Vector}. Future versions of this API may change the return * type if additional floating point types become available. 
@@ -5261,18 +5337,22 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline @Override public final - {#if[byteOrShort]?Vector:$Fptype$Vector} +#if[FP] + $Type$Vector +#else[FP] + {#if[byte]?Vector:$Boxfptype$Vector} +#end[FP] viewAsFloatingLanes() { #if[FP] return this; #else[FP] - LaneType flt = LaneType.$TYPE$.asFloating(); -#if[!byteOrShort] - return ($Fptype$Vector) asVectorRaw(flt); -#else[!byteOrShort] - // asFloating() will throw UnsupportedOperationException for the unsupported type $type$ + LaneType flt = {#if[short]?LaneType.FLOAT16.asFloating():LaneType.$TYPE$.asFloating()}; +#if[!byte] + return ($Boxfptype$Vector) asVectorRaw(flt); +#else[!byte] + // asFloating() will throw UnsupportedOperationException for the unsupported type $elemtype$ throw new AssertionError("Cannot reach here"); -#end[!byteOrShort] +#end[!byte] #end[FP] } @@ -5290,8 +5370,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * in lane order. * * The string is produced as if by a call to {@link - * java.util.Arrays#toString($type$[]) Arrays.toString()}, - * as appropriate to the {@code $type$} array returned by + * java.util.Arrays#toString($elemtype$[]) Arrays.toString()}, + * as appropriate to the {@code $elemtype$} array returned by * {@link #toArray this.toArray()}. 
* * @return a string of the form {@code "[0,1,2...]"} @@ -5345,7 +5425,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { Class vectorType, Class> maskType, Function vectorFactory) { - super(shape, LaneType.of($type$.class), + super(shape, LaneType.of($elemtype$.class), vectorType, maskType, vectorFactory); assert(this.elementSize() == $Boxtype$.SIZE); @@ -5356,7 +5436,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @Override @ForceInline public final Class<$Boxtype$> elementType() { - return $type$.class; + return $elemtype$.class; } @Override @@ -5385,14 +5465,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { final $abstractvectortype$ broadcastBits(long bits) { return ($abstractvectortype$) VectorSupport.fromBitsCoerced( - vectorType, $type$.class, laneCount, + vectorType, $elemtype$.class, laneCount, bits, MODE_BROADCAST, this, (bits_, s_) -> s_.rvOp(i -> bits_)); } /*package-private*/ @ForceInline - {#if[long]?public }final $abstractvectortype$ broadcast($type$ e) { + {#if[long]?public }final $abstractvectortype$ broadcast($elemtype$ e) { return broadcastBits(toBits(e)); } @@ -5413,8 +5493,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { return value; #else[long] // Do the conversion, and then test it for failure. - $type$ e = ($type$) value; - if ((long) e != value) { + $elemtype$ e = {#if[FP16]?Float16.valueOf(value):($elemtype$) value}; + if ({#if[FP16]?e.longValue():(long) e} != value) { throw badElementBits(value, e); } return toBits(e); @@ -5423,11 +5503,18 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ @ForceInline - static long toIntegralChecked($type$ e, boolean convertToInt) { + static long toIntegralChecked($elemtype$ e, boolean convertToInt) { +#if[FP16] + long value = convertToInt ? 
e.intValue() : e.longValue(); + if (value != e.longValue()) { + throw badArrayBits(e, convertToInt, value); + } +#else[FP16] long value = convertToInt ? (int) e : (long) e; - if (($type$) value != e) { + if (($elemtype$) value != e) { throw badArrayBits(e, convertToInt, value); } +#end[FP16] return value; } @@ -5436,14 +5523,22 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline final $abstractvectortype$ fromIntValues(int[] values) { VectorIntrinsics.requireLength(values.length, laneCount); - $type$[] va = new $type$[laneCount()]; + $elemtype$[] va = new $elemtype$[laneCount()]; for (int i = 0; i < va.length; i++) { int lv = values[i]; - $type$ v = ($type$) lv; +#if[FP16] + $elemtype$ v = Float16.valueOf(lv); + va[i] = v; + if ( v.intValue() != lv) { + throw badElementBits(lv, v); + } +#else[FP16] + $elemtype$ v = ($elemtype$) lv; va[i] = v; if ((int)v != lv) { throw badElementBits(lv, v); } +#end[FP16] } return dummyVector().fromArray0(va, 0); } @@ -5456,7 +5551,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // User entry point // Defer only to the equivalent method on the vector class, using the same inputs return $abstractvectortype$ - .fromArray(this, ($type$[]) a, offset); + .fromArray(this, ($elemtype$[]) a, offset); } @ForceInline @@ -5478,7 +5573,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { final @Override @ForceInline $abstractvectortype$ rvOp(RVOp f) { - $type$[] res = new $type$[laneCount()]; + $elemtype$[] res = new $elemtype$[laneCount()]; for (int i = 0; i < res.length; i++) { $bitstype$ bits = {#if[!long]?($bitstype$)} f.apply(i); res[i] = fromBits(bits); @@ -5487,7 +5582,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } $Type$Vector vOp(FVOp f) { - $type$[] res = new $type$[laneCount()]; + $elemtype$[] res = new $elemtype$[laneCount()]; for (int i = 0; i < res.length; i++) { res[i] = f.apply(i); 
} @@ -5495,7 +5590,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } $Type$Vector vOp(VectorMask<$Boxtype$> m, FVOp f) { - $type$[] res = new $type$[laneCount()]; + $elemtype$[] res = new $elemtype$[laneCount()]; boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); for (int i = 0; i < res.length; i++) { if (mbits[i]) { @@ -5616,10 +5711,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } /** - * Finds a species for an element type of {@code $type$} and shape. + * Finds a species for an element type of {@code $elemtype$} and shape. * * @param s the shape - * @return a species for an element type of {@code $type$} and shape + * @return a species for an element type of {@code $elemtype$} and shape * @throws IllegalArgumentException if no such species exists for the shape */ static $Type$Species species(VectorShape s) { @@ -5674,6 +5769,6 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * A preferred species is a species of maximal bit-size for the platform. */ public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED - = ($Type$Species) VectorSpecies.ofPreferred($type$.class); + = ($Type$Species) VectorSpecies.ofPreferred($elemtype$.class); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template index f2b36066fa7..13ff09bf7d1 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -52,19 +52,19 @@ final class $vectortype$ extends $abstractvectortype$ { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM + static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM - $vectortype$($type$[] v) { + $vectortype$($elemtype$[] v) { super(v); } // For compatibility as $vectortype$::new, // stored into species.vectorFactory. $vectortype$(Object v) { - this(($type$[]) v); + this(($elemtype$[]) v); } - static final $vectortype$ ZERO = new $vectortype$(new $type$[VLENGTH]); + static final $vectortype$ ZERO = new $vectortype$(new $elemtype$[VLENGTH]); static final $vectortype$ IOTA = new $vectortype$(VSPECIES.iotaArray()); static { @@ -88,7 +88,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline @Override - public final Class<$Boxtype$> elementType() { return $type$.class; } + public final Class<$Boxtype$> elementType() { return $elemtype$.class; } @ForceInline @Override @@ -113,15 +113,15 @@ final class $vectortype$ extends $abstractvectortype$ { /*package-private*/ @ForceInline final @Override - $type$[] vec() { - return ($type$[])getPayload(); + $elemtype$[] vec() { + return ($elemtype$[])getPayload(); } // Virtualized constructors @Override @ForceInline - public final $vectortype$ broadcast($type$ e) { + public final $vectortype$ broadcast($elemtype$ e) { return ($vectortype$) super.broadcastTemplate(e); // specialize } @@ -169,7 +169,7 @@ final class $vectortype$ extends $abstractvectortype$ { // Make a vector of the same species but the given elements: @ForceInline final @Override - $vectortype$ vectorFactory($type$[] vec) { + $vectortype$ vectorFactory($elemtype$[] vec) { return new $vectortype$(vec); } @@ -238,7 +238,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline final @Override - $type$ rOp($type$ v, 
VectorMask<$Boxtype$> m, FBinOp f) { + $elemtype$ rOp($elemtype$ v, VectorMask<$Boxtype$> m, FBinOp f) { return super.rOpTemplate(v, m, f); // specialize } @@ -337,13 +337,13 @@ final class $vectortype$ extends $abstractvectortype$ { @Override @ForceInline - public final $type$ reduceLanes(VectorOperators.Associative op) { + public final $elemtype$ reduceLanes(VectorOperators.Associative op) { return super.reduceLanesTemplate(op); // specialized } @Override @ForceInline - public final $type$ reduceLanes(VectorOperators.Associative op, + public final $elemtype$ reduceLanes(VectorOperators.Associative op, VectorMask<$Boxtype$> m) { return super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m); // specialized } @@ -351,14 +351,16 @@ final class $vectortype$ extends $abstractvectortype$ { @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op) { - return (long) super.reduceLanesTemplate(op); // specialized + $elemtype$ res = super.reduceLanesTemplate(op); // specialized + return {#if[FP16]?res.longValue(): (long) res}; } @Override @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask<$Boxtype$> m) { - return (long) super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m); // specialized + $elemtype$ res = super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m); // specialized + return {#if[FP16]?res.longValue(): (long) res}; } @ForceInline @@ -390,7 +392,7 @@ final class $vectortype$ extends $abstractvectortype$ { @Override @ForceInline - public final $masktype$ compare(Comparison op, $type$ s) { + public final $masktype$ compare(Comparison op, $elemtype$ s) { return super.compareTemplate($masktype$.class, op, s); // specialize } @@ -516,7 +518,7 @@ final class $vectortype$ extends $abstractvectortype$ { #if[FP] @ForceInline @Override - public $type$ lane(int i) { + public $elemtype$ lane(int i) { #if[!Max] $bitstype$ bits; switch(i) { @@ -552,7 +554,7 @@ final class 
$vectortype$ extends $abstractvectortype$ { } $bitstype$ bits = laneHelper(i); #end[!Max] - return $Type$.$bitstype$BitsTo$Fptype$(bits); + return $Elemtype$.$bitstype$BitsTo{#if[FP16]?Float16:$Fptype$}(bits); } public $bitstype$ laneHelper(int i) { @@ -560,14 +562,14 @@ final class $vectortype$ extends $abstractvectortype$ { VCLASS, ETYPE, VLENGTH, this, i, (vec, ix) -> { - $type$[] vecarr = vec.vec(); - return (long)$Type$.$type$To$Bitstype$Bits(vecarr[ix]); + $elemtype$[] vecarr = vec.vec(); + return (long)$Elemtype$.{#if[FP16]?float16:$elemtype$}To$Bitstype$Bits(vecarr[ix]); }); } @ForceInline @Override - public $vectortype$ withLane(int i, $type$ e) { + public $vectortype$ withLane(int i, $elemtype$ e) { #if[!Max] switch(i) { case 0: return withLaneHelper(0, e); @@ -604,20 +606,20 @@ final class $vectortype$ extends $abstractvectortype$ { #end[!Max] } - public $vectortype$ withLaneHelper(int i, $type$ e) { + public $vectortype$ withLaneHelper(int i, $elemtype$ e) { return VectorSupport.insert( VCLASS, ETYPE, VLENGTH, - this, i, (long)$Type$.$type$To$Bitstype$Bits(e), + this, i, (long)$Elemtype$.{#if[FP16]?float16:$elemtype$}To$Bitstype$Bits(e), (v, ix, bits) -> { - $type$[] res = v.vec().clone(); - res[ix] = $Type$.$bitstype$BitsTo$Type$(($bitstype$)bits); + $elemtype$[] res = v.vec().clone(); + res[ix] = $Elemtype$.$bitstype$BitsTo$Elemtype$(($bitstype$)bits); return v.vectorFactory(res); }); } #else[FP] @ForceInline @Override - public $type$ lane(int i) { + public $elemtype$ lane(int i) { #if[!Max] switch(i) { case 0: return laneHelper(0); @@ -706,19 +708,19 @@ final class $vectortype$ extends $abstractvectortype$ { #end[!Max] } - public $type$ laneHelper(int i) { - return ($type$) VectorSupport.extract( + public $elemtype$ laneHelper(int i) { + return ($elemtype$) VectorSupport.extract( VCLASS, ETYPE, VLENGTH, this, i, (vec, ix) -> { - $type$[] vecarr = vec.vec(); + $elemtype$[] vecarr = vec.vec(); return (long)vecarr[ix]; }); } @ForceInline @Override - 
public $vectortype$ withLane(int i, $type$ e) { + public $vectortype$ withLane(int i, $elemtype$ e) { #if[!Max] switch (i) { case 0: return withLaneHelper(0, e); @@ -807,13 +809,13 @@ final class $vectortype$ extends $abstractvectortype$ { #end[!Max] } - public $vectortype$ withLaneHelper(int i, $type$ e) { + public $vectortype$ withLaneHelper(int i, $elemtype$ e) { return VectorSupport.insert( VCLASS, ETYPE, VLENGTH, this, i, (long)e, (v, ix, bits) -> { - $type$[] res = v.vec().clone(); - res[ix] = ($type$)bits; + $elemtype$[] res = v.vec().clone(); + res[ix] = ($elemtype$)bits; return v.vectorFactory(res); }); } @@ -823,7 +825,7 @@ final class $vectortype$ extends $abstractvectortype$ { static final class $masktype$ extends AbstractMask<$Boxtype$> { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM + static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM $masktype$(boolean[] bits) { this(bits, 0); @@ -925,7 +927,7 @@ final class $vectortype$ extends $abstractvectortype$ { /*package-private*/ $masktype$ indexPartiallyInUpperRange(long offset, long limit) { return ($masktype$) VectorSupport.indexPartiallyInUpperRange( - $masktype$.class, $type$.class, VLENGTH, offset, limit, + $masktype$.class, ETYPE, VLENGTH, offset, limit, (o, l) -> ($masktype$) TRUE_MASK.indexPartiallyInRange(o, l)); } @@ -942,7 +944,11 @@ final class $vectortype$ extends $abstractvectortype$ { public $masktype$ compress() { return ($masktype$)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS, $vectortype$.class, $masktype$.class, ETYPE, VLENGTH, null, this, +#if[FP16] + (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, Float.floatToFloat16(m1.trueCount()))); +#else[FP16] (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount())); +#end[FP16] } @@ -953,7 +959,7 @@ final class $vectortype$ extends $abstractvectortype$ { public $masktype$ 
and(VectorMask<$Boxtype$> mask) { Objects.requireNonNull(mask); $masktype$ m = ($masktype$)mask; - return VectorSupport.binaryOp(VECTOR_OP_AND, $masktype$.class, null, $bitstype$.class, VLENGTH, + return VectorSupport.binaryOp(VECTOR_OP_AND, $masktype$.class, null, $maskbitstype$.class, VLENGTH, this, m, null, (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); } @@ -963,7 +969,7 @@ final class $vectortype$ extends $abstractvectortype$ { public $masktype$ or(VectorMask<$Boxtype$> mask) { Objects.requireNonNull(mask); $masktype$ m = ($masktype$)mask; - return VectorSupport.binaryOp(VECTOR_OP_OR, $masktype$.class, null, $bitstype$.class, VLENGTH, + return VectorSupport.binaryOp(VECTOR_OP_OR, $masktype$.class, null, $maskbitstype$.class, VLENGTH, this, m, null, (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); } @@ -973,7 +979,7 @@ final class $vectortype$ extends $abstractvectortype$ { public $masktype$ xor(VectorMask<$Boxtype$> mask) { Objects.requireNonNull(mask); $masktype$ m = ($masktype$)mask; - return VectorSupport.binaryOp(VECTOR_OP_XOR, $masktype$.class, null, $bitstype$.class, VLENGTH, + return VectorSupport.binaryOp(VECTOR_OP_XOR, $masktype$.class, null, $maskbitstype$.class, VLENGTH, this, m, null, (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); } @@ -983,21 +989,21 @@ final class $vectortype$ extends $abstractvectortype$ { @Override @ForceInline public int trueCount() { - return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, $masktype$.class, $bitstype$.class, VLENGTH, this, + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, $masktype$.class, $maskbitstype$.class, VLENGTH, this, (m) -> trueCountHelper(m.getBits())); } @Override @ForceInline public int firstTrue() { - return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this, + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, $masktype$.class, $maskbitstype$.class, VLENGTH, 
this, (m) -> firstTrueHelper(m.getBits())); } @Override @ForceInline public int lastTrue() { - return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this, + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, $masktype$.class, $maskbitstype$.class, VLENGTH, this, (m) -> lastTrueHelper(m.getBits())); } @@ -1007,7 +1013,7 @@ final class $vectortype$ extends $abstractvectortype$ { if (length() > Long.SIZE) { throw new UnsupportedOperationException("too many lanes for one long"); } - return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, $masktype$.class, $bitstype$.class, VLENGTH, this, + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, $masktype$.class, $maskbitstype$.class, VLENGTH, this, (m) -> toLongHelper(m.getBits())); } @@ -1017,7 +1023,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline public boolean laneIsSet(int i) { Objects.checkIndex(i, length()); - return VectorSupport.extract($masktype$.class, $type$.class, VLENGTH, + return VectorSupport.extract($masktype$.class, $elemtype$.class, VLENGTH, this, i, (m, idx) -> (m.getBits()[idx] ? 
1L : 0L)) == 1L; } @@ -1026,7 +1032,7 @@ final class $vectortype$ extends $abstractvectortype$ { @Override @ForceInline public boolean anyTrue() { - return VectorSupport.test(BT_ne, $masktype$.class, $bitstype$.class, VLENGTH, + return VectorSupport.test(BT_ne, $masktype$.class, $maskbitstype$.class, VLENGTH, this, vspecies().maskAll(true), (m, __) -> anyTrueHelper((($masktype$)m).getBits())); } @@ -1034,7 +1040,7 @@ final class $vectortype$ extends $abstractvectortype$ { @Override @ForceInline public boolean allTrue() { - return VectorSupport.test(BT_overflow, $masktype$.class, $bitstype$.class, VLENGTH, + return VectorSupport.test(BT_overflow, $masktype$.class, $maskbitstype$.class, VLENGTH, this, vspecies().maskAll(true), (m, __) -> allTrueHelper((($masktype$)m).getBits())); } @@ -1042,7 +1048,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline /*package-private*/ static $masktype$ maskAll(boolean bit) { - return VectorSupport.fromBitsCoerced($masktype$.class, $bitstype$.class, VLENGTH, + return VectorSupport.fromBitsCoerced($masktype$.class, $maskbitstype$.class, VLENGTH, (bit ? -1 : 0), MODE_BROADCAST, null, (v, __) -> (v != 0 ? 
TRUE_MASK : FALSE_MASK)); } @@ -1070,7 +1076,7 @@ final class $vectortype$ extends $abstractvectortype$ { static final class $shuffletype$ extends AbstractShuffle<$Boxtype$> { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM + static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM $shuffletype$(byte[] reorder) { super(VLENGTH, reorder); @@ -1140,14 +1146,14 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline @Override final - $abstractvectortype$ fromArray0($type$[] a, int offset) { + $abstractvectortype$ fromArray0($elemtype$[] a, int offset) { return super.fromArray0Template(a, offset); // specialize } @ForceInline @Override final - $abstractvectortype$ fromArray0($type$[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange) { + $abstractvectortype$ fromArray0($elemtype$[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange) { return super.fromArray0Template($masktype$.class, a, offset, ($masktype$) m, offsetInRange); // specialize } @@ -1155,7 +1161,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline @Override final - $abstractvectortype$ fromArray0($type$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { + $abstractvectortype$ fromArray0($elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { return super.fromArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m); } #end[!byteOrShort] @@ -1209,14 +1215,14 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline @Override final - void intoArray0($type$[] a, int offset) { + void intoArray0($elemtype$[] a, int offset) { super.intoArray0Template(a, offset); // specialize } @ForceInline @Override final - void intoArray0($type$[] a, int offset, VectorMask<$Boxtype$> m) { + void intoArray0($elemtype$[] a, int offset, VectorMask<$Boxtype$> m) { 
super.intoArray0Template($masktype$.class, a, offset, ($masktype$) m); } @@ -1224,7 +1230,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline @Override final - void intoArray0($type$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { + void intoArray0($elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) { super.intoArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m); } #end[!byteOrShort] diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh index 6841a47c757..19aad5b3b5a 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh @@ -53,10 +53,12 @@ typeprefix= globalArgs="" #globalArgs="$globalArgs -KextraOverrides" -for type in byte short int long float double +for type in byte short int long float double Halffloat do + Type="$(tr '[:lower:]' '[:upper:]' <<< ${type:0:1})${type:1}" TYPE="$(tr '[:lower:]' '[:upper:]' <<< ${type})" + args=$globalArgs args="$args -K$type -Dtype=$type -DType=$Type -DTYPE=$TYPE" @@ -66,25 +68,33 @@ do kind=BITWISE bitstype=$type + maskbitstype=$type Bitstype=$Type Boxbitstype=$Boxtype fptype=$type Fptype=$Type Boxfptype=$Boxtype + elemtype=$type + Elemtype=$Type + FPtype=$type + - case $type in - byte) + case $Type in + Byte) Wideboxtype=Integer sizeInBytes=1 args="$args -KbyteOrShort" ;; - short) + Short) + fptype=float16 + Fptype=Float16 + Boxfptype=Halffloat Wideboxtype=Integer sizeInBytes=2 args="$args -KbyteOrShort" ;; - int) + Int) Boxtype=Integer Wideboxtype=Integer Boxbitstype=Integer @@ -94,35 +104,55 @@ do sizeInBytes=4 args="$args -KintOrLong -KintOrFP -KintOrFloat" ;; - long) + Long) fptype=double Fptype=Double Boxfptype=Double sizeInBytes=8 args="$args -KintOrLong -KlongOrDouble" ;; - float) + Float) kind=FP 
bitstype=int + maskbitstype=int Bitstype=Int Boxbitstype=Integer sizeInBytes=4 args="$args -KintOrFP -KintOrFloat" + FPtype=FP32 ;; - double) + Double) kind=FP bitstype=long + maskbitstype=long Bitstype=Long Boxbitstype=Long sizeInBytes=8 args="$args -KintOrFP -KlongOrDouble" + FPtype=FP64 + ;; + Halffloat) + kind=FP + bitstype=short + maskbitstype=short + Bitstype=Short + Boxbitstype=Short + sizeInBytes=2 + Boxtype=Float16 + elemtype=Float16 + Elemtype=Float16 + FPtype=FP16 + fptype=float16 + Fptype=Float16 + args="$args -KbyteOrShort -KshortOrFP -KshortOrHalffloat" ;; esac - args="$args -K$kind -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype" - args="$args -Dbitstype=$bitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype" + args="$args -K$FPtype -K$kind -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype" + args="$args -Dbitstype=$bitstype -Dmaskbitstype=$maskbitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype" args="$args -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype" args="$args -DsizeInBytes=$sizeInBytes" + args="$args -Delemtype=$elemtype -DElemtype=$Elemtype" abstractvectortype=${typeprefix}${Type}Vector abstractbitsvectortype=${typeprefix}${Bitstype}Vector diff --git a/test/jdk/jdk/incubator/vector/Short128VectorTests.java b/test/jdk/jdk/incubator/vector/Short128VectorTests.java index e40a40686c9..8ffeaa22a88 100644 --- a/test/jdk/jdk/incubator/vector/Short128VectorTests.java +++ b/test/jdk/jdk/incubator/vector/Short128VectorTests.java @@ -1382,9 +1382,14 @@ void viewAsIntegeralLanesTest() { Assert.assertEquals(asIntegral.species(), SPECIES); } - @Test(expectedExceptions = UnsupportedOperationException.class) + @Test void viewAsFloatingLanesTest() { - SPECIES.zero().viewAsFloatingLanes(); + Vector asFloating = SPECIES.zero().viewAsFloatingLanes(); + VectorSpecies asFloatingSpecies = asFloating.species(); + Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); + Assert.assertEquals(asFloatingSpecies.vectorShape(), 
SPECIES.vectorShape()); + Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length()); + Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); } @Test diff --git a/test/jdk/jdk/incubator/vector/Short256VectorTests.java b/test/jdk/jdk/incubator/vector/Short256VectorTests.java index 02138e3e8aa..59248b01a3c 100644 --- a/test/jdk/jdk/incubator/vector/Short256VectorTests.java +++ b/test/jdk/jdk/incubator/vector/Short256VectorTests.java @@ -1382,9 +1382,14 @@ void viewAsIntegeralLanesTest() { Assert.assertEquals(asIntegral.species(), SPECIES); } - @Test(expectedExceptions = UnsupportedOperationException.class) + @Test void viewAsFloatingLanesTest() { - SPECIES.zero().viewAsFloatingLanes(); + Vector asFloating = SPECIES.zero().viewAsFloatingLanes(); + VectorSpecies asFloatingSpecies = asFloating.species(); + Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); + Assert.assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape()); + Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length()); + Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); } @Test diff --git a/test/jdk/jdk/incubator/vector/Short512VectorTests.java b/test/jdk/jdk/incubator/vector/Short512VectorTests.java index 9577f22f58c..6524a0be416 100644 --- a/test/jdk/jdk/incubator/vector/Short512VectorTests.java +++ b/test/jdk/jdk/incubator/vector/Short512VectorTests.java @@ -1382,9 +1382,14 @@ void viewAsIntegeralLanesTest() { Assert.assertEquals(asIntegral.species(), SPECIES); } - @Test(expectedExceptions = UnsupportedOperationException.class) + @Test void viewAsFloatingLanesTest() { - SPECIES.zero().viewAsFloatingLanes(); + Vector asFloating = SPECIES.zero().viewAsFloatingLanes(); + VectorSpecies asFloatingSpecies = asFloating.species(); + Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); + Assert.assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape()); + 
Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length()); + Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); } @Test diff --git a/test/jdk/jdk/incubator/vector/Short64VectorTests.java b/test/jdk/jdk/incubator/vector/Short64VectorTests.java index 71b3c6046b4..87853b4c182 100644 --- a/test/jdk/jdk/incubator/vector/Short64VectorTests.java +++ b/test/jdk/jdk/incubator/vector/Short64VectorTests.java @@ -1382,9 +1382,14 @@ void viewAsIntegeralLanesTest() { Assert.assertEquals(asIntegral.species(), SPECIES); } - @Test(expectedExceptions = UnsupportedOperationException.class) + @Test void viewAsFloatingLanesTest() { - SPECIES.zero().viewAsFloatingLanes(); + Vector asFloating = SPECIES.zero().viewAsFloatingLanes(); + VectorSpecies asFloatingSpecies = asFloating.species(); + Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); + Assert.assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape()); + Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length()); + Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); } @Test diff --git a/test/jdk/jdk/incubator/vector/ShortMaxVectorTests.java b/test/jdk/jdk/incubator/vector/ShortMaxVectorTests.java index 4a6adbf2c8e..b10f951913e 100644 --- a/test/jdk/jdk/incubator/vector/ShortMaxVectorTests.java +++ b/test/jdk/jdk/incubator/vector/ShortMaxVectorTests.java @@ -1387,9 +1387,14 @@ void viewAsIntegeralLanesTest() { Assert.assertEquals(asIntegral.species(), SPECIES); } - @Test(expectedExceptions = UnsupportedOperationException.class) + @Test void viewAsFloatingLanesTest() { - SPECIES.zero().viewAsFloatingLanes(); + Vector asFloating = SPECIES.zero().viewAsFloatingLanes(); + VectorSpecies asFloatingSpecies = asFloating.species(); + Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); + Assert.assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape()); + Assert.assertEquals(asFloatingSpecies.length(), 
SPECIES.length()); + Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); } @Test diff --git a/test/jdk/jdk/incubator/vector/templates/Unit-header.template b/test/jdk/jdk/incubator/vector/templates/Unit-header.template index 4d3795ea3d1..016a570f3f1 100644 --- a/test/jdk/jdk/incubator/vector/templates/Unit-header.template +++ b/test/jdk/jdk/incubator/vector/templates/Unit-header.template @@ -1750,13 +1750,13 @@ relativeError)); Assert.assertEquals(asFloating.species(), SPECIES); } #else[FP] -#if[byteOrShort] +#if[byte] @Test(expectedExceptions = UnsupportedOperationException.class) void viewAsFloatingLanesTest() { SPECIES.zero().viewAsFloatingLanes(); } -#else[byteOrShort] +#else[byte] @Test void viewAsFloatingLanesTest() { @@ -1767,7 +1767,7 @@ relativeError)); Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length()); Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); } -#end[byteOrShort] +#end[byte] #end[FP] #if[BITWISE]