From 6ff94230aa19fb10ffd914f2bedf50910f3c98cb Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jatin.bhateja@intel.com>
Date: Tue, 3 Sep 2024 22:18:07 +0530
Subject: [PATCH] 8339494: Porting HalfFloatVector classes.

---
 .../share/classes/java/lang/Float16.java      |   50 +-
 .../jdk/internal/vm/vector/VectorSupport.java |    1 +
 .../jdk/incubator/vector/AbstractSpecies.java |   32 +-
 .../jdk/incubator/vector/AbstractVector.java  |   25 +
 .../jdk/incubator/vector/Byte128Vector.java   |   10 +-
 .../jdk/incubator/vector/Byte256Vector.java   |   10 +-
 .../jdk/incubator/vector/Byte512Vector.java   |   10 +-
 .../jdk/incubator/vector/Byte64Vector.java    |   10 +-
 .../jdk/incubator/vector/ByteMaxVector.java   |   10 +-
 .../jdk/incubator/vector/ByteVector.java      |   13 +-
 .../jdk/incubator/vector/Double128Vector.java |   10 +-
 .../jdk/incubator/vector/Double256Vector.java |   10 +-
 .../jdk/incubator/vector/Double512Vector.java |   10 +-
 .../jdk/incubator/vector/Double64Vector.java  |   10 +-
 .../jdk/incubator/vector/DoubleMaxVector.java |   10 +-
 .../jdk/incubator/vector/DoubleVector.java    |   16 +-
 .../jdk/incubator/vector/Float128Vector.java  |   10 +-
 .../jdk/incubator/vector/Float256Vector.java  |   10 +-
 .../jdk/incubator/vector/Float512Vector.java  |   10 +-
 .../jdk/incubator/vector/Float64Vector.java   |   10 +-
 .../jdk/incubator/vector/FloatMaxVector.java  |   10 +-
 .../jdk/incubator/vector/FloatVector.java     |   18 +-
 .../incubator/vector/Halffloat128Vector.java  |  917 ++++
 .../incubator/vector/Halffloat256Vector.java  |  933 ++++
 .../incubator/vector/Halffloat512Vector.java  |  933 ++++
 .../incubator/vector/Halffloat64Vector.java   |  909 ++++
 .../incubator/vector/HalffloatMaxVector.java  |  902 ++++
 .../jdk/incubator/vector/HalffloatVector.java | 3853 +++++++++++++++++
 .../jdk/incubator/vector/Int128Vector.java    |   10 +-
 .../jdk/incubator/vector/Int256Vector.java    |   10 +-
 .../jdk/incubator/vector/Int512Vector.java    |   10 +-
 .../jdk/incubator/vector/Int64Vector.java     |   10 +-
 .../jdk/incubator/vector/IntMaxVector.java    |   10 +-
 .../jdk/incubator/vector/IntVector.java       |   13 +-
 .../jdk/incubator/vector/LaneType.java        |   24 +-
 .../jdk/incubator/vector/Long128Vector.java   |   10 +-
 .../jdk/incubator/vector/Long256Vector.java   |   10 +-
 .../jdk/incubator/vector/Long512Vector.java   |   10 +-
 .../jdk/incubator/vector/Long64Vector.java    |   10 +-
 .../jdk/incubator/vector/LongMaxVector.java   |   10 +-
 .../jdk/incubator/vector/LongVector.java      |   13 +-
 .../jdk/incubator/vector/Short128Vector.java  |   10 +-
 .../jdk/incubator/vector/Short256Vector.java  |   10 +-
 .../jdk/incubator/vector/Short512Vector.java  |   10 +-
 .../jdk/incubator/vector/Short64Vector.java   |   10 +-
 .../jdk/incubator/vector/ShortMaxVector.java  |   10 +-
 .../jdk/incubator/vector/ShortVector.java     |   20 +-
 .../classes/jdk/incubator/vector/Vector.java  |   13 +
 .../jdk/incubator/vector/VectorShape.java     |   10 +-
 .../incubator/vector/X-Vector.java.template   |  899 ++--
 .../vector/X-VectorBits.java.template         |  112 +-
 .../classes/jdk/incubator/vector/gen-src.sh   |   50 +-
 .../incubator/vector/Short128VectorTests.java |    9 +-
 .../incubator/vector/Short256VectorTests.java |    9 +-
 .../incubator/vector/Short512VectorTests.java |    9 +-
 .../incubator/vector/Short64VectorTests.java  |    9 +-
 .../incubator/vector/ShortMaxVectorTests.java |    9 +-
 .../vector/templates/Unit-header.template     |    6 +-
 58 files changed, 9438 insertions(+), 669 deletions(-)
 create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java
 create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java
 create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java
 create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java
 create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java
 create mode 100644 src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java

diff --git a/src/java.base/share/classes/java/lang/Float16.java b/src/java.base/share/classes/java/lang/Float16.java
index 9a326b49d9d..49fcc0bb661 100644
--- a/src/java.base/share/classes/java/lang/Float16.java
+++ b/src/java.base/share/classes/java/lang/Float16.java
@@ -73,6 +73,7 @@
 // Enhanced Primitive Boxes described by JEP-402 (https://openjdk.org/jeps/402)
 @jdk.internal.MigratedValueClass
 @jdk.internal.ValueBased
+@SuppressWarnings("serial")
 public final class Float16
     extends Number
     implements Comparable<Float16> {
@@ -80,14 +81,11 @@ public final class Float16
     private static final long serialVersionUID = 16; // Not needed for a value class?
 
     // Functionality for future consideration:
-    // float16ToShortBits that normalizes NaNs, c.f. floatToIntBits vs floatToRawIntBits
     // copysign
     // scalb
     // nextUp / nextDown
     // IEEEremainder / remainder operator remainder
     // signum
-    // valueOf(BigDecimal) -- main implementation could be package private in BigDecimal
-
    /**
     * Returns a {@code Float16} instance wrapping IEEE 754 binary16
     * encoded {@code short} value.
@@ -281,11 +279,11 @@ public static Float16 valueOf(int value) {
     * @param  value a {@code long} value.
     */
     public static Float16 valueOf(long value) {
-        if (value < -65_504L) {
+        if (value <= -65_520L) {  // -(Float16.MAX_VALUE + Float16.ulp(Float16.MAX_VALUE) / 2)
             return NEGATIVE_INFINITY;
         } else {
-            if (value > 65_504L) {
-                return NEGATIVE_INFINITY;
+            if (value >= 65_520L) {  // Float16.MAX_VALUE + Float16.ulp(Float16.MAX_VALUE) / 2
+                return POSITIVE_INFINITY;
             }
             // Remaining range of long, the integers in approx. +/-
             // 2^16, all fit in a float so the correct conversion can
@@ -572,6 +570,45 @@ public double doubleValue() {
         return (double)floatValue();
     }
 
+    /**
+     * Returns a representation of the specified floating-point value
+     * according to the IEEE 754 floating-point "binary16" bit
+     * layout.
+     *
+     * <p>Bit 15 (the bit that is selected by the mask
+     * {@code 0x8000}) represents the sign of the floating-point
+     * number.
+     * Bits 14-10 (the bits that are selected by the mask
+     * {@code 0x7C00}) represent the exponent.
+     * Bits 9-0 (the bits that are selected by the mask
+     * {@code 0x03FF}) represent the significand (sometimes called
+     * the mantissa) of the floating-point number.
+     *
+     * <p>If the argument is positive infinity, the result is
+     * {@code 0x7C00}.
+     *
+     * <p>If the argument is negative infinity, the result is
+     * {@code 0xFC00}.
+     *
+     * <p>If the argument is NaN, the result is {@code 0x7E00}.
+     *
+     * <p>In all cases, the result is a short that, when given to the
+     * {@link #shortBitsToFloat16(short)} method, will produce a floating-point
+     * value the same as the argument to {@code float16ToShortBits}
+     * (except all NaN values are collapsed to a single
+     * "canonical" NaN value).
+     *
+     * @param   f16   an IEEE 754 binary16 floating-point number.
+     * @return the bits that represent the floating-point number.
+     */
+    //@IntrinsicCandidate
+    public static short float16ToShortBits(Float16 f16) {
+        if (!isNaN(f16)) {
+            return float16ToRawShortBits(f16);
+        }
+        return 0x7E00;
+    }
+
     // Skipping for now:
     // public int hashCode()
     // public static int hashCode(Float16 value)
@@ -855,7 +892,6 @@ public static Float16 divide(Float16 dividend, Float16 divisor) {
      *
      * @param radicand the argument to have its square root taken
      *
-     * @see Math#sqrt(float)
      * @see Math#sqrt(double)
      */
     // @IntrinsicCandidate
diff --git a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
index ccfa006b102..eb30975997f 100644
--- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
+++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
@@ -139,6 +139,7 @@ public class VectorSupport {
 
     // BasicType codes, for primitives only:
     public static final int
+        T_FLOAT16 = 5,
         T_FLOAT   = 6,
         T_DOUBLE  = 7,
         T_BYTE    = 8,
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java
index 0ff4830ded5..c816c61e26e 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java
@@ -406,18 +406,26 @@ final IllegalArgumentException badArrayBits(Object iv,
     Object iotaArray() {
         // Create an iota array.  It's OK if this is really slow,
         // because it happens only once per species.
-        Object ia = Array.newInstance(laneType.elementType,
-                                      laneCount);
-        assert(ia.getClass() == laneType.arrayType);
+        Object ia = Array.newInstance(laneType.elementType, laneCount);
         checkValue(laneCount-1);  // worst case
-        for (int i = 0; i < laneCount; i++) {
-            if ((byte)i == i)
-                Array.setByte(ia, i, (byte)i);
-            else if ((short)i == i)
-                Array.setShort(ia, i, (short)i);
-            else
-                Array.setInt(ia, i, i);
-            assert(Array.getDouble(ia, i) == i);
+        assert(ia.getClass() == laneType.arrayType);
+        if (elementType() == Float16.class) {
+            Float16 [] f16arr = (Float16[])ia;
+            for (int i = 0; i < laneCount; i++) {
+                // Note: every integer in the range [0, 2048] is exactly
+                // representable in the FP16 format, so no precision is lost.
+                f16arr[i] = Float16.valueOf((float)i);
+            }
+        } else {
+            for (int i = 0; i < laneCount; i++) {
+                if ((byte)i == i)
+                    Array.setByte(ia, i, (byte)i);
+                else if ((short)i == i)
+                    Array.setShort(ia, i, (short)i);
+                else
+                    Array.setInt(ia, i, i);
+                assert(Array.getDouble(ia, i) == i);
+            }
         }
         return ia;
     }
@@ -615,6 +623,8 @@ AbstractSpecies<?> computeSpecies(LaneType laneType,
             s = IntVector.species(shape); break;
         case LaneType.SK_LONG:
             s = LongVector.species(shape); break;
+        case LaneType.SK_FLOAT16:
+            s = HalffloatVector.species(shape); break;
         }
         if (s == null) {
             // NOTE: The result of this method is guaranteed to be
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java
index 64d681e4aee..da612e97a1f 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java
@@ -262,6 +262,15 @@ public DoubleVector reinterpretAsDoubles() {
         return (DoubleVector) asVectorRaw(LaneType.DOUBLE);
     }
 
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public HalffloatVector reinterpretAsHalffloats() {
+        return (HalffloatVector) asVectorRaw(LaneType.FLOAT16);
+    }
+
     /**
      * {@inheritDoc} <!--workaround-->
      */
@@ -533,6 +542,8 @@ AbstractVector<F> defaultReinterpret(AbstractSpecies<F> rsp) {
             return FloatVector.fromMemorySegment(rsp.check(float.class), ms, 0, bo, m.check(float.class)).check0(rsp);
         case LaneType.SK_DOUBLE:
             return DoubleVector.fromMemorySegment(rsp.check(double.class), ms, 0, bo, m.check(double.class)).check0(rsp);
+        case LaneType.SK_FLOAT16:
+            return HalffloatVector.fromMemorySegment(rsp.check(Float16.class), ms, 0, bo, m.check(Float16.class)).check0(rsp);
         default:
             throw new AssertionError(rsp.toString());
         }
@@ -595,6 +606,13 @@ AbstractVector<F> defaultCast(AbstractSpecies<F> dsp) {
                 }
                 return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
             }
+            case LaneType.SK_FLOAT16: {
+                Float16[] a = new Float16[rlength];
+                for (int i = 0; i < limit; i++) {
+                    a[i] = Float16.valueOf(lanes[i]);
+                }
+                return HalffloatVector.fromArray(dsp.check(Float16.class), a, 0).check0(dsp);
+            }
             default: break;
             }
         } else {
@@ -645,6 +663,13 @@ AbstractVector<F> defaultCast(AbstractSpecies<F> dsp) {
                 }
                 return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
             }
+            case LaneType.SK_FLOAT16: {
+                Float16[] a = new Float16[rlength];
+                for (int i = 0; i < limit; i++) {
+                    a[i] = Float16.valueOf(lanes[i]);
+                }
+                return HalffloatVector.fromArray(dsp.check(Float16.class), a, 0).check0(dsp);
+            }
             default: break;
             }
         }
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java
index af60895899f..a99b419538a 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        byte res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Byte> m) {
-        return (long) super.reduceLanesTemplate(op, Byte128Mask.class, (Byte128Mask) m);  // specialized
+        byte res = super.reduceLanesTemplate(op, Byte128Mask.class, (Byte128Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -682,7 +684,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Byte128Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Byte128Mask) VectorSupport.indexPartiallyInUpperRange(
-                Byte128Mask.class, byte.class, VLENGTH, offset, limit,
+                Byte128Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Byte128Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java
index 1dcbbd26907..1fcb0359f14 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        byte res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Byte> m) {
-        return (long) super.reduceLanesTemplate(op, Byte256Mask.class, (Byte256Mask) m);  // specialized
+        byte res = super.reduceLanesTemplate(op, Byte256Mask.class, (Byte256Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -714,7 +716,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Byte256Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Byte256Mask) VectorSupport.indexPartiallyInUpperRange(
-                Byte256Mask.class, byte.class, VLENGTH, offset, limit,
+                Byte256Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Byte256Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java
index 9e99a1916a7..ae9950cfe43 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        byte res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Byte> m) {
-        return (long) super.reduceLanesTemplate(op, Byte512Mask.class, (Byte512Mask) m);  // specialized
+        byte res = super.reduceLanesTemplate(op, Byte512Mask.class, (Byte512Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -778,7 +780,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Byte512Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Byte512Mask) VectorSupport.indexPartiallyInUpperRange(
-                Byte512Mask.class, byte.class, VLENGTH, offset, limit,
+                Byte512Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Byte512Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java
index 85276b2eb19..f6760cd2ff1 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        byte res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Byte> m) {
-        return (long) super.reduceLanesTemplate(op, Byte64Mask.class, (Byte64Mask) m);  // specialized
+        byte res = super.reduceLanesTemplate(op, Byte64Mask.class, (Byte64Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -666,7 +668,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Byte64Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Byte64Mask) VectorSupport.indexPartiallyInUpperRange(
-                Byte64Mask.class, byte.class, VLENGTH, offset, limit,
+                Byte64Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Byte64Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java
index ff035f13294..a20ee7ec591 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final byte reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        byte res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Byte> m) {
-        return (long) super.reduceLanesTemplate(op, ByteMaxMask.class, (ByteMaxMask) m);  // specialized
+        byte res = super.reduceLanesTemplate(op, ByteMaxMask.class, (ByteMaxMask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -652,7 +654,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         ByteMaxMask indexPartiallyInUpperRange(long offset, long limit) {
             return (ByteMaxMask) VectorSupport.indexPartiallyInUpperRange(
-                ByteMaxMask.class, byte.class, VLENGTH, offset, limit,
+                ByteMaxMask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (ByteMaxMask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java
index 4fc8626754a..add6bbc1ce2 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java
@@ -2220,8 +2220,7 @@ final ByteVector addIndexTemplate(int scale) {
                 // instruction directly, load IOTA from memory
                 // and multiply.
                 ByteVector iota = s.iota();
-                byte sc = (byte) scale_;
-                return v.add(sc == 1 ? iota : iota.mul(sc));
+                return v.add(scale_ == 1 ? iota : iota.mul((byte)scale_));
             });
     }
 
@@ -2284,7 +2283,8 @@ ByteVector sliceTemplate(int origin, Vector<Byte> v1) {
         that.check(this);
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Byte> iota = iotaShuffle();
-        VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
+        byte pivotidx = (byte)(length() - origin);
+        VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
     }
@@ -2314,7 +2314,8 @@ ByteVector slice(int origin,
     ByteVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Byte> iota = iotaShuffle();
-        VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin))));
+        byte pivotidx = (byte)(length() - origin);
+        VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2376,7 +2377,7 @@ ByteVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Byte> iota = iotaShuffle();
         VectorMask<Byte> blendMask = iota.toVector().compare(VectorOperators.GE,
-                                                                  (broadcast((byte)(origin))));
+                                                                  broadcast((byte)(origin)));
         iota = iotaShuffle(-origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2948,7 +2949,7 @@ public final double[] toDoubleArray() {
         byte[] a = toArray();
         double[] res = new double[a.length];
         for (int i = 0; i < a.length; i++) {
-            res[i] = (double) a[i];
+            res[i] = ((double) a[i]);
         }
         return res;
     }
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java
index 385fbba55a3..edf738ed3cf 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        double res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Double> m) {
-        return (long) super.reduceLanesTemplate(op, Double128Mask.class, (Double128Mask) m);  // specialized
+        double res = super.reduceLanesTemplate(op, Double128Mask.class, (Double128Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -643,7 +645,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Double128Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Double128Mask) VectorSupport.indexPartiallyInUpperRange(
-                Double128Mask.class, double.class, VLENGTH, offset, limit,
+                Double128Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Double128Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java
index e73ada8a088..e6110fce2aa 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        double res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Double> m) {
-        return (long) super.reduceLanesTemplate(op, Double256Mask.class, (Double256Mask) m);  // specialized
+        double res = super.reduceLanesTemplate(op, Double256Mask.class, (Double256Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -647,7 +649,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Double256Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Double256Mask) VectorSupport.indexPartiallyInUpperRange(
-                Double256Mask.class, double.class, VLENGTH, offset, limit,
+                Double256Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Double256Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java
index 5f239d2a527..5674cdae1d6 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        double res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Double> m) {
-        return (long) super.reduceLanesTemplate(op, Double512Mask.class, (Double512Mask) m);  // specialized
+        double res = super.reduceLanesTemplate(op, Double512Mask.class, (Double512Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -655,7 +657,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Double512Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Double512Mask) VectorSupport.indexPartiallyInUpperRange(
-                Double512Mask.class, double.class, VLENGTH, offset, limit,
+                Double512Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Double512Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java
index cd5f14c47db..476c4757264 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        double res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Double> m) {
-        return (long) super.reduceLanesTemplate(op, Double64Mask.class, (Double64Mask) m);  // specialized
+        double res = super.reduceLanesTemplate(op, Double64Mask.class, (Double64Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -641,7 +643,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Double64Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Double64Mask) VectorSupport.indexPartiallyInUpperRange(
-                Double64Mask.class, double.class, VLENGTH, offset, limit,
+                Double64Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Double64Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java
index 84b0b240ca5..cf615934454 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final double reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        double res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Double> m) {
-        return (long) super.reduceLanesTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m);  // specialized
+        double res = super.reduceLanesTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -640,7 +642,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         DoubleMaxMask indexPartiallyInUpperRange(long offset, long limit) {
             return (DoubleMaxMask) VectorSupport.indexPartiallyInUpperRange(
-                DoubleMaxMask.class, double.class, VLENGTH, offset, limit,
+                DoubleMaxMask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (DoubleMaxMask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java
index 59e67195732..8b876caece7 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java
@@ -486,7 +486,7 @@ AbstractMask<Double> bTest(int cond,
     /*package-private*/
     @ForceInline
     static long toBits(double e) {
-        return  Double.doubleToRawLongBits(e);
+        return Double.doubleToRawLongBits(e);
     }
 
     /*package-private*/
@@ -1027,8 +1027,7 @@ opc, getClass(), maskClass, double.class, length(),
 
     private static TernaryOperation<DoubleVector, VectorMask<Double>> ternaryOperations(int opc_) {
         switch (opc_) {
-            case VECTOR_OP_FMA: return (v0, v1_, v2_, m) ->
-                    v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
+            case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
             default: return null;
         }
     }
@@ -2062,8 +2061,7 @@ final DoubleVector addIndexTemplate(int scale) {
                 // instruction directly, load IOTA from memory
                 // and multiply.
                 DoubleVector iota = s.iota();
-                double sc = (double) scale_;
-                return v.add(sc == 1 ? iota : iota.mul(sc));
+                return v.add(scale_ == 1 ? iota : iota.mul((double)scale_));
             });
     }
 
@@ -2126,7 +2124,8 @@ DoubleVector sliceTemplate(int origin, Vector<Double> v1) {
         that.check(this);
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Double> iota = iotaShuffle();
-        VectorMask<Double> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((double)(length() - origin))));
+        double pivotidx = (double)(length() - origin);
+        VectorMask<Double> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
     }
@@ -2156,7 +2155,8 @@ DoubleVector slice(int origin,
     DoubleVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Double> iota = iotaShuffle();
-        VectorMask<Double> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((double)(length() - origin))));
+        double pivotidx = (double)(length() - origin);
+        VectorMask<Double> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2218,7 +2218,7 @@ DoubleVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Double> iota = iotaShuffle();
         VectorMask<Double> blendMask = iota.toVector().compare(VectorOperators.GE,
-                                                                  (broadcast((double)(origin))));
+                                                                  broadcast((double)(origin)));
         iota = iotaShuffle(-origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java
index d6b66f77431..73ca6710625 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        float res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Float> m) {
-        return (long) super.reduceLanesTemplate(op, Float128Mask.class, (Float128Mask) m);  // specialized
+        float res = super.reduceLanesTemplate(op, Float128Mask.class, (Float128Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -647,7 +649,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Float128Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Float128Mask) VectorSupport.indexPartiallyInUpperRange(
-                Float128Mask.class, float.class, VLENGTH, offset, limit,
+                Float128Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Float128Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java
index 38e5bee8a97..a4a252b2362 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        float res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Float> m) {
-        return (long) super.reduceLanesTemplate(op, Float256Mask.class, (Float256Mask) m);  // specialized
+        float res = super.reduceLanesTemplate(op, Float256Mask.class, (Float256Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -655,7 +657,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Float256Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Float256Mask) VectorSupport.indexPartiallyInUpperRange(
-                Float256Mask.class, float.class, VLENGTH, offset, limit,
+                Float256Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Float256Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java
index 3a398976d98..da7db208936 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        float res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Float> m) {
-        return (long) super.reduceLanesTemplate(op, Float512Mask.class, (Float512Mask) m);  // specialized
+        float res = super.reduceLanesTemplate(op, Float512Mask.class, (Float512Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -671,7 +673,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Float512Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Float512Mask) VectorSupport.indexPartiallyInUpperRange(
-                Float512Mask.class, float.class, VLENGTH, offset, limit,
+                Float512Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Float512Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java
index 867b3e284ae..abbdd83b8bd 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        float res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Float> m) {
-        return (long) super.reduceLanesTemplate(op, Float64Mask.class, (Float64Mask) m);  // specialized
+        float res = super.reduceLanesTemplate(op, Float64Mask.class, (Float64Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -643,7 +645,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Float64Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Float64Mask) VectorSupport.indexPartiallyInUpperRange(
-                Float64Mask.class, float.class, VLENGTH, offset, limit,
+                Float64Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Float64Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java
index 242d405eafb..26f6afa64f0 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -334,14 +334,16 @@ public final float reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        float res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Float> m) {
-        return (long) super.reduceLanesTemplate(op, FloatMaxMask.class, (FloatMaxMask) m);  // specialized
+        float res = super.reduceLanesTemplate(op, FloatMaxMask.class, (FloatMaxMask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -640,7 +642,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         FloatMaxMask indexPartiallyInUpperRange(long offset, long limit) {
             return (FloatMaxMask) VectorSupport.indexPartiallyInUpperRange(
-                FloatMaxMask.class, float.class, VLENGTH, offset, limit,
+                FloatMaxMask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (FloatMaxMask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java
index 45427817e3d..08c9dbd763c 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java
@@ -486,7 +486,7 @@ AbstractMask<Float> bTest(int cond,
     /*package-private*/
     @ForceInline
     static long toBits(float e) {
-        return  Float.floatToRawIntBits(e);
+        return Float.floatToRawIntBits(e);
     }
 
     /*package-private*/
@@ -1027,8 +1027,7 @@ opc, getClass(), maskClass, float.class, length(),
 
     private static TernaryOperation<FloatVector, VectorMask<Float>> ternaryOperations(int opc_) {
         switch (opc_) {
-            case VECTOR_OP_FMA: return (v0, v1_, v2_, m) ->
-                    v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
+            case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
             default: return null;
         }
     }
@@ -2074,8 +2073,7 @@ final FloatVector addIndexTemplate(int scale) {
                 // instruction directly, load IOTA from memory
                 // and multiply.
                 FloatVector iota = s.iota();
-                float sc = (float) scale_;
-                return v.add(sc == 1 ? iota : iota.mul(sc));
+                return v.add(scale_ == 1 ? iota : iota.mul((float)scale_));
             });
     }
 
@@ -2138,7 +2136,8 @@ FloatVector sliceTemplate(int origin, Vector<Float> v1) {
         that.check(this);
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Float> iota = iotaShuffle();
-        VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin))));
+        float pivotidx = (float)(length() - origin);
+        VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
     }
@@ -2168,7 +2167,8 @@ FloatVector slice(int origin,
     FloatVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Float> iota = iotaShuffle();
-        VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin))));
+        float pivotidx = (float)(length() - origin);
+        VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2230,7 +2230,7 @@ FloatVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Float> iota = iotaShuffle();
         VectorMask<Float> blendMask = iota.toVector().compare(VectorOperators.GE,
-                                                                  (broadcast((float)(origin))));
+                                                                  broadcast((float)(origin)));
         iota = iotaShuffle(-origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2770,7 +2770,7 @@ public final double[] toDoubleArray() {
         float[] a = toArray();
         double[] res = new double[a.length];
         for (int i = 0; i < a.length; i++) {
-            res[i] = (double) a[i];
+            res[i] = ((double) a[i]);
         }
         return res;
     }
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java
new file mode 100644
index 00000000000..2f53e6675d2
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java
@@ -0,0 +1,917 @@
+/*
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class Halffloat128Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_128;  // the species returned by vspecies() for this class
+
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();  // returned by shape()
+
+    static final Class<Halffloat128Vector> VCLASS = Halffloat128Vector.class;
+
+    static final int VSIZE = VSPECIES.vectorBitSize();  // returned by bitSize(); byteSize() divides it by Byte.SIZE
+
+    static final int VLENGTH = VSPECIES.laneCount(); // returned by length(); used by the JVM
+
+    static final Class<Float16> ETYPE = Float16.class; // lane element class; used by the JVM
+
+    Halffloat128Vector(Float16[] v) {
+        super(v);  // forwards the Float16[] unchanged to the HalffloatVector constructor
+    }
+
+    // Object-typed overload so that Halffloat128Vector::new can be
+    // stored into species.vectorFactory; v must actually be a Float16[].
+    Halffloat128Vector(Object v) {
+        this((Float16[]) v);  // any other argument type fails here with ClassCastException
+    }
+
+    static final Halffloat128Vector ZERO = new Halffloat128Vector(new Float16[VLENGTH]);  // NOTE(review): Float16[] is an object array -- confirm its default elements are zero lanes, not null
+    static final Halffloat128Vector IOTA = new Halffloat128Vector(VSPECIES.iotaArray());  // built from the species' iotaArray()
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();  // return value deliberately discarded; called only for the warm-up
+        VSPECIES.withLanes(LaneType.BYTE);  // likewise discarded; BYTE is the only lane type warmed here
+    }
+
+    // Specialized extractors
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE:  This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;  // class-level constant: HalffloatVector.SPECIES_128
+    }
+
+    @ForceInline
+    @Override
+    public final Class<Float16> elementType() { return Float16.class; }  // same class object as ETYPE
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Float16.SIZE; }  // NOTE(review): presumably the lane width in bits -- confirm against Float16.SIZE
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }  // VSPECIES.vectorShape(), cached at class init
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }  // VSPECIES.laneCount(), cached at class init
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }  // VSPECIES.vectorBitSize(), cached at class init
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }  // bit size divided by 8
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    Float16[] vec() {
+        return (Float16[])getPayload();  // getPayload() is not statically typed as Float16[], hence the cast
+    }
+
+    // Virtualized constructors
+
+    @Override
+    @ForceInline
+    public final Halffloat128Vector broadcast(Float16 e) {
+        return (Halffloat128Vector) super.broadcastTemplate(e);  // specialize: shared template, result narrowed to this class
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat128Vector broadcast(long e) {
+        return (Halffloat128Vector) super.broadcastTemplate(e);  // specialize: long overload of the same template
+    }
+
+    @Override
+    @ForceInline
+    Halffloat128Mask maskFromArray(boolean[] bits) {
+        return new Halffloat128Mask(bits);  // bits is handed to the mask constructor as-is
+    }
+
+    @Override
+    @ForceInline
+    Halffloat128Shuffle iotaShuffle() { return Halffloat128Shuffle.IOTA; }  // returns the shared IOTA constant
+
+    @ForceInline  // NOTE(review): no @Override on this overload, unlike iotaShuffle() above -- confirm whether a supertype declares it
+    Halffloat128Shuffle iotaShuffle(int start, int step, boolean wrap) {
+      if (wrap) {  // wrapping form: passes 1 to shuffleIota; the lambda runs each index through wrapToRange(.., l)
+        return (Halffloat128Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat128Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+      } else {  // non-wrapping form: passes 0; the lambda uses i*lstep + lstart unchanged
+        return (Halffloat128Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat128Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+      }
+    }
+
+    @Override
+    @ForceInline
+    Halffloat128Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat128Shuffle(reorder); }  // delegates to the byte[] constructor
+
+    @Override
+    @ForceInline
+    Halffloat128Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat128Shuffle(indexes, i); }  // delegates to the (int[], int) constructor
+
+    @Override
+    @ForceInline
+    Halffloat128Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat128Shuffle(fn); }  // delegates to the IntUnaryOperator constructor
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    Halffloat128Vector vectorFactory(Float16[] vec) {
+        return new Halffloat128Vector(vec);  // vec is passed to the constructor as-is
+    }
+
+    @ForceInline
+    final @Override
+    Byte128Vector asByteVectorRaw() {
+        return (Byte128Vector) super.asByteVectorRawTemplate();  // specialize: result narrowed to the 128-bit byte vector class
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize: no cast, the result class depends on laneType
+    }
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector uOp(FUnOp f) {
+        return (Halffloat128Vector) super.uOpTemplate(f);  // specialize: shared template, result narrowed to this class
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector uOp(VectorMask<Float16> m, FUnOp f) {
+        return (Halffloat128Vector)
+            super.uOpTemplate((Halffloat128Mask)m, f);  // specialize; m must be a Halffloat128Mask or the cast throws
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector bOp(Vector<Float16> v, FBinOp f) {
+        return (Halffloat128Vector) super.bOpTemplate((Halffloat128Vector)v, f);  // specialize; v must be a Halffloat128Vector or the cast throws
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector bOp(Vector<Float16> v,
+                     VectorMask<Float16> m, FBinOp f) {
+        return (Halffloat128Vector)
+            super.bOpTemplate((Halffloat128Vector)v, (Halffloat128Mask)m,
+                              f);  // specialize; both v and m are cast to this shape's concrete classes
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector tOp(Vector<Float16> v1, Vector<Float16> v2, FTriOp f) {
+        return (Halffloat128Vector)
+            super.tOpTemplate((Halffloat128Vector)v1, (Halffloat128Vector)v2,
+                              f);  // specialize; v1 and v2 must both be Halffloat128Vector or the casts throw
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector tOp(Vector<Float16> v1, Vector<Float16> v2,
+                     VectorMask<Float16> m, FTriOp f) {
+        return (Halffloat128Vector)
+            super.tOpTemplate((Halffloat128Vector)v1, (Halffloat128Vector)v2,
+                              (Halffloat128Mask)m, f);  // specialize; operands and mask are cast to this shape's concrete classes
+    }
+
+    @ForceInline
+    final @Override
+    Float16 rOp(Float16 v, VectorMask<Float16> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize: arguments forwarded unchanged, no casts needed
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Float16,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize: arguments forwarded unchanged to the template
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize: arguments forwarded unchanged to the template
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD, v), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector lanewise(Unary op) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op);  // specialize: shared template, result narrowed to this class
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector lanewise(Unary op, VectorMask<Float16> m) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m);  // specialize; passes the concrete mask class and casts m to it
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector lanewise(Binary op, Vector<Float16> v) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, v);  // specialize: v is forwarded without a cast
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector lanewise(Binary op, Vector<Float16> v, VectorMask<Float16> m) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, v, (Halffloat128Mask) m);  // specialize; passes the concrete mask class and casts m to it
+    }
+
+
+    // Ternary lanewise overload; declared public and final.
+    @Override
+    @ForceInline
+    public final
+    Halffloat128Vector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, v1, v2);  // specialize: v1 and v2 are forwarded without casts
+    }
+
+    @Override  // masked ternary lanewise()
+    @ForceInline
+    public final
+    Halffloat128Vector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2, VectorMask<Float16> m) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, v1, v2, (Halffloat128Mask) m);  // specialize: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+    @Override  // Halffloat128Vector entry point for addIndex()
+    @ForceInline
+    public final
+    Halffloat128Vector addIndex(int scale) {
+        return (Halffloat128Vector) super.addIndexTemplate(scale);  // specialize: scale is forwarded as-is, result narrowed to this class
+    }
+
+    // Type specific horizontal reductions
+
+    @Override  // unmasked reduction; the scalar result is a Float16
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // specialized: plain delegation to the shared template
+    }
+
+    @Override  // masked reduction; the scalar result is a Float16
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Float16> m) {
+        return super.reduceLanesTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m);  // specialized: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        // Reduce in Float16 first, then convert the scalar result with longValue().
+        return super.reduceLanesTemplate(op).longValue();  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Float16> m) {
+        // Masked reduction in Float16, then longValue() on the scalar result.
+        return super.reduceLanesTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m).longValue();  // specialized
+    }
+
+    @ForceInline  // NOTE(review): no @Override here, unlike the neighbouring methods; presumably this implements Vector.toShuffle() - confirm and annotate in the generator template
+    public VectorShuffle<Float16> toShuffle() {
+        return super.toShuffleTemplate(Halffloat128Shuffle.class); // specialize: the shuffle class token is the only argument
+    }
+
+    // Specialized unary testing
+
+    @Override  // unmasked lane test; yields a Halffloat128Mask
+    @ForceInline
+    public final Halffloat128Mask test(Test op) {
+        return super.testTemplate(Halffloat128Mask.class, op);  // specialize: the mask class token fixes the concrete result type
+    }
+
+    @Override  // masked lane test; yields a Halffloat128Mask
+    @ForceInline
+    public final Halffloat128Mask test(Test op, VectorMask<Float16> m) {
+        return super.testTemplate(Halffloat128Mask.class, op, (Halffloat128Mask) m);  // specialize: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+    // Specialized comparisons
+
+    @Override  // vector-vs-vector comparison
+    @ForceInline
+    public final Halffloat128Mask compare(Comparison op, Vector<Float16> v) {
+        return super.compareTemplate(Halffloat128Mask.class, op, v);  // specialize: the mask class token fixes the concrete result type
+    }
+
+    @Override  // vector-vs-scalar comparison, scalar given as a Float16
+    @ForceInline
+    public final Halffloat128Mask compare(Comparison op, Float16 s) {
+        return super.compareTemplate(Halffloat128Mask.class, op, s);  // specialize: s is forwarded unchanged
+    }
+
+    @Override  // vector-vs-scalar comparison, scalar given as a long (byte/short/int arguments also bind here by widening)
+    @ForceInline
+    public final Halffloat128Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat128Mask.class, op, s);  // specialize: how s is converted to a lane value is decided by the template, not here
+    }
+
+    @Override  // masked vector-vs-vector comparison
+    @ForceInline
+    public final Halffloat128Mask compare(Comparison op, Vector<Float16> v, VectorMask<Float16> m) {
+        return super.compareTemplate(Halffloat128Mask.class, op, v, (Halffloat128Mask) m);  // specialize: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+
+    @Override  // blend with another vector under mask m
+    @ForceInline
+    public Halffloat128Vector blend(Vector<Float16> v, VectorMask<Float16> m) {
+        return (Halffloat128Vector)
+            super.blendTemplate(Halffloat128Mask.class,
+                                (Halffloat128Vector) v,  // v must already be a Halffloat128Vector
+                                (Halffloat128Mask) m);  // specialize: both casts throw ClassCastException on a foreign vector or mask class
+    }
+
+    @Override  // two-vector slice
+    @ForceInline
+    public Halffloat128Vector slice(int origin, Vector<Float16> v) {
+        return (Halffloat128Vector) super.sliceTemplate(origin, v);  // specialize: arguments forwarded unchanged, result narrowed to this class
+    }
+
+    @Override  // single-vector slice
+    @ForceInline
+    public Halffloat128Vector slice(int origin) {
+        return (Halffloat128Vector) super.sliceTemplate(origin);  // specialize: result narrowed to this class
+    }
+
+    @Override  // unslice into background vector w, selecting the given part
+    @ForceInline
+    public Halffloat128Vector unslice(int origin, Vector<Float16> w, int part) {
+        return (Halffloat128Vector) super.unsliceTemplate(origin, w, part);  // specialize: arguments forwarded unchanged
+    }
+
+    @Override  // masked variant of unslice(origin, w, part)
+    @ForceInline
+    public Halffloat128Vector unslice(int origin, Vector<Float16> w, int part, VectorMask<Float16> m) {
+        return (Halffloat128Vector)
+            super.unsliceTemplate(Halffloat128Mask.class,
+                                  origin, w, part,
+                                  (Halffloat128Mask) m);  // specialize: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+    @Override  // single-argument unslice
+    @ForceInline
+    public Halffloat128Vector unslice(int origin) {
+        return (Halffloat128Vector) super.unsliceTemplate(origin);  // specialize: result narrowed to this class
+    }
+
+    @Override  // rearrange lanes according to shuffle s
+    @ForceInline
+    public Halffloat128Vector rearrange(VectorShuffle<Float16> s) {
+        return (Halffloat128Vector)
+            super.rearrangeTemplate(Halffloat128Shuffle.class,
+                                    (Halffloat128Shuffle) s);  // specialize: s must be a Halffloat128Shuffle (checked by the cast)
+    }
+
+    @Override  // masked rearrange
+    @ForceInline
+    public Halffloat128Vector rearrange(VectorShuffle<Float16> shuffle,
+                                  VectorMask<Float16> m) {
+        return (Halffloat128Vector)
+            super.rearrangeTemplate(Halffloat128Shuffle.class,
+                                    Halffloat128Mask.class,
+                                    (Halffloat128Shuffle) shuffle,  // must be this shape's shuffle class
+                                    (Halffloat128Mask) m);  // specialize: must be this shape's mask class
+    }
+
+    @Override  // two-vector rearrange: this vector plus a second source v
+    @ForceInline
+    public Halffloat128Vector rearrange(VectorShuffle<Float16> s,
+                                  Vector<Float16> v) {
+        return (Halffloat128Vector)
+            super.rearrangeTemplate(Halffloat128Shuffle.class,
+                                    (Halffloat128Shuffle) s,  // must be this shape's shuffle class
+                                    (Halffloat128Vector) v);  // specialize: v must be a Halffloat128Vector
+    }
+
+    @Override  // lane compression under mask m
+    @ForceInline
+    public Halffloat128Vector compress(VectorMask<Float16> m) {
+        return (Halffloat128Vector)
+            super.compressTemplate(Halffloat128Mask.class,
+                                   (Halffloat128Mask) m);  // specialize: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+    @Override  // lane expansion under mask m
+    @ForceInline
+    public Halffloat128Vector expand(VectorMask<Float16> m) {
+        return (Halffloat128Vector)
+            super.expandTemplate(Halffloat128Mask.class,
+                                   (Halffloat128Mask) m);  // specialize: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+    @Override  // unmasked selectFrom()
+    @ForceInline
+    public Halffloat128Vector selectFrom(Vector<Float16> v) {
+        return (Halffloat128Vector)
+            super.selectFromTemplate((Halffloat128Vector) v);  // specialize: v must be a Halffloat128Vector (checked by the cast)
+    }
+
+    @Override  // masked selectFrom()
+    @ForceInline
+    public Halffloat128Vector selectFrom(Vector<Float16> v,
+                                   VectorMask<Float16> m) {
+        return (Halffloat128Vector)
+            super.selectFromTemplate((Halffloat128Vector) v,  // must be this shape's vector class
+                                     (Halffloat128Mask) m);  // specialize: must be this shape's mask class
+    }
+
+
+    @ForceInline
+    @Override
+    public Float16 lane(int i) {
+        // Each case passes a literal index to laneHelper() and rebuilds the
+        // Float16 from the 16-bit pattern it returns.
+        switch (i) {
+            case 0: return Float16.shortBitsToFloat16(laneHelper(0));
+            case 1: return Float16.shortBitsToFloat16(laneHelper(1));
+            case 2: return Float16.shortBitsToFloat16(laneHelper(2));
+            case 3: return Float16.shortBitsToFloat16(laneHelper(3));
+            case 4: return Float16.shortBitsToFloat16(laneHelper(4));
+            case 5: return Float16.shortBitsToFloat16(laneHelper(5));
+            case 6: return Float16.shortBitsToFloat16(laneHelper(6));
+            case 7: return Float16.shortBitsToFloat16(laneHelper(7));
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+    }
+
+    public short laneHelper(int i) {
+        // Fallback lambda: read lane ix from the Float16[] payload and widen its
+        // 16-bit pattern to the long that VectorSupport.extract() returns.
+        return (short) VectorSupport.extract(
+                     VCLASS, ETYPE, VLENGTH,
+                     this, i,
+                     (vec, ix) ->
+                         (long) Float16.float16ToShortBits(vec.vec()[ix]));
+    }
+
+    @ForceInline  // returns a vector with lane i replaced by e; the receiver itself is not modified here
+    @Override
+    public Halffloat128Vector withLane(int i, Float16 e) {
+        switch(i) {  // every case hands a literal lane index to withLaneHelper()
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            case 4: return withLaneHelper(4, e);
+            case 5: return withLaneHelper(5, e);
+            case 6: return withLaneHelper(6, e);
+            case 7: return withLaneHelper(7, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);  // any index outside 0..7
+        }
+    }
+
+    public Halffloat128Vector withLaneHelper(int i, Float16 e) {
+        final long laneBits = (long) Float16.float16ToShortBits(e);
+        return VectorSupport.insert(
+                                VCLASS, ETYPE, VLENGTH, this, i, laneBits,
+                                (vec, lane, rawBits) -> {
+                                    Float16[] copy = vec.vec().clone();  // work on a copy of the payload
+                                    copy[lane] = Float16.shortBitsToFloat16((short) rawBits);
+                                    return vec.vectorFactory(copy);
+                                });
+    }
+
+    // Mask
+
+    static final class Halffloat128Mask extends AbstractMask<Float16> {  // mask type paired with Halffloat128Vector; payload is a boolean[] with one entry per lane
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        Halffloat128Mask(boolean[] bits) {  // same as (bits, 0)
+            this(bits, 0);
+        }
+
+        Halffloat128Mask(boolean[] bits, int offset) {  // copies laneCount entries starting at bits[offset]
+            super(prepare(bits, offset));
+        }
+
+        Halffloat128Mask(boolean val) {  // every lane set to val
+            super(prepare(val));
+        }
+
+        private static boolean[] prepare(boolean[] bits, int offset) {  // defensive copy: the mask never aliases the caller's array
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];  // out-of-range offset surfaces as ArrayIndexOutOfBoundsException
+            }
+            return newBits;
+        }
+
+        private static boolean[] prepare(boolean val) {  // fresh array filled with val
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {  // direct view of the payload array, not a copy
+            return (boolean[])getPayload();
+        }
+
+        @Override
+        Halffloat128Mask uOp(MUnOp f) {  // new mask with res[i] = f(i, bits[i])
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat128Mask(res);
+        }
+
+        @Override
+        Halffloat128Mask bOp(VectorMask<Float16> m, MBinOp f) {  // new mask with res[i] = f(i, bits[i], mbits[i])
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat128Mask)m).getBits();  // m must be a Halffloat128Mask
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat128Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat128Vector toVector() {
+            return (Halffloat128Vector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper for lane-wise mask conversions: rebuilds the mask from toArray() via dsp.maskFactory().
+         * NOTE(review): not called anywhere inside this class; cast() below carries its own fallback lambda.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {  // lane counts must match, otherwise IllegalArgumentException
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));  // fallback: rebuild the mask from its boolean array
+        }
+
+        @Override
+        @ForceInline
+        /*package-private*/
+        Halffloat128Mask indexPartiallyInUpperRange(long offset, long limit) {
+            return (Halffloat128Mask) VectorSupport.indexPartiallyInUpperRange(
+                Halffloat128Mask.class, ETYPE, VLENGTH, offset, limit,
+                (o, l) -> (Halffloat128Mask) TRUE_MASK.indexPartiallyInRange(o, l));  // fallback: narrow the all-true mask to the range
+        }
+
+        // Unary operations
+
+        @Override
+        @ForceInline
+        public Halffloat128Mask not() {  // complement, expressed as xor with the all-true mask
+            return xor(maskAll(true));
+        }
+
+        @Override  // NOTE(review): the fallback lambda below hands compare() the short returned by Float.floatToFloat16(trueCount),
+        @ForceInline  // i.e. raw binary16 bits; confirm which compare() overload that binds to and that it is not read as a numeric value.
+        public Halffloat128Mask compress() {
+            return (Halffloat128Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
+                Halffloat128Vector.class, Halffloat128Mask.class, ETYPE, VLENGTH, null, this,
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, Float.floatToFloat16(m1.trueCount())));
+        }
+
+
+        // Binary operations
+
+        @Override
+        @ForceInline
+        public Halffloat128Mask and(VectorMask<Float16> mask) {  // null mask -> NullPointerException, foreign mask class -> ClassCastException
+            Objects.requireNonNull(mask);
+            Halffloat128Mask m = (Halffloat128Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat128Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));  // fallback: per-lane boolean AND
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat128Mask or(VectorMask<Float16> mask) {  // same argument checks as and()
+            Objects.requireNonNull(mask);
+            Halffloat128Mask m = (Halffloat128Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat128Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));  // fallback: per-lane boolean OR
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat128Mask xor(VectorMask<Float16> mask) {  // same argument checks as and()
+            Objects.requireNonNull(mask);
+            Halffloat128Mask m = (Halffloat128Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat128Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));  // fallback: per-lane boolean XOR
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {  // the long reduction result is narrowed to int
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat128Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {  // the long reduction result is narrowed to int
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat128Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {  // the long reduction result is narrowed to int
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat128Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {  // more lanes than a long has bits
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat128Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // laneIsSet
+
+        @Override
+        @ForceInline
+        public boolean laneIsSet(int i) {  // NOTE(review): passes Float16.class as the element type while the neighbouring mask intrinsic calls pass short.class - confirm this is intended
+            Objects.checkIndex(i, length());  // IndexOutOfBoundsException for i outside [0, length())
+            return VectorSupport.extract(Halffloat128Mask.class, Float16.class, VLENGTH,
+                                         this, i, (m, idx) -> (m.getBits()[idx] ? 1L : 0L)) == 1L;  // fallback encodes the lane as 1L/0L
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat128Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((Halffloat128Mask)m).getBits()));  // fallback ignores the second (all-true) operand
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat128Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((Halffloat128Mask)m).getBits()));  // fallback ignores the second (all-true) operand
+        }
+
+        @ForceInline
+        /*package-private*/
+        static Halffloat128Mask maskAll(boolean bit) {  // broadcasts -1 for true and 0 for false
+            return VectorSupport.fromBitsCoerced(Halffloat128Mask.class, short.class, VLENGTH,
+                                                 (bit ? -1 : 0), MODE_BROADCAST, null,
+                                                 (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));  // fallback returns one of the two shared constants below
+        }
+        private static final Halffloat128Mask  TRUE_MASK = new Halffloat128Mask(true);
+        private static final Halffloat128Mask FALSE_MASK = new Halffloat128Mask(false);
+
+    }
+
+    // Shuffle
+
+    static final class Halffloat128Shuffle extends AbstractShuffle<Float16> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        Halffloat128Shuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat128Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat128Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat128Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // Source indexes are stored as bytes, so both the VLENGTH valid
+            // indexes and the VLENGTH exceptional ones must fit in a byte.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat128Shuffle IOTA = new Halffloat128Shuffle(IDENTITY);
+
+        @Override
+        @ForceInline
+        public Halffloat128Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat128Shuffle.class, this, VLENGTH,
+                                                    (sh) -> ((Halffloat128Vector)(((AbstractShuffle<Float16>)(sh)).toVectorTemplate())));
+        }
+
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
+            AbstractSpecies<F> target = (AbstractSpecies<F>) s;
+            if (target.laneCount() != length()) {
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            }
+            return s.shuffleFromArray(toArray(), 0).check(s);
+        }
+
+        @ForceInline
+        @Override
+        public Halffloat128Shuffle rearrange(VectorShuffle<Float16> shuffle) {
+            byte[] mine = reorder();
+            byte[] theirs = ((Halffloat128Shuffle) shuffle).reorder();
+            byte[] composed = new byte[mine.length];
+            // composed[lane] = mine[theirs[lane]]; a negative (exceptional)
+            // index in theirs makes the array access throw.
+            for (int lane = 0; lane < composed.length; lane++) {
+                composed[lane] = mine[theirs[lane]];
+            }
+            return new Halffloat128Shuffle(composed);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline  // unmasked load from a Float16[] starting at offset
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset) {
+        return super.fromArray0Template(a, offset);  // specialize: no bounds check is performed in this method itself
+    }
+
+    @ForceInline  // masked load from a Float16[]
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m, offsetInRange);  // specialize: m must be a Halffloat128Mask; offsetInRange is forwarded unchanged
+    }
+
+
+
+
+    @ForceInline  // unmasked load from a MemorySegment at the given offset
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) {
+        return super.fromMemorySegment0Template(ms, offset);  // specialize: plain delegation to the shared template
+    }
+
+    @ForceInline  // masked load from a MemorySegment
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromMemorySegment0Template(Halffloat128Mask.class, ms, offset, (Halffloat128Mask) m, offsetInRange);  // specialize: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+    @ForceInline  // unmasked store into a Float16[] starting at offset
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize: plain delegation to the shared template
+    }
+
+    @ForceInline  // masked store into a Float16[]
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset, VectorMask<Float16> m) {
+        super.intoArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);  // specialize: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+
+
+    @ForceInline  // masked store into a MemorySegment
+    @Override
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m) {
+        super.intoMemorySegment0Template(Halffloat128Mask.class, ms, offset, (Halffloat128Mask) m);  // specialize: m must be a Halffloat128Mask (checked by the cast)
+    }
+
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
+
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java
new file mode 100644
index 00000000000..c0b05cc3b53
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java
@@ -0,0 +1,933 @@
+/*
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class Halffloat256Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =  // the species every constant below is derived from
+        (HalffloatSpecies) HalffloatVector.SPECIES_256;
+
+    static final VectorShape VSHAPE =  // returned by shape()
+        VSPECIES.vectorShape();
+
+    static final Class<Halffloat256Vector> VCLASS = Halffloat256Vector.class;  // class token for this vector type
+
+    static final int VSIZE = VSPECIES.vectorBitSize();  // returned by bitSize(); byteSize() divides it by Byte.SIZE
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+    Halffloat256Vector(Float16[] v) {  // v is handed to the superclass constructor as-is (no copy is made here)
+        super(v);
+    }
+
+    // For compatibility as Halffloat256Vector::new,
+    // stored into species.vectorFactory.
+    Halffloat256Vector(Object v) {  // v must actually be a Float16[]; anything else fails the cast with ClassCastException
+        this((Float16[]) v);
+    }
+
+    static final Halffloat256Vector ZERO = new Halffloat256Vector(new Float16[VLENGTH]);  // payload is a freshly allocated Float16[VLENGTH]
+    static final Halffloat256Vector IOTA = new Halffloat256Vector(VSPECIES.iotaArray());  // payload comes from the species' iotaArray()
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();
+        VSPECIES.withLanes(LaneType.BYTE);
+    }
+
+    // Specialized extractors
+
+    @ForceInline  // always the class-wide VSPECIES constant
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE:  This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    @ForceInline
+    @Override
+    public final Class<Float16> elementType() { return Float16.class; }  // lane type is always Float16
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Float16.SIZE; }  // lane size as reported by Float16.SIZE
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }  // constant taken from VSPECIES.vectorShape()
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }  // lane count, constant taken from VSPECIES.laneCount()
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }  // constant taken from VSPECIES.vectorBitSize()
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }  // bit size divided by 8
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    Float16[] vec() {  // the payload array itself, not a copy - callers that modify it should clone first
+        return (Float16[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    @Override  // broadcast of a Float16 scalar
+    @ForceInline
+    public final Halffloat256Vector broadcast(Float16 e) {
+        return (Halffloat256Vector) super.broadcastTemplate(e);  // specialize: result narrowed to this class
+    }
+
+    @Override  // broadcast of a long scalar (byte/short/int arguments also bind here by widening)
+    @ForceInline
+    public final Halffloat256Vector broadcast(long e) {
+        return (Halffloat256Vector) super.broadcastTemplate(e);  // specialize: how e is converted to a lane value is decided by the template, not here
+    }
+
+    @Override  // builds this shape's mask from a boolean[]
+    @ForceInline
+    Halffloat256Mask maskFromArray(boolean[] bits) {
+        return new Halffloat256Mask(bits);
+    }
+
+    @Override
+    @ForceInline
+    Halffloat256Shuffle iotaShuffle() { return Halffloat256Shuffle.IOTA; }  // shared constant, no allocation per call
+
+    @ForceInline
+    Halffloat256Shuffle iotaShuffle(int start, int step, boolean wrap) {
+      if (!wrap) {
+        return (Halffloat256Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat256Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+      }
+      // Wrapping variant: flag argument 1, and each index goes through wrapToRange().
+      return (Halffloat256Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat256Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+              (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+    }
+
+    @Override
+    @ForceInline
+    Halffloat256Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat256Shuffle(reorder); }  // wraps the byte[] constructor
+
+    @Override
+    @ForceInline
+    Halffloat256Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat256Shuffle(indexes, i); }  // wraps the (int[], int) constructor
+
+    @Override
+    @ForceInline
+    Halffloat256Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat256Shuffle(fn); }  // wraps the IntUnaryOperator constructor
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    Halffloat256Vector vectorFactory(Float16[] vec) {  // vec is wrapped as-is (no copy is made here)
+        return new Halffloat256Vector(vec);
+    }
+
+    @ForceInline  // byte-lane view; the result is narrowed to Byte256Vector
+    final @Override
+    Byte256Vector asByteVectorRaw() {
+        return (Byte256Vector) super.asByteVectorRawTemplate();  // specialize
+    }
+
+    @ForceInline  // view with the requested lane type; result type is left as AbstractVector<?>
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize
+    }
+
+    // Unary operator
+
+    @ForceInline  // unmasked unary fallback operator
+    final @Override
+    Halffloat256Vector uOp(FUnOp f) {
+        return (Halffloat256Vector) super.uOpTemplate(f);  // specialize: result narrowed to this class
+    }
+
+    @ForceInline  // masked unary fallback operator
+    final @Override
+    Halffloat256Vector uOp(VectorMask<Float16> m, FUnOp f) {
+        return (Halffloat256Vector)
+            super.uOpTemplate((Halffloat256Mask)m, f);  // specialize: m must be a Halffloat256Mask (checked by the cast)
+    }
+
+    // Binary operator
+
+    @ForceInline  // unmasked binary fallback operator
+    final @Override
+    Halffloat256Vector bOp(Vector<Float16> v, FBinOp f) {
+        return (Halffloat256Vector) super.bOpTemplate((Halffloat256Vector)v, f);  // specialize: v must be a Halffloat256Vector (checked by the cast)
+    }
+
+    @ForceInline  // masked binary fallback operator
+    final @Override
+    Halffloat256Vector bOp(Vector<Float16> v,
+                     VectorMask<Float16> m, FBinOp f) {
+        return (Halffloat256Vector)
+            super.bOpTemplate((Halffloat256Vector)v, (Halffloat256Mask)m,  // both operands must be this shape's classes
+                              f);  // specialize
+    }
+
+    // Ternary operator
+
+    @ForceInline  // unmasked ternary fallback operator
+    final @Override
+    Halffloat256Vector tOp(Vector<Float16> v1, Vector<Float16> v2, FTriOp f) {
+        return (Halffloat256Vector)
+            super.tOpTemplate((Halffloat256Vector)v1, (Halffloat256Vector)v2,  // both vectors must be Halffloat256Vector
+                              f);  // specialize
+    }
+
+    @ForceInline  // masked ternary fallback operator
+    final @Override
+    Halffloat256Vector tOp(Vector<Float16> v1, Vector<Float16> v2,
+                     VectorMask<Float16> m, FTriOp f) {
+        return (Halffloat256Vector)
+            super.tOpTemplate((Halffloat256Vector)v1, (Halffloat256Vector)v2,  // both vectors must be Halffloat256Vector
+                              (Halffloat256Mask)m, f);  // specialize: m must be a Halffloat256Mask
+    }
+
+    @ForceInline  // reduction fallback operator; v, m and f are forwarded uncast
+    final @Override
+    Float16 rOp(Float16 v, VectorMask<Float16> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize
+    }
+
+    @Override  // Halffloat256Vector entry point for convertShape()
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Float16,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize: all three arguments are forwarded unchanged
+    }
+
+    @Override  // Halffloat256Vector entry point for reinterpretShape()
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize: both arguments are forwarded unchanged
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override  // unmasked unary lanewise(); overridden so this class carries its own copy
+    @ForceInline
+    public Halffloat256Vector lanewise(Unary op) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op);  // specialize: the cast narrows the template's result to this class
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector lanewise(Unary op, VectorMask<Float16> m) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector lanewise(Binary op, Vector<Float16> v) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector lanewise(Binary op, Vector<Float16> v, VectorMask<Float16> m) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, v, (Halffloat256Mask) m);  // specialize
+    }
+
+
+    /*package-private*/
+    @Override
+    @ForceInline
+    public final
+    Halffloat256Vector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat256Vector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2, VectorMask<Float16> m) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, v1, v2, (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat256Vector addIndex(int scale) {
+        return (Halffloat256Vector) super.addIndexTemplate(scale);  // specialize: only the result is narrowed to this class
+    }
+
+    // Type specific horizontal reductions (the masked forms also pass Halffloat256Mask.class to the template)
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Float16> m) {
+        return super.reduceLanesTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        Float16 res = super.reduceLanesTemplate(op);  // specialized
+        return res.longValue();  // same reduction as reduceLanes(op), converted with Float16.longValue()
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Float16> m) {
+        Float16 res = super.reduceLanesTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m);  // specialized
+        return res.longValue();  // same reduction as reduceLanes(op, m), converted with Float16.longValue()
+    }
+
+    @ForceInline
+    public VectorShuffle<Float16> toShuffle() {  // NOTE(review): no @Override here, unlike the neighbouring methods -- confirm
+        return super.toShuffleTemplate(Halffloat256Shuffle.class); // specialize: names this shape's shuffle class
+    }
+
+    // Specialized unary testing (the mask class is passed explicitly; the result needs no cast)
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask test(Test op) {
+        return super.testTemplate(Halffloat256Mask.class, op);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask test(Test op, VectorMask<Float16> m) {
+        return super.testTemplate(Halffloat256Mask.class, op, (Halffloat256Mask) m);  // specialize
+    }
+
+    // Specialized comparisons (vector, Float16 scalar, long scalar, and masked vector forms)
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, Vector<Float16> v) {
+        return super.compareTemplate(Halffloat256Mask.class, op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, Float16 s) {
+        return super.compareTemplate(Halffloat256Mask.class, op, s);  // specialize (Float16 scalar)
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat256Mask.class, op, s);  // specialize (long scalar; byte/short/int arguments also bind here by widening)
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, Vector<Float16> v, VectorMask<Float16> m) {
+        return super.compareTemplate(Halffloat256Mask.class, op, v, (Halffloat256Mask) m);  // specialize (masked)
+    }
+
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector blend(Vector<Float16> v, VectorMask<Float16> m) {
+        return (Halffloat256Vector)
+            super.blendTemplate(Halffloat256Mask.class,
+                                (Halffloat256Vector) v,  // the other vector must be of this exact class
+                                (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector slice(int origin, Vector<Float16> v) {
+        return (Halffloat256Vector) super.sliceTemplate(origin, v);  // specialize (two-vector form)
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector slice(int origin) {
+        return (Halffloat256Vector) super.sliceTemplate(origin);  // specialize (single-vector form)
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector unslice(int origin, Vector<Float16> w, int part) {
+        return (Halffloat256Vector) super.unsliceTemplate(origin, w, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector unslice(int origin, Vector<Float16> w, int part, VectorMask<Float16> m) {
+        return (Halffloat256Vector)
+            super.unsliceTemplate(Halffloat256Mask.class,
+                                  origin, w, part,
+                                  (Halffloat256Mask) m);  // specialize (masked form adds the mask class and mask)
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector unslice(int origin) {
+        return (Halffloat256Vector) super.unsliceTemplate(origin);  // specialize (no background vector)
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector rearrange(VectorShuffle<Float16> s) {
+        return (Halffloat256Vector)
+            super.rearrangeTemplate(Halffloat256Shuffle.class,
+                                    (Halffloat256Shuffle) s);  // specialize (shuffle must be a Halffloat256Shuffle)
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector rearrange(VectorShuffle<Float16> shuffle,
+                                  VectorMask<Float16> m) {
+        return (Halffloat256Vector)
+            super.rearrangeTemplate(Halffloat256Shuffle.class,
+                                    Halffloat256Mask.class,
+                                    (Halffloat256Shuffle) shuffle,
+                                    (Halffloat256Mask) m);  // specialize (masked form)
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector rearrange(VectorShuffle<Float16> s,
+                                  Vector<Float16> v) {
+        return (Halffloat256Vector)
+            super.rearrangeTemplate(Halffloat256Shuffle.class,
+                                    (Halffloat256Shuffle) s,
+                                    (Halffloat256Vector) v);  // specialize (two-vector form)
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector compress(VectorMask<Float16> m) {
+        return (Halffloat256Vector)
+            super.compressTemplate(Halffloat256Mask.class,
+                                   (Halffloat256Mask) m);  // specialize (vector compress; the mask class has its own compress())
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector expand(VectorMask<Float16> m) {
+        return (Halffloat256Vector)
+            super.expandTemplate(Halffloat256Mask.class,
+                                   (Halffloat256Mask) m);  // specialize (same argument shape as compress(m))
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector selectFrom(Vector<Float16> v) {
+        return (Halffloat256Vector)
+            super.selectFromTemplate((Halffloat256Vector) v);  // specialize (no class token is passed, only the cast vector)
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector selectFrom(Vector<Float16> v,
+                                   VectorMask<Float16> m) {
+        return (Halffloat256Vector)
+            super.selectFromTemplate((Halffloat256Vector) v,
+                                     (Halffloat256Mask) m);  // specialize (masked form)
+    }
+
+
+    @ForceInline
+    @Override
+    public Float16 lane(int i) {
+        short bits;  // raw 16-bit pattern returned by laneHelper
+        switch(i) {  // one case per lane (0..15), so laneHelper is always called with a literal index
+            case 0: bits = laneHelper(0); break;
+            case 1: bits = laneHelper(1); break;
+            case 2: bits = laneHelper(2); break;
+            case 3: bits = laneHelper(3); break;
+            case 4: bits = laneHelper(4); break;
+            case 5: bits = laneHelper(5); break;
+            case 6: bits = laneHelper(6); break;
+            case 7: bits = laneHelper(7); break;
+            case 8: bits = laneHelper(8); break;
+            case 9: bits = laneHelper(9); break;
+            case 10: bits = laneHelper(10); break;
+            case 11: bits = laneHelper(11); break;
+            case 12: bits = laneHelper(12); break;
+            case 13: bits = laneHelper(13); break;
+            case 14: bits = laneHelper(14); break;
+            case 15: bits = laneHelper(15); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return Float16.shortBitsToFloat16(bits);  // rebuild the Float16 from its bit pattern
+    }
+
+    public short laneHelper(int i) {  // NOTE(review): public and without @ForceInline, unlike lane() -- confirm intended
+        return (short) VectorSupport.extract(  // only the low 16 bits of the extracted value are kept
+                     VCLASS, ETYPE, VLENGTH,
+                     this, i,
+                     (vec, ix) -> {  // Java implementation (presumably the non-intrinsic path): read lane ix from the payload array
+                     Float16[] vecarr = vec.vec();
+                     return (long)Float16.float16ToShortBits(vecarr[ix]);  // lane bits, widened to long
+                     });
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat256Vector withLane(int i, Float16 e) {
+        switch(i) {  // one case per lane (0..15), so withLaneHelper is always called with a literal index
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            case 4: return withLaneHelper(4, e);
+            case 5: return withLaneHelper(5, e);
+            case 6: return withLaneHelper(6, e);
+            case 7: return withLaneHelper(7, e);
+            case 8: return withLaneHelper(8, e);
+            case 9: return withLaneHelper(9, e);
+            case 10: return withLaneHelper(10, e);
+            case 11: return withLaneHelper(11, e);
+            case 12: return withLaneHelper(12, e);
+            case 13: return withLaneHelper(13, e);
+            case 14: return withLaneHelper(14, e);
+            case 15: return withLaneHelper(15, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);  // same message as lane(int)
+        }
+    }
+
+    public Halffloat256Vector withLaneHelper(int i, Float16 e) {  // NOTE(review): public and without @ForceInline, unlike withLane() -- confirm intended
+        return VectorSupport.insert(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i, (long)Float16.float16ToShortBits(e),  // the new lane travels as raw bits widened to long
+                                (v, ix, bits) -> {  // Java implementation (presumably the non-intrinsic path)
+                                    Float16[] res = v.vec().clone();  // copy first: the receiver's payload is left untouched
+                                    res[ix] = Float16.shortBitsToFloat16((short)bits);
+                                    return v.vectorFactory(res);
+                                });
+    }
+
+    // Mask
+
+    static final class Halffloat256Mask extends AbstractMask<Float16> {  // mask type paired with Halffloat256Vector; payload is a boolean[]
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        Halffloat256Mask(boolean[] bits) {
+            this(bits, 0);
+        }
+
+        Halffloat256Mask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));  // always stores a private copy, never the caller's array
+        }
+
+        Halffloat256Mask(boolean val) {
+            super(prepare(val));
+        }
+
+        private static boolean[] prepare(boolean[] bits, int offset) {  // copies laneCount() flags starting at bits[offset]
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];
+            }
+            return newBits;
+        }
+
+        private static boolean[] prepare(boolean val) {  // laneCount() flags, all equal to val
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();  // the payload itself, not a copy
+        }
+
+        @Override
+        Halffloat256Mask uOp(MUnOp f) {  // lane-wise unary op in plain Java; f receives (lane index, lane value)
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat256Mask(res);
+        }
+
+        @Override
+        Halffloat256Mask bOp(VectorMask<Float16> m, MBinOp f) {  // lane-wise binary op in plain Java; m must be a Halffloat256Mask
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat256Mask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat256Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat256Vector toVector() {
+            return (Halffloat256Vector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));  // same steps as defaultMaskCast, minus the length check done above
+        }
+
+        @Override
+        @ForceInline
+        /*package-private*/
+        Halffloat256Mask indexPartiallyInUpperRange(long offset, long limit) {
+            return (Halffloat256Mask) VectorSupport.indexPartiallyInUpperRange(
+                Halffloat256Mask.class, ETYPE, VLENGTH, offset, limit,
+                (o, l) -> (Halffloat256Mask) TRUE_MASK.indexPartiallyInRange(o, l));
+        }
+
+        // Unary operations
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask not() {
+            return xor(maskAll(true));  // flipping every lane == xor with the all-true mask
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask compress() {
+            // Java implementation: set the lanes whose iota value is below trueCount().  The count is passed
+            // as a Float16; a bare short of float16 bits would widen to long and select compare(op, long).
+            return (Halffloat256Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
+                Halffloat256Vector.class, Halffloat256Mask.class, ETYPE, VLENGTH, null, this,
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT,
+                        Float16.shortBitsToFloat16(Float.floatToFloat16(m1.trueCount()))));
+        }
+
+        // Binary operations  (NOTE(review): these and the queries below pass short.class as the element type, while cast(), compress() and laneIsSet() pass Float16.class -- confirm)
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask and(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat256Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask or(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat256Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask xor(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat256Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // laneIsSet
+
+        @Override
+        @ForceInline
+        public boolean laneIsSet(int i) {
+            Objects.checkIndex(i, length());
+            return VectorSupport.extract(Halffloat256Mask.class, Float16.class, VLENGTH,
+                                         this, i, (m, idx) -> (m.getBits()[idx] ? 1L : 0L)) == 1L;  // lane state is encoded as 1L / 0L
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat256Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((Halffloat256Mask)m).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat256Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((Halffloat256Mask)m).getBits()));
+        }
+
+        @ForceInline
+        /*package-private*/
+        static Halffloat256Mask maskAll(boolean bit) {
+            return VectorSupport.fromBitsCoerced(Halffloat256Mask.class, short.class, VLENGTH,
+                                                 (bit ? -1 : 0), MODE_BROADCAST, null,
+                                                 (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));  // shared constants, no allocation
+        }
+        private static final Halffloat256Mask  TRUE_MASK = new Halffloat256Mask(true);
+        private static final Halffloat256Mask FALSE_MASK = new Halffloat256Mask(false);
+    }
+
+    // Shuffle
+
+    static final class Halffloat256Shuffle extends AbstractShuffle<Float16> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        Halffloat256Shuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat256Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat256Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat256Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat256Shuffle IOTA = new Halffloat256Shuffle(IDENTITY);
+
+        @Override
+        @ForceInline
+        public Halffloat256Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat256Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat256Vector)(((AbstractShuffle<Float16>)(s)).toVectorTemplate())));
+        }
+
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
+            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();
+            return s.shuffleFromArray(shuffleArray, 0).check(s);
+        }
+
+        @ForceInline
+        @Override
+        public Halffloat256Shuffle rearrange(VectorShuffle<Float16> shuffle) {
+            Halffloat256Shuffle s = (Halffloat256Shuffle) shuffle;
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi];  // throws on exceptional index
+            }
+            return new Halffloat256Shuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset) {
+        return super.fromArray0Template(a, offset);  // specialize (returns the abstract HalffloatVector type, no cast)
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m, offsetInRange);  // specialize
+    }
+
+
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) {
+        return super.fromMemorySegment0Template(ms, offset);  // specialize (returns the abstract HalffloatVector type, no cast)
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromMemorySegment0Template(Halffloat256Mask.class, ms, offset, (Halffloat256Mask) m, offsetInRange);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset, VectorMask<Float16> m) {
+        super.intoArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);  // specialize (masked form)
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m) {
+        super.intoMemorySegment0Template(Halffloat256Mask.class, ms, offset, (Halffloat256Mask) m);  // specialize (only the masked store is overridden in this part of the file)
+    }
+
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
+
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java
new file mode 100644
index 00000000000..1865119a030
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java
@@ -0,0 +1,933 @@
+/*
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class Halffloat512Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_512;  // the shape, bit size and lane count below all derive from this species
+
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();
+
+    static final Class<Halffloat512Vector> VCLASS = Halffloat512Vector.class;  // class token for this concrete vector type
+
+    static final int VSIZE = VSPECIES.vectorBitSize();  // size in bits; byteSize() divides it by Byte.SIZE
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+    Halffloat512Vector(Float16[] v) {
+        super(v);  // the array is passed to the superclass as-is (no copy is made here)
+    }
+
+    // For compatibility as Halffloat512Vector::new,
+    // stored into species.vectorFactory.
+    Halffloat512Vector(Object v) {
+        this((Float16[]) v);  // fails with ClassCastException if v is not a Float16[]
+    }
+
+    static final Halffloat512Vector ZERO = new Halffloat512Vector(new Float16[VLENGTH]);  // NOTE(review): array is default-initialized, not filled with a Float16 zero -- confirm lanes read as 0
+    static final Halffloat512Vector IOTA = new Halffloat512Vector(VSPECIES.iotaArray());  // lanes come from the species' iotaArray()
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();  // result discarded; called only for the warm-up described above
+        VSPECIES.withLanes(LaneType.BYTE);  // result discarded; called only for the warm-up described above
+    }
+
+    // Specialized extractors (each returns a per-class constant)
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE:  This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    @ForceInline
+    @Override
+    public final Class<Float16> elementType() { return Float16.class; }  // same class object as ETYPE
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Float16.SIZE; }  // presumably the lane width in bits -- confirm against Float16.SIZE
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }  // lane count of the species
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }  // bit size converted to bytes
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    Float16[] vec() {
+        return (Float16[])getPayload();  // the payload itself, not a copy
+    }
+
+    // Virtualized constructors (results are narrowed to this shape's concrete classes)
+
+    @Override
+    @ForceInline
+    public final Halffloat512Vector broadcast(Float16 e) {
+        return (Halffloat512Vector) super.broadcastTemplate(e);  // specialize (Float16 scalar)
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat512Vector broadcast(long e) {
+        return (Halffloat512Vector) super.broadcastTemplate(e);  // specialize (long scalar; narrower integral arguments also bind here)
+    }
+
+    @Override
+    @ForceInline
+    Halffloat512Mask maskFromArray(boolean[] bits) {
+        return new Halffloat512Mask(bits);  // wraps bits in this shape's mask class
+    }
+
+    @Override
+    @ForceInline
+    Halffloat512Shuffle iotaShuffle() { return Halffloat512Shuffle.IOTA; }  // shared constant, no allocation
+
+    @ForceInline
+    Halffloat512Shuffle iotaShuffle(int start, int step, boolean wrap) {  // lane i selects source index i*step + start
+      if (wrap) {  // wrap: flag 1, and the Java lambda folds each index with wrapToRange
+        return (Halffloat512Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat512Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+      } else {  // no wrap: flag 0, and the computed index is used as-is
+        return (Halffloat512Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat512Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+      }
+    }
+
+    @Override
+    @ForceInline
+    Halffloat512Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat512Shuffle(reorder); }  // factory over the byte[] constructor
+
+    @Override
+    @ForceInline
+    Halffloat512Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat512Shuffle(indexes, i); }  // factory over the (int[], int) constructor
+
+    @Override
+    @ForceInline
+    Halffloat512Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat512Shuffle(fn); }  // factory over the IntUnaryOperator constructor
+
+    // Make a vector of the same species but the given elements (the array is wrapped, not copied, by this method):
+    @ForceInline
+    final @Override
+    Halffloat512Vector vectorFactory(Float16[] vec) {
+        return new Halffloat512Vector(vec);
+    }
+
+    @ForceInline
+    final @Override
+    Byte512Vector asByteVectorRaw() {
+        return (Byte512Vector) super.asByteVectorRawTemplate();  // specialize (result is the byte vector class of the same 512-bit shape)
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize (lane type is only known at run time, hence the wildcard result)
+    }
+
+    // Unary operator -- the mask (if any) is cast to Halffloat512Mask, then handed to uOpTemplate
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector uOp(FUnOp f) {
+        return (Halffloat512Vector) super.uOpTemplate(f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector uOp(VectorMask<Float16> m, FUnOp f) {
+        return (Halffloat512Vector)
+            super.uOpTemplate((Halffloat512Mask)m, f);  // specialize
+    }
+
+    // Binary operator -- the operand (and mask, if any) is cast to this shape's classes, then handed to bOpTemplate
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector bOp(Vector<Float16> v, FBinOp f) {
+        return (Halffloat512Vector) super.bOpTemplate((Halffloat512Vector)v, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector bOp(Vector<Float16> v,
+                     VectorMask<Float16> m, FBinOp f) {
+        return (Halffloat512Vector)
+            super.bOpTemplate((Halffloat512Vector)v, (Halffloat512Mask)m,
+                              f);  // specialize
+    }
+
+    // Ternary operator -- operands (and the mask, if any) are cast to this shape's classes, then handed to tOpTemplate
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector tOp(Vector<Float16> v1, Vector<Float16> v2, FTriOp f) {
+        return (Halffloat512Vector)
+            super.tOpTemplate((Halffloat512Vector)v1, (Halffloat512Vector)v2,
+                              f);  // specialize (unmasked form)
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector tOp(Vector<Float16> v1, Vector<Float16> v2,
+                     VectorMask<Float16> m, FTriOp f) {
+        return (Halffloat512Vector)
+            super.tOpTemplate((Halffloat512Vector)v1, (Halffloat512Vector)v2,
+                              (Halffloat512Mask)m, f);  // specialize (masked form)
+    }
+
+    @ForceInline
+    final @Override
+    Float16 rOp(Float16 v, VectorMask<Float16> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Float16,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector lanewise(Unary op) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op);  // unmasked unary form, narrowed to this class
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector lanewise(Unary op, VectorMask<Float16> m) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m);  // masked unary form
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector lanewise(Binary op, Vector<Float16> v) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, v);  // unmasked binary form, narrowed to this class
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector lanewise(Binary op, Vector<Float16> v, VectorMask<Float16> m) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, v, (Halffloat512Mask) m);  // masked binary form
+    }
+
+
+    /*package-private*/
+    @Override
+    @ForceInline
+    public final
+    Halffloat512Vector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat512Vector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2, VectorMask<Float16> m) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, v1, v2, (Halffloat512Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat512Vector addIndex(int scale) {
+        return (Halffloat512Vector) super.addIndexTemplate(scale);  // specialize
+    }
+
+    // Type specific horizontal reductions
+
+    @ForceInline
+    @Override
+    public final Float16 reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // unmasked reduction via the shared template
+    }
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Float16> m) {
+        return super.reduceLanesTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m);  // specialized
+    }
+
+    @ForceInline
+    @Override
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        // Reduce to a Float16 first, then convert that scalar with longValue().
+        return super.reduceLanesTemplate(op).longValue();
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Float16> m) {
+        Float16 res = super.reduceLanesTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m);  // specialized
+        return res.longValue();
+    }
+
+    @Override @ForceInline  // @Override added: lets the compiler check this against the supertype declaration
+    public VectorShuffle<Float16> toShuffle() {
+        return super.toShuffleTemplate(Halffloat512Shuffle.class);  // template is given the concrete shuffle class
+    }
+
+    // Specialized unary testing
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask test(Test op) {
+        return super.testTemplate(Halffloat512Mask.class, op);  // unmasked test; template is given this shape's mask class
+    }
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask test(Test op, VectorMask<Float16> m) {
+        return super.testTemplate(Halffloat512Mask.class, op, (Halffloat512Mask) m);  // masked test
+    }
+
+    // Specialized comparisons
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask compare(Comparison op, Vector<Float16> v) {
+        return super.compareTemplate(Halffloat512Mask.class, op, v);  // vector-versus-vector comparison
+    }
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask compare(Comparison op, Float16 s) {
+        return super.compareTemplate(Halffloat512Mask.class, op, s);  // vector-versus-Float16-scalar comparison
+    }
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat512Mask.class, op, s);  // scalar supplied as a long
+    }
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask compare(Comparison op, Vector<Float16> v, VectorMask<Float16> m) {
+        return super.compareTemplate(Halffloat512Mask.class, op, v, (Halffloat512Mask) m);  // masked comparison
+    }
+
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector blend(Vector<Float16> v, VectorMask<Float16> m) {
+        return (Halffloat512Vector)
+            super.blendTemplate(Halffloat512Mask.class,
+                                (Halffloat512Vector) v,
+                                (Halffloat512Mask) m);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector slice(int origin, Vector<Float16> v) {
+        return (Halffloat512Vector) super.sliceTemplate(origin, v);  // two-vector slice, narrowed to this class
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector slice(int origin) {
+        return (Halffloat512Vector) super.sliceTemplate(origin);  // single-vector slice, narrowed to this class
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector unslice(int origin, Vector<Float16> w, int part) {
+        return (Halffloat512Vector) super.unsliceTemplate(origin, w, part);  // unmasked form, narrowed to this class
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector unslice(int origin, Vector<Float16> w, int part, VectorMask<Float16> m) {
+        return (Halffloat512Vector)
+            super.unsliceTemplate(Halffloat512Mask.class,
+                                  origin, w, part,
+                                  (Halffloat512Mask) m);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector unslice(int origin) {
+        return (Halffloat512Vector) super.unsliceTemplate(origin);  // origin-only form, narrowed to this class
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector rearrange(VectorShuffle<Float16> s) {
+        return (Halffloat512Vector)
+            super.rearrangeTemplate(Halffloat512Shuffle.class,
+                                    (Halffloat512Shuffle) s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector rearrange(VectorShuffle<Float16> shuffle,
+                                  VectorMask<Float16> m) {
+        return (Halffloat512Vector)
+            super.rearrangeTemplate(Halffloat512Shuffle.class,
+                                    Halffloat512Mask.class,
+                                    (Halffloat512Shuffle) shuffle,
+                                    (Halffloat512Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector rearrange(VectorShuffle<Float16> s,
+                                  Vector<Float16> v) {
+        return (Halffloat512Vector)
+            super.rearrangeTemplate(Halffloat512Shuffle.class,
+                                    (Halffloat512Shuffle) s,
+                                    (Halffloat512Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector compress(VectorMask<Float16> m) {
+        return (Halffloat512Vector)
+            super.compressTemplate(Halffloat512Mask.class,
+                                   (Halffloat512Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector expand(VectorMask<Float16> m) {
+        return (Halffloat512Vector)
+            super.expandTemplate(Halffloat512Mask.class,
+                                   (Halffloat512Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector selectFrom(Vector<Float16> v) {
+        return (Halffloat512Vector)
+            super.selectFromTemplate((Halffloat512Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector selectFrom(Vector<Float16> v,
+                                   VectorMask<Float16> m) {
+        return (Halffloat512Vector)
+            super.selectFromTemplate((Halffloat512Vector) v,
+                                     (Halffloat512Mask) m);  // specialize
+    }
+
+
+    @ForceInline
+    @Override
+    public Float16 lane(int i) {  // returns lane i; valid indexes are 0..31 (512 bits / 16-bit lanes)
+        short bits;
+        switch(i) {  // one case per lane so every laneHelper call site passes a literal index
+            case 0: bits = laneHelper(0); break;    case 16: bits = laneHelper(16); break;
+            case 1: bits = laneHelper(1); break;    case 17: bits = laneHelper(17); break;
+            case 2: bits = laneHelper(2); break;    case 18: bits = laneHelper(18); break;
+            case 3: bits = laneHelper(3); break;    case 19: bits = laneHelper(19); break;
+            case 4: bits = laneHelper(4); break;    case 20: bits = laneHelper(20); break;
+            case 5: bits = laneHelper(5); break;    case 21: bits = laneHelper(21); break;
+            case 6: bits = laneHelper(6); break;    case 22: bits = laneHelper(22); break;
+            case 7: bits = laneHelper(7); break;    case 23: bits = laneHelper(23); break;
+            case 8: bits = laneHelper(8); break;    case 24: bits = laneHelper(24); break;
+            case 9: bits = laneHelper(9); break;    case 25: bits = laneHelper(25); break;
+            case 10: bits = laneHelper(10); break;  case 26: bits = laneHelper(26); break;
+            case 11: bits = laneHelper(11); break;  case 27: bits = laneHelper(27); break;
+            case 12: bits = laneHelper(12); break;  case 28: bits = laneHelper(28); break;
+            case 13: bits = laneHelper(13); break;  case 29: bits = laneHelper(29); break;
+            case 14: bits = laneHelper(14); break;  case 30: bits = laneHelper(30); break;
+            case 15: bits = laneHelper(15); break;  case 31: bits = laneHelper(31); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return Float16.shortBitsToFloat16(bits);  // rebuild the Float16 from its raw 16-bit pattern
+    }
+
+    public short laneHelper(int i) {  // reads lane i and returns its raw 16-bit Float16 pattern
+        return (short) VectorSupport.extract(
+                     VCLASS, ETYPE, VLENGTH,
+                     this, i,
+                     (vec, ix) -> {  // presumably the Java fallback when the intrinsic is not applied -- TODO confirm
+                     Float16[] vecarr = vec.vec();
+                     return (long)Float16.float16ToShortBits(vecarr[ix]);  // bits travel as a long; the outer cast narrows them to short
+                     });
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector withLane(int i, Float16 e) {  // returns a vector with lane i replaced by e; valid indexes are 0..31
+        switch(i) {  // 512 bits / 16-bit lanes = 32 cases, each forwarding a literal index
+            case 0: return withLaneHelper(0, e);    case 16: return withLaneHelper(16, e);
+            case 1: return withLaneHelper(1, e);    case 17: return withLaneHelper(17, e);
+            case 2: return withLaneHelper(2, e);    case 18: return withLaneHelper(18, e);
+            case 3: return withLaneHelper(3, e);    case 19: return withLaneHelper(19, e);
+            case 4: return withLaneHelper(4, e);    case 20: return withLaneHelper(20, e);
+            case 5: return withLaneHelper(5, e);    case 21: return withLaneHelper(21, e);
+            case 6: return withLaneHelper(6, e);    case 22: return withLaneHelper(22, e);
+            case 7: return withLaneHelper(7, e);    case 23: return withLaneHelper(23, e);
+            case 8: return withLaneHelper(8, e);    case 24: return withLaneHelper(24, e);
+            case 9: return withLaneHelper(9, e);    case 25: return withLaneHelper(25, e);
+            case 10: return withLaneHelper(10, e);  case 26: return withLaneHelper(26, e);
+            case 11: return withLaneHelper(11, e);  case 27: return withLaneHelper(27, e);
+            case 12: return withLaneHelper(12, e);  case 28: return withLaneHelper(28, e);
+            case 13: return withLaneHelper(13, e);  case 29: return withLaneHelper(29, e);
+            case 14: return withLaneHelper(14, e);  case 30: return withLaneHelper(30, e);
+            case 15: return withLaneHelper(15, e);  case 31: return withLaneHelper(31, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+    }
+
+    public Halffloat512Vector withLaneHelper(int i, Float16 e) {  // produces a vector whose lane i holds e
+        return VectorSupport.insert(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i, (long)Float16.float16ToShortBits(e),  // new value passed as raw 16-bit bits widened to long
+                                (v, ix, bits) -> {  // presumably the Java fallback when the intrinsic is not applied -- TODO confirm
+                                    Float16[] res = v.vec().clone();  // clone so the source vector's lane array is never written
+                                    res[ix] = Float16.shortBitsToFloat16((short)bits);
+                                    return v.vectorFactory(res);
+                                });
+    }
+
+    // Mask
+
+    static final class Halffloat512Mask extends AbstractMask<Float16> {  // mask for Halffloat512Vector; lane flags are kept as a boolean[] payload
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        Halffloat512Mask(boolean[] bits) {
+            this(bits, 0);
+        }
+
+        Halffloat512Mask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));
+        }
+
+        Halffloat512Mask(boolean val) {
+            super(prepare(val));
+        }
+
+        private static boolean[] prepare(boolean[] bits, int offset) {  // copies laneCount() flags out of bits, starting at offset
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];
+            }
+            return newBits;
+        }
+
+        private static boolean[] prepare(boolean val) {  // fresh array with every lane flag set to val
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        @Override
+        Halffloat512Mask uOp(MUnOp f) {  // applies f to each (lane index, flag) pair and returns a new mask
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat512Mask(res);
+        }
+
+        @Override
+        Halffloat512Mask bOp(VectorMask<Float16> m, MBinOp f) {  // lane-by-lane combination of this mask with m through f
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat512Mask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat512Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat512Vector toVector() {
+            return (Halffloat512Vector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())  // only species with the same lane count are accepted
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));  // rebuilds the mask from a boolean[] copy for the target species
+        }
+
+        @Override
+        @ForceInline
+        /*package-private*/
+        Halffloat512Mask indexPartiallyInUpperRange(long offset, long limit) {
+            return (Halffloat512Mask) VectorSupport.indexPartiallyInUpperRange(
+                Halffloat512Mask.class, ETYPE, VLENGTH, offset, limit,
+                (o, l) -> (Halffloat512Mask) TRUE_MASK.indexPartiallyInRange(o, l));
+        }
+
+        // Unary operations
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask not() {
+            return xor(maskAll(true));  // XOR with the all-true mask flips every lane
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask compress() {
+            return (Halffloat512Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
+                Halffloat512Vector.class, Halffloat512Mask.class, ETYPE, VLENGTH, null, this,  // NOTE(review): Float.floatToFloat16 below yields short bits, which presumably bind to compare(Comparison, long) -- confirm that overload expects raw bits rather than the numeric count
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, Float.floatToFloat16(m1.trueCount())));
+        }
+
+
+        // Binary operations
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask and(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat512Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask or(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat512Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask xor(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat512Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {  // more lanes than a long has bits cannot be packed
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // laneIsSet
+
+        @Override
+        @ForceInline
+        public boolean laneIsSet(int i) {
+            Objects.checkIndex(i, length());
+            return VectorSupport.extract(Halffloat512Mask.class, Float16.class, VLENGTH,  // NOTE(review): Float16.class here, short.class in the neighbouring intrinsic calls -- confirm which one the JVM expects
+                                         this, i, (m, idx) -> (m.getBits()[idx] ? 1L : 0L)) == 1L;
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat512Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((Halffloat512Mask)m).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat512Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((Halffloat512Mask)m).getBits()));
+        }
+
+        @ForceInline
+        /*package-private*/
+        static Halffloat512Mask maskAll(boolean bit) {
+            return VectorSupport.fromBitsCoerced(Halffloat512Mask.class, short.class, VLENGTH,
+                                                 (bit ? -1 : 0), MODE_BROADCAST, null,
+                                                 (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));  // lambda returns the shared constant masks, never a new instance
+        }
+        private static final Halffloat512Mask  TRUE_MASK = new Halffloat512Mask(true);
+        private static final Halffloat512Mask FALSE_MASK = new Halffloat512Mask(false);
+
+    }
+
+    // Shuffle
+
+    static final class Halffloat512Shuffle extends AbstractShuffle<Float16> {  // shuffle for Halffloat512Vector; rearrange() below reads it as a byte[] via reorder()
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        Halffloat512Shuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat512Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat512Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat512Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat512Shuffle IOTA = new Halffloat512Shuffle(IDENTITY);
+
+        @Override
+        @ForceInline
+        public Halffloat512Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat512Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat512Vector)(((AbstractShuffle<Float16>)(s)).toVectorTemplate())));
+        }
+
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
+            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
+            if (length() != species.laneCount())  // only species with the same lane count are accepted
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();  // conversion goes through an int[] copy of the indexes
+            return s.shuffleFromArray(shuffleArray, 0).check(s);
+        }
+
+        @ForceInline
+        @Override
+        public Halffloat512Shuffle rearrange(VectorShuffle<Float16> shuffle) {  // composes the two shuffles: result[i] = this[shuffle[i]]
+            Halffloat512Shuffle s = (Halffloat512Shuffle) shuffle;
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi];  // throws on exceptional index
+            }
+            return new Halffloat512Shuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset) {
+        return super.fromArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m, offsetInRange);  // specialize
+    }
+
+
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) {
+        return super.fromMemorySegment0Template(ms, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromMemorySegment0Template(Halffloat512Mask.class, ms, offset, (Halffloat512Mask) m, offsetInRange);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset, VectorMask<Float16> m) {
+        super.intoArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m);
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m) {
+        super.intoMemorySegment0Template(Halffloat512Mask.class, ms, offset, (Halffloat512Mask) m);
+    }
+
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
+
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java
new file mode 100644
index 00000000000..05e0c10d4c5
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java
@@ -0,0 +1,909 @@
+/*
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class Halffloat64Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_64;
+
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();
+
+    static final Class<Halffloat64Vector> VCLASS = Halffloat64Vector.class;
+
+    static final int VSIZE = VSPECIES.vectorBitSize();
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+    Halffloat64Vector(Float16[] v) {
+        super(v);
+    }
+
+    // For compatibility as Halffloat64Vector::new,
+    // stored into species.vectorFactory.
+    Halffloat64Vector(Object v) {
+        this((Float16[]) v);
+    }
+
+    static final Halffloat64Vector ZERO = new Halffloat64Vector(new Float16[VLENGTH]);
+    static final Halffloat64Vector IOTA = new Halffloat64Vector(VSPECIES.iotaArray());
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();
+        VSPECIES.withLanes(LaneType.BYTE);
+    }
+
+    // Specialized extractors
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE:  This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    @ForceInline
+    @Override
+    public final Class<Float16> elementType() { return Float16.class; }
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Float16.SIZE; }
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    Float16[] vec() {
+        return (Float16[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    @Override
+    @ForceInline
+    public final Halffloat64Vector broadcast(Float16 e) {
+        return (Halffloat64Vector) super.broadcastTemplate(e);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Vector broadcast(long e) {
+        return (Halffloat64Vector) super.broadcastTemplate(e);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    Halffloat64Mask maskFromArray(boolean[] bits) {
+        return new Halffloat64Mask(bits);
+    }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle iotaShuffle() { return Halffloat64Shuffle.IOTA; }
+
+    @ForceInline
+    Halffloat64Shuffle iotaShuffle(int start, int step, boolean wrap) {
+        if (!wrap) {  // unwrapped progression: lane index -> index*step + start, flag argument 0
+            return (Halffloat64Shuffle) VectorSupport.shuffleIota(ETYPE, Halffloat64Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                    (len, base, stride, sp) -> sp.shuffleFromOp(lane -> (lane * stride + base)));
+        }
+        // wrap requested: same progression, each index passed through VectorIntrinsics.wrapToRange, flag argument 1
+        return (Halffloat64Shuffle) VectorSupport.shuffleIota(ETYPE, Halffloat64Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                (len, base, stride, sp) -> sp.shuffleFromOp(lane -> (VectorIntrinsics.wrapToRange(lane * stride + base, len))));
+    }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat64Shuffle(reorder); }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat64Shuffle(indexes, i); }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat64Shuffle(fn); }
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    Halffloat64Vector vectorFactory(Float16[] vec) {
+        return new Halffloat64Vector(vec);
+    }
+
+    @ForceInline
+    final @Override
+    Byte64Vector asByteVectorRaw() {
+        return (Byte64Vector) super.asByteVectorRawTemplate();  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize
+    }
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector uOp(FUnOp f) {
+        return (Halffloat64Vector) super.uOpTemplate(f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector uOp(VectorMask<Float16> m, FUnOp f) {
+        return (Halffloat64Vector)
+            super.uOpTemplate((Halffloat64Mask)m, f);  // specialize
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector bOp(Vector<Float16> v, FBinOp f) {
+        return (Halffloat64Vector) super.bOpTemplate((Halffloat64Vector)v, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector bOp(Vector<Float16> v,
+                     VectorMask<Float16> m, FBinOp f) {
+        return (Halffloat64Vector)
+            super.bOpTemplate((Halffloat64Vector)v, (Halffloat64Mask)m,
+                              f);  // specialize
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector tOp(Vector<Float16> v1, Vector<Float16> v2, FTriOp f) {
+        return (Halffloat64Vector)
+            super.tOpTemplate((Halffloat64Vector)v1, (Halffloat64Vector)v2,
+                              f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector tOp(Vector<Float16> v1, Vector<Float16> v2,
+                     VectorMask<Float16> m, FTriOp f) {
+        return (Halffloat64Vector)
+            super.tOpTemplate((Halffloat64Vector)v1, (Halffloat64Vector)v2,
+                              (Halffloat64Mask)m, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Float16 rOp(Float16 v, VectorMask<Float16> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Float16,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Unary op) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Unary op, VectorMask<Float16> m) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Binary op, Vector<Float16> v) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Binary op, Vector<Float16> v, VectorMask<Float16> m) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, v, (Halffloat64Mask) m);  // specialize
+    }
+
+
+    // Declared public final: forwards the unmasked ternary lane-wise operation to the shared template.
+    @Override
+    @ForceInline
+    public final
+    Halffloat64Vector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat64Vector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2, VectorMask<Float16> m) {
+        return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, v1, v2, (Halffloat64Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat64Vector addIndex(int scale) {
+        return (Halffloat64Vector) super.addIndexTemplate(scale);  // specialize
+    }
+
+    // Type specific horizontal reductions
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Float16> m) {
+        return super.reduceLanesTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        // Reduce all lanes to one Float16 scalar, then convert that scalar with longValue().
+        return super.reduceLanesTemplate(op).longValue();  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Float16> m) {
+        Float16 res = super.reduceLanesTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m);  // specialized
+        return res.longValue();
+    }
+
+    @ForceInline
+    public VectorShuffle<Float16> toShuffle() {
+        return super.toShuffleTemplate(Halffloat64Shuffle.class); // specialize
+    }
+
+    // Specialized unary testing
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask test(Test op) {
+        return super.testTemplate(Halffloat64Mask.class, op);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask test(Test op, VectorMask<Float16> m) {
+        return super.testTemplate(Halffloat64Mask.class, op, (Halffloat64Mask) m);  // specialize
+    }
+
+    // Specialized comparisons
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, Vector<Float16> v) {
+        return super.compareTemplate(Halffloat64Mask.class, op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, Float16 s) {
+        return super.compareTemplate(Halffloat64Mask.class, op, s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat64Mask.class, op, s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, Vector<Float16> v, VectorMask<Float16> m) {
+        return super.compareTemplate(Halffloat64Mask.class, op, v, (Halffloat64Mask) m);  // specialize
+    }
+
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector blend(Vector<Float16> v, VectorMask<Float16> m) {
+        return (Halffloat64Vector)
+            super.blendTemplate(Halffloat64Mask.class,
+                                (Halffloat64Vector) v,
+                                (Halffloat64Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector slice(int origin, Vector<Float16> v) {
+        return (Halffloat64Vector) super.sliceTemplate(origin, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector slice(int origin) {
+        return (Halffloat64Vector) super.sliceTemplate(origin);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector unslice(int origin, Vector<Float16> w, int part) {
+        return (Halffloat64Vector) super.unsliceTemplate(origin, w, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector unslice(int origin, Vector<Float16> w, int part, VectorMask<Float16> m) {
+        return (Halffloat64Vector)
+            super.unsliceTemplate(Halffloat64Mask.class,
+                                  origin, w, part,
+                                  (Halffloat64Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector unslice(int origin) {
+        return (Halffloat64Vector) super.unsliceTemplate(origin);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector rearrange(VectorShuffle<Float16> s) {
+        return (Halffloat64Vector)
+            super.rearrangeTemplate(Halffloat64Shuffle.class,
+                                    (Halffloat64Shuffle) s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector rearrange(VectorShuffle<Float16> shuffle,
+                                  VectorMask<Float16> m) {
+        return (Halffloat64Vector)
+            super.rearrangeTemplate(Halffloat64Shuffle.class,
+                                    Halffloat64Mask.class,
+                                    (Halffloat64Shuffle) shuffle,
+                                    (Halffloat64Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector rearrange(VectorShuffle<Float16> s,
+                                  Vector<Float16> v) {
+        return (Halffloat64Vector)
+            super.rearrangeTemplate(Halffloat64Shuffle.class,
+                                    (Halffloat64Shuffle) s,
+                                    (Halffloat64Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector compress(VectorMask<Float16> m) {
+        return (Halffloat64Vector)
+            super.compressTemplate(Halffloat64Mask.class,
+                                   (Halffloat64Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector expand(VectorMask<Float16> m) {
+        return (Halffloat64Vector)
+            super.expandTemplate(Halffloat64Mask.class,
+                                   (Halffloat64Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector selectFrom(Vector<Float16> v) {
+        return (Halffloat64Vector)
+            super.selectFromTemplate((Halffloat64Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector selectFrom(Vector<Float16> v,
+                                   VectorMask<Float16> m) {
+        return (Halffloat64Vector)
+            super.selectFromTemplate((Halffloat64Vector) v,
+                                     (Halffloat64Mask) m);  // specialize
+    }
+
+
+    @ForceInline
+    @Override
+    public Float16 lane(int i) {
+        short bits;  // 16-bit pattern of the selected lane; decoded to Float16 on return
+        switch(i) {  // one case per lane, each passing a literal index to laneHelper — NOTE(review): presumably so the callee sees a constant; confirm
+            case 0: bits = laneHelper(0); break;
+            case 1: bits = laneHelper(1); break;
+            case 2: bits = laneHelper(2); break;
+            case 3: bits = laneHelper(3); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);  // any index other than 0..3
+        }
+        return Float16.shortBitsToFloat16(bits);
+    }
+
+    /**
+     * Returns lane {@code i} as a raw 16-bit pattern, obtained through {@code VectorSupport.extract}.
+     * The lambda handed to that call reads the lane from the backing array and converts it
+     * with {@code Float16.float16ToShortBits}, widened to {@code long}.
+     */
+    public short laneHelper(int i) {
+        return (short) VectorSupport.extract(VCLASS, ETYPE, VLENGTH, this, i,
+                (vec, ix) -> (long) Float16.float16ToShortBits(vec.vec()[ix]));
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat64Vector withLane(int i, Float16 e) {
+        switch(i) {  // one case per lane, each passing a literal index to withLaneHelper
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);  // any index other than 0..3
+        }
+    }
+
+    // Returns a new vector equal to this one except that lane i holds e, via VectorSupport.insert.
+    // The element is handed over as its 16-bit pattern widened to long.
+    public Halffloat64Vector withLaneHelper(int i, Float16 e) {
+        final long payload = (long) Float16.float16ToShortBits(e);
+        return VectorSupport.insert(VCLASS, ETYPE, VLENGTH, this, i, payload, (v, ix, bits) -> {
+            Float16[] lanes = v.vec().clone();  // work on a copy; the source array is left untouched
+            lanes[ix] = Float16.shortBitsToFloat16((short) bits);
+            return v.vectorFactory(lanes);
+        });
+    }
+
+    // Mask
+
+    static final class Halffloat64Mask extends AbstractMask<Float16> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        Halffloat64Mask(boolean[] bits) {
+            this(bits, 0);
+        }
+
+        Halffloat64Mask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));
+        }
+
+        Halffloat64Mask(boolean val) {
+            super(prepare(val));
+        }
+
+        // Copies exactly laneCount flags out of 'bits', beginning at 'offset',
+        // into a freshly allocated array that is returned to the constructor.
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            boolean[] copy = new boolean[VSPECIES.laneCount()];
+            for (int lane = 0; lane < copy.length; lane++) { copy[lane] = bits[offset + lane]; }
+            return copy;
+        }
+
+        private static boolean[] prepare(boolean val) {
+            boolean[] uniform = new boolean[VSPECIES.laneCount()];  // one flag per lane, all false after allocation
+            if (val) { Arrays.fill(uniform, true); }                // only the all-true case needs filling
+            return uniform;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        @Override
+        Halffloat64Mask uOp(MUnOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat64Mask(res);
+        }
+
+        @Override
+        Halffloat64Mask bOp(VectorMask<Float16> m, MBinOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat64Mask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat64Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat64Vector toVector() {
+            return (Halffloat64Vector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            // Lane counts are equal past this point, so VLENGTH is passed as both the source and the target length.
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));  // lambda rebuilds the mask for species s from a boolean[]
+        }
+
+        @Override
+        @ForceInline
+        /*package-private*/
+        Halffloat64Mask indexPartiallyInUpperRange(long offset, long limit) {
+            return (Halffloat64Mask) VectorSupport.indexPartiallyInUpperRange(
+                Halffloat64Mask.class, ETYPE, VLENGTH, offset, limit,
+                (o, l) -> (Halffloat64Mask) TRUE_MASK.indexPartiallyInRange(o, l));  // lambda derives the result from the all-true constant mask
+        }
+
+        // Unary operations
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask not() {
+            return xor(maskAll(true));  // XOR against the all-true mask inverts every lane flag
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask compress() {
+            // Lambda sets lanes [0, trueCount): the count is wrapped as a Float16 so compare(op, Float16) is selected; a bare short of FP16 bits would bind to compare(op, long).
+            return (Halffloat64Mask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS, Halffloat64Vector.class, Halffloat64Mask.class, ETYPE, VLENGTH, null, this,
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, Float16.shortBitsToFloat16(Float.floatToFloat16(m1.trueCount()))));
+        }
+
+
+        // Binary operations
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask and(VectorMask<Float16> mask) {
+            // Null check, then narrowing to this mask class; the nested lambda ANDs the two flags of each lane.
+            final Halffloat64Mask rhs = (Halffloat64Mask) Objects.requireNonNull(mask);
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat64Mask.class, null, short.class, VLENGTH,
+                                          this, rhs, null,
+                                          (lhs, other, unused) -> lhs.bOp(other, (lane, x, y) -> x & y));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask or(VectorMask<Float16> mask) {
+            // Null check, then narrowing to this mask class; the nested lambda ORs the two flags of each lane.
+            final Halffloat64Mask rhs = (Halffloat64Mask) Objects.requireNonNull(mask);
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat64Mask.class, null, short.class, VLENGTH,
+                                          this, rhs, null,
+                                          (lhs, other, unused) -> lhs.bOp(other, (lane, x, y) -> x | y));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask xor(VectorMask<Float16> mask) {
+            // Null check, then narrowing to this mask class; the nested lambda XORs the two flags of each lane.
+            final Halffloat64Mask rhs = (Halffloat64Mask) Objects.requireNonNull(mask);
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat64Mask.class, null, short.class, VLENGTH,
+                                          this, rhs, null,
+                                          (lhs, other, unused) -> lhs.bOp(other, (lane, x, y) -> x ^ y));
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            // VECTOR_OP_MASK_TRUECOUNT reduction narrowed to int; the lambda applies trueCountHelper to this mask's flags.
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat64Mask.class, short.class, VLENGTH, this, self -> trueCountHelper(self.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            // VECTOR_OP_MASK_FIRSTTRUE reduction narrowed to int; the lambda applies firstTrueHelper to this mask's flags.
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat64Mask.class, short.class, VLENGTH, this, self -> firstTrueHelper(self.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            // VECTOR_OP_MASK_LASTTRUE reduction narrowed to int; the lambda applies lastTrueHelper to this mask's flags.
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat64Mask.class, short.class, VLENGTH, this, self -> lastTrueHelper(self.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public long toLong() {
+            // Guard first: the result has room for at most Long.SIZE lane flags.
+            final boolean fits = length() <= Long.SIZE;
+            if (!fits) { throw new UnsupportedOperationException("too many lanes for one long"); }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat64Mask.class, short.class, VLENGTH, this,
+                                                      self -> toLongHelper(self.getBits()));
+        }
+
+        // laneIsSet
+
+        @Override
+        @ForceInline
+        public boolean laneIsSet(int i) {
+            Objects.checkIndex(i, length());  // rejects i outside [0, length())
+            final long flag = VectorSupport.extract(Halffloat64Mask.class, Float16.class, VLENGTH, this, i, (self, lane) -> (self.getBits()[lane] ? 1L : 0L));
+            return flag == 1L;  // the lambda encodes a set lane as 1L and a clear lane as 0L
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            // BT_ne test of this mask against the species' all-true mask; the lambda applies anyTrueHelper to this mask's flags.
+            return VectorSupport.test(BT_ne, Halffloat64Mask.class, short.class, VLENGTH, this, vspecies().maskAll(true),
+                                      (self, ignored) -> anyTrueHelper(((Halffloat64Mask) self).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            // BT_overflow test of this mask against the species' all-true mask; the lambda applies allTrueHelper to this mask's flags.
+            return VectorSupport.test(BT_overflow, Halffloat64Mask.class, short.class, VLENGTH, this, vspecies().maskAll(true),
+                                      (self, ignored) -> allTrueHelper(((Halffloat64Mask) self).getBits()));
+        }
+
+        @ForceInline
+        /*package-private*/
+        static Halffloat64Mask maskAll(boolean bit) {
+            return VectorSupport.fromBitsCoerced(Halffloat64Mask.class, short.class, VLENGTH,
+                                                 (bit ? -1 : 0), MODE_BROADCAST, null,
+                                                 (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
+        }
+        private static final Halffloat64Mask  TRUE_MASK = new Halffloat64Mask(true);
+        private static final Halffloat64Mask FALSE_MASK = new Halffloat64Mask(false);
+
+    }
+
+    // Shuffle
+
+    static final class Halffloat64Shuffle extends AbstractShuffle<Float16> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        Halffloat64Shuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat64Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat64Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat64Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat64Shuffle IOTA = new Halffloat64Shuffle(IDENTITY);
+
+        @Override
+        @ForceInline
+        public Halffloat64Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat64Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat64Vector)(((AbstractShuffle<Float16>)(s)).toVectorTemplate())));
+        }
+
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
+            AbstractSpecies<F> target = (AbstractSpecies<F>) s;  // narrowed only to read laneCount()
+            if (length() != target.laneCount()) {
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            }
+            return s.shuffleFromArray(toArray(), 0).check(s);  // same indexes, re-wrapped for species s
+        }
+
+        @ForceInline
+        @Override
+        public Halffloat64Shuffle rearrange(VectorShuffle<Float16> shuffle) {
+            // Composition on the byte index arrays: result[i] = this[shuffle[i]].
+            final Halffloat64Shuffle other = (Halffloat64Shuffle) shuffle;
+            final byte[] mine = reorder();
+            final byte[] theirs = other.reorder();
+            final byte[] composed = new byte[mine.length];
+            for (int lane = 0; lane < mine.length; lane++) {
+                composed[lane] = mine[theirs[lane]];  // throws on exceptional index
+            }
+            return new Halffloat64Shuffle(composed);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset) {
+        return super.fromArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m, offsetInRange);  // specialize
+    }
+
+
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) {
+        return super.fromMemorySegment0Template(ms, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromMemorySegment0Template(Halffloat64Mask.class, ms, offset, (Halffloat64Mask) m, offsetInRange);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset, VectorMask<Float16> m) {
+        super.intoArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m);  // specialize: fixes the mask class for the shared template
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m) {
+        super.intoMemorySegment0Template(Halffloat64Mask.class, ms, offset, (Halffloat64Mask) m);  // specialize: fixes the mask class for the shared template
+    }
+
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
+
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java
new file mode 100644
index 00000000000..3cc4059fa9d
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java
@@ -0,0 +1,902 @@
+/*
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class HalffloatMaxVector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_MAX;
+
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();
+
+    static final Class<HalffloatMaxVector> VCLASS = HalffloatMaxVector.class;
+
+    static final int VSIZE = VSPECIES.vectorBitSize();
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+    HalffloatMaxVector(Float16[] v) {
+        super(v);
+    }
+
+    // For compatibility as HalffloatMaxVector::new,
+    // stored into species.vectorFactory.
+    HalffloatMaxVector(Object v) {
+        this((Float16[]) v);
+    }
+
+    static final HalffloatMaxVector ZERO = new HalffloatMaxVector(new Float16[VLENGTH]);
+    static final HalffloatMaxVector IOTA = new HalffloatMaxVector(VSPECIES.iotaArray());
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();
+        VSPECIES.withLanes(LaneType.BYTE);
+    }
+
+    // Specialized extractors
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE:  This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    @ForceInline
+    @Override
+    public final Class<Float16> elementType() { return Float16.class; }
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Float16.SIZE; }
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    Float16[] vec() {
+        return (Float16[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxVector broadcast(Float16 e) {
+        return (HalffloatMaxVector) super.broadcastTemplate(e);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxVector broadcast(long e) {
+        return (HalffloatMaxVector) super.broadcastTemplate(e);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    HalffloatMaxMask maskFromArray(boolean[] bits) {
+        return new HalffloatMaxMask(bits);
+    }
+
+    @Override
+    @ForceInline
+    HalffloatMaxShuffle iotaShuffle() { return HalffloatMaxShuffle.IOTA; }
+
+    @ForceInline
+    HalffloatMaxShuffle iotaShuffle(int start, int step, boolean wrap) {
+      if (wrap) {
+        return (HalffloatMaxShuffle)VectorSupport.shuffleIota(ETYPE, HalffloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+      } else {
+        return (HalffloatMaxShuffle)VectorSupport.shuffleIota(ETYPE, HalffloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+      }
+    }
+
+    @Override
+    @ForceInline
+    HalffloatMaxShuffle shuffleFromBytes(byte[] reorder) { return new HalffloatMaxShuffle(reorder); }
+
+    @Override
+    @ForceInline
+    HalffloatMaxShuffle shuffleFromArray(int[] indexes, int i) { return new HalffloatMaxShuffle(indexes, i); }
+
+    @Override
+    @ForceInline
+    HalffloatMaxShuffle shuffleFromOp(IntUnaryOperator fn) { return new HalffloatMaxShuffle(fn); }
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    HalffloatMaxVector vectorFactory(Float16[] vec) {
+        return new HalffloatMaxVector(vec);
+    }
+
+    @ForceInline
+    final @Override
+    ByteMaxVector asByteVectorRaw() {
+        return (ByteMaxVector) super.asByteVectorRawTemplate();  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize
+    }
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector uOp(FUnOp f) {
+        return (HalffloatMaxVector) super.uOpTemplate(f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector uOp(VectorMask<Float16> m, FUnOp f) {
+        return (HalffloatMaxVector)
+            super.uOpTemplate((HalffloatMaxMask)m, f);  // specialize
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector bOp(Vector<Float16> v, FBinOp f) {
+        return (HalffloatMaxVector) super.bOpTemplate((HalffloatMaxVector)v, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector bOp(Vector<Float16> v,
+                     VectorMask<Float16> m, FBinOp f) {
+        return (HalffloatMaxVector)
+            super.bOpTemplate((HalffloatMaxVector)v, (HalffloatMaxMask)m,
+                              f);  // specialize
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector tOp(Vector<Float16> v1, Vector<Float16> v2, FTriOp f) {
+        return (HalffloatMaxVector)
+            super.tOpTemplate((HalffloatMaxVector)v1, (HalffloatMaxVector)v2,
+                              f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector tOp(Vector<Float16> v1, Vector<Float16> v2,
+                     VectorMask<Float16> m, FTriOp f) {
+        return (HalffloatMaxVector)
+            super.tOpTemplate((HalffloatMaxVector)v1, (HalffloatMaxVector)v2,
+                              (HalffloatMaxMask)m, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Float16 rOp(Float16 v, VectorMask<Float16> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Float16,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector lanewise(Unary op) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector lanewise(Unary op, VectorMask<Float16> m) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector lanewise(Binary op, Vector<Float16> v) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector lanewise(Binary op, Vector<Float16> v, VectorMask<Float16> m) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, v, (HalffloatMaxMask) m);  // specialize
+    }
+
+
+    // Public ternary lanewise entry point: forwards to lanewiseTemplate and narrows the result type.
+    @Override
+    @ForceInline
+    public final
+    HalffloatMaxVector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, v1, v2);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    HalffloatMaxVector
+    lanewise(Ternary op, Vector<Float16> v1, Vector<Float16> v2, VectorMask<Float16> m) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, v1, v2, (HalffloatMaxMask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    HalffloatMaxVector addIndex(int scale) {
+        return (HalffloatMaxVector) super.addIndexTemplate(scale);  // specialize
+    }
+
+    // Type specific horizontal reductions
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final Float16 reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Float16> m) {
+        return super.reduceLanesTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        Float16 res = super.reduceLanesTemplate(op);  // specialized
+        return res.longValue();
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Float16> m) {
+        Float16 res = super.reduceLanesTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m);  // specialized
+        return res.longValue();
+    }
+
+    @ForceInline
+    public VectorShuffle<Float16> toShuffle() {
+        return super.toShuffleTemplate(HalffloatMaxShuffle.class); // specialize
+    }
+
+    // Specialized unary testing
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask test(Test op) {
+        return super.testTemplate(HalffloatMaxMask.class, op);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask test(Test op, VectorMask<Float16> m) {
+        return super.testTemplate(HalffloatMaxMask.class, op, (HalffloatMaxMask) m);  // specialize
+    }
+
+    // Specialized comparisons
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask compare(Comparison op, Vector<Float16> v) {
+        return super.compareTemplate(HalffloatMaxMask.class, op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask compare(Comparison op, Float16 s) {
+        return super.compareTemplate(HalffloatMaxMask.class, op, s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask compare(Comparison op, long s) {
+        return super.compareTemplate(HalffloatMaxMask.class, op, s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask compare(Comparison op, Vector<Float16> v, VectorMask<Float16> m) {
+        return super.compareTemplate(HalffloatMaxMask.class, op, v, (HalffloatMaxMask) m);
+    }
+
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector blend(Vector<Float16> v, VectorMask<Float16> m) {
+        return (HalffloatMaxVector)
+            super.blendTemplate(HalffloatMaxMask.class,
+                                (HalffloatMaxVector) v,
+                                (HalffloatMaxMask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector slice(int origin, Vector<Float16> v) {
+        return (HalffloatMaxVector) super.sliceTemplate(origin, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector slice(int origin) {
+        return (HalffloatMaxVector) super.sliceTemplate(origin);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector unslice(int origin, Vector<Float16> w, int part) {
+        return (HalffloatMaxVector) super.unsliceTemplate(origin, w, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector unslice(int origin, Vector<Float16> w, int part, VectorMask<Float16> m) {
+        return (HalffloatMaxVector)
+            super.unsliceTemplate(HalffloatMaxMask.class,
+                                  origin, w, part,
+                                  (HalffloatMaxMask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector unslice(int origin) {
+        return (HalffloatMaxVector) super.unsliceTemplate(origin);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector rearrange(VectorShuffle<Float16> s) {
+        return (HalffloatMaxVector)
+            super.rearrangeTemplate(HalffloatMaxShuffle.class,
+                                    (HalffloatMaxShuffle) s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector rearrange(VectorShuffle<Float16> shuffle,
+                                  VectorMask<Float16> m) {
+        return (HalffloatMaxVector)
+            super.rearrangeTemplate(HalffloatMaxShuffle.class,
+                                    HalffloatMaxMask.class,
+                                    (HalffloatMaxShuffle) shuffle,
+                                    (HalffloatMaxMask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector rearrange(VectorShuffle<Float16> s,
+                                  Vector<Float16> v) {
+        return (HalffloatMaxVector)
+            super.rearrangeTemplate(HalffloatMaxShuffle.class,
+                                    (HalffloatMaxShuffle) s,
+                                    (HalffloatMaxVector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector compress(VectorMask<Float16> m) {
+        return (HalffloatMaxVector)
+            super.compressTemplate(HalffloatMaxMask.class,
+                                   (HalffloatMaxMask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector expand(VectorMask<Float16> m) {
+        return (HalffloatMaxVector)
+            super.expandTemplate(HalffloatMaxMask.class,
+                                   (HalffloatMaxMask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector selectFrom(Vector<Float16> v) {
+        return (HalffloatMaxVector)
+            super.selectFromTemplate((HalffloatMaxVector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector selectFrom(Vector<Float16> v,
+                                   VectorMask<Float16> m) {
+        return (HalffloatMaxVector)
+            super.selectFromTemplate((HalffloatMaxVector) v,
+                                     (HalffloatMaxMask) m);  // specialize
+    }
+
+
+    @ForceInline
+    @Override
+    public Float16 lane(int i) {
+        // Reject out-of-range lane indexes up front, before laneHelper is invoked.
+        if (!(0 <= i && i < VLENGTH)) {
+            throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return Float16.shortBitsToFloat16(laneHelper(i));
+    }
+
+    public short laneHelper(int i) {
+        return (short) VectorSupport.extract(
+                     VCLASS, ETYPE, VLENGTH,
+                     this, i,
+                     (vec, ix) -> {
+                     Float16[] vecarr = vec.vec();
+                     return (long)Float16.float16ToShortBits(vecarr[ix]);
+                     });
+    }
+
+    @ForceInline
+    @Override
+    public HalffloatMaxVector withLane(int i, Float16 e) {
+        if (i < 0 || i >= VLENGTH) {
+            throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return withLaneHelper(i, e);
+    }
+
+    public HalffloatMaxVector withLaneHelper(int i, Float16 e) {
+        return VectorSupport.insert(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i, (long)Float16.float16ToShortBits(e),
+                                (v, ix, bits) -> {
+                                    Float16[] res = v.vec().clone();
+                                    res[ix] = Float16.shortBitsToFloat16((short)bits);
+                                    return v.vectorFactory(res);
+                                });
+    }
+
+    // Mask
+
+    static final class HalffloatMaxMask extends AbstractMask<Float16> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        HalffloatMaxMask(boolean[] bits) {
+            this(bits, 0);
+        }
+
+        HalffloatMaxMask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));
+        }
+
+        HalffloatMaxMask(boolean val) {
+            super(prepare(val));
+        }
+
+        // Copies laneCount() flags from bits, starting at offset, into a fresh array owned by the new mask.
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            // Bulk copy instead of a per-lane loop; still throws if bits is too short for offset.
+            System.arraycopy(bits, offset, newBits, 0, newBits.length);
+            return newBits;
+        }
+
+        private static boolean[] prepare(boolean val) {
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        @Override
+        HalffloatMaxMask uOp(MUnOp f) {  // lane-by-lane unary mask operation
+            final boolean[] laneFlags = getBits();
+            final boolean[] mapped = new boolean[vspecies().laneCount()];
+            for (int lane = 0; lane < mapped.length; lane++) {
+                mapped[lane] = f.apply(lane, laneFlags[lane]);  // f sees the lane index and its current flag
+            }
+            return new HalffloatMaxMask(mapped);
+        }
+
+        @Override
+        HalffloatMaxMask bOp(VectorMask<Float16> m, MBinOp f) {  // lane-by-lane combination of two masks
+            final boolean[] lhs = getBits();
+            final boolean[] rhs = ((HalffloatMaxMask) m).getBits();
+            final boolean[] combined = new boolean[vspecies().laneCount()];
+            for (int lane = 0; lane < combined.length; lane++) {
+                combined[lane] = f.apply(lane, lhs[lane], rhs[lane]);  // f gets the lane index and both flags
+            }
+            return new HalffloatMaxMask(combined);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        HalffloatMaxVector toVector() {
+            return (HalffloatMaxVector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));
+        }
+
+        @Override
+        @ForceInline
+        /*package-private*/
+        HalffloatMaxMask indexPartiallyInUpperRange(long offset, long limit) {
+            return (HalffloatMaxMask) VectorSupport.indexPartiallyInUpperRange(
+                HalffloatMaxMask.class, ETYPE, VLENGTH, offset, limit,
+                (o, l) -> (HalffloatMaxMask) TRUE_MASK.indexPartiallyInRange(o, l));
+        }
+
+        // Unary operations
+
+        @Override
+        @ForceInline
+        public HalffloatMaxMask not() {
+            return xor(maskAll(true));
+        }
+
+        // Lambda result: lanes whose iota index is below the number of set lanes (a prefix of trueCount() lanes).
+        @Override
+        @ForceInline
+        public HalffloatMaxMask compress() {
+            return (HalffloatMaxMask)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
+                HalffloatMaxVector.class, HalffloatMaxMask.class, ETYPE, VLENGTH, null, this,
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));  // numeric count for compare(long), not float16 bits
+        }
+
+        // Binary operations
+
+        @Override
+        @ForceInline
+        public HalffloatMaxMask and(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, HalffloatMaxMask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        @Override
+        @ForceInline
+        public HalffloatMaxMask or(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, HalffloatMaxMask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        @Override
+        @ForceInline
+        public HalffloatMaxMask xor(VectorMask<Float16> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, HalffloatMaxMask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // laneIsSet
+
+        @Override
+        @ForceInline
+        public boolean laneIsSet(int i) {
+            Objects.checkIndex(i, length());
+            return VectorSupport.extract(HalffloatMaxMask.class, Float16.class, VLENGTH,
+                                         this, i, (m, idx) -> (m.getBits()[idx] ? 1L : 0L)) == 1L;
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, HalffloatMaxMask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((HalffloatMaxMask)m).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, HalffloatMaxMask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((HalffloatMaxMask)m).getBits()));
+        }
+
+        @ForceInline
+        /*package-private*/
+        static HalffloatMaxMask maskAll(boolean bit) {
+            return VectorSupport.fromBitsCoerced(HalffloatMaxMask.class, short.class, VLENGTH,
+                                                 (bit ? -1 : 0), MODE_BROADCAST, null,
+                                                 (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
+        }
+        private static final HalffloatMaxMask  TRUE_MASK = new HalffloatMaxMask(true);
+        private static final HalffloatMaxMask FALSE_MASK = new HalffloatMaxMask(false);
+
+    }
+
+    // Shuffle
+
+    static final class HalffloatMaxShuffle extends AbstractShuffle<Float16> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Float16> ETYPE = Float16.class; // used by the JVM
+
+        HalffloatMaxShuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public HalffloatMaxShuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public HalffloatMaxShuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public HalffloatMaxShuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final HalffloatMaxShuffle IOTA = new HalffloatMaxShuffle(IDENTITY);
+
+        @Override
+        @ForceInline
+        public HalffloatMaxVector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, HalffloatMaxShuffle.class, this, VLENGTH,
+                                                    (s) -> ((HalffloatMaxVector)(((AbstractShuffle<Float16>)(s)).toVectorTemplate())));
+        }
+
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {  // re-express this shuffle for another species
+            AbstractSpecies<F> target = (AbstractSpecies<F>) s;
+            if (length() != target.laneCount()) {
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            }
+            return s.shuffleFromArray(toArray(), 0).check(s);
+        }
+
+        @ForceInline
+        @Override
+        public HalffloatMaxShuffle rearrange(VectorShuffle<Float16> shuffle) {
+            HalffloatMaxShuffle that = (HalffloatMaxShuffle) shuffle;
+            byte[] mine = reorder();
+            byte[] theirs = that.reorder();
+            byte[] composed = new byte[mine.length];
+            for (int lane = 0; lane < mine.length; lane++) {
+                // Compose the permutations; an exceptional (negative) entry in theirs throws here.
+                composed[lane] = mine[theirs[lane]];
+            }
+            return new HalffloatMaxShuffle(composed);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset) {
+        return super.fromArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(Float16[] a, int offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m, offsetInRange);  // specialize
+    }
+
+
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset) {
+        return super.fromMemorySegment0Template(ms, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m, int offsetInRange) {
+        return super.fromMemorySegment0Template(HalffloatMaxMask.class, ms, offset, (HalffloatMaxMask) m, offsetInRange);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(Float16[] a, int offset, VectorMask<Float16> m) {
+        super.intoArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m);
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m) {
+        super.intoMemorySegment0Template(HalffloatMaxMask.class, ms, offset, (HalffloatMaxMask) m);
+    }
+
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
+
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java
new file mode 100644
index 00000000000..beedb1d8390
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java
@@ -0,0 +1,3853 @@
+/*
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.Function;
+
+import jdk.internal.foreign.AbstractMemorySegmentImpl;
+import jdk.internal.misc.ScopedMemoryAccess;
+import jdk.internal.misc.Unsafe;
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+import static jdk.incubator.vector.VectorIntrinsics.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+/**
+ * A specialized {@link Vector} representing an ordered immutable sequence of
+ * {@code Float16} values.
+ */
+@SuppressWarnings("cast")  // warning: redundant cast
+public abstract class HalffloatVector extends AbstractVector<Float16> {
+
+    HalffloatVector(Float16[] vec) {
+        super(vec);  // the lane array is handed to AbstractVector without copying here
+    }
+
+    static final int FORBID_OPCODE_KIND = VO_NOFP;
+
+    static final ValueLayout.OfShort ELEMENT_LAYOUT = ValueLayout.JAVA_SHORT.withByteAlignment(1);
+
+    @ForceInline
+    static int opCode(Operator op) {
+        return opCode(op, 0);  // no extra required kind; the two-argument overload adds VO_OPCODE_VALID
+    }
+    @ForceInline
+    static int opCode(Operator op, int requireKind) {
+        // VO_OPCODE_VALID is always required; FORBID_OPCODE_KIND (VO_NOFP) is passed as the forbidden kind.
+        return VectorOperators.opCode(op, requireKind | VO_OPCODE_VALID, FORBID_OPCODE_KIND);
+    }
+    @ForceInline
+    static boolean opKind(Operator op, int bit) {
+        return VectorOperators.opKind(op, bit);  // plain forwarder to VectorOperators
+    }
+
+    // Virtualized factories and operators,
+    // coded with portable definitions.
+    // These are all @ForceInline in case
+    // they need to be used performantly.
+    // The various shape-specific subclasses
+    // also specialize them by wrapping
+    // them in a call like this:
+    //    return (Halffloat128Vector)
+    //       super.bOp((Halffloat128Vector) o);
+    // The purpose of that is to forcibly inline
+    // the generic definition from this file
+    // into a sharply type- and size-specific
+    // wrapper in the subclass file, so that
+    // the JIT can specialize the code.
+    // The code is only inlined and expanded
+    // if it gets hot.  Think of it as a cheap
+    // and lazy version of C++ templates.
+
+    // Virtualized getter
+
+    /*package-private*/
+    abstract Float16[] vec();  // the lane values as an array; the *Template methods in this class index it directly
+
+    // Virtualized constructors
+
+    /**
+     * Builds a vector around {@code vec} using the implementing class's own constructor.
+     * It is an error if the array is aliased elsewhere.
+     */
+    /*package-private*/
+    abstract HalffloatVector vectorFactory(Float16[] vec);
+
+    /**
+     * Builds a mask from {@code bits} by delegating to this vector's species.
+     * It is an error if the array is aliased elsewhere.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    AbstractMask<Float16> maskFactory(boolean[] bits) {
+        return vspecies().maskFactory(bits);
+    }
+
+    // Constant loader (takes dummy as vector arg)
+    interface FVOp {
+        Float16 apply(int i);  // i: lane index; returns the value for that lane
+    }
+
+    /*package-private*/
+    @ForceInline
+    final HalffloatVector vOp(FVOp f) {
+        // Every lane is produced from its index; 'this' only supplies the length and the factory.
+        Float16[] lanes = new Float16[length()];
+        for (int lane = 0; lane < lanes.length; lane++) {
+            lanes[lane] = f.apply(lane);
+        }
+        return vectorFactory(lanes);
+    }
+
+    @ForceInline
+    final HalffloatVector vOp(VectorMask<Float16> m, FVOp f) {
+        Float16[] res = new Float16[length()];
+        boolean[] mbits = ((AbstractMask<Float16>)m).getBits();
+        // Unset lanes must be zero.  A fresh Float16[] holds null references
+        // (unlike a primitive array), so they are filled in explicitly.
+        Float16 zero = Float16.valueOf(0.0f);
+        for (int i = 0; i < res.length; i++) {
+            res[i] = mbits[i] ? f.apply(i) : zero;
+        }
+        return vectorFactory(res);
+    }
+
+    // Unary operator
+
+    /*package-private*/
+    interface FUnOp {
+        Float16 apply(int i, Float16 a);  // i: lane index, a: this vector's value in lane i
+    }
+
+    /*package-private*/
+    abstract HalffloatVector uOp(FUnOp f);
+
+    // Portable body for uOp: maps f over (lane index, lane value).
+    @ForceInline
+    final HalffloatVector uOpTemplate(FUnOp f) {
+        Float16[] src = vec();
+        Float16[] dst = new Float16[length()];
+        for (int lane = 0; lane < dst.length; lane++) {
+            dst[lane] = f.apply(lane, src[lane]);
+        }
+        return vectorFactory(dst);
+    }
+
+    /*package-private*/
+    abstract HalffloatVector uOp(VectorMask<Float16> m, FUnOp f);
+
+    // Portable body for masked uOp: a null mask means "all lanes";
+    // otherwise unset lanes pass through unchanged.
+    @ForceInline
+    final HalffloatVector uOpTemplate(VectorMask<Float16> m, FUnOp f) {
+        if (m == null) {
+            return uOpTemplate(f);
+        }
+        Float16[] src = vec();
+        Float16[] dst = new Float16[length()];
+        boolean[] selected = ((AbstractMask<Float16>)m).getBits();
+        for (int lane = 0; lane < dst.length; lane++) {
+            Float16 in = src[lane];
+            dst[lane] = selected[lane] ? f.apply(lane, in) : in;
+        }
+        return vectorFactory(dst);
+    }
+
+    // Binary operator
+
+    /*package-private*/
+    interface FBinOp {
+        Float16 apply(int i, Float16 a, Float16 b);  // i: lane index; a: left lane (the accumulator in rOp), b: right lane
+    }
+
+    /*package-private*/
+    abstract HalffloatVector bOp(Vector<Float16> o, FBinOp f);
+
+    // Portable body for bOp: combines lane i of this vector with lane i of o.
+    @ForceInline
+    final HalffloatVector bOpTemplate(Vector<Float16> o, FBinOp f) {
+        Float16[] dst = new Float16[length()];
+        Float16[] lhs = this.vec();
+        Float16[] rhs = ((HalffloatVector)o).vec();
+        for (int lane = 0; lane < dst.length; lane++) {
+            Float16 x = lhs[lane];
+            Float16 y = rhs[lane];
+            dst[lane] = f.apply(lane, x, y);
+        }
+        return vectorFactory(dst);
+    }
+
+    /*package-private*/
+    abstract HalffloatVector bOp(Vector<Float16> o,
+                                 VectorMask<Float16> m,
+                                 FBinOp f);
+    // Portable body for masked bOp: a null mask selects every lane;
+    // otherwise unset lanes keep this vector's value.
+    @ForceInline
+    final HalffloatVector bOpTemplate(Vector<Float16> o,
+                                      VectorMask<Float16> m,
+                                      FBinOp f) {
+        if (m == null) {
+            return bOpTemplate(o, f);
+        }
+        Float16[] dst = new Float16[length()];
+        Float16[] lhs = this.vec();
+        Float16[] rhs = ((HalffloatVector)o).vec();
+        boolean[] selected = ((AbstractMask<Float16>)m).getBits();
+        for (int lane = 0; lane < dst.length; lane++) {
+            dst[lane] = selected[lane] ? f.apply(lane, lhs[lane], rhs[lane]) : lhs[lane];
+        }
+        return vectorFactory(dst);
+    }
+
+    // Ternary operator
+
+    /*package-private*/
+    interface FTriOp {
+        Float16 apply(int i, Float16 a, Float16 b, Float16 c);  // i: lane index; a, b, c: lane i of this, o1 and o2
+    }
+
+    /*package-private*/
+    abstract HalffloatVector tOp(Vector<Float16> o1,
+                                 Vector<Float16> o2,
+                                 FTriOp f);
+
+    // Portable body for tOp: lane i of the result is f(i, this[i], o1[i], o2[i]).
+    @ForceInline
+    final HalffloatVector tOpTemplate(Vector<Float16> o1,
+                                      Vector<Float16> o2,
+                                      FTriOp f) {
+        Float16[] dst = new Float16[length()];
+        Float16[] first = this.vec();
+        Float16[] second = ((HalffloatVector)o1).vec();
+        Float16[] third = ((HalffloatVector)o2).vec();
+        for (int lane = 0; lane < dst.length; lane++) {
+            dst[lane] = f.apply(lane, first[lane], second[lane], third[lane]);
+        }
+        return vectorFactory(dst);
+    }
+
+    /*package-private*/
+    abstract HalffloatVector tOp(Vector<Float16> o1,
+                                 Vector<Float16> o2,
+                                 VectorMask<Float16> m,
+                                 FTriOp f);
+    // Portable body for masked tOp: a null mask selects every lane;
+    // otherwise unset lanes keep this vector's value.
+    @ForceInline
+    final HalffloatVector tOpTemplate(Vector<Float16> o1,
+                                      Vector<Float16> o2,
+                                      VectorMask<Float16> m,
+                                      FTriOp f) {
+        if (m == null) {
+            return tOpTemplate(o1, o2, f);
+        }
+        Float16[] dst = new Float16[length()];
+        Float16[] first = this.vec();
+        Float16[] second = ((HalffloatVector)o1).vec();
+        Float16[] third = ((HalffloatVector)o2).vec();
+        boolean[] selected = ((AbstractMask<Float16>)m).getBits();
+        for (int lane = 0; lane < dst.length; lane++) {
+            dst[lane] = selected[lane] ? f.apply(lane, first[lane], second[lane], third[lane]) : first[lane];
+        }
+        return vectorFactory(dst);
+    }
+
+    // Reduction operator
+
+    /*package-private*/
+    abstract Float16 rOp(Float16 v, VectorMask<Float16> m, FBinOp f);
+    // Portable body for rOp: folds the selected lanes into v, in lane order.
+    @ForceInline
+    final Float16 rOpTemplate(Float16 v, VectorMask<Float16> m, FBinOp f) {
+        if (m == null) {
+            return rOpTemplate(v, f);
+        }
+        Float16[] lanes = vec();
+        boolean[] selected = ((AbstractMask<Float16>)m).getBits();
+        for (int lane = 0; lane < lanes.length; lane++) {
+            if (selected[lane]) {
+                v = f.apply(lane, v, lanes[lane]);
+            }
+        }
+        return v;
+    }
+
+    // Unmasked reduction: folds every lane into the seed v, in lane order.
+    @ForceInline
+    final Float16 rOpTemplate(Float16 v, FBinOp f) {
+        Float16[] lanes = vec();
+        for (int lane = 0; lane < lanes.length; lane++) {
+            v = f.apply(lane, v, lanes[lane]);
+        }
+        return v;
+    }
+
+    // Memory reference
+
+    /*package-private*/
+    interface FLdOp<M> {
+        Float16 apply(M memory, int offset, int i);  // i: lane index; memory and offset are passed through from ldOp unchanged
+    }
+
+    /*package-private*/
+    @ForceInline
+    final <M> HalffloatVector ldOp(M memory, int offset, FLdOp<M> f) {
+        // 'this' is a dummy: only its length and factory are used, never its lanes.
+        Float16[] lanes = new Float16[length()];
+        int lane = 0;
+        while (lane < lanes.length) {
+            lanes[lane] = f.apply(memory, offset, lane);
+            lane++;
+        }
+        return vectorFactory(lanes);
+    }
+
+    /*package-private*/
+    @ForceInline
+    final
+    <M> HalffloatVector ldOp(M memory, int offset,
+                                  VectorMask<Float16> m,
+                                  FLdOp<M> f) {
+        // Unset lanes must be zero.  A fresh Float16[] holds null references
+        // (unlike a primitive array), so they are filled in explicitly.
+        Float16 zero = Float16.valueOf(0.0f);
+        Float16[] res = new Float16[length()];
+        boolean[] mbits = ((AbstractMask<Float16>)m).getBits();
+        for (int i = 0; i < res.length; i++) {
+            res[i] = mbits[i] ? f.apply(memory, offset, i) : zero;
+        }
+        return vectorFactory(res);
+    }
+
+    /*package-private*/
+    interface FLdLongOp {
+        Float16 apply(MemorySegment memory, long offset, int i);  // i: lane index; memory and offset are passed through from ldLongOp unchanged
+    }
+
+    /*package-private*/
+    @ForceInline
+    final HalffloatVector ldLongOp(MemorySegment memory, long offset, FLdLongOp f) {
+        // 'this' is a dummy: only its length and factory are used, never its lanes.
+        Float16[] lanes = new Float16[length()];
+        int lane = 0;
+        while (lane < lanes.length) {
+            lanes[lane] = f.apply(memory, offset, lane);
+            lane++;
+        }
+        return vectorFactory(lanes);
+    }
+
+    /*package-private*/
+    @ForceInline
+    final
+    HalffloatVector ldLongOp(MemorySegment memory, long offset,
+                                  VectorMask<Float16> m,
+                                  FLdLongOp f) {
+        // Unset lanes must be zero.  A fresh Float16[] holds null references
+        // (unlike a primitive array), so they are filled in explicitly.
+        Float16 zero = Float16.valueOf(0.0f);
+        Float16[] res = new Float16[length()];
+        boolean[] mbits = ((AbstractMask<Float16>)m).getBits();
+        for (int i = 0; i < res.length; i++) {
+            res[i] = mbits[i] ? f.apply(memory, offset, i) : zero;
+        }
+        return vectorFactory(res);
+    }
+
+    static Float16 memorySegmentGet(MemorySegment ms, long o, int i) {
+        return Float16.shortBitsToFloat16(ms.get(ELEMENT_LAYOUT, o + i * 2L));  // lane i sits 2*i bytes past o; the short is raw binary16 bits, not a number
+    }
+
+    interface FStOp<M> {
+        void apply(M memory, int offset, int i, Float16 a);  // i: lane index, a: value of lane i; memory and offset come from stOp unchanged
+    }
+
+    /*package-private*/
+    @ForceInline
+    final <M> void stOp(M memory, int offset, FStOp<M> f) {
+        // Hands every lane, in ascending lane order, to the store callback.
+        Float16[] lanes = vec();
+        int lane = 0;
+        for (Float16 value : lanes) {
+            f.apply(memory, offset, lane++, value);
+        }
+    }
+
+    /*package-private*/
+    @ForceInline
+    final <M> void stOp(M memory, int offset,
+                        VectorMask<Float16> m, FStOp<M> f) {
+        // Only lanes whose mask bit is set reach the store callback.
+        Float16[] lanes = vec();
+        boolean[] selected = ((AbstractMask<Float16>)m).getBits();
+        for (int lane = 0; lane < lanes.length; lane++) {
+            if (!selected[lane]) {
+                continue;
+            }
+            f.apply(memory, offset, lane, lanes[lane]);
+        }
+    }
+
+    interface FStLongOp {
+        void apply(MemorySegment memory, long offset, int i, Float16 a);  // i: lane index, a: value of lane i; memory and offset come from stLongOp unchanged
+    }
+
+    /*package-private*/
+    @ForceInline
+    final void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
+        // Hands every lane, in ascending lane order, to the store callback.
+        Float16[] lanes = vec();
+        int lane = 0;
+        for (Float16 value : lanes) {
+            f.apply(memory, offset, lane++, value);
+        }
+    }
+
+    /*package-private*/
+    @ForceInline
+    final void stLongOp(MemorySegment memory, long offset,
+                        VectorMask<Float16> m, FStLongOp f) {
+        // Only lanes whose mask bit is set reach the store callback.
+        Float16[] lanes = vec();
+        boolean[] selected = ((AbstractMask<Float16>)m).getBits();
+        for (int lane = 0; lane < lanes.length; lane++) {
+            if (!selected[lane]) {
+                continue;
+            }
+            f.apply(memory, offset, lane, lanes[lane]);
+        }
+    }
+
+    static void memorySegmentSet(MemorySegment ms, long o, int i, Float16 e) {
+        ms.set(ELEMENT_LAYOUT, o + i * 2L, Float16.float16ToRawShortBits(e));  // store the raw binary16 bits; shortValue() would narrow numerically
+    }
+
+    // Binary test
+
+    /*package-private*/
+    interface FBinTest {
+        boolean apply(int cond, int i, Float16 a, Float16 b);  // cond is passed through from bTest; a, b: lane i of the two operands
+    }
+
+    /*package-private*/
+    @ForceInline
+    final AbstractMask<Float16> bTest(int cond,
+                                      Vector<Float16> o,
+                                      FBinTest f) {
+        // Evaluates the predicate lane by lane and packs the answers into a mask.
+        Float16[] lhs = vec();
+        Float16[] rhs = ((HalffloatVector)o).vec();
+        boolean[] outcome = new boolean[length()];
+        for (int lane = 0; lane < length(); lane++) {
+            outcome[lane] = f.apply(cond, lane, lhs[lane], rhs[lane]);
+        }
+        return maskFactory(outcome);
+    }
+
+
+    /*package-private*/
+    @Override
+    abstract HalffloatSpecies vspecies();  // NOTE(review): presumably narrows the inherited return type to HalffloatSpecies - confirm against AbstractVector
+
+    /*package-private*/
+    @ForceInline
+    static long toBits(Float16 e) {
+        return Float16.float16ToRawShortBits(e);  // raw bit pattern, widened to long by the return type
+    }
+
+    /*package-private*/
+    @ForceInline
+    static Float16 fromBits(long bits) {
+        return Float16.shortBitsToFloat16((short)bits);  // only the low 16 bits of 'bits' are used
+    }
+
+    static HalffloatVector expandHelper(Vector<Float16> v, VectorMask<Float16> m) {
+        // Scatter: consecutive lanes of v are placed, in order, at the set
+        // lanes of m; unset lanes stay zero.  A full mask returns v itself.
+        VectorSpecies<Float16> species = m.vectorSpecies();
+        HalffloatVector result = (HalffloatVector) species.zero();
+        HalffloatVector source = (HalffloatVector) v;
+        if (m.allTrue()) {
+            return source;
+        }
+        for (int dst = 0, src = 0; dst < species.length(); dst++) {
+            result = m.laneIsSet(dst) ? result.withLane(dst, source.lane(src++)) : result;
+        }
+        return result;
+    }
+
+    static HalffloatVector compressHelper(Vector<Float16> v, VectorMask<Float16> m) {
+        // Gather: the lanes of v selected by m are packed, in order, into the
+        // low lanes of the result; the rest stay zero.  A full mask returns v itself.
+        VectorSpecies<Float16> species = m.vectorSpecies();
+        HalffloatVector result = (HalffloatVector) species.zero();
+        HalffloatVector source = (HalffloatVector) v;
+        if (m.allTrue()) {
+            return source;
+        }
+        for (int src = 0, dst = 0; src < species.length(); src++) {
+            result = m.laneIsSet(src) ? result.withLane(dst++, source.lane(src)) : result;
+        }
+        return result;
+    }
+
+    // Static factories (other than memory operations)
+
+    // Note: A surprising behavior in javadoc
+    // sometimes makes a lone /** {@inheritDoc} */
+    // comment drop the method altogether,
+    // apparently if the method mentions a
+    // parameter or return type of Vector<Float16>
+    // instead of Vector<E> as originally specified.
+    // Adding an empty HTML fragment appears to
+    // nudge javadoc into providing the desired
+    // inherited documentation.  We use the HTML
+    // comment <!--workaround--> for this.
+
+    /**
+     * Returns a vector of the given species
+     * where all lane elements are set to
+     * positive zero, that is, {@code Float16.valueOf(0.0f)}.
+     *
+     * @param species species of the desired zero vector
+     * @return a zero vector
+     */
+    @ForceInline
+    public static HalffloatVector zero(VectorSpecies<Float16> species) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return VectorSupport.fromBitsCoerced(vsp.vectorType(), Float16.class, species.length(),
+                        toBits(Float16.valueOf(0.0f)), MODE_BROADCAST, vsp,
+                        ((bits_, s_) -> s_.rvOp(i -> bits_)));
+    }
+
+    /**
+     * Returns a vector of the same species as this one
+     * where all lane elements are set to
+     * the {@code Float16} value {@code e}.
+     *
+     * The contents of the current vector are discarded;
+     * only the species is relevant to this operation.
+     *
+     * <p> This method returns the value of this expression:
+     * {@code HalffloatVector.broadcast(this.species(), e)}.
+     *
+     * @apiNote
+     * Unlike the similar method named {@code broadcast()}
+     * in the supertype {@code Vector}, this method does not
+     * need to validate its argument, and cannot throw
+     * {@code IllegalArgumentException}.  This method is
+     * therefore preferable to the supertype method.
+     *
+     * @param e the value to broadcast
+     * @return a vector where all lane elements are set to
+     *         the {@code Float16} value {@code e}
+     * @see #broadcast(VectorSpecies,long)
+     * @see Vector#broadcast(long)
+     * @see VectorSpecies#broadcast(long)
+     */
+    public abstract HalffloatVector broadcast(Float16 e);
+
+    /**
+     * Creates a vector of the given species in which
+     * every lane holds the same value,
+     * the {@code Float16} value {@code e}.
+     *
+     * @param species species of the desired vector
+     * @param e the value to broadcast
+     * @return a vector where all lane elements are set to
+     *         the {@code Float16} value {@code e}
+     * @see #broadcast(long)
+     * @see Vector#broadcast(long)
+     * @see VectorSpecies#broadcast(long)
+     */
+    @ForceInline
+    public static HalffloatVector broadcast(VectorSpecies<Float16> species, Float16 e) {
+        // The species performs the broadcast; the cast fails for any species that is not a HalffloatSpecies.
+        return ((HalffloatSpecies) species).broadcast(e);
+    }
+
+    /*package-private*/
+    @ForceInline
+    final HalffloatVector broadcastTemplate(Float16 e) {
+        // Only the species of 'this' is consulted here; its lanes are not read.
+        return vspecies().broadcast(e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * When working with vector subtypes like {@code HalffloatVector},
+     * {@linkplain #broadcast(Float16) the more strongly typed method}
+     * is typically selected.  It can be explicitly selected
+     * by converting first: {@code v.broadcast(Float16.valueOf(e))}.
+     * The two expressions will produce numerically identical results.
+     */
+    @Override
+    public abstract HalffloatVector broadcast(long e);
+
+    /**
+     * Creates a vector of the given species in which
+     * every lane holds the same value,
+     * the value {@code e} converted to the lane type.
+     *
+     * The {@code long} value must be accurately representable
+     * by the {@code ETYPE} of the vector species, so that
+     * {@code e==(long)(ETYPE)e}.
+     *
+     * @param species species of the desired vector
+     * @param e the value to broadcast
+     * @return a vector where all lane elements are set to
+     *         the value {@code e}
+     * @throws IllegalArgumentException
+     *         if the given {@code long} value cannot
+     *         be represented by the vector's {@code ETYPE}
+     * @see #broadcast(VectorSpecies,Float16)
+     * @see VectorSpecies#checkValue(long)
+     */
+    @ForceInline
+    public static HalffloatVector broadcast(VectorSpecies<Float16> species, long e) {
+        // The species performs the broadcast; the cast fails for any species that is not a HalffloatSpecies.
+        return ((HalffloatSpecies) species).broadcast(e);
+    }
+
+    /*package-private*/
+    @ForceInline
+    final HalffloatVector broadcastTemplate(long e) {
+        return vspecies().broadcast(e);  // delegates to the species-level broadcast(long)
+    }
+
+    // Unary lanewise support
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Unary op);
+
+    // Portable body for unary lanewise: ZOMO is synthesized from blend/compare,
+    // everything else is looked up by opcode and dispatched through VectorSupport.
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Unary op) {
+        if (opKind(op, VO_SPECIAL) && op == ZOMO) {
+            return blend(broadcast(-1), compare(NE, 0));
+        }
+        int opcode = opCode(op);
+        return VectorSupport.unaryOp(
+            opcode, getClass(), null, Float16.class, length(),
+            this, null,
+            UN_IMPL.find(op, opcode, HalffloatVector::unaryOperations));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Unary op,
+                                  VectorMask<Float16> m);
+
+    // Portable body for masked unary lanewise; the mask is checked against maskClass first.
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Unary op,
+                                          Class<? extends VectorMask<Float16>> maskClass,
+                                          VectorMask<Float16> m) {
+        m.check(maskClass, this);
+        if (opKind(op, VO_SPECIAL) && op == ZOMO) {
+            return blend(broadcast(-1), compare(NE, 0, m));
+        }
+        int opcode = opCode(op);
+        return VectorSupport.unaryOp(
+            opcode, getClass(), maskClass, Float16.class, length(),
+            this, m,
+            UN_IMPL.find(op, opcode, HalffloatVector::unaryOperations));
+    }
+
+    private static final
+    ImplCache<Unary, UnaryOperation<HalffloatVector, VectorMask<Float16>>>
+        UN_IMPL = new ImplCache<>(Unary.class, HalffloatVector.class);
+
+    private static UnaryOperation<HalffloatVector, VectorMask<Float16>> unaryOperations(int opc_) {
+        switch (opc_) {  // maps an opcode to its lane-by-lane scalar implementation
+            case VECTOR_OP_NEG: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> (Float16) Float16.valueOf(-x.floatValue()));
+            case VECTOR_OP_ABS: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> (Float16) Float16.abs(x));
+            case VECTOR_OP_SIN: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.sin(x.floatValue())));
+            case VECTOR_OP_COS: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.cos(x.floatValue())));
+            case VECTOR_OP_TAN: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.tan(x.floatValue())));
+            case VECTOR_OP_ASIN: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.asin(x.floatValue())));
+            case VECTOR_OP_ACOS: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.acos(x.floatValue())));
+            case VECTOR_OP_ATAN: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.atan(x.floatValue())));
+            case VECTOR_OP_EXP: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.exp(x.floatValue())));
+            case VECTOR_OP_LOG: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.log(x.floatValue())));
+            case VECTOR_OP_LOG10: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.log10(x.floatValue())));
+            case VECTOR_OP_SQRT: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.sqrt(x.floatValue())));
+            case VECTOR_OP_CBRT: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.cbrt(x.floatValue())));
+            case VECTOR_OP_SINH: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.sinh(x.floatValue())));
+            case VECTOR_OP_COSH: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.cosh(x.floatValue())));
+            case VECTOR_OP_TANH: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.tanh(x.floatValue())));
+            case VECTOR_OP_EXPM1: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.expm1(x.floatValue())));
+            case VECTOR_OP_LOG1P: return (vec, mask) ->
+                    vec.uOp(mask, (lane, x) -> Float16.valueOf(Math.log1p(x.floatValue())));
+            default: return null;  // no scalar implementation registered for this opcode
+        }
+    }
+
+    // Binary lanewise support
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     * @see #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  Vector<Float16> v);
+
+    // Portable body for binary lanewise; both operands must pass check() first.
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Binary op,
+                                          Vector<Float16> v) {
+        HalffloatVector that = (HalffloatVector) v;
+        that.check(this);
+
+        if (opKind(op, VO_SPECIAL) && op == FIRST_NONZERO) {
+            // Zero is decided on the integral (short) view of the lanes.
+            VectorMask<Short> zeroBits = this.viewAsIntegralLanes().compare(EQ, (short) 0);
+            return this.blend(that, zeroBits.cast(vspecies()));
+        }
+
+        int opcode = opCode(op);
+        return VectorSupport.binaryOp(
+            opcode, getClass(), null, Float16.class, length(),
+            this, that, null,
+            BIN_IMPL.find(op, opcode, HalffloatVector::binaryOperations));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  Vector<Float16> v,
+                                  VectorMask<Float16> m);
+    // Portable body for masked binary lanewise; operand and mask are checked first.
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Binary op,
+                                          Class<? extends VectorMask<Float16>> maskClass,
+                                          Vector<Float16> v, VectorMask<Float16> m) {
+        HalffloatVector that = (HalffloatVector) v;
+        that.check(this);
+        m.check(maskClass, this);
+
+        if (opKind(op, VO_SPECIAL) && op == FIRST_NONZERO) {
+            // Zero is decided on the integral (short) view, under the same mask.
+            ShortVector bits = this.viewAsIntegralLanes();
+            VectorMask<Short> zeroBits
+                = bits.compare(EQ, (short) 0, m.cast(bits.vspecies()));
+            return this.blend(that, zeroBits.cast(vspecies()));
+        }
+
+        int opcode = opCode(op);
+        return VectorSupport.binaryOp(
+            opcode, getClass(), maskClass, Float16.class, length(),
+            this, that, m,
+            BIN_IMPL.find(op, opcode, HalffloatVector::binaryOperations));
+    }
+
+    private static final
+    ImplCache<Binary, BinaryOperation<HalffloatVector, VectorMask<Float16>>>
+        BIN_IMPL = new ImplCache<>(Binary.class, HalffloatVector.class);
+
+    private static BinaryOperation<HalffloatVector, VectorMask<Float16>> binaryOperations(int opc_) {
+        switch (opc_) {  // maps an opcode to its lane-by-lane scalar implementation
+            case VECTOR_OP_ADD: return (lhs, rhs, mask) ->
+                    lhs.bOp(rhs, mask, (lane, x, y) -> Float16.add(x, y));
+            case VECTOR_OP_SUB: return (lhs, rhs, mask) ->
+                    lhs.bOp(rhs, mask, (lane, x, y) -> Float16.subtract(x, y));
+            case VECTOR_OP_MUL: return (lhs, rhs, mask) ->
+                    lhs.bOp(rhs, mask, (lane, x, y) -> Float16.multiply(x, y));
+            case VECTOR_OP_DIV: return (lhs, rhs, mask) ->
+                    lhs.bOp(rhs, mask, (lane, x, y) -> Float16.divide(x, y));
+            case VECTOR_OP_MAX: return (lhs, rhs, mask) ->
+                    lhs.bOp(rhs, mask, (lane, x, y) -> Float16.max(x, y));
+            case VECTOR_OP_MIN: return (lhs, rhs, mask) ->
+                    lhs.bOp(rhs, mask, (lane, x, y) -> Float16.min(x, y));
+            case VECTOR_OP_ATAN2: return (lhs, rhs, mask) ->
+                    lhs.bOp(rhs, mask, (lane, x, y) -> Float16.valueOf(Math.atan2(x.floatValue(), y.floatValue())));
+            case VECTOR_OP_POW: return (lhs, rhs, mask) ->
+                    lhs.bOp(rhs, mask, (lane, x, y) -> Float16.valueOf(Math.pow(x.floatValue(), y.floatValue())));
+            case VECTOR_OP_HYPOT: return (lhs, rhs, mask) ->
+                    lhs.bOp(rhs, mask, (lane, x, y) -> Float16.valueOf(Math.hypot(x.floatValue(), y.floatValue())));
+            default: return null;  // no scalar implementation registered for this opcode
+        }
+    }
+
+    // FIXME: Maybe all of the public final methods in this file (the
+    // simple ones that just call lanewise) should be pushed down to
+    // the X-VectorBits template.  They can't optimize properly at
+    // this level, and must rely on inlining.  Does it work?
+    // (If it works, of course keep the code here.)
+
+    /**
+     * Combines the lane values of this vector
+     * with the value of a broadcast scalar.
+     *
+     * This is a lane-wise binary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e))}.
+     *
+     * @param op the operation used to process lane values
+     * @param e the input scalar
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  Float16 e) {
+        return lanewise(op, broadcast(e));
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the value of a broadcast scalar,
+     * with selection of lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise binary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e), m)}.
+     *
+     * @param op the operation used to process lane values
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  Float16 e,
+                                  VectorMask<Float16> m) {
+        return lanewise(op, broadcast(e), m);  // the scalar is broadcast first, then the masked vector form is used
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * When working with vector subtypes like {@code HalffloatVector},
+     * {@linkplain #lanewise(VectorOperators.Binary,Float16)
+     * the more strongly typed method}
+     * is typically selected.  It can be explicitly selected by
+     * converting the scalar: {@code v.lanewise(op,Float16.valueOf(e))}.
+     * The two expressions will produce numerically identical results.
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  long e) {
+        Float16 e1 = Float16.valueOf(e);
+        if (e1.longValue() != e) {  // the long did not survive the round trip through Float16
+            vspecies().checkValue(e);  // called for its exception
+        }
+        return lanewise(op, e1);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * When working with vector subtypes like {@code HalffloatVector},
+     * {@linkplain #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     * the more strongly typed method}
+     * is typically selected.  It can be explicitly selected by
+     * converting the scalar: {@code v.lanewise(op,Float16.valueOf(e),m)}.
+     * The two expressions will produce numerically identical results.
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  long e, VectorMask<Float16> m) {
+        Float16 e1 = Float16.valueOf(e);
+        if (e1.longValue() != e) {  // the long did not survive the round trip through Float16
+            vspecies().checkValue(e);  // called for its exception
+        }
+        return lanewise(op, e1, m);
+    }
+
+
+    // Ternary lanewise support
+
+    // Ternary operators come in eight variations:
+    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
+    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
+
+    // It is annoying to support all of these variations of masking
+    // and broadcast, but it would be more surprising not to continue
+    // the obvious pattern started by unary and binary.
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector)
+     */
+    @Override
+    public abstract HalffloatVector lanewise(
+            VectorOperators.Ternary op,
+            Vector<Float16> v1,
+            Vector<Float16> v2);
+    @ForceInline
+    final  // unmasked ternary template; NOTE(review): presumably the body behind lanewise(op, v1, v2) in concrete subclasses
+    HalffloatVector lanewiseTemplate(VectorOperators.Ternary op,
+                                          Vector<Float16> v1,
+                                          Vector<Float16> v2) {
+        HalffloatVector that = (HalffloatVector) v1;    // second operand
+        HalffloatVector tother = (HalffloatVector) v2;  // third operand
+        // It's a word: https://www.dictionary.com/browse/tother
+        // See also Chapter 11 of Dickens, Our Mutual Friend:
+        // "Totherest Governor," replied Mr Riderhood...
+        that.check(this);    // NOTE(review): presumably rejects an operand incompatible with this vector
+        tother.check(this);
+        int opc = opCode(op);
+        return VectorSupport.ternaryOp(
+            opc, getClass(), null, Float16.class, length(),  // null: no mask class in the unmasked form
+            this, that, tother, null,                        // null: no mask
+            TERN_IMPL.find(op, opc, HalffloatVector::ternaryOperations));  // implementation for op, looked up through TERN_IMPL
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector,VectorMask)
+     */
+    @Override
+    public abstract HalffloatVector lanewise(
+            VectorOperators.Ternary op,
+            Vector<Float16> v1,
+            Vector<Float16> v2,
+            VectorMask<Float16> m);
+    @ForceInline
+    final  // masked ternary template; NOTE(review): presumably the body behind lanewise(op, v1, v2, m) in concrete subclasses
+    HalffloatVector lanewiseTemplate(VectorOperators.Ternary op,
+                                          Class<? extends VectorMask<Float16>> maskClass,
+                                          Vector<Float16> v1,
+                                          Vector<Float16> v2,
+                                          VectorMask<Float16> m) {
+        HalffloatVector that = (HalffloatVector) v1;    // second operand
+        HalffloatVector tother = (HalffloatVector) v2;  // third operand
+        // It's a word: https://www.dictionary.com/browse/tother
+        // See also Chapter 11 of Dickens, Our Mutual Friend:
+        // "Totherest Governor," replied Mr Riderhood...
+        that.check(this);    // NOTE(review): presumably rejects an operand incompatible with this vector
+        tother.check(this);
+        m.check(maskClass, this);  // the mask is checked against both the expected mask class and this vector
+
+        int opc = opCode(op);
+        return VectorSupport.ternaryOp(
+            opc, getClass(), maskClass, Float16.class, length(),
+            this, that, tother, m,
+            TERN_IMPL.find(op, opc, HalffloatVector::ternaryOperations));  // implementation for op, looked up through TERN_IMPL
+    }
+
+    // Lookup used by the ternary templates; find(...) takes ternaryOperations as the source of implementations.
+    private static final ImplCache<Ternary, TernaryOperation<HalffloatVector, VectorMask<Float16>>>
+        TERN_IMPL = new ImplCache<>(Ternary.class, HalffloatVector.class);
+
+    private static TernaryOperation<HalffloatVector, VectorMask<Float16>> ternaryOperations(int opc_) {
+        if (opc_ != VECTOR_OP_FMA) {
+            return null;  // only VECTOR_OP_FMA has an implementation here
+        }
+        return (x, y, z, mask) -> x.tOp(y, z, mask, (lane, a, b, c) -> Float16.fma(a, b, c));
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of two broadcast scalars.
+     *
+     * This is a lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the first input scalar
+     * @param e2 the second input scalar
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalars
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask)
+     */
+    @ForceInline
+    public final HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
+                                          Float16 e1, Float16 e2) {
+        Vector<Float16> first = broadcast(e1);
+        Vector<Float16> second = broadcast(e2);
+        return lanewise(op, first, second);
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of two broadcast scalars,
+     * with selection of lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the first input scalar
+     * @param e2 the second input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalars
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
+                                          Float16 e1, Float16 e2,
+                                          VectorMask<Float16> m) {
+        Vector<Float16> first = broadcast(e1);
+        Vector<Float16> second = broadcast(e2);
+        return lanewise(op, first, second, m);
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of another vector and a broadcast scalar.
+     *
+     * This is a lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, v1, this.broadcast(e2))}.
+     *
+     * @param op the operation used to combine lane values
+     * @param v1 the other input vector
+     * @param e2 the input scalar
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16,VectorMask)
+     */
+    @ForceInline
+    public final HalffloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
+                                          Vector<Float16> v1,
+                                          Float16 e2) {
+        Vector<Float16> splat = broadcast(e2);
+        return lanewise(op, v1, splat);
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of another vector and a broadcast scalar,
+     * with selection of lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param v1 the other input vector
+     * @param e2 the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
+                                          Vector<Float16> v1,
+                                          Float16 e2,
+                                          VectorMask<Float16> m) {
+        Vector<Float16> splat = broadcast(e2);
+        return lanewise(op, v1, splat, m);
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of another vector and a broadcast scalar.
+     *
+     * This is a lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), v2)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the input scalar
+     * @param v2 the other input vector
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector,VectorMask)
+     */
+    @ForceInline
+    public final HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
+                                          Float16 e1,
+                                          Vector<Float16> v2) {
+        Vector<Float16> splat = broadcast(e1);
+        return lanewise(op, splat, v2);
+    }
+
+    /**
+     * Combines the lane values of this vector
+     * with the values of another vector and a broadcast scalar,
+     * with selection of lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise ternary operation which applies
+     * the selected operation to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the input scalar
+     * @param v2 the other input vector
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Float16,Vector)
+     */
+    @ForceInline
+    public final HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
+                                          Float16 e1,
+                                          Vector<Float16> v2,
+                                          VectorMask<Float16> m) {
+        Vector<Float16> splat = broadcast(e1);
+        return lanewise(op, splat, v2, m);
+    }
+
+    // (Thus endeth the Great and Mighty Ternary Ogdoad.)
+    // https://en.wikipedia.org/wiki/Ogdoad
+
+    /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
+    //
+    // These include masked and non-masked versions.
+    // This subclass adds broadcast (masked or not).
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #add(Float16)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector add(Vector<Float16> v) {  // lanewise with ADD
+        return this.lanewise(VectorOperators.ADD, v);
+    }
+
+    /**
+     * Adds this vector to the broadcast of an input scalar.
+     *
+     * This is a lane-wise binary operation which applies
+     * the primitive addition operation ({@code +}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16)
+     *    lanewise}{@code (}{@link VectorOperators#ADD
+     *    ADD}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the result of adding each lane of this vector to the scalar
+     * @see #add(Vector)
+     * @see #broadcast(Float16)
+     * @see #add(Float16,VectorMask)
+     * @see VectorOperators#ADD
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector add(Float16 e) {
+        // Delegates to the scalar overload of lanewise, which broadcasts e.
+        return this.lanewise(VectorOperators.ADD, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #add(Float16,VectorMask)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector add(Vector<Float16> v, VectorMask<Float16> m) {
+        // Masked variant: forwards the mask to the vector-vector lanewise.
+        return this.lanewise(VectorOperators.ADD, v, m);
+    }
+
+    /**
+     * Adds this vector to the broadcast of an input scalar,
+     * selecting lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise binary operation which applies
+     * the primitive addition operation ({@code +}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     *    lanewise}{@code (}{@link VectorOperators#ADD
+     *    ADD}{@code , e, m)}.
+     *
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of adding each lane of this vector to the scalar
+     * @see #add(Vector,VectorMask)
+     * @see #broadcast(Float16)
+     * @see #add(Float16)
+     * @see VectorOperators#ADD
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector add(Float16 e,
+                                     VectorMask<Float16> m) {
+        return lanewise(ADD, e, m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #sub(Float16)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector sub(Vector<Float16> v) {  // lanewise with SUB
+        return this.lanewise(VectorOperators.SUB, v);
+    }
+
+    /**
+     * Subtracts an input scalar from this vector.
+     *
+     * This is a lane-wise binary operation which applies
+     * the primitive subtraction operation ({@code -}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16)
+     *    lanewise}{@code (}{@link VectorOperators#SUB
+     *    SUB}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the result of subtracting the scalar from each lane of this vector
+     * @see #sub(Vector)
+     * @see #broadcast(Float16)
+     * @see #sub(Float16,VectorMask)
+     * @see VectorOperators#SUB
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector sub(Float16 e) {
+        return lanewise(SUB, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #sub(Float16,VectorMask)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector sub(Vector<Float16> v, VectorMask<Float16> m) {
+        // Masked variant: forwards the mask to the vector-vector lanewise.
+        return this.lanewise(VectorOperators.SUB, v, m);
+    }
+
+    /**
+     * Subtracts an input scalar from this vector
+     * under the control of a mask.
+     *
+     * This is a masked lane-wise binary operation which applies
+     * the primitive subtraction operation ({@code -}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     *    lanewise}{@code (}{@link VectorOperators#SUB
+     *    SUB}{@code , e, m)}.
+     *
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of subtracting the scalar from each lane of this vector
+     * @see #sub(Vector,VectorMask)
+     * @see #broadcast(Float16)
+     * @see #sub(Float16)
+     * @see VectorOperators#SUB
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector sub(Float16 e,
+                                     VectorMask<Float16> m) {
+        return lanewise(SUB, e, m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #mul(Float16)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector mul(Vector<Float16> v) {  // lanewise with MUL
+        return this.lanewise(VectorOperators.MUL, v);
+    }
+
+    /**
+     * Multiplies this vector by the broadcast of an input scalar.
+     *
+     * This is a lane-wise binary operation which applies
+     * the primitive multiplication operation ({@code *}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16)
+     *    lanewise}{@code (}{@link VectorOperators#MUL
+     *    MUL}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the result of multiplying this vector by the given scalar
+     * @see #mul(Vector)
+     * @see #broadcast(Float16)
+     * @see #mul(Float16,VectorMask)
+     * @see VectorOperators#MUL
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector mul(Float16 e) {  // scalar overload of lanewise broadcasts e
+        return this.lanewise(VectorOperators.MUL, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #mul(Float16,VectorMask)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector mul(Vector<Float16> v, VectorMask<Float16> m) {
+        // Masked variant: forwards the mask to the vector-vector lanewise.
+        return this.lanewise(VectorOperators.MUL, v, m);
+    }
+
+    /**
+     * Multiplies this vector by the broadcast of an input scalar,
+     * selecting lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise binary operation which applies
+     * the primitive multiplication operation ({@code *}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     *    lanewise}{@code (}{@link VectorOperators#MUL
+     *    MUL}{@code , e, m)}.
+     *
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of multiplying each lane of this vector by the scalar
+     * @see #mul(Vector,VectorMask)
+     * @see #broadcast(Float16)
+     * @see #mul(Float16)
+     * @see VectorOperators#MUL
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector mul(Float16 e,
+                                     VectorMask<Float16> m) {
+        return lanewise(MUL, e, m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote Because the underlying scalar operator is an IEEE
+     * floating point number, division by zero in fact will
+     * not throw an exception, but will yield a signed
+     * infinity or NaN.
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector div(Vector<Float16> v) {  // lanewise with DIV
+        return this.lanewise(VectorOperators.DIV, v);
+    }
+
+    /**
+     * Divides this vector by the broadcast of an input scalar.
+     *
+     * This is a lane-wise binary operation which applies
+     * the primitive division operation ({@code /}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16)
+     *    lanewise}{@code (}{@link VectorOperators#DIV
+     *    DIV}{@code , e)}.
+     *
+     * @apiNote Because the underlying scalar operator is an IEEE
+     * floating point number, division by zero in fact will
+     * not throw an exception, but will yield a signed
+     * infinity or NaN.
+     *
+     * @param e the input scalar
+     * @return the result of dividing each lane of this vector by the scalar
+     * @see #div(Vector)
+     * @see #broadcast(Float16)
+     * @see #div(Float16,VectorMask)
+     * @see VectorOperators#DIV
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector div(Float16 e) {  // scalar overload of lanewise broadcasts e
+        return this.lanewise(VectorOperators.DIV, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #div(Float16,VectorMask)
+     * @apiNote Because the underlying scalar operator is an IEEE
+     * floating point number, division by zero in fact will
+     * not throw an exception, but will yield a signed
+     * infinity or NaN.
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector div(Vector<Float16> v, VectorMask<Float16> m) {
+        // Masked variant: forwards the mask to the vector-vector lanewise.
+        return this.lanewise(VectorOperators.DIV, v, m);
+    }
+
+    /**
+     * Divides this vector by the broadcast of an input scalar,
+     * selecting lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise binary operation which applies
+     * the primitive division operation ({@code /}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     *    lanewise}{@code (}{@link VectorOperators#DIV
+     *    DIV}{@code , e, m)}.
+     *
+     * @apiNote Because the underlying scalar operator is an IEEE
+     * floating point number, division by zero in fact will
+     * not throw an exception, but will yield a signed
+     * infinity or NaN.
+     *
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of dividing each lane of this vector by the scalar
+     * @see #div(Vector,VectorMask)
+     * @see #broadcast(Float16)
+     * @see #div(Float16)
+     * @see VectorOperators#DIV
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,Float16)
+     */
+    @ForceInline
+    public final HalffloatVector div(Float16 e,
+                                     VectorMask<Float16> m) {
+        return lanewise(DIV, e, m);
+    }
+
+    /// END OF FULL-SERVICE BINARY METHODS
+
+    /// SECOND-TIER BINARY METHODS
+    //
+    // There are no masked versions.
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * For this method, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from, and less
+     * than the default value (positive zero).
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector min(Vector<Float16> v) {  // lanewise with MIN
+        return this.lanewise(VectorOperators.MIN, v);
+    }
+
+    // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
+    /**
+     * Computes the smaller of this vector and the broadcast of an input scalar.
+     *
+     * This is a lane-wise binary operation which applies the
+     * operation {@code Math.min()} to each pair of
+     * corresponding lane values.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16)
+     *    lanewise}{@code (}{@link VectorOperators#MIN
+     *    MIN}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the lane-wise minimum of this vector and the given scalar
+     * @see #min(Vector)
+     * @see #broadcast(Float16)
+     * @see VectorOperators#MIN
+     * @see #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     * @apiNote
+     * For this method, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from, and less
+     * than the default value (positive zero).
+     */
+    @ForceInline
+    public final HalffloatVector min(Float16 e) {
+        return lanewise(MIN, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * For this method, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from, and less
+     * than the default value (positive zero).
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector max(Vector<Float16> v) {  // lanewise with MAX
+        return this.lanewise(VectorOperators.MAX, v);
+    }
+
+    /**
+     * Computes the larger of this vector and the broadcast of an input scalar.
+     *
+     * This is a lane-wise binary operation which applies the
+     * operation {@code Math.max()} to each pair of
+     * corresponding lane values.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16)
+     *    lanewise}{@code (}{@link VectorOperators#MAX
+     *    MAX}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the lane-wise maximum of this vector and the given scalar
+     * @see #max(Vector)
+     * @see #broadcast(Float16)
+     * @see VectorOperators#MAX
+     * @see #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     * @apiNote
+     * For this method, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from, and less
+     * than the default value (positive zero).
+     */
+    @ForceInline
+    public final HalffloatVector max(Float16 e) {
+        return lanewise(MAX, e);
+    }
+
+
+    // common FP operator: pow
+    /**
+     * Raises this vector to the power of a second input vector.
+     *
+     * This is a lane-wise binary operation which applies an operation
+     * conforming to the specification of
+     * {@link Math#pow Math.pow(a,b)}
+     * to each pair of corresponding lane values.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Vector)
+     *    lanewise}{@code (}{@link VectorOperators#POW
+     *    POW}{@code , b)}.
+     *
+     * <p>
+     * This is not a full-service named operation like
+     * {@link #add(Vector) add}.  A masked version of
+     * this operation is not directly available
+     * but may be obtained via the masked version of
+     * {@code lanewise}.
+     *
+     * @param b a vector exponent by which to raise this vector
+     * @return the {@code b}-th power of this vector
+     * @see #pow(Float16)
+     * @see VectorOperators#POW
+     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
+     */
+    @ForceInline
+    public final HalffloatVector pow(Vector<Float16> b) {  // lanewise with POW
+        return this.lanewise(VectorOperators.POW, b);
+    }
+
+    /**
+     * Raises this vector to a scalar power.
+     *
+     * This is a lane-wise binary operation which applies an operation
+     * conforming to the specification of
+     * {@link Math#pow Math.pow(a,b)}
+     * to each pair of corresponding lane values.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Float16)
+     *    lanewise}{@code (}{@link VectorOperators#POW
+     *    POW}{@code , b)}.
+     *
+     * @param b a scalar exponent by which to raise this vector
+     * @return the {@code b}-th power of this vector
+     * @see #pow(Vector)
+     * @see VectorOperators#POW
+     * @see #lanewise(VectorOperators.Binary,Float16,VectorMask)
+     */
+    @ForceInline
+    public final HalffloatVector pow(Float16 b) {
+        return lanewise(POW, b);
+    }
+
+    /// UNARY METHODS
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector neg() {
+        // Expressed through the unary lanewise entry point with NEG.
+        return this.lanewise(VectorOperators.NEG);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector abs() {
+        // Expressed through the unary lanewise entry point with ABS.
+        return this.lanewise(VectorOperators.ABS);
+    }
+
+
+
+    // sqrt
+    /**
+     * Computes the square root of this vector.
+     *
+     * This is a lane-wise unary operation which applies an operation
+     * conforming to the specification of
+     * {@link Math#sqrt Math.sqrt(a)}
+     * to each lane value.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Unary)
+     *    lanewise}{@code (}{@link VectorOperators#SQRT
+     *    SQRT}{@code )}.
+     *
+     * @return the square root of this vector
+     * @see VectorOperators#SQRT
+     * @see #lanewise(VectorOperators.Unary,VectorMask)
+     */
+    @ForceInline
+    public final HalffloatVector sqrt() {  // unary lanewise with SQRT
+        return this.lanewise(VectorOperators.SQRT);
+    }
+
+    /// COMPARISONS
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final VectorMask<Float16> eq(Vector<Float16> v) {
+        // Lane-wise equality expressed through compare with EQ.
+        return this.compare(VectorOperators.EQ, v);
+    }
+
+    /**
+     * Tests if this vector is equal to an input scalar.
+     *
+     * This is a lane-wise binary test operation which applies
+     * the primitive equals operation ({@code ==}) to each lane.
+     * The result is the same as {@code compare(VectorOperators.EQ, e)}.
+     *
+     * @param e the input scalar
+     * @return the result mask of testing if this vector
+     *         is equal to {@code e}
+     * @see #compare(VectorOperators.Comparison,Float16)
+     */
+    @ForceInline
+    public final
+    VectorMask<Float16> eq(Float16 e) {
+        return compare(EQ, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final VectorMask<Float16> lt(Vector<Float16> v) {
+        // Lane-wise less-than expressed through compare with LT.
+        return this.compare(VectorOperators.LT, v);
+    }
+
+    /**
+     * Tests if this vector is less than an input scalar.
+     *
+     * This is a lane-wise binary test operation which applies
+     * the primitive less than operation ({@code <}) to each lane.
+     * The result is the same as {@code compare(VectorOperators.LT, e)}.
+     *
+     * @param e the input scalar
+     * @return the mask result of testing if this vector
+     *         is less than the input scalar
+     * @see #compare(VectorOperators.Comparison,Float16)
+     */
+    @ForceInline
+    public final
+    VectorMask<Float16> lt(Float16 e) {
+        return compare(LT, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    VectorMask<Float16> test(VectorOperators.Test op);
+
+    /*package-private*/
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    M testTemplate(Class<M> maskType, Test op) {
+        HalffloatSpecies species = vspecies();
+        if (!opKind(op, VO_SPECIAL)) {
+            // Only special tests are handled here: evaluate opCode(op), then fail.
+            int opc = opCode(op);
+            throw new AssertionError(op);
+        }
+        // Special tests are answered from the 16-bit integral view of the lanes.
+        ShortVector raw = this.viewAsIntegralLanes();
+        VectorMask<Short> hits;
+        if (op == IS_DEFAULT) {
+            // Default value: every bit of the lane is zero.
+            hits = raw.compare(EQ, (short) 0);
+        } else if (op == IS_NEGATIVE) {
+            // As a signed short, a set sign bit reads as less than zero.
+            hits = raw.compare(LT, (short) 0);
+        } else if (op == IS_FINITE || op == IS_NAN || op == IS_INFINITE) {
+            // Clear the sign bit, then rank what is left against +Infinity's bits:
+            // below means finite, above means NaN, equal means infinite.
+            ShortVector magnitude = raw.and(Short.MAX_VALUE);
+            short infBits = (short) toBits(Float16.POSITIVE_INFINITY);
+            if (op == IS_FINITE) {
+                hits = magnitude.compare(LT, infBits);
+            } else if (op == IS_NAN) {
+                hits = magnitude.compare(GT, infBits);
+            } else {
+                hits = magnitude.compare(EQ, infBits);
+            }
+        } else {
+            throw new AssertionError(op);
+        }
+        // Re-type the Short-lane result as a Float16 mask of this vector's species.
+        return maskType.cast(hits.cast(species));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    VectorMask<Float16> test(VectorOperators.Test op,
+                                  VectorMask<Float16> m);
+
+    /*package-private*/
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    M testTemplate(Class<M> maskType, Test op, M mask) {
+        HalffloatSpecies species = vspecies();
+        mask.check(maskType, this);
+        if (!opKind(op, VO_SPECIAL)) {
+            // Only special tests are handled here: evaluate opCode(op), then fail.
+            int opc = opCode(op);
+            throw new AssertionError(op);
+        }
+        // Special tests are answered from the 16-bit integral view of the lanes;
+        // the controlling mask is re-typed to Short lanes so it can gate them.
+        ShortVector raw = this.viewAsIntegralLanes();
+        VectorMask<Short> gate = mask.cast(ShortVector.species(shape()));
+        VectorMask<Short> hits;
+        if (op == IS_DEFAULT) {
+            hits = raw.compare(EQ, (short) 0, gate);   // every bit zero
+        } else if (op == IS_NEGATIVE) {
+            hits = raw.compare(LT, (short) 0, gate);   // sign bit set
+        } else if (op == IS_FINITE || op == IS_NAN || op == IS_INFINITE) {
+            // Clear the sign bit, then rank what is left against +Infinity's bits:
+            // below means finite, above means NaN, equal means infinite.
+            ShortVector magnitude = raw.and(Short.MAX_VALUE);
+            short infBits = (short) toBits(Float16.POSITIVE_INFINITY);
+            if (op == IS_FINITE) {
+                hits = magnitude.compare(LT, infBits, gate);
+            } else if (op == IS_NAN) {
+                hits = magnitude.compare(GT, infBits, gate);
+            } else {
+                hits = magnitude.compare(EQ, infBits, gate);
+            }
+        } else {
+            throw new AssertionError(op);
+        }
+        // Re-type the Short-lane result as a Float16 mask of this vector's species.
+        return maskType.cast(hits.cast(species));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    VectorMask<Float16> compare(VectorOperators.Comparison op, Vector<Float16> v);
+
+    /*package-private*/
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    M compareTemplate(Class<M> maskType, Comparison op, Vector<Float16> v) {
+        HalffloatVector other = (HalffloatVector) v;
+        other.check(this);
+        int opcode = opCode(op);
+        // The lambda passed to VectorSupport.compare tests lane by lane via compareWithOp.
+        return VectorSupport.compare(
+            opcode, getClass(), maskType, Float16.class, length(),
+            this, other, null,
+            (code, lhs, rhs, noMask) -> {
+                AbstractMask<Float16> laneResults =
+                    lhs.bTest(code, rhs, (c, lane, x, y) -> compareWithOp(code, x, y));
+                @SuppressWarnings("unchecked")
+                M typed = (M) laneResults;
+                return typed;
+            });
+    }
+
+    /*package-private*/
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    M compareTemplate(Class<M> maskType, Comparison op, Vector<Float16> v, M m) {
+        HalffloatVector other = (HalffloatVector) v;
+        other.check(this);
+        m.check(maskType, this);
+        int opcode = opCode(op);
+        // The lambda tests lane by lane via compareWithOp, then ANDs in the mask it is given.
+        return VectorSupport.compare(
+            opcode, getClass(), maskType, Float16.class, length(),
+            this, other, m,
+            (code, lhs, rhs, gate) -> {
+                AbstractMask<Float16> laneResults =
+                    lhs.bTest(code, rhs, (c, lane, x, y) -> compareWithOp(code, x, y));
+                @SuppressWarnings("unchecked")
+                M gated = (M) laneResults.and(gate);
+                return gated;
+            });
+    }
+
+    @ForceInline
+    private static boolean compareWithOp(int cond, Float16 a, Float16 b) {
+        return switch (cond) { // every condition compares the operands' float values
+            case BT_eq -> a.floatValue() == b.floatValue(); // not a == b: must be numeric,
+            case BT_ne -> a.floatValue() != b.floatValue(); // so NaN != NaN and -0.0 == +0.0
+            case BT_lt -> a.floatValue() < b.floatValue();
+            case BT_le -> a.floatValue() <= b.floatValue();
+            case BT_gt -> a.floatValue() > b.floatValue();
+            case BT_ge -> a.floatValue() >= b.floatValue();
+            default -> throw new AssertionError();
+        };
+    }
+
+    /**
+     * Tests this vector by comparing it with an input scalar,
+     * according to the given comparison operation.
+     *
+     * This is a lane-wise binary test operation which applies
+     * the comparison operation to each lane.
+     * <p>
+     * The result is the same as
+     * {@code compare(op, broadcast(species(), e))}.
+     * That is, the scalar may be regarded as broadcast to
+     * a vector of the same species, and then compared
+     * against the original vector, using the selected
+     * comparison operation.
+     *
+     * @param op the operation used to compare lane values
+     * @param e the input scalar
+     * @return the mask result of testing lane-wise if this vector
+     *         compares to the input, according to the selected
+     *         comparison operator
+     * @see HalffloatVector#compare(VectorOperators.Comparison,Vector)
+     * @see #eq(Float16)
+     * @see #lt(Float16)
+     */
+    public abstract
+    VectorMask<Float16> compare(Comparison op, Float16 e);
+
+    /*package-private*/
+    @ForceInline
+    final <M extends VectorMask<Float16>>
+    M compareTemplate(Class<M> maskType, Comparison op, Float16 e) {
+        HalffloatVector splat = broadcast(e);   // the scalar broadcast to a vector
+        return compareTemplate(maskType, op, splat);
+    }
+
+    /**
+     * Tests this vector by comparing it with an input scalar,
+     * according to the given comparison operation,
+     * in lanes selected by a mask.
+     *
+     * This is a masked lane-wise binary test operation which applies
+     * to each pair of corresponding lane values.
+     *
+     * The returned result is equal to the expression
+     * {@code compare(op,e).and(m)}.
+     *
+     * @param op the operation used to compare lane values
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the mask result of testing lane-wise if this vector
+     *         compares to the input, according to the selected
+     *         comparison operator,
+     *         and only in the lanes selected by the mask
+     * @see HalffloatVector#compare(VectorOperators.Comparison,Vector,VectorMask)
+     */
+    @ForceInline
+    public final VectorMask<Float16> compare(VectorOperators.Comparison op,
+                                             Float16 e, VectorMask<Float16> m) {
+        HalffloatVector splat = broadcast(e);   // the scalar broadcast to a vector
+        return compare(op, splat, m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    VectorMask<Float16> compare(Comparison op, long e);
+
+    /*package-private*/
+    @ForceInline
+    final <M extends VectorMask<Float16>>
+    M compareTemplate(Class<M> maskType, Comparison op, long e) {
+        HalffloatVector splat = broadcast(e);   // the long scalar broadcast to a vector
+        return compareTemplate(maskType, op, splat);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    VectorMask<Float16> compare(Comparison op, long e, VectorMask<Float16> m) {
+        return compare(op, broadcast(e), m);
+    }
+
+
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override public abstract
+    HalffloatVector blend(Vector<Float16> v, VectorMask<Float16> m);
+
+    /*package-private*/
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    HalffloatVector blendTemplate(Class<M> maskType, HalffloatVector v, M m) {
+        v.check(this);
+        // The lambda's masked bOp uses a lane function that returns its second operand.
+        return VectorSupport.blend(
+            getClass(), maskType, Float16.class, length(),
+            this, v, m,
+            (base, repl, gate) -> base.bOp(repl, gate, (lane, keep, take) -> take));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override public abstract HalffloatVector addIndex(int scale);
+
+    /*package-private*/
+    @ForceInline
+    final HalffloatVector addIndexTemplate(int scale) {
+        HalffloatSpecies species = vspecies();
+        // Reject scales for which VLENGTH*scale would overflow.
+        species.checkScale(scale);
+        return VectorSupport.indexVector(
+            getClass(), Float16.class, length(),
+            this, scale, species,
+            (vec, step, sp) -> {
+                // For platforms that do not support an INDEX instruction
+                // directly: take IOTA, multiply it by the step (skipped
+                // when the step is 1) and add it to the input vector.
+                HalffloatVector ramp = sp.iota();
+                Vector<Float16> offsets = (step == 1) ? ramp : ramp.mul(Float16.valueOf(step));
+                return vec.add(offsets);
+            });
+    }
+
+    /**
+     * Replaces selected lanes of this vector with
+     * a scalar value
+     * under the control of a mask.
+     *
+     * This is a masked lane-wise binary operation which
+     * selects each lane value from one or the other input.
+     *
+     * The returned result is equal to the expression
+     * {@code blend(broadcast(e),m)}.
+     *
+     * @param e the input scalar, containing the replacement lane value
+     * @param m the mask controlling lane selection of the scalar
+     * @return the result of blending the lane elements of this vector with
+     *         the scalar value
+     */
+    @ForceInline
+    public final HalffloatVector blend(Float16 e,
+                                            VectorMask<Float16> m) {
+        return blend(broadcast(e), m);
+    }
+
+    /**
+     * Replaces selected lanes of this vector with
+     * a scalar value
+     * under the control of a mask.
+     *
+     * This is a masked lane-wise binary operation which
+     * selects each lane value from one or the other input.
+     *
+     * The returned result is equal to the expression
+     * {@code blend(broadcast(e),m)}.
+     *
+     * @param e the input scalar, containing the replacement lane value
+     * @param m the mask controlling lane selection of the scalar
+     * @return the result of blending the lane elements of this vector with
+     *         the scalar value
+     */
+    @ForceInline
+    public final HalffloatVector blend(long e,
+                                            VectorMask<Float16> m) {
+        return blend(broadcast(e), m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector slice(int origin, Vector<Float16> v1);
+
+    /*package-private*/
+    final
+    @ForceInline
+    HalffloatVector sliceTemplate(int origin, Vector<Float16> v1) {
+        HalffloatVector other = (HalffloatVector) v1;
+        other.check(this);
+        Objects.checkIndex(origin, length() + 1);
+        // Lanes with index below length() - origin take this vector's rearrangement, the rest v1's.
+        HalffloatVector pivot = broadcast(Float16.valueOf(length() - origin));
+        VectorMask<Float16> fromThis = iotaShuffle().toVector().compare(VectorOperators.LT, pivot);
+        VectorShuffle<Float16> rotation = iotaShuffle(origin, 1, true);
+        return other.rearrange(rotation).blend(this.rearrange(rotation), fromThis);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    HalffloatVector slice(int origin,
+                               Vector<Float16> w,
+                               VectorMask<Float16> m) {
+        return broadcast(0).blend(slice(origin, w), m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector slice(int origin);
+
+    /*package-private*/
+    final
+    @ForceInline
+    HalffloatVector sliceTemplate(int origin) {
+        Objects.checkIndex(origin, length() + 1);
+        // Lanes with index below length() - origin take the rearranged lanes; the rest stay zero.
+        HalffloatVector pivot = broadcast(Float16.valueOf(length() - origin));
+        VectorMask<Float16> filled = iotaShuffle().toVector().compare(VectorOperators.LT, pivot);
+        VectorShuffle<Float16> rotation = iotaShuffle(origin, 1, true);
+        return vspecies().zero().blend(this.rearrange(rotation), filled);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector unslice(int origin, Vector<Float16> w, int part);
+
+    /*package-private*/
+    final
+    @ForceInline
+    HalffloatVector
+    unsliceTemplate(int origin, Vector<Float16> w, int part) {
+        HalffloatVector background = (HalffloatVector) w;
+        background.check(this);
+        Objects.checkIndex(origin, length() + 1);
+        // Part 0 replaces lanes at index >= origin; any other part replaces lanes below origin.
+        Comparison side = (part == 0) ? VectorOperators.GE : VectorOperators.LT;
+        HalffloatVector pivot = broadcast(Float16.valueOf(origin));
+        VectorMask<Float16> replaced = iotaShuffle().toVector().compare(side, pivot);
+        return background.blend(this.rearrange(iotaShuffle(-origin, 1, true)), replaced);
+    }
+
+    /*package-private*/
+    final
+    @ForceInline
+    <M extends VectorMask<Float16>>
+    HalffloatVector
+    unsliceTemplate(Class<M> maskType, int origin, Vector<Float16> w, int part, M m) {
+        HalffloatVector background = (HalffloatVector) w;
+        background.check(this);
+        // Slice w against itself, blend this vector in under m, then unslice into w.
+        HalffloatVector merged = background.sliceTemplate(origin, background).blendTemplate(maskType, this, m);
+        return merged.unsliceTemplate(origin, w, part);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector unslice(int origin, Vector<Float16> w, int part, VectorMask<Float16> m);
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector unslice(int origin);
+
+    /*package-private*/
+    final
+    @ForceInline
+    HalffloatVector
+    unsliceTemplate(int origin) {
+        Objects.checkIndex(origin, length() + 1);
+        // Lanes at index >= origin take the rearranged lanes; the rest stay zero.
+        HalffloatVector pivot = broadcast(Float16.valueOf(origin));
+        VectorMask<Float16> filled = iotaShuffle().toVector().compare(VectorOperators.GE, pivot);
+        VectorShuffle<Float16> rotation = iotaShuffle(-origin, 1, true);
+        return vspecies().zero().blend(this.rearrange(rotation), filled);
+    }
+
+    // Builds (without throwing) the exception that reports an invalid slice part number.
+    private ArrayIndexOutOfBoundsException wrongPartForSlice(int part) {
+        String template = "bad part number %d for slice operation";
+        String detail = String.format(template, part);
+        return new ArrayIndexOutOfBoundsException(detail);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector rearrange(VectorShuffle<Float16> m);
+
+    /*package-private*/
+    @ForceInline
+    final
+    <S extends VectorShuffle<Float16>>
+    HalffloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
+        // Index validation is delegated to the shuffle before any lane is moved.
+        shuffle.checkIndexes();
+        // Lambda: output lane i is the input lane named by the shuffle's source index for i.
+        return VectorSupport.rearrangeOp(
+            getClass(), shuffletype, null, Float16.class, length(),
+            this, shuffle, null,
+            (src, perm, noMask) ->
+                src.uOp((lane, ignored) -> src.lane(perm.laneSource(lane))));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector rearrange(VectorShuffle<Float16> s,
+                                   VectorMask<Float16> m);
+
+    /*package-private*/
+    @ForceInline
+    final
+    <S extends VectorShuffle<Float16>, M extends VectorMask<Float16>>
+    HalffloatVector rearrangeTemplate(Class<S> shuffletype, Class<M> masktype,
+                                      S shuffle, M m) {
+        m.check(masktype, this);
+        // An invalid shuffle index only matters in a lane that the mask selects.
+        VectorMask<Float16> validLanes = shuffle.laneIsValid();
+        if (m.andNot(validLanes).anyTrue()) {
+            shuffle.checkIndexes();
+            throw new AssertionError();
+        }
+        // Lambda: unselected lanes and negative source indexes yield zero.
+        return VectorSupport.rearrangeOp(
+            getClass(), shuffletype, masktype, Float16.class, length(),
+            this, shuffle, m,
+            (src, perm, gate) -> src.uOp((lane, ignored) -> {
+                int from = perm.laneSource(lane);
+                boolean blank = from < 0 || !gate.laneIsSet(lane);
+                return blank ? Float16.valueOf(0) : src.lane(from);
+            }));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector rearrange(VectorShuffle<Float16> s,
+                                   Vector<Float16> v);
+
+    /*package-private*/
+    @ForceInline
+    final
+    <S extends VectorShuffle<Float16>>
+    HalffloatVector rearrangeTemplate(Class<S> shuffletype,
+                                      S shuffle,
+                                      HalffloatVector v) {
+        // Record which lanes have valid indexes before the indexes are wrapped.
+        VectorMask<Float16> fromThis = shuffle.laneIsValid();
+        @SuppressWarnings("unchecked")
+        S wrapped = (S) shuffle.wrapIndexes();
+        // Rearrange both inputs with the same wrapped shuffle ...
+        HalffloatVector pickedFromThis =
+            VectorSupport.rearrangeOp(
+                getClass(), shuffletype, null, Float16.class, length(),
+                this, wrapped, null,
+                (src, perm, noMask) ->
+                    src.uOp((lane, ignored) -> src.lane(perm.laneSource(lane))));
+        HalffloatVector pickedFromV =
+            VectorSupport.rearrangeOp(
+                getClass(), shuffletype, null, Float16.class, length(),
+                v, wrapped, null,
+                (src, perm, noMask) ->
+                    src.uOp((lane, ignored) -> src.lane(perm.laneSource(lane))));
+        // ... then keep this vector's result in the originally valid lanes and
+        // v's result in all the others.
+        return pickedFromV.blend(pickedFromThis, fromThis);
+    }
+
+    @ForceInline
+    private final
+    VectorShuffle<Float16> toShuffle0(HalffloatSpecies dsp) {
+        Float16[] a = toArray();
+        int[] sa = new int[a.length];
+        for (int i = 0; i < a.length; i++) {
+            sa[i] = a[i].intValue();
+        }
+        return VectorShuffle.fromArray(dsp, sa, 0);
+    }
+
+    /*package-private*/
+    @ForceInline
+    final
+    VectorShuffle<Float16> toShuffleTemplate(Class<?> shuffleType) {
+        HalffloatSpecies species = vspecies();
+        // Cast conversion towards shuffleType; toShuffle0 is passed as the conversion function.
+        return VectorSupport.convert(
+            VectorSupport.VECTOR_OP_CAST,
+            getClass(), Float16.class, length(), shuffleType, byte.class, length(),
+            this, species, HalffloatVector::toShuffle0);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @since 19
+     */
+    @Override
+    public abstract
+    HalffloatVector compress(VectorMask<Float16> m);
+
+    /*package-private*/
+    @ForceInline
+    final
+    <M extends AbstractMask<Float16>>
+    HalffloatVector compressTemplate(Class<M> masktype, M m) {
+        m.check(masktype, this);
+        return (HalffloatVector) VectorSupport.compressExpandOp(
+            VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype, Float16.class, length(),
+            this, m, (vec, mask) -> compressHelper(vec, mask)); // lambda defers to compressHelper
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @since 19
+     */
+    @Override
+    public abstract
+    HalffloatVector expand(VectorMask<Float16> m);
+
+    /*package-private*/
+    @ForceInline
+    final
+    <M extends AbstractMask<Float16>>
+    HalffloatVector expandTemplate(Class<M> masktype, M m) {
+        m.check(masktype, this);
+        return (HalffloatVector) VectorSupport.compressExpandOp(
+            VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype, Float16.class, length(),
+            this, m, (vec, mask) -> expandHelper(vec, mask)); // lambda defers to expandHelper
+    }
+
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector selectFrom(Vector<Float16> v);
+
+    /*package-private*/
+    @ForceInline
+    final HalffloatVector selectFromTemplate(HalffloatVector v) {
+        return v.rearrange(toShuffle()); // this vector, turned into a shuffle, rearranges v
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector selectFrom(Vector<Float16> s, VectorMask<Float16> m);
+
+    /*package-private*/
+    @ForceInline
+    final HalffloatVector selectFromTemplate(HalffloatVector v,
+                                                  AbstractMask<Float16> m) {
+        return v.rearrange(this.toShuffle(), m);
+    }
+
+    /// Ternary operations
+
+
+    /**
+     * Multiplies this vector by a second input vector, and sums
+     * the result with a third.
+     *
+     * Extended precision is used for the intermediate result,
+     * avoiding possible loss of precision from rounding once
+     * for each of the two operations.
+     * The result is numerically close to {@code this.mul(b).add(c)},
+     * and is typically closer to the true mathematical result.
+     *
+     * This is a lane-wise ternary operation which applies an operation
+     * conforming to the specification of
+     * {@link Float16#fma(Float16,Float16,Float16) Float16.fma(a,b,c)}
+     * to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
+     *    lanewise}{@code (}{@link VectorOperators#FMA
+     *    FMA}{@code , b, c)}.
+     *
+     * @param b the second input vector, supplying multiplier values
+     * @param c the third input vector, supplying addend values
+     * @return the product of this vector and the second input vector
+     *         summed with the third input vector, using extended precision
+     *         for the intermediate result
+     * @see #fma(Float16,Float16)
+     * @see VectorOperators#FMA
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector fma(Vector<Float16> b, Vector<Float16> c) {
+        return lanewise(FMA, b, c);
+    }
+
+    /**
+     * Multiplies this vector by a scalar multiplier, and sums
+     * the result with a scalar addend.
+     *
+     * Extended precision is used for the intermediate result,
+     * avoiding possible loss of precision from rounding once
+     * for each of the two operations.
+     * The result is numerically close to {@code this.mul(b).add(c)},
+     * and is typically closer to the true mathematical result.
+     *
+     * This is a lane-wise ternary operation which applies an operation
+     * conforming to the specification of
+     * {@link Float16#fma(Float16,Float16,Float16) Float16.fma(a,b,c)}
+     * to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
+     *    lanewise}{@code (}{@link VectorOperators#FMA
+     *    FMA}{@code , b, c)}.
+     *
+     * @param b the scalar multiplier
+     * @param c the scalar addend
+     * @return the product of this vector and the scalar multiplier
+     *         summed with scalar addend, using extended precision
+     *         for the intermediate result
+     * @see #fma(Vector,Vector)
+     * @see VectorOperators#FMA
+     * @see #lanewise(VectorOperators.Ternary,Float16,Float16,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector fma(Float16 b, Float16 c) {
+        return lanewise(FMA, b, c);
+    }
+
+    // Don't bother with (Vector,Float16) and (Float16,Vector) overloadings.
+
+    // Type specific horizontal reductions
+
+    /**
+     * Returns a value accumulated from all the lanes of this vector.
+     *
+     * This is an associative cross-lane reduction operation which
+     * applies the specified operation to all the lane elements.
+     * <p>
+     * A few reduction operations do not support arbitrary reordering
+     * of their operands, yet are included here because of their
+     * usefulness.
+     * <ul>
+     * <li>
+     * In the case of {@code FIRST_NONZERO}, the reduction returns
+     * the value from the lowest-numbered non-zero lane.
+     * (As with {@code MAX} and {@code MIN}, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from
+     * the default value, positive zero. So a first-nonzero lane reduction
+     * might return {@code -0.0} even in the presence of non-zero
+     * lane values.)
+     * <li>
+     * In the case of {@code ADD} and {@code MUL}, the
+     * precise result will reflect the choice of an arbitrary order
+     * of operations, which may even vary over time.
+     * For further details see the section
+     * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
+     * <li>
+     * All other reduction operations are fully commutative and
+     * associative.  The implementation can choose any order of
+     * processing, yet it will always produce the same result.
+     * </ul>
+     *
+     * @param op the operation used to combine lane values
+     * @return the accumulated result
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #reduceLanes(VectorOperators.Associative,VectorMask)
+     * @see #add(Vector)
+     * @see #mul(Vector)
+     * @see #min(Vector)
+     * @see #max(Vector)
+     * @see VectorOperators#FIRST_NONZERO
+     */
+    public abstract Float16 reduceLanes(VectorOperators.Associative op);
+
+    /**
+     * Returns a value accumulated from selected lanes of this vector,
+     * controlled by a mask.
+     *
+     * This is an associative cross-lane reduction operation which
+     * applies the specified operation to the selected lane elements.
+     * <p>
+     * If no elements are selected, an operation-specific identity
+     * value is returned.
+     * <ul>
+     * <li>
+     * If the operation is
+     *  {@code ADD}
+     * or {@code FIRST_NONZERO},
+     * then the identity value is positive zero, the default {@code Float16} value.
+     * <li>
+     * If the operation is {@code MUL},
+     * then the identity value is one.
+     * <li>
+     * If the operation is {@code MAX},
+     * then the identity value is {@code Float16.NEGATIVE_INFINITY}.
+     * <li>
+     * If the operation is {@code MIN},
+     * then the identity value is {@code Float16.POSITIVE_INFINITY}.
+     * </ul>
+     * <p>
+     * A few reduction operations do not support arbitrary reordering
+     * of their operands, yet are included here because of their
+     * usefulness.
+     * <ul>
+     * <li>
+     * In the case of {@code FIRST_NONZERO}, the reduction returns
+     * the value from the lowest-numbered non-zero lane.
+     * (As with {@code MAX} and {@code MIN}, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from
+     * the default value, positive zero. So a first-nonzero lane reduction
+     * might return {@code -0.0} even in the presence of non-zero
+     * lane values.)
+     * <li>
+     * In the case of {@code ADD} and {@code MUL}, the
+     * precise result will reflect the choice of an arbitrary order
+     * of operations, which may even vary over time.
+     * For further details see the section
+     * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
+     * <li>
+     * All other reduction operations are fully commutative and
+     * associative.  The implementation can choose any order of
+     * processing, yet it will always produce the same result.
+     * </ul>
+     *
+     * @param op the operation used to combine lane values
+     * @param m the mask controlling lane selection
+     * @return the reduced result accumulated from the selected lane values
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #reduceLanes(VectorOperators.Associative)
+     */
+    public abstract Float16 reduceLanes(VectorOperators.Associative op,
+                                       VectorMask<Float16> m);
+
+    /*package-private*/
+    @ForceInline
+    final
+    Float16 reduceLanesTemplate(VectorOperators.Associative op,
+                                Class<? extends VectorMask<Float16>> maskClass,
+                                VectorMask<Float16> m) {
+        m.check(maskClass, this);
+        if (op == FIRST_NONZERO) {
+            // FIXME:  The JIT should handle this.
+            // Zero out the unselected lanes, then run the unmasked reduction.
+            HalffloatVector zeros = broadcast(Float16.valueOf(0));
+            return zeros.blend(this, m).reduceLanesTemplate(op);
+        }
+        int opcode = opCode(op);
+        return fromBits(VectorSupport.reductionCoerced(
+            opcode, getClass(), maskClass, Float16.class, length(), this, m,
+            REDUCE_IMPL.find(op, opcode, HalffloatVector::reductionOperations)));
+    }
+
+    /*package-private*/
+    @ForceInline
+    final
+    Float16 reduceLanesTemplate(VectorOperators.Associative op) {
+        if (op == FIRST_NONZERO) {
+            // FIXME:  The JIT should handle this.
+            // "Non-zero" is decided on the raw lane bits, not on the numeric value.
+            ShortVector raw = this.viewAsIntegralLanes();
+            int first = raw.compare(NE, (short) 0).firstTrue();
+            return (first < length()) ? this.lane(first) : Float16.valueOf(0);
+        }
+        int opcode = opCode(op);
+        return fromBits(VectorSupport.reductionCoerced(
+            opcode, getClass(), null, Float16.class, length(),
+            this, null,
+            REDUCE_IMPL.find(op, opcode, HalffloatVector::reductionOperations)));
+    }
+
+    private static final // reduction implementations, looked up via find(op, opc, reductionOperations)
+    ImplCache<Associative, ReductionOperation<HalffloatVector, VectorMask<Float16>>>
+        REDUCE_IMPL = new ImplCache<>(Associative.class, HalffloatVector.class);
+
+    // Lane-by-lane reductions keyed by opcode; an unknown opcode maps to null.
+    // Every fold is seeded with the identity of its operation, so that the seed
+    // never changes the outcome and a reduction over no lanes returns the identity:
+    // 0 for ADD, 1 for MUL, +Infinity for MIN, -Infinity for MAX.
+    private static ReductionOperation<HalffloatVector, VectorMask<Float16>> reductionOperations(int opc_) {
+        switch (opc_) {
+            case VECTOR_OP_ADD: return (v, m) -> toBits(v.rOp(Float16.valueOf(0), m, (i, a, b) -> Float16.add(a, b)));
+            case VECTOR_OP_MUL: return (v, m) -> toBits(v.rOp(Float16.valueOf(1), m, (i, a, b) -> Float16.multiply(a, b)));
+            case VECTOR_OP_MIN: return (v, m) -> toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> Float16.min(a, b)));
+            case VECTOR_OP_MAX: return (v, m) -> toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> Float16.max(a, b)));
+            default: return null;
+        }
+    }
+
+    private static final Float16 MIN_OR_INF = Float16.NEGATIVE_INFINITY;  // lower bound of all Float16 values
+    private static final Float16 MAX_OR_INF = Float16.POSITIVE_INFINITY;  // upper bound of all Float16 values
+
+    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);  // unmasked form
+    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
+                                                     VectorMask<Float16> m);  // masked form
+
+    // Type specific accessors
+
+    /**
+     * Gets the lane element at lane index {@code i}.
+     *
+     * @param i the lane index
+     * @return the lane element at lane index {@code i}
+     * @throws IllegalArgumentException if the index is out of range
+     * ({@code < 0 || >= length()})
+     */
+    public abstract Float16 lane(int i);
+
+    /**
+     * Replaces the lane element of this vector at lane index {@code i} with
+     * value {@code e}.
+     * <p>
+     * This is a cross-lane operation and behaves as if it returns the result
+     * of blending this vector with an input vector that is the result of
+     * broadcasting {@code e} and a mask that has only one lane set at lane
+     * index {@code i}.
+     *
+     * @param i the lane index of the lane element to be replaced
+     * @param e the value to be placed
+     * @return the result of replacing the lane element of this vector at lane
+     * index {@code i} with value {@code e}
+     * @throws IllegalArgumentException if the index is out of range
+     * ({@code < 0 || >= length()})
+     */
+    public abstract HalffloatVector withLane(int i, Float16 e);
+
+    // Memory load operations
+
+    /**
+     * Returns an array of type {@code Float16[]}
+     * containing all the lane values.
+     * The array length is the same as the vector length.
+     * The array elements are stored in lane order.
+     * <p>
+     * This method behaves as if it stores
+     * this vector into an allocated array
+     * (using {@link #intoArray(Float16[], int) intoArray})
+     * and returns the array as follows:
+     * <pre>{@code
+     *   Float16[] a = new Float16[this.length()];
+     *   this.intoArray(a, 0);
+     *   return a;
+     * }</pre>
+     *
+     * @return an array containing the lane values of this vector
+     */
+    @ForceInline
+    @Override
+    public final Float16[] toArray() {
+        Float16[] lanes = new Float16[vspecies().laneCount()];
+        intoArray(lanes, 0);  // store every lane, starting at array index 0
+        return lanes;
+    }
+
+    /** {@inheritDoc} <!--workaround-->
+     */
+    @ForceInline
+    @Override
+    public final int[] toIntArray() {
+        Float16[] lanes = toArray();
+        int[] ints = new int[lanes.length];
+        int n = 0;
+        for (Float16 lane : lanes) {
+            ints[n++] = (int) HalffloatSpecies.toIntegralChecked(lane, true);
+        }
+        return ints;
+    }
+
+    /** {@inheritDoc} <!--workaround-->
+     */
+    @ForceInline
+    @Override
+    public final long[] toLongArray() {
+        Float16[] a = toArray();
+        long[] res = new long[a.length];
+        for (int i = 0; i < a.length; i++) {
+            // Go through the same checked conversion as toIntArray() (flag false: not
+            // narrowing to int) rather than longValue(), which performs no validation.
+            res[i] = HalffloatSpecies.toIntegralChecked(a[i], false);
+        }
+        return res;
+    }
+
+    /** {@inheritDoc} <!--workaround-->
+     * @implNote
+     * When this method is used on vectors
+     * of type {@code HalffloatVector},
+     * there will be no loss of precision.
+     */
+    @ForceInline
+    @Override
+    public final double[] toDoubleArray() {
+        Float16[] lanes = toArray();
+        double[] doubles = new double[lanes.length];
+        for (int lane = 0; lane < lanes.length; lane++) {
+            doubles[lane] = lanes[lane].doubleValue();
+        }
+        return doubles;
+    }
+
+    /**
+     * Loads a vector from an array of type {@code Float16[]}
+     * starting at an offset.
+     * For each vector lane, where {@code N} is the vector lane index, the
+     * array element at index {@code offset + N} is placed into the
+     * resulting vector at lane index {@code N}.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array
+     * @return the vector loaded from an array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromArray(VectorSpecies<Float16> species,
+                                   Float16[] a, int offset) {
+        // Bounds-check the whole vector-sized window first, then load unchecked.
+        int start = checkFromIndexSize(offset, species.length(), a.length);
+        return ((HalffloatSpecies) species).dummyVector().fromArray0(a, start);
+    }
+
+    /**
+     * Loads a vector from an array of type {@code Float16[]}
+     * starting at an offset and using a mask.
+     * Lanes where the mask is unset are filled with the default
+     * value of {@code Float16} (positive zero).
+     * For each vector lane, where {@code N} is the vector lane index,
+     * if the mask lane at index {@code N} is set then the array element at
+     * index {@code offset + N} is placed into the resulting vector at lane index
+     * {@code N}, otherwise the default element value is placed into the
+     * resulting vector at lane index {@code N}.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array
+     * @param m the mask controlling lane selection
+     * @return the vector loaded from an array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromArray(VectorSpecies<Float16> species,
+                                   Float16[] a, int offset,
+                                   VectorMask<Float16> m) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        if (!VectorIntrinsics.indexInRange(offset, vsp.length(), a.length)) {
+            // Window overhangs the array: validate the offset against the mask instead.
+            checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
+            return vsp.dummyVector().fromArray0(a, offset, m, OFFSET_OUT_OF_RANGE);
+        }
+        return vsp.dummyVector().fromArray0(a, offset, m, OFFSET_IN_RANGE);
+    }
+
+    /**
+     * Gathers a new vector composed of elements from an array of type
+     * {@code Float16[]},
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane is loaded from the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array, may be negative if relative
+     * indexes in the index map compensate to produce a value within the
+     * array bounds
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @return the vector loaded from the indexed elements of the array
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromArray(VectorSpecies<Float16> species,
+                                   Float16[] a, int offset,
+                                   int[] indexMap, int mapOffset) {
+        // Lane N is taken from a[offset + indexMap[mapOffset + N]].
+        return ((HalffloatSpecies) species).vOp(lane -> a[offset + indexMap[mapOffset + lane]]);
+    }
+
+    /**
+     * Gathers a new vector composed of elements from an array of type
+     * {@code Float16[]},
+     * under the control of a mask, and
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * if the lane is set in the mask,
+     * the lane is loaded from the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     * Unset lanes in the resulting vector are set to zero.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array, may be negative if relative
+     * indexes in the index map compensate to produce a value within the
+     * array bounds
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @param m the mask controlling lane selection
+     * @return the vector loaded from the indexed elements of the array
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromArray(VectorSpecies<Float16> species,
+                                   Float16[] a, int offset,
+                                   int[] indexMap, int mapOffset,
+                                   VectorMask<Float16> m) {
+        // Under mask m, lane N is taken from a[offset + indexMap[mapOffset + N]].
+        return ((HalffloatSpecies) species).vOp(m, lane -> a[offset + indexMap[mapOffset + lane]]);
+    }
+
+
+
+    /**
+     * Loads a vector from a {@linkplain MemorySegment memory segment}
+     * starting at an offset into the memory segment.
+     * Bytes are composed into primitive lane elements according
+     * to the specified byte order.
+     * The vector is arranged into lanes according to
+     * <a href="Vector.html#lane-order">memory ordering</a>.
+     * <p>
+     * This method behaves as if it returns the result of calling
+     * {@link #fromMemorySegment(VectorSpecies,MemorySegment,long,ByteOrder,VectorMask)
+     * fromMemorySegment()} as follows:
+     * <pre>{@code
+     * var m = species.maskAll(true);
+     * return fromMemorySegment(species, ms, offset, bo, m);
+     * }</pre>
+     *
+     * @param species species of desired vector
+     * @param ms the memory segment
+     * @param offset the offset into the memory segment
+     * @param bo the intended byte order
+     * @return a vector loaded from the memory segment
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N*2 < 0}
+     *         or {@code offset+N*2 >= ms.byteSize()}
+     *         for any lane {@code N} in the vector
+     * @throws IllegalStateException if the memory segment's session is not alive,
+     *         or if access occurs from a thread other than the thread owning the session.
+     * @since 19
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromMemorySegment(VectorSpecies<Float16> species,
+                                           MemorySegment ms, long offset,
+                                           ByteOrder bo) {
+        long start = checkFromIndexSize(offset, species.vectorByteSize(), ms.byteSize());
+        HalffloatVector loaded = ((HalffloatSpecies) species).dummyVector().fromMemorySegment0(ms, start);
+        return loaded.maybeSwap(bo);  // swaps lane bytes only when bo is not NATIVE_ENDIAN
+    }
+
+    /**
+     * Loads a vector from a {@linkplain MemorySegment memory segment}
+     * starting at an offset into the memory segment
+     * and using a mask.
+     * Lanes where the mask is unset are filled with the default
+     * value of {@code Float16} (positive zero).
+     * Bytes are composed into primitive lane elements according
+     * to the specified byte order.
+     * The vector is arranged into lanes according to
+     * <a href="Vector.html#lane-order">memory ordering</a>.
+     * <p>
+     * The following pseudocode illustrates the behavior:
+     * <pre>{@code
+     * var slice = ms.asSlice(offset);
+     * Float16[] ar = new Float16[species.length()];
+     * for (int n = 0; n < ar.length; n++) {
+     *     if (m.laneIsSet(n)) {
+     *         ar[n] = slice.getAtIndex(ValueLayout.JAVA_HALFFLOAT.withByteAlignment(1), n);
+     *     }
+     * }
+     * HalffloatVector r = HalffloatVector.fromArray(species, ar, 0);
+     * }</pre>
+     * @implNote
+     * This operation is likely to be more efficient if
+     * the specified byte order is the same as
+     * {@linkplain ByteOrder#nativeOrder()
+     * the platform native order},
+     * since this method will not need to reorder
+     * the bytes of lane values.
+     *
+     * @param species species of desired vector
+     * @param ms the memory segment
+     * @param offset the offset into the memory segment
+     * @param bo the intended byte order
+     * @param m the mask controlling lane selection
+     * @return a vector loaded from the memory segment
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N*2 < 0}
+     *         or {@code offset+N*2 >= ms.byteSize()}
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     * @throws IllegalStateException if the memory segment's session is not alive,
+     *         or if access occurs from a thread other than the thread owning the session.
+     * @since 19
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromMemorySegment(VectorSpecies<Float16> species,
+                                           MemorySegment ms, long offset,
+                                           ByteOrder bo,
+                                           VectorMask<Float16> m) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        if (!VectorIntrinsics.indexInRange(offset, vsp.vectorByteSize(), ms.byteSize())) {
+            // Vector overhangs the segment: validate against the mask, scale 2 per lane.
+            checkMaskFromIndexSize(offset, vsp, m, 2, ms.byteSize());
+            return vsp.dummyVector().fromMemorySegment0(ms, offset, m, OFFSET_OUT_OF_RANGE).maybeSwap(bo);
+        }
+        return vsp.dummyVector().fromMemorySegment0(ms, offset, m, OFFSET_IN_RANGE).maybeSwap(bo);
+    }
+
+    // Memory store operations
+
+    /**
+     * Stores this vector into an array of type {@code Float16[]}
+     * starting at an offset.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane element at index {@code N} is stored into the array
+     * element {@code a[offset+N]}.
+     *
+     * @param a the array, of type {@code Float16[]}
+     * @param offset the offset into the array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     */
+    @ForceInline
+    public final
+    void intoArray(Float16[] a, int offset) {
+        offset = checkFromIndexSize(offset, length(), a.length);  // index check for the full vector length
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.store(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset), false,
+            this,
+            a, offset,
+            (arr, off, v)  // Java implementation handed to VectorSupport.store:
+            -> v.stOp(arr, (int) off,
+                      (arr_, off_, i, e) -> arr_[off_ + i] = e));  // lane i -> arr[off + i]
+    }
+
+    /**
+     * Stores this vector into an array of type {@code Float16[]}
+     * starting at offset and using a mask.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane element at index {@code N} is stored into the array
+     * element {@code a[offset+N]}.
+     * If the mask lane at {@code N} is unset then the corresponding
+     * array element {@code a[offset+N]} is left unchanged.
+     * <p>
+     * Array range checking is done for lanes where the mask is set.
+     * Lanes where the mask is unset are not stored and do not need
+     * to correspond to legitimate elements of {@code a}.
+     * That is, unset lanes may correspond to array indexes less than
+     * zero or beyond the end of the array.
+     *
+     * @param a the array, of type {@code Float16[]}
+     * @param offset the offset into the array
+     * @param m the mask controlling lane storage
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     */
+    @ForceInline
+    public final
+    void intoArray(Float16[] a, int offset,
+                   VectorMask<Float16> m) {
+        if (m.allTrue()) {
+            intoArray(a, offset);  // full mask: same as the unmasked store
+            return;
+        }
+        HalffloatSpecies vsp = vspecies();
+        if (!VectorIntrinsics.indexInRange(offset, vsp.length(), a.length)) {
+            checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
+        }
+        intoArray0(a, offset, m);
+    }
+
+    /**
+     * Scatters this vector into an array of type {@code Float16[]}
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane element at index {@code N} is stored into the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     *
+     * @param a the array
+     * @param offset an offset to combine with the index map offsets
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public final
+    void intoArray(Float16[] a, int offset,
+                   int[] indexMap, int mapOffset) {
+        // Lane i is written to dst[base + indexMap[mapOffset + i]].
+        stOp(a, offset,
+             (dst, base, lane, value) -> {
+                 dst[base + indexMap[mapOffset + lane]] = value;
+             });
+    }
+
+    /**
+     * Scatters this vector into an array of type {@code Float16[]},
+     * under the control of a mask, and
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * if the mask lane at index {@code N} is set then
+     * the lane element at index {@code N} is stored into the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     *
+     * @param a the array
+     * @param offset an offset to combine with the index map offsets
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @param m the mask
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public final
+    void intoArray(Float16[] a, int offset,
+                   int[] indexMap, int mapOffset,
+                   VectorMask<Float16> m) {
+        // Under mask m, lane i is written to dst[base + indexMap[mapOffset + i]].
+        stOp(a, offset, m,
+             (dst, base, lane, value) -> {
+                 dst[base + indexMap[mapOffset + lane]] = value;
+             });
+    }
+
+
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @since 19
+     */
+    @Override
+    @ForceInline
+    public final
+    void intoMemorySegment(MemorySegment ms, long offset,
+                           ByteOrder bo) {
+        if (ms.isReadOnly()) {
+            throw new UnsupportedOperationException("Attempt to write a read-only segment");
+        }
+        // Index-check the full vector byte size, then store in the requested byte order.
+        long start = checkFromIndexSize(offset, byteSize(), ms.byteSize());
+        maybeSwap(bo).intoMemorySegment0(ms, start);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @since 19
+     */
+    @Override
+    @ForceInline
+    public final
+    void intoMemorySegment(MemorySegment ms, long offset,
+                           ByteOrder bo,
+                           VectorMask<Float16> m) {
+        if (m.allTrue()) {
+            intoMemorySegment(ms, offset, bo);
+            return;
+        }
+        if (ms.isReadOnly()) {
+            throw new UnsupportedOperationException("Attempt to write a read-only segment");
+        }
+        HalffloatSpecies vsp = vspecies();
+        if (!VectorIntrinsics.indexInRange(offset, vsp.vectorByteSize(), ms.byteSize())) {
+            checkMaskFromIndexSize(offset, vsp, m, 2, ms.byteSize());
+        }
+        maybeSwap(bo).intoMemorySegment0(ms, offset, m);
+    }
+
+    // ================================================
+
+    // Low-level memory operations.
+    //
+    // Note that all of these operations *must* inline into a context
+    // where the exact species of the involved vector is a
+    // compile-time constant.  Otherwise, the intrinsic generation
+    // will fail and performance will suffer.
+    //
+    // In many cases this is achieved by re-deriving a version of the
+    // method in each concrete subclass (per species).  The re-derived
+    // method simply calls one of these generic methods, with exact
+    // parameters for the controlling metadata, which is either a
+    // typed vector or constant species instance.
+
+    // Unchecked loading operations in native byte order.
+    // Caller is responsible for applying index checks, masking, and
+    // byte swapping.
+
+    /*package-private*/
+    abstract
+    HalffloatVector fromArray0(Float16[] a, int offset);  // unchecked: index checks are the caller's job
+    @ForceInline
+    final
+    HalffloatVector fromArray0Template(Float16[] a, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        return VectorSupport.load(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset), false,
+            a, offset, vsp,
+            (arr, off, s) -> s.ldOp(arr, (int) off,  // Java implementation handed to VectorSupport.load
+                                    (arr_, off_, i) -> arr_[off_ + i]));  // lane i <- arr[off + i]
+    }
+
+    /*package-private*/
+    abstract
+    HalffloatVector fromArray0(Float16[] a, int offset, VectorMask<Float16> m, int offsetInRange);  // callers pass OFFSET_IN_RANGE or OFFSET_OUT_OF_RANGE
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    HalffloatVector fromArray0Template(Class<M> maskClass, Float16[] a, int offset, M m, int offsetInRange) {
+        m.check(species());  // presumably rejects a mask of a different species; confirm in VectorMask.check
+        HalffloatSpecies vsp = vspecies();
+        return VectorSupport.loadMasked(
+            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset), false, m, offsetInRange,
+            a, offset, vsp,
+            (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,  // Java implementation handed to loadMasked
+                                        (arr_, off_, i) -> arr_[off_ + i]));  // lane i <- arr[off + i]
+    }
+
+
+
+
+    abstract
+    HalffloatVector fromMemorySegment0(MemorySegment bb, long offset);  // unchecked, native byte order
+    @ForceInline
+    final
+    HalffloatVector fromMemorySegment0Template(MemorySegment ms, long offset) {
+        HalffloatSpecies vsp = vspecies();
+        return ScopedMemoryAccess.loadFromMemorySegment(
+                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+                (AbstractMemorySegmentImpl) ms, offset, vsp,
+                (msp, off, s) -> {  // Java implementation: per-lane read through memorySegmentGet
+                    return s.ldLongOp((MemorySegment) msp, off, HalffloatVector::memorySegmentGet);
+                });
+    }
+
+    abstract
+    HalffloatVector fromMemorySegment0(MemorySegment ms, long offset, VectorMask<Float16> m, int offsetInRange);  // unchecked, native byte order
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    HalffloatVector fromMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m, int offsetInRange) {
+        HalffloatSpecies vsp = vspecies();
+        m.check(vsp);  // presumably rejects a mask of a different species; confirm in VectorMask.check
+        return ScopedMemoryAccess.loadFromMemorySegmentMasked(
+                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+                (AbstractMemorySegmentImpl) ms, offset, m, vsp, offsetInRange,
+                (msp, off, s, vm) -> {  // Java implementation: masked per-lane read through memorySegmentGet
+                    return s.ldLongOp((MemorySegment) msp, off, vm, HalffloatVector::memorySegmentGet);
+                });
+    }
+
+    // Unchecked storing operations in native byte order.
+    // Caller is responsible for applying index checks, masking, and
+    // byte swapping.
+
+    abstract
+    void intoArray0(Float16[] a, int offset);  // unchecked: index checks are the caller's job
+    @ForceInline
+    final
+    void intoArray0Template(Float16[] a, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.store(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset), false,
+            this, a, offset,
+            (arr, off, v)  // Java implementation handed to VectorSupport.store:
+            -> v.stOp(arr, (int) off,
+                      (arr_, off_, i, e) -> arr_[off_+i] = e));  // lane i -> arr[off + i]
+    }
+
+    abstract
+    void intoArray0(Float16[] a, int offset, VectorMask<Float16> m);  // unchecked masked store
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    void intoArray0Template(Class<M> maskClass, Float16[] a, int offset, M m) {
+        m.check(species());  // presumably rejects a mask of a different species; confirm in VectorMask.check
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.storeMasked(
+            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset), false,
+            this, m, a, offset,
+            (arr, off, v, vm)  // Java implementation handed to VectorSupport.storeMasked:
+            -> v.stOp(arr, (int) off, vm,
+                      (arr_, off_, i, e) -> arr_[off_ + i] = e));  // lane i -> arr[off + i]
+    }
+
+
+
+    @ForceInline
+    final
+    void intoMemorySegment0(MemorySegment ms, long offset) {  // unchecked, native byte order
+        HalffloatSpecies vsp = vspecies();
+        ScopedMemoryAccess.storeIntoMemorySegment(
+                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+                this,
+                (AbstractMemorySegmentImpl) ms, offset,
+                (msp, off, v) -> {  // Java implementation: per-lane write through memorySegmentSet
+                    v.stLongOp((MemorySegment) msp, off, HalffloatVector::memorySegmentSet);
+                });
+    }
+
+    abstract
+    void intoMemorySegment0(MemorySegment bb, long offset, VectorMask<Float16> m);  // unchecked masked store, native byte order
+    @ForceInline
+    final
+    <M extends VectorMask<Float16>>
+    void intoMemorySegment0Template(Class<M> maskClass, MemorySegment ms, long offset, M m) {
+        HalffloatSpecies vsp = vspecies();
+        m.check(vsp);  // presumably rejects a mask of a different species; confirm in VectorMask.check
+        ScopedMemoryAccess.storeIntoMemorySegmentMasked(
+                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+                this, m,
+                (AbstractMemorySegmentImpl) ms, offset,
+                (msp, off, v, vm) -> {  // Java implementation: masked per-lane write through memorySegmentSet
+                    v.stLongOp((MemorySegment) msp, off, vm, HalffloatVector::memorySegmentSet);
+                });
+    }
+
+
+    // End of low-level memory operations.
+
+    private static
+    void checkMaskFromIndexSize(int offset,
+                                HalffloatSpecies vsp,
+                                VectorMask<Float16> m,
+                                int scale,
+                                int limit) {
+        AbstractMask<Float16> mask = (AbstractMask<Float16>) m;
+        mask.checkIndexByLane(offset, limit, vsp.iota(), scale);  // int-indexed variant
+    }
+
+    private static
+    void checkMaskFromIndexSize(long offset,
+                                HalffloatSpecies vsp,
+                                VectorMask<Float16> m,
+                                int scale,
+                                long limit) {
+        AbstractMask<Float16> mask = (AbstractMask<Float16>) m;
+        mask.checkIndexByLane(offset, limit, vsp.iota(), scale);  // long-indexed variant
+    }
+
+    @ForceInline
+    private void conditionalStoreNYI(int offset,
+                                     HalffloatSpecies vsp,
+                                     VectorMask<Float16> m,
+                                     int scale,
+                                     int limit) {
+        if (offset >= 0 && offset + vsp.laneCount() * scale <= limit) {
+            return;  // no overhang past either end of [0, limit) detected
+        }
+        throw new AssertionError(
+            String.format("unimplemented: store @%d in [0..%d), %s in %s",
+                          offset, limit, m, vsp));
+    }
+
+    /*package-private*/
+    @Override
+    @ForceInline
+    final
+    HalffloatVector maybeSwap(ByteOrder bo) {
+        if (bo == NATIVE_ENDIAN) {
+            return this;  // requested order is NATIVE_ENDIAN: nothing to do
+        }
+        // Otherwise permute the vector's bytes with swapBytesShuffle().
+        ByteVector bytes = this.reinterpretAsBytes();
+        return bytes.rearrange(swapBytesShuffle()).reinterpretAsHalffloats();
+    }
+
+    static final int ARRAY_SHIFT =  // floor(log2) of the object-array index scale
+        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_OBJECT_INDEX_SCALE);
+    static final long ARRAY_BASE =  // base offset of the first element of an object array
+        Unsafe.ARRAY_OBJECT_BASE_OFFSET;
+
+    @ForceInline
+    static long arrayAddress(Float16[] a, int index) {
+        return (((long) index) << ARRAY_SHIFT) + ARRAY_BASE;  // scaled index plus array base offset
+    }
+
+
+
+    @ForceInline
+    static long byteArrayAddress(byte[] a, int index) {
+        return index + Unsafe.ARRAY_BYTE_BASE_OFFSET;  // byte arrays need no index scaling
+    }
+
+    // ================================================
+
+    /// Reinterpreting view methods:
+    //   lanewise reinterpret: viewAsXVector()
+    //   keep shape, redraw lanes: reinterpretAsEs()
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @ForceInline
+    @Override
+    public final ByteVector reinterpretAsBytes() {
+         // Going to ByteVector, pay close attention to byte order.
+         assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
+         return asByteVectorRaw();
+         // NOTE(review): a non-little-endian REGISTER_ENDIAN would presumably need rearrange(swapBytesShuffle()) here.
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @ForceInline
+    @Override
+    public final ShortVector viewAsIntegralLanes() {
+        // View the lanes through the integral lane type paired with FLOAT16.
+        return (ShortVector) asVectorRaw(LaneType.FLOAT16.asIntegral());
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     *
+     * @implNote This method simply returns this vector unchanged,
+     * because the lanes of a {@code HalffloatVector} are already
+     * of a floating point type ({@code Float16}).  No
+     * reinterpretation of the lane bits is needed, and no
+     * {@code UnsupportedOperationException} is thrown.
+     * The return type is {@code HalffloatVector}.
+     */
+    @ForceInline
+    @Override
+    public final
+    HalffloatVector
+    viewAsFloatingLanes() {
+        return this;
+    }
+
+    // ================================================
+
+    /// Object methods: toString, equals, hashCode
+    //
+    // Object methods are defined as if via Arrays.toString, etc.,
+    // is applied to the array of elements.  Two equal vectors
+    // are required to have equal species and equal lane values.
+
+    /**
+     * Returns a string representation of this vector, of the form
+     * {@code "[0,1,2...]"}, reporting the lane values of this vector,
+     * in lane order.
+     *
+     * The string is produced as if by a call to {@link
+     * java.util.Arrays#toString(Object[]) Arrays.toString()},
+     * as appropriate to the {@code Float16} array returned by
+     * {@link #toArray this.toArray()}.
+     *
+     * @return a string of the form {@code "[0,1,2...]"}
+     * reporting the lane values of this vector
+     */
+    @Override
+    @ForceInline
+    public final
+    String toString() {
+        // toArray() yields a Float16[], which binds to the Object[] overload of Arrays.toString
+        return Arrays.toString(toArray());
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    boolean equals(Object obj) {
+        if (!(obj instanceof Vector)) {
+            return false;  // only another vector can be equal
+        }
+        Vector<?> other = (Vector<?>) obj;
+        // Equal means: same species, and every lane compares equal.
+        return this.species().equals(other.species())
+            && this.eq(other.check(this.species())).allTrue();
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    int hashCode() {
+        VectorSpecies<Float16> sp = species();  // combined with the hash of the lane values
+        return Objects.hash(sp, Arrays.hashCode(toArray()));
+    }
+
+    // ================================================
+
+    // Species
+
+    /**
+     * Class representing {@link HalffloatVector}'s of the same {@link VectorShape VectorShape}.
+     */
+    /*package-private*/
+    static final class HalffloatSpecies extends AbstractSpecies<Float16> {
+        private HalffloatSpecies(VectorShape shape,
+                Class<? extends HalffloatVector> vectorType,
+                Class<? extends AbstractMask<Float16>> maskType,
+                Function<Object, HalffloatVector> vectorFactory) {
+            super(shape, LaneType.of(Float16.class),
+                  vectorType, maskType,
+                  vectorFactory);
+            assert(this.elementSize() == Float16.SIZE);
+        }
+
+        // Specializing overrides:
+
+        @Override
+        @ForceInline
+        public final Class<Float16> elementType() {
+            return Float16.class;
+        }
+
+        @Override
+        @ForceInline
+        final Class<Float16> genericElementType() {
+            return Float16.class;
+        }
+
+        @SuppressWarnings("unchecked")
+        @Override
+        @ForceInline
+        public final Class<? extends HalffloatVector> vectorType() {
+            return (Class<? extends HalffloatVector>) vectorType;
+        }
+
+        @Override
+        @ForceInline
+        public final long checkValue(long e) {
+            longToElementBits(e);  // only for exception
+            return e;
+        }
+
+        /*package-private*/
+        @Override
+        @ForceInline
+        final HalffloatVector broadcastBits(long bits) {
+            return (HalffloatVector)
+                VectorSupport.fromBitsCoerced(
+                    vectorType, Float16.class, laneCount,
+                    bits, MODE_BROADCAST, this,
+                    (bits_, s_) -> s_.rvOp(i -> bits_));
+        }
+
+        /*package-private*/
+        @ForceInline
+        final HalffloatVector broadcast(Float16 e) {
+            return broadcastBits(toBits(e));
+        }
+
+        @Override
+        @ForceInline
+        public final HalffloatVector broadcast(long e) {
+            return broadcastBits(longToElementBits(e));
+        }
+
+        /*package-private*/
+        final @Override
+        @ForceInline
+        long longToElementBits(long value) {
+            // Do the conversion, and then test it for failure.
+            Float16 e = Float16.valueOf(value);
+            if (e.longValue() != value) {
+                throw badElementBits(value, e);
+            }
+            return toBits(e);
+        }
+
+        /* package-private: exact conversion of a lane to int or long, or an exception */
+        @ForceInline
+        static long toIntegralChecked(Float16 e, boolean convertToInt) {
+            long value = convertToInt ? e.intValue() : e.longValue();
+            if ((float) value != e.floatValue()) {  // NaN, infinity or a fraction was lost
+                throw badArrayBits(e, convertToInt, value);
+            }
+            return value;
+        }
+
+        /* this non-public one is for internal conversions */
+        @Override
+        @ForceInline
+        final HalffloatVector fromIntValues(int[] values) {
+            VectorIntrinsics.requireLength(values.length, laneCount);
+            Float16[] va = new Float16[laneCount()];
+            for (int i = 0; i < va.length; i++) {
+                int lv = values[i];
+                Float16 v = Float16.valueOf(lv);
+                va[i] = v;
+                if ( v.intValue() != lv) {
+                    throw badElementBits(lv, v);
+                }
+            }
+            return dummyVector().fromArray0(va, 0);
+        }
+
+        // Virtual constructors
+
+        @ForceInline
+        @Override final
+        public HalffloatVector fromArray(Object a, int offset) {
+            // User entry point
+            // Defer only to the equivalent method on the vector class, using the same inputs
+            return HalffloatVector
+                .fromArray(this, (Float16[]) a, offset);
+        }
+
+        @ForceInline
+        @Override final
+        public HalffloatVector fromMemorySegment(MemorySegment ms, long offset, ByteOrder bo) {
+            // User entry point
+            // Defer only to the equivalent method on the vector class, using the same inputs
+            return HalffloatVector
+                .fromMemorySegment(this, ms, offset, bo);
+        }
+
+        @ForceInline
+        @Override final
+        HalffloatVector dummyVector() {
+            return (HalffloatVector) super.dummyVector();
+        }
+
+        /*package-private*/
+        final @Override
+        @ForceInline
+        HalffloatVector rvOp(RVOp f) {
+            Float16[] res = new Float16[laneCount()];
+            for (int i = 0; i < res.length; i++) {
+                short bits = (short) f.apply(i);  // only the low 16 bits form the lane
+                res[i] = fromBits(bits);
+            }
+            return dummyVector().vectorFactory(res);
+        }
+
+        HalffloatVector vOp(FVOp f) {
+            Float16[] res = new Float16[laneCount()];
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i);
+            }
+            return dummyVector().vectorFactory(res);
+        }
+
+        HalffloatVector vOp(VectorMask<Float16> m, FVOp f) {
+            Float16[] res = new Float16[laneCount()];
+            boolean[] mbits = ((AbstractMask<Float16>)m).getBits();
+            Float16 zero = Float16.valueOf(0);
+            for (int i = 0; i < res.length; i++) {
+                // Unset lanes must hold zero: a fresh Float16[] contains nulls.
+                res[i] = mbits[i] ? f.apply(i) : zero;
+            }
+            return dummyVector().vectorFactory(res);
+        }
+
+        /*package-private*/
+        @ForceInline
+        <M> HalffloatVector ldOp(M memory, int offset,
+                                      FLdOp<M> f) {
+            return dummyVector().ldOp(memory, offset, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        <M> HalffloatVector ldOp(M memory, int offset,
+                                      VectorMask<Float16> m,
+                                      FLdOp<M> f) {
+            return dummyVector().ldOp(memory, offset, m, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        HalffloatVector ldLongOp(MemorySegment memory, long offset,
+                                      FLdLongOp f) {
+            return dummyVector().ldLongOp(memory, offset, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        HalffloatVector ldLongOp(MemorySegment memory, long offset,
+                                      VectorMask<Float16> m,
+                                      FLdLongOp f) {
+            return dummyVector().ldLongOp(memory, offset, m, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        <M> void stOp(M memory, int offset, FStOp<M> f) {
+            dummyVector().stOp(memory, offset, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        <M> void stOp(M memory, int offset,
+                      AbstractMask<Float16> m,
+                      FStOp<M> f) {
+            dummyVector().stOp(memory, offset, m, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        void stLongOp(MemorySegment memory, long offset, FStLongOp f) {
+            dummyVector().stLongOp(memory, offset, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        void stLongOp(MemorySegment memory, long offset,
+                      AbstractMask<Float16> m,
+                      FStLongOp f) {
+            dummyVector().stLongOp(memory, offset, m, f);
+        }
+
+        // N.B. Make sure these constant vectors and
+        // masks load up correctly into registers.
+        //
+        // Also, see if we can avoid all that switching.
+        // Could we cache both vectors and both masks in
+        // this species object?
+
+        // Zero and iota vector access
+        @Override
+        @ForceInline
+        public final HalffloatVector zero() {
+            if ((Class<?>) vectorType() == HalffloatMaxVector.class)
+                return HalffloatMaxVector.ZERO;
+            switch (vectorBitSize()) {
+                case 64: return Halffloat64Vector.ZERO;
+                case 128: return Halffloat128Vector.ZERO;
+                case 256: return Halffloat256Vector.ZERO;
+                case 512: return Halffloat512Vector.ZERO;
+            }
+            throw new AssertionError();
+        }
+
+        @Override
+        @ForceInline
+        public final HalffloatVector iota() {
+            if ((Class<?>) vectorType() == HalffloatMaxVector.class)
+                return HalffloatMaxVector.IOTA;
+            switch (vectorBitSize()) {
+                case 64: return Halffloat64Vector.IOTA;
+                case 128: return Halffloat128Vector.IOTA;
+                case 256: return Halffloat256Vector.IOTA;
+                case 512: return Halffloat512Vector.IOTA;
+            }
+            throw new AssertionError();
+        }
+
+        // Mask access
+        @Override
+        @ForceInline
+        public final VectorMask<Float16> maskAll(boolean bit) {
+            if ((Class<?>) vectorType() == HalffloatMaxVector.class)
+                return HalffloatMaxVector.HalffloatMaxMask.maskAll(bit);
+            switch (vectorBitSize()) {
+                case 64: return Halffloat64Vector.Halffloat64Mask.maskAll(bit);
+                case 128: return Halffloat128Vector.Halffloat128Mask.maskAll(bit);
+                case 256: return Halffloat256Vector.Halffloat256Mask.maskAll(bit);
+                case 512: return Halffloat512Vector.Halffloat512Mask.maskAll(bit);
+            }
+            throw new AssertionError();
+        }
+    }
+
+    /**
+     * Looks up the {@code Float16} species that has the given shape.
+     * @param s the shape
+     * @return a species for an element type of {@code Float16} and shape
+     * @throws IllegalArgumentException if no such species exists for the shape
+     */
+    static HalffloatSpecies species(VectorShape s) {
+        final VectorSpecies<Float16> found;
+        switch (Objects.requireNonNull(s).switchKey) {
+            case VectorShape.SK_64_BIT: found = SPECIES_64; break;
+            case VectorShape.SK_128_BIT: found = SPECIES_128; break;
+            case VectorShape.SK_256_BIT: found = SPECIES_256; break;
+            case VectorShape.SK_512_BIT: found = SPECIES_512; break;
+            case VectorShape.SK_Max_BIT: found = SPECIES_MAX; break;
+            default: throw new IllegalArgumentException("Bad shape: " + s);
+        }
+        return (HalffloatSpecies) found;  // every SPECIES_* constant is built as a HalffloatSpecies
+    }
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
+    public static final VectorSpecies<Float16> SPECIES_64
+        = new HalffloatSpecies(VectorShape.S_64_BIT,
+                            Halffloat64Vector.class,
+                            Halffloat64Vector.Halffloat64Mask.class,
+                            Halffloat64Vector::new);
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
+    public static final VectorSpecies<Float16> SPECIES_128
+        = new HalffloatSpecies(VectorShape.S_128_BIT,
+                            Halffloat128Vector.class,
+                            Halffloat128Vector.Halffloat128Mask.class,
+                            Halffloat128Vector::new);
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
+    public static final VectorSpecies<Float16> SPECIES_256
+        = new HalffloatSpecies(VectorShape.S_256_BIT,
+                            Halffloat256Vector.class,
+                            Halffloat256Vector.Halffloat256Mask.class,
+                            Halffloat256Vector::new);
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
+    public static final VectorSpecies<Float16> SPECIES_512
+        = new HalffloatSpecies(VectorShape.S_512_BIT,
+                            Halffloat512Vector.class,
+                            Halffloat512Vector.Halffloat512Mask.class,
+                            Halffloat512Vector::new);
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
+    public static final VectorSpecies<Float16> SPECIES_MAX
+        = new HalffloatSpecies(VectorShape.S_Max_BIT,
+                            HalffloatMaxVector.class,
+                            HalffloatMaxVector.HalffloatMaxMask.class,
+                            HalffloatMaxVector::new);
+
+    /**
+     * Preferred species for {@link HalffloatVector}s.
+     * A preferred species is a species of maximal bit-size for the platform.
+     */
+    public static final VectorSpecies<Float16> SPECIES_PREFERRED
+        = (HalffloatSpecies) VectorSpecies.ofPreferred(Float16.class);
+}
+
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java
index 0c83b037454..0af77b15e6d 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        int res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Integer> m) {
-        return (long) super.reduceLanesTemplate(op, Int128Mask.class, (Int128Mask) m);  // specialized
+        int res = super.reduceLanesTemplate(op, Int128Mask.class, (Int128Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -658,7 +660,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Int128Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Int128Mask) VectorSupport.indexPartiallyInUpperRange(
-                Int128Mask.class, int.class, VLENGTH, offset, limit,
+                Int128Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Int128Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java
index abb10696ba4..61f66403845 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        int res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Integer> m) {
-        return (long) super.reduceLanesTemplate(op, Int256Mask.class, (Int256Mask) m);  // specialized
+        int res = super.reduceLanesTemplate(op, Int256Mask.class, (Int256Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -666,7 +668,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Int256Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Int256Mask) VectorSupport.indexPartiallyInUpperRange(
-                Int256Mask.class, int.class, VLENGTH, offset, limit,
+                Int256Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Int256Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java
index 7b6435e4c0a..1c94bd86112 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        int res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Integer> m) {
-        return (long) super.reduceLanesTemplate(op, Int512Mask.class, (Int512Mask) m);  // specialized
+        int res = super.reduceLanesTemplate(op, Int512Mask.class, (Int512Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -682,7 +684,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Int512Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Int512Mask) VectorSupport.indexPartiallyInUpperRange(
-                Int512Mask.class, int.class, VLENGTH, offset, limit,
+                Int512Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Int512Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java
index 491010be90e..0b06265b0c1 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        int res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Integer> m) {
-        return (long) super.reduceLanesTemplate(op, Int64Mask.class, (Int64Mask) m);  // specialized
+        int res = super.reduceLanesTemplate(op, Int64Mask.class, (Int64Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -654,7 +656,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Int64Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Int64Mask) VectorSupport.indexPartiallyInUpperRange(
-                Int64Mask.class, int.class, VLENGTH, offset, limit,
+                Int64Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Int64Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java
index e20829f7c4f..331e6507c88 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final int reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        int res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Integer> m) {
-        return (long) super.reduceLanesTemplate(op, IntMaxMask.class, (IntMaxMask) m);  // specialized
+        int res = super.reduceLanesTemplate(op, IntMaxMask.class, (IntMaxMask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -652,7 +654,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         IntMaxMask indexPartiallyInUpperRange(long offset, long limit) {
             return (IntMaxMask) VectorSupport.indexPartiallyInUpperRange(
-                IntMaxMask.class, int.class, VLENGTH, offset, limit,
+                IntMaxMask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (IntMaxMask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java
index 3317e25e73e..2fdd7c4baf3 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java
@@ -2205,8 +2205,7 @@ final IntVector addIndexTemplate(int scale) {
                 // instruction directly, load IOTA from memory
                 // and multiply.
                 IntVector iota = s.iota();
-                int sc = (int) scale_;
-                return v.add(sc == 1 ? iota : iota.mul(sc));
+                return v.add(scale_ == 1 ? iota : iota.mul((int)scale_));
             });
     }
 
@@ -2269,7 +2268,8 @@ IntVector sliceTemplate(int origin, Vector<Integer> v1) {
         that.check(this);
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Integer> iota = iotaShuffle();
-        VectorMask<Integer> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((int)(length() - origin))));
+        int pivotidx = (int)(length() - origin);
+        VectorMask<Integer> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
     }
@@ -2299,7 +2299,8 @@ IntVector slice(int origin,
     IntVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Integer> iota = iotaShuffle();
-        VectorMask<Integer> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((int)(length() - origin))));
+        int pivotidx = (int)(length() - origin);
+        VectorMask<Integer> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2361,7 +2362,7 @@ IntVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Integer> iota = iotaShuffle();
         VectorMask<Integer> blendMask = iota.toVector().compare(VectorOperators.GE,
-                                                                  (broadcast((int)(origin))));
+                                                                  broadcast((int)(origin)));
         iota = iotaShuffle(-origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2926,7 +2927,7 @@ public final double[] toDoubleArray() {
         int[] a = toArray();
         double[] res = new double[a.length];
         for (int i = 0; i < a.length; i++) {
-            res[i] = (double) a[i];
+            res[i] = ((double) a[i]);
         }
         return res;
     }
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java
index 53fa773555f..efece4bff1c 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java
@@ -40,7 +40,8 @@ enum LaneType {
     BYTE(byte.class, Byte.class, byte[].class, 'I', -1, Byte.SIZE, T_BYTE),
     SHORT(short.class, Short.class, short[].class, 'I', -1, Short.SIZE, T_SHORT),
     INT(int.class, Integer.class, int[].class, 'I', -1, Integer.SIZE, T_INT),
-    LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, T_LONG);
+    LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, T_LONG),
+    FLOAT16(Float16.class, Float16.class, Float16[].class, 'F', 11, Float16.SIZE, T_FLOAT16);
 
     LaneType(Class<?> elementType,
              Class<?> genericElementType,
@@ -65,14 +66,14 @@ enum LaneType {
         // int:128 or int:4 or float:16, report the size in the
         // printName.  If we do unsigned or vector or bit lane types,
         // report that condition also.
-        this.typeChar = genericElementType.getSimpleName().charAt(0);
-        assert("FDBSIL".indexOf(typeChar) == ordinal()) : this;
+        this.typeChar = genericElementType.getSimpleName().charAt(getElementTypeIndex(elementType));
+        assert("FDBSILo".indexOf(typeChar) == ordinal()) : this;
         // Same as in JVMS, org.objectweb.asm.Opcodes, etc.:
         this.basicType = basicType;
         assert(basicType ==
                ( (elementSizeLog2 - /*lg(Byte.SIZE)*/ 3)
                  | (elementKind == 'F' ? 4 : 8))) : this;
-        assert("....zcFDBSILoav..".charAt(basicType) == typeChar);
+        assert("....zoFDBSILSoav..".charAt(basicType) == typeChar);
     }
 
     final Class<?> elementType;
@@ -108,13 +109,21 @@ LaneType asFloating() {
         return asFloating;
     }
 
+    static int getElementTypeIndex(Class<?> elementType) {
+        // Index, within the element type's simple name, of the character
+        // that keys the lane type.  Float16 is keyed by its third
+        // character ('o'), as its initial 'F' already denotes float;
+        // every other element type is keyed by its first character.
+        return (elementType == java.lang.Float16.class) ? 2 : 0;
+    }
+
     /** Decode a class mirror for an element type into an enum. */
     @ForceInline
     static LaneType of(Class<?> elementType) {
         // The following two lines are expected to
         // constant fold in the JIT, if the argument
         // is constant and this method is inlined.
-        int c0 = elementType.getName().charAt(0);
+        int c0 = elementType.getSimpleName().charAt(getElementTypeIndex(elementType));
         LaneType type = ENUM_FROM_C0[c0 & C0_MASK];
         // This line can short-circuit if a valid
         // elementType constant was passed:
@@ -184,7 +193,8 @@ RuntimeException badElementType(Class<?> elementType, Object expected) {
         SK_SHORT    = 4,
         SK_INT      = 5,
         SK_LONG     = 6,
-        SK_LIMIT    = 7;
+        SK_FLOAT16  = 7,
+        SK_LIMIT    = 8;
 
     /*package-private*/
     @ForceInline
@@ -225,7 +235,7 @@ static LaneType ofBasicType(int bt) {
             } catch (ReflectiveOperationException ex) {
                 throw new AssertionError(ex);
             }
-            int c0 = value.elementType.getName().charAt(0);
+            int c0 = value.elementType.getSimpleName().charAt(getElementTypeIndex(value.elementType));
             c0 &= C0_MASK;
             assert(valuesByC0[c0] == null);
             valuesByC0[c0] = value;
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java
index cf552c23a80..aa10a6cac92 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        long res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Long> m) {
-        return (long) super.reduceLanesTemplate(op, Long128Mask.class, (Long128Mask) m);  // specialized
+        long res = super.reduceLanesTemplate(op, Long128Mask.class, (Long128Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -644,7 +646,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Long128Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Long128Mask) VectorSupport.indexPartiallyInUpperRange(
-                Long128Mask.class, long.class, VLENGTH, offset, limit,
+                Long128Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Long128Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java
index ffa1029bdd8..28826cddfc9 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        long res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Long> m) {
-        return (long) super.reduceLanesTemplate(op, Long256Mask.class, (Long256Mask) m);  // specialized
+        long res = super.reduceLanesTemplate(op, Long256Mask.class, (Long256Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -648,7 +650,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Long256Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Long256Mask) VectorSupport.indexPartiallyInUpperRange(
-                Long256Mask.class, long.class, VLENGTH, offset, limit,
+                Long256Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Long256Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java
index aea8fe0fe6c..11483f64497 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        long res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Long> m) {
-        return (long) super.reduceLanesTemplate(op, Long512Mask.class, (Long512Mask) m);  // specialized
+        long res = super.reduceLanesTemplate(op, Long512Mask.class, (Long512Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -656,7 +658,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Long512Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Long512Mask) VectorSupport.indexPartiallyInUpperRange(
-                Long512Mask.class, long.class, VLENGTH, offset, limit,
+                Long512Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Long512Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java
index ffb07535d65..bd33341a8ec 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        long res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Long> m) {
-        return (long) super.reduceLanesTemplate(op, Long64Mask.class, (Long64Mask) m);  // specialized
+        long res = super.reduceLanesTemplate(op, Long64Mask.class, (Long64Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -642,7 +644,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Long64Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Long64Mask) VectorSupport.indexPartiallyInUpperRange(
-                Long64Mask.class, long.class, VLENGTH, offset, limit,
+                Long64Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Long64Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java
index e4197cb7f2e..1fef8e276f0 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -342,14 +342,16 @@ public final long reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        long res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Long> m) {
-        return (long) super.reduceLanesTemplate(op, LongMaxMask.class, (LongMaxMask) m);  // specialized
+        long res = super.reduceLanesTemplate(op, LongMaxMask.class, (LongMaxMask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -642,7 +644,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         LongMaxMask indexPartiallyInUpperRange(long offset, long limit) {
             return (LongMaxMask) VectorSupport.indexPartiallyInUpperRange(
-                LongMaxMask.class, long.class, VLENGTH, offset, limit,
+                LongMaxMask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (LongMaxMask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java
index 9dd3f2eb136..64f51bd333d 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java
@@ -2092,8 +2092,7 @@ final LongVector addIndexTemplate(int scale) {
                 // instruction directly, load IOTA from memory
                 // and multiply.
                 LongVector iota = s.iota();
-                long sc = (long) scale_;
-                return v.add(sc == 1 ? iota : iota.mul(sc));
+                return v.add(scale_ == 1 ? iota : iota.mul((long)scale_));
             });
     }
 
@@ -2135,7 +2134,8 @@ LongVector sliceTemplate(int origin, Vector<Long> v1) {
         that.check(this);
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Long> iota = iotaShuffle();
-        VectorMask<Long> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((long)(length() - origin))));
+        long pivotidx = (long)(length() - origin);
+        VectorMask<Long> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
     }
@@ -2165,7 +2165,8 @@ LongVector slice(int origin,
     LongVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Long> iota = iotaShuffle();
-        VectorMask<Long> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((long)(length() - origin))));
+        long pivotidx = (long)(length() - origin);
+        VectorMask<Long> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2227,7 +2228,7 @@ LongVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Long> iota = iotaShuffle();
         VectorMask<Long> blendMask = iota.toVector().compare(VectorOperators.GE,
-                                                                  (broadcast((long)(origin))));
+                                                                  broadcast((long)(origin)));
         iota = iotaShuffle(-origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2787,7 +2788,7 @@ public final double[] toDoubleArray() {
         long[] a = toArray();
         double[] res = new double[a.length];
         for (int i = 0; i < a.length; i++) {
-            res[i] = (double) a[i];
+            res[i] = ((double) a[i]);
         }
         return res;
     }
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java
index 8ae0638e4f3..3c2b673a3bb 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        short res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Short> m) {
-        return (long) super.reduceLanesTemplate(op, Short128Mask.class, (Short128Mask) m);  // specialized
+        short res = super.reduceLanesTemplate(op, Short128Mask.class, (Short128Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -666,7 +668,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Short128Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Short128Mask) VectorSupport.indexPartiallyInUpperRange(
-                Short128Mask.class, short.class, VLENGTH, offset, limit,
+                Short128Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Short128Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java
index cd9d8ceb887..32db9a3a292 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        short res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Short> m) {
-        return (long) super.reduceLanesTemplate(op, Short256Mask.class, (Short256Mask) m);  // specialized
+        short res = super.reduceLanesTemplate(op, Short256Mask.class, (Short256Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -682,7 +684,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Short256Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Short256Mask) VectorSupport.indexPartiallyInUpperRange(
-                Short256Mask.class, short.class, VLENGTH, offset, limit,
+                Short256Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Short256Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java
index 2a959a8181c..cc419227f91 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        short res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Short> m) {
-        return (long) super.reduceLanesTemplate(op, Short512Mask.class, (Short512Mask) m);  // specialized
+        short res = super.reduceLanesTemplate(op, Short512Mask.class, (Short512Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -714,7 +716,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Short512Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Short512Mask) VectorSupport.indexPartiallyInUpperRange(
-                Short512Mask.class, short.class, VLENGTH, offset, limit,
+                Short512Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Short512Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java
index 6090e9cf0d1..b11dc59187c 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        short res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Short> m) {
-        return (long) super.reduceLanesTemplate(op, Short64Mask.class, (Short64Mask) m);  // specialized
+        short res = super.reduceLanesTemplate(op, Short64Mask.class, (Short64Mask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -658,7 +660,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         Short64Mask indexPartiallyInUpperRange(long offset, long limit) {
             return (Short64Mask) VectorSupport.indexPartiallyInUpperRange(
-                Short64Mask.class, short.class, VLENGTH, offset, limit,
+                Short64Mask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (Short64Mask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java
index d451cd4443f..80b4276cc8b 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -347,14 +347,16 @@ public final short reduceLanes(VectorOperators.Associative op,
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        short res = super.reduceLanesTemplate(op);  // specialized
+        return  (long) res;
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<Short> m) {
-        return (long) super.reduceLanesTemplate(op, ShortMaxMask.class, (ShortMaxMask) m);  // specialized
+        short res = super.reduceLanesTemplate(op, ShortMaxMask.class, (ShortMaxMask) m);  // specialized
+        return  (long) res;
     }
 
     @ForceInline
@@ -652,7 +654,7 @@ public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
         /*package-private*/
         ShortMaxMask indexPartiallyInUpperRange(long offset, long limit) {
             return (ShortMaxMask) VectorSupport.indexPartiallyInUpperRange(
-                ShortMaxMask.class, short.class, VLENGTH, offset, limit,
+                ShortMaxMask.class, ETYPE, VLENGTH, offset, limit,
                 (o, l) -> (ShortMaxMask) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
index 84f542f07ff..c5adce31ad8 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
@@ -2221,8 +2221,7 @@ final ShortVector addIndexTemplate(int scale) {
                 // instruction directly, load IOTA from memory
                 // and multiply.
                 ShortVector iota = s.iota();
-                short sc = (short) scale_;
-                return v.add(sc == 1 ? iota : iota.mul(sc));
+                return v.add(scale_ == 1 ? iota : iota.mul((short)scale_));
             });
     }
 
@@ -2285,7 +2284,8 @@ ShortVector sliceTemplate(int origin, Vector<Short> v1) {
         that.check(this);
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Short> iota = iotaShuffle();
-        VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
+        short pivotidx = (short)(length() - origin);
+        VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
     }
@@ -2315,7 +2315,8 @@ ShortVector slice(int origin,
     ShortVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Short> iota = iotaShuffle();
-        VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin))));
+        short pivotidx = (short)(length() - origin);
+        VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2377,7 +2378,7 @@ ShortVector sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<Short> iota = iotaShuffle();
         VectorMask<Short> blendMask = iota.toVector().compare(VectorOperators.GE,
-                                                                  (broadcast((short)(origin))));
+                                                                  broadcast((short)(origin)));
         iota = iotaShuffle(-origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2949,7 +2950,7 @@ public final double[] toDoubleArray() {
         short[] a = toArray();
         double[] res = new double[a.length];
         for (int i = 0; i < a.length; i++) {
-            res[i] = (double) a[i];
+            res[i] = ((double) a[i]);
         }
         return res;
     }
@@ -4011,11 +4012,10 @@ public final ShortVector viewAsIntegralLanes() {
     @ForceInline
     @Override
     public final
-    Vector<?>
+    HalffloatVector
     viewAsFloatingLanes() {
-        LaneType flt = LaneType.SHORT.asFloating();
-        // asFloating() will throw UnsupportedOperationException for the unsupported type short
-        throw new AssertionError("Cannot reach here");
+        LaneType flt = LaneType.FLOAT16.asFloating();
+        return (HalffloatVector) asVectorRaw(flt);
     }
 
     // ================================================
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java
index d34ac79e7c3..45aa8e22a4f 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java
@@ -3027,6 +3027,19 @@ public abstract VectorMask<E> compare(VectorOperators.Comparison op,
      */
     public abstract DoubleVector reinterpretAsDoubles();
 
+    /**
+     * Reinterprets this vector as a vector of the same shape
+     * and contents but a lane type of {@code Float16},
+     * where the lanes are assembled from successive bytes
+     * according to little-endian order.
+     * It is a convenience method for the expression
+     * {@code reinterpretShape(species().withLanes(Float16.class))}.
+     * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
+     *
+     * @return a {@code HalffloatVector} with the same shape and information content
+     */
+    public abstract HalffloatVector reinterpretAsHalffloats();
+
     /**
      * Views this vector as a vector of the same shape, length, and
      * contents, but a lane type that is not a floating-point type.
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java
index 1ffbcef821a..622cb548b76 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java
@@ -237,10 +237,18 @@ public static VectorShape preferredShape() {
         return computePreferredShape();
     }
 
+    /**
+     * Maps a lane element type to the class used for the maximum vector size
+     * query: {@code Float16} is looked up as {@code short}, all others as-is.
+     */
+    private static Class<?> getEffectiveLaneType(Class<?> elementType) {
+        return (elementType == Float16.class) ? short.class : elementType;
+    }
+
     private static VectorShape computePreferredShape() {
         int prefBitSize = Integer.MAX_VALUE;
         for (LaneType type : LaneType.values()) {
-            Class<?> etype = type.elementType;
+            Class<?> etype = getEffectiveLaneType(type.elementType);
             prefBitSize = Math.min(prefBitSize, getMaxVectorBitSize(etype));
         }
         // If these assertions fail, we must reconsider our API portability assumptions.
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
index ad878268404..03459b9a8b6 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
@@ -46,12 +46,12 @@ import static jdk.incubator.vector.VectorOperators.*;
 
 /**
  * A specialized {@link Vector} representing an ordered immutable sequence of
- * {@code $type$} values.
+ * {@code $elemtype$} values.
  */
 @SuppressWarnings("cast")  // warning: redundant cast
 public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
-    $abstractvectortype$($type$[] vec) {
+    $abstractvectortype$($elemtype$[] vec) {
         super(vec);
     }
 
@@ -61,7 +61,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     static final int FORBID_OPCODE_KIND = VO_ONLYFP;
 #end[FP]
 
-    static final ValueLayout.Of$Type$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withByteAlignment(1);
+    static final ValueLayout.Of{#if[FP16]?Short:$Elemtype$} ELEMENT_LAYOUT = ValueLayout.JAVA_{#if[FP16]?SHORT:$TYPE$}.withByteAlignment(1);
 
     @ForceInline
     static int opCode(Operator op) {
@@ -98,7 +98,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     // Virtualized getter
 
     /*package-private*/
-    abstract $type$[] vec();
+    abstract $elemtype$[] vec();
 
     // Virtualized constructors
 
@@ -107,7 +107,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * It is an error if the array is aliased elsewhere.
      */
     /*package-private*/
-    abstract $abstractvectortype$ vectorFactory($type$[] vec);
+    abstract $abstractvectortype$ vectorFactory($elemtype$[] vec);
 
     /**
      * Build a mask directly using my species.
@@ -122,14 +122,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     // Constant loader (takes dummy as vector arg)
     interface FVOp {
-        $type$ apply(int i);
+        $elemtype$ apply(int i);
     }
 
     /*package-private*/
     @ForceInline
     final
     $abstractvectortype$ vOp(FVOp f) {
-        $type$[] res = new $type$[length()];
+        $elemtype$[] res = new $elemtype$[length()];
         for (int i = 0; i < res.length; i++) {
             res[i] = f.apply(i);
         }
@@ -139,7 +139,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     final
     $abstractvectortype$ vOp(VectorMask<$Boxtype$> m, FVOp f) {
-        $type$[] res = new $type$[length()];
+        $elemtype$[] res = new $elemtype$[length()];
         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
         for (int i = 0; i < res.length; i++) {
             if (mbits[i]) {
@@ -153,7 +153,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /*package-private*/
     interface FUnOp {
-        $type$ apply(int i, $type$ a);
+        $elemtype$ apply(int i, $elemtype$ a);
     }
 
     /*package-private*/
@@ -162,8 +162,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     final
     $abstractvectortype$ uOpTemplate(FUnOp f) {
-        $type$[] vec = vec();
-        $type$[] res = new $type$[length()];
+        $elemtype$[] vec = vec();
+        $elemtype$[] res = new $elemtype$[length()];
         for (int i = 0; i < res.length; i++) {
             res[i] = f.apply(i, vec[i]);
         }
@@ -181,8 +181,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         if (m == null) {
             return uOpTemplate(f);
         }
-        $type$[] vec = vec();
-        $type$[] res = new $type$[length()];
+        $elemtype$[] vec = vec();
+        $elemtype$[] res = new $elemtype$[length()];
         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
         for (int i = 0; i < res.length; i++) {
             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
@@ -194,7 +194,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /*package-private*/
     interface FBinOp {
-        $type$ apply(int i, $type$ a, $type$ b);
+        $elemtype$ apply(int i, $elemtype$ a, $elemtype$ b); // i: lane index; a: lane of the receiver (or the running accumulator in reductions), b: lane of the other operand
     }
 
     /*package-private*/
@@ -205,9 +205,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     final
     $abstractvectortype$ bOpTemplate(Vector<$Boxtype$> o,
                                      FBinOp f) {
-        $type$[] res = new $type$[length()];
-        $type$[] vec1 = this.vec();
-        $type$[] vec2 = (($abstractvectortype$)o).vec();
+        $elemtype$[] res = new $elemtype$[length()];
+        $elemtype$[] vec1 = this.vec();
+        $elemtype$[] vec2 = (($abstractvectortype$)o).vec();
         for (int i = 0; i < res.length; i++) {
             res[i] = f.apply(i, vec1[i], vec2[i]);
         }
@@ -227,9 +227,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         if (m == null) {
             return bOpTemplate(o, f);
         }
-        $type$[] res = new $type$[length()];
-        $type$[] vec1 = this.vec();
-        $type$[] vec2 = (($abstractvectortype$)o).vec();
+        $elemtype$[] res = new $elemtype$[length()];
+        $elemtype$[] vec1 = this.vec();
+        $elemtype$[] vec2 = (($abstractvectortype$)o).vec();
         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
         for (int i = 0; i < res.length; i++) {
             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
@@ -241,7 +241,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /*package-private*/
     interface FTriOp {
-        $type$ apply(int i, $type$ a, $type$ b, $type$ c);
+        $elemtype$ apply(int i, $elemtype$ a, $elemtype$ b, $elemtype$ c); // i: lane index; a, b, c: lane i of the receiver and of the two operand vectors
     }
 
     /*package-private*/
@@ -254,10 +254,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     $abstractvectortype$ tOpTemplate(Vector<$Boxtype$> o1,
                                      Vector<$Boxtype$> o2,
                                      FTriOp f) {
-        $type$[] res = new $type$[length()];
-        $type$[] vec1 = this.vec();
-        $type$[] vec2 = (($abstractvectortype$)o1).vec();
-        $type$[] vec3 = (($abstractvectortype$)o2).vec();
+        $elemtype$[] res = new $elemtype$[length()];
+        $elemtype$[] vec1 = this.vec();
+        $elemtype$[] vec2 = (($abstractvectortype$)o1).vec();
+        $elemtype$[] vec3 = (($abstractvectortype$)o2).vec();
         for (int i = 0; i < res.length; i++) {
             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
         }
@@ -279,10 +279,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         if (m == null) {
             return tOpTemplate(o1, o2, f);
         }
-        $type$[] res = new $type$[length()];
-        $type$[] vec1 = this.vec();
-        $type$[] vec2 = (($abstractvectortype$)o1).vec();
-        $type$[] vec3 = (($abstractvectortype$)o2).vec();
+        $elemtype$[] res = new $elemtype$[length()];
+        $elemtype$[] vec1 = this.vec();
+        $elemtype$[] vec2 = (($abstractvectortype$)o1).vec();
+        $elemtype$[] vec3 = (($abstractvectortype$)o2).vec();
         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
         for (int i = 0; i < res.length; i++) {
             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
@@ -294,15 +294,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /*package-private*/
     abstract
-    $type$ rOp($type$ v, VectorMask<$Boxtype$> m, FBinOp f);
+    $elemtype$ rOp($elemtype$ v, VectorMask<$Boxtype$> m, FBinOp f); // reduction hook: v is the starting value, m an optional mask; presumably implemented via rOpTemplate in concrete subclasses - confirm
 
     @ForceInline
     final
-    $type$ rOpTemplate($type$ v, VectorMask<$Boxtype$> m, FBinOp f) {
+    $elemtype$ rOpTemplate($elemtype$ v, VectorMask<$Boxtype$> m, FBinOp f) {
         if (m == null) {
             return rOpTemplate(v, f);
         }
-        $type$[] vec = vec();
+        $elemtype$[] vec = vec();
         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
         for (int i = 0; i < vec.length; i++) {
             v = mbits[i] ? f.apply(i, v, vec[i]) : v;
@@ -312,8 +312,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     @ForceInline
     final
-    $type$ rOpTemplate($type$ v, FBinOp f) {
-        $type$[] vec = vec();
+    $elemtype$ rOpTemplate($elemtype$ v, FBinOp f) {
+        $elemtype$[] vec = vec();
         for (int i = 0; i < vec.length; i++) {
             v = f.apply(i, v, vec[i]);
         }
@@ -324,7 +324,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /*package-private*/
     interface FLdOp<M> {
-        $type$ apply(M memory, int offset, int i);
+        $elemtype$ apply(M memory, int offset, int i); // returns the element for lane i; memory and offset are passed through unchanged by ldOp
     }
 
     /*package-private*/
@@ -333,7 +333,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     <M> $abstractvectortype$ ldOp(M memory, int offset,
                                   FLdOp<M> f) {
         //dummy; no vec = vec();
-        $type$[] res = new $type$[length()];
+        $elemtype$[] res = new $elemtype$[length()];
         for (int i = 0; i < res.length; i++) {
             res[i] = f.apply(memory, offset, i);
         }
@@ -346,8 +346,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     <M> $abstractvectortype$ ldOp(M memory, int offset,
                                   VectorMask<$Boxtype$> m,
                                   FLdOp<M> f) {
-        //$type$[] vec = vec();
-        $type$[] res = new $type$[length()];
+        //$elemtype$[] vec = vec();
+        $elemtype$[] res = new $elemtype$[length()];
         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
         for (int i = 0; i < res.length; i++) {
             if (mbits[i]) {
@@ -359,7 +359,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /*package-private*/
     interface FLdLongOp {
-        $type$ apply(MemorySegment memory, long offset, int i);
+        $elemtype$ apply(MemorySegment memory, long offset, int i); // returns the element for lane i; segment and long offset are passed through unchanged by ldLongOp
     }
 
     /*package-private*/
@@ -368,7 +368,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     $abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
                                   FLdLongOp f) {
         //dummy; no vec = vec();
-        $type$[] res = new $type$[length()];
+        $elemtype$[] res = new $elemtype$[length()];
         for (int i = 0; i < res.length; i++) {
             res[i] = f.apply(memory, offset, i);
         }
@@ -381,8 +381,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     $abstractvectortype$ ldLongOp(MemorySegment memory, long offset,
                                   VectorMask<$Boxtype$> m,
                                   FLdLongOp f) {
-        //$type$[] vec = vec();
-        $type$[] res = new $type$[length()];
+        //$elemtype$[] vec = vec();
+        $elemtype$[] res = new $elemtype$[length()];
         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
         for (int i = 0; i < res.length; i++) {
             if (mbits[i]) {
@@ -392,12 +392,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         return vectorFactory(res);
     }
 
-    static $type$ memorySegmentGet(MemorySegment ms, long o, int i) {
-        return ms.get(ELEMENT_LAYOUT, o + i * $sizeInBytes$L);
+    static $elemtype$ memorySegmentGet(MemorySegment ms, long o, int i) { // reads lane i from segment ms, starting at byte offset o
+        return {#if[FP16]?Float16.shortBitsToFloat16(ms.get(ELEMENT_LAYOUT, o + i * $sizeInBytes$L)):ms.get(ELEMENT_LAYOUT, o + i * $sizeInBytes$L)}; // FP16: reinterpret the stored 16 bits (same conversion fromBits uses), never convert the short numerically
     }
 
     interface FStOp<M> {
-        void apply(M memory, int offset, int i, $type$ a);
+        void apply(M memory, int offset, int i, $elemtype$ a); // stores a, the value of lane i; memory and offset are passed through unchanged by stOp
     }
 
     /*package-private*/
@@ -405,7 +405,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     final
     <M> void stOp(M memory, int offset,
                   FStOp<M> f) {
-        $type$[] vec = vec();
+        $elemtype$[] vec = vec();
         for (int i = 0; i < vec.length; i++) {
             f.apply(memory, offset, i, vec[i]);
         }
@@ -417,7 +417,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     <M> void stOp(M memory, int offset,
                   VectorMask<$Boxtype$> m,
                   FStOp<M> f) {
-        $type$[] vec = vec();
+        $elemtype$[] vec = vec();
         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
         for (int i = 0; i < vec.length; i++) {
             if (mbits[i]) {
@@ -427,7 +427,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     }
 
     interface FStLongOp {
-        void apply(MemorySegment memory, long offset, int i, $type$ a);
+        void apply(MemorySegment memory, long offset, int i, $elemtype$ a); // stores a, the value of lane i; segment and long offset are passed through unchanged by stLongOp
     }
 
     /*package-private*/
@@ -435,7 +435,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     final
     void stLongOp(MemorySegment memory, long offset,
                   FStLongOp f) {
-        $type$[] vec = vec();
+        $elemtype$[] vec = vec();
         for (int i = 0; i < vec.length; i++) {
             f.apply(memory, offset, i, vec[i]);
         }
@@ -447,7 +447,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     void stLongOp(MemorySegment memory, long offset,
                   VectorMask<$Boxtype$> m,
                   FStLongOp f) {
-        $type$[] vec = vec();
+        $elemtype$[] vec = vec();
         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
         for (int i = 0; i < vec.length; i++) {
             if (mbits[i]) {
@@ -456,15 +456,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         }
     }
 
-    static void memorySegmentSet(MemorySegment ms, long o, int i, $type$ e) {
-        ms.set(ELEMENT_LAYOUT, o + i * $sizeInBytes$L, e);
+    static void memorySegmentSet(MemorySegment ms, long o, int i, $elemtype$ e) { // writes e as lane i of segment ms, starting at byte offset o
+        ms.set(ELEMENT_LAYOUT, o + i * $sizeInBytes$L, {#if[FP16]?Float16.float16ToRawShortBits(e):e}); // FP16: store the raw 16-bit encoding (same conversion toBits uses); shortValue() is a numeric narrowing and would lose the encoding
     }
 
     // Binary test
 
     /*package-private*/
     interface FBinTest {
-        boolean apply(int cond, int i, $type$ a, $type$ b);
+        boolean apply(int cond, int i, $elemtype$ a, $elemtype$ b); // per-lane predicate: cond is passed through from bTest; a and b are lane i of the receiver and of the other vector
     }
 
     /*package-private*/
@@ -473,8 +473,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     AbstractMask<$Boxtype$> bTest(int cond,
                                   Vector<$Boxtype$> o,
                                   FBinTest f) {
-        $type$[] vec1 = vec();
-        $type$[] vec2 = (($abstractvectortype$)o).vec();
+        $elemtype$[] vec1 = vec();
+        $elemtype$[] vec2 = (($abstractvectortype$)o).vec();
         boolean[] bits = new boolean[length()];
         for (int i = 0; i < length(); i++){
             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
@@ -485,21 +485,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #if[BITWISE]
     /*package-private*/
     @ForceInline
-    static $type$ rotateLeft($type$ a, int n) {
+    static $elemtype$ rotateLeft($elemtype$ a, int n) { // rotates a left by n; int/long delegate to the box class, other lane types mask to the lane width and combine two shifts
 #if[intOrLong]
         return $Boxtype$.rotateLeft(a, n);
 #else[intOrLong]
-        return ($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1))));
+        return ($elemtype$)((((($elemtype$)a) & $Boxtype$.toUnsignedInt(($elemtype$)-1)) << (n & $Boxtype$.SIZE-1)) | (((($elemtype$)a) & $Boxtype$.toUnsignedInt(($elemtype$)-1)) >>> ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))); // shift count is reduced modulo the lane size in bits
 #end[intOrLong]
     }
 
     /*package-private*/
     @ForceInline
-    static $type$ rotateRight($type$ a, int n) {
+    static $elemtype$ rotateRight($elemtype$ a, int n) { // rotates a right by n; int/long delegate to the box class, other lane types mask to the lane width and combine two shifts
 #if[intOrLong]
         return $Boxtype$.rotateRight(a, n);
 #else[intOrLong]
-        return ($type$)((((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) >>> (n & $Boxtype$.SIZE-1)) | (((($type$)a) & $Boxtype$.toUnsignedInt(($type$)-1)) << ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1))));
+        return ($elemtype$)((((($elemtype$)a) & $Boxtype$.toUnsignedInt(($elemtype$)-1)) >>> (n & $Boxtype$.SIZE-1)) | (((($elemtype$)a) & $Boxtype$.toUnsignedInt(($elemtype$)-1)) << ($Boxtype$.SIZE - (n & $Boxtype$.SIZE-1)))); // shift count is reduced modulo the lane size in bits
 #end[intOrLong]
     }
 #end[BITWISE]
@@ -510,14 +510,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /*package-private*/
     @ForceInline
-    static long toBits($type$ e) {
-        return {#if[FP]? $Type$.$type$ToRaw$Bitstype$Bits(e): e};
+    static long toBits($elemtype$ e) { // FP lanes: raw bit pattern of e, widened to long; other lanes: e itself widened to long
+        return {#if[FP]?$Elemtype$.$fptype$ToRaw$Bitstype$Bits(e): e};
     }
 
     /*package-private*/
     @ForceInline
-    static $type$ fromBits(long bits) {
-        return {#if[FP]?$Type$.$bitstype$BitsTo$Type$}(($bitstype$)bits);
+    static $elemtype$ fromBits(long bits) { // narrows bits to the lane's bits type; FP lanes then reinterpret that pattern as the element type
+        return {#if[FP]?$Elemtype$.$bitstype$BitsTo$Fptype$}(($bitstype$)bits);
     }
 
     static $abstractvectortype$ expandHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
@@ -575,11 +575,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     public static $abstractvectortype$ zero(VectorSpecies<$Boxtype$> species) {
         $Type$Species vsp = ($Type$Species) species;
 #if[FP]
-        return VectorSupport.fromBitsCoerced(vsp.vectorType(), $type$.class, species.length(),
-                        toBits(0.0f), MODE_BROADCAST, vsp,
+        return VectorSupport.fromBitsCoerced(vsp.vectorType(), $elemtype$.class, species.length(),
+                        toBits({#if[FP16]?Float16.valueOf(0.0f):0.0f}), MODE_BROADCAST, vsp,
                         ((bits_, s_) -> s_.rvOp(i -> bits_)));
 #else[FP]
-        return VectorSupport.fromBitsCoerced(vsp.vectorType(), $type$.class, species.length(),
+        return VectorSupport.fromBitsCoerced(vsp.vectorType(), $elemtype$.class, species.length(),
                                 0, MODE_BROADCAST, vsp,
                                 ((bits_, s_) -> s_.rvOp(i -> bits_)));
 #end[FP]
@@ -610,7 +610,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @see Vector#broadcast(long)
      * @see VectorSpecies#broadcast(long)
      */
-    public abstract $abstractvectortype$ broadcast($type$ e);
+    public abstract $abstractvectortype$ broadcast($elemtype$ e);
 
     /**
      * Returns a vector of the given species
@@ -626,14 +626,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @see VectorSpecies#broadcast(long)
      */
     @ForceInline
-    public static $abstractvectortype$ broadcast(VectorSpecies<$Boxtype$> species, $type$ e) {
+    public static $abstractvectortype$ broadcast(VectorSpecies<$Boxtype$> species, $elemtype$ e) { // downcasts the species and delegates to its own broadcast
         $Type$Species vsp = ($Type$Species) species;
         return vsp.broadcast(e);
     }
 
     /*package-private*/
     @ForceInline
-    final $abstractvectortype$ broadcastTemplate($type$ e) {
+    final $abstractvectortype$ broadcastTemplate($elemtype$ e) { // broadcasts e through this vector's own species
         $Type$Species vsp = vspecies();
         return vsp.broadcast(e);
     }
@@ -643,9 +643,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * {@inheritDoc} <!--workaround-->
      * @apiNote
      * When working with vector subtypes like {@code $abstractvectortype$},
-     * {@linkplain #broadcast($type$) the more strongly typed method}
+     * {@linkplain #broadcast($elemtype$) the more strongly typed method}
      * is typically selected.  It can be explicitly selected
-     * using a cast: {@code v.broadcast(($type$)e)}.
+     * using a cast: {@code v.broadcast(($elemtype$)e)}.
      * The two expressions will produce numerically identical results.
      */
     @Override
@@ -667,7 +667,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws IllegalArgumentException
      *         if the given {@code long} value cannot
      *         be represented by the vector's {@code ETYPE}
-     * @see #broadcast(VectorSpecies,$type$)
+     * @see #broadcast(VectorSpecies,$elemtype$)
      * @see VectorSpecies#checkValue(long)
      */
     @ForceInline
@@ -706,7 +706,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         }
         int opc = opCode(op);
         return VectorSupport.unaryOp(
-            opc, getClass(), null, $type$.class, length(),
+            opc, getClass(), null, $elemtype$.class, length(),
             this, null,
             UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations));
     }
@@ -736,7 +736,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         }
         int opc = opCode(op);
         return VectorSupport.unaryOp(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, m,
             UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations));
     }
@@ -748,26 +748,26 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     private static UnaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> unaryOperations(int opc_) {
         switch (opc_) {
             case VECTOR_OP_NEG: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) -a);
+                    v0.uOp(m, (i, a) -> ($elemtype$) {#if[FP16]?Float16.valueOf(-a.floatValue()):-a});
             case VECTOR_OP_ABS: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.abs(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) {#if[FP16]?Float16.abs(a):Math.abs(a)});
 #if[!FP]
 #if[intOrLong]
             case VECTOR_OP_BIT_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.bitCount(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) $Boxtype$.bitCount(a));
             case VECTOR_OP_TZ_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfTrailingZeros(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) $Boxtype$.numberOfTrailingZeros(a));
             case VECTOR_OP_LZ_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfLeadingZeros(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) $Boxtype$.numberOfLeadingZeros(a));
             case VECTOR_OP_REVERSE: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverse(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) $Boxtype$.reverse(a));
 #else[intOrLong]
             case VECTOR_OP_BIT_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) bitCount(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) bitCount(a));
             case VECTOR_OP_TZ_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) numberOfTrailingZeros(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) numberOfTrailingZeros(a));
             case VECTOR_OP_LZ_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) numberOfLeadingZeros(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) numberOfLeadingZeros(a));
             case VECTOR_OP_REVERSE: return (v0, m) ->
                     v0.uOp(m, (i, a) -> reverse(a));
 #end[intOrLong]
@@ -777,43 +777,78 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                     v0.uOp(m, (i, a) -> a);
 #else[byte]
             case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverseBytes(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) $Boxtype$.reverseBytes(a));
 #end[byte]
 #end[BITWISE]
 #end[!FP]
 #if[FP]
+#if[!FP16]
             case VECTOR_OP_SIN: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.sin(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.sin(a));
             case VECTOR_OP_COS: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.cos(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.cos(a));
             case VECTOR_OP_TAN: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.tan(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.tan(a));
             case VECTOR_OP_ASIN: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.asin(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.asin(a));
             case VECTOR_OP_ACOS: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.acos(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.acos(a));
             case VECTOR_OP_ATAN: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.atan(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.atan(a));
             case VECTOR_OP_EXP: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.exp(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.exp(a));
             case VECTOR_OP_LOG: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.log(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.log(a));
             case VECTOR_OP_LOG10: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.log10(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.log10(a));
             case VECTOR_OP_SQRT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.sqrt(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.sqrt(a));
             case VECTOR_OP_CBRT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.cbrt(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.cbrt(a));
             case VECTOR_OP_SINH: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.sinh(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.sinh(a));
             case VECTOR_OP_COSH: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.cosh(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.cosh(a));
             case VECTOR_OP_TANH: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.tanh(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.tanh(a));
             case VECTOR_OP_EXPM1: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.expm1(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.expm1(a));
             case VECTOR_OP_LOG1P: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.log1p(a));
+                    v0.uOp(m, (i, a) -> ($elemtype$) Math.log1p(a));
+#else[!FP16]
+            case VECTOR_OP_SIN: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.sin(a.floatValue())));
+            case VECTOR_OP_COS: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.cos(a.floatValue())));
+            case VECTOR_OP_TAN: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.tan(a.floatValue())));
+            case VECTOR_OP_ASIN: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.asin(a.floatValue())));
+            case VECTOR_OP_ACOS: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.acos(a.floatValue())));
+            case VECTOR_OP_ATAN: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.atan(a.floatValue())));
+            case VECTOR_OP_EXP: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.exp(a.floatValue())));
+            case VECTOR_OP_LOG: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.log(a.floatValue())));
+            case VECTOR_OP_LOG10: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.log10(a.floatValue())));
+            case VECTOR_OP_SQRT: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.sqrt(a.floatValue())));
+            case VECTOR_OP_CBRT: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.cbrt(a.floatValue())));
+            case VECTOR_OP_SINH: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.sinh(a.floatValue())));
+            case VECTOR_OP_COSH: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.cosh(a.floatValue())));
+            case VECTOR_OP_TANH: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.tanh(a.floatValue())));
+            case VECTOR_OP_EXPM1: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.expm1(a.floatValue())));
+            case VECTOR_OP_LOG1P: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> Float16.valueOf(Math.log1p(a.floatValue())));
+#end[!FP16]
 #end[FP]
             default: return null;
         }
@@ -823,8 +858,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #lanewise(VectorOperators.Binary,$type$)
-     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
      */
     @Override
     public abstract
@@ -856,7 +891,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 that = that.lanewise(NOT);
                 op = AND;
             } else if (op == DIV) {
-                VectorMask<$Boxtype$> eqz = that.eq(($type$) 0);
+                VectorMask<$Boxtype$> eqz = that.eq(($elemtype$) 0);
                 if (eqz.anyTrue()) {
                     throw that.divZeroException();
                 }
@@ -866,14 +901,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
         int opc = opCode(op);
         return VectorSupport.binaryOp(
-            opc, getClass(), null, $type$.class, length(),
+            opc, getClass(), null, $elemtype$.class, length(),
             this, that, null,
             BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations));
     }
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
      */
     @Override
     public abstract
@@ -898,7 +933,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 return this.blend(that, mask.cast(vspecies()));
 #else[FP]
                 VectorMask<$Boxtype$> mask
-                    = this.compare(EQ, ($type$) 0, m);
+                    = this.compare(EQ, ($elemtype$) 0, m);
                 return this.blend(that, mask);
 #end[FP]
             }
@@ -915,7 +950,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 that = that.lanewise(NOT);
                 op = AND;
             } else if (op == DIV) {
-                VectorMask<$Boxtype$> eqz = that.eq(($type$)0);
+                VectorMask<$Boxtype$> eqz = that.eq(($elemtype$)0);
                 if (eqz.and(m).anyTrue()) {
                     throw that.divZeroException();
                 }
@@ -927,7 +962,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
         int opc = opCode(op);
         return VectorSupport.binaryOp(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, that, m,
             BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations));
     }
@@ -938,31 +973,46 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     private static BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> binaryOperations(int opc_) {
         switch (opc_) {
+#if[FP16]
+            case VECTOR_OP_ADD: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Float16.add(a, b));
+            case VECTOR_OP_SUB: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Float16.subtract(a, b));
+            case VECTOR_OP_MUL: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Float16.multiply(a, b));
+            case VECTOR_OP_DIV: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Float16.divide(a, b));
+            case VECTOR_OP_MAX: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Float16.max(a, b));
+            case VECTOR_OP_MIN: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Float16.min(a, b));
+#else[FP16]
             case VECTOR_OP_ADD: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a + b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a + b));
             case VECTOR_OP_SUB: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a - b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a - b));
             case VECTOR_OP_MUL: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a * b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a * b));
             case VECTOR_OP_DIV: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a / b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a / b));
             case VECTOR_OP_MAX: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.max(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)Math.max(a, b));
             case VECTOR_OP_MIN: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.min(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)Math.min(a, b));
+#end[FP16]
 #if[BITWISE]
             case VECTOR_OP_AND: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a & b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a & b));
             case VECTOR_OP_OR: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a | b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a | b));
             case VECTOR_OP_XOR: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a ^ b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$)(a ^ b));
             case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, n) -> ($type$)(a << n));
+                    v0.bOp(v1, vm, (i, a, n) -> ($elemtype$)(a << n));
             case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, n) -> ($type$)(a >> n));
+                    v0.bOp(v1, vm, (i, a, n) -> ($elemtype$)(a >> n));
             case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, n) -> ($type$)((a & LSHR_SETUP_MASK) >>> n));
+                    v0.bOp(v1, vm, (i, a, n) -> ($elemtype$)((a & LSHR_SETUP_MASK) >>> n));
             case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
                     v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
             case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
@@ -975,14 +1025,23 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[intOrLong]
 #end[BITWISE]
 #if[FP]
+#if[!FP16]
             case VECTOR_OP_OR: return (v0, v1, vm) ->
                     v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
             case VECTOR_OP_ATAN2: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.atan2(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$) Math.atan2(a, b));
+            case VECTOR_OP_POW: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$) Math.pow(a, b));
+            case VECTOR_OP_HYPOT: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> ($elemtype$) Math.hypot(a, b));
+#else[!FP16]
+            case VECTOR_OP_ATAN2: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Float16.valueOf(Math.atan2(a.floatValue(), b.floatValue())));
             case VECTOR_OP_POW: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.pow(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> Float16.valueOf(Math.pow(a.floatValue(), b.floatValue())));
             case VECTOR_OP_HYPOT: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.hypot(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> Float16.valueOf(Math.hypot(a.floatValue(), b.floatValue())));
+#end[!FP16]
 #end[FP]
             default: return null;
         }
@@ -1010,18 +1069,18 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws UnsupportedOperationException if this vector does
      *         not support the requested operation
      * @see #lanewise(VectorOperators.Binary,Vector)
-     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Binary op,
-                                  $type$ e) {
+                                  $elemtype$ e) {
 #if[BITWISE]
-        if (opKind(op, VO_SHIFT) && ($type$)(int)e == e) {
+        if (opKind(op, VO_SHIFT) && ($elemtype$)(int)e == e) {
             return lanewiseShift(op, (int) e);
         }
         if (op == AND_NOT) {
-            op = AND; e = ($type$) ~e;
+            op = AND; e = ($elemtype$) ~e;
         }
 #end[BITWISE]
         return lanewise(op, broadcast(e));
@@ -1045,19 +1104,19 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws UnsupportedOperationException if this vector does
      *         not support the requested operation
      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
-     * @see #lanewise(VectorOperators.Binary,$type$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Binary op,
-                                  $type$ e,
+                                  $elemtype$ e,
                                   VectorMask<$Boxtype$> m) {
 #if[BITWISE]
-        if (opKind(op, VO_SHIFT) && ($type$)(int)e == e) {
+        if (opKind(op, VO_SHIFT) && ($elemtype$)(int)e == e) {
             return lanewiseShift(op, (int) e, m);
         }
         if (op == AND_NOT) {
-            op = AND; e = ($type$) ~e;
+            op = AND; e = ($elemtype$) ~e;
         }
 #end[BITWISE]
         return lanewise(op, broadcast(e), m);
@@ -1068,23 +1127,23 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * {@inheritDoc} <!--workaround-->
      * @apiNote
      * When working with vector subtypes like {@code $abstractvectortype$},
-     * {@linkplain #lanewise(VectorOperators.Binary,$type$)
+     * {@linkplain #lanewise(VectorOperators.Binary,$elemtype$)
      * the more strongly typed method}
      * is typically selected.  It can be explicitly selected
-     * using a cast: {@code v.lanewise(op,($type$)e)}.
+     * using a cast: {@code v.lanewise(op,($elemtype$)e)}.
      * The two expressions will produce numerically identical results.
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                   long e) {
-        $type$ e1 = ($type$) e;
+        $elemtype$ e1 = {#if[FP16]?Float16.valueOf(e):($elemtype$) e};
 #if[BITWISE]
         if ((long)e1 != e
             // allow shift ops to clip down their int parameters
             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 #else[BITWISE]
-        if ((long)e1 != e) {
+        if ({#if[FP16]?e1.longValue():(long)e1} != e) {
 #end[BITWISE]
             vspecies().checkValue(e);  // for exception
         }
@@ -1095,23 +1154,23 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * {@inheritDoc} <!--workaround-->
      * @apiNote
      * When working with vector subtypes like {@code $abstractvectortype$},
-     * {@linkplain #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * {@linkplain #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
      * the more strongly typed method}
      * is typically selected.  It can be explicitly selected
-     * using a cast: {@code v.lanewise(op,($type$)e,m)}.
+     * using a cast: {@code v.lanewise(op,($elemtype$)e,m)}.
      * The two expressions will produce numerically identical results.
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                   long e, VectorMask<$Boxtype$> m) {
-        $type$ e1 = ($type$) e;
+        $elemtype$ e1 = {#if[FP16]?Float16.valueOf(e):($elemtype$) e};
 #if[BITWISE]
         if ((long)e1 != e
             // allow shift ops to clip down their int parameters
             && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 #else[BITWISE]
-        if ((long)e1 != e) {
+        if ({#if[FP16]?e1.longValue():(long)e1} != e) {
 #end[BITWISE]
             vspecies().checkValue(e);  // for exception
         }
@@ -1134,7 +1193,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         e &= SHIFT_MASK;
         int opc = opCode(op);
         return VectorSupport.broadcastInt(
-            opc, getClass(), null, $type$.class, length(),
+            opc, getClass(), null, $elemtype$.class, length(),
             this, e, null,
             BIN_INT_IMPL.find(op, opc, $abstractvectortype$::broadcastIntOperations));
     }
@@ -1155,7 +1214,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         e &= SHIFT_MASK;
         int opc = opCode(op);
         return VectorSupport.broadcastInt(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, e, m,
             BIN_INT_IMPL.find(op, opc, $abstractvectortype$::broadcastIntOperations));
     }
@@ -1167,11 +1226,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     private static VectorBroadcastIntOp<$abstractvectortype$, VectorMask<$Boxtype$>> broadcastIntOperations(int opc_) {
         switch (opc_) {
             case VECTOR_OP_LSHIFT: return (v, n, m) ->
-                    v.uOp(m, (i, a) -> ($type$)(a << n));
+                    v.uOp(m, (i, a) -> ($elemtype$)(a << n));
             case VECTOR_OP_RSHIFT: return (v, n, m) ->
-                    v.uOp(m, (i, a) -> ($type$)(a >> n));
+                    v.uOp(m, (i, a) -> ($elemtype$)(a >> n));
             case VECTOR_OP_URSHIFT: return (v, n, m) ->
-                    v.uOp(m, (i, a) -> ($type$)((a & LSHR_SETUP_MASK) >>> n));
+                    v.uOp(m, (i, a) -> ($elemtype$)((a & LSHR_SETUP_MASK) >>> n));
             case VECTOR_OP_LROTATE: return (v, n, m) ->
                     v.uOp(m, (i, a) -> rotateLeft(a, (int)n));
             case VECTOR_OP_RROTATE: return (v, n, m) ->
@@ -1190,7 +1249,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     // Also simulate >>> on sub-word variables with a mask.
     private static final int LSHR_SETUP_MASK = ((1 << $Boxtype$.SIZE) - 1);
 #else[byteOrShort]
-    private static final $type$ LSHR_SETUP_MASK = -1;
+    private static final $elemtype$ LSHR_SETUP_MASK = -1;
 #end[byteOrShort]
 #end[BITWISE]
 
@@ -1206,12 +1265,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
    /**
      * {@inheritDoc} <!--workaround-->
-     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
-     * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
-     * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
-     * @see #lanewise(VectorOperators.Ternary,$type$,$type$)
-     * @see #lanewise(VectorOperators.Ternary,Vector,$type$)
-     * @see #lanewise(VectorOperators.Ternary,$type$,Vector)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$)
+     * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector)
      */
     @Override
     public abstract
@@ -1239,16 +1298,16 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[BITWISE]
         int opc = opCode(op);
         return VectorSupport.ternaryOp(
-            opc, getClass(), null, $type$.class, length(),
+            opc, getClass(), null, $elemtype$.class, length(),
             this, that, tother, null,
             TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations));
     }
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
-     * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
-     * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector,VectorMask)
      */
     @Override
     public abstract
@@ -1281,7 +1340,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[BITWISE]
         int opc = opCode(op);
         return VectorSupport.ternaryOp(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, that, tother, m,
             TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations));
     }
@@ -1293,8 +1352,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     private static TernaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> ternaryOperations(int opc_) {
         switch (opc_) {
 #if[FP]
-            case VECTOR_OP_FMA: return (v0, v1_, v2_, m) ->
-                    v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
+            case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> {#if[FP16]?Float16.fma(a, b, c):Math.fma(a, b, c)});
 #end[FP]
             default: return null;
         }
@@ -1317,13 +1375,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws UnsupportedOperationException if this vector does
      *         not support the requested operation
      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
-     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask)
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,e2)
-                                  $type$ e1,
-                                  $type$ e2) {
+                                  $elemtype$ e1,
+                                  $elemtype$ e2) {
         return lanewise(op, broadcast(e1), broadcast(e2));
     }
 
@@ -1346,13 +1404,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws UnsupportedOperationException if this vector does
      *         not support the requested operation
      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
-     * @see #lanewise(VectorOperators.Ternary,$type$,$type$)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$)
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
-                                  $type$ e1,
-                                  $type$ e2,
+                                  $elemtype$ e1,
+                                  $elemtype$ e2,
                                   VectorMask<$Boxtype$> m) {
         return lanewise(op, broadcast(e1), broadcast(e2), m);
     }
@@ -1373,14 +1431,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *         to the input vectors and the scalar
      * @throws UnsupportedOperationException if this vector does
      *         not support the requested operation
-     * @see #lanewise(VectorOperators.Ternary,$type$,$type$)
-     * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$)
+     * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$,VectorMask)
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,v1,e2)
                                   Vector<$Boxtype$> v1,
-                                  $type$ e2) {
+                                  $elemtype$ e2) {
         return lanewise(op, v1, broadcast(e2));
     }
 
@@ -1403,14 +1461,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws UnsupportedOperationException if this vector does
      *         not support the requested operation
      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
-     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
-     * @see #lanewise(VectorOperators.Ternary,Vector,$type$)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$)
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
                                   Vector<$Boxtype$> v1,
-                                  $type$ e2,
+                                  $elemtype$ e2,
                                   VectorMask<$Boxtype$> m) {
         return lanewise(op, v1, broadcast(e2), m);
     }
@@ -1432,12 +1490,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws UnsupportedOperationException if this vector does
      *         not support the requested operation
      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
-     * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector,VectorMask)
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,v2)
-                                  $type$ e1,
+                                  $elemtype$ e1,
                                   Vector<$Boxtype$> v2) {
         return lanewise(op, broadcast(e1), v2);
     }
@@ -1461,12 +1519,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws UnsupportedOperationException if this vector does
      *         not support the requested operation
      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
-     * @see #lanewise(VectorOperators.Ternary,$type$,Vector)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector)
      */
     @ForceInline
     public final
     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
-                                  $type$ e1,
+                                  $elemtype$ e1,
                                   Vector<$Boxtype$> v2,
                                   VectorMask<$Boxtype$> m) {
         return lanewise(op, broadcast(e1), v2, m);
@@ -1482,7 +1540,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #add($type$)
+     * @see #add($elemtype$)
      */
     @Override
     @ForceInline
@@ -1497,28 +1555,28 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * the primitive addition operation ({@code +}) to each lane.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$)
      *    lanewise}{@code (}{@link VectorOperators#ADD
      *    ADD}{@code , e)}.
      *
      * @param e the input scalar
      * @return the result of adding each lane of this vector to the scalar
      * @see #add(Vector)
-     * @see #broadcast($type$)
-     * @see #add($type$,VectorMask)
+     * @see #broadcast($elemtype$)
+     * @see #add($elemtype$,VectorMask)
      * @see VectorOperators#ADD
      * @see #lanewise(VectorOperators.Binary,Vector)
-     * @see #lanewise(VectorOperators.Binary,$type$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
      */
     @ForceInline
     public final
-    $abstractvectortype$ add($type$ e) {
+    $abstractvectortype$ add($elemtype$ e) {
         return lanewise(ADD, e);
     }
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #add($type$,VectorMask)
+     * @see #add($elemtype$,VectorMask)
      */
     @Override
     @ForceInline
@@ -1535,7 +1593,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * the primitive addition operation ({@code +}) to each lane.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
      *    lanewise}{@code (}{@link VectorOperators#ADD
      *    ADD}{@code , s, m)}.
      *
@@ -1543,21 +1601,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @param m the mask controlling lane selection
      * @return the result of adding each lane of this vector to the scalar
      * @see #add(Vector,VectorMask)
-     * @see #broadcast($type$)
-     * @see #add($type$)
+     * @see #broadcast($elemtype$)
+     * @see #add($elemtype$)
      * @see VectorOperators#ADD
      * @see #lanewise(VectorOperators.Binary,Vector)
-     * @see #lanewise(VectorOperators.Binary,$type$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
      */
     @ForceInline
-    public final $abstractvectortype$ add($type$ e,
+    public final $abstractvectortype$ add($elemtype$ e,
                                           VectorMask<$Boxtype$> m) {
         return lanewise(ADD, e, m);
     }
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #sub($type$)
+     * @see #sub($elemtype$)
      */
     @Override
     @ForceInline
@@ -1572,27 +1630,27 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * the primitive subtraction operation ({@code -}) to each lane.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$)
      *    lanewise}{@code (}{@link VectorOperators#SUB
      *    SUB}{@code , e)}.
      *
      * @param e the input scalar
      * @return the result of subtracting the scalar from each lane of this vector
      * @see #sub(Vector)
-     * @see #broadcast($type$)
-     * @see #sub($type$,VectorMask)
+     * @see #broadcast($elemtype$)
+     * @see #sub($elemtype$,VectorMask)
      * @see VectorOperators#SUB
      * @see #lanewise(VectorOperators.Binary,Vector)
-     * @see #lanewise(VectorOperators.Binary,$type$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
      */
     @ForceInline
-    public final $abstractvectortype$ sub($type$ e) {
+    public final $abstractvectortype$ sub($elemtype$ e) {
         return lanewise(SUB, e);
     }
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #sub($type$,VectorMask)
+     * @see #sub($elemtype$,VectorMask)
      */
     @Override
     @ForceInline
@@ -1609,7 +1667,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * the primitive subtraction operation ({@code -}) to each lane.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
      *    lanewise}{@code (}{@link VectorOperators#SUB
      *    SUB}{@code , s, m)}.
      *
@@ -1617,21 +1675,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @param m the mask controlling lane selection
      * @return the result of subtracting the scalar from each lane of this vector
      * @see #sub(Vector,VectorMask)
-     * @see #broadcast($type$)
-     * @see #sub($type$)
+     * @see #broadcast($elemtype$)
+     * @see #sub($elemtype$)
      * @see VectorOperators#SUB
      * @see #lanewise(VectorOperators.Binary,Vector)
-     * @see #lanewise(VectorOperators.Binary,$type$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
      */
     @ForceInline
-    public final $abstractvectortype$ sub($type$ e,
+    public final $abstractvectortype$ sub($elemtype$ e,
                                           VectorMask<$Boxtype$> m) {
         return lanewise(SUB, e, m);
     }
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #mul($type$)
+     * @see #mul($elemtype$)
      */
     @Override
     @ForceInline
@@ -1646,27 +1704,27 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * the primitive multiplication operation ({@code *}) to each lane.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$)
      *    lanewise}{@code (}{@link VectorOperators#MUL
      *    MUL}{@code , e)}.
      *
      * @param e the input scalar
      * @return the result of multiplying this vector by the given scalar
      * @see #mul(Vector)
-     * @see #broadcast($type$)
-     * @see #mul($type$,VectorMask)
+     * @see #broadcast($elemtype$)
+     * @see #mul($elemtype$,VectorMask)
      * @see VectorOperators#MUL
      * @see #lanewise(VectorOperators.Binary,Vector)
-     * @see #lanewise(VectorOperators.Binary,$type$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
      */
     @ForceInline
-    public final $abstractvectortype$ mul($type$ e) {
+    public final $abstractvectortype$ mul($elemtype$ e) {
         return lanewise(MUL, e);
     }
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #mul($type$,VectorMask)
+     * @see #mul($elemtype$,VectorMask)
      */
     @Override
     @ForceInline
@@ -1683,7 +1741,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * the primitive multiplication operation ({@code *}) to each lane.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
      *    lanewise}{@code (}{@link VectorOperators#MUL
      *    MUL}{@code , s, m)}.
      *
@@ -1691,14 +1749,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @param m the mask controlling lane selection
      * @return the result of muling each lane of this vector to the scalar
      * @see #mul(Vector,VectorMask)
-     * @see #broadcast($type$)
-     * @see #mul($type$)
+     * @see #broadcast($elemtype$)
+     * @see #mul($elemtype$)
      * @see VectorOperators#MUL
      * @see #lanewise(VectorOperators.Binary,Vector)
-     * @see #lanewise(VectorOperators.Binary,$type$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
      */
     @ForceInline
-    public final $abstractvectortype$ mul($type$ e,
+    public final $abstractvectortype$ mul($elemtype$ e,
                                           VectorMask<$Boxtype$> m) {
         return lanewise(MUL, e, m);
     }
@@ -1728,7 +1786,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * the primitive division operation ({@code /}) to each lane.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$)
      *    lanewise}{@code (}{@link VectorOperators#DIV
      *    DIV}{@code , e)}.
      *
@@ -1745,20 +1803,20 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @param e the input scalar
      * @return the result of dividing each lane of this vector by the scalar
      * @see #div(Vector)
-     * @see #broadcast($type$)
-     * @see #div($type$,VectorMask)
+     * @see #broadcast($elemtype$)
+     * @see #div($elemtype$,VectorMask)
      * @see VectorOperators#DIV
      * @see #lanewise(VectorOperators.Binary,Vector)
-     * @see #lanewise(VectorOperators.Binary,$type$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
      */
     @ForceInline
-    public final $abstractvectortype$ div($type$ e) {
+    public final $abstractvectortype$ div($elemtype$ e) {
         return lanewise(DIV, e);
     }
 
     /**
      * {@inheritDoc} <!--workaround-->
-     * @see #div($type$,VectorMask)
+     * @see #div($elemtype$,VectorMask)
 #if[FP]
      * @apiNote Because the underlying scalar operator is an IEEE
      * floating point number, division by zero in fact will
@@ -1784,7 +1842,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * the primitive division operation ({@code /}) to each lane.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
      *    lanewise}{@code (}{@link VectorOperators#DIV
      *    DIV}{@code , s, m)}.
      *
@@ -1802,14 +1860,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @param m the mask controlling lane selection
      * @return the result of dividing each lane of this vector by the scalar
      * @see #div(Vector,VectorMask)
-     * @see #broadcast($type$)
-     * @see #div($type$)
+     * @see #broadcast($elemtype$)
+     * @see #div($elemtype$)
      * @see VectorOperators#DIV
      * @see #lanewise(VectorOperators.Binary,Vector)
-     * @see #lanewise(VectorOperators.Binary,$type$)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$)
      */
     @ForceInline
-    public final $abstractvectortype$ div($type$ e,
+    public final $abstractvectortype$ div($elemtype$ e,
                                           VectorMask<$Boxtype$> m) {
         return lanewise(DIV, e, m);
     }
@@ -1844,16 +1902,16 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * corresponding lane values.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$)
      *    lanewise}{@code (}{@link VectorOperators#MIN
      *    MIN}{@code , e)}.
      *
      * @param e the input scalar
      * @return the result of multiplying this vector by the given scalar
      * @see #min(Vector)
-     * @see #broadcast($type$)
+     * @see #broadcast($elemtype$)
      * @see VectorOperators#MIN
-     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
 #if[FP]
      * @apiNote
      * For this method, floating point negative
@@ -1862,7 +1920,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[FP]
      */
     @ForceInline
-    public final $abstractvectortype$ min($type$ e) {
+    public final $abstractvectortype$ min($elemtype$ e) {
         return lanewise(MIN, e);
     }
 
@@ -1889,16 +1947,16 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * corresponding lane values.
      *
      * This method is also equivalent to the expression
-     * {@link #lanewise(VectorOperators.Binary,$type$)
+     * {@link #lanewise(VectorOperators.Binary,$elemtype$)
      *    lanewise}{@code (}{@link VectorOperators#MAX
      *    MAX}{@code , e)}.
      *
      * @param e the input scalar
      * @return the result of multiplying this vector by the given scalar
      * @see #max(Vector)
-     * @see #broadcast($type$)
+     * @see #broadcast($elemtype$)
      * @see VectorOperators#MAX
-     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
 #if[FP]
      * @apiNote
      * For this method, floating point negative
@@ -1907,7 +1965,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[FP]
      */
     @ForceInline
-    public final $abstractvectortype$ max($type$ e) {
+    public final $abstractvectortype$ max($elemtype$ e) {
         return lanewise(MAX, e);
     }
 
@@ -1935,7 +1993,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *
      * @param v a second input vector
      * @return the bitwise {@code &} of this vector and the second input vector
-     * @see #and($type$)
+     * @see #and($elemtype$)
      * @see #or(Vector)
      * @see #not()
      * @see VectorOperators#AND
@@ -1966,7 +2024,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
      */
     @ForceInline
-    public final $abstractvectortype$ and($type$ e) {
+    public final $abstractvectortype$ and($elemtype$ e) {
         return lanewise(AND, e);
     }
 
@@ -1992,7 +2050,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *
      * @param v a second input vector
      * @return the bitwise {@code |} of this vector and the second input vector
-     * @see #or($type$)
+     * @see #or($elemtype$)
      * @see #and(Vector)
      * @see #not()
      * @see VectorOperators#OR
@@ -2023,7 +2081,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
      */
     @ForceInline
-    public final $abstractvectortype$ or($type$ e) {
+    public final $abstractvectortype$ or($elemtype$ e) {
         return lanewise(OR, e);
     }
 
@@ -2059,7 +2117,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *
      * @param b a vector exponent by which to raise this vector
      * @return the {@code b}-th power of this vector
-     * @see #pow($type$)
+     * @see #pow($elemtype$)
      * @see VectorOperators#POW
      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
      */
@@ -2091,10 +2149,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @return the {@code b}-th power of this vector
      * @see #pow(Vector)
      * @see VectorOperators#POW
-     * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Binary,$elemtype$,VectorMask)
      */
     @ForceInline
-    public final $abstractvectortype$ pow($type$ b) {
+    public final $abstractvectortype$ pow($elemtype$ b) {
         return lanewise(POW, b);
     }
 #end[FP]
@@ -2123,7 +2181,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
 #if[!FP]
 #if[!intOrLong]
-    static int bitCount($type$ a) {
+    static int bitCount($elemtype$ a) {
 #if[short]
         return Integer.bitCount((int)a & 0xFFFF);
 #else[short]
@@ -2134,7 +2192,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[!FP]
 #if[!FP]
 #if[!intOrLong]
-    static int numberOfTrailingZeros($type$ a) {
+    static int numberOfTrailingZeros($elemtype$ a) {
 #if[short]
         return a != 0 ? Integer.numberOfTrailingZeros(a) : 16;
 #else[short]
@@ -2145,7 +2203,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[!FP]
 #if[!FP]
 #if[!intOrLong]
-    static int numberOfLeadingZeros($type$ a) {
+    static int numberOfLeadingZeros($elemtype$ a) {
 #if[short]
         return a >= 0 ? Integer.numberOfLeadingZeros(a) - 16 : 0;
 #else[short]
@@ -2153,18 +2211,18 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[short]
     }
 
-    static $type$ reverse($type$ a) {
+    static $elemtype$ reverse($elemtype$ a) {
         if (a == 0 || a == -1) return a;
 
 #if[short]
-        $type$ b = rotateLeft(a, 8);
-        b = ($type$) (((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
-        b = ($type$) (((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
-        b = ($type$) (((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
+        $elemtype$ b = rotateLeft(a, 8);
+        b = ($elemtype$) (((b & 0x5555) << 1) | ((b & 0xAAAA) >>> 1));
+        b = ($elemtype$) (((b & 0x3333) << 2) | ((b & 0xCCCC) >>> 2));
+        b = ($elemtype$) (((b & 0x0F0F) << 4) | ((b & 0xF0F0) >>> 4));
 #else[short]
-        $type$ b = rotateLeft(a, 4);
-        b = ($type$) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
-        b = ($type$) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
+        $elemtype$ b = rotateLeft(a, 4);
+        b = ($elemtype$) (((b & 0x55) << 1) | ((b & 0xAA) >>> 1));
+        b = ($elemtype$) (((b & 0x33) << 2) | ((b & 0xCC) >>> 2));
 #end[short]
         return b;
     }
@@ -2257,11 +2315,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @param e the input scalar
      * @return the result mask of testing if this vector
      *         is equal to {@code e}
-     * @see #compare(VectorOperators.Comparison,$type$)
+     * @see #compare(VectorOperators.Comparison,$elemtype$)
      */
     @ForceInline
     public final
-    VectorMask<$Boxtype$> eq($type$ e) {
+    VectorMask<$Boxtype$> eq($elemtype$ e) {
         return compare(EQ, e);
     }
 
@@ -2285,11 +2343,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @param e the input scalar
      * @return the mask result of testing if this vector
      *         is less than the input scalar
-     * @see #compare(VectorOperators.Comparison,$type$)
+     * @see #compare(VectorOperators.Comparison,$elemtype$)
      */
     @ForceInline
     public final
-    VectorMask<$Boxtype$> lt($type$ e) {
+    VectorMask<$Boxtype$> lt($elemtype$ e) {
         return compare(LT, e);
     }
 
@@ -2413,7 +2471,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         that.check(this);
         int opc = opCode(op);
         return VectorSupport.compare(
-            opc, getClass(), maskType, $type$.class, length(),
+            opc, getClass(), maskType, $elemtype$.class, length(),
             this, that, null,
             (cond, v0, v1, m1) -> {
                 AbstractMask<$Boxtype$> m
@@ -2435,7 +2493,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         m.check(maskType, this);
         int opc = opCode(op);
         return VectorSupport.compare(
-            opc, getClass(), maskType, $type$.class, length(),
+            opc, getClass(), maskType, $elemtype$.class, length(),
             this, that, m,
             (cond, v0, v1, m1) -> {
                 AbstractMask<$Boxtype$> cmpM
@@ -2448,14 +2506,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     }
 
     @ForceInline
-    private static boolean compareWithOp(int cond, $type$ a, $type$ b) {
+    private static boolean compareWithOp(int cond, $elemtype$ a, $elemtype$ b) {
         return switch (cond) {
-            case BT_eq -> a == b;
-            case BT_ne -> a != b;
-            case BT_lt -> a < b;
-            case BT_le -> a <= b;
-            case BT_gt -> a > b;
-            case BT_ge -> a >= b;
+            case BT_eq -> {#if[FP16]?a.floatValue() == b.floatValue():a == b}; // FP16 lanes are objects: compare values, not references/bits
+            case BT_ne -> {#if[FP16]?a.floatValue() != b.floatValue():a != b}; // keeps NaN != NaN and +0.0 == -0.0 like the ordered cases
+            case BT_lt -> {#if[FP16]?a.floatValue() < b.floatValue():a < b};
+            case BT_le -> {#if[FP16]?a.floatValue() <= b.floatValue():a <= b};
+            case BT_gt -> {#if[FP16]?a.floatValue() > b.floatValue():a > b};
+            case BT_ge -> {#if[FP16]?a.floatValue() >= b.floatValue():a >= b};
 #if[!FP]
             case BT_ult -> $Boxtype$.compareUnsigned(a, b) < 0;
             case BT_ule -> $Boxtype$.compareUnsigned(a, b) <= 0;
@@ -2486,17 +2544,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *         compares to the input, according to the selected
      *         comparison operator
      * @see $abstractvectortype$#compare(VectorOperators.Comparison,Vector)
-     * @see #eq($type$)
-     * @see #lt($type$)
+     * @see #eq($elemtype$)
+     * @see #lt($elemtype$)
      */
     public abstract
-    VectorMask<$Boxtype$> compare(Comparison op, $type$ e);
+    VectorMask<$Boxtype$> compare(Comparison op, $elemtype$ e);
 
     /*package-private*/
     @ForceInline
     final
     <M extends VectorMask<$Boxtype$>>
-    M compareTemplate(Class<M> maskType, Comparison op, $type$ e) {
+    M compareTemplate(Class<M> maskType, Comparison op, $elemtype$ e) {
         return compareTemplate(maskType, op, broadcast(e));
     }
 
@@ -2522,7 +2580,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      */
     @ForceInline
     public final VectorMask<$Boxtype$> compare(VectorOperators.Comparison op,
-                                               $type$ e,
+                                               $elemtype$ e,
                                                VectorMask<$Boxtype$> m) {
         return compare(op, broadcast(e), m);
     }
@@ -2570,7 +2628,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     blendTemplate(Class<M> maskType, $abstractvectortype$ v, M m) {
         v.check(this);
         return VectorSupport.blend(
-            getClass(), maskType, $type$.class, length(),
+            getClass(), maskType, $elemtype$.class, length(),
             this, v, m,
             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
     }
@@ -2587,7 +2645,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         // make sure VLENGTH*scale doesn't overflow:
         vsp.checkScale(scale);
         return VectorSupport.indexVector(
-            getClass(), $type$.class, length(),
+            getClass(), $elemtype$.class, length(),
             this, scale, vsp,
             (v, scale_, s)
             -> {
@@ -2595,8 +2653,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 // instruction directly, load IOTA from memory
                 // and multiply.
                 $abstractvectortype$ iota = s.iota();
-                $type$ sc = ($type$) scale_;
-                return v.add(sc == 1 ? iota : iota.mul(sc));
+                return v.add(scale_ == 1 ? iota : iota.mul({#if[FP16]?Float16.valueOf(scale_):($elemtype$)scale_}));
             });
     }
 
@@ -2617,7 +2674,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *         the scalar value
      */
     @ForceInline
-    public final $abstractvectortype$ blend($type$ e,
+    public final $abstractvectortype$ blend($elemtype$ e,
                                             VectorMask<$Boxtype$> m) {
         return blend(broadcast(e), m);
     }
@@ -2661,7 +2718,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         that.check(this);
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<$Boxtype$> iota = iotaShuffle();
-        VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast(($type$)(length() - origin))));
+        $elemtype$ pivotidx = {#if[FP16]?Float16.valueOf(length() - origin):($elemtype$)(length() - origin)};
+        VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return that.rearrange(iota).blend(this.rearrange(iota), blendMask);
     }
@@ -2691,7 +2749,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     $abstractvectortype$ sliceTemplate(int origin) {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<$Boxtype$> iota = iotaShuffle();
-        VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast(($type$)(length() - origin))));
+        $elemtype$ pivotidx = {#if[FP16]?Float16.valueOf(length() - origin):($elemtype$)(length() - origin)};
+        VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, broadcast(pivotidx));
         iota = iotaShuffle(origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2713,7 +2772,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<$Boxtype$> iota = iotaShuffle();
         VectorMask<$Boxtype$> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
-                                                                  (broadcast(($type$)(origin))));
+                                                                  (broadcast({#if[FP16]?Float16.valueOf(origin):($elemtype$)(origin)})));
         iota = iotaShuffle(-origin, 1, true);
         return that.blend(this.rearrange(iota), blendMask);
     }
@@ -2753,7 +2812,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         Objects.checkIndex(origin, length() + 1);
         VectorShuffle<$Boxtype$> iota = iotaShuffle();
         VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.GE,
-                                                                  (broadcast(($type$)(origin))));
+                                                                  broadcast({#if[FP16]?Float16.valueOf(origin):($elemtype$)(origin)}));
         iota = iotaShuffle(-origin, 1, true);
         return vspecies().zero().blend(this.rearrange(iota), blendMask);
     }
@@ -2779,7 +2838,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     $abstractvectortype$ rearrangeTemplate(Class<S> shuffletype, S shuffle) {
         shuffle.checkIndexes();
         return VectorSupport.rearrangeOp(
-            getClass(), shuffletype, null, $type$.class, length(),
+            getClass(), shuffletype, null, $elemtype$.class, length(),
             this, shuffle, null,
             (v1, s_, m_) -> v1.uOp((i, a) -> {
                 int ei = s_.laneSource(i);
@@ -2811,11 +2870,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
             throw new AssertionError();
         }
         return VectorSupport.rearrangeOp(
-                   getClass(), shuffletype, masktype, $type$.class, length(),
+                   getClass(), shuffletype, masktype, $elemtype$.class, length(),
                    this, shuffle, m,
                    (v1, s_, m_) -> v1.uOp((i, a) -> {
                         int ei = s_.laneSource(i);
-                        return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
+                        return ei < 0  || !m_.laneIsSet(i) ? {#if[FP16]?Float16.valueOf(0):0} : v1.lane(ei);
                    }));
     }
 
@@ -2839,7 +2898,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         S ws = (S) shuffle.wrapIndexes();
         $abstractvectortype$ r0 =
             VectorSupport.rearrangeOp(
-                getClass(), shuffletype, null, $type$.class, length(),
+                getClass(), shuffletype, null, $elemtype$.class, length(),
                 this, ws, null,
                 (v0, s_, m_) -> v0.uOp((i, a) -> {
                     int ei = s_.laneSource(i);
@@ -2847,7 +2906,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 }));
         $abstractvectortype$ r1 =
             VectorSupport.rearrangeOp(
-                getClass(), shuffletype, null, $type$.class, length(),
+                getClass(), shuffletype, null, $elemtype$.class, length(),
                 v, ws, null,
                 (v1, s_, m_) -> v1.uOp((i, a) -> {
                     int ei = s_.laneSource(i);
@@ -2859,10 +2918,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     private final
     VectorShuffle<$Boxtype$> toShuffle0($Type$Species dsp) {
-        $type$[] a = toArray();
+        $elemtype$[] a = toArray();
         int[] sa = new int[a.length];
         for (int i = 0; i < a.length; i++) {
-            sa[i] = (int) a[i];
+            sa[i] = {#if[FP16]?a[i].intValue():(int) a[i]};
         }
         return VectorShuffle.fromArray(dsp, sa, 0);
     }
@@ -2873,7 +2932,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     VectorShuffle<$Boxtype$> toShuffleTemplate(Class<?> shuffleType) {
         $Type$Species vsp = vspecies();
         return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
-                                     getClass(), $type$.class, length(),
+                                     getClass(), $elemtype$.class, length(),
                                      shuffleType, byte.class, length(),
                                      this, vsp,
                                      $Type$Vector::toShuffle0);
@@ -2894,7 +2953,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     $Type$Vector compressTemplate(Class<M> masktype, M m) {
       m.check(masktype, this);
       return ($Type$Vector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_COMPRESS, getClass(), masktype,
-                                                        $type$.class, length(), this, m,
+                                                        $elemtype$.class, length(), this, m,
                                                         (v1, m1) -> compressHelper(v1, m1));
     }
 
@@ -2913,7 +2972,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     $Type$Vector expandTemplate(Class<M> masktype, M m) {
       m.check(masktype, this);
       return ($Type$Vector) VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_EXPAND, getClass(), masktype,
-                                                        $type$.class, length(), this, m,
+                                                        $elemtype$.class, length(), this, m,
                                                         (v1, m1) -> expandHelper(v1, m1));
     }
 
@@ -2965,9 +3024,9 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @param mask a bitwise mask to enable blending of the input bits
      * @return the bitwise blend of the given bits into the current vector,
      *         under control of the bitwise mask
-     * @see #bitwiseBlend($type$,$type$)
-     * @see #bitwiseBlend($type$,Vector)
-     * @see #bitwiseBlend(Vector,$type$)
+     * @see #bitwiseBlend($elemtype$,$elemtype$)
+     * @see #bitwiseBlend($elemtype$,Vector)
+     * @see #bitwiseBlend(Vector,$elemtype$)
      * @see VectorOperators#BITWISE_BLEND
      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
      */
@@ -2996,11 +3055,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *         under control of the bitwise mask
      * @see #bitwiseBlend(Vector,Vector)
      * @see VectorOperators#BITWISE_BLEND
-     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask)
      */
     @ForceInline
     public final
-    $abstractvectortype$ bitwiseBlend($type$ bits, $type$ mask) {
+    $abstractvectortype$ bitwiseBlend($elemtype$ bits, $elemtype$ mask) {
         return lanewise(BITWISE_BLEND, bits, mask);
     }
 
@@ -3023,11 +3082,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *         under control of the bitwise mask
      * @see #bitwiseBlend(Vector,Vector)
      * @see VectorOperators#BITWISE_BLEND
-     * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,Vector,VectorMask)
      */
     @ForceInline
     public final
-    $abstractvectortype$ bitwiseBlend($type$ bits, Vector<$Boxtype$> mask) {
+    $abstractvectortype$ bitwiseBlend($elemtype$ bits, Vector<$Boxtype$> mask) {
         return lanewise(BITWISE_BLEND, bits, mask);
     }
 
@@ -3050,11 +3109,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *         under control of the bitwise mask
      * @see #bitwiseBlend(Vector,Vector)
      * @see VectorOperators#BITWISE_BLEND
-     * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,$elemtype$,VectorMask)
      */
     @ForceInline
     public final
-    $abstractvectortype$ bitwiseBlend(Vector<$Boxtype$> bits, $type$ mask) {
+    $abstractvectortype$ bitwiseBlend(Vector<$Boxtype$> bits, $elemtype$ mask) {
         return lanewise(BITWISE_BLEND, bits, mask);
     }
 #end[BITWISE]
@@ -3072,7 +3131,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *
      * This is a lane-wise ternary operation which applies an operation
      * conforming to the specification of
-     * {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)}
+     * {@link {#if[FP16]?Float16:Math}#fma($elemtype$,$elemtype$,$elemtype$) {#if[FP16]?Float16:Math}.fma(a,b,c)}
      * to each lane.
 #if[intOrFloat]
      * The operation is adapted to cast the operands and the result,
@@ -3091,7 +3150,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @return the product of this vector and the second input vector
      *         summed with the third input vector, using extended precision
      *         for the intermediate result
-     * @see #fma($type$,$type$)
+     * @see #fma($elemtype$,$elemtype$)
      * @see VectorOperators#FMA
      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
      */
@@ -3113,7 +3172,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *
      * This is a lane-wise ternary operation which applies an operation
      * conforming to the specification of
-     * {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)}
+     * {@link {#if[FP16]?Float16:Math}#fma($elemtype$,$elemtype$,$elemtype$) {#if[FP16]?Float16:Math}.fma(a,b,c)}
      * to each lane.
 #if[intOrFloat]
      * The operation is adapted to cast the operands and the result,
@@ -3134,15 +3193,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *         for the intermediate result
      * @see #fma(Vector,Vector)
      * @see VectorOperators#FMA
-     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,$elemtype$,$elemtype$,VectorMask)
      */
     @ForceInline
     public final
-    $abstractvectortype$ fma($type$ b, $type$ c) {
+    $abstractvectortype$ fma($elemtype$ b, $elemtype$ c) {
         return lanewise(FMA, b, c);
     }
 
-    // Don't bother with (Vector,$type$) and ($type$,Vector) overloadings.
+    // Don't bother with (Vector,$elemtype$) and ($elemtype$,Vector) overloadings.
 #end[FP]
 
     // Type specific horizontal reductions
@@ -3195,7 +3254,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[BITWISE]
      * @see VectorOperators#FIRST_NONZERO
      */
-    public abstract $type$ reduceLanes(VectorOperators.Associative op);
+    public abstract $elemtype$ reduceLanes(VectorOperators.Associative op);
 
     /**
      * Returns a value accumulated from selected lanes of this vector,
@@ -3215,7 +3274,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *  {@code ADD}
 #end[BITWISE]
      * or {@code FIRST_NONZERO},
-     * then the identity value is {#if[FP]?positive }zero, the default {@code $type$} value.
+     * then the identity value is {#if[FP]?positive }zero, the default {@code $elemtype$} value.
      * <li>
      * If the operation is {@code MUL},
      * then the identity value is one.
@@ -3273,24 +3332,24 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *         not support the requested operation
      * @see #reduceLanes(VectorOperators.Associative)
      */
-    public abstract $type$ reduceLanes(VectorOperators.Associative op,
+    public abstract $elemtype$ reduceLanes(VectorOperators.Associative op,
                                        VectorMask<$Boxtype$> m);
 
     /*package-private*/
     @ForceInline
     final
-    $type$ reduceLanesTemplate(VectorOperators.Associative op,
+    $elemtype$ reduceLanesTemplate(VectorOperators.Associative op,
                                Class<? extends VectorMask<$Boxtype$>> maskClass,
                                VectorMask<$Boxtype$> m) {
         m.check(maskClass, this);
         if (op == FIRST_NONZERO) {
             // FIXME:  The JIT should handle this.
-            $abstractvectortype$ v = broadcast(($type$) 0).blend(this, m);
+            $abstractvectortype$ v = broadcast({#if[FP16]?Float16.valueOf(0):($elemtype$) 0}).blend(this, m);
             return v.reduceLanesTemplate(op);
         }
         int opc = opCode(op);
         return fromBits(VectorSupport.reductionCoerced(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, m,
             REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations)));
     }
@@ -3298,17 +3357,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     /*package-private*/
     @ForceInline
     final
-    $type$ reduceLanesTemplate(VectorOperators.Associative op) {
+    $elemtype$ reduceLanesTemplate(VectorOperators.Associative op) {
         if (op == FIRST_NONZERO) {
             // FIXME:  The JIT should handle this.
             VectorMask<$Boxbitstype$> thisNZ
                 = this.viewAsIntegralLanes().compare(NE, ($bitstype$) 0);
             int ft = thisNZ.firstTrue();
-            return ft < length() ? this.lane(ft) : ($type$) 0;
+            return ft < length() ? this.lane(ft) : {#if[FP16]?Float16.valueOf(0):($elemtype$) 0};
         }
         int opc = opCode(op);
         return fromBits(VectorSupport.reductionCoerced(
-            opc, getClass(), null, $type$.class, length(),
+            opc, getClass(), null, $elemtype$.class, length(),
             this, null,
             REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations)));
     }
@@ -3319,32 +3378,43 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     private static ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>> reductionOperations(int opc_) {
         switch (opc_) {
+#if[FP16]
             case VECTOR_OP_ADD: return (v, m) ->
-                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b)));
+                    toBits(v.rOp(Float16.valueOf(0), m, (i, a, b) -> Float16.add(a, b))); // identity: zero
             case VECTOR_OP_MUL: return (v, m) ->
-                    toBits(v.rOp(($type$)1, m, (i, a, b) -> ($type$)(a * b)));
+                    toBits(v.rOp(Float16.valueOf(1), m, (i, a, b) -> Float16.multiply(a, b))); // identity: one, as in the #else branch
             case VECTOR_OP_MIN: return (v, m) ->
-                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b)));
+                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> Float16.min(a, b))); // identity: largest value, as in the #else branch
             case VECTOR_OP_MAX: return (v, m) ->
-                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b)));
+                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> Float16.max(a, b))); // identity: smallest value, as in the #else branch
+#else[FP16]
+            case VECTOR_OP_ADD: return (v, m) ->
+                    toBits(v.rOp(($elemtype$)0, m, (i, a, b) -> ($elemtype$)(a + b)));
+            case VECTOR_OP_MUL: return (v, m) ->
+                    toBits(v.rOp(($elemtype$)1, m, (i, a, b) -> ($elemtype$)(a * b)));
+            case VECTOR_OP_MIN: return (v, m) ->
+                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($elemtype$) Math.min(a, b)));
+            case VECTOR_OP_MAX: return (v, m) ->
+                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($elemtype$) Math.max(a, b)));
+#end[FP16]
 #if[BITWISE]
             case VECTOR_OP_AND: return (v, m) ->
-                    toBits(v.rOp(($type$)-1, m, (i, a, b) -> ($type$)(a & b)));
+                    toBits(v.rOp(($elemtype$)-1, m, (i, a, b) -> ($elemtype$)(a & b)));
             case VECTOR_OP_OR: return (v, m) ->
-                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a | b)));
+                    toBits(v.rOp(($elemtype$)0, m, (i, a, b) -> ($elemtype$)(a | b)));
             case VECTOR_OP_XOR: return (v, m) ->
-                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a ^ b)));
+                    toBits(v.rOp(($elemtype$)0, m, (i, a, b) -> ($elemtype$)(a ^ b)));
 #end[BITWISE]
             default: return null;
         }
     }
 
 #if[FP]
-    private static final $type$ MIN_OR_INF = $Boxtype$.NEGATIVE_INFINITY;
-    private static final $type$ MAX_OR_INF = $Boxtype$.POSITIVE_INFINITY;
+    private static final $elemtype$ MIN_OR_INF = $Boxtype$.NEGATIVE_INFINITY;
+    private static final $elemtype$ MAX_OR_INF = $Boxtype$.POSITIVE_INFINITY;
 #else[FP]
-    private static final $type$ MIN_OR_INF = $Boxtype$.MIN_VALUE;
-    private static final $type$ MAX_OR_INF = $Boxtype$.MAX_VALUE;
+    private static final $elemtype$ MIN_OR_INF = $Boxtype$.MIN_VALUE;
+    private static final $elemtype$ MAX_OR_INF = $Boxtype$.MAX_VALUE;
 #end[FP]
 
     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
@@ -3361,7 +3431,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws IllegalArgumentException if the index is out of range
      * ({@code < 0 || >= length()})
      */
-    public abstract $type$ lane(int i);
+    public abstract $elemtype$ lane(int i);
 
     /**
      * Replaces the lane element of this vector at lane index {@code i} with
@@ -3379,22 +3449,22 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * @throws IllegalArgumentException if the index is out of range
      * ({@code < 0 || >= length()})
      */
-    public abstract $abstractvectortype$ withLane(int i, $type$ e);
+    public abstract $abstractvectortype$ withLane(int i, $elemtype$ e);
 
     // Memory load operations
 
     /**
-     * Returns an array of type {@code $type$[]}
+     * Returns an array of type {@code $elemtype$[]}
      * containing all the lane values.
      * The array length is the same as the vector length.
      * The array elements are stored in lane order.
      * <p>
      * This method behaves as if it stores
      * this vector into an allocated array
-     * (using {@link #intoArray($type$[], int) intoArray})
+     * (using {@link #intoArray($elemtype$[], int) intoArray})
      * and returns the array as follows:
      * <pre>{@code
-     *   $type$[] a = new $type$[this.length()];
+     *   $elemtype$[] a = new $elemtype$[this.length()];
      *   this.intoArray(a, 0);
      *   return a;
      * }</pre>
@@ -3403,8 +3473,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      */
     @ForceInline
     @Override
-    public final $type$[] toArray() {
-        $type$[] a = new $type$[vspecies().laneCount()];
+    public final $elemtype$[] toArray() {
+        $elemtype$[] a = new $elemtype$[vspecies().laneCount()];
         intoArray(a, 0);
         return a;
     }
@@ -3438,10 +3508,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     @Override
     public final int[] toIntArray() {
-        $type$[] a = toArray();
+        $elemtype$[] a = toArray();
         int[] res = new int[a.length];
         for (int i = 0; i < a.length; i++) {
-            $type$ e = a[i];
+            $elemtype$ e = a[i];
             res[i] = (int) $Type$Species.toIntegralChecked(e, true);
         }
         return res;
@@ -3475,11 +3545,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     @Override
     public final long[] toLongArray() {
-        $type$[] a = toArray();
+        $elemtype$[] a = toArray();
         long[] res = new long[a.length];
         for (int i = 0; i < a.length; i++) {
-            $type$ e = a[i];
+#if[FP16]
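+            // Every integral value a Float16 lane can hold lies within the long range, so no range check is made.
+            // NOTE(review): unlike the #else path, toIntegralChecked is skipped here; confirm fractional/NaN lanes may convert silently.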
+            res[i] = a[i].longValue();
+#else[FP16]
+            $elemtype$ e = a[i];
             res[i] = $Type$Species.toIntegralChecked(e, false);
+#end[FP16]
         }
         return res;
     }
@@ -3516,17 +3592,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     @Override
     public final double[] toDoubleArray() {
-        $type$[] a = toArray();
+        $elemtype$[] a = toArray();
         double[] res = new double[a.length];
         for (int i = 0; i < a.length; i++) {
-            res[i] = (double) a[i];
+            res[i] = {#if[FP16]?a[i].doubleValue():((double) a[i])};
         }
         return res;
     }
 #end[double]
 
     /**
-     * Loads a vector from an array of type {@code $type$[]}
+     * Loads a vector from an array of type {@code $elemtype$[]}
      * starting at an offset.
      * For each vector lane, where {@code N} is the vector lane index, the
      * array element at index {@code offset + N} is placed into the
@@ -3543,17 +3619,17 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     public static
     $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
-                                   $type$[] a, int offset) {
+                                   $elemtype$[] a, int offset) {
         offset = checkFromIndexSize(offset, species.length(), a.length);
         $Type$Species vsp = ($Type$Species) species;
         return vsp.dummyVector().fromArray0(a, offset);
     }
 
     /**
-     * Loads a vector from an array of type {@code $type$[]}
+     * Loads a vector from an array of type {@code $elemtype$[]}
      * starting at an offset and using a mask.
      * Lanes where the mask is unset are filled with the default
-     * value of {@code $type$} ({#if[FP]?positive }zero).
+     * value of {@code $elemtype$} ({#if[FP]?positive }zero).
      * For each vector lane, where {@code N} is the vector lane index,
      * if the mask lane at index {@code N} is set then the array element at
      * index {@code offset + N} is placed into the resulting vector at lane index
@@ -3573,7 +3649,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     public static
     $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
-                                   $type$[] a, int offset,
+                                   $elemtype$[] a, int offset,
                                    VectorMask<$Boxtype$> m) {
         $Type$Species vsp = ($Type$Species) species;
         if (VectorIntrinsics.indexInRange(offset, vsp.length(), a.length)) {
@@ -3586,7 +3662,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /**
      * Gathers a new vector composed of elements from an array of type
-     * {@code $type$[]},
+     * {@code $elemtype$[]},
      * using indexes obtained by adding a fixed {@code offset} to a
      * series of secondary offsets from an <em>index map</em>.
      * The index map is a contiguous sequence of {@code VLENGTH}
@@ -3619,7 +3695,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     public static
     $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
-                                   $type$[] a, int offset,
+                                   $elemtype$[] a, int offset,
                                    int[] indexMap, int mapOffset) {
         $Type$Species vsp = ($Type$Species) species;
         return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
@@ -3628,7 +3704,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     public static
     $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
-                                   $type$[] a, int offset,
+                                   $elemtype$[] a, int offset,
                                    int[] indexMap, int mapOffset) {
         $Type$Species vsp = ($Type$Species) species;
         IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
@@ -3669,7 +3745,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         vix = VectorIntrinsics.checkIndex(vix, a.length);
 
         return VectorSupport.loadWithMap(
-            vectorType, null, $type$.class, vsp.laneCount(),
+            vectorType, null, $elemtype$.class, vsp.laneCount(),
             isp.vectorType(),
             a, ARRAY_BASE, vix, null,
             a, offset, indexMap, mapOffset, vsp,
@@ -3680,7 +3756,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /**
      * Gathers a new vector composed of elements from an array of type
-     * {@code $type$[]},
+     * {@code $elemtype$[]},
      * under the control of a mask, and
      * using indexes obtained by adding a fixed {@code offset} to a
      * series of secondary offsets from an <em>index map</em>.
@@ -3718,7 +3794,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     public static
     $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
-                                   $type$[] a, int offset,
+                                   $elemtype$[] a, int offset,
                                    int[] indexMap, int mapOffset,
                                    VectorMask<$Boxtype$> m) {
         $Type$Species vsp = ($Type$Species) species;
@@ -3728,7 +3804,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     public static
     $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
-                                   $type$[] a, int offset,
+                                   $elemtype$[] a, int offset,
                                    int[] indexMap, int mapOffset,
                                    VectorMask<$Boxtype$> m) {
         if (m.allTrue()) {
@@ -3771,7 +3847,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * Loads a vector from an array of type {@code char[]}
      * starting at an offset and using a mask.
      * Lanes where the mask is unset are filled with the default
-     * value of {@code $type$} ({#if[FP]?positive }zero).
+     * value of {@code $elemtype$} ({#if[FP]?positive }zero).
      * For each vector lane, where {@code N} is the vector lane index,
      * if the mask lane at index {@code N} is set then the array element at
      * index {@code offset + N}
@@ -3926,7 +4002,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * Loads a vector from an array of type {@code boolean[]}
      * starting at an offset and using a mask.
      * Lanes where the mask is unset are filled with the default
-     * value of {@code $type$} ({#if[FP]?positive }zero).
+     * value of {@code $elemtype$} ({#if[FP]?positive }zero).
      * For each vector lane, where {@code N} is the vector lane index,
      * if the mask lane at index {@code N} is set then the array element at
      * index {@code offset + N}
@@ -4096,7 +4172,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * starting at an offset into the memory segment
      * and using a mask.
      * Lanes where the mask is unset are filled with the default
-     * value of {@code $type$} ({#if[FP]?positive }zero).
+     * value of {@code $elemtype$} ({#if[FP]?positive }zero).
      * Bytes are composed into primitive lane elements according
      * to the specified byte order.
      * The vector is arranged into lanes according to
@@ -4105,7 +4181,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * The following pseudocode illustrates the behavior:
      * <pre>{@code
      * var slice = ms.asSlice(offset);
-     * $type$[] ar = new $type$[species.length()];
+     * $elemtype$[] ar = new $elemtype$[species.length()];
      * for (int n = 0; n < ar.length; n++) {
      *     if (m.laneIsSet(n)) {
      *         ar[n] = slice.getAtIndex(ValuaLayout.JAVA_$TYPE$.withByteAlignment(1), n);
@@ -4158,14 +4234,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     // Memory store operations
 
     /**
-     * Stores this vector into an array of type {@code $type$[]}
+     * Stores this vector into an array of type {@code $elemtype$[]}
      * starting at an offset.
      * <p>
      * For each vector lane, where {@code N} is the vector lane index,
      * the lane element at index {@code N} is stored into the array
      * element {@code a[offset+N]}.
      *
-     * @param a the array, of type {@code $type$[]}
+     * @param a the array, of type {@code $elemtype$[]}
      * @param offset the offset into the array
      * @throws IndexOutOfBoundsException
      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
@@ -4173,7 +4249,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      */
     @ForceInline
     public final
-    void intoArray($type$[] a, int offset) {
+    void intoArray($elemtype$[] a, int offset) {
         offset = checkFromIndexSize(offset, length(), a.length);
         $Type$Species vsp = vspecies();
         VectorSupport.store(
@@ -4187,7 +4263,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     }
 
     /**
-     * Stores this vector into an array of type {@code $type$[]}
+     * Stores this vector into an array of type {@code $elemtype$[]}
      * starting at offset and using a mask.
      * <p>
      * For each vector lane, where {@code N} is the vector lane index,
@@ -4202,7 +4278,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * That is, unset lanes may correspond to array indexes less than
      * zero or beyond the end of the array.
      *
-     * @param a the array, of type {@code $type$[]}
+     * @param a the array, of type {@code $elemtype$[]}
      * @param offset the offset into the array
      * @param m the mask controlling lane storage
      * @throws IndexOutOfBoundsException
@@ -4212,7 +4288,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      */
     @ForceInline
     public final
-    void intoArray($type$[] a, int offset,
+    void intoArray($elemtype$[] a, int offset,
                    VectorMask<$Boxtype$> m) {
         if (m.allTrue()) {
             intoArray(a, offset);
@@ -4226,7 +4302,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     }
 
     /**
-     * Scatters this vector into an array of type {@code $type$[]}
+     * Scatters this vector into an array of type {@code $elemtype$[]}
      * using indexes obtained by adding a fixed {@code offset} to a
      * series of secondary offsets from an <em>index map</em>.
      * The index map is a contiguous sequence of {@code VLENGTH}
@@ -4254,7 +4330,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #if[byteOrShort]
     @ForceInline
     public final
-    void intoArray($type$[] a, int offset,
+    void intoArray($elemtype$[] a, int offset,
                    int[] indexMap, int mapOffset) {
         stOp(a, offset,
              (arr, off, i, e) -> {
@@ -4265,7 +4341,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #else[byteOrShort]
     @ForceInline
     public final
-    void intoArray($type$[] a, int offset,
+    void intoArray($elemtype$[] a, int offset,
                    int[] indexMap, int mapOffset) {
         $Type$Species vsp = vspecies();
         IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
@@ -4318,7 +4394,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[byteOrShort]
 
     /**
-     * Scatters this vector into an array of type {@code $type$[]},
+     * Scatters this vector into an array of type {@code $elemtype$[]},
      * under the control of a mask, and
      * using indexes obtained by adding a fixed {@code offset} to a
      * series of secondary offsets from an <em>index map</em>.
@@ -4350,7 +4426,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #if[byteOrShort]
     @ForceInline
     public final
-    void intoArray($type$[] a, int offset,
+    void intoArray($elemtype$[] a, int offset,
                    int[] indexMap, int mapOffset,
                    VectorMask<$Boxtype$> m) {
         stOp(a, offset, m,
@@ -4362,7 +4438,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #else[byteOrShort]
     @ForceInline
     public final
-    void intoArray($type$[] a, int offset,
+    void intoArray($elemtype$[] a, int offset,
                    int[] indexMap, int mapOffset,
                    VectorMask<$Boxtype$> m) {
         if (m.allTrue()) {
@@ -4763,10 +4839,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /*package-private*/
     abstract
-    $abstractvectortype$ fromArray0($type$[] a, int offset);
+    $abstractvectortype$ fromArray0($elemtype$[] a, int offset);
     @ForceInline
     final
-    $abstractvectortype$ fromArray0Template($type$[] a, int offset) {
+    $abstractvectortype$ fromArray0Template($elemtype$[] a, int offset) {
         $Type$Species vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
@@ -4778,11 +4854,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     /*package-private*/
     abstract
-    $abstractvectortype$ fromArray0($type$[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange);
+    $abstractvectortype$ fromArray0($elemtype$[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange);
     @ForceInline
     final
     <M extends VectorMask<$Boxtype$>>
-    $abstractvectortype$ fromArray0Template(Class<M> maskClass, $type$[] a, int offset, M m, int offsetInRange) {
+    $abstractvectortype$ fromArray0Template(Class<M> maskClass, $elemtype$[] a, int offset, M m, int offsetInRange) {
         m.check(species());
         $Type$Species vsp = vspecies();
         return VectorSupport.loadMasked(
@@ -4796,13 +4872,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #if[!byteOrShort]
     /*package-private*/
     abstract
-    $abstractvectortype$ fromArray0($type$[] a, int offset,
+    $abstractvectortype$ fromArray0($elemtype$[] a, int offset,
                                     int[] indexMap, int mapOffset,
                                     VectorMask<$Boxtype$> m);
     @ForceInline
     final
     <M extends VectorMask<$Boxtype$>>
-    $abstractvectortype$ fromArray0Template(Class<M> maskClass, $type$[] a, int offset,
+    $abstractvectortype$ fromArray0Template(Class<M> maskClass, $elemtype$[] a, int offset,
                                             int[] indexMap, int mapOffset, M m) {
         $Type$Species vsp = vspecies();
         IntVector.IntSpecies isp = IntVector.species(vsp.indexShape());
@@ -4845,7 +4921,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         vix = VectorIntrinsics.checkIndex(vix, a.length);
 
         return VectorSupport.loadWithMap(
-            vectorType, maskClass, $type$.class, vsp.laneCount(),
+            vectorType, maskClass, $elemtype$.class, vsp.laneCount(),
             isp.vectorType(),
             a, ARRAY_BASE, vix, m,
             a, offset, indexMap, mapOffset, vsp,
@@ -4957,10 +5033,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     // byte swapping.
 
     abstract
-    void intoArray0($type$[] a, int offset);
+    void intoArray0($elemtype$[] a, int offset);
     @ForceInline
     final
-    void intoArray0Template($type$[] a, int offset) {
+    void intoArray0Template($elemtype$[] a, int offset) {
         $Type$Species vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
@@ -4972,11 +5048,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     }
 
     abstract
-    void intoArray0($type$[] a, int offset, VectorMask<$Boxtype$> m);
+    void intoArray0($elemtype$[] a, int offset, VectorMask<$Boxtype$> m);
     @ForceInline
     final
     <M extends VectorMask<$Boxtype$>>
-    void intoArray0Template(Class<M> maskClass, $type$[] a, int offset, M m) {
+    void intoArray0Template(Class<M> maskClass, $elemtype$[] a, int offset, M m) {
         m.check(species());
         $Type$Species vsp = vspecies();
         VectorSupport.storeMasked(
@@ -4990,13 +5066,13 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
 #if[!byteOrShort]
     abstract
-    void intoArray0($type$[] a, int offset,
+    void intoArray0($elemtype$[] a, int offset,
                     int[] indexMap, int mapOffset,
                     VectorMask<$Boxtype$> m);
     @ForceInline
     final
     <M extends VectorMask<$Boxtype$>>
-    void intoArray0Template(Class<M> maskClass, $type$[] a, int offset,
+    void intoArray0Template(Class<M> maskClass, $elemtype$[] a, int offset,
                             int[] indexMap, int mapOffset, M m) {
         m.check(species());
         $Type$Species vsp = vspecies();
@@ -5172,12 +5248,12 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     }
 
     static final int ARRAY_SHIFT =
-        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_$TYPE$_INDEX_SCALE);
+        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_{#if[FP16]?OBJECT:$TYPE$}_INDEX_SCALE);
     static final long ARRAY_BASE =
-        Unsafe.ARRAY_$TYPE$_BASE_OFFSET;
+        Unsafe.ARRAY_{#if[FP16]?OBJECT:$TYPE$}_BASE_OFFSET;
 
     @ForceInline
-    static long arrayAddress($type$[] a, int index) {
+    static long arrayAddress($elemtype$[] a, int index) {
         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
     }
 
@@ -5241,7 +5317,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #if[BITWISE]
         return this;
 #else[BITWISE]
-        LaneType ilt = LaneType.$TYPE$.asIntegral();
+        LaneType ilt = LaneType.{#if[FP16]?FLOAT16:$TYPE$}.asIntegral();
         return ($Bitstype$Vector) asVectorRaw(ilt);
 #end[BITWISE]
     }
@@ -5252,7 +5328,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      *
      * @implNote This method always throws
      * {@code UnsupportedOperationException}, because there is no floating
-     * point type of the same size as {@code $type$}.  The return type
+     * point type of the same size as {@code $elemtype$}.  The return type
      * of this method is arbitrarily designated as
      * {@code Vector<?>}.  Future versions of this API may change the return
      * type if additional floating point types become available.
@@ -5261,18 +5337,22 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     @Override
     public final
-    {#if[byteOrShort]?Vector<?>:$Fptype$Vector}
+#if[FP]
+    $Type$Vector
+#else[FP]
+    {#if[byte]?Vector<?>:$Boxfptype$Vector}
+#end[FP]
     viewAsFloatingLanes() {
 #if[FP]
         return this;
 #else[FP]
-        LaneType flt = LaneType.$TYPE$.asFloating();
-#if[!byteOrShort]
-        return ($Fptype$Vector) asVectorRaw(flt);
-#else[!byteOrShort]
-        // asFloating() will throw UnsupportedOperationException for the unsupported type $type$
+        LaneType flt = {#if[short]?LaneType.FLOAT16.asFloating():LaneType.$TYPE$.asFloating()};
+#if[!byte]
+        return ($Boxfptype$Vector) asVectorRaw(flt);
+#else[!byte]
+        // asFloating() will throw UnsupportedOperationException for the unsupported type $elemtype$
         throw new AssertionError("Cannot reach here");
-#end[!byteOrShort]
+#end[!byte]
 #end[FP]
     }
 
@@ -5290,8 +5370,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * in lane order.
      *
      * The string is produced as if by a call to {@link
-     * java.util.Arrays#toString($type$[]) Arrays.toString()},
-     * as appropriate to the {@code $type$} array returned by
+     * java.util.Arrays#toString($elemtype$[]) Arrays.toString()},
+     * as appropriate to the {@code $elemtype$} array returned by
      * {@link #toArray this.toArray()}.
      *
      * @return a string of the form {@code "[0,1,2...]"}
@@ -5345,7 +5425,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 Class<? extends $abstractvectortype$> vectorType,
                 Class<? extends AbstractMask<$Boxtype$>> maskType,
                 Function<Object, $abstractvectortype$> vectorFactory) {
-            super(shape, LaneType.of($type$.class),
+            super(shape, LaneType.of($elemtype$.class),
                   vectorType, maskType,
                   vectorFactory);
             assert(this.elementSize() == $Boxtype$.SIZE);
@@ -5356,7 +5436,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         @Override
         @ForceInline
         public final Class<$Boxtype$> elementType() {
-            return $type$.class;
+            return $elemtype$.class;
         }
 
         @Override
@@ -5385,14 +5465,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         final $abstractvectortype$ broadcastBits(long bits) {
             return ($abstractvectortype$)
                 VectorSupport.fromBitsCoerced(
-                    vectorType, $type$.class, laneCount,
+                    vectorType, $elemtype$.class, laneCount,
                     bits, MODE_BROADCAST, this,
                     (bits_, s_) -> s_.rvOp(i -> bits_));
         }
 
         /*package-private*/
         @ForceInline
-        {#if[long]?public }final $abstractvectortype$ broadcast($type$ e) {
+        {#if[long]?public }final $abstractvectortype$ broadcast($elemtype$ e) {
             return broadcastBits(toBits(e));
         }
 
@@ -5413,8 +5493,8 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
             return value;
 #else[long]
             // Do the conversion, and then test it for failure.
-            $type$ e = ($type$) value;
-            if ((long) e != value) {
+            $elemtype$ e = {#if[FP16]?Float16.valueOf(value):($elemtype$) value};
+            if ({#if[FP16]?e.longValue():(long) e} != value) {
                 throw badElementBits(value, e);
             }
             return toBits(e);
@@ -5423,11 +5503,18 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
         /*package-private*/
         @ForceInline
-        static long toIntegralChecked($type$ e, boolean convertToInt) {
+        static long toIntegralChecked($elemtype$ e, boolean convertToInt) {
+#if[FP16]
+            long value = convertToInt ? e.intValue() : e.longValue();
+            if ((double) value != e.doubleValue()) { // round-trip test, as in the primitive branch: rejects fractional, NaN and infinite lanes
+                throw badArrayBits(e, convertToInt, value);
+            }
+#else[FP16]
             long value = convertToInt ? (int) e : (long) e;
-            if (($type$) value != e) {
+            if (($elemtype$) value != e) {
                 throw badArrayBits(e, convertToInt, value);
             }
+#end[FP16]
             return value;
         }
 
@@ -5436,14 +5523,22 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         @ForceInline
         final $abstractvectortype$ fromIntValues(int[] values) {
             VectorIntrinsics.requireLength(values.length, laneCount);
-            $type$[] va = new $type$[laneCount()];
+            $elemtype$[] va = new $elemtype$[laneCount()];
             for (int i = 0; i < va.length; i++) {
                 int lv = values[i];
-                $type$ v = ($type$) lv;
+#if[FP16]
+                $elemtype$ v = Float16.valueOf(lv);
+                va[i] = v;
+                if (v.intValue() != lv) { // conversion to Float16 did not round-trip back to the same int
+                    throw badElementBits(lv, v);
+                }
+#else[FP16]
+                $elemtype$ v = ($elemtype$) lv;
                 va[i] = v;
                 if ((int)v != lv) {
                     throw badElementBits(lv, v);
                 }
+#end[FP16]
             }
             return dummyVector().fromArray0(va, 0);
         }
@@ -5456,7 +5551,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
             // User entry point
             // Defer only to the equivalent method on the vector class, using the same inputs
             return $abstractvectortype$
-                .fromArray(this, ($type$[]) a, offset);
+                .fromArray(this, ($elemtype$[]) a, offset);
         }
 
         @ForceInline
@@ -5478,7 +5573,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         final @Override
         @ForceInline
         $abstractvectortype$ rvOp(RVOp f) {
-            $type$[] res = new $type$[laneCount()];
+            $elemtype$[] res = new $elemtype$[laneCount()];
             for (int i = 0; i < res.length; i++) {
                 $bitstype$ bits = {#if[!long]?($bitstype$)} f.apply(i);
                 res[i] = fromBits(bits);
@@ -5487,7 +5582,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         }
 
         $Type$Vector vOp(FVOp f) {
-            $type$[] res = new $type$[laneCount()];
+            $elemtype$[] res = new $elemtype$[laneCount()];
             for (int i = 0; i < res.length; i++) {
                 res[i] = f.apply(i);
             }
@@ -5495,7 +5590,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         }
 
         $Type$Vector vOp(VectorMask<$Boxtype$> m, FVOp f) {
-            $type$[] res = new $type$[laneCount()];
+            $elemtype$[] res = new $elemtype$[laneCount()];
             boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
             for (int i = 0; i < res.length; i++) {
                 if (mbits[i]) {
@@ -5616,10 +5711,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     }
 
     /**
-     * Finds a species for an element type of {@code $type$} and shape.
+     * Finds a species for an element type of {@code $elemtype$} and shape.
      *
      * @param s the shape
-     * @return a species for an element type of {@code $type$} and shape
+     * @return a species for an element type of {@code $elemtype$} and shape
      * @throws IllegalArgumentException if no such species exists for the shape
      */
     static $Type$Species species(VectorShape s) {
@@ -5674,6 +5769,6 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * A preferred species is a species of maximal bit-size for the platform.
      */
     public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED
-        = ($Type$Species) VectorSpecies.ofPreferred($type$.class);
+        = ($Type$Species) VectorSpecies.ofPreferred($elemtype$.class);
 }
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template
index f2b36066fa7..13ff09bf7d1 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -52,19 +52,19 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
 
-    static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM
+    static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM
 
-    $vectortype$($type$[] v) {
+    $vectortype$($elemtype$[] v) {
         super(v);
     }
 
     // For compatibility as $vectortype$::new,
     // stored into species.vectorFactory.
     $vectortype$(Object v) {
-        this(($type$[]) v);
+        this(($elemtype$[]) v);
     }
 
-    static final $vectortype$ ZERO = new $vectortype$(new $type$[VLENGTH]);
+    static final $vectortype$ ZERO = new $vectortype$(new $elemtype$[VLENGTH]);
     static final $vectortype$ IOTA = new $vectortype$(VSPECIES.iotaArray());
 
     static {
@@ -88,7 +88,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     @ForceInline
     @Override
-    public final Class<$Boxtype$> elementType() { return $type$.class; }
+    public final Class<$Boxtype$> elementType() { return $elemtype$.class; }
 
     @ForceInline
     @Override
@@ -113,15 +113,15 @@ final class $vectortype$ extends $abstractvectortype$ {
     /*package-private*/
     @ForceInline
     final @Override
-    $type$[] vec() {
-        return ($type$[])getPayload();
+    $elemtype$[] vec() {
+        return ($elemtype$[])getPayload();
     }
 
     // Virtualized constructors
 
     @Override
     @ForceInline
-    public final $vectortype$ broadcast($type$ e) {
+    public final $vectortype$ broadcast($elemtype$ e) {
         return ($vectortype$) super.broadcastTemplate(e);  // specialize
     }
 
@@ -169,7 +169,7 @@ final class $vectortype$ extends $abstractvectortype$ {
     // Make a vector of the same species but the given elements:
     @ForceInline
     final @Override
-    $vectortype$ vectorFactory($type$[] vec) {
+    $vectortype$ vectorFactory($elemtype$[] vec) {
         return new $vectortype$(vec);
     }
 
@@ -238,7 +238,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     @ForceInline
     final @Override
-    $type$ rOp($type$ v, VectorMask<$Boxtype$> m, FBinOp f) {
+    $elemtype$ rOp($elemtype$ v, VectorMask<$Boxtype$> m, FBinOp f) {
         return super.rOpTemplate(v, m, f);  // specialize
     }
 
@@ -337,13 +337,13 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     @Override
     @ForceInline
-    public final $type$ reduceLanes(VectorOperators.Associative op) {
+    public final $elemtype$ reduceLanes(VectorOperators.Associative op) {
         return super.reduceLanesTemplate(op);  // specialized
     }
 
     @Override
     @ForceInline
-    public final $type$ reduceLanes(VectorOperators.Associative op,
+    public final $elemtype$ reduceLanes(VectorOperators.Associative op,
                                     VectorMask<$Boxtype$> m) {
         return super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m);  // specialized
     }
@@ -351,14 +351,16 @@ final class $vectortype$ extends $abstractvectortype$ {
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op) {
-        return (long) super.reduceLanesTemplate(op);  // specialized
+        $elemtype$ res = super.reduceLanesTemplate(op);  // specialized
+        return {#if[FP16]?res.longValue(): (long) res};
     }
 
     @Override
     @ForceInline
     public final long reduceLanesToLong(VectorOperators.Associative op,
                                         VectorMask<$Boxtype$> m) {
-        return (long) super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m);  // specialized
+        $elemtype$ res = super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m);  // specialized
+        return {#if[FP16]?res.longValue(): (long) res};
     }
 
     @ForceInline
@@ -390,7 +392,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     @Override
     @ForceInline
-    public final $masktype$ compare(Comparison op, $type$ s) {
+    public final $masktype$ compare(Comparison op, $elemtype$ s) {
         return super.compareTemplate($masktype$.class, op, s);  // specialize
     }
 
@@ -516,7 +518,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 #if[FP]
     @ForceInline
     @Override
-    public $type$ lane(int i) {
+    public $elemtype$ lane(int i) {
 #if[!Max]
         $bitstype$ bits;
         switch(i) {
@@ -552,7 +554,7 @@ final class $vectortype$ extends $abstractvectortype$ {
         }
         $bitstype$ bits = laneHelper(i);
 #end[!Max]
-        return $Type$.$bitstype$BitsTo$Fptype$(bits);
+        return $Elemtype$.$bitstype$BitsTo{#if[FP16]?Float16:$Fptype$}(bits);
     }
 
     public $bitstype$ laneHelper(int i) {
@@ -560,14 +562,14 @@ final class $vectortype$ extends $abstractvectortype$ {
                      VCLASS, ETYPE, VLENGTH,
                      this, i,
                      (vec, ix) -> {
-                     $type$[] vecarr = vec.vec();
-                     return (long)$Type$.$type$To$Bitstype$Bits(vecarr[ix]);
+                     $elemtype$[] vecarr = vec.vec();
+                     return (long)$Elemtype$.{#if[FP16]?float16:$elemtype$}To$Bitstype$Bits(vecarr[ix]);
                      });
     }
 
     @ForceInline
     @Override
-    public $vectortype$ withLane(int i, $type$ e) {
+    public $vectortype$ withLane(int i, $elemtype$ e) {
 #if[!Max]
         switch(i) {
             case 0: return withLaneHelper(0, e);
@@ -604,20 +606,20 @@ final class $vectortype$ extends $abstractvectortype$ {
 #end[!Max]
     }
 
-    public $vectortype$ withLaneHelper(int i, $type$ e) {
+    public $vectortype$ withLaneHelper(int i, $elemtype$ e) {
         return VectorSupport.insert(
                                 VCLASS, ETYPE, VLENGTH,
-                                this, i, (long)$Type$.$type$To$Bitstype$Bits(e),
+                                this, i, (long)$Elemtype$.{#if[FP16]?float16:$elemtype$}To$Bitstype$Bits(e),  // FP16 expands to Float16.float16ToShortBits(e); float expands to Float.floatToIntBits(e)
                                 (v, ix, bits) -> {
-                                    $type$[] res = v.vec().clone();
-                                    res[ix] = $Type$.$bitstype$BitsTo$Type$(($bitstype$)bits);
+                                    $elemtype$[] res = v.vec().clone();
+                                    res[ix] = $Elemtype$.$bitstype$BitsTo$Elemtype$(($bitstype$)bits);  // e.g. Float.intBitsToFloat for float, Float16.shortBitsToFloat16 for FP16
                                     return v.vectorFactory(res);
                                 });
     }
 #else[FP]
     @ForceInline
     @Override
-    public $type$ lane(int i) {
+    public $elemtype$ lane(int i) {
 #if[!Max]
         switch(i) {
             case 0: return laneHelper(0);
@@ -706,19 +708,19 @@ final class $vectortype$ extends $abstractvectortype$ {
 #end[!Max]
     }
 
-    public $type$ laneHelper(int i) {
-        return ($type$) VectorSupport.extract(
+    public $elemtype$ laneHelper(int i) {  // non-FP branch: gen-src.sh leaves elemtype equal to type unless an arm overrides it
+        return ($elemtype$) VectorSupport.extract(
                                 VCLASS, ETYPE, VLENGTH,
                                 this, i,
                                 (vec, ix) -> {
-                                    $type$[] vecarr = vec.vec();
+                                    $elemtype$[] vecarr = vec.vec();
                                     return (long)vecarr[ix];
                                 });
     }
 
     @ForceInline
     @Override
-    public $vectortype$ withLane(int i, $type$ e) {
+    public $vectortype$ withLane(int i, $elemtype$ e) {
 #if[!Max]
         switch (i) {
             case 0: return withLaneHelper(0, e);
@@ -807,13 +809,13 @@ final class $vectortype$ extends $abstractvectortype$ {
 #end[!Max]
     }
 
-    public $vectortype$ withLaneHelper(int i, $type$ e) {
+    public $vectortype$ withLaneHelper(int i, $elemtype$ e) {
         return VectorSupport.insert(
                                 VCLASS, ETYPE, VLENGTH,
                                 this, i, (long)e,
                                 (v, ix, bits) -> {
-                                    $type$[] res = v.vec().clone();
-                                    res[ix] = ($type$)bits;
+                                    $elemtype$[] res = v.vec().clone();
+                                    res[ix] = ($elemtype$)bits;  // plain cast back from the long carrier; this non-FP branch does no bit conversion
                                     return v.vectorFactory(res);
                                 });
     }
@@ -823,7 +825,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     static final class $masktype$ extends AbstractMask<$Boxtype$> {
         static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
-        static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM
+        static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM
 
         $masktype$(boolean[] bits) {
             this(bits, 0);
@@ -925,7 +927,7 @@ final class $vectortype$ extends $abstractvectortype$ {
         /*package-private*/
         $masktype$ indexPartiallyInUpperRange(long offset, long limit) {
             return ($masktype$) VectorSupport.indexPartiallyInUpperRange(
-                $masktype$.class, $type$.class, VLENGTH, offset, limit,
+                $masktype$.class, ETYPE, VLENGTH, offset, limit,  // ETYPE is this mask class's element-class constant
                 (o, l) -> ($masktype$) TRUE_MASK.indexPartiallyInRange(o, l));
         }
 
@@ -942,7 +944,11 @@ final class $vectortype$ extends $abstractvectortype$ {
         public $masktype$ compress() {
             return ($masktype$)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
                 $vectortype$.class, $masktype$.class, ETYPE, VLENGTH, null, this,
+#if[FP16]
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, Float.floatToFloat16(m1.trueCount())));  // NOTE(review): Float.floatToFloat16 returns the binary16 bit pattern as a short; confirm compare() is meant to receive encoded bits here rather than a Float16 value
+#else[FP16]
                 (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
+#end[FP16]
         }
 
 
@@ -953,7 +959,7 @@ final class $vectortype$ extends $abstractvectortype$ {
         public $masktype$ and(VectorMask<$Boxtype$> mask) {
             Objects.requireNonNull(mask);
             $masktype$ m = ($masktype$)mask;
-            return VectorSupport.binaryOp(VECTOR_OP_AND, $masktype$.class, null, $bitstype$.class, VLENGTH,
+            return VectorSupport.binaryOp(VECTOR_OP_AND, $masktype$.class, null, $maskbitstype$.class, VLENGTH,  // element class now comes from the separate maskbitstype template variable
                                           this, m, null,
                                           (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
         }
@@ -963,7 +969,7 @@ final class $vectortype$ extends $abstractvectortype$ {
         public $masktype$ or(VectorMask<$Boxtype$> mask) {
             Objects.requireNonNull(mask);
             $masktype$ m = ($masktype$)mask;
-            return VectorSupport.binaryOp(VECTOR_OP_OR, $masktype$.class, null, $bitstype$.class, VLENGTH,
+            return VectorSupport.binaryOp(VECTOR_OP_OR, $masktype$.class, null, $maskbitstype$.class, VLENGTH,  // element class now comes from the separate maskbitstype template variable
                                           this, m, null,
                                           (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
         }
@@ -973,7 +979,7 @@ final class $vectortype$ extends $abstractvectortype$ {
         public $masktype$ xor(VectorMask<$Boxtype$> mask) {
             Objects.requireNonNull(mask);
             $masktype$ m = ($masktype$)mask;
-            return VectorSupport.binaryOp(VECTOR_OP_XOR, $masktype$.class, null, $bitstype$.class, VLENGTH,
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, $masktype$.class, null, $maskbitstype$.class, VLENGTH,  // element class now comes from the separate maskbitstype template variable
                                           this, m, null,
                                           (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
         }
@@ -983,21 +989,21 @@ final class $vectortype$ extends $abstractvectortype$ {
         @Override
         @ForceInline
         public int trueCount() {
-            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, $masktype$.class, $bitstype$.class, VLENGTH, this,
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, $masktype$.class, $maskbitstype$.class, VLENGTH, this,  // maskbitstype is supplied by gen-src.sh alongside bitstype
                                                       (m) -> trueCountHelper(m.getBits()));
         }
 
         @Override
         @ForceInline
         public int firstTrue() {
-            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this,
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, $masktype$.class, $maskbitstype$.class, VLENGTH, this,  // maskbitstype is supplied by gen-src.sh alongside bitstype
                                                       (m) -> firstTrueHelper(m.getBits()));
         }
 
         @Override
         @ForceInline
         public int lastTrue() {
-            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, $masktype$.class, $bitstype$.class, VLENGTH, this,
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, $masktype$.class, $maskbitstype$.class, VLENGTH, this,  // maskbitstype is supplied by gen-src.sh alongside bitstype
                                                       (m) -> lastTrueHelper(m.getBits()));
         }
 
@@ -1007,7 +1013,7 @@ final class $vectortype$ extends $abstractvectortype$ {
             if (length() > Long.SIZE) {
                 throw new UnsupportedOperationException("too many lanes for one long");
             }
-            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, $masktype$.class, $bitstype$.class, VLENGTH, this,
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, $masktype$.class, $maskbitstype$.class, VLENGTH, this,
                                                       (m) -> toLongHelper(m.getBits()));
         }
 
@@ -1017,7 +1023,7 @@ final class $vectortype$ extends $abstractvectortype$ {
         @ForceInline
         public boolean laneIsSet(int i) {
             Objects.checkIndex(i, length());
-            return VectorSupport.extract($masktype$.class, $type$.class, VLENGTH,
+            return VectorSupport.extract($masktype$.class, $elemtype$.class, VLENGTH,  // NOTE(review): same class literal that ETYPE is initialised with; indexPartiallyInUpperRange in this patch passes ETYPE instead
                                          this, i, (m, idx) -> (m.getBits()[idx] ? 1L : 0L)) == 1L;
         }
 
@@ -1026,7 +1032,7 @@ final class $vectortype$ extends $abstractvectortype$ {
         @Override
         @ForceInline
         public boolean anyTrue() {
-            return VectorSupport.test(BT_ne, $masktype$.class, $bitstype$.class, VLENGTH,
+            return VectorSupport.test(BT_ne, $masktype$.class, $maskbitstype$.class, VLENGTH,  // element class now comes from the separate maskbitstype template variable
                                          this, vspecies().maskAll(true),
                                          (m, __) -> anyTrueHelper((($masktype$)m).getBits()));
         }
@@ -1034,7 +1040,7 @@ final class $vectortype$ extends $abstractvectortype$ {
         @Override
         @ForceInline
         public boolean allTrue() {
-            return VectorSupport.test(BT_overflow, $masktype$.class, $bitstype$.class, VLENGTH,
+            return VectorSupport.test(BT_overflow, $masktype$.class, $maskbitstype$.class, VLENGTH,  // element class now comes from the separate maskbitstype template variable
                                          this, vspecies().maskAll(true),
                                          (m, __) -> allTrueHelper((($masktype$)m).getBits()));
         }
@@ -1042,7 +1048,7 @@ final class $vectortype$ extends $abstractvectortype$ {
         @ForceInline
         /*package-private*/
         static $masktype$ maskAll(boolean bit) {
-            return VectorSupport.fromBitsCoerced($masktype$.class, $bitstype$.class, VLENGTH,
+            return VectorSupport.fromBitsCoerced($masktype$.class, $maskbitstype$.class, VLENGTH,  // element class now comes from the separate maskbitstype template variable
                                                  (bit ? -1 : 0), MODE_BROADCAST, null,
                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
         }
@@ -1070,7 +1076,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     static final class $shuffletype$ extends AbstractShuffle<$Boxtype$> {
         static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
-        static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM
+        static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM
 
         $shuffletype$(byte[] reorder) {
             super(VLENGTH, reorder);
@@ -1140,14 +1146,14 @@ final class $vectortype$ extends $abstractvectortype$ {
     @ForceInline
     @Override
     final
-    $abstractvectortype$ fromArray0($type$[] a, int offset) {
+    $abstractvectortype$ fromArray0($elemtype$[] a, int offset) {  // gen-src.sh sets elemtype equal to type for every arm except Halffloat, where it is Float16
         return super.fromArray0Template(a, offset);  // specialize
     }
 
     @ForceInline
     @Override
     final
-    $abstractvectortype$ fromArray0($type$[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange) {
+    $abstractvectortype$ fromArray0($elemtype$[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange) {  // gen-src.sh sets elemtype equal to type for every arm except Halffloat, where it is Float16
         return super.fromArray0Template($masktype$.class, a, offset, ($masktype$) m, offsetInRange);  // specialize
     }
 
@@ -1155,7 +1161,7 @@ final class $vectortype$ extends $abstractvectortype$ {
     @ForceInline
     @Override
     final
-    $abstractvectortype$ fromArray0($type$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) {
+    $abstractvectortype$ fromArray0($elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) {  // NOTE(review): presumably inside the !byteOrShort section closed just below; gen-src.sh passes -KbyteOrShort for Halffloat, so confirm which overload FP16 gets
         return super.fromArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m);
     }
 #end[!byteOrShort]
@@ -1209,14 +1215,14 @@ final class $vectortype$ extends $abstractvectortype$ {
     @ForceInline
     @Override
     final
-    void intoArray0($type$[] a, int offset) {
+    void intoArray0($elemtype$[] a, int offset) {  // gen-src.sh sets elemtype equal to type for every arm except Halffloat, where it is Float16
         super.intoArray0Template(a, offset);  // specialize
     }
 
     @ForceInline
     @Override
     final
-    void intoArray0($type$[] a, int offset, VectorMask<$Boxtype$> m) {
+    void intoArray0($elemtype$[] a, int offset, VectorMask<$Boxtype$> m) {  // gen-src.sh sets elemtype equal to type for every arm except Halffloat, where it is Float16
         super.intoArray0Template($masktype$.class, a, offset, ($masktype$) m);
     }
 
@@ -1224,7 +1230,7 @@ final class $vectortype$ extends $abstractvectortype$ {
     @ForceInline
     @Override
     final
-    void intoArray0($type$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) {
+    void intoArray0($elemtype$[] a, int offset, int[] indexMap, int mapOffset, VectorMask<$Boxtype$> m) {  // NOTE(review): presumably inside the !byteOrShort section closed just below; gen-src.sh passes -KbyteOrShort for Halffloat, so confirm which overload FP16 gets
         super.intoArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m);
     }
 #end[!byteOrShort]
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh
index 6841a47c757..19aad5b3b5a 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh
@@ -53,10 +53,12 @@ typeprefix=
 globalArgs=""
 #globalArgs="$globalArgs -KextraOverrides"
 
-for type in byte short int long float double
+for type in byte short int long float double Halffloat
 do
+
   Type="$(tr '[:lower:]' '[:upper:]' <<< ${type:0:1})${type:1}"
   TYPE="$(tr '[:lower:]' '[:upper:]' <<< ${type})"
+
   args=$globalArgs
   args="$args -K$type -Dtype=$type -DType=$Type -DTYPE=$TYPE"
 
@@ -66,25 +68,33 @@ do
   kind=BITWISE
 
   bitstype=$type
+  maskbitstype=$type  # default mirrors bitstype; the Float, Double and Halffloat arms below override both with the same value
   Bitstype=$Type
   Boxbitstype=$Boxtype
 
   fptype=$type
   Fptype=$Type
   Boxfptype=$Boxtype
+  elemtype=$type  # lane element type as written in generated source; only the Halffloat arm overrides it
+  Elemtype=$Type  # capitalized form, used as the class that hosts the bit-conversion methods in the templates
+  FPtype=$type  # default: later passed as -K$FPtype, which for arms that do not override it repeats the -K$type key set above
+
 
-  case $type in
-    byte)
+  case $Type in  # Type is the loop variable with its first letter upper-cased (computed at the top of the loop)
+    Byte)
       Wideboxtype=Integer
       sizeInBytes=1
       args="$args -KbyteOrShort"
       ;;
-    short)
+    Short)
+      fptype=float16  # short gets a floating-point counterpart, as the Long arm does with double
+      Fptype=Float16
+      Boxfptype=Halffloat  # NOTE(review): the Halffloat arm sets Boxtype=Float16; confirm Halffloat is the intended box name here
       Wideboxtype=Integer
       sizeInBytes=2
       args="$args -KbyteOrShort"
       ;;
-    int)
+    Int)
       Boxtype=Integer
       Wideboxtype=Integer
       Boxbitstype=Integer
@@ -94,35 +104,55 @@ do
       sizeInBytes=4
       args="$args -KintOrLong -KintOrFP -KintOrFloat"
       ;;
-    long)
+    Long)
       fptype=double
       Fptype=Double
       Boxfptype=Double
       sizeInBytes=8
       args="$args -KintOrLong -KlongOrDouble"
       ;;
-    float)
+    Float)
       kind=FP
       bitstype=int
+      maskbitstype=int  # kept equal to bitstype; substituted into the mask helpers' element-class argument
       Bitstype=Int
       Boxbitstype=Integer
       sizeInBytes=4
       args="$args -KintOrFP -KintOrFloat"
+      FPtype=FP32  # becomes the -KFP32 key when args is assembled after esac
       ;;
-    double)
+    Double)
       kind=FP
       bitstype=long
+      maskbitstype=long  # kept equal to bitstype; substituted into the mask helpers' element-class argument
       Bitstype=Long
       Boxbitstype=Long
       sizeInBytes=8
       args="$args -KintOrFP -KlongOrDouble"
+      FPtype=FP64  # becomes the -KFP64 key when args is assembled after esac
+      ;;
+    Halffloat)
+      kind=FP
+      bitstype=short  # two-byte lanes (sizeInBytes=2 below): raw lane bits travel as short
+      maskbitstype=short
+      Bitstype=Short
+      Boxbitstype=Short
+      sizeInBytes=2
+      Boxtype=Float16
+      elemtype=Float16  # generated source declares lanes as Float16 rather than using the loop's type name
+      Elemtype=Float16
+      FPtype=FP16  # enables the FP16-keyed sections in the templates
+      fptype=float16
+      Fptype=Float16
+      args="$args -KbyteOrShort -KshortOrFP -KshortOrHalffloat"  # NOTE(review): -KbyteOrShort is set too, so byteOrShort template sections also apply to Halffloat; confirm intended
       ;;
   esac
 
-  args="$args -K$kind -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype"
-  args="$args -Dbitstype=$bitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype"
+  args="$args -K$FPtype -K$kind -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype"  # -K$FPtype adds FP16/FP32/FP64 for the FP arms; other arms leave FPtype equal to the type name
+  args="$args -Dbitstype=$bitstype -Dmaskbitstype=$maskbitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype"  # maskbitstype is exported next to bitstype for the mask helpers
   args="$args -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype"
   args="$args -DsizeInBytes=$sizeInBytes"
+  args="$args -Delemtype=$elemtype -DElemtype=$Elemtype"  # element-type variables that the templates now use in place of type/Type
 
   abstractvectortype=${typeprefix}${Type}Vector
   abstractbitsvectortype=${typeprefix}${Bitstype}Vector
diff --git a/test/jdk/jdk/incubator/vector/Short128VectorTests.java b/test/jdk/jdk/incubator/vector/Short128VectorTests.java
index e40a40686c9..8ffeaa22a88 100644
--- a/test/jdk/jdk/incubator/vector/Short128VectorTests.java
+++ b/test/jdk/jdk/incubator/vector/Short128VectorTests.java
@@ -1382,9 +1382,14 @@ void viewAsIntegeralLanesTest() {
         Assert.assertEquals(asIntegral.species(), SPECIES);
     }
 
-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
     void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
+        Assert.assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
+        Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length());
+        Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
     }
 
     @Test
diff --git a/test/jdk/jdk/incubator/vector/Short256VectorTests.java b/test/jdk/jdk/incubator/vector/Short256VectorTests.java
index 02138e3e8aa..59248b01a3c 100644
--- a/test/jdk/jdk/incubator/vector/Short256VectorTests.java
+++ b/test/jdk/jdk/incubator/vector/Short256VectorTests.java
@@ -1382,9 +1382,14 @@ void viewAsIntegeralLanesTest() {
         Assert.assertEquals(asIntegral.species(), SPECIES);
     }
 
-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
     void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
+        Assert.assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
+        Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length());
+        Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
     }
 
     @Test
diff --git a/test/jdk/jdk/incubator/vector/Short512VectorTests.java b/test/jdk/jdk/incubator/vector/Short512VectorTests.java
index 9577f22f58c..6524a0be416 100644
--- a/test/jdk/jdk/incubator/vector/Short512VectorTests.java
+++ b/test/jdk/jdk/incubator/vector/Short512VectorTests.java
@@ -1382,9 +1382,14 @@ void viewAsIntegeralLanesTest() {
         Assert.assertEquals(asIntegral.species(), SPECIES);
     }
 
-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
     void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
+        Assert.assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
+        Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length());
+        Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
     }
 
     @Test
diff --git a/test/jdk/jdk/incubator/vector/Short64VectorTests.java b/test/jdk/jdk/incubator/vector/Short64VectorTests.java
index 71b3c6046b4..87853b4c182 100644
--- a/test/jdk/jdk/incubator/vector/Short64VectorTests.java
+++ b/test/jdk/jdk/incubator/vector/Short64VectorTests.java
@@ -1382,9 +1382,14 @@ void viewAsIntegeralLanesTest() {
         Assert.assertEquals(asIntegral.species(), SPECIES);
     }
 
-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
     void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
+        Assert.assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
+        Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length());
+        Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
     }
 
     @Test
diff --git a/test/jdk/jdk/incubator/vector/ShortMaxVectorTests.java b/test/jdk/jdk/incubator/vector/ShortMaxVectorTests.java
index 4a6adbf2c8e..b10f951913e 100644
--- a/test/jdk/jdk/incubator/vector/ShortMaxVectorTests.java
+++ b/test/jdk/jdk/incubator/vector/ShortMaxVectorTests.java
@@ -1387,9 +1387,14 @@ void viewAsIntegeralLanesTest() {
         Assert.assertEquals(asIntegral.species(), SPECIES);
     }
 
-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
     void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
+        Assert.assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
+        Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length());
+        Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
     }
 
     @Test
diff --git a/test/jdk/jdk/incubator/vector/templates/Unit-header.template b/test/jdk/jdk/incubator/vector/templates/Unit-header.template
index 4d3795ea3d1..016a570f3f1 100644
--- a/test/jdk/jdk/incubator/vector/templates/Unit-header.template
+++ b/test/jdk/jdk/incubator/vector/templates/Unit-header.template
@@ -1750,13 +1750,13 @@ relativeError));
         Assert.assertEquals(asFloating.species(), SPECIES);
     }
 #else[FP]
-#if[byteOrShort]
+#if[byte]
 
     @Test(expectedExceptions = UnsupportedOperationException.class)
     void viewAsFloatingLanesTest() {
         SPECIES.zero().viewAsFloatingLanes();
     }
-#else[byteOrShort]
+#else[byte]
 
     @Test
     void viewAsFloatingLanesTest() {
@@ -1767,7 +1767,7 @@ relativeError));
         Assert.assertEquals(asFloatingSpecies.length(), SPECIES.length());
         Assert.assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
     }
-#end[byteOrShort]
+#end[byte]
 #end[FP]
 #if[BITWISE]