diff --git a/include/cudaq/Optimizer/CodeGen/Pipelines.h b/include/cudaq/Optimizer/CodeGen/Pipelines.h
index afb39831ba..08ef770c1b 100644
--- a/include/cudaq/Optimizer/CodeGen/Pipelines.h
+++ b/include/cudaq/Optimizer/CodeGen/Pipelines.h
@@ -30,35 +30,18 @@ void commonPipelineConvertToQIR(mlir::PassManager &pm,
                                 mlir::StringRef codeGenFor = "qir",
                                 mlir::StringRef passConfigAs = "qir");
 
-/// \deprecated{Only for Python, since it can't use the new QIR codegen.}
-void commonPipelineConvertToQIR_PythonWorkaround(
-    mlir::PassManager &pm, const std::optional<mlir::StringRef> &convertTo);
-
 /// \brief Pipeline builder to convert Quake to QIR.
 /// Does not specify a particular QIR profile.
 inline void addPipelineConvertToQIR(mlir::PassManager &pm) {
   commonPipelineConvertToQIR(pm);
 }
 
-/// \deprecated{Only for Python, since it can't use the new QIR codegen.}
-inline void addPipelineConvertToQIR_PythonWorkaround(mlir::PassManager &pm) {
-  commonPipelineConvertToQIR_PythonWorkaround(pm, std::nullopt);
-}
-
 /// \brief Pipeline builder to convert Quake to QIR.
 /// Specifies a particular QIR profile in \p convertTo.
 /// \p pm Pass manager to append passes to
 /// \p convertTo name of QIR profile (e.g., `qir-base`, `qir-adaptive`, ...)
 void addPipelineConvertToQIR(mlir::PassManager &pm, mlir::StringRef convertTo);
 
-/// \deprecated{Only for Python, since it can't use the new QIR codegen.}
-inline void
-addPipelineConvertToQIR_PythonWorkaround(mlir::PassManager &pm,
-                                         mlir::StringRef convertTo) {
-  commonPipelineConvertToQIR_PythonWorkaround(pm, convertTo);
-  addQIRProfilePipeline(pm, convertTo);
-}
-
 void addLowerToCCPipeline(mlir::OpPassManager &pm);
 
 void addPipelineTranslateToOpenQASM(mlir::PassManager &pm);
diff --git a/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp b/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp
index f267655cf7..329c9520ec 100644
--- a/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp
+++ b/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp
@@ -1091,7 +1091,7 @@ struct QuantumGatePattern : public OpConversionPattern<OP> {
 
     // Process the controls, sorting them by type.
     for (auto pr : llvm::zip(op.getControls(), adaptor.getControls())) {
-      if (isa<quake::VeqType>(std::get<0>(pr).getType())) {
+      if (isaVeqArgument(std::get<0>(pr).getType())) {
         numArrayCtrls++;
         auto sizeCall = rewriter.create<func::CallOp>(
             loc, i64Ty, cudaq::opt::QIRArrayGetSize,
@@ -1154,6 +1154,18 @@ struct QuantumGatePattern : public OpConversionPattern<OP> {
     return forwardOrEraseOp();
   }
 
+  // Returns true when \p ty is a quake.veq or a pointer to the identified
+  // LLVM struct `Array`. (In Python, arguments may already be converted.)
+  // TODO: Need a way to identify arrays with the opaque pointer variant.
+  static bool isaVeqArgument(Type ty) {
+    if (isa<quake::VeqType>(ty))
+      return true;
+    if (auto ptrTy = dyn_cast<cudaq::cc::PointerType>(ty))
+      if (auto strTy = dyn_cast<LLVM::LLVMStructType>(ptrTy.getElementType()))
+        return strTy.isIdentified() && strTy.getName() == "Array";
+    return false;
+  }
+
   static bool conformsToIntendedCall(std::size_t numControls, Value ctrl, OP op,
                                      StringRef qirFunctionName) {
     if (numControls != 1)
@@ -1818,9 +1830,7 @@ struct QuakeToQIRAPIPrepPass
   }
 
   void guaranteeMzIsLabeled(quake::MzOp mz, int &counter, OpBuilder &builder) {
-    if (mz.getRegisterNameAttr() &&
-        /* FIXME: issue 2538: the name should never be empty. */
-        !mz.getRegisterNameAttr().getValue().empty()) {
+    if (mz.getRegisterNameAttr()) {
       mz->setAttr(cudaq::opt::MzAssignedNameAttrName, builder.getUnitAttr());
       return;
     }
diff --git a/lib/Optimizer/CodeGen/Pipelines.cpp b/lib/Optimizer/CodeGen/Pipelines.cpp
index 393dad5c65..596d7adcf3 100644
--- a/lib/Optimizer/CodeGen/Pipelines.cpp
+++ b/lib/Optimizer/CodeGen/Pipelines.cpp
@@ -51,37 +51,6 @@ void cudaq::opt::commonPipelineConvertToQIR(PassManager &pm,
   pm.addPass(createCCToLLVM());
 }
 
-void cudaq::opt::commonPipelineConvertToQIR_PythonWorkaround(
-    PassManager &pm, const std::optional<StringRef> &convertTo) {
-  pm.addNestedPass<func::FuncOp>(createApplyControlNegations());
-  addAggressiveEarlyInlining(pm);
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createUnwindLoweringPass());
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addPass(createApplyOpSpecializationPass());
-  pm.addNestedPass<func::FuncOp>(createExpandMeasurementsPass());
-  pm.addNestedPass<func::FuncOp>(createClassicalMemToReg());
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
-  pm.addNestedPass<func::FuncOp>(createQuakeAddDeallocs());
-  pm.addNestedPass<func::FuncOp>(createQuakeAddMetadata());
-  pm.addNestedPass<func::FuncOp>(createLoopNormalize());
-  LoopUnrollOptions luo;
-  luo.allowBreak = convertTo && (*convertTo == "qir-adaptive");
-  pm.addNestedPass<func::FuncOp>(createLoopUnroll(luo));
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
-  pm.addNestedPass<func::FuncOp>(createLowerToCFGPass());
-  pm.addNestedPass<func::FuncOp>(createCombineQuantumAllocations());
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
-  if (convertTo && (*convertTo == "qir-base"))
-    pm.addNestedPass<func::FuncOp>(createDelayMeasurementsPass());
-  pm.addPass(createConvertMathToFuncs());
-  pm.addPass(createSymbolDCEPass());
-  pm.addPass(createConvertToQIR());
-}
-
 void cudaq::opt::addPipelineTranslateToOpenQASM(PassManager &pm) {
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
   pm.addNestedPass<func::FuncOp>(createCSEPass());
diff --git a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp
index ad35594fbb..b5697b789b 100644
--- a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp
+++ b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp
@@ -517,38 +517,41 @@ void quake::WrapOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
 //===----------------------------------------------------------------------===//
 
 // Common verification for measurement operations.
-static LogicalResult verifyMeasurements(Operation *const op,
-                                        TypeRange targetsType,
-                                        const Type bitsType) {
+template <typename MEAS>
+static LogicalResult verifyMeasurements(MEAS op, TypeRange targetsType,
+                                        const Type bitsType) {
   if (failed(verifyWireResultsAreLinear(op)))
     return failure();
   bool mustBeStdvec =
       targetsType.size() > 1 ||
       (targetsType.size() == 1 && isa<quake::VeqType>(targetsType[0]));
   if (mustBeStdvec) {
-    if (!isa<cudaq::cc::StdvecType>(op->getResult(0).getType()))
-      return op->emitOpError("must return `!cc.stdvec<!quake.measure>`, when "
-                             "measuring a qreg, a series of qubits, or both");
+    if (!isa<cudaq::cc::StdvecType>(bitsType))
+      return op.emitOpError("must return `!cc.stdvec<!quake.measure>`, when "
+                            "measuring a qreg, a series of qubits, or both");
   } else {
-    if (!isa<quake::MeasureType>(op->getResult(0).getType()))
+    if (!isa<quake::MeasureType>(bitsType))
       return op->emitOpError(
           "must return `!quake.measure` when measuring exactly one qubit");
   }
+  // A register name is optional, but when present it must not be empty.
+  if (auto regName = op.getRegisterName(); regName && regName->empty())
+    return op->emitError("quake measurement name cannot be empty.");
   return success();
 }
 
 LogicalResult quake::MxOp::verify() {
-  return verifyMeasurements(getOperation(), getTargets().getType(),
+  return verifyMeasurements(*this, getTargets().getType(),
                             getMeasOut().getType());
 }
 
 LogicalResult quake::MyOp::verify() {
-  return verifyMeasurements(getOperation(), getTargets().getType(),
+  return verifyMeasurements(*this, getTargets().getType(),
                             getMeasOut().getType());
 }
 
 LogicalResult quake::MzOp::verify() {
-  return verifyMeasurements(getOperation(), getTargets().getType(),
+  return verifyMeasurements(*this, getTargets().getType(),
                             getMeasOut().getType());
 }
 
diff --git a/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp b/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp
index b9728ed067..00ccc8c07f 100644
--- a/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp
+++ b/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp
@@ -87,6 +87,23 @@ convertArrayAttrToGlobalConstant(MLIRContext *ctx, Location loc,
 }
 
 namespace {
+
+// This pattern replaces a cc.const_array with a global constant. It can
+// recognize a couple of usage patterns and will generate efficient IR in those
+// cases.
+//
+// Pattern 1: The entire constant array is stored to one or more stack slots.
+// Here we can eliminate the stack allocation and use the global constant.
+//
+// Pattern 2: Individual elements at dynamic offsets are extracted from the
+// constant array and used. This can be replaced with a compute pointer
+// operation using the global constant and a load of the element at the computed
+// offset.
+//
+// Default: If the usage is not recognized, the constant array value is replaced
+// with a load of the entire global variable. In this case, LLVM's optimizations
+// are counted on to help demote the (large?) sequence value to primitive memory
+// address arithmetic.
 struct ConstantArrayPattern
     : public OpRewritePattern<cudaq::cc::ConstantArrayOp> {
   explicit ConstantArrayPattern(MLIRContext *ctx, ModuleOp module,
@@ -95,21 +112,31 @@ struct ConstantArrayPattern
 
   LogicalResult matchAndRewrite(cudaq::cc::ConstantArrayOp conarr,
                                 PatternRewriter &rewriter) const override {
+    auto func = conarr->getParentOfType<func::FuncOp>();
+    if (!func)
+      return failure();
+    // Classify each user of the constant array by how it consumes the value.
     SmallVector<cudaq::cc::AllocaOp> allocas;
     SmallVector<cudaq::cc::StoreOp> stores;
+    SmallVector<cudaq::cc::ExtractValueOp> extracts;
+    bool loadAsValue = false;
     for (auto *usr : conarr->getUsers()) {
       auto store = dyn_cast<cudaq::cc::StoreOp>(usr);
-      if (!store)
-        return failure();
-      auto alloca = store.getPtrvalue().getDefiningOp<cudaq::cc::AllocaOp>();
-      if (!alloca)
-        return failure();
-      stores.push_back(store);
-      allocas.push_back(alloca);
+      auto extract = dyn_cast<cudaq::cc::ExtractValueOp>(usr);
+      if (store) {
+        auto alloca = store.getPtrvalue().getDefiningOp<cudaq::cc::AllocaOp>();
+        if (alloca) {
+          stores.push_back(store);
+          allocas.push_back(alloca);
+          continue;
+        }
+      } else if (extract) {
+        extracts.push_back(extract);
+        continue;
+      }
+      // Any other use, e.g. a store to a non-alloca, needs the loaded value.
+      loadAsValue = true;
     }
-    auto func = conarr->getParentOfType<func::FuncOp>();
-    if (!func)
-      return failure();
     std::string globalName =
         func.getName().str() + ".rodata_" + std::to_string(counter++);
     auto *ctx = rewriter.getContext();
@@ -118,12 +145,39 @@ struct ConstantArrayPattern
     if (failed(convertArrayAttrToGlobalConstant(ctx, conarr.getLoc(), valueAttr,
                                                 module, globalName, eleTy)))
       return failure();
-    for (auto alloca : allocas)
-      rewriter.replaceOpWithNewOp<cudaq::cc::AddressOfOp>(
-          alloca, alloca.getType(), globalName);
-    for (auto store : stores)
-      rewriter.eraseOp(store);
-    rewriter.eraseOp(conarr);
+    auto loc = conarr.getLoc();
+    if (!extracts.empty()) { // Pattern 2: element loads from the global.
+      auto base = rewriter.create<cudaq::cc::AddressOfOp>(
+          loc, cudaq::cc::PointerType::get(conarr.getType()), globalName);
+      auto elePtrTy = cudaq::cc::PointerType::get(eleTy);
+      for (auto extract : extracts) {
+        SmallVector<cudaq::cc::ComputePtrArg> args;
+        unsigned dynIdx = 0; // next dynamic index operand to consume
+        for (auto arg : extract.getRawConstantIndices()) {
+          if (arg == cudaq::cc::ExtractValueOp::getDynamicIndexValue())
+            args.push_back(extract.getDynamicIndices()[dynIdx++]);
+          else
+            args.push_back(arg);
+        }
+        OpBuilder::InsertionGuard guard(rewriter);
+        rewriter.setInsertionPoint(extract);
+        auto addrVal =
+            rewriter.create<cudaq::cc::ComputePtrOp>(loc, elePtrTy, base, args);
+        rewriter.replaceOpWithNewOp<cudaq::cc::LoadOp>(extract, addrVal);
+      }
+    }
+    for (auto alloca : allocas) // Pattern 1: the global replaces the alloca.
+      rewriter.replaceOpWithNewOp<cudaq::cc::AddressOfOp>(
+          alloca, alloca.getType(), globalName);
+    for (auto store : stores)
+      rewriter.eraseOp(store);
+    if (loadAsValue) { // Default: remaining users read a load of the global.
+      auto base = rewriter.create<cudaq::cc::AddressOfOp>(
+          loc, cudaq::cc::PointerType::get(conarr.getType()), globalName);
+      rewriter.replaceOpWithNewOp<cudaq::cc::LoadOp>(conarr, base);
+    } else { // Every user was rewritten above; drop the dead constant.
+      rewriter.eraseOp(conarr);
+    }
     return success();
   }
 
diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py
index 06f77857c7..3463b4a1e0 100644
--- a/python/cudaq/kernel/ast_bridge.py
+++ b/python/cudaq/kernel/ast_bridge.py
@@ -1749,9 +1749,11 @@ def bodyBuilder(iterVal):
                     self.ctx) if len(qubits) == 1 and quake.RefType.isinstance(
                         qubits[0].type) else cc.StdvecType.get(
                             self.ctx, quake.MeasureType.get(self.ctx))
-                measureResult = opCtor(measTy, [],
-                                       qubits,
-                                       registerName=registerName).result
+                # Normalize a falsy (e.g. empty) register name to None before
+                # building the measurement op.
+                label = registerName if registerName else None
+                measureResult = opCtor(measTy, [], qubits,
+                                       registerName=label).result
                 if pushResultToStack:
                     self.pushValue(
                         quake.DiscriminateOp(resTy, measureResult).result)
@@ -3152,6 +3154,73 @@ def bodyBuilder(iterVar):
                                             isDecrementing=isDecrementing)
                 return
 
+        # We can simplify `for i,j in enumerate(L)` MLIR code immensely by
+        # building a for loop over the iterable L and using the index and the
+        # element directly. Only taken when `enumerate` is called by name and
+        # the target is two plain names; other forms use the general path.
+        isEnumerate = (isinstance(node.iter, ast.Call) and
+                       isinstance(node.iter.func, ast.Name) and
+                       node.iter.func.id == 'enumerate')
+        isNamePair = (isinstance(node.target, ast.Tuple) and
+                      len(node.target.elts) == 2 and all(
+                          isinstance(e, ast.Name) for e in node.target.elts))
+        if isEnumerate and isNamePair:
+            for arg in node.iter.args:
+                self.visit(arg)
+            if len(self.valueStack) == 2:
+                iterable = self.popValue()
+                self.popValue()
+            else:
+                assert len(self.valueStack) == 1
+                iterable = self.popValue()
+            iterable = self.ifPointerThenLoad(iterable)
+            varNames = [elt.id for elt in node.target.elts]
+            extractFunctor = None
+            if quake.VeqType.isinstance(iterable.type):
+                totalSize = quake.VeqSizeOp(self.getIntegerType(),
+                                            iterable).result
+
+                def extractFunctor(seq, idx):
+                    # Returns [index, reference to the qubit at that index].
+                    q = quake.ExtractRefOp(self.getRefType(),
+                                           seq,
+                                           -1,
+                                           index=idx).result
+                    return [idx, q]
+
+            elif cc.StdvecType.isinstance(iterable.type):
+                totalSize = cc.StdvecSizeOp(self.getIntegerType(),
+                                            iterable).result
+
+                def extractFunctor(seq, idx):
+                    # Returns [index, pointer to the element at that index].
+                    eleTy = cc.StdvecType.getElementType(seq.type)
+                    elePtrTy = cc.PointerType.get(self.ctx, eleTy)
+                    arrTy = eleTy
+                    if not cc.ArrayType.isinstance(arrTy):
+                        arrTy = cc.ArrayType.get(self.ctx, eleTy)
+                    dataArrTy = cc.PointerType.get(self.ctx, arrTy)
+                    data = cc.StdvecDataOp(dataArrTy, seq).result
+                    v = cc.ComputePtrOp(
+                        elePtrTy, data, [idx],
+                        DenseI32ArrayAttr.get([kDynamicPtrIndex],
+                                              context=self.ctx)).result
+                    return [idx, v]
+
+            if extractFunctor is not None:
+
+                def bodyBuilder(iterVar):
+                    self.symbolTable.pushScope()
+                    values = extractFunctor(iterable, iterVar)
+                    for name, value in zip(varNames, values):
+                        self.symbolTable[name] = value
+                    for stmt in node.body:
+                        self.visit(stmt)
+                    self.symbolTable.popScope()
+
+                self.createInvariantForLoop(totalSize, bodyBuilder)
+                return
+
         self.visit(node.iter)
         assert len(self.valueStack) > 0 and len(self.valueStack) < 3
 
diff --git a/python/cudaq/kernel/kernel_builder.py b/python/cudaq/kernel/kernel_builder.py
index f77b130830..bdb117271a 100644
--- a/python/cudaq/kernel/kernel_builder.py
+++ b/python/cudaq/kernel/kernel_builder.py
@@ -1076,10 +1076,12 @@ def mz(self, target, regName=None):
             if quake.VeqType.isinstance(target.mlirValue.type):
                 retTy = stdvecTy
                 measTy = cc.StdvecType.get(self.ctx, measTy)
-            res = quake.MzOp(
-                measTy, [], [target.mlirValue],
-                registerName=StringAttr.get(regName, context=self.ctx)
-                if regName is not None else '')
+            # Only pass a register name when the caller supplied one.
+            nameKwargs = {}
+            if regName is not None:
+                nameKwargs['registerName'] = StringAttr.get(regName,
+                                                            context=self.ctx)
+            res = quake.MzOp(measTy, [], [target.mlirValue], **nameKwargs)
             disc = quake.DiscriminateOp(retTy, res)
             return self.__createQuakeValue(disc.result)
 
diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp
index 9a6e48203f..3232e26b11 100644
--- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp
+++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp
@@ -104,7 +104,7 @@ jitAndCreateArgs(const std::string &name, MlirModule module,
         {.startingArgIdx = startingArgIdx}));
     pm.addPass(cudaq::opt::createLambdaLiftingPass());
     pm.addPass(createSymbolDCEPass());
-    cudaq::opt::addPipelineConvertToQIR_PythonWorkaround(pm);
+    cudaq::opt::addPipelineConvertToQIR(pm);
 
     DefaultTimingManager tm;
     tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES));
@@ -596,9 +596,9 @@ std::string getQIR(const std::string &name, MlirModule module,
   PassManager pm(context);
   pm.addPass(cudaq::opt::createLambdaLiftingPass());
   if (profile.empty())
-    cudaq::opt::addPipelineConvertToQIR_PythonWorkaround(pm);
+    cudaq::opt::addPipelineConvertToQIR(pm);
   else
-    cudaq::opt::addPipelineConvertToQIR_PythonWorkaround(pm, profile);
+    cudaq::opt::addPipelineConvertToQIR(pm, profile);
   DefaultTimingManager tm;
   tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES));
   auto timingScope = tm.getRootScope(); // starts the timer
diff --git a/runtime/common/Trace.cpp b/runtime/common/Trace.cpp
index be60099b1c..d309feecfa 100644
--- a/runtime/common/Trace.cpp
+++ b/runtime/common/Trace.cpp
@@ -10,11 +10,10 @@
 #include <algorithm>
 #include <cassert>
 
-namespace cudaq {
-
-void Trace::appendInstruction(std::string_view name, std::vector<double> params,
-                              std::vector<QuditInfo> controls,
-                              std::vector<QuditInfo> targets) {
+void cudaq::Trace::appendInstruction(std::string_view name,
+                                     std::vector<double> params,
+                                     std::vector<QuditInfo> controls,
+                                     std::vector<QuditInfo> targets) {
   assert(!targets.empty() && "An instruction must have at least one target");
   auto findMaxID = [](const std::vector<QuditInfo> &qudits) -> std::size_t {
     return std::max_element(qudits.cbegin(), qudits.cend(),
@@ -27,5 +26,3 @@ void Trace::appendInstruction(std::string_view name, std::vector<double> params,
   numQudits = std::max(numQudits, maxID + 1);
   instructions.emplace_back(name, params, controls, targets);
 }
-
-} // namespace cudaq
diff --git a/runtime/nvqir/NVQIR.cpp b/runtime/nvqir/NVQIR.cpp
index f06c967e11..9be1571ed6 100644
--- a/runtime/nvqir/NVQIR.cpp
+++ b/runtime/nvqir/NVQIR.cpp
@@ -1029,7 +1029,7 @@ void generalizedInvokeWithRotationsControlsTargets(
     controls[i] = va_arg(args, Qubit *);
   }
   for (i = 0; i < numControlQubitOperands; ++i) {
-    arrayAndLength[i] = 0;
+    arrayAndLength[numControlArrayOperands + i] = 0;
     controls[numControlArrayOperands + i] = va_arg(args, Qubit *);
   }
   for (i = 0; i < numTargetOperands; ++i)