diff --git a/include/cudaq/Optimizer/CodeGen/Pipelines.h b/include/cudaq/Optimizer/CodeGen/Pipelines.h index afb39831ba..08ef770c1b 100644 --- a/include/cudaq/Optimizer/CodeGen/Pipelines.h +++ b/include/cudaq/Optimizer/CodeGen/Pipelines.h @@ -30,35 +30,18 @@ void commonPipelineConvertToQIR(mlir::PassManager &pm, mlir::StringRef codeGenFor = "qir", mlir::StringRef passConfigAs = "qir"); -/// \deprecated{Only for Python, since it can't use the new QIR codegen.} -void commonPipelineConvertToQIR_PythonWorkaround( - mlir::PassManager &pm, const std::optional &convertTo); - /// \brief Pipeline builder to convert Quake to QIR. /// Does not specify a particular QIR profile. inline void addPipelineConvertToQIR(mlir::PassManager &pm) { commonPipelineConvertToQIR(pm); } -/// \deprecated{Only for Python, since it can't use the new QIR codegen.} -inline void addPipelineConvertToQIR_PythonWorkaround(mlir::PassManager &pm) { - commonPipelineConvertToQIR_PythonWorkaround(pm, std::nullopt); -} - /// \brief Pipeline builder to convert Quake to QIR. /// Specifies a particular QIR profile in \p convertTo. /// \p pm Pass manager to append passes to /// \p convertTo name of QIR profile (e.g., `qir-base`, `qir-adaptive`, ...) void addPipelineConvertToQIR(mlir::PassManager &pm, mlir::StringRef convertTo); -/// \deprecated{Only for Python, since it can't use the new QIR codegen.} -inline void -addPipelineConvertToQIR_PythonWorkaround(mlir::PassManager &pm, - mlir::StringRef convertTo) { - commonPipelineConvertToQIR_PythonWorkaround(pm, convertTo); - addQIRProfilePipeline(pm, convertTo); -} - void addLowerToCCPipeline(mlir::OpPassManager &pm); void addPipelineTranslateToOpenQASM(mlir::PassManager &pm); diff --git a/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp b/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp index f267655cf7..329c9520ec 100644 --- a/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp +++ b/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp @@ -1091,7 +1091,7 @@ struct QuantumGatePattern : public OpConversionPattern { // Process the controls, sorting them by type. for (auto pr : llvm::zip(op.getControls(), adaptor.getControls())) { - if (isa(std::get<0>(pr).getType())) { + if (isaVeqArgument(std::get<0>(pr).getType())) { numArrayCtrls++; auto sizeCall = rewriter.create( loc, i64Ty, cudaq::opt::QIRArrayGetSize, @@ -1154,6 +1154,18 @@ struct QuantumGatePattern : public OpConversionPattern { return forwardOrEraseOp(); } + static bool isaVeqArgument(Type ty) { + // TODO: Need a way to identify arrays when using the opaque pointer + // variant. (In Python, the arguments may already be converted.) + auto alreadyConverted = [](Type ty) { + if (auto ptrTy = dyn_cast(ty)) + if (auto strTy = dyn_cast(ptrTy.getElementType())) + return strTy.isIdentified() && strTy.getName() == "Array"; + return false; + }; + return isa(ty) || alreadyConverted(ty); + } + static bool conformsToIntendedCall(std::size_t numControls, Value ctrl, OP op, StringRef qirFunctionName) { if (numControls != 1) @@ -1818,9 +1830,7 @@ struct QuakeToQIRAPIPrepPass } void guaranteeMzIsLabeled(quake::MzOp mz, int &counter, OpBuilder &builder) { - if (mz.getRegisterNameAttr() && - /* FIXME: issue 2538: the name should never be empty. */ - !mz.getRegisterNameAttr().getValue().empty()) { + if (mz.getRegisterNameAttr()) { mz->setAttr(cudaq::opt::MzAssignedNameAttrName, builder.getUnitAttr()); return; } diff --git a/lib/Optimizer/CodeGen/Pipelines.cpp b/lib/Optimizer/CodeGen/Pipelines.cpp index 393dad5c65..596d7adcf3 100644 --- a/lib/Optimizer/CodeGen/Pipelines.cpp +++ b/lib/Optimizer/CodeGen/Pipelines.cpp @@ -51,37 +51,6 @@ void cudaq::opt::commonPipelineConvertToQIR(PassManager &pm, pm.addPass(createCCToLLVM()); } -void cudaq::opt::commonPipelineConvertToQIR_PythonWorkaround( - PassManager &pm, const std::optional &convertTo) { - pm.addNestedPass(createApplyControlNegations()); - addAggressiveEarlyInlining(pm); - pm.addNestedPass(createCanonicalizerPass()); - pm.addNestedPass(createUnwindLoweringPass()); - pm.addNestedPass(createCanonicalizerPass()); - pm.addPass(createApplyOpSpecializationPass()); - pm.addNestedPass(createExpandMeasurementsPass()); - pm.addNestedPass(createClassicalMemToReg()); - pm.addNestedPass(createCanonicalizerPass()); - pm.addNestedPass(createCSEPass()); - pm.addNestedPass(createQuakeAddDeallocs()); - pm.addNestedPass(createQuakeAddMetadata()); - pm.addNestedPass(createLoopNormalize()); - LoopUnrollOptions luo; - luo.allowBreak = convertTo && (*convertTo == "qir-adaptive"); - pm.addNestedPass(createLoopUnroll(luo)); - pm.addNestedPass(createCanonicalizerPass()); - pm.addNestedPass(createCSEPass()); - pm.addNestedPass(createLowerToCFGPass()); - pm.addNestedPass(createCombineQuantumAllocations()); - pm.addNestedPass(createCanonicalizerPass()); - pm.addNestedPass(createCSEPass()); - if (convertTo && (*convertTo == "qir-base")) - pm.addNestedPass(createDelayMeasurementsPass()); - pm.addPass(createConvertMathToFuncs()); - pm.addPass(createSymbolDCEPass()); - pm.addPass(createConvertToQIR()); -} - void cudaq::opt::addPipelineTranslateToOpenQASM(PassManager &pm) { pm.addNestedPass(createCanonicalizerPass()); pm.addNestedPass(createCSEPass()); diff --git a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp index ad35594fbb..b5697b789b 100644 --- a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp +++ b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp @@ -517,38 +517,41 @@ void quake::WrapOp::getCanonicalizationPatterns(RewritePatternSet &patterns, //===----------------------------------------------------------------------===// // Common verification for measurement operations. -static LogicalResult verifyMeasurements(Operation *const op, - TypeRange targetsType, - const Type bitsType) { +template +LogicalResult verifyMeasurements(MEAS op, TypeRange targetsType, + const Type bitsType) { if (failed(verifyWireResultsAreLinear(op))) return failure(); bool mustBeStdvec = targetsType.size() > 1 || (targetsType.size() == 1 && isa(targetsType[0])); if (mustBeStdvec) { - if (!isa(op->getResult(0).getType())) - return op->emitOpError("must return `!cc.stdvec`, when " - "measuring a qreg, a series of qubits, or both"); + if (!isa(op.getMeasOut().getType())) + return op.emitOpError("must return `!cc.stdvec`, when " + "measuring a qreg, a series of qubits, or both"); } else { - if (!isa(op->getResult(0).getType())) + if (!isa(op.getMeasOut().getType())) return op->emitOpError( "must return `!quake.measure` when measuring exactly one qubit"); } + if (op.getRegisterName()) + if (op.getRegisterName()->empty()) + return op->emitError("quake measurement name cannot be empty."); return success(); } LogicalResult quake::MxOp::verify() { - return verifyMeasurements(getOperation(), getTargets().getType(), + return verifyMeasurements(*this, getTargets().getType(), getMeasOut().getType()); } LogicalResult quake::MyOp::verify() { - return verifyMeasurements(getOperation(), getTargets().getType(), + return verifyMeasurements(*this, getTargets().getType(), getMeasOut().getType()); } LogicalResult quake::MzOp::verify() { - return verifyMeasurements(getOperation(), getTargets().getType(), + return verifyMeasurements(*this, getTargets().getType(), getMeasOut().getType()); } diff --git a/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp b/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp index b9728ed067..00ccc8c07f 100644 --- a/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp +++ b/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp @@ -87,6 +87,23 @@ convertArrayAttrToGlobalConstant(MLIRContext *ctx, Location loc, } namespace { + +// This pattern replaces a cc.const_array with a global constant. It can +// recognize a couple of usage patterns and will generate efficient IR in those +// cases. +// +// Pattern 1: The entire constant array is stored to a stack variable(s). Here +// we can eliminate the stack allocation and use the global constant. +// +// Pattern 2: Individual elements at dynamic offsets are extracted from the +// constant array and used. This can be replaced with a compute pointer +// operation using the global constant and a load of the element at the computed +// offset. +// +// Default: If the usage is not recognized, the constant array value is replaced +// with a load of the entire global variable. In this case, LLVM's optimizations +// are counted on to help demote the (large?) sequence value to primitive memory +// address arithmetic. struct ConstantArrayPattern : public OpRewritePattern { explicit ConstantArrayPattern(MLIRContext *ctx, ModuleOp module, @@ -95,21 +112,31 @@ struct ConstantArrayPattern LogicalResult matchAndRewrite(cudaq::cc::ConstantArrayOp conarr, PatternRewriter &rewriter) const override { + auto func = conarr->getParentOfType(); + if (!func) + return failure(); + SmallVector allocas; SmallVector stores; + SmallVector extracts; + bool loadAsValue = false; for (auto *usr : conarr->getUsers()) { auto store = dyn_cast(usr); - if (!store) - return failure(); - auto alloca = store.getPtrvalue().getDefiningOp(); - if (!alloca) - return failure(); - stores.push_back(store); - allocas.push_back(alloca); + auto extract = dyn_cast(usr); + if (store) { + auto alloca = store.getPtrvalue().getDefiningOp(); + if (!alloca) + continue; + stores.push_back(store); + allocas.push_back(alloca); + continue; + } + if (extract) { + extracts.push_back(extract); + continue; + } + loadAsValue = true; } - auto func = conarr->getParentOfType(); - if (!func) - return failure(); std::string globalName = func.getName().str() + ".rodata_" + std::to_string(counter++); auto *ctx = rewriter.getContext(); @@ -118,12 +145,39 @@ struct ConstantArrayPattern if (failed(convertArrayAttrToGlobalConstant(ctx, conarr.getLoc(), valueAttr, module, globalName, eleTy))) return failure(); - for (auto alloca : allocas) - rewriter.replaceOpWithNewOp( - alloca, alloca.getType(), globalName); - for (auto store : stores) - rewriter.eraseOp(store); - rewriter.eraseOp(conarr); + auto loc = conarr.getLoc(); + if (!extracts.empty()) { + auto base = rewriter.create( + loc, cudaq::cc::PointerType::get(conarr.getType()), globalName); + auto elePtrTy = cudaq::cc::PointerType::get(eleTy); + for (auto extract : extracts) { + SmallVector args; + unsigned i = 0; + for (auto arg : extract.getRawConstantIndices()) { + if (arg == cudaq::cc::ExtractValueOp::getDynamicIndexValue()) + args.push_back(extract.getDynamicIndices()[i++]); + else + args.push_back(arg); + } + OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(extract); + auto addrVal = + rewriter.create(loc, elePtrTy, base, args); + rewriter.replaceOpWithNewOp(extract, addrVal); + } + } + if (!stores.empty()) { + for (auto alloca : allocas) + rewriter.replaceOpWithNewOp( + alloca, alloca.getType(), globalName); + for (auto store : stores) + rewriter.eraseOp(store); + } + if (loadAsValue) { + auto base = rewriter.create( + loc, cudaq::cc::PointerType::get(conarr.getType()), globalName); + rewriter.replaceOpWithNewOp(conarr, base); + } return success(); } diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py index 06f77857c7..3463b4a1e0 100644 --- a/python/cudaq/kernel/ast_bridge.py +++ b/python/cudaq/kernel/ast_bridge.py @@ -1749,9 +1749,11 @@ def bodyBuilder(iterVal): self.ctx) if len(qubits) == 1 and quake.RefType.isinstance( qubits[0].type) else cc.StdvecType.get( self.ctx, quake.MeasureType.get(self.ctx)) - measureResult = opCtor(measTy, [], - qubits, - registerName=registerName).result + label = registerName + if not label: + label = None + measureResult = opCtor(measTy, [], qubits, + registerName=label).result if pushResultToStack: self.pushValue( quake.DiscriminateOp(resTy, measureResult).result) @@ -3152,6 +3154,73 @@ def bodyBuilder(iterVar): isDecrementing=isDecrementing) return + # We can simplify `for i,j in enumerate(L)` MLIR code immensely + # by just building a for loop over the iterable object L and using + # the index into that iterable and the element. + if isinstance(node.iter, ast.Call): + if node.iter.func.id == 'enumerate': + [self.visit(arg) for arg in node.iter.args] + if len(self.valueStack) == 2: + iterable = self.popValue() + self.popValue() + else: + assert len(self.valueStack) == 1 + iterable = self.popValue() + iterable = self.ifPointerThenLoad(iterable) + totalSize = None + extractFunctor = None + varNames = [] + for elt in node.target.elts: + varNames.append(elt.id) + + beEfficient = False + if quake.VeqType.isinstance(iterable.type): + totalSize = quake.VeqSizeOp(self.getIntegerType(), + iterable).result + + def functor(seq, idx): + q = quake.ExtractRefOp(self.getRefType(), + seq, + -1, + index=idx).result + return [idx, q] + + extractFunctor = functor + beEfficient = True + elif cc.StdvecType.isinstance(iterable.type): + totalSize = cc.StdvecSizeOp(self.getIntegerType(), + iterable).result + + def functor(seq, idx): + vecTy = cc.StdvecType.getElementType(seq.type) + dataTy = cc.PointerType.get(self.ctx, vecTy) + arrTy = vecTy + if not cc.ArrayType.isinstance(arrTy): + arrTy = cc.ArrayType.get(self.ctx, vecTy) + dataArrTy = cc.PointerType.get(self.ctx, arrTy) + data = cc.StdvecDataOp(dataArrTy, seq).result + v = cc.ComputePtrOp( + dataTy, data, [idx], + DenseI32ArrayAttr.get([kDynamicPtrIndex], + context=self.ctx)).result + return [idx, v] + + extractFunctor = functor + beEfficient = True + + if beEfficient: + + def bodyBuilder(iterVar): + self.symbolTable.pushScope() + values = extractFunctor(iterable, iterVar) + for i, v in enumerate(values): + self.symbolTable[varNames[i]] = v + [self.visit(b) for b in node.body] + self.symbolTable.popScope() + + self.createInvariantForLoop(totalSize, bodyBuilder) + return + self.visit(node.iter) assert len(self.valueStack) > 0 and len(self.valueStack) < 3 diff --git a/python/cudaq/kernel/kernel_builder.py b/python/cudaq/kernel/kernel_builder.py index f77b130830..bdb117271a 100644 --- a/python/cudaq/kernel/kernel_builder.py +++ b/python/cudaq/kernel/kernel_builder.py @@ -1076,10 +1076,12 @@ def mz(self, target, regName=None): if quake.VeqType.isinstance(target.mlirValue.type): retTy = stdvecTy measTy = cc.StdvecType.get(self.ctx, measTy) - res = quake.MzOp( - measTy, [], [target.mlirValue], - registerName=StringAttr.get(regName, context=self.ctx) - if regName is not None else '') + if regName is not None: + res = quake.MzOp(measTy, [], [target.mlirValue], + registerName=StringAttr.get(regName, + context=self.ctx)) + else: + res = quake.MzOp(measTy, [], [target.mlirValue]) disc = quake.DiscriminateOp(retTy, res) return self.__createQuakeValue(disc.result) diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 9a6e48203f..3232e26b11 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -104,7 +104,7 @@ jitAndCreateArgs(const std::string &name, MlirModule module, {.startingArgIdx = startingArgIdx})); pm.addPass(cudaq::opt::createLambdaLiftingPass()); pm.addPass(createSymbolDCEPass()); - cudaq::opt::addPipelineConvertToQIR_PythonWorkaround(pm); + cudaq::opt::addPipelineConvertToQIR(pm); DefaultTimingManager tm; tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES)); @@ -596,9 +596,9 @@ std::string getQIR(const std::string &name, MlirModule module, PassManager pm(context); pm.addPass(cudaq::opt::createLambdaLiftingPass()); if (profile.empty()) - cudaq::opt::addPipelineConvertToQIR_PythonWorkaround(pm); + cudaq::opt::addPipelineConvertToQIR(pm); else - cudaq::opt::addPipelineConvertToQIR_PythonWorkaround(pm, profile); + cudaq::opt::addPipelineConvertToQIR(pm, profile); DefaultTimingManager tm; tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES)); auto timingScope = tm.getRootScope(); // starts the timer diff --git a/runtime/common/Trace.cpp b/runtime/common/Trace.cpp index be60099b1c..d309feecfa 100644 --- a/runtime/common/Trace.cpp +++ b/runtime/common/Trace.cpp @@ -10,11 +10,10 @@ #include #include -namespace cudaq { - -void Trace::appendInstruction(std::string_view name, std::vector params, - std::vector controls, - std::vector targets) { +void cudaq::Trace::appendInstruction(std::string_view name, + std::vector params, + std::vector controls, + std::vector targets) { assert(!targets.empty() && "An instruction must have at least one target"); auto findMaxID = [](const std::vector &qudits) -> std::size_t { return std::max_element(qudits.cbegin(), qudits.cend(), @@ -27,5 +26,3 @@ void Trace::appendInstruction(std::string_view name, std::vector params, numQudits = std::max(numQudits, maxID + 1); instructions.emplace_back(name, params, controls, targets); } - -} // namespace cudaq diff --git a/runtime/nvqir/NVQIR.cpp b/runtime/nvqir/NVQIR.cpp index f06c967e11..9be1571ed6 100644 --- a/runtime/nvqir/NVQIR.cpp +++ b/runtime/nvqir/NVQIR.cpp @@ -1029,7 +1029,7 @@ void generalizedInvokeWithRotationsControlsTargets( controls[i] = va_arg(args, Qubit *); } for (i = 0; i < numControlQubitOperands; ++i) { - arrayAndLength[i] = 0; + arrayAndLength[numControlArrayOperands + i] = 0; controls[numControlArrayOperands + i] = va_arg(args, Qubit *); } for (i = 0; i < numTargetOperands; ++i)