Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pauli word] Rework the implementation from front to back. #2338

Merged
merged 20 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions include/cudaq/Optimizer/Builder/Factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,13 @@ inline mlir::Type stateImplType(mlir::Type eleTy) {
return cudaq::opt::factory::getPointerType(eleTy.getContext());
}

// Host side types for std::string and std::vector
// Generate host side type for std::string. The result is the type of a block of
// bytes and the length to allocate. This allows for the creation of code to
// allocate a variable, stride across such a variable, etc. The ModuleOp must
// contain the size of a pauli_word in its attributes.
cudaq::cc::ArrayType genHostStringType(mlir::ModuleOp module);

cudaq::cc::StructType stlStringType(mlir::MLIRContext *ctx);
// Host side types for std::vector
cudaq::cc::StructType stlVectorType(mlir::Type eleTy);

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -246,6 +250,9 @@ bool hasSRet(mlir::func::FuncOp funcOp);
mlir::FunctionType toHostSideFuncType(mlir::FunctionType funcTy,
bool addThisPtr, mlir::ModuleOp module);

/// Convert device type, \p ty, to host side type.
schweitzpgi marked this conversation as resolved.
Show resolved Hide resolved
mlir::Type convertToHostSideType(mlir::Type ty, mlir::ModuleOp module);

// Return `true` if the given type corresponds to a standard vector type
// according to our convention.
// The convention is a `ptr<struct<ptr<T>, ptr<T>, ptr<T>>>`.
Expand Down
5 changes: 5 additions & 0 deletions include/cudaq/Optimizer/Builder/Intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,16 @@ static constexpr const char getCudaqSizeFromTriple[] =
// typically specialized to be bit packed).
static constexpr const char stdvecBoolCtorFromInitList[] =
"__nvqpp_initializer_list_to_vector_bool";

// Convert a (likely packed) std::vector<bool> into a sequence of bytes, each
// holding a boolean value.
static constexpr const char stdvecBoolUnpackToInitList[] =
"__nvqpp_vector_bool_to_initializer_list";

// Free any temporary buffers used to hold std::vector<bool> data.
static constexpr const char stdvecBoolFreeTemporaryLists[] =
"__nvqpp_vector_bool_free_temporary_initlists";

// The internal data of the cudaq::state object must be `2**n` in length. This
// function returns the value `n`.
static constexpr const char getNumQubitsFromCudaqState[] =
Expand Down
21 changes: 21 additions & 0 deletions include/cudaq/Optimizer/Builder/Runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@

#include "cudaq/Optimizer/Builder/Factory.h"

//===----------------------------------------------------------------------===//
//
// Runtime helper functions are functions that will appear in the runtime
// library (implementations are defined in either the headers or libraries in
// the `runtime` directory). These helper functions may never be assumed to
// appear on the device-side, so these helpers should only be used in host-side
// code.
//
//===----------------------------------------------------------------------===//

namespace cudaq::runtime {

/// Prefix for all kernel entry functions.
Expand Down Expand Up @@ -52,4 +62,15 @@ static constexpr const char CudaqRegisterKernelName[] =
static constexpr const char cudaqAHSPrefixName[] =
"__analog_hamiltonian_kernel__";

// Host-side helper functions for working with `cudaq::pauli_word` or a
// `std::string`. These include both fully dynamic and binding time (library
// build time) helper functions.

// Name of the ModuleOp attribute that records the byte size of a host-side
// `std::string` / `cudaq::pauli_word` object as an integer attribute.
static constexpr const char sizeofStringAttrName[] = "cc.sizeof_string";
// Itanium-mangled symbol name of `cudaq::pauli_word::_nvqpp_size() const`.
static constexpr const char getPauliWordSize[] =
"_ZNK5cudaq10pauli_word11_nvqpp_sizeEv";
// Itanium-mangled symbol name of `cudaq::pauli_word::_nvqpp_data() const`.
static constexpr const char getPauliWordData[] =
"_ZNK5cudaq10pauli_word11_nvqpp_dataEv";
// Symbol names of the binding time string helpers. Both are declared as taking
// a `!cc.ptr<i8>`; the data helper returns a `!cc.ptr<i8>` and the size helper
// returns an `i64`.
static constexpr const char bindingGetStringData[] = "__nvqpp_getStringData";
static constexpr const char bindingGetStringSize[] = "__nvqpp_getStringSize";

} // namespace cudaq::runtime
5 changes: 5 additions & 0 deletions include/cudaq/Optimizer/Dialect/CC/CCTypes.td
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,12 @@ def cc_StructType : CCType<"Struct", "struct",
];

let extraClassDeclaration = [{
// O(1)
bool isEmpty() const { return getMembers().empty(); }

// O(n)
std::size_t getNumMembers() const { return getMembers().size(); }

Type getMember(unsigned position) { return getMembers()[position]; }
}];
}
Expand Down
26 changes: 23 additions & 3 deletions lib/Frontend/nvqpp/ASTBridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor<QPUCodeFinder> {
using Base = clang::RecursiveASTVisitor<QPUCodeFinder>;
explicit QPUCodeFinder(
cudaq::EmittedFunctionsCollection &funcsToEmit, clang::CallGraph &cgb,
clang::ItaniumMangleContext *mangler,
clang::ItaniumMangleContext *mangler, ModuleOp module,
std::unordered_map<std::string, std::string> &customOperations)
: functionsToEmit(funcsToEmit), callGraphBuilder(cgb), mangler(mangler),
customOperationNames(customOperations) {}
module(module), customOperationNames(customOperations) {}

/// Add a kernel to the list of kernels to process.
template <bool replace = true>
Expand Down Expand Up @@ -332,6 +332,25 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor<QPUCodeFinder> {
tuplesAreReversed = !opt->isZero();
}
}
if (cudaq::isInNamespace(x, "cudaq") &&
cudaq::isInNamespace(x, "details") &&
x->getName().equals("_nvqpp_sizeof")) {
// This constexpr is the sizeof a pauli_word and a std::string.
auto loc = x->getLocation();
auto opt = x->getAnyInitializer()->getIntegerConstantExpr(
x->getASTContext(), &loc, false);
assert(opt && "must compute the sizeof a cudaq::pauli_word");
auto sizeofString = opt->getZExtValue();
auto sizeAttr = module->getAttr(cudaq::runtime::sizeofStringAttrName);
if (sizeAttr) {
assert(sizeofString == cast<IntegerAttr>(sizeAttr).getUInt());
} else {
auto *ctx = module.getContext();
auto i64Ty = IntegerType::get(ctx, 64);
module->setAttr(cudaq::runtime::sizeofStringAttrName,
IntegerAttr::get(i64Ty, sizeofString));
}
}
// The check to make sure that quantum data types are only used in kernels
// is done here. This checks both variable declarations and parameters.
if (quantumTypesNotAllowed)
Expand All @@ -357,6 +376,7 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor<QPUCodeFinder> {
cudaq::EmittedFunctionsCollection &functionsToEmit;
clang::CallGraph &callGraphBuilder;
clang::ItaniumMangleContext *mangler;
ModuleOp module;
std::unordered_map<std::string, std::string> &customOperationNames;
// A class that is being visited. Need to run semantics checks on it if and
// only if it has a quantum kernel.
Expand Down Expand Up @@ -648,7 +668,7 @@ void ASTBridgeAction::ASTBridgeConsumer::HandleTranslationUnit(

bool ASTBridgeAction::ASTBridgeConsumer::HandleTopLevelDecl(
clang::DeclGroupRef dg) {
QPUCodeFinder finder(functionsToEmit, callGraphBuilder, mangler,
QPUCodeFinder finder(functionsToEmit, callGraphBuilder, mangler, module.get(),
customOperationNames);
// Loop over all decls, saving the function decls that are quantum kernels.
for (const auto *decl : dg)
Expand Down
4 changes: 3 additions & 1 deletion lib/Frontend/nvqpp/ConvertStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,9 @@ bool QuakeBridgeVisitor::VisitReturnStmt(clang::ReturnStmt *x) {
ValueRange{heapCopy, dynSize});
};
IRBuilder irb(builder);
Value tySize = irb.getByteSizeOfType(loc, eleTy);
Value tySize;
if (!cudaq::cc::isDynamicType(eleTy))
tySize = irb.getByteSizeOfType(loc, eleTy);
if (!tySize) {
TODO_x(toLocation(x), x, mangler, "unhandled vector element type");
return false;
Expand Down
54 changes: 36 additions & 18 deletions lib/Optimizer/Builder/Factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
******************************************************************************/

#include "cudaq/Optimizer/Builder/Intrinsics.h"
#include "cudaq/Optimizer/Builder/Runtime.h"
#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h"
#include "cudaq/Optimizer/Dialect/CC/CCOps.h"
#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h"
Expand Down Expand Up @@ -305,12 +306,15 @@ cc::LoopOp factory::createMonotonicLoop(
return loop;
}

cc::StructType factory::stlStringType(MLIRContext *ctx) {
cc::ArrayType factory::genHostStringType(ModuleOp mod) {
auto *ctx = mod.getContext();
auto i8Ty = IntegerType::get(ctx, 8);
auto ptrI8Ty = cc::PointerType::get(i8Ty);
auto i64Ty = IntegerType::get(ctx, 64);
auto padTy = cc::ArrayType::get(ctx, i8Ty, 16);
return cc::StructType::get(ctx, ArrayRef<Type>{ptrI8Ty, i64Ty, padTy});
auto sizeAttr = mod->getAttr(cudaq::runtime::sizeofStringAttrName);
if (sizeAttr) {
auto size = cast<IntegerAttr>(sizeAttr).getInt();
return cc::ArrayType::get(ctx, i8Ty, size);
}
return cc::ArrayType::get(ctx, i8Ty, sizeof(std::string));
}

// FIXME: We should get the underlying structure of a std::vector from the
Expand All @@ -321,6 +325,22 @@ cc::StructType factory::stlVectorType(Type eleTy) {
return cc::StructType::get(ctx, ArrayRef<Type>{ptrTy, ptrTy, ptrTy});
}

// Build the raw host-side layout of a std::vector<T>. On the host,
// std::vector<bool> has its own layout, distinct from every other
// std::vector<T>. That distinction is deliberately dropped when converting to
// the device side, where std::vector<bool> shares the format of
// std::vector<char>.
static cc::StructType stlHostVectorType(Type eleTy) {
  MLIRContext *ctx = eleTy.getContext();
  const bool isBoolElement = (eleTy == IntegerType::get(ctx, 1));
  if (isBoolElement) {
    // std::vector<bool>: a pointer to the element type followed by a 32-byte
    // i8 array.
    auto dataPtrTy = cc::PointerType::get(eleTy);
    auto byteTy = IntegerType::get(ctx, 8);
    auto padding = cc::ArrayType::get(ctx, byteTy, 32);
    return cc::StructType::get(ctx, ArrayRef<Type>{dataPtrTy, padding});
  }
  // std::vector<T> where T != bool uses the generic vector layout.
  return factory::stlVectorType(eleTy);
}

// FIXME: Give these front-end names so we can disambiguate more types.
cc::StructType factory::getDynamicBufferType(MLIRContext *ctx) {
auto ptrTy = cc::PointerType::get(IntegerType::get(ctx, 8));
Expand All @@ -342,24 +362,19 @@ Type factory::getSRetElementType(FunctionType funcTy) {
return funcTy.getResult(0);
}

static Type convertToHostSideType(Type ty) {
Type factory::convertToHostSideType(Type ty, ModuleOp mod) {
if (auto memrefTy = dyn_cast<cc::StdvecType>(ty))
return convertToHostSideType(
factory::stlVectorType(memrefTy.getElementType()));
return stlHostVectorType(
convertToHostSideType(memrefTy.getElementType(), mod));
if (isa<cc::IndirectCallableType>(ty))
return cc::PointerType::get(IntegerType::get(ty.getContext(), 8));
if (auto memrefTy = dyn_cast<cc::CharspanType>(ty)) {
// `pauli_word` is an object with a std::vector in the header files at
// present. This data type *must* be updated if it becomes a std::string
// once again.
return convertToHostSideType(
factory::stlVectorType(IntegerType::get(ty.getContext(), 8)));
}
if (auto csTy = dyn_cast<cc::CharspanType>(ty))
return genHostStringType(mod);
auto *ctx = ty.getContext();
if (auto structTy = dyn_cast<cc::StructType>(ty)) {
SmallVector<Type> newMembers;
for (auto mem : structTy.getMembers())
newMembers.push_back(convertToHostSideType(mem));
newMembers.push_back(convertToHostSideType(mem, mod));
if (structTy.getName())
return cc::StructType::get(ctx, structTy.getName(), newMembers,
structTy.getBitSize(), structTy.getAlignment(),
Expand Down Expand Up @@ -579,7 +594,7 @@ FunctionType factory::toHostSideFuncType(FunctionType funcTy, bool addThisPtr,
// returned via a sret argument in the first position. When this argument
// is added, the this pointer becomes the second argument. Both are opaque
// pointers at this point.
auto eleTy = convertToHostSideType(getSRetElementType(funcTy));
auto eleTy = convertToHostSideType(getSRetElementType(funcTy), module);
inputTys.push_back(cc::PointerType::get(eleTy));
hasSRet = true;
} else {
Expand All @@ -595,7 +610,7 @@ FunctionType factory::toHostSideFuncType(FunctionType funcTy, bool addThisPtr,

// Add all the explicit (not hidden) arguments after the hidden ones.
for (auto kernelTy : funcTy.getInputs()) {
auto hostTy = convertToHostSideType(kernelTy);
auto hostTy = convertToHostSideType(kernelTy, module);
if (auto strTy = dyn_cast<cc::StructType>(hostTy)) {
// On x86_64 and aarch64, a struct that is smaller than 128 bits may be
// passed in registers as separate arguments. See classifyArgumentType()
Expand Down Expand Up @@ -636,6 +651,9 @@ FunctionType factory::toHostSideFuncType(FunctionType funcTy, bool addThisPtr,
}
// Pass a struct as a byval pointer.
hostTy = cc::PointerType::get(hostTy);
} else if (isa<cc::ArrayType>(hostTy)) {
// Pass a raw data block as a pointer. (It's a struct passed as a blob.)
hostTy = cc::PointerType::get(hostTy);
}
inputTys.push_back(hostTy);
}
Expand Down
29 changes: 28 additions & 1 deletion lib/Optimizer/Builder/Intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@ inline bool operator<(const IntrinsicCode &icode, const IntrinsicCode &jcode) {
/// well as prototypes for LLVM intrinsics and C library calls that are used by
/// the compiler. The table should be kept in sorted order.
static constexpr IntrinsicCode intrinsicTable[] = {
// The following pauli_word helper functions are only available on the
// host-side. They ought not be called in kernel code.
{cudaq::runtime::getPauliWordData,
{},
"func.func private @_ZNK5cudaq10pauli_word11_nvqpp_dataEv(%pw : "
"!cc.ptr<i8>) -> !cc.ptr<i8>"},
{cudaq::runtime::getPauliWordSize,
{cudaq::runtime::getPauliWordData, cudaq::runtime::bindingGetStringData,
cudaq::runtime::bindingGetStringSize},
"func.func private @_ZNK5cudaq10pauli_word11_nvqpp_sizeEv(%pw : "
"!cc.ptr<i8>) -> i64"},

// Initialize a (preallocated) buffer (the first parameter) with i64 values
// on the semi-open range `[0..n)` where `n` is the second parameter.
{cudaq::runtime::getLinkableKernelKey,
Expand Down Expand Up @@ -292,6 +304,15 @@ static constexpr IntrinsicCode intrinsicTable[] = {
func.func private @__nvqpp_getStateVectorLength_fp64(%p : i64, %o : i64) -> i64
)#"},

// Quasi-portable entry points for use with non-C++ front ends (Python).
{cudaq::runtime::bindingGetStringData,
{},
"func.func private @__nvqpp_getStringData(%p: !cc.ptr<i8>) -> "
"!cc.ptr<i8>"},
{cudaq::runtime::bindingGetStringSize,
{},
"func.func private @__nvqpp_getStringSize(%p: !cc.ptr<i8>) -> i64"},

// __nvqpp_initializer_list_to_vector_bool
{cudaq::stdvecBoolCtorFromInitList,
{},
Expand All @@ -307,11 +328,17 @@ static constexpr IntrinsicCode intrinsicTable[] = {
return %0 : !cc.ptr<i8>
})#"},

// __nvqpp_vector_bool_free_temporary_initlists
{cudaq::stdvecBoolFreeTemporaryLists,
{},
R"#(
func.func private @__nvqpp_vector_bool_free_temporary_initlists(!cc.ptr<i8>) -> ())#"},

// __nvqpp_vector_bool_to_initializer_list
{cudaq::stdvecBoolUnpackToInitList,
{},
R"#(
func.func private @__nvqpp_vector_bool_to_initializer_list(!cc.ptr<!cc.struct<{!cc.ptr<i1>, !cc.ptr<i1>, !cc.ptr<i1>}>>, !cc.ptr<!cc.struct<{!cc.ptr<i1>, !cc.ptr<i1>, !cc.ptr<i1>}>>) -> ())#"},
func.func private @__nvqpp_vector_bool_to_initializer_list(!cc.ptr<!cc.struct<{!cc.ptr<i1>, !cc.ptr<i1>, !cc.ptr<i1>}>>, !cc.ptr<!cc.struct<{!cc.ptr<i1>, !cc.array<i8 x 32>}>>, !cc.ptr<!cc.ptr<i8>>) -> ())#"},

{"__nvqpp_zeroDynamicResult", {}, R"#(
func.func private @__nvqpp_zeroDynamicResult() -> !cc.struct<{!cc.ptr<i8>, i64}> {
Expand Down
4 changes: 4 additions & 0 deletions lib/Optimizer/Dialect/CC/CCOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ Value cudaq::cc::getByteSizeOfType(OpBuilder &builder, Location loc, Type ty,
return builder.create<arith::MulIOp>(loc, builder.getI64Type(), v,
scale);
})
.Case([&](cudaq::cc::SpanLikeType) -> Value {
// Uniformly on the device side: {ptr, i64}
return createInt(16);
})
.Default({});
}

Expand Down
2 changes: 1 addition & 1 deletion lib/Optimizer/Dialect/CC/CCTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ Type cc::SpanLikeType::getElementType() const {
}

bool isDynamicType(Type ty) {
if (isa<StdvecType>(ty))
if (isa<SpanLikeType>(ty))
return true;
if (auto strTy = dyn_cast<StructType>(ty)) {
for (auto memTy : strTy.getMembers())
Expand Down
5 changes: 4 additions & 1 deletion lib/Optimizer/Transforms/DecompositionPatterns.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,14 +362,17 @@ struct ExpPauliDecomposition : public OpRewritePattern<quake::ExpPauliOp> {
auto strAttr = cast<mlir::StringAttr>(attr.value());
optPauliWordStr = strAttr.getValue();
}
} else if (auto lit = addrOp.getDefiningOp<
cudaq::cc::CreateStringLiteralOp>()) {
optPauliWordStr = lit.getStringLiteral();
}
}
}
}

// Assert that we have a constant known pauli word
if (!optPauliWordStr.has_value())
return failure();
return expPauliOp.emitOpError("cannot determine pauli word string");

auto pauliWordStr = optPauliWordStr.value();

Expand Down
Loading
Loading