diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index 1111dbabfb..f2ef1ada4c 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -69,7 +69,7 @@ env: NGC_QUANTUM_TEAM: cuda-quantum NVQC_FUNCTION_ID: 3bfa0342-7d2a-4f1b-8e81-b6608d28ca7d # :::: - NGC_NVQC_DEPLOYMENT_SPEC: GFN:L40:gl40_1.br20_2xlarge:1:1 + NGC_NVQC_DEPLOYMENT_SPEC: GFN:L40S:gl40s_1.br25_2xlarge:1:1 # If vars below are changed, it is recommended to also update the # workflow_dispatch defaults above so they stay in sync. cudaq_test_image: nvcr.io/nvidia/nightly/cuda-quantum:latest @@ -490,7 +490,7 @@ jobs: export ORCA_ACCESS_URL='${{ secrets.ORCA_ACCESS_URL }}' set +e # Allow script to keep going through errors test_err_sum=0 - cpp_tests="docs/sphinx/examples/cpp/providers/orca.cpp" + cpp_tests="docs/sphinx/examples/cpp/providers/orca.cpp docs/sphinx/examples/cpp/providers/orca_mqpu.cpp" for filename in $cpp_tests; do [ -e "$filename" ] || echo "::error::Couldn't find file ($filename)" nvq++ --target orca --orca-url $ORCA_ACCESS_URL $filename @@ -509,7 +509,7 @@ jobs: test_err_sum=$((test_err_sum+1)) fi done - python_tests="docs/sphinx/examples/python/providers/orca.py" + python_tests="docs/sphinx/examples/python/providers/orca.py docs/sphinx/examples/python/providers/orca_mqpu.py" for filename in $python_tests; do [ -e "$filename" ] || echo "::error::Couldn't find file ($filename)" python3 $filename 1> /dev/null @@ -607,8 +607,8 @@ jobs: for filename in `find targettests/Remote-Sim -name '*.cpp'`; do # unsupport_args and compile_errors are compile error tests # nested_vectors: Compiler fails to synthesize nested vector parameters (https://github.com/NVIDIA/cuda-quantum/issues/2001) - # state_init: New argument synthesis is not executed for nvqc (https://github.com/NVIDIA/cuda-quantum/issues/2146) - if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != 
*"nested_vectors"* ]] && [[ "$filename" != *"qvector_init_from_state"* ]]; then + # qvector_init_from_state, qvector_init_from_state_lazy, test_trotter: New argument synthesis is not executed for nvqc (https://github.com/NVIDIA/cuda-quantum/issues/2146) + if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"qvector_init_from_state"* ]] && [[ "$filename" != *"qvector_init_from_state_lazy"* ]] && [[ "$filename" != *"test_trotter"* ]]; then echo "$filename" nvqc_config="" # Look for a --remote-mqpu-auto-launch to determine the number of QPUs diff --git a/.github/workflows/nvqc_regression_tests.yml b/.github/workflows/nvqc_regression_tests.yml index 571cbb191f..10cab6a077 100644 --- a/.github/workflows/nvqc_regression_tests.yml +++ b/.github/workflows/nvqc_regression_tests.yml @@ -128,8 +128,9 @@ jobs: # nested_vectors: related to vector of pauli_words (https://github.com/NVIDIA/cuda-quantum/issues/1957) # custom_operation: https://github.com/NVIDIA/cuda-quantum/issues/1985 # return_values: only supported in 0.8 NVQC service. - # state_init: only supported in 0.8 NVQC service. - if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]] && [[ "$filename" != *"custom_operation"* ]] && [[ "$filename" != *"return_values"* ]] && [[ "$filename" != *"qvector_init_from_state"* ]] && [[ "$filename" != *"qvector_init_from_vector"* ]]; then + # qvector_init_from_vector: only supported in 0.8 NVQC service. 
+ # qvector_init_from_state, qvector_init_from_state_lazy, test_trotter: not supported yet on nvqc: https://github.com/NVIDIA/cuda-quantum/issues/2146 + if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"compile_errors"* ]] && [[ "$filename" != *"nested_vectors"* ]] && [[ "$filename" != *"pauli_word"* ]] && [[ "$filename" != *"custom_operation"* ]] && [[ "$filename" != *"return_values"* ]] && [[ "$filename" != *"qvector_init_from_state"* ]] && [[ "$filename" != *"qvector_init_from_state_lazy"* ]] && [[ "$filename" != *"qvector_init_from_vector"* ]] && [[ "$filename" != *"test_trotter"* ]]; then echo "$filename" nvqc_config="" # Look for a --remote-mqpu-auto-launch to determine the number of QPUs diff --git a/cmake/Modules/CUDAQConfig.cmake b/cmake/Modules/CUDAQConfig.cmake index 507cadeaea..6f893a250b 100644 --- a/cmake/Modules/CUDAQConfig.cmake +++ b/cmake/Modules/CUDAQConfig.cmake @@ -107,6 +107,13 @@ add_library(cudaq::cudaq-qpp-density-matrix-cpu-target SHARED IMPORTED) IMPORTED_LOCATION "${CUDAQ_LIBRARY_DIR}/libnvqir-dm${CMAKE_SHARED_LIBRARY_SUFFIX}" IMPORTED_SONAME "libnvqir-dm${CMAKE_SHARED_LIBRARY_SUFFIX}" IMPORTED_LINK_INTERFACE_LIBRARIES "cudaq::cudaq-platform-default;cudaq::cudaq-em-default") + +# QPP photonics Target +add_library(cudaq::cudaq-photonics-cpu-target SHARED IMPORTED) +set_target_properties(cudaq::cudaq-photonics-cpu-target PROPERTIES + IMPORTED_LOCATION "${CUDAQ_LIBRARY_DIR}/libnvqir-photonics${CMAKE_SHARED_LIBRARY_SUFFIX}" + IMPORTED_SONAME "libnvqir-photonics${CMAKE_SHARED_LIBRARY_SUFFIX}" + IMPORTED_LINK_INTERFACE_LIBRARIES "cudaq::cudaq-platform-default;cudaq::cudaq-em-photonics") # ------------------------- if(NOT TARGET cudaq::cudaq) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 35c9acbe74..7eaca661b7 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -103,7 +103,7 @@ if (CUSTATEVEC_ROOT AND CUDA_FOUND) endif() endif() -add_nvqpp_test(photonics_sim 
providers/photonics.cpp TARGET photonics) +add_nvqpp_test(photonics_sim providers/photonics.cpp TARGET photonics-cpu) add_nvqpp_test(SampleAsyncRemote using/cudaq/platform/sample_async_remote.cpp TARGET remote-mqpu SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/snippets/cpp) set_tests_properties( nvqpp_SampleAsyncRemote @@ -141,6 +141,7 @@ if (CUDAQ_ENABLE_PYTHON) add_pycudaq_test(DepolarizingNoise noise_depolarization.py) add_pycudaq_test(PhaseFlipNoise noise_phase_flip.py) add_pycudaq_test(KrausNoise noise_kraus_operator.py) + add_pycudaq_test(NoiseCallback noise_callback.py) if (CUTENSORNET_ROOT AND CUDA_FOUND) # This example uses tensornet backend. diff --git a/docs/sphinx/api/default_ops.rst b/docs/sphinx/api/default_ops.rst index bbd2b37fa0..19b658c9f9 100644 --- a/docs/sphinx/api/default_ops.rst +++ b/docs/sphinx/api/default_ops.rst @@ -638,31 +638,67 @@ operations, each operating on 2 qubits. Photonic Operations on Qudits ============================= -These operations are valid only on the `photonics` target which does not support the quantum operations above. +These operations are valid only on the `photonics-cpu` target which does not support +the quantum operations above. -:code:`plus` +:code:`create` --------------------- -This is a place-holder, to be updated later. +This operation increments the number of photons in a qumode up to a maximum value +defined by the qudit level that represents the qumode. If it is applied to a qumode +where the number of photons is already at the maximum value, the operation has no +effect. + +:math:`U|0\rangle → |1\rangle, U|1\rangle → |2\rangle, U|2\rangle → |3\rangle, \cdots, U|d\rangle → |d\rangle` +where :math:`d` is the qudit level. .. tab:: Python .. code-block:: python q = qudit(3) - plus(q) + create(q) .. tab:: C++ .. 
code-block:: cpp cudaq::qvector<3> q(1); - plus(q[0]); + create(q[0]); + +:code:`annihilate` +--------------------- + +This operation reduces the number of photons in a qumode up to a minimum value of +0 representing the vacuum state. If it is applied to a qumode where the number of +photons is already at the minimum value 0, the operation has no effect. + +:math:`U|0\rangle → |0\rangle, U|1\rangle → |0\rangle, U|2\rangle → |1\rangle, \cdots, U|d\rangle → |d-1\rangle` +where :math:`d` is the qudit level. + +.. tab:: Python + + .. code-block:: python + + q = qudit(3) + annihilate(q) + +.. tab:: C++ + + .. code-block:: cpp + + cudaq::qvector<3> q(1); + annihilate(q[0]); :code:`phase_shift` --------------------- -This is a place-holder, to be updated later. +A phase shifter adds a phase :math:`\phi` on a qumode. For the annihilation (:math:`a_1`) +and creation operators (:math:`a_1^\dagger`) of a qumode, the phase shift operator +is defined by + +.. math:: + P(\phi) = \exp\left(i \phi a_1^\dagger a_1 \right) .. tab:: Python @@ -681,7 +717,13 @@ This is a place-holder, to be updated later. :code:`beam_splitter` --------------------- -This is a place-holder, to be updated later. +Beam splitters act on two qumodes together and it is parametrized by a single angle +:math:`\theta`, relating to reflectivity. +For the annihilation (:math:`a_1` and :math:`a_2`) and creation operators (:math:`a_1^\dagger` +and :math:`a_2^\dagger`) of two qumodes, the beam splitter operator is defined by + +.. math:: + B(\theta) = \exp\left[i \theta (a_1^\dagger a_2 + a_1 a_2^\dagger) \right] .. tab:: Python @@ -700,19 +742,19 @@ This is a place-holder, to be updated later. :code:`mz` --------------------- -This operation returns the measurement results of the input qudit(s). +This operation returns the measurement results of the input qumode(s). .. tab:: Python .. 
code-block:: python - qutrits = [qudit(3) for _ in range(2)] - mz(qutrits) + qumodes = [qudit(3) for _ in range(2)] + mz(qumodes) .. tab:: C++ .. code-block:: cpp - cudaq::qvector<3> qutrits(2); - mz(qutrits); + cudaq::qvector<3> qumodes(2); + mz(qumodes); diff --git a/docs/sphinx/api/languages/cpp_api.rst b/docs/sphinx/api/languages/cpp_api.rst index 99e0b2eb07..8e5ef24e26 100644 --- a/docs/sphinx/api/languages/cpp_api.rst +++ b/docs/sphinx/api/languages/cpp_api.rst @@ -44,6 +44,8 @@ Common .. doxygenclass:: cudaq::async_result :members: +.. doxygentypedef:: async_sample_result + .. doxygenstruct:: cudaq::ExecutionResult :members: @@ -74,6 +76,8 @@ Common .. doxygenclass:: cudaq::RemoteSimulationState +.. doxygenstruct:: cudaq::PhotonicState + .. doxygenclass:: cudaq::registry::RegisteredType :members: @@ -115,6 +119,8 @@ Noise Modeling .. doxygenclass:: cudaq::noise_model :members: +.. doxygenenum:: cudaq::noise_model_type + Kernel Builder =============== @@ -166,7 +172,9 @@ Platform .. doxygenclass:: cudaq::BaseRemoteSimulatorQPU -.. doxygenclass:: cudaq::BaseNvcfSimulatorQPU +.. doxygenclass:: cudaq::BaseNvcfSimulatorQPU + +.. doxygenclass:: cudaq::OrcaRemoteRESTQPU .. doxygenclass:: cudaq::quantum_platform :members: @@ -229,5 +237,7 @@ Namespaces .. doxygennamespace:: cudaq::orca :desc-only: -.. doxygenfunction:: cudaq::orca::sample(std::vector &input_state, std::vector &loop_lengths, std::vector &bs_angles, int n_samples = 10000) -.. doxygenfunction:: cudaq::orca::sample(std::vector &input_state, std::vector &loop_lengths, std::vector &bs_angles, std::vector &ps_angles, int n_samples = 10000) +.. doxygenfunction:: cudaq::orca::sample(std::vector &input_state, std::vector &loop_lengths, std::vector &bs_angles, int n_samples = 10000, std::size_t qpu_id = 0) +.. doxygenfunction:: cudaq::orca::sample(std::vector &input_state, std::vector &loop_lengths, std::vector &bs_angles, std::vector &ps_angles, int n_samples = 10000, std::size_t qpu_id = 0) +.. 
doxygenfunction:: cudaq::orca::sample_async(std::vector &input_state, std::vector &loop_lengths, std::vector &bs_angles, int n_samples = 10000, std::size_t qpu_id = 0) +.. doxygenfunction:: cudaq::orca::sample_async(std::vector &input_state, std::vector &loop_lengths, std::vector &bs_angles, std::vector &ps_angles, int n_samples = 10000, std::size_t qpu_id = 0) diff --git a/docs/sphinx/examples/cpp/basics/noise_callback.cpp b/docs/sphinx/examples/cpp/basics/noise_callback.cpp new file mode 100644 index 0000000000..9c8a9724dd --- /dev/null +++ b/docs/sphinx/examples/cpp/basics/noise_callback.cpp @@ -0,0 +1,67 @@ +// Compile and run with: +// ``` +// nvq++ noise_callback.cpp --target density-matrix-cpu -o dyn.x +// && ./dyn.x +// ``` +// +// Note: You must set the target to a density matrix backend for the noise +// to successfully impact the system. + +#include +#include + +// CUDA-Q supports several different models of noise. In this +// case, we will examine the dynamic noise channel specified as a callback +// function. + +int main() { + + // We will begin by defining an empty noise model that we will add + // our channel to. + cudaq::noise_model noise; + // Noise model callback function + const auto rx_noise = [](const auto &qubits, + const auto ¶ms) -> cudaq::kraus_channel { + // Model a pulse-length based rotation gate: + // the bigger the angle, the longer the pulse, i.e., more amplitude damping. + auto angle = params[0]; + // Normalize the angle into the [0, 2*pi] range + while (angle > 2. * M_PI) + angle -= 2. * M_PI; + + while (angle < 0) + angle += 2. * M_PI; + // Damping rate is linearly proportional to the angle + const auto damping_rate = angle / (2. 
* M_PI); + std::cout << "Angle = " << params[0] + << ", amplitude damping rate = " << damping_rate << "\n"; + return cudaq::amplitude_damping_channel(damping_rate); + }; + + // Bind the noise model callback function to the `rx` gate + noise.add_channel(rx_noise); + + auto kernel = [](double angle) __qpu__ { + cudaq::qubit q; + rx(angle, q); + mz(q); + }; + + // Now let's set the noise and we're ready to run the simulation! + cudaq::set_noise(noise); + + // Our results should show measurements in both the |0> and |1> states, + // indicating that the noise has successfully impacted the system. Note: a + // `rx(pi)` is equivalent to a Pauli X gate, and thus, it should be in the |1> + // state if no noise is present. + auto noisy_counts = cudaq::sample(kernel, M_PI); + std::cout << "Noisy result:\n"; + noisy_counts.dump(); + + // To confirm this, we can run the simulation again without noise. + cudaq::unset_noise(); + auto noiseless_counts = cudaq::sample(kernel, M_PI); + std::cout << "Noiseless result:\n"; + noiseless_counts.dump(); + return 0; +} diff --git a/docs/sphinx/examples/cpp/other/trotter_kernel_mode.cpp b/docs/sphinx/examples/cpp/other/trotter_kernel_mode.cpp index ef6d9a22ea..8edda798e1 100644 --- a/docs/sphinx/examples/cpp/other/trotter_kernel_mode.cpp +++ b/docs/sphinx/examples/cpp/other/trotter_kernel_mode.cpp @@ -42,10 +42,11 @@ int SPINS = 11; // set to around 25 qubits for `nvidia` target int STEPS = 10; // set to around 100 for `nvidia` target // Compile and run with: +// clang-format off // ``` -// nvq++ --enable-mlir -v trotter_kernel_mode.cpp -o trotter.x -target nvidia && -// ./trotter.x +// nvq++ --enable-mlir -v trotter_kernel_mode.cpp -o trotter.x --target nvidia && ./trotter.x // ``` +// clang-format on // Alternating up/down spins struct initState { diff --git a/docs/sphinx/examples/cpp/providers/orca.cpp b/docs/sphinx/examples/cpp/providers/orca.cpp index a23f7aa49e..8ed95926ee 100644 --- a/docs/sphinx/examples/cpp/providers/orca.cpp +++ 
b/docs/sphinx/examples/cpp/providers/orca.cpp @@ -5,21 +5,16 @@ // To use the ORCA Computing target you will need to set the ORCA_ACCESS_URL // environment variable or pass the URL to the `--orca-url` flag. -#include "cudaq/orca.h" -#include "cudaq.h" - -// define helper function to generate linear spaced vectors -template -void linear_spaced_vector(std::vector &xs, T min, T max, std::size_t N) { - T h = (max - min) / static_cast(N - 1); - typename std::vector::iterator x; - T val; - for (x = xs.begin(), val = min; x != xs.end(); ++x, val += h) { - *x = val; - } -} +#include +#include +#include +#include +#include +#include int main() { + using namespace std::this_thread; // sleep_for, sleep_until + using namespace std::chrono_literals; // `ns`, `us`, `ms`, `s`, `h`, etc. // A time-bin boson sampling experiment: An input state of 4 indistinguishable // photons mixed with 4 vacuum states across 8 time bins (modes) enter the @@ -33,10 +28,10 @@ int main() { // half of 8 time bins is filled with a single photon and the other half is // filled with the vacuum state (empty) - std::vector input_state{1, 0, 1, 0, 1, 0, 1, 0}; + std::vector input_state = {1, 0, 1, 0, 1, 0, 1, 0}; // The time bin interferometer in this example has two loops, each of length 1 - std::vector loop_lengths{1, 1}; + std::vector loop_lengths = {1, 1}; // helper variables to calculate the number of beam splitters and phase // shifters needed in the TBI @@ -47,24 +42,28 @@ int main() { const std::size_t n_beam_splitters = n_loops * n_modes - sum_loop_lengths; // beam splitter angles (created as a linear spaced vector of angles) - std::vector bs_angles(n_beam_splitters); - linear_spaced_vector(bs_angles, M_PI / 8, M_PI / 3, n_beam_splitters); + std::vector bs_angles = + cudaq::linspace(M_PI / 3, M_PI / 6, n_beam_splitters); // Optionally, we can also specify the phase shifter angles (created as a // linear spaced vector of angles), if the system includes phase shifters // ``` - // std::vector 
ps_angles(n_beam_splitters); - // linear_spaced_vector(ps_angles, M_PI / 6, M_PI / 3, n_beam_splitters); + // std::vector ps_angles = cudaq::linspace(M_PI / 3, M_PI / 5, + // n_beam_splitters); // ``` // we can also set number of requested samples int n_samples{10000}; - // Submit to ORCA synchronously (e.g., wait for the job result to be returned - // before proceeding with the rest of the execution). + // Submit to ORCA synchronously (e.g., wait for the job result to be + // returned before proceeding with the rest of the execution). + std::cout << "Submitting to ORCA Server synchronously" << std::endl; auto counts = cudaq::orca::sample(input_state, loop_lengths, bs_angles, n_samples); + // Print the results + counts.dump(); + // If the system includes phase shifters, the phase shifter angles can be // included in the call @@ -73,8 +72,27 @@ int main() { // ps_angles, n_samples); // ``` - // Print the results - counts.dump(); + // Alternatively we can submit to ORCA asynchronously (e.g., continue + // executing code in the file until the job has been returned). + std::cout << "Submitting to ORCA Server asynchronously" << std::endl; + auto async_results = cudaq::orca::sample_async(input_state, loop_lengths, + bs_angles, n_samples); + + // Can write the future to file: + { + std::ofstream out("saveMe.json"); + out << async_results; + } + + // Then come back and read it in later. + cudaq::async_result readIn; + std::ifstream in("saveMe.json"); + in >> readIn; + + sleep_for(200ms); // wait for the job to be processed + // Get the results of the read in future. 
+ auto async_counts = readIn.get(); + async_counts.dump(); return 0; } \ No newline at end of file diff --git a/docs/sphinx/examples/cpp/providers/orca_mqpu.cpp b/docs/sphinx/examples/cpp/providers/orca_mqpu.cpp new file mode 100644 index 0000000000..afd2b4121f --- /dev/null +++ b/docs/sphinx/examples/cpp/providers/orca_mqpu.cpp @@ -0,0 +1,42 @@ +// Compile and run with: +// ``` +// nvq++ orca_mqpu.cpp --target orca --orca-url \ +// "http://localhost:3035,http://localhost:3037" -o out.x && ./out.x +// ``` +// See accompanying example `orca.cpp` for detailed explanation. + +#include +#include +#include +#include + +int main() { + + auto &platform = cudaq::get_platform(); + auto num_qpus = platform.num_qpus(); + printf("Number of QPUs: %zu\n", num_qpus); + + // A time-bin boson sampling experiment + std::vector input_state = {1, 0, 1, 0, 1, 0, 1, 0}; + std::vector loop_lengths = {1, 1}; + std::size_t sum_loop_lengths{std::accumulate( + loop_lengths.begin(), loop_lengths.end(), static_cast(0))}; + const std::size_t n_loops = loop_lengths.size(); + const std::size_t n_modes = input_state.size(); + const std::size_t n_beam_splitters = n_loops * n_modes - sum_loop_lengths; + std::vector bs_angles = + cudaq::linspace(M_PI / 3, M_PI / 6, n_beam_splitters); + int n_samples{10000}; + + std::cout << "Submitting to ORCA Server asynchronously" << std::endl; + std::vector countFutures; + for (std::size_t i = 0; i < num_qpus; i++) { + countFutures.emplace_back(cudaq::orca::sample_async( + input_state, loop_lengths, bs_angles, n_samples, i)); + } + + for (auto &counts : countFutures) { + counts.get().dump(); + } + return 0; +} diff --git a/docs/sphinx/examples/cpp/providers/photonics.cpp b/docs/sphinx/examples/cpp/providers/photonics.cpp index a25756c7f8..2933345005 100644 --- a/docs/sphinx/examples/cpp/providers/photonics.cpp +++ b/docs/sphinx/examples/cpp/providers/photonics.cpp @@ -1,6 +1,6 @@ // Compile and run with: // ``` -// nvq++ --target photonics photonics.cpp +// nvq++ 
--target photonics-cpu photonics.cpp // ./a.out // ``` @@ -9,11 +9,11 @@ struct photonicsKernel { void operator()() __qpu__ { - cudaq::qvector<3> qutrits(2); - plus(qutrits[0]); - plus(qutrits[1]); - plus(qutrits[1]); - mz(qutrits); + cudaq::qvector<3> qumodes(2); + create(qumodes[0]); + create(qumodes[1]); + create(qumodes[1]); + mz(qumodes); } }; diff --git a/docs/sphinx/examples/cpp/providers/photonics_tbi.cpp b/docs/sphinx/examples/cpp/providers/photonics_tbi.cpp deleted file mode 100644 index 1c62cb9cf0..0000000000 --- a/docs/sphinx/examples/cpp/providers/photonics_tbi.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// Compile and run with: -// ``` -// nvq++ --target photonics photonics_tbi.cpp -o tbi.x -// ./tbi.x -// ``` - -#include "cudaq.h" -#include "cudaq/photonics.h" - -#include - -// Global variables -static const std::size_t one{1}; - -static constexpr std::size_t n_modes{4}; -static constexpr std::array input_state{2, 1, 3, 1}; - -static constexpr std::size_t d{ - std::accumulate(input_state.begin(), input_state.end(), one)}; - -struct TBI { - auto operator()(std::vector const &bs_angles, - std::vector const &ps_angles, - std::vector const &input_state, - std::vector const &loop_lengths) __qpu__ { - auto n_modes = ::n_modes; - const auto d = ::d; - - cudaq::qvector quds(n_modes); // |00...00> d-dimensions - for (std::size_t i = 0; i < n_modes; i++) { - for (std::size_t j = 0; j < input_state[i]; j++) { - plus(quds[i]); // setting to |input_state> - } - } - - std::size_t c = 0; - for (std::size_t ll : loop_lengths) { - for (std::size_t i = 0; i < (n_modes - ll); i++) { - beam_splitter(quds[i], quds[i + ll], bs_angles[c]); - phase_shift(quds[i], ps_angles[c]); - c++; - } - } - mz(quds); - } -}; - -template -void LinearSpacedArray(std::vector &xs, T min, T max, std::size_t N) { - T h = (max - min) / static_cast(N - 1); - typename std::vector::iterator x; - T val; - for (x = xs.begin(), val = min; x != xs.end(); ++x, val += h) { - *x = val; - } -} - -int main() { - 
std::size_t n_loops{2}; - std::vector loop_lengths{1, 2}; - std::vector input_state(std::begin(::input_state), - std::end(::input_state)); - - const std::size_t zero{0}; - std::size_t sum_loop_lenghts{ - std::accumulate(loop_lengths.begin(), loop_lengths.end(), zero)}; - - std::size_t n_beam_splitters = n_loops * ::n_modes - sum_loop_lenghts; - - std::vector bs_angles(n_beam_splitters); - std::vector ps_angles(n_beam_splitters); - - LinearSpacedArray(bs_angles, M_PI / 3, M_PI / 6, n_beam_splitters); - LinearSpacedArray(ps_angles, M_PI / 3, M_PI / 5, n_beam_splitters); - - auto counts = cudaq::sample(1000000, TBI{}, bs_angles, ps_angles, input_state, - loop_lengths); - - for (auto &[k, v] : counts) { - std::cout << k << ":" << v << " "; - } - std::cout << std::endl; - return 0; -} \ No newline at end of file diff --git a/docs/sphinx/examples/cpp/providers/photonics_tbi_get_state.cpp b/docs/sphinx/examples/cpp/providers/photonics_tbi_get_state.cpp new file mode 100644 index 0000000000..5fe4f7e34d --- /dev/null +++ b/docs/sphinx/examples/cpp/providers/photonics_tbi_get_state.cpp @@ -0,0 +1,69 @@ +// Compile and run with: +// ``` +// nvq++ --target photonics-cpu photonics_tbi_get_state.cpp && ./a.out +// ``` + +#include +#include +#include + +// Global variables +static constexpr std::size_t one = 1; + +static constexpr std::size_t n_modes = 8; +static constexpr std::array input_state = {1, 0, 1, 0, + 1, 0, 1, 0}; + +static constexpr std::size_t d = + std::accumulate(input_state.begin(), input_state.end(), one); + +struct TBI { + auto operator()(std::vector const &bs_angles, + std::vector const &ps_angles, + std::vector const &input_state, + std::vector const &loop_lengths) __qpu__ { + auto n_modes = ::n_modes; + const auto d = ::d; + + cudaq::qvector qumodes(n_modes); // |00...00> d-dimensions + for (std::size_t i = 0; i < n_modes; i++) { + for (std::size_t j = 0; j < input_state[i]; j++) { + create(qumodes[i]); // setting to |input_state> + } + } + + std::size_t c = 
0; + for (std::size_t ll : loop_lengths) { + for (std::size_t i = 0; i < (n_modes - ll); i++) { + beam_splitter(qumodes[i], qumodes[i + ll], bs_angles[c]); + phase_shift(qumodes[i], ps_angles[c]); + c++; + } + } + } +}; + +int main() { + std::size_t n_loops = 2; + std::vector loop_lengths = {1, 1}; + std::vector input_state(std::begin(::input_state), + std::end(::input_state)); + + const std::size_t zero = 0; + std::size_t sum_loop_lenghts{ + std::accumulate(loop_lengths.begin(), loop_lengths.end(), zero)}; + + std::size_t n_beam_splitters = n_loops * ::n_modes - sum_loop_lenghts; + + std::vector bs_angles = + cudaq::linspace(M_PI / 3, M_PI / 6, n_beam_splitters); + std::vector ps_angles = + cudaq::linspace(M_PI / 3, M_PI / 5, n_beam_splitters); + + auto state = + cudaq::get_state(TBI{}, bs_angles, ps_angles, input_state, loop_lengths); + + state.dump(); + + return 0; +} \ No newline at end of file diff --git a/docs/sphinx/examples/cpp/providers/photonics_tbi_sample.cpp b/docs/sphinx/examples/cpp/providers/photonics_tbi_sample.cpp new file mode 100644 index 0000000000..7533978128 --- /dev/null +++ b/docs/sphinx/examples/cpp/providers/photonics_tbi_sample.cpp @@ -0,0 +1,69 @@ +// Compile and run with: +// ``` +// nvq++ --target photonics-cpu photonics_tbi_sample.cpp && ./a.out +// ``` + +#include +#include +#include + +// Global variables +static constexpr std::size_t one = 1; + +static constexpr std::size_t n_modes = 8; +static constexpr std::array input_state = {1, 0, 1, 0, + 1, 0, 1, 0}; + +static constexpr std::size_t d = + std::accumulate(input_state.begin(), input_state.end(), one); + +struct TBI { + auto operator()(std::vector const &bs_angles, + std::vector const &ps_angles, + std::vector const &input_state, + std::vector const &loop_lengths) __qpu__ { + auto n_modes = ::n_modes; + const auto d = ::d; + + cudaq::qvector qumodes(n_modes); // |00...00> d-dimensions + for (std::size_t i = 0; i < n_modes; i++) { + for (std::size_t j = 0; j < input_state[i]; j++) 
{ + create(qumodes[i]); // setting to |input_state> + } + } + + std::size_t c = 0; + for (std::size_t ll : loop_lengths) { + for (std::size_t i = 0; i < (n_modes - ll); i++) { + beam_splitter(qumodes[i], qumodes[i + ll], bs_angles[c]); + phase_shift(qumodes[i], ps_angles[c]); + c++; + } + } + } +}; + +int main() { + std::size_t n_loops = 2; + std::vector loop_lengths = {1, 1}; + std::vector input_state(std::begin(::input_state), + std::end(::input_state)); + + const std::size_t zero = 0; + std::size_t sum_loop_lenghts{ + std::accumulate(loop_lengths.begin(), loop_lengths.end(), zero)}; + + std::size_t n_beam_splitters = n_loops * ::n_modes - sum_loop_lenghts; + + std::vector bs_angles = + cudaq::linspace(M_PI / 3, M_PI / 6, n_beam_splitters); + std::vector ps_angles = + cudaq::linspace(M_PI / 3, M_PI / 5, n_beam_splitters); + + auto counts = cudaq::sample(1000000, TBI{}, bs_angles, ps_angles, input_state, + loop_lengths); + + counts.dump(); + + return 0; +} \ No newline at end of file diff --git a/docs/sphinx/examples/python/noise_callback.py b/docs/sphinx/examples/python/noise_callback.py new file mode 100644 index 0000000000..8eba41deeb --- /dev/null +++ b/docs/sphinx/examples/python/noise_callback.py @@ -0,0 +1,45 @@ +import cudaq +import numpy as np + +# Set the target to our density matrix simulator. +cudaq.set_target('density-matrix-cpu') + +noise = cudaq.NoiseModel() + + +# Noise model callback function +def rx_noise(qubits, params): + # Model a pulse-length based rotation gate: + # the bigger the angle, the longer the pulse, i.e., more amplitude damping. 
+ angle = params[0] + angle = angle % (2 * np.pi) + # Damping rate is linearly proportional to the angle + damping_rate = np.abs(angle / (2 * np.pi)) + print(f"Angle = {angle}, amplitude damping rate = {damping_rate}.") + return cudaq.AmplitudeDampingChannel(damping_rate) + + +# Bind the noise model callback function to the `rx` gate +noise.add_channel('rx', rx_noise) + + +@cudaq.kernel +def kernel(angle: float): + qubit = cudaq.qubit() + rx(angle, qubit) + mz(qubit) + + +# Now we're ready to run the noisy simulation of our kernel. +# Note: We must pass the noise model to sample via keyword. +noisy_result = cudaq.sample(kernel, np.pi, noise_model=noise) +print(noisy_result) + +# Our results should show measurements in both the |0> and |1> states, indicating +# that the noise has successfully impacted the system. +# Note: a `rx(pi)` is equivalent to a Pauli X gate, and thus, it should be +# in the |1> state if no noise is present. + +# To confirm this, we can run the simulation again without noise. 
+noiseless_result = cudaq.sample(kernel, np.pi) +print(noiseless_result) diff --git a/docs/sphinx/examples/python/providers/orca.py b/docs/sphinx/examples/python/providers/orca.py index 5f91aad305..79f5cfd770 100644 --- a/docs/sphinx/examples/python/providers/orca.py +++ b/docs/sphinx/examples/python/providers/orca.py @@ -1,4 +1,5 @@ import cudaq +import time import numpy as np import os @@ -34,20 +35,22 @@ n_beam_splitters = len(loop_lengths) * len(input_state) - sum(loop_lengths) # beam splitter angles -bs_angles = np.linspace(np.pi / 8, np.pi / 3, n_beam_splitters) +bs_angles = np.linspace(np.pi / 3, np.pi / 6, n_beam_splitters) # Optionally, we can also specify the phase shifter angles, if the system # includes phase shifters # ``` -# ps_angles = np.linspace(np.pi / 6, np.pi / 3, n_beam_splitters) +# ps_angles = np.linspace(np.pi / 3, np.pi / 5, n_beam_splitters) # ``` # we can also set number of requested samples n_samples = 10000 +# Option A: # By using the synchronous `cudaq.orca.sample`, the execution of # any remaining classical code in the file will occur only # after the job has been returned from ORCA Server. +print("Submitting to ORCA Server synchronously") counts = cudaq.orca.sample(input_state, loop_lengths, bs_angles, n_samples) # If the system includes phase shifters, the phase shifter angles can be @@ -59,3 +62,32 @@ # Print the results print(counts) + +# Option B: +# By using the asynchronous `cudaq.orca.sample_async`, the remaining +# classical code will be executed while the job is being handled +# by Orca. This is ideal when submitting via a queue over +# the cloud. +print("Submitting to ORCA Server asynchronously") +async_results = cudaq.orca.sample_async(input_state, loop_lengths, bs_angles, + n_samples) +# ... more classical code to run ... 
+ +# We can either retrieve the results later in the program with +# ``` +# async_counts = async_results.get() +# ``` +# or we can also write the job reference (`async_results`) to +# a file and load it later or from a different process. +file = open("future.txt", "w") +file.write(str(async_results)) +file.close() + +# We can later read the file content and retrieve the job +# information and results. +time.sleep(0.2) # wait for the job to be processed +same_file = open("future.txt", "r") +retrieved_async_results = cudaq.AsyncSampleResult(str(same_file.read())) + +counts = retrieved_async_results.get() +print(counts) diff --git a/docs/sphinx/examples/python/providers/orca_mqpu.py b/docs/sphinx/examples/python/providers/orca_mqpu.py new file mode 100644 index 0000000000..a990ce932b --- /dev/null +++ b/docs/sphinx/examples/python/providers/orca_mqpu.py @@ -0,0 +1,32 @@ +import cudaq +import numpy as np + +# See accompanying example `orca.py` for detailed explanation. + +# Provide list of URLs to the remote ORCA targets +orca_urls = "http://localhost:3035,http://localhost:3037" +cudaq.set_target("orca", url=orca_urls) + +qpu_count = cudaq.get_target().num_qpus() +print("Number of virtual QPUs:", qpu_count) + +# A time-bin boson sampling experiment +input_state = [1, 0, 1, 0, 1, 0, 1, 0] +loop_lengths = [1, 1] +n_beam_splitters = len(loop_lengths) * len(input_state) - sum(loop_lengths) +bs_angles = np.linspace(np.pi / 3, np.pi / 6, n_beam_splitters) +n_samples = 10000 + +count_futures = [] +for i in range(qpu_count): + result = cudaq.orca.sample_async(input_state, + loop_lengths, + bs_angles, + n_samples, + qpu_id=i) + count_futures.append(result) + +print("Sampling jobs launched for asynchronous processing.") + +for counts in count_futures: + counts.get().dump() diff --git a/docs/sphinx/examples/python/providers/photonics.py b/docs/sphinx/examples/python/providers/photonics.py index 1075c4874a..46ea5b0bd8 100644 --- a/docs/sphinx/examples/python/providers/photonics.py 
+++ b/docs/sphinx/examples/python/providers/photonics.py @@ -1,16 +1,19 @@ import cudaq -cudaq.set_target("photonics") +cudaq.set_target("photonics-cpu") @cudaq.kernel def photonicsKernel(): - qutrits = [qudit(3) for _ in range(2)] - plus(qutrits[0]) - plus(qutrits[1]) - plus(qutrits[1]) - mz(qutrits) + qumodes = [qudit(3) for _ in range(2)] + create(qumodes[0]) + create(qumodes[1]) + create(qumodes[1]) + mz(qumodes) counts = cudaq.sample(photonicsKernel) print(counts) + +state = cudaq.get_state(photonicsKernel) +print(state) diff --git a/docs/sphinx/examples/python/providers/photonics_tbi_get_state.py b/docs/sphinx/examples/python/providers/photonics_tbi_get_state.py new file mode 100644 index 0000000000..d008897c9c --- /dev/null +++ b/docs/sphinx/examples/python/providers/photonics_tbi_get_state.py @@ -0,0 +1,38 @@ +import cudaq +import numpy as np + +cudaq.set_target("photonics-cpu") + + +@cudaq.kernel +def TBI( + bs_angles: list[float], + ps_angles: list[float], + input_state: list[int], + loop_lengths: list[int], +): + n_modes = len(input_state) + level = sum(input_state) + 1 # qudit level + + qumodes = [qudit(level) for _ in range(n_modes)] + + for i in range(n_modes): + for _ in range(input_state[i]): + create(qumodes[i]) + + counter = 0 + for j in loop_lengths: + for i in range(n_modes - j): + beam_splitter(qumodes[i], qumodes[i + j], bs_angles[counter]) + phase_shift(qumodes[i], ps_angles[counter]) + counter += 1 + + +input_state = [1, 0, 1, 0, 1, 0, 1, 0] +loop_lengths = [1, 1] +n_beam_splitters = len(loop_lengths) * len(input_state) - sum(loop_lengths) +bs_angles = np.linspace(np.pi / 3, np.pi / 6, n_beam_splitters) +ps_angles = np.linspace(np.pi / 3, np.pi / 5, n_beam_splitters) + +state = cudaq.get_state(TBI, bs_angles, ps_angles, input_state, loop_lengths) +state.dump() \ No newline at end of file diff --git a/docs/sphinx/examples/python/providers/photonics_tbi.py b/docs/sphinx/examples/python/providers/photonics_tbi_sample.py similarity index 72% 
rename from docs/sphinx/examples/python/providers/photonics_tbi.py rename to docs/sphinx/examples/python/providers/photonics_tbi_sample.py index aff5679b5e..4dc2c48ca2 100644 --- a/docs/sphinx/examples/python/providers/photonics_tbi.py +++ b/docs/sphinx/examples/python/providers/photonics_tbi_sample.py @@ -1,7 +1,7 @@ import cudaq import numpy as np -cudaq.set_target("photonics") +cudaq.set_target("photonics-cpu") @cudaq.kernel @@ -14,24 +14,25 @@ def TBI( n_modes = len(input_state) level = sum(input_state) + 1 # qudit level - quds = [qudit(level) for _ in range(n_modes)] + qumodes = [qudit(level) for _ in range(n_modes)] for i in range(n_modes): for _ in range(input_state[i]): - plus(quds[i]) + create(qumodes[i]) counter = 0 for j in loop_lengths: for i in range(n_modes - j): - beam_splitter(quds[i], quds[i + j], bs_angles[counter]) - phase_shift(quds[i], ps_angles[counter]) + beam_splitter(qumodes[i], qumodes[i + j], bs_angles[counter]) + phase_shift(qumodes[i], ps_angles[counter]) counter += 1 - mz(quds) + mz(qumodes) -input_state = [2, 1, 3, 1] -loop_lengths = [1, 2] +input_state = [1, 0, 1, 0, 1, 0, 1, 0] +loop_lengths = [1, 1] + n_beam_splitters = len(loop_lengths) * len(input_state) - sum(loop_lengths) bs_angles = np.linspace(np.pi / 3, np.pi / 6, n_beam_splitters) ps_angles = np.linspace(np.pi / 3, np.pi / 5, n_beam_splitters) diff --git a/docs/sphinx/examples/python/random_walk_qpe.py b/docs/sphinx/examples/python/random_walk_qpe.py new file mode 100644 index 0000000000..af28ddd100 --- /dev/null +++ b/docs/sphinx/examples/python/random_walk_qpe.py @@ -0,0 +1,62 @@ +# Compile and run with: +# ``` +# nvq++ random_walk_qpe.cpp -o qpe.x && ./qpe.x +# ``` + +import cudaq +from typing import List +from cudaq import spin + +# Here we demonstrate an algorithm expressed as a CUDA-Q kernel +# that incorporates non-trivial control flow and conditional +# quantum instruction invocation. 
+ + +# Define the random walk phase estimation kernel +@cudaq.kernel +def rwpe_kernel(n_iter: int, mu: float, sigma: float) -> float: + iteration = 0 + + # Allocate the qubits + number_of_qubits = 2 + qubits = cudaq.qvector(number_of_qubits) + + # Alias them + aux = qubits[0] + + target = qubits[number_of_qubits - 1] + + x(target) + + while iteration < n_iter: + h(aux) + rz(1.0 - (mu / sigma), aux) + rz(.25 / sigma, target) + x.ctrl(aux, target) + rz(-.25 / sigma, target) + x.ctrl(aux, target) + h(aux) + if mz(aux): + x(aux) + mu = mu + sigma * .6065 + else: + mu = mu - sigma * .6065 + + sigma *= .7951 + iteration += 1 + + return 2.0 * mu + + +# Main function to execute the kernel +def main(): + n_iterations = 24 + mu = 0.7951 + sigma = 0.6065 + + phase = rwpe_kernel(n_iterations, mu, sigma) + print(f"Phase = {phase:.6f}") + + +if __name__ == "__main__": + main() diff --git a/docs/sphinx/using/backends/hardware.rst b/docs/sphinx/using/backends/hardware.rst index 203e0268b1..0dbb53a3d8 100644 --- a/docs/sphinx/using/backends/hardware.rst +++ b/docs/sphinx/using/backends/hardware.rst @@ -312,6 +312,16 @@ configuration. export ORCA_ACCESS_URL="https://" + +Sometimes the requests to the PT-1 require an authentication token. This token can be set as an +environment variable named ``ORCA_AUTH_TOKEN``. For example, if the token is :code:`AbCdEf123456`, +you can set the environment variable as follows: + +.. code:: bash + + export ORCA_AUTH_TOKEN="AbCdEf123456" + + Submission from C++ ````````````````````````` diff --git a/docs/sphinx/using/backends/simulators.rst b/docs/sphinx/using/backends/simulators.rst index bfb3049dc4..a4a9e1449d 100644 --- a/docs/sphinx/using/backends/simulators.rst +++ b/docs/sphinx/using/backends/simulators.rst @@ -112,10 +112,10 @@ setting the target. - Number of CPU threads used for circuit processing. The default value is `8`. 
* - ``CUDAQ_MAX_CPU_MEMORY_GB`` - non-negative integer, or `NONE` - - CPU memory size (in GB) allowed for state-vector migration. `NONE` means unlimited (up to physical memory constraints). Default is 0 (disabled). + - CPU memory size (in GB) allowed for state-vector migration. `NONE` means unlimited (up to physical memory constraints). Default is 0GB (disabled, variable is not set to any value). * - ``CUDAQ_MAX_GPU_MEMORY_GB`` - positive integer, or `NONE` - - GPU memory (in GB) allowed for on-device state-vector allocation. As the state-vector size exceeds this limit, host memory will be utilized for migration. `NONE` means unlimited (up to physical memory constraints). This is the default. + - GPU memory (in GB) allowed for on-device state-vector allocation. As the state-vector size exceeds this limit, host memory will be utilized for migration. `NONE` means unlimited (up to physical memory constraints). This is the default. .. deprecated:: 0.8 The :code:`nvidia-fp64` targets, which is equivalent setting the `fp64` option on the :code:`nvidia` target, @@ -169,10 +169,11 @@ To execute a program on the multi-node multi-GPU NVIDIA target, use the followin If a target is set in the application code, this target will override the :code:`--target` command line flag given during program invocation. .. note:: - (1) The order of the option settings are interchangeable. - For example, `cudaq.set_target('nvidia', option='mgpu,fp64')` is equivalent to `cudaq.set_target('nvidia', option='fp64.mgpu')`. + + * The order of the option settings are interchangeable. + For example, `cudaq.set_target('nvidia', option='mgpu,fp64')` is equivalent to `cudaq.set_target('nvidia', option='fp64,mgpu')`. - (2) The `nvidia` target has single-precision as the default setting. Thus, using `option='mgpu'` implies that `option='mgpu,fp32'`. + * The `nvidia` target has single-precision as the default setting. Thus, using `option='mgpu'` implies that `option='mgpu,fp32'`. .. 
tab:: C++ @@ -343,7 +344,7 @@ CUDA-Q provides a couple of tensor-network simulator targets accelerated with the :code:`cuTensorNet` library. These backends are available for use from both C++ and Python. -Tensor network-based simulators are suitable for large-scale simulation of certain classes of quantum circuits involving many qubits beyond the memory limit of state vector based simulators. For example, computing the expectation value of a Hamiltonian via :code:`cudaq::observe` can be performed efficiently, thanks to :code:`cuTensorNet` contraction optimization capability. On the other hand, conditional circuits, i.e., those with mid-circuit measurements or reset, despite being supported by both backends, may result in poor performance. +Tensor network simulators are suitable for large-scale simulation of certain classes of quantum circuits involving many qubits beyond the memory limit of state vector based simulators. For example, computing the expectation value of a Hamiltonian via :code:`cudaq::observe` can be performed efficiently, thanks to :code:`cuTensorNet` contraction optimization capability. On the other hand, conditional circuits, i.e., those with mid-circuit measurements or reset, despite being supported by both backends, may result in poor performance. Multi-node multi-GPU +++++++++++++++++++++++++++++++++++ @@ -476,14 +477,14 @@ Specific aspects of the simulation can be configured by defining the following e .. note:: The parallelism of Jacobi method (the default `CUDAQ_MPS_SVD_ALGO` setting) gives GPU better performance on small and medium size matrices. - If you expect the a large number of singular values (e.g., increasing the `CUDAQ_MPS_MAX_BOND` setting), please adjust the `CUDAQ_MPS_SVD_ALGO` setting accordingly. + If you expect a large number of singular values (e.g., increasing the `CUDAQ_MPS_MAX_BOND` setting), please adjust the `CUDAQ_MPS_SVD_ALGO` setting accordingly. Default Simulator ================================== .. 
_default-simulator: -If no explicit target is set, i.e. if the code is compiled without any :code:`--target` flags, then CUDA-Q makes a default choice for the simulator. +If no explicit target is set, i.e., if the code is compiled without any :code:`--target` flags, then CUDA-Q makes a default choice for the simulator. If an NVIDIA GPU and CUDA runtime libraries are available, the default target is set to `nvidia`. This will utilize the :ref:`cuQuantum single-GPU state vector simulator `. On CPU-only systems, the default target is set to `qpp-cpu` which uses the :ref:`OpenMP CPU-only simulator `. diff --git a/docs/sphinx/using/extending/_noise.rst b/docs/sphinx/using/extending/_noise.rst index dcdaa0e780..0ff2132811 100644 --- a/docs/sphinx/using/extending/_noise.rst +++ b/docs/sphinx/using/extending/_noise.rst @@ -40,10 +40,67 @@ constructor should validate the completeness (CPTP) relation. A :code:`cudaq::noise_model` encapsulates a mapping of quantum operation names to a vector of :code:`kraus_channel` that is to be applied after invocation of that quantum operation. A :code:`noise_model` can be constructed with a nullary constructor, and -:code:`kraus_channels` can be added via a templated :code:`add_channel` method, where the -template type is the quantum operation the channel applies to (e.g. :code:`model.add_channel\(channel)`). Clients (e.g. simulator backends) can retrieve the :code:`kraus_channel` to +:code:`kraus_channels` can be added via :code:`add_channel` and :code:`add_all_qubit_channel` methods with +the operation given as a string or as a template argument. +The operation name or the template type specifies the quantum operation the channel applies to +(e.g. :code:`model.add_channel\(channel)` or :code:`model.add_channel("h", channel)`). +Clients (e.g. simulator backends) can retrieve the :code:`kraus_channel` to apply to the simulated state via a :code:`noise_model::get_channel(...)` call. 
+When adding an error channel to a noise model for a quantum operation +we can assign the noise channel to instances of that operation on specific qubit operands or +to any occurrence of the operation, regardless of which qubits it acts on. + +.. tab:: Python + + .. code-block:: python + + # Add a noise channel to z gate on qubit 0 + noise.add_channel('z', [0], noise_channel) + # Add a noise channel to x gate, regardless of qubit operands. + noise.add_all_qubit_channel('x', noise_channel) + + +.. tab:: C++ + + .. code-block:: cpp + + // Add a noise channel to z gate on qubit 0 + noise.add_channel("z", {0}, noise_channel); + // Add a noise channel to x gate, regardless of qubit operands. + noise.add_all_qubit_channel("x", noise_channel) + +In addition to static noise channels, users can also define a noise channel as a +callback function, which returns a concrete channel definition in terms of Kraus matrices +depending on the gate operands and gate parameters if any. + +.. tab:: Python + + .. code-block:: python + + # Noise channel callback function + def noise_cb(qubits, params): + # Construct a channel based on specific operands and parameters + ... + return noise_channel + + # Add a dynamic noise channel to the 'rx' gate. + noise.add_channel('rx', noise_cb) + + +.. tab:: C++ + + .. code-block:: cpp + + // Add a dynamic noise channel to the 'rx' gate. + noise.add_channel("rx", + [](const auto &qubits, const auto ¶ms) -> cudaq::kraus_channel { + // Construct a channel based on specific operands and parameters + ... + return noiseChannel; + }); + + Noise models can be constructed via the :code:`cudaq::noise_model` and specified for execution via a public :code:`cudaq::set_noise(cudaq::noise_model&)` function. 
This function should forward the :code:`noise_model` to the current :code:`quantum_platform` which can attach it diff --git a/docs/sphinx/using/extending/cudaq_ir.rst b/docs/sphinx/using/extending/cudaq_ir.rst index 070f6369f2..ffb90346ae 100644 --- a/docs/sphinx/using/extending/cudaq_ir.rst +++ b/docs/sphinx/using/extending/cudaq_ir.rst @@ -26,7 +26,7 @@ Let's see the output of :code:`nvq++` in verbose mode. Consider a simple code li $ nvq++ simple.cpp -v --save-temps cudaq-quake --emit-llvm-file simple.cpp -o simple.qke - cudaq-opt --pass-pipeline=builtin.module(canonicalize,lambda-lifting,canonicalize,apply-op-specialization,kernel-execution,inline{default-pipeline=func.func(indirect-to-direct-calls)},func.func(quake-add-metadata),device-code-loader{use-quake=1},expand-measurements,func.func(lower-to-cfg),canonicalize,cse) simple.qke -o simple.qke.LpsXpu + cudaq-opt --pass-pipeline=builtin.module(canonicalize,lambda-lifting,canonicalize,apply-op-specialization,kernel-execution,indirect-to-direct-calls,inline,func.func(quake-add-metadata),device-code-loader{use-quake=1},expand-measurements,func.func(lower-to-cfg),canonicalize,cse) simple.qke -o simple.qke.LpsXpu cudaq-translate --convert-to=qir simple.qke.LpsXpu -o simple.ll.p3De4L fixup-linkage.pl simple.qke simple.ll llc --relocation-model=pic --filetype=obj -O2 simple.ll.p3De4L -o simple.qke.o diff --git a/docs/sphinx/using/install/data_center_install.rst b/docs/sphinx/using/install/data_center_install.rst index f65d1ac7bf..b73cc94640 100644 --- a/docs/sphinx/using/install/data_center_install.rst +++ b/docs/sphinx/using/install/data_center_install.rst @@ -245,7 +245,7 @@ Python-specific tools: in the way as you installed Python itself. If you installed Python via the package manager for your system, you may need to install an additional package to get the development headers. The package name is usually your python version followed by either a `-dev` or `-devel` suffix. 
- If you are using a `Conda environment `__, + If you are using a `Conda environment `__, the necessary headers should already be installed. - Pip package manager: Make sure the `pip` module is enable for your Python version. We refer to the Python `documentation `__ for diff --git a/include/cudaq/Frontend/nvqpp/ASTBridge.h b/include/cudaq/Frontend/nvqpp/ASTBridge.h index 38ec4048f3..baf4518fce 100644 --- a/include/cudaq/Frontend/nvqpp/ASTBridge.h +++ b/include/cudaq/Frontend/nvqpp/ASTBridge.h @@ -286,6 +286,7 @@ class QuakeBridgeVisitor DataRecursionQueue *q = nullptr); bool VisitCXXConstructExpr(clang::CXXConstructExpr *x); bool VisitCXXOperatorCallExpr(clang::CXXOperatorCallExpr *x); + bool VisitCXXParenListInitExpr(clang::CXXParenListInitExpr *x); bool WalkUpFromCXXOperatorCallExpr(clang::CXXOperatorCallExpr *x); bool TraverseDeclRefExpr(clang::DeclRefExpr *x, DataRecursionQueue *q = nullptr); @@ -499,6 +500,9 @@ class QuakeBridgeVisitor bool isItaniumCXXABI(); private: + /// Check that the value on the top of the stack is an entry-point kernel. + bool hasTOSEntryKernel(); + /// Map the block arguments to the names of the function parameters. 
void addArgumentSymbols(mlir::Block *entryBlock, mlir::ArrayRef parameters); diff --git a/include/cudaq/Optimizer/Builder/Factory.h b/include/cudaq/Optimizer/Builder/Factory.h index 6c627c64c9..868cf4c861 100644 --- a/include/cudaq/Optimizer/Builder/Factory.h +++ b/include/cudaq/Optimizer/Builder/Factory.h @@ -266,6 +266,10 @@ mlir::Value createCast(mlir::OpBuilder &builder, mlir::Location loc, std::vector> readGlobalConstantArray(cudaq::cc::GlobalOp &global); +std::pair +getOrAddFunc(mlir::Location loc, mlir::StringRef funcName, + mlir::FunctionType funcTy, mlir::ModuleOp module); + } // namespace factory std::size_t getDataSize(llvm::DataLayout &dataLayout, mlir::Type ty); diff --git a/include/cudaq/Optimizer/Builder/Runtime.h b/include/cudaq/Optimizer/Builder/Runtime.h index 194e030e37..b6c0d0e2cf 100644 --- a/include/cudaq/Optimizer/Builder/Runtime.h +++ b/include/cudaq/Optimizer/Builder/Runtime.h @@ -29,4 +29,23 @@ static constexpr const char launchKernelHybridFuncName[] = "hybridLaunchKernel"; static constexpr const char mangledNameMap[] = "quake.mangled_name_map"; +static constexpr const char deviceCodeHolderAdd[] = + "__cudaq_deviceCodeHolderAdd"; + +static constexpr const char registerLinkableKernel[] = + "__cudaq_registerLinkableKernel"; +static constexpr const char getLinkableKernelKey[] = + "__cudaq_getLinkableKernelKey"; +static constexpr const char getLinkableKernelName[] = + "__cudaq_getLinkableKernelName"; +static constexpr const char getLinkableKernelDeviceSide[] = + "__cudaq_getLinkableKernelDeviceFunction"; + +static constexpr const char CudaqRegisterLambdaName[] = + "cudaqRegisterLambdaName"; +static constexpr const char CudaqRegisterArgsCreator[] = + "cudaqRegisterArgsCreator"; +static constexpr const char CudaqRegisterKernelName[] = + "cudaqRegisterKernelName"; + } // namespace cudaq::runtime diff --git a/include/cudaq/Optimizer/CallGraphFix.h b/include/cudaq/Optimizer/CallGraphFix.h new file mode 100644 index 0000000000..cce040048e --- 
/dev/null +++ b/include/cudaq/Optimizer/CallGraphFix.h @@ -0,0 +1,47 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#pragma once + +#include "mlir/Analysis/CallGraph.h" + +namespace llvm { +// FIXME: `GraphTraits` specialization for `const mlir::CallGraphNode *` in +// "mlir/Analysis/CallGraph.h" has a bug. +// In particular, `GraphTraits` typedef'ed `NodeRef +// -> mlir::CallGraphNode *`, (without `const`), causing problems when using +// `mlir::CallGraphNode` with graph iterator (e.g., `llvm::df_iterator`). The +// entry node getter has the signature `NodeRef getEntryNode(NodeRef node)`, +// i.e., `mlir::CallGraphNode * getEntryNode(mlir::CallGraphNode * node)`; but a +// graph iterator for `const mlir::CallGraphNode *` will pass a `const +// mlir::CallGraphNode *` to that `getEntryNode` function => compile error. +// Here, we define a non-const overload, which hasn't been defined, to work +// around that issue. +// +// Note: this isn't an issue for the whole `mlir::CallGraph` graph, i.e., +// `GraphTraits`. `getEntryNode` is defined as +// `getExternalCallerNode`, which is a const method of `mlir::CallGraph`. 
+ +template <> +struct GraphTraits { + using NodeRef = mlir::CallGraphNode *; + static NodeRef getEntryNode(NodeRef node) { return node; } + + static NodeRef unwrap(const mlir::CallGraphNode::Edge &edge) { + return edge.getTarget(); + } + using ChildIteratorType = + mapped_iterator; + static ChildIteratorType child_begin(NodeRef node) { + return {node->begin(), &unwrap}; + } + static ChildIteratorType child_end(NodeRef node) { + return {node->end(), &unwrap}; + } +}; +} // namespace llvm diff --git a/include/cudaq/Optimizer/CodeGen/Passes.h b/include/cudaq/Optimizer/CodeGen/Passes.h index dfcc080beb..3354301de7 100644 --- a/include/cudaq/Optimizer/CodeGen/Passes.h +++ b/include/cudaq/Optimizer/CodeGen/Passes.h @@ -27,15 +27,19 @@ class LLVMStructType; } // namespace mlir namespace cudaq::opt { -void registerConvertToQIRPass(); /// Convert (generic) QIR to the profile-specific QIR for a specific target. /// @param pm Pass Manager to add QIR passes to /// @param convertTo Expected to be `qir-base` or `qir-adaptive` (comes from the /// cudaq-translate command line `--convert-to` parameter) -void addQIRProfilePipeline(mlir::OpPassManager &pm, llvm::StringRef convertTo); +/// @param performPrep Whether or not to perform the initial prep pass (normally +/// true, but false for the WireSet QIR path) +void addQIRProfilePipeline(mlir::OpPassManager &pm, llvm::StringRef convertTo, + bool performPrep = true); void addLowerToCCPipeline(mlir::OpPassManager &pm); +void addWiresetToProfileQIRPipeline(mlir::OpPassManager &pm, + llvm::StringRef profile); /// @brief Verify that all `CallOp` targets are QIR- or NVQIR-defined functions /// or in the provided allowed list. @@ -45,7 +49,6 @@ createVerifyNVQIRCallOpsPass(const std::vector &allowedFuncs); // Use the addQIRProfilePipeline() for the following passes. 
std::unique_ptr createQIRToQIRProfilePass(llvm::StringRef convertTo); -std::unique_ptr verifyQIRProfilePass(llvm::StringRef convertTo); std::unique_ptr createQIRProfilePreparationPass(); std::unique_ptr createConvertToQIRFuncPass(llvm::StringRef convertTo); @@ -59,6 +62,7 @@ void registerCodeGenDialect(mlir::DialectRegistry ®istry); mlir::LLVM::LLVMStructType lambdaAsPairOfPointers(mlir::MLIRContext *context); void registerToExecutionManagerCCPipeline(); +void registerWireSetToProfileQIRPipeline(); void populateCCTypeConversions(mlir::LLVMTypeConverter *converter); // declarative passes diff --git a/include/cudaq/Optimizer/CodeGen/Passes.td b/include/cudaq/Optimizer/CodeGen/Passes.td index 082078914c..2120ea0836 100644 --- a/include/cudaq/Optimizer/CodeGen/Passes.td +++ b/include/cudaq/Optimizer/CodeGen/Passes.td @@ -23,9 +23,7 @@ def QuakeToCC : Pass<"quake-to-cc", "mlir::ModuleOp"> { execution manager. }]; - let dependentDialects = [ - "cudaq::cc::CCDialect", "mlir::LLVM::LLVMDialect" - ]; + let dependentDialects = ["cudaq::cc::CCDialect", "mlir::LLVM::LLVMDialect"]; } def QuakeToCCPrep : Pass<"quake-to-cc-prep", "mlir::ModuleOp"> { @@ -89,21 +87,6 @@ def QIRToQIRProfilePrep : Pass<"qir-profile-prep", "mlir::ModuleOp"> { let constructor = "cudaq::opt::createQIRProfilePreparationPass()"; } -def VerifyQIRProfile : Pass<"verify-qir-profile", "mlir::LLVM::LLVMFuncOp"> { - let summary = "Verify that the output conforms to the specific profile"; - let description = [{ - This pass scans over functions in the LLVM-IR dialect to make sure they - conform to the QIR specific profile. 
- }]; - - let options = [ - Option<"convertTo", "convert-to", "std::string", "\"qir-base\"", - "Which QIR profile to convert to (default is 'qir-base')"> - ]; - - let constructor = "cudaq::opt::verifyQIRProfilePass(\"qir-base\")"; -} - def QIRToQIRProfileFunc : Pass<"quake-to-qir-func", "mlir::LLVM::LLVMFuncOp"> { let summary = "Analyze kernels and add attributes and record calls."; @@ -118,7 +101,7 @@ def QIRToQIRProfileFunc : Pass<"quake-to-qir-func", }]; let options = [ - Option<"convertTo", "convert-to", "std::string", "\"qir-base\"", + Option<"convertTo", "convert-to", "std::string", /*default=*/"\"qir-base\"", "Which QIR profile to convert to (default is 'qir-base')"> ]; @@ -127,8 +110,10 @@ def QIRToQIRProfileFunc : Pass<"quake-to-qir-func", def QIRToQIRProfile : Pass<"convert-to-qir-profile"> { let summary = - "After lowering a Quake kernel to QIR, lower further to the specific QIR Profile."; + "Lower full QIR further to the specific QIR Profile."; let description = [{ + This is run after lowering a Quake kernel to full QIR. + This is a subpass of the pipeline to convert to the specific QIR Profile. This pass lowers various QIR DAGs to the specific QIR Profile. See @@ -179,4 +164,52 @@ def VerifyNVQIRCallOps : let constructor = "cudaq::opt::createVerifyNVQIRCallOpsPass({})"; } +def VerifyQIRProfile : Pass<"verify-qir-profile", "mlir::LLVM::LLVMFuncOp"> { + let summary = "Verify that the output conforms to the specific profile"; + let description = [{ + This pass scans over functions in the LLVM-IR dialect to make sure they + conform to the QIR specific profile. 
+ }]; + + let options = [ + Option<"convertTo", "convert-to", "std::string", "\"qir-base\"", + "Which QIR profile to convert to (default is 'qir-base')"> + ]; +} + +def WireSetToProfileQIR : Pass<"wireset-to-profile-qir", "mlir::func::FuncOp"> { + let summary = "Convert quake using wire sets to a profile of QIR"; + let description = [{ + This pass takes quake in "value semantics" form and after it has been + converted to use wire sets (qubit management, mapping, etc.) and converts + the code to CC dialect with QIR calls, etc. + }]; + + let dependentDialects = ["cudaq::cc::CCDialect", "mlir::LLVM::LLVMDialect"]; + let options = [ + Option<"convertTo", "convert-to", "std::string", /*default=*/"\"qir-base\"", + "Select the profile to convert wire sets to."> + ]; +} + +def WireSetToProfileQIRPost : + Pass<"wireset-to-profile-qir-post", "mlir::ModuleOp"> { + let summary = "Post processing for lowering wire sets to a profile of QIR"; + let description = [{ + This pass should be run immediately after wireset-to-profile-qir. + }]; + + let dependentDialects = ["cudaq::cc::CCDialect", "mlir::func::FuncDialect"]; +} + +def WireSetToProfileQIRPrep : + Pass<"wireset-to-profile-qir-prep", "mlir::ModuleOp"> { + let summary = "Prepare for lowering wire sets to a profile of QIR"; + let description = [{ + This pass should be run immediately before wireset-to-profile-qir. 
+ }]; + + let dependentDialects = ["cudaq::cc::CCDialect", "mlir::func::FuncDialect"]; +} + #endif // CUDAQ_OPT_OPTIMIZER_CODEGEN_PASSES diff --git a/include/cudaq/Optimizer/CodeGen/Peephole.td b/include/cudaq/Optimizer/CodeGen/Peephole.td index e56271ce38..32f32b5d21 100644 --- a/include/cudaq/Optimizer/CodeGen/Peephole.td +++ b/include/cudaq/Optimizer/CodeGen/Peephole.td @@ -17,7 +17,7 @@ include "mlir/IR/PatternBase.td" //===----------------------------------------------------------------------===// def InvokeOnXWithOneControl : Constraint>; + "$0 && callToInvokeWithXCtrlOneTarget($0.getValue(), $1)">>; def CreateCallCnot : NativeCodeCall< "[&]() -> std::size_t {" @@ -35,7 +35,7 @@ def XCtrlOneTargetToCNot : Pat< //===----------------------------------------------------------------------===// -def NeedsRenaming : Constraint>; +def NeedsRenaming : Constraint>; def CreateAddressOf : NativeCodeCall< "$_builder.create($_loc, $0.getType()," @@ -52,7 +52,7 @@ def AddrOfCisToBase : Pat< // Apply special rule for `mz`. See below. 
def FuncNotMeasure : Constraint>; + "!($_self && $_self.getValue().startswith(cudaq::opt::QIRMeasure))">>; def CreateCallOp : NativeCodeCall< "[&]() -> std::size_t {" @@ -72,7 +72,7 @@ def CalleeConv : Pat< //===----------------------------------------------------------------------===// def IsArrayGetElementPtrId : Constraint>; + "$0 && $0.getValue().str() == cudaq::opt::QIRArrayGetElementPtr1d">>; def EraseArrayGEPOp : NativeCodeCall< "$_builder.create($_loc," @@ -85,7 +85,7 @@ def EraseDeadArrayGEP : Pat< //===----------------------------------------------------------------------===// def IsaAllocateCall : Constraint>; + "$0 && $0.getValue().str() == cudaq::opt::QIRArrayQubitAllocateArray">>; def EraseArrayAllocateOp : NativeCodeCall< "$_builder.create($_loc," @@ -103,8 +103,8 @@ def EraseArrayAlloc : Pat< //===----------------------------------------------------------------------===// def IsaReleaseCall : Constraint>; + "$0 && ($0.getValue().str() == cudaq::opt::QIRArrayQubitReleaseArray || " + "$0.getValue().str() == cudaq::opt::QIRArrayQubitReleaseQubit)">>; def EraseArrayReleaseOp : NativeCodeCall<"static_cast(0)">; @@ -120,7 +120,7 @@ def EraseArrayRelease : Pat< //===----------------------------------------------------------------------===// def IsaMeasureCall : Constraint>; + "$_self && $_self.getValue() == cudaq::opt::QIRMeasure">>; def IsaIntToPtrOperand : Constraint>; @@ -138,7 +138,7 @@ def MeasureCallConv : Pat< //===----------------------------------------------------------------------===// def IsaMeasureToRegisterCall : Constraint>; + "$_self && $_self.getValue() == cudaq::opt::QIRMeasureToRegister">>; // %result = call @__quantum__qis__mz__to__register(%qbit, i8) : (!Qubit) -> i1 // ──────────────────────────────────────────────────────────────────────────── diff --git a/include/cudaq/Optimizer/CodeGen/QIRAttributeNames.h b/include/cudaq/Optimizer/CodeGen/QIRAttributeNames.h new file mode 100644 index 0000000000..764d61fea7 --- /dev/null +++ 
b/include/cudaq/Optimizer/CodeGen/QIRAttributeNames.h @@ -0,0 +1,27 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#pragma once + +/// This file provides some common QIR attribute names for use in code gen. + +namespace cudaq::opt { + +static constexpr const char QIRRegisterNameAttr[] = "registerName"; +static constexpr const char QIREntryPointAttrName[] = "entry_point"; +static constexpr const char QIRProfilesAttrName[] = "qir_profiles"; +static constexpr const char QIROutputLabelingSchemaAttrName[] = + "output_labeling_schema"; +static constexpr const char QIROutputNamesAttrName[] = "output_names"; +static constexpr const char QIRRequiredQubitsAttrName[] = "requiredQubits"; +static constexpr const char QIRRequiredResultsAttrName[] = "requiredResults"; +static constexpr const char QIRIrreversibleFlagName[] = "irreversible"; + +static constexpr const char StartingOffsetAttrName[] = "StartingOffset"; + +} // namespace cudaq::opt diff --git a/include/cudaq/Optimizer/CodeGen/QIRFunctionNames.h b/include/cudaq/Optimizer/CodeGen/QIRFunctionNames.h index 738f031845..c17a90583b 100644 --- a/include/cudaq/Optimizer/CodeGen/QIRFunctionNames.h +++ b/include/cudaq/Optimizer/CodeGen/QIRFunctionNames.h @@ -24,6 +24,7 @@ static constexpr const char QIRMeasureToRegister[] = static constexpr const char QIRCnot[] = "__quantum__qis__cnot"; static constexpr const char QIRCphase[] = "__quantum__qis__cphase"; +static constexpr const char QIRCZ[] = "__quantum__qis__cz"; static constexpr const char QIRReadResultBody[] = "__quantum__qis__read_result__body"; @@ -73,10 +74,21 @@ static constexpr const char 
QIRArrayConcatArray[] = static constexpr const char QIRArrayCreateArray[] = "__quantum__rt__array_create_1d"; +/// Dynamic qubit management helper functions. These are currently only used by +/// the NVQIR simulator. +static constexpr const char QIRisDynamicQubitManagement[] = + "__quantum__rt__is_dynamic_qubit_management"; +static constexpr const char QIRsetDynamicQubitManagement[] = + "__quantum__rt__set_dynamic_qubit_management"; + /// QIR Base/Adaptive Profile record output function names static constexpr const char QIRRecordOutput[] = "__quantum__rt__result_record_output"; +/// Custom NVQIR method to cleanup result maps in between consecutive programs. +static constexpr const char QIRClearResultMaps[] = + "__quantum__rt__clear_result_maps"; + inline mlir::Type getQuantumTypeByName(mlir::StringRef type, mlir::MLIRContext *context) { return mlir::LLVM::LLVMStructType::getOpaque(type, context); diff --git a/include/cudaq/Optimizer/Dialect/CC/CCOps.td b/include/cudaq/Optimizer/Dialect/CC/CCOps.td index a517714041..a58e3d403d 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCOps.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCOps.td @@ -1439,6 +1439,49 @@ def cc_CallCallableOp : CCOp<"call_callable", [CallOpInterface]> { }]; } +def cc_CallIndirectCallableOp : CCOp<"call_indirect_callable", + [CallOpInterface]> { + let summary = "Call a C++ callable, unresolved, at run-time."; + let description = [{ + This effectively connects a call from one kernel to another kernel, which + would have been done at link-time in host code, at run-time on the device + side. This allows calls between kernels defined in separate compilation + units. The definitions of these caller/callee functions are not both present + at compile-time, so they are exposed to the CUDAQ runtime for stitching or + LTO at JIT compile time. 
+ }]; + + let arguments = (ins + cc_IndirectCallableType:$callee, + Variadic:$args + ); + let results = (outs Variadic:$results); + let hasVerifier = 1; + let hasCanonicalizer = 1; + + let assemblyFormat = [{ + $callee (`,` $args^)? `:` functional-type(operands, results) attr-dict + }]; + + let extraClassDeclaration = [{ + /// Get the argument operands to the called function. + operand_range getArgOperands() { + return {arg_operand_begin(), arg_operand_end()}; + } + + operand_iterator arg_operand_begin() { return ++operand_begin(); } + operand_iterator arg_operand_end() { return operand_end(); } + + /// Return the callee of this operation. + mlir::CallInterfaceCallable getCallableForCallee() { return getCallee(); } + + mlir::FunctionType getFunctionType() { + return mlir::FunctionType::get(getContext(), getOperands().getType(), + getResults().getTypes()); + } + }]; +} + def cc_InstantiateCallableOp : CCOp<"instantiate_callable", [Pure]> { let summary = "Construction of a callable object."; let description = [{ diff --git a/include/cudaq/Optimizer/Dialect/CC/CCTypes.td b/include/cudaq/Optimizer/Dialect/CC/CCTypes.td index 01ddf98e21..d8a5820abe 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCTypes.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCTypes.td @@ -194,6 +194,30 @@ def cc_CallableType : CCType<"Callable", "callable"> { }]; } +def cc_IndirectCallableType : CCType<"IndirectCallable", "indirect_callable"> { + let summary = "Proxy for cudaq::qkernel."; + let description = [{ + An entry-point kernel may take a reference to another kernel as an argument. + The passed kernel may be entirely opaque at compile-time with its definition + present in some other compilation module. + + It is on the programmer to use the cudaq::qkernel type. 
This wrapper class + is very much like std::function, but it extends that functionality with some + extra information for the runtime to be able to "link" the distinct kernels + on the device side and provide, for example, LTO at JIT compile time. + }]; + + let parameters = (ins "mlir::FunctionType":$signature); + + let assemblyFormat = "`<` $signature `>`"; + + let builders = [ + TypeBuilderWithInferredContext<(ins "mlir::FunctionType":$signature), [{ + return Base::get(signature.getContext(), signature); + }]> + ]; +} + //===----------------------------------------------------------------------===// // StdVectorType - implemented as a span //===----------------------------------------------------------------------===// diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.h b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.h index 54337779f8..69f65eeb03 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.h +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.h @@ -70,6 +70,13 @@ void genericOpPrinter(mlir::OpAsmPrinter &_odsPrinter, mlir::Operation *op, // Utility functions to test the form of an operation. //===----------------------------------------------------------------------===// +// Is \p op in the Quake dialect? +inline bool isQuakeOperation(mlir::Operation *op) { + if (auto *dialect = op->getDialect()) + return dialect->getNamespace().equals("quake"); + return false; +} + namespace quake { /// Returns true if and only if any quantum operand has type `!quake.ref` or /// `!quake.veq`. 
diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td index 3d14bad6cd..ea1681e340 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeOps.td @@ -641,6 +641,54 @@ def quake_ReturnWireOp : QuakeOp<"return_wire"> { let assemblyFormat = "$target `:` type(operands) attr-dict"; } +//===----------------------------------------------------------------------===// +// Struq handling +//===----------------------------------------------------------------------===// + +def quake_MakeStruqOp : QuakeOp<"make_struq", [Pure]> { + let summary = "create a quantum struct from a set of quantum references"; + let description = [{ + Given a list of values of quantum reference type, creates a new quantum + product reference type. This is a logical grouping and does not imply any + new quantum references are created. + + This operation can be useful for grouping a number of values of type `veq` + into a logical product type, which may be passed to a pure device kernel + as a single unit, for example. These product types may always be erased into + a vector of the quantum references used to compose them via a make_struq op. + }]; + + let arguments = (ins Variadic:$veqs); + let results = (outs StruqType); + let hasVerifier = 1; + + let assemblyFormat = [{ + $veqs `:` functional-type(operands, results) attr-dict + }]; +} + +def quake_GetMemberOp : QuakeOp<"get_member", [Pure]> { + let summary = "extract quantum references from a quantum struct"; + let description = [{ + The get_member operation can be used to extract a set of quantum references + from a quantum struct (product) type. The fields in the quantum struct are + indexed from 0 to $n-1$ where $n$ is the number of fields. An index outside + of this range will produce a verification error. 
+ }]; + + let arguments = (ins + StruqType:$struq, + I32Attr:$index + ); + let results = (outs NonStruqRefType); + let hasCanonicalizer = 1; + let hasVerifier = 1; + + let assemblyFormat = [{ + $struq `[` $index `]` `:` functional-type(operands, results) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // ToControl, FromControl pair //===----------------------------------------------------------------------===// diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeTypes.h b/include/cudaq/Optimizer/Dialect/Quake/QuakeTypes.h index 7b62009b15..c14e22e838 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeTypes.h +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeTypes.h @@ -25,7 +25,7 @@ namespace quake { inline bool isQuantumType(mlir::Type ty) { // NB: this intentionally excludes MeasureType. return mlir::isa(ty); + quake::ControlType, quake::StruqType>(ty); } /// \returns true if \p `ty` is a Quake type. @@ -34,10 +34,16 @@ inline bool isQuakeType(mlir::Type ty) { return isQuantumType(ty) || mlir::isa(ty); } -inline bool isQuantumReferenceType(mlir::Type ty) { +/// \returns true if \p ty is a quantum reference type, excluding `struq`. +inline bool isNonStruqReferenceType(mlir::Type ty) { return mlir::isa(ty); } +/// \returns true if \p ty is a quantum reference type. +inline bool isQuantumReferenceType(mlir::Type ty) { + return isNonStruqReferenceType(ty) || mlir::isa(ty); +} + /// A quake wire type is a linear type. 
inline bool isLinearType(mlir::Type ty) { return mlir::isa(ty); diff --git a/include/cudaq/Optimizer/Dialect/Quake/QuakeTypes.td b/include/cudaq/Optimizer/Dialect/Quake/QuakeTypes.td index a57c3c27bc..2e6fc9dfe3 100644 --- a/include/cudaq/Optimizer/Dialect/Quake/QuakeTypes.td +++ b/include/cudaq/Optimizer/Dialect/Quake/QuakeTypes.td @@ -161,6 +161,41 @@ def VeqType : QuakeType<"Veq", "veq"> { }]; } +//===----------------------------------------------------------------------===// +// StruqType: quantum reference type; product of veq and ref types. +//===----------------------------------------------------------------------===// + +def StruqType : QuakeType<"Struq", "struq"> { + let summary = "a product type of quantum references"; + let description = [{ + This type allows one to group veqs of quantum references together in a + single product type. + + To support Python, a struq type can be assigned a name. This allows the + python bridge to perform dictionary based lookups on member field names. 
+ }]; + + let parameters = (ins + "mlir::StringAttr":$name, + ArrayRefParameter<"mlir::Type">:$members + ); + let hasCustomAssemblyFormat = 1; + + let extraClassDeclaration = [{ + std::size_t getNumMembers() const { return getMembers().size(); } + }]; + + let builders = [ + TypeBuilder<(ins CArg<"llvm::ArrayRef">:$members), [{ + return $_get($_ctxt, mlir::StringAttr{}, members); + }]>, + TypeBuilder<(ins CArg<"llvm::StringRef">:$name, + CArg<"llvm::ArrayRef">:$members), [{ + return $_get($_ctxt, mlir::StringAttr::get($_ctxt, name), members); + }]> + ]; +} + //===----------------------------------------------------------------------===// // MeasureType: classical data type //===----------------------------------------------------------------------===// @@ -183,14 +218,19 @@ def MeasureType : QuakeType<"Measure", "measure"> { } def AnyQTypeLike : TypeConstraint, "quake quantum types">; + ControlType.predicate, RefType.predicate, StruqType.predicate]>, + "quake quantum types">; def AnyQType : Type; def AnyQTargetTypeLike : TypeConstraint, "quake quantum target types">; def AnyQTargetType : Type; -def AnyRefTypeLike : TypeConstraint, "quake quantum reference types">; def AnyRefType : Type; +def NonStruqRefTypeLike : TypeConstraint, "non-struct quake quantum reference types">; +def NonStruqRefType : Type; def AnyQValueTypeLike : TypeConstraint, "quake quantum value types">; def AnyQValueType : Type; diff --git a/include/cudaq/Optimizer/Transforms/Passes.h b/include/cudaq/Optimizer/Transforms/Passes.h index 40a9c87205..6274a8de29 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.h +++ b/include/cudaq/Optimizer/Transforms/Passes.h @@ -18,10 +18,6 @@ namespace cudaq::opt { -/// Pass to generate the device code loading stubs. 
-std::unique_ptr -createGenerateDeviceCodeLoader(bool genAsQuake = false); - /// Add a pass pipeline to transform call between kernels to direct calls that /// do not go through the runtime layers, inline all calls, and detect if calls /// to kernels remain in the fully inlined into entry point kernel. diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index c99d5fccc1..9ca3810f39 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -168,8 +168,7 @@ def ConstPropComplex : Pass<"const-prop-complex", "mlir::ModuleOp"> { }]; } -def ConvertToDirectCalls : - Pass<"indirect-to-direct-calls", "mlir::func::FuncOp"> { +def ConvertToDirectCalls : Pass<"indirect-to-direct-calls", "mlir::ModuleOp"> { let summary = "Convert calls to direct calls to Quake routines."; let description = [{ Rewrite the calls in the IR so that they point to the generated code and not @@ -184,7 +183,8 @@ def DelayMeasurements : Pass<"delay-measurements", "mlir::func::FuncOp"> { "Move measurements as late as possible"; let description = [{ - Move measurements as late as possible. This is useful for a Base Profile QIR program. + Move measurements as late as possible. This is useful for a Base Profile + QIR program. }]; let constructor = "cudaq::opt::createDelayMeasurementsPass()"; @@ -214,6 +214,49 @@ def DecompositionPass: Pass<"decomposition", "mlir::ModuleOp"> { ]; } +def DependencyAnalysis : Pass<"dep-analysis", "mlir::ModuleOp"> { + let summary = "Maps qubits and reorders operations based on dependency graph."; + let description = [{ + A dependency graph is a Directed Acyclic Graph (DAG) where each node + represents an operation, and each edge represents a "depends on" relation + between that operation and another operation. 
For example, in the following + snippet the `x` operation depends on the `h` operation because it is applied + to the same qubit (`q`), so the `h` operation must happen before the `x` + operation: + ```c++ + cudaq::qubit q; + x(q); + h(q); + ``` + + Once a dependency graph is created, it is then scheduled, assigning each + operation a virtual cycle. Operations that don't depend on each other may + be scheduled at the same cycle. However, an operation that depends on a + second operation must be scheduled after the second operation. The + scheduling algorithm tries to pack operations as densely as possible, + minimizing the number of cycles between operations. + + From this dependency graph, we can calculate the lifetime of a qubit: from + the cycle in which it is first used through the cycle in which it is last + used. If two virtual qubits have non-overlapping lifetimes, they can be + assigned to the same physical qubit, as every virtual qubit is assumed to be + fully reset before release. Failure to fully reset virtual qubits before + release is undefinied behavior, and will likely lead to incorrect output + when running DependencyAnalysis. 
+ }]; + + let dependentDialects = ["quake::QuakeDialect"]; + + let statistics = [ + Statistic<"numVirtualQubits", "num-virtual-qubits", + "Number of virtual qubits used">, + Statistic<"numPhysicalQubits", "num-physical-qubits", + "Number of phyiscal qubits used">, + Statistic<"numCycles", "num-cycles", + "Length of kernel in cycles">, + ]; +} + def EraseNopCalls : Pass<"erase-nop-calls", "mlir::func::FuncOp"> { let summary = "Erase calls to any builtin intrinsics that are NOPs."; let description = [{ @@ -281,13 +324,14 @@ def GenerateDeviceCodeLoader : Pass<"device-code-loader", "mlir::ModuleOp"> { }]; let dependentDialects = ["mlir::LLVM::LLVMDialect"]; - let constructor = "cudaq::opt::createGenerateDeviceCodeLoader()"; let options = [ Option<"outputFilename", "output-filename", "std::string", /*default=*/"\"-\"", "Name of output file.">, Option<"generateAsQuake", "use-quake", "bool", - /*default=*/"false", "Output should be module in Quake dialect."> + /*default=*/"true", "Output should be module in Quake dialect.">, + Option<"jitTime", "jit-compile", "bool", + /*default=*/"false", "Running pass at JIT compile time (default=false)."> ]; } diff --git a/include/cudaq/Support/TargetConfig.h b/include/cudaq/Support/TargetConfig.h index dc2097912e..d49a661e99 100644 --- a/include/cudaq/Support/TargetConfig.h +++ b/include/cudaq/Support/TargetConfig.h @@ -24,6 +24,8 @@ enum TargetFeatureFlag : unsigned { flagsFP64 = 0x0002, flagsMgpu = 0x0004, flagsMqpu = 0x0008, + flagsDepAnalysis = 0x0010, + flagsQPP = 0x0020, }; /// @brief Configuration argument type annotation @@ -69,6 +71,8 @@ struct BackendEndConfigEntry { std::optional LibraryMode; /// IR lowering configuration (hardware REST QPU) std::string PlatformLoweringConfig; + /// Exact cudaq-opt passes for pseudo-targets + std::string TargetPassPipeline; /// Codegen emission configuration (hardware REST QPU) std::string CodegenEmission; /// Post code generation IR passes configuration (hardware REST QPU) diff --git 
a/lib/Frontend/nvqpp/ASTBridge.cpp b/lib/Frontend/nvqpp/ASTBridge.cpp index 646e90ce83..806f3c6bde 100644 --- a/lib/Frontend/nvqpp/ASTBridge.cpp +++ b/lib/Frontend/nvqpp/ASTBridge.cpp @@ -159,13 +159,24 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor { customOperationNames(customOperations) {} /// Add a kernel to the list of kernels to process. - void processQpu(std::string &&kernelName, const clang::FunctionDecl *f) { + template + void processQpu(const std::string &kernelName, const clang::FunctionDecl *f) { LLVM_DEBUG(llvm::dbgs() << "adding kernel: " << kernelName << ", " << reinterpret_cast(const_cast(f)) << '\n'); - functionsToEmit.push_back(std::make_pair(std::move(kernelName), f)); - callGraphBuilder.addToCallGraph(const_cast(f)); + auto iter = std::find_if(functionsToEmit.begin(), functionsToEmit.end(), + [&](auto p) { return p.first == kernelName; }); + if constexpr (replace) { + if (iter == functionsToEmit.end()) + functionsToEmit.push_back(std::make_pair(kernelName, f)); + else + iter->second = f; + callGraphBuilder.addToCallGraph(const_cast(f)); + } else { + if (iter == functionsToEmit.end()) + functionsToEmit.push_back(std::make_pair(kernelName, f)); + } } // Check some of the restrictions and limitations on kernel classes. These @@ -220,10 +231,10 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor { return result; } - bool VisitFunctionDecl(clang::FunctionDecl *func) { + bool VisitFunctionDecl(clang::FunctionDecl *x) { if (ignoreTemplate) return true; - func = func->getDefinition(); + auto *func = x->getDefinition(); if (func) { bool runChecks = false; @@ -248,10 +259,15 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor { processQpu(cudaq::details::getTagNameOfFunctionDecl(func, mangler), func); } + } else if (cudaq::ASTBridgeAction::ASTBridgeConsumer::isQuantum(x)) { + // Add declarations to support separate compilation. 
+ processQpu( + cudaq::details::getTagNameOfFunctionDecl(x, mangler), x); } return true; } + // NB: DataRecursionQueue* argument intentionally omitted. bool TraverseLambdaExpr(clang::LambdaExpr *x) { bool saveQuantumTypesNotAllowed = quantumTypesNotAllowed; // Rationale: a lambda expression may be passed from classical C++ code into @@ -267,9 +283,9 @@ class QPUCodeFinder : public clang::RecursiveASTVisitor { if (ignoreTemplate) return true; if (const auto *cxxMethodDecl = lambda->getCallOperator()) - if (const auto *f = cxxMethodDecl->getAsFunction()->getDefinition(); - f && cudaq::ASTBridgeAction::ASTBridgeConsumer::isQuantum(f)) - processQpu(cudaq::details::getTagNameOfFunctionDecl(f, mangler), f); + if (const auto *f = cxxMethodDecl->getAsFunction()->getDefinition()) + if (cudaq::ASTBridgeAction::ASTBridgeConsumer::isQuantum(f)) + processQpu(cudaq::details::getTagNameOfFunctionDecl(f, mangler), f); return true; } diff --git a/lib/Frontend/nvqpp/ConvertDecl.cpp b/lib/Frontend/nvqpp/ConvertDecl.cpp index a0c3064c3d..149959c8e8 100644 --- a/lib/Frontend/nvqpp/ConvertDecl.cpp +++ b/lib/Frontend/nvqpp/ConvertDecl.cpp @@ -91,8 +91,9 @@ void QuakeBridgeVisitor::addArgumentSymbols( // Transform pass-by-value arguments to stack slots. auto loc = toLocation(argVal); auto parmTy = entryBlock->getArgument(index).getType(); - if (isa(parmTy)) { symbolTable.insert(name, entryBlock->getArgument(index)); } else { @@ -114,26 +115,10 @@ void QuakeBridgeVisitor::createEntryBlock(func::FuncOp func, addArgumentSymbols(entryBlock, x->parameters()); } -std::pair +std::pair QuakeBridgeVisitor::getOrAddFunc(Location loc, StringRef funcName, FunctionType funcTy) { - auto func = module.lookupSymbol(funcName); - if (func) { - if (!func.empty()) { - // Already lowered function func, skip it. - return {func, /*defined=*/true}; - } - // Function was declared but not defined. - return {func, /*defined=*/false}; - } - // Function not found, so add it to the module. 
- OpBuilder build(module.getBodyRegion()); - OpBuilder::InsertionGuard guard(build); - build.setInsertionPointToEnd(module.getBody()); - SmallVector attrs; - func = build.create(loc, funcName, funcTy, attrs); - func.setPrivate(); - return {func, /*defined=*/false}; + return cudaq::opt::factory::getOrAddFunc(loc, funcName, funcTy, module); } bool QuakeBridgeVisitor::interceptRecordDecl(clang::RecordDecl *x) { @@ -176,6 +161,14 @@ bool QuakeBridgeVisitor::interceptRecordDecl(clang::RecordDecl *x) { return pushType(cc::StateType::get(ctx)); if (name.equals("pauli_word")) return pushType(cc::CharspanType::get(ctx)); + if (name.equals("qkernel")) { + auto *cts = cast(x); + // Traverse template argument 0 to get the function's signature. + if (!TraverseType(cts->getTemplateArgs()[0].getAsType())) + return false; + auto fnTy = cast(popType()); + return pushType(cc::IndirectCallableType::get(fnTy)); + } auto loc = toLocation(x); TODO_loc(loc, "unhandled type, " + name + ", in cudaq namespace"); } @@ -649,9 +642,8 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) { if (auto qType = dyn_cast(type)) { // Variable is of !quake.ref type. if (x->hasInit() && !valueStack.empty()) { - auto val = popValue(); - symbolTable.insert(name, val); - return pushValue(val); + symbolTable.insert(name, peekValue()); + return true; } auto zero = builder.create( loc, 0, builder.getIntegerType(64)); @@ -663,6 +655,13 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) { return pushValue(addressTheQubit); } + if (isa(type)) { + // A pure quantum struct is just passed along by value. It cannot be stored + // to a variable. + symbolTable.insert(name, peekValue()); + return true; + } + // Here we maybe have something like auto var = mz(qreg) if (auto vecType = dyn_cast(type)) { // Variable is of !cc.stdvec type. @@ -796,6 +795,12 @@ bool QuakeBridgeVisitor::VisitVarDecl(clang::VarDecl *x) { return pushValue(cast); } + // Don't allocate memory for a quantum or value-semantic struct. 
+ if (auto insertValOp = initValue.getDefiningOp()) { + symbolTable.insert(x->getName(), initValue); + return pushValue(initValue); + } + // Initialization expression resulted in a value. Create a variable and save // that value to the variable's memory address. Value alloca = builder.create(loc, type); diff --git a/lib/Frontend/nvqpp/ConvertExpr.cpp b/lib/Frontend/nvqpp/ConvertExpr.cpp index 1c21ecb8cd..5976e1355d 100644 --- a/lib/Frontend/nvqpp/ConvertExpr.cpp +++ b/lib/Frontend/nvqpp/ConvertExpr.cpp @@ -1109,14 +1109,23 @@ bool QuakeBridgeVisitor::VisitMemberExpr(clang::MemberExpr *x) { if (auto *field = dyn_cast(x->getMemberDecl())) { auto loc = toLocation(x->getSourceRange()); auto object = popValue(); // DeclRefExpr + auto ty = popType(); + std::int32_t offset = field->getFieldIndex(); + if (isa(object.getType())) { + return pushValue( + builder.create(loc, ty, object, offset)); + } + if (!isa(object.getType())) { + reportClangError(x, mangler, + "internal error: struct must be an object in memory"); + return false; + } auto eleTy = cast(object.getType()).getElementType(); SmallVector offsets; if (auto arrTy = dyn_cast(eleTy)) if (arrTy.isUnknownSize()) offsets.push_back(0); - std::int32_t offset = field->getFieldIndex(); offsets.push_back(offset); - auto ty = popType(); return pushValue(builder.create( loc, cc::PointerType::get(ty), object, offsets)); } @@ -1260,6 +1269,17 @@ bool QuakeBridgeVisitor::VisitCallExpr(clang::CallExpr *x) { return pushValue(builder.create( loc, elePtrTy, vecPtr, ValueRange{negativeOneIndex})); } + if (funcName.equals("data")) + if (auto memberCall = dyn_cast(x)) + if (memberCall->getImplicitObjectArgument()) { + [[maybe_unused]] auto calleeTy = popType(); + assert(isa(calleeTy)); + // data() returns a pointer to a sequence of elements. 
+ auto eleTy = cast(svec.getType()).getElementType(); + auto eleArrTy = cc::PointerType::get(cc::ArrayType::get(eleTy)); + return pushValue( + builder.create(loc, eleArrTy, svec)); + } TODO_loc(loc, "unhandled std::vector member function, " + funcName); } @@ -2163,6 +2183,16 @@ bool QuakeBridgeVisitor::WalkUpFromCXXOperatorCallExpr( return WalkUpFromCallExpr(x) && VisitCXXOperatorCallExpr(x); } +bool QuakeBridgeVisitor::hasTOSEntryKernel() { + if (auto fn = peekValue().getDefiningOp()) { + auto name = fn.getValue().str(); + for (auto fdPair : functionsToEmit) + if (getCudaqKernelName(fdPair.first) == name) + return true; + } + return false; +} + bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( clang::CXXOperatorCallExpr *x) { auto loc = toLocation(x->getSourceRange()); @@ -2178,11 +2208,26 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( if (isCudaQType(typeName)) { auto idx_var = popValue(); auto qreg_var = popValue(); - + auto *arg0 = x->getArg(0); + if (isa(arg0)) { + // This is a subscript operator on a data member and the type is a + // quantum type (likely a `qview`). This can only happen in a quantum + // `struct`, which the spec says must be one-level deep at most and must + // only contain references to qubits explicitly allocated in other + // variables. `qreg_var` will be a `quake.get_member`. Do not add this + // extract `Op` to the symbol table, but always generate a new + // `quake.extract_ref` `Op` to get the exact qubit (reference) value. + auto address_qubit = + builder.create(loc, qreg_var, idx_var); + return replaceTOSValue(address_qubit); + } // Get name of the qreg, e.g. qr, and use it to construct a name for the // element, which is intended to be qr%n when n is the index of the // accessed qubit. 
- StringRef qregName = getNamedDecl(x->getArg(0))->getName(); + if (!isa(arg0)) + reportClangError(x, mangler, + "internal error: expected a variable name"); + StringRef qregName = getNamedDecl(arg0)->getName(); auto name = getQubitSymbolTableName(qregName, idx_var); char *varName = strdup(name.c_str()); @@ -2190,12 +2235,15 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( if (symbolTable.count(name)) return replaceTOSValue(symbolTable.lookup(name)); - // Otherwise create an operation to access the qubit, store that value in - // the symbol table, and return the AddressQubit operation's resulting - // value. + // Otherwise create an operation to access the qubit, store that value + // in the symbol table, and return the AddressQubit operation's + // resulting value. auto address_qubit = builder.create(loc, qreg_var, idx_var); + // NB: varName is built from the variable name *and* the index value. This + // front-end optimization is likely unnecessary as the compiler can always + // canonicalize and merge identical quake.extract_ref operations. symbolTable.insert(StringRef(varName), address_qubit); return replaceTOSValue(address_qubit); } @@ -2246,21 +2294,11 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( auto tos = popValue(); auto tosTy = tos.getType(); auto ptrTy = dyn_cast(tosTy); - bool isEntryKernel = [&]() { - // TODO: make this lambda a member function. - if (auto fn = peekValue().getDefiningOp()) { - auto name = fn.getValue().str(); - for (auto fdPair : functionsToEmit) - if (getCudaqKernelName(fdPair.first) == name) - return true; - } - return false; - }(); - if (ptrTy || isEntryKernel) { + bool isEntryKernel = hasTOSEntryKernel(); + if ((ptrTy && isa(ptrTy.getElementType())) || + isEntryKernel) { // The call operator has an object in the call position, so we want to // replace it with an indirect call to the func::ConstantOp. 
- assert((isEntryKernel || isa(ptrTy.getElementType())) && - "expected kernel as callable class"); auto indirect = popValue(); auto funcTy = cast(indirect.getType()); auto call = builder.create( @@ -2269,6 +2307,23 @@ bool QuakeBridgeVisitor::VisitCXXOperatorCallExpr( return true; return pushValue(call.getResult(0)); } + auto indCallTy = [&]() -> cc::IndirectCallableType { + if (ptrTy) { + auto ty = dyn_cast(ptrTy.getElementType()); + if (ty) + return ty; + } + return dyn_cast(tosTy); + }(); + if (indCallTy) { + [[maybe_unused]] auto discardedCallOp = popValue(); + auto funcTy = cast(indCallTy.getSignature()); + auto call = builder.create( + loc, funcTy.getResults(), tos, args); + if (call.getResults().empty()) + return true; + return pushValue(call.getResult(0)); + } auto callableTy = cast(tosTy); auto callInd = builder.create( loc, callableTy.getSignature().getResults(), tos, args); @@ -2367,7 +2422,10 @@ bool QuakeBridgeVisitor::VisitInitListExpr(clang::InitListExpr *x) { bool allRef = std::all_of(last.begin(), last.end(), [](auto v) { return isa(v.getType()); }); - if (allRef) { + if (allRef && isa(initListTy)) + return pushValue(builder.create(loc, initListTy, last)); + + if (allRef && !isa(initListTy)) { // Initializer list contains all quantum reference types. In this case we // want to create quake code to concatenate the references into a veq. if (size > 1) { @@ -2438,6 +2496,11 @@ bool QuakeBridgeVisitor::VisitInitListExpr(clang::InitListExpr *x) { auto globalInit = builder.create(loc, ptrTy, name); return pushValue(globalInit); } + + // If quantum, use value semantics with cc insert / extract value. + if (isa(eleTy)) + return pushValue(builder.create(loc, eleTy, last)); + Value alloca = (numEles > 1) ? 
builder.create(loc, eleTy, arrSize) : builder.create(loc, eleTy); @@ -2528,6 +2591,19 @@ static Type getEleTyFromVectorCtor(Type ctorTy) { return ctorTy; } +bool QuakeBridgeVisitor::VisitCXXParenListInitExpr( + clang::CXXParenListInitExpr *x) { + auto ty = peekType(); + assert(ty && "type must be present"); + LLVM_DEBUG(llvm::dbgs() << "paren list type: " << ty << '\n'); + auto structTy = dyn_cast(ty); + if (!structTy) + return true; + auto loc = toLocation(x); + auto last = lastValues(structTy.getMembers().size()); + return pushValue(builder.create(loc, structTy, last)); +} + bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { auto loc = toLocation(x); auto *ctor = x->getConstructor(); @@ -2810,8 +2886,12 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { if (ctor->isDefaultConstructor()) reportClangError(ctor, mangler, "Default std::vector constructor within quantum " - "kernel is not allowed " - "(cannot resize the vector)."); + "kernel is not allowed (cannot resize the vector)."); + + if (ctor->isMoveConstructor()) { + // Just use the !cc.stdvec value at TOS. + return true; + } } } @@ -2823,12 +2903,17 @@ bool QuakeBridgeVisitor::VisitCXXConstructExpr(clang::CXXConstructExpr *x) { return true; } - if (ctor->isCopyOrMoveConstructor() && parent->isPOD()) { - // Copy or move constructor on a POD struct. The value stack should contain - // the object to load the value from. - auto fromStruct = popValue(); - assert(isa(ctorTy) && "POD must be a struct type"); - return pushValue(builder.create(loc, fromStruct)); + if (ctor->isCopyOrMoveConstructor()) { + // Just walk through copy constructors for quantum struct types. + if (isa(ctorTy)) + return true; + if (parent->isPOD()) { + // Copy or move constructor on a POD struct. The value stack should + // contain the object to load the value from. 
+ auto fromStruct = popValue(); + assert(isa(ctorTy) && "POD must be a struct type"); + return pushValue(builder.create(loc, fromStruct)); + } } if (ctor->isCopyConstructor() && ctor->isTrivial() && diff --git a/lib/Frontend/nvqpp/ConvertType.cpp b/lib/Frontend/nvqpp/ConvertType.cpp index 4fbf1559ba..16d4735f6b 100644 --- a/lib/Frontend/nvqpp/ConvertType.cpp +++ b/lib/Frontend/nvqpp/ConvertType.cpp @@ -22,12 +22,6 @@ static bool isArithmeticType(Type t) { return isa(t); } -/// Is \p t a quantum reference type. In the bridge, quantum types are always -/// reference types. -static bool isQuantumType(Type t) { - return isa(t); -} - /// Allow `array of [array of]* T`, where `T` is arithmetic. static bool isStaticArithmeticSequenceType(Type t) { if (auto vec = dyn_cast(t)) { @@ -119,6 +113,8 @@ static bool isKernelSignatureType(FunctionType t); static bool isKernelCallable(Type t) { if (auto lambdaTy = dyn_cast(t)) return isKernelSignatureType(lambdaTy.getSignature()); + if (auto lambdaTy = dyn_cast(t)) + return isKernelSignatureType(lambdaTy.getSignature()); return false; } @@ -142,7 +138,8 @@ static bool isKernelResultType(Type t) { /// (function), or a string. static bool isKernelArgumentType(Type t) { return isArithmeticType(t) || isComposedArithmeticType(t) || - isQuantumType(t) || isKernelCallable(t) || isFunctionCallable(t) || + quake::isQuantumReferenceType(t) || isKernelCallable(t) || + isFunctionCallable(t) || // TODO: move from pointers to a builtin string type. 
cudaq::isCharPointerType(t); } @@ -241,13 +238,93 @@ bool QuakeBridgeVisitor::VisitRecordDecl(clang::RecordDecl *x) { auto *ctx = builder.getContext(); if (!x->getDefinition()) return pushType(cc::StructType::get(ctx, name, /*isOpaque=*/true)); + SmallVector fieldTys = lastTypes(std::distance(x->field_begin(), x->field_end())); auto [width, alignInBytes] = getWidthAndAlignment(x); - if (name.empty()) - return pushType(cc::StructType::get(ctx, fieldTys, width, alignInBytes)); - return pushType( - cc::StructType::get(ctx, name, fieldTys, width, alignInBytes)); + bool isStruq = !fieldTys.empty(); + for (auto ty : fieldTys) + if (!quake::isQuantumReferenceType(ty)) + isStruq = false; + + auto ty = [&]() -> Type { + if (isStruq) + return quake::StruqType::get(ctx, fieldTys); + if (name.empty()) + return cc::StructType::get(ctx, fieldTys, width, alignInBytes); + return cc::StructType::get(ctx, name, fieldTys, width, alignInBytes); + }(); + + // Do some error analysis on the product type. Check the following: + + // - If this is a struq: + if (isa(ty)) { + // -- does it contain invalid C++ types? + for (auto *field : x->fields()) { + auto *ty = field->getType().getTypePtr(); + bool isRef = false; + if (ty->isLValueReferenceType()) { + auto *lref = cast(ty); + isRef = true; + ty = lref->getPointeeType().getTypePtr(); + } + if (auto *tyDecl = ty->getAsRecordDecl()) { + if (auto *ident = tyDecl->getIdentifier()) { + auto name = ident->getName(); + if (isInNamespace(tyDecl, "cudaq")) { + if (isRef) { + // can be owning container; so can be qubit, qarray, or qvector + if ((name.equals("qudit") || name.equals("qubit") || + name.equals("qvector") || name.equals("qarray"))) + continue; + } + // must be qview or qview& + if (name.equals("qview")) + continue; + } + } + } + reportClangError(x, mangler, "quantum struct has invalid member type."); + } + // -- does it contain contain a struq member? Not allowed. 
+ for (auto fieldTy : fieldTys) + if (isa(fieldTy)) + reportClangError(x, mangler, + "recursive quantum struct types are not allowed."); + } + + // - Is this a struct does it have quantum types? Not allowed. + if (!isa(ty)) + for (auto fieldTy : fieldTys) + if (quake::isQuakeType(fieldTy)) + reportClangError( + x, mangler, + "hybrid quantum-classical struct types are not allowed."); + + // - Does this product type have (user-defined) member functions? Not allowed. + if (auto *cxxRd = dyn_cast(x)) { + auto numMethods = [&cxxRd]() { + std::size_t count = 0; + for (auto methodIter = cxxRd->method_begin(); + methodIter != cxxRd->method_end(); ++methodIter) { + // Don't check if this is a __qpu__ struct method + if (auto attr = (*methodIter)->getAttr(); + attr && attr->getAnnotation().str() == cudaq::kernelAnnotation) + continue; + // Check if the method is not implicit (i.e., user-defined) + if (!(*methodIter)->isImplicit()) + count++; + } + return count; + }(); + + if (numMethods > 0) + reportClangError( + x, mangler, + "struct with user-defined methods is not allowed in quantum kernel."); + } + + return pushType(ty); } bool QuakeBridgeVisitor::VisitFunctionProtoType(clang::FunctionProtoType *t) { @@ -364,8 +441,8 @@ bool QuakeBridgeVisitor::VisitLValueReferenceType( if (t->getPointeeType()->isUndeducedAutoType()) return pushType(cc::PointerType::get(builder.getContext())); auto eleTy = popType(); - if (isa( - eleTy)) + if (isa(eleTy)) return pushType(eleTy); return pushType(cc::PointerType::get(eleTy)); } @@ -376,8 +453,9 @@ bool QuakeBridgeVisitor::VisitRValueReferenceType( return pushType(cc::PointerType::get(builder.getContext())); auto eleTy = popType(); // FIXME: LLVMStructType is promoted as a temporary workaround. 
- if (isa(eleTy)) + if (isa(eleTy)) return pushType(eleTy); return pushType(cc::PointerType::get(eleTy)); } @@ -429,14 +507,17 @@ bool QuakeBridgeVisitor::doSyntaxChecks(const clang::FunctionDecl *x) { auto astTy = x->getType(); // Verify the argument and return types are valid for a kernel. auto *protoTy = dyn_cast(astTy.getTypePtr()); - if (!protoTy) { - reportClangError(x, mangler, "kernel must have a prototype"); + auto syntaxError = [&](const char(&msg)[N]) -> bool { + reportClangError(x, mangler, msg); + [[maybe_unused]] auto ty = popType(); + LLVM_DEBUG(llvm::dbgs() << "invalid type: " << ty << '\n'); return false; - } + }; + if (!protoTy) + return syntaxError("kernel must have a prototype"); if (protoTy->getNumParams() != funcTy.getNumInputs()) { // The arity of the function doesn't match, so report an error. - reportClangError(x, mangler, "kernel has unexpected arguments"); - return false; + return syntaxError("kernel has unexpected arguments"); } for (auto [t, p] : llvm::zip(funcTy.getInputs(), x->parameters())) { // Structs, lambdas, functions are valid callable objects. 
Also pure @@ -444,14 +525,12 @@ bool QuakeBridgeVisitor::doSyntaxChecks(const clang::FunctionDecl *x) { if (isKernelArgumentType(t) || isReferenceToCallableRecord(t, p) || isReferenceToCudaqStateType(t)) continue; - reportClangError(p, mangler, "kernel argument type not supported"); - return false; + return syntaxError("kernel argument type not supported"); } for (auto t : funcTy.getResults()) { if (isKernelResultType(t)) continue; - reportClangError(x, mangler, "kernel result type not supported"); - return false; + return syntaxError("kernel result type not supported"); } return true; } diff --git a/lib/Optimizer/Builder/Factory.cpp b/lib/Optimizer/Builder/Factory.cpp index bc130fb205..1c14ec349a 100644 --- a/lib/Optimizer/Builder/Factory.cpp +++ b/lib/Optimizer/Builder/Factory.cpp @@ -43,6 +43,8 @@ static Type genBufferType(Type ty) { auto *ctx = ty.getContext(); if (isa(ty)) return cudaq::cc::PointerType::get(ctx); + if (isa(ty)) + return IntegerType::get(ctx, 64); if (auto vecTy = dyn_cast(ty)) { auto i64Ty = IntegerType::get(ctx, 64); if (isOutput) { @@ -368,6 +370,8 @@ static Type convertToHostSideType(Type ty) { if (auto memrefTy = dyn_cast(ty)) return convertToHostSideType( factory::stlVectorType(memrefTy.getElementType())); + if (isa(ty)) + return cc::PointerType::get(IntegerType::get(ty.getContext(), 8)); if (auto memrefTy = dyn_cast(ty)) { // `pauli_word` is an object with a std::vector in the header files at // present. This data type *must* be updated if it becomes a std::string @@ -610,4 +614,26 @@ factory::readGlobalConstantArray(cudaq::cc::GlobalOp &global) { return result; } +std::pair +factory::getOrAddFunc(mlir::Location loc, mlir::StringRef funcName, + mlir::FunctionType funcTy, mlir::ModuleOp module) { + auto func = module.lookupSymbol(funcName); + if (func) { + if (!func.empty()) { + // Already lowered function func, skip it. + return {func, /*defined=*/true}; + } + // Function was declared but not defined. 
+ return {func, /*defined=*/false}; + } + // Function not found, so add it to the module. + OpBuilder build(module.getBodyRegion()); + OpBuilder::InsertionGuard guard(build); + build.setInsertionPointToEnd(module.getBody()); + SmallVector attrs; + func = build.create(loc, funcName, funcTy, attrs); + func.setPrivate(); + return {func, /*defined=*/false}; +} + } // namespace cudaq::opt diff --git a/lib/Optimizer/Builder/Intrinsics.cpp b/lib/Optimizer/Builder/Intrinsics.cpp index 36770edefb..12e430dc03 100644 --- a/lib/Optimizer/Builder/Intrinsics.cpp +++ b/lib/Optimizer/Builder/Intrinsics.cpp @@ -51,6 +51,17 @@ inline bool operator<(const IntrinsicCode &icode, const IntrinsicCode &jcode) { static constexpr IntrinsicCode intrinsicTable[] = { // Initialize a (preallocated) buffer (the first parameter) with i64 values // on the semi-open range `[0..n)` where `n` is the second parameter. + {cudaq::runtime::getLinkableKernelKey, + {}, + R"#( + func.func private @__cudaq_getLinkableKernelKey(!cc.ptr) -> i64 +)#"}, + {cudaq::runtime::registerLinkableKernel, + {}, + R"#( + func.func private @__cudaq_registerLinkableKernel(!cc.ptr, !cc.ptr, !cc.ptr) -> () +)#"}, + {cudaq::setCudaqRangeVector, {}, R"#( @@ -310,6 +321,24 @@ static constexpr IntrinsicCode intrinsicTable[] = { R"#( func.func private @altLaunchKernel(!cc.ptr, !cc.ptr, !cc.ptr, i64, i64) -> ())#"}, + {cudaq::runtime::CudaqRegisterArgsCreator, + {}, + R"#( + func.func private @cudaqRegisterArgsCreator(!cc.ptr, !cc.ptr) -> () +)#"}, + {cudaq::runtime::CudaqRegisterKernelName, + {cudaq::runtime::CudaqRegisterArgsCreator, + cudaq::runtime::CudaqRegisterLambdaName, + cudaq::runtime::registerLinkableKernel, + cudaq::runtime::getLinkableKernelKey}, + "func.func private @cudaqRegisterKernelName(!cc.ptr) -> ()"}, + + {cudaq::runtime::CudaqRegisterLambdaName, + {}, + R"#( + llvm.func @cudaqRegisterLambdaName(!llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"} +)#"}, + {"free", {}, "func.func private @free(!cc.ptr) 
-> ()"}, // hybridLaunchKernel(kernelName, thunk, commBuffer, buffSize, diff --git a/lib/Optimizer/CodeGen/CCToLLVM.cpp b/lib/Optimizer/CodeGen/CCToLLVM.cpp index 01596ae760..a47dfa742b 100644 --- a/lib/Optimizer/CodeGen/CCToLLVM.cpp +++ b/lib/Optimizer/CodeGen/CCToLLVM.cpp @@ -9,6 +9,7 @@ #include "cudaq/Optimizer/CodeGen/CCToLLVM.h" #include "CodeGenOps.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" +#include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" @@ -189,6 +190,50 @@ class CallCallableOpPattern } }; +class CallIndirectCallableOpPattern + : public ConvertOpToLLVMPattern { +public: + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(cudaq::cc::CallIndirectCallableOp call, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto loc = call.getLoc(); + auto parentModule = call->getParentOfType(); + auto funcPtrTy = getTypeConverter()->convertType( + cast(call.getCallee().getType()) + .getSignature()); + auto ptrTy = LLVM::LLVMPointerType::get(rewriter.getI8Type()); + auto funcTy = cast( + cast(funcPtrTy).getElementType()); + auto i64Ty = rewriter.getI64Type(); // intptr_t + FlatSymbolRefAttr funSymbol = cudaq::opt::factory::createLLVMFunctionSymbol( + cudaq::runtime::getLinkableKernelDeviceSide, ptrTy, {i64Ty}, + parentModule); + + // Use the runtime helper function to convert the key to a pointer to the + // function that was intended to be called. This can only be functional if + // the runtime support has been linked into the executable and the + // device-side functions are located in the same address space as well. None + // of these functions should be expected to reside on remote hardware. + // Therefore, this will likely only be useful in a simulation target. 
+ auto lookee = rewriter.create( + loc, ptrTy, funSymbol, ValueRange{adaptor.getCallee()}); + auto lookup = + rewriter.create(loc, funcPtrTy, lookee.getResult()); + + // Call the function that was just found in the map. + SmallVector args = {lookup.getResult()}; + args.append(adaptor.getArgs().begin(), adaptor.getArgs().end()); + if (isa(funcTy.getReturnType())) + rewriter.replaceOpWithNewOp(call, std::nullopt, args); + else + rewriter.replaceOpWithNewOp(call, funcTy.getReturnType(), + args); + return success(); + } +}; + class CastOpPattern : public ConvertOpToLLVMPattern { public: using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; @@ -670,7 +715,8 @@ class UndefOpPattern : public ConvertOpToLLVMPattern { void cudaq::opt::populateCCToLLVMPatterns(LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) { patterns.insertaddConversion([](cc::IndirectCallableType type) { + return IntegerType::get(type.getContext(), 64); + }); converter->addConversion([](cc::CallableType type) { return lambdaAsPairOfPointers(type.getContext()); }); diff --git a/lib/Optimizer/CodeGen/ConvertToCC.cpp b/lib/Optimizer/CodeGen/ConvertToCC.cpp index ca8ce1b2cd..fc8972691a 100644 --- a/lib/Optimizer/CodeGen/ConvertToCC.cpp +++ b/lib/Optimizer/CodeGen/ConvertToCC.cpp @@ -42,6 +42,12 @@ struct QuakeTypeConverter : public TypeConverter { return cudaq::cc::PointerType::get( cudaq::opt::getCudaqQubitSpanType(ty.getContext())); }); + addConversion([&](quake::StruqType ty) { + SmallVector mems; + for (auto m : ty.getMembers()) + mems.push_back(convertType(m)); + return cudaq::cc::StructType::get(ty.getContext(), mems); + }); addConversion([](quake::MeasureType ty) { return IntegerType::get(ty.getContext(), 64); }); diff --git a/lib/Optimizer/CodeGen/ConvertToQIR.cpp b/lib/Optimizer/CodeGen/ConvertToQIR.cpp index e4f370876e..c5b4606e2d 100644 --- a/lib/Optimizer/CodeGen/ConvertToQIR.cpp +++ b/lib/Optimizer/CodeGen/ConvertToQIR.cpp @@ -68,7 +68,7 @@ class ConvertToQIR : public 
cudaq::opt::impl::ConvertToQIRBase { /// Measurement counter for unnamed measurements. Resets every module. unsigned measureCounter = 0; - // This is an ad hox transformation to convert constant array values into a + // This is an ad hoc transformation to convert constant array values into a // buffer of constants. LogicalResult eraseConstantArrayOps() { bool ok = true; @@ -198,6 +198,13 @@ void cudaq::opt::initializeTypeConversions(LLVMTypeConverter &typeConverter) { [](quake::VeqType type) { return getArrayType(type.getContext()); }); typeConverter.addConversion( [](quake::RefType type) { return getQubitType(type.getContext()); }); + typeConverter.addConversion([&](quake::StruqType type) { + SmallVector mems; + for (auto m : type.getMembers()) + mems.push_back(typeConverter.convertType(m)); + return LLVM::LLVMStructType::getLiteral(type.getContext(), mems, + /*packed=*/false); + }); typeConverter.addConversion([](quake::MeasureType type) { return IntegerType::get(type.getContext(), 1); }); diff --git a/lib/Optimizer/CodeGen/ConvertToQIRProfile.cpp b/lib/Optimizer/CodeGen/ConvertToQIRProfile.cpp index fcc719c947..2ae90d302b 100644 --- a/lib/Optimizer/CodeGen/ConvertToQIRProfile.cpp +++ b/lib/Optimizer/CodeGen/ConvertToQIRProfile.cpp @@ -10,6 +10,7 @@ #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/Peephole.h" +#include "cudaq/Optimizer/CodeGen/QIRAttributeNames.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Todo.h" #include "nlohmann/json.hpp" @@ -59,8 +60,6 @@ static std::optional sliceLowerBound(Operation *op) { return {}; } -static constexpr char StartingOffsetAttrName[] = "StartingOffset"; - namespace { struct FunctionAnalysisData { std::size_t nQubits = 0; @@ -103,7 +102,10 @@ struct FunctionProfileAnalysis { return; FunctionAnalysisData data; funcOp->walk([&](LLVM::CallOp callOp) { - StringRef funcName = callOp.getCalleeAttr().getValue(); + auto funcNameAttr 
= callOp.getCalleeAttr(); + if (!funcNameAttr) + return; + auto funcName = funcNameAttr.getValue(); // For every allocation call, create a range of integers to uniquely // identify the qubits in the allocation. @@ -179,7 +181,7 @@ struct FunctionProfileAnalysis { auto resIdx = IntegerAttr::get(intTy, data.nResults); callOp->setAttr(resultIndexName, resIdx); auto regName = [&]() -> StringAttr { - if (auto nameAttr = callOp->getAttr("registerName") + if (auto nameAttr = callOp->getAttr(cudaq::opt::QIRRegisterNameAttr) .dyn_cast_or_null()) return nameAttr; return {}; @@ -215,22 +217,46 @@ struct AddFuncAttribute : public OpRewritePattern { bool isAdaptive = convertTo == "qir-adaptive"; const char *profileName = isAdaptive ? "adaptive_profile" : "base_profile"; + auto requiredQubitsStr = std::to_string(info.nQubits); + StringRef requiredQubitsStrRef = requiredQubitsStr; + if (auto stringAttr = op->getAttr(cudaq::opt::QIRRequiredQubitsAttrName) + .dyn_cast_or_null()) + requiredQubitsStrRef = stringAttr; + auto requiredResultsStr = std::to_string(info.nResults); + StringRef requiredResultsStrRef = requiredResultsStr; + if (auto stringAttr = op->getAttr(cudaq::opt::QIRRequiredResultsAttrName) + .dyn_cast_or_null()) + requiredResultsStrRef = stringAttr; + StringRef outputNamesStrRef; + std::string resultQubitJSONStr; + if (auto strAttr = op->getAttr(cudaq::opt::QIROutputNamesAttrName) + .dyn_cast_or_null()) { + outputNamesStrRef = strAttr; + } else { + resultQubitJSONStr = resultQubitJSON.dump(); + outputNamesStrRef = resultQubitJSONStr; + } + // QIR functions need certain attributes, add them here. 
// TODO: Update schema_id with valid value (issues #385 and #556) - auto arrAttr = rewriter.getArrayAttr(ArrayRef{ - rewriter.getStringAttr("entry_point"), - rewriter.getStrArrayAttr({"qir_profiles", profileName}), - rewriter.getStrArrayAttr({"output_labeling_schema", "schema_id"}), - rewriter.getStrArrayAttr({"output_names", resultQubitJSON.dump()}), + SmallVector attrArray{ + rewriter.getStringAttr(cudaq::opt::QIREntryPointAttrName), + rewriter.getStrArrayAttr( + {cudaq::opt::QIRProfilesAttrName, profileName}), + rewriter.getStrArrayAttr( + {cudaq::opt::QIROutputLabelingSchemaAttrName, "schema_id"}), + rewriter.getStrArrayAttr( + {cudaq::opt::QIROutputNamesAttrName, outputNamesStrRef}), rewriter.getStrArrayAttr( // TODO: change to required_num_qubits once providers support it // (issues #385 and #556) - {"requiredQubits", std::to_string(info.nQubits)}), + {cudaq::opt::QIRRequiredQubitsAttrName, requiredQubitsStrRef}), rewriter.getStrArrayAttr( // TODO: change to required_num_results once providers support it // (issues #385 and #556) - {"requiredResults", std::to_string(info.nResults)})}); - op.setPassthroughAttr(arrAttr); + {cudaq::opt::QIRRequiredResultsAttrName, requiredResultsStrRef})}; + + op.setPassthroughAttr(rewriter.getArrayAttr(attrArray)); // Stick the record calls in the exit block. 
auto builder = cudaq::IRBuilder::atBlockTerminator(&op.getBody().back()); @@ -292,7 +318,7 @@ struct AddCallAttribute : public OpRewritePattern { assert(startIter != info.allocationOffsets.end()); auto startVal = startIter->second; rewriter.startRootUpdate(op); - op->setAttr(StartingOffsetAttrName, + op->setAttr(cudaq::opt::StartingOffsetAttrName, rewriter.getIntegerAttr(rewriter.getI64Type(), startVal)); rewriter.finalizeRootUpdate(op); return success(); @@ -333,10 +359,13 @@ struct QIRToQIRProfileFuncPass return op.empty() || op.getPassthroughAttr(); }); target.addDynamicallyLegalOp([](LLVM::CallOp op) { - StringRef funcName = op.getCalleeAttr().getValue(); + auto funcNameAttr = op.getCalleeAttr(); + if (!funcNameAttr) + return true; + auto funcName = funcNameAttr.getValue(); return (!funcName.equals(cudaq::opt::QIRArrayQubitAllocateArray) && !funcName.equals(cudaq::opt::QIRQubitAllocate)) || - op->hasAttr(StartingOffsetAttrName); + op->hasAttr(cudaq::opt::StartingOffsetAttrName); }); if (failed(applyPartialConversion(op, target, std::move(patterns)))) { @@ -372,11 +401,12 @@ struct ArrayGetElementPtrConv : public OpRewritePattern { auto loc = op.getLoc(); if (call.getCallee()->equals(cudaq::opt::QIRArrayGetElementPtr1d)) { auto *alloc = call.getOperand(0).getDefiningOp(); - if (!alloc->hasAttr(StartingOffsetAttrName)) + if (!alloc->hasAttr(cudaq::opt::StartingOffsetAttrName)) return failure(); Value disp = call.getOperand(1); Value off = rewriter.create( - loc, disp.getType(), alloc->getAttr(StartingOffsetAttrName)); + loc, disp.getType(), + alloc->getAttr(cudaq::opt::StartingOffsetAttrName)); Value qubit = rewriter.create(loc, off, disp); rewriter.replaceOpWithNewOp(op, op.getType(), qubit); return success(); @@ -392,17 +422,44 @@ struct CallAlloc : public OpRewritePattern { PatternRewriter &rewriter) const override { if (!call.getCallee()->equals(cudaq::opt::QIRQubitAllocate)) return failure(); - if (!call->hasAttr(StartingOffsetAttrName)) + if 
(!call->hasAttr(cudaq::opt::StartingOffsetAttrName)) return failure(); auto loc = call.getLoc(); Value qubit = rewriter.create( - loc, rewriter.getI64Type(), call->getAttr(StartingOffsetAttrName)); + loc, rewriter.getI64Type(), + call->getAttr(cudaq::opt::StartingOffsetAttrName)); auto resTy = call.getResult().getType(); rewriter.replaceOpWithNewOp(call, resTy, qubit); return success(); } }; +// %1 = address_of @__quantum__qis__z__ctl +// %2 = call @invokewithControlBits %1, %ctrl, %targ +// ───────────────────────────────────────────────── +// %2 = call __quantum__qis__cz %ctrl, %targ +struct ZCtrlOneTargetToCZ : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(LLVM::CallOp call, + PatternRewriter &rewriter) const override { + ValueRange args(call.getArgOperands()); + if (args.size() == 4 && call.getCallee() && + call.getCallee()->equals(cudaq::opt::NVQIRInvokeWithControlBits)) { + if (auto addrOf = dyn_cast_or_null( + args[1].getDefiningOp())) { + if (addrOf.getGlobalName().startswith( + std::string(cudaq::opt::QIRQISPrefix) + "z__ctl")) { + rewriter.replaceOpWithNewOp( + call, TypeRange{}, cudaq::opt::QIRCZ, args.drop_front(2)); + return success(); + } + } + } + return failure(); + } +}; + /// QIR to the Specific QIR Profile /// /// This pass converts patterns in LLVM-IR dialect using QIR calls, etc. into a @@ -426,10 +483,11 @@ struct QIRToQIRProfileQIRPass RewritePatternSet patterns(context); // Note: LoadMeasureResult is not compliant with the Base Profile, so don't // add it here unless we're specifically doing the Adaptive Profile. 
- patterns.insert(context); + patterns + .insert(context); if (convertTo.getValue() == "qir-adaptive") patterns.insert(context); if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) @@ -469,14 +527,17 @@ struct QIRProfilePreparationPass ModuleOp module = getOperation(); auto *ctx = module.getContext(); - // Add cnot declaration as it may be - // referenced after peepholes run. + // Add cnot declaration as it may be referenced after peepholes run. cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRCnot, LLVM::LLVMVoidType::get(ctx), {cudaq::opt::getQubitType(ctx), cudaq::opt::getQubitType(ctx)}, module); - // Add measure_body as it has a different - // signature than measure. + // Add cz declaration as it may be referenced after peepholes run. + cudaq::opt::factory::createLLVMFunctionSymbol( + cudaq::opt::QIRCZ, LLVM::LLVMVoidType::get(ctx), + {cudaq::opt::getQubitType(ctx), cudaq::opt::getQubitType(ctx)}, module); + + // Add measure_body as it has a different signature than measure. cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRMeasureBody, LLVM::LLVMVoidType::get(ctx), {cudaq::opt::getQubitType(ctx), cudaq::opt::getResultType(ctx)}, @@ -486,16 +547,13 @@ struct QIRProfilePreparationPass cudaq::opt::QIRReadResultBody, IntegerType::get(ctx, 1), {cudaq::opt::getResultType(ctx)}, module); - // Add record functions for any - // measurements. + // Add record functions for any measurements. cudaq::opt::factory::createLLVMFunctionSymbol( cudaq::opt::QIRRecordOutput, LLVM::LLVMVoidType::get(ctx), {cudaq::opt::getResultType(ctx), cudaq::opt::getCharPointerType(ctx)}, module); - // Add functions - // `__quantum__qis__*__body` for all - // functions matching + // Add functions `__quantum__qis__*__body` for all functions matching // `__quantum__qis__*` that are found. 
for (auto &global : module) if (auto func = dyn_cast(global)) @@ -508,11 +566,11 @@ struct QIRProfilePreparationPass // Apply irreversible attribute to measurement functions for (auto &funcName : measurementFunctionNames) { Operation *op = SymbolTable::lookupSymbolIn(module, funcName); - auto funcOp = llvm::dyn_cast_or_null(op); + auto funcOp = llvm::dyn_cast_if_present(op); if (funcOp) { auto builder = OpBuilder(op); - auto arrAttr = builder.getArrayAttr( - ArrayRef{builder.getStringAttr("irreversible")}); + auto arrAttr = builder.getArrayAttr(ArrayRef{ + builder.getStringAttr(cudaq::opt::QIRIrreversibleFlagName)}); funcOp.setPassthroughAttr(arrAttr); } } @@ -525,84 +583,16 @@ std::unique_ptr cudaq::opt::createQIRProfilePreparationPass() { } //===----------------------------------------------------------------------===// - -namespace { -/// Verify that the specific profile QIR code is sane. For now, this simply -/// checks that the QIR doesn't have any "bonus" calls to arbitrary code that is -/// not possibly defined in the QIR standard. -struct VerifyQIRProfilePass - : public cudaq::opt::VerifyQIRProfileBase { - explicit VerifyQIRProfilePass(llvm::StringRef convertTo_) - : VerifyQIRProfileBase() { - convertTo.setValue(convertTo_.str()); - } - - void runOnOperation() override { - LLVM::LLVMFuncOp func = getOperation(); - bool passFailed = false; - if (!func->hasAttr(cudaq::entryPointAttrName)) - return; - auto *ctx = &getContext(); - bool isBaseProfile = convertTo.getValue() == "qir-base"; - func.walk([&](Operation *op) { - if (auto call = dyn_cast(op)) { - auto funcName = call.getCalleeAttr().getValue(); - if (!funcName.startswith("__quantum_") || - funcName.equals(cudaq::opt::QIRCustomOp)) { - call.emitOpError("unexpected call in QIR base profile"); - passFailed = true; - return WalkResult::advance(); - } - - // Check that qubits are unique values. 
- const std::size_t numOpnds = call.getNumOperands(); - auto qubitTy = cudaq::opt::getQubitType(ctx); - if (numOpnds > 0) - for (std::size_t i = 0; i < numOpnds - 1; ++i) - if (call.getOperand(i).getType() == qubitTy) - for (std::size_t j = i + 1; j < numOpnds; ++j) - if (call.getOperand(j).getType() == qubitTy) { - auto i1 = - call.getOperand(i).getDefiningOp(); - auto j1 = - call.getOperand(j).getDefiningOp(); - if (i1 && j1 && i1.getOperand() == j1.getOperand()) { - call.emitOpError("uses same qubit as multiple operands"); - passFailed = true; - return WalkResult::interrupt(); - } - } - return WalkResult::advance(); - } - if (isBaseProfile && isa(op)) { - op->emitOpError("QIR base profile does not support control-flow"); - passFailed = true; - } - return WalkResult::advance(); - }); - if (passFailed) { - emitError(func.getLoc(), - "function " + func.getName() + - " not compatible with the QIR base profile."); - signalPassFailure(); - } - } -}; -} // namespace - -std::unique_ptr -cudaq::opt::verifyQIRProfilePass(llvm::StringRef convertTo) { - return std::make_unique(convertTo); -} - // The various passes defined here should be added as a pass pipeline. 
void cudaq::opt::addQIRProfilePipeline(OpPassManager &pm, - llvm::StringRef convertTo) { + llvm::StringRef convertTo, + bool performPrep) { assert(convertTo == "qir-adaptive" || convertTo == "qir-base"); - pm.addPass(createQIRProfilePreparationPass()); + if (performPrep) + pm.addPass(createQIRProfilePreparationPass()); pm.addNestedPass(createConvertToQIRFuncPass(convertTo)); pm.addPass(createQIRToQIRProfilePass(convertTo)); - pm.addNestedPass(verifyQIRProfilePass(convertTo)); + VerifyQIRProfileOptions vqpo = {convertTo.str()}; + pm.addNestedPass(createVerifyQIRProfile(vqpo)); } diff --git a/lib/Optimizer/CodeGen/Pipelines.cpp b/lib/Optimizer/CodeGen/Pipelines.cpp index ff716ddfa2..247805fd2b 100644 --- a/lib/Optimizer/CodeGen/Pipelines.cpp +++ b/lib/Optimizer/CodeGen/Pipelines.cpp @@ -37,12 +37,18 @@ void cudaq::opt::commonPipelineConvertToQIR( if (convertTo && convertTo->equals("qir-base")) pm.addNestedPass(createDelayMeasurementsPass()); pm.addPass(createConvertMathToFuncs()); + pm.addPass(createSymbolDCEPass()); pm.addPass(createConvertToQIR()); } void cudaq::opt::addPipelineTranslateToOpenQASM(PassManager &pm) { pm.addPass(createCanonicalizerPass()); pm.addPass(createCSEPass()); + pm.addNestedPass(createClassicalMemToReg()); + pm.addPass(createLoopUnroll()); + pm.addPass(createCanonicalizerPass()); + pm.addPass(createLiftArrayAlloc()); + pm.addPass(createStatePreparation()); } void cudaq::opt::addPipelineTranslateToIQMJson(PassManager &pm) { diff --git a/lib/Optimizer/CodeGen/QuakeToLLVM.cpp b/lib/Optimizer/CodeGen/QuakeToLLVM.cpp index 7ed9b1b25f..30d2981ac4 100644 --- a/lib/Optimizer/CodeGen/QuakeToLLVM.cpp +++ b/lib/Optimizer/CodeGen/QuakeToLLVM.cpp @@ -336,6 +336,43 @@ class ExtractQubitOpRewrite } }; +class GetMemberOpPattern : public ConvertOpToLLVMPattern { +public: + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(quake::GetMemberOp extract, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const 
override { + auto toTy = getTypeConverter()->convertType(extract.getType()); + std::int64_t position = adaptor.getIndex(); + rewriter.replaceOpWithNewOp( + extract, toTy, adaptor.getStruq(), ArrayRef{position}); + return success(); + } +}; + +class MakeStruqOpPattern : public ConvertOpToLLVMPattern { +public: + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(quake::MakeStruqOp mkStruq, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto loc = mkStruq.getLoc(); + auto *ctx = rewriter.getContext(); + auto toTy = getTypeConverter()->convertType(mkStruq.getType()); + Value result = rewriter.create(loc, toTy); + std::int64_t count = 0; + for (auto op : adaptor.getOperands()) { + auto off = DenseI64ArrayAttr::get(ctx, ArrayRef{count}); + result = rewriter.create(loc, toTy, result, op, off); + count++; + } + rewriter.replaceOp(mkStruq, result); + return success(); + } +}; + class SubveqOpRewrite : public ConvertOpToLLVMPattern { public: using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; @@ -462,11 +499,12 @@ class ExpPauliRewrite : public ConvertOpToLLVMPattern { cudaq::opt::factory::genLlvmI64Constant(loc, rewriter, numElements); // Set the string literal data - auto strPtr = rewriter.create( - loc, LLVM::LLVMPointerType::get(rewriter.getI8Type()), alloca, - ValueRange{zero, zero}); - auto castedPauli = rewriter.create( - loc, cudaq::opt::factory::getPointerType(context), pauliWord); + auto charPtrTy = cudaq::opt::factory::getPointerType(context); + auto strPtrTy = LLVM::LLVMPointerType::get(charPtrTy); + auto strPtr = rewriter.create(loc, strPtrTy, alloca, + ValueRange{zero, zero}); + auto castedPauli = + rewriter.create(loc, charPtrTy, pauliWord); rewriter.create(loc, castedPauli, strPtr); // Set the integer length @@ -476,8 +514,8 @@ class ExpPauliRewrite : public ConvertOpToLLVMPattern { rewriter.create(loc, size, intPtr); // Cast to raw opaque pointer - auto castedStore = rewriter.create( - 
loc, cudaq::opt::factory::getPointerType(context), alloca); + auto castedStore = + rewriter.create(loc, charPtrTy, alloca); operands.push_back(castedStore); rewriter.replaceOpWithNewOp(instOp, TypeRange{}, symbolRef, operands); @@ -1316,13 +1354,45 @@ class CustomUnitaryOpRewrite StringRef generatorName = sref.getRootReference(); auto globalOp = parentModule.lookupSymbol(generatorName); + const auto customOpName = [&]() -> std::string { + auto globalName = generatorName.str(); + // IMPORTANT: this must match the logic to generate global data + // globalName = f'{nvqppPrefix}{opName}_generator_{numTargets}.rodata' + const std::string nvqppPrefix = "__nvqpp__mlirgen__"; + const std::string generatorSuffix = "_generator"; + if (globalName.starts_with(nvqppPrefix)) { + globalName = globalName.substr(nvqppPrefix.size()); + const size_t pos = globalName.find(generatorSuffix); + if (pos != std::string::npos) + return globalName.substr(0, pos); + } + + return ""; + }(); + + // Create a global string for the op name + auto insertPoint = rewriter.saveInsertionPoint(); + rewriter.setInsertionPointToStart(parentModule.getBody()); + // Create the custom op name global + auto builder = cudaq::IRBuilder::atBlockEnd(parentModule.getBody()); + auto opNameGlobal = + builder.genCStringLiteralAppendNul(loc, parentModule, customOpName); + // Shift back to the function + rewriter.restoreInsertionPoint(insertPoint); + // Get the string address and bit cast + auto opNameRef = rewriter.create( + loc, cudaq::opt::factory::getPointerType(opNameGlobal.getType()), + opNameGlobal.getSymName()); + auto castedOpNameRef = rewriter.create( + loc, cudaq::opt::factory::getPointerType(context), opNameRef); + if (!globalOp) return op.emitOpError("global not found for custom op"); auto complex64Ty = typeConverter->convertType(ComplexType::get(rewriter.getF64Type())); auto complex64PtrTy = LLVM::LLVMPointerType::get(complex64Ty); - Type type = getTypeConverter()->convertType(globalOp.getType()); + Type 
type = typeConverter->convertType(globalOp.getType()); auto addrOp = rewriter.create(loc, type, generatorName); auto unitaryData = rewriter.create(loc, complex64PtrTy, addrOp); @@ -1334,12 +1404,13 @@ class CustomUnitaryOpRewrite cudaq::opt::factory::createLLVMFunctionSymbol( qirFunctionName, LLVM::LLVMVoidType::get(context), {complex64PtrTy, cudaq::opt::getArrayType(context), - cudaq::opt::getArrayType(context)}, + cudaq::opt::getArrayType(context), + LLVM::LLVMPointerType::get(rewriter.getI8Type())}, parentModule); rewriter.replaceOpWithNewOp( op, TypeRange{}, customSymbolRef, - ValueRange{unitaryData, controlArr, targetArr}); + ValueRange{unitaryData, controlArr, targetArr, castedOpNameRef}); return success(); } @@ -1352,19 +1423,21 @@ void cudaq::opt::populateQuakeToLLVMPatterns(LLVMTypeConverter &typeConverter, auto *context = patterns.getContext(); patterns.insert(context); - patterns.insert< - AllocaOpRewrite, ConcatOpRewrite, CustomUnitaryOpRewrite, - DeallocOpRewrite, DiscriminateOpPattern, ExtractQubitOpRewrite, - ExpPauliRewrite, OneTargetRewrite, - OneTargetRewrite, OneTargetRewrite, - OneTargetRewrite, OneTargetRewrite, - OneTargetRewrite, OneTargetOneParamRewrite, - OneTargetTwoParamRewrite, - OneTargetOneParamRewrite, - OneTargetOneParamRewrite, - OneTargetOneParamRewrite, - OneTargetTwoParamRewrite, - OneTargetThreeParamRewrite, QmemRAIIOpRewrite, ResetRewrite, - SubveqOpRewrite, TwoTargetRewrite>(typeConverter); + patterns + .insert, OneTargetRewrite, + OneTargetRewrite, OneTargetRewrite, + OneTargetRewrite, OneTargetRewrite, + OneTargetOneParamRewrite, + OneTargetTwoParamRewrite, + OneTargetOneParamRewrite, + OneTargetOneParamRewrite, + OneTargetOneParamRewrite, + OneTargetTwoParamRewrite, + OneTargetThreeParamRewrite, QmemRAIIOpRewrite, + ResetRewrite, SubveqOpRewrite, TwoTargetRewrite>( + typeConverter); patterns.insert>(typeConverter, measureCounter); } diff --git a/lib/Optimizer/CodeGen/TranslateToIQMJson.cpp 
b/lib/Optimizer/CodeGen/TranslateToIQMJson.cpp index 9d1128995b..ee76eb31f8 100644 --- a/lib/Optimizer/CodeGen/TranslateToIQMJson.cpp +++ b/lib/Optimizer/CodeGen/TranslateToIQMJson.cpp @@ -68,6 +68,14 @@ static LogicalResult emitOperation(nlohmann::json &json, return success(); } +static LogicalResult emitOperation(nlohmann::json &json, + cudaq::Emitter &emitter, + quake::BorrowWireOp op) { + auto name = std::string("QB") + std::to_string(op.getIdentity() + 1); + emitter.getOrAssignName(op.getResult(), name); + return success(); +} + static LogicalResult emitOperation(nlohmann::json &json, cudaq::Emitter &emitter, quake::ExtractRefOp op) { @@ -105,6 +113,13 @@ static LogicalResult emitOperation(nlohmann::json &json, json["args"] = nlohmann::json::object(); for (auto control : optor.getControls()) qubits.push_back(emitter.getOrAssignName(control).str()); + + // Propagate the name of this qubit into the operation output values. + emitter.getOrAssignName( + optor->getResult(0), + emitter.getOrAssignName(optor.getControls()[0]).str()); + emitter.getOrAssignName(optor->getResult(1), + emitter.getOrAssignName(optor.getTarget(0)).str()); } else { json["name"] = name; @@ -121,6 +136,10 @@ static LogicalResult emitOperation(nlohmann::json &json, }; json["args"]["angle_t"] = convertToFullTurns(*parameter0); json["args"]["phase_t"] = convertToFullTurns(*parameter1); + + // Propagate the name of this qubit into the operation output values. 
+ emitter.getOrAssignName(optor->getResult(0), + emitter.getOrAssignName(optor.getTarget(0)).str()); } if (optor.getTargets().size() != 1) @@ -164,6 +183,8 @@ static LogicalResult emitOperation(nlohmann::json &json, // Quake .Case( [&](auto op) { return emitOperation(json, emitter, op); }) + .Case( + [&](auto op) { return emitOperation(json, emitter, op); }) .Case( [&](auto op) { return emitOperation(json, emitter, op); }) .Case( @@ -173,6 +194,7 @@ static LogicalResult emitOperation(nlohmann::json &json, // Ignore .Case([](auto) { return success(); }) .Case([](auto) { return success(); }) + .Case([](auto) { return success(); }) .Case([](auto) { return success(); }) .Case([](auto) { return success(); }) .Default([&](Operation *) -> LogicalResult { diff --git a/lib/Optimizer/CodeGen/VerifyQIRProfile.cpp b/lib/Optimizer/CodeGen/VerifyQIRProfile.cpp new file mode 100644 index 0000000000..02ccc932fa --- /dev/null +++ b/lib/Optimizer/CodeGen/VerifyQIRProfile.cpp @@ -0,0 +1,97 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" +#include "cudaq/Optimizer/CodeGen/Passes.h" +#include "cudaq/Optimizer/CodeGen/Peephole.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Todo.h" +#include "nlohmann/json.hpp" +#include "llvm/ADT/SmallSet.h" +#include "mlir/Conversion/LLVMCommon/ConversionTarget.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" + +#define DEBUG_TYPE "verify-qir-profile" + +namespace cudaq::opt { +#define GEN_PASS_DEF_VERIFYQIRPROFILE +#include "cudaq/Optimizer/CodeGen/Passes.h.inc" +} // namespace cudaq::opt + +using namespace mlir; + +namespace { +/// Verify that the specific profile QIR code is sane. For now, this simply +/// checks that the QIR doesn't have any "bonus" calls to arbitrary code that is +/// not possibly defined in the QIR standard. +struct VerifyQIRProfilePass + : public cudaq::opt::impl::VerifyQIRProfileBase { + using VerifyQIRProfileBase::VerifyQIRProfileBase; + + void runOnOperation() override { + LLVM::LLVMFuncOp func = getOperation(); + bool passFailed = false; + if (!func->hasAttr(cudaq::entryPointAttrName)) + return; + auto *ctx = &getContext(); + bool isBaseProfile = convertTo.getValue() == "qir-base"; + func.walk([&](Operation *op) { + if (auto call = dyn_cast(op)) { + auto funcNameAttr = call.getCalleeAttr(); + if (!funcNameAttr) + return WalkResult::advance(); + auto funcName = funcNameAttr.getValue(); + if (!funcName.startswith("__quantum_") || + funcName.equals(cudaq::opt::QIRCustomOp)) { + call.emitOpError("unexpected call in QIR base profile"); + passFailed = true; + return WalkResult::advance(); + } + + // Check that qubits are unique values. 
+ const std::size_t numOpnds = call.getNumOperands(); + auto qubitTy = cudaq::opt::getQubitType(ctx); + if (numOpnds > 0) + for (std::size_t i = 0; i < numOpnds - 1; ++i) + if (call.getOperand(i).getType() == qubitTy) + for (std::size_t j = i + 1; j < numOpnds; ++j) + if (call.getOperand(j).getType() == qubitTy) { + auto i1 = + call.getOperand(i).getDefiningOp(); + auto j1 = + call.getOperand(j).getDefiningOp(); + if (i1 && j1 && i1.getOperand() == j1.getOperand()) { + call.emitOpError("uses same qubit as multiple operands"); + passFailed = true; + return WalkResult::interrupt(); + } + } + return WalkResult::advance(); + } + if (isBaseProfile && isa(op)) { + op->emitOpError("QIR base profile does not support control-flow"); + passFailed = true; + } + return WalkResult::advance(); + }); + if (passFailed) { + emitError(func.getLoc(), + "function " + func.getName() + + " not compatible with the QIR base profile."); + signalPassFailure(); + } + } +}; +} // namespace diff --git a/lib/Optimizer/CodeGen/WireSetsToProfileQIR.cpp b/lib/Optimizer/CodeGen/WireSetsToProfileQIR.cpp new file mode 100644 index 0000000000..52430cf518 --- /dev/null +++ b/lib/Optimizer/CodeGen/WireSetsToProfileQIR.cpp @@ -0,0 +1,696 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" +#include "cudaq/Optimizer/CallGraphFix.h" +#include "cudaq/Optimizer/CodeGen/CudaqFunctionNames.h" +#include "cudaq/Optimizer/CodeGen/Passes.h" +#include "cudaq/Optimizer/CodeGen/Pipelines.h" +#include "cudaq/Optimizer/CodeGen/QIRAttributeNames.h" +#include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" +#include "cudaq/Optimizer/CodeGen/QuakeToCC.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/CC/CCTypes.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "nlohmann/json.hpp" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Support/Debug.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Pass/PassOptions.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" + +#define DEBUG_TYPE "wireset-to-profile-qir" + +/** + \file + + If the Quake code is using wire sets (referencing discrete "physical" quantum + units), then codegen should not use full QIR. Full QIR uses virtual qubits, + so the physical mapping would be completely lost. + + This pass translates Quake that uses wire sets to QIR calls (in the + CC dialect and FuncDialect), which can themselves be further lowered to + LLVM-IR dialect using the CCToLLVM lowering passes. 
+ + Prerequisites: + The Quake IR should be + - in DAG form (no CC control flow operations or calls) + - using value semantics and wire_set globals + - decomposed into single control (at most) gate form + - negated controls must have been erased + */ + +namespace cudaq::opt { +#define GEN_PASS_DEF_WIRESETTOPROFILEQIR +#define GEN_PASS_DEF_WIRESETTOPROFILEQIRPOST +#define GEN_PASS_DEF_WIRESETTOPROFILEQIRPREP +#include "cudaq/Optimizer/CodeGen/Passes.h.inc" +} // namespace cudaq::opt + +using namespace mlir; + +namespace { +struct QuakeTypeConverter : public TypeConverter { + QuakeTypeConverter() { + addConversion([](Type ty) { return ty; }); + addConversion([](quake::WireType ty) { + return cudaq::opt::getQubitType(ty.getContext()); + }); + addConversion([](quake::MeasureType ty) { + return cudaq::opt::getResultType(ty.getContext()); + }); + } +}; +} // namespace + +static constexpr std::string_view qis_prefix = "__quantum__qis__"; +static constexpr std::string_view qis_body_suffix = "__body"; +static constexpr std::string_view qis_ctl_suffix = "__ctl"; + +static std::string toQisBodyName(std::string &&name) { + return std::string(qis_prefix) + std::move(name) + + std::string(qis_body_suffix); +} + +static std::string toQisCtlName(std::string &&name) { + return std::string(qis_prefix) + std::move(name) + + std::string(qis_ctl_suffix); +} + +// Store by result to prevent collisions on a single qubit having +// multiple measurements (Adaptive Profile) +// map[result] --> [qb,regName] +// Use std::map to keep these sorted in ascending order. While this isn't +// required, it makes viewing the QIR easier. 
+using OutputNamesType = + std::map>; + +template +struct GeneralRewrite : OpConversionPattern { + using Base = OpConversionPattern; + using Base::Base; + + LogicalResult + matchAndRewrite(OP qop, typename Base::OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + if (qop.getNegatedQubitControls()) + return failure(); + + auto instName = qop->getName().stripDialect().str(); + if (qop.getIsAdj() && (instName == "t" || instName == "s")) + instName += "dg"; + + auto loc = qop.getLoc(); + std::string funcName = [&]() { + if (qop.getControls().empty()) + return toQisBodyName(std::move(instName)); + if (instName == "x") { + instName = "cnot"; + return toQisBodyName(std::move(instName)); + } + return toQisCtlName(std::move(instName)); + }(); // NB: instName is dead + if (funcName.ends_with(qis_ctl_suffix) && + adaptor.getControls().size() == 1 && adaptor.getTargets().size() == 1) { + auto *ctx = rewriter.getContext(); + auto qbTy = cudaq::opt::getQubitType(ctx); + auto arrTy = cudaq::opt::getArrayType(ctx); + SmallVector argTys = {arrTy, qbTy}; + ModuleOp mod = qop->template getParentOfType(); + FlatSymbolRefAttr qisFuncSymbol; + if (auto f = mod.lookupSymbol(funcName)) { + auto fTy = f.getFunctionType(); + auto fSym = f.getSymNameAttr(); + qisFuncSymbol = FlatSymbolRefAttr::get(ctx, funcName); + Value fVal = rewriter.create(loc, fTy, fSym); + auto ptrI8Ty = cudaq::cc::PointerType::get(rewriter.getI8Type()); + Value fPtrVal = + rewriter.create(loc, ptrI8Ty, fVal); + Value one = rewriter.create(loc, 1, 64); + SmallVector callParamVals{one, fPtrVal, + *adaptor.getControls().begin(), + *adaptor.getTargets().begin()}; + SmallVector qubits(adaptor.getControls().begin(), + adaptor.getControls().end()); + qubits.append(adaptor.getTargets().begin(), adaptor.getTargets().end()); + rewriter.create(loc, std::nullopt, + cudaq::opt::NVQIRInvokeWithControlBits, + callParamVals); + rewriter.replaceOp(qop, qubits); + return success(); + } + return failure(); + } 
else { + SmallVector qubits(adaptor.getControls().begin(), + adaptor.getControls().end()); + qubits.append(adaptor.getTargets().begin(), adaptor.getTargets().end()); + rewriter.create(loc, std::nullopt, funcName, + adaptor.getOperands()); + rewriter.replaceOp(qop, qubits); + return success(); + } + return failure(); + } +}; + +namespace { +struct BorrowWireRewrite : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(quake::BorrowWireOp borrowWire, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto id = borrowWire.getIdentity(); + auto loc = borrowWire.getLoc(); + Value idCon = rewriter.create(loc, id, 64); + auto imTy = + cudaq::cc::PointerType::get(NoneType::get(rewriter.getContext())); + idCon = rewriter.create(loc, imTy, idCon); + rewriter.replaceOpWithNewOp( + borrowWire, cudaq::opt::getQubitType(rewriter.getContext()), idCon); + return success(); + } +}; + +struct ResetRewrite : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(quake::ResetOp reset, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + SmallVector qubits{adaptor.getTargets()}; + auto loc = reset.getLoc(); + std::string funcName = toQisBodyName(std::string("reset")); + rewriter.create(loc, std::nullopt, funcName, + adaptor.getOperands()); + rewriter.replaceOp(reset, qubits); + return success(); + } +}; + +struct BranchRewrite : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(cf::BranchOp branchOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto qubitTy = cudaq::opt::getQubitType(rewriter.getContext()); + rewriter.startRootUpdate(branchOp); + if (branchOp.getSuccessor()) + for (auto arg : branchOp.getSuccessor()->getArguments()) + if (isa(arg.getType())) + arg.setType(qubitTy); + for (auto operand : branchOp.getOperands()) + if 
(isa(operand.getType())) + operand.setType(qubitTy); + rewriter.finalizeRootUpdate(branchOp); + return success(); + } +}; + +struct CondBranchRewrite : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(cf::CondBranchOp branchOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto qubitTy = cudaq::opt::getQubitType(rewriter.getContext()); + rewriter.startRootUpdate(branchOp); + for (auto suc : branchOp.getSuccessors()) + for (auto arg : suc->getArguments()) + if (isa(arg.getType())) + arg.setType(qubitTy); + for (auto operand : branchOp.getOperands()) + if (isa(operand.getType())) + operand.setType(qubitTy); + rewriter.finalizeRootUpdate(branchOp); + return success(); + } +}; + +struct ReturnWireRewrite : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(quake::ReturnWireOp returnWire, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + rewriter.eraseOp(returnWire); + return success(); + } +}; + +struct WireSetRewrite : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(quake::WireSetOp wireSetOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + rewriter.eraseOp(wireSetOp); + return success(); + } +}; + +struct MzRewrite : OpConversionPattern { + using Base = OpConversionPattern; + explicit MzRewrite(TypeConverter &typeConverter, unsigned &counter, + OutputNamesType &resultQubitVals, MLIRContext *ctxt, + PatternBenefit benefit = 1) + : Base(typeConverter, ctxt, benefit), resultCount(counter), + resultQubitVals(resultQubitVals) {} + + LogicalResult + matchAndRewrite(quake::MzOp meas, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + bool measureFollowedByDiscriminate = [&]() { + for (auto user : meas->getResult(0).getUsers()) + if (isa(user)) + return true; + return false; + }(); + + // 
FIXME: Must use sequentially assigned result ids + std::string funcName = toQisBodyName(std::string("mz")); + auto loc = meas.getLoc(); + Value idCon = rewriter.create(loc, resultCount++, 64); + auto imTy = + cudaq::cc::PointerType::get(NoneType::get(rewriter.getContext())); + idCon = rewriter.create(loc, imTy, idCon); + Value resultVal = rewriter.create( + loc, cudaq::opt::getResultType(rewriter.getContext()), idCon); + rewriter.create( + loc, std::nullopt, funcName, + ValueRange{adaptor.getTargets()[0], resultVal}); + rewriter.replaceOp(meas, ValueRange{resultVal, adaptor.getTargets()[0]}); + + auto regName = meas.getRegisterName(); + // Populate __quantum__rt__result_record_output if there is a register name + // without any downstream DiscriminateOp's. + if (regName && !measureFollowedByDiscriminate) { + cudaq::IRBuilder irb(rewriter.getContext()); + auto mod = meas->getParentOfType(); + // NB: This is thread safe as it should never do an insertion, just a + // lookup. + auto nameObj = irb.genCStringLiteralAppendNul(loc, mod, *regName); + auto arrI8Ty = mlir::LLVM::LLVMArrayType::get(rewriter.getI8Type(), + regName->size() + 1); + auto ptrArrTy = cudaq::cc::PointerType::get(arrI8Ty); + Value nameVal = rewriter.create( + loc, ptrArrTy, nameObj.getName()); + auto cstrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); + Value nameValCStr = + rewriter.create(loc, cstrTy, nameVal); + + rewriter.create(loc, std::nullopt, + cudaq::opt::QIRRecordOutput, + ValueRange{resultVal, nameValCStr}); + } + + // Populate resultQubitVals[] + std::size_t qubitNum = 0; + Value v = adaptor.getTargets()[0]; + while (auto tmpOp = v.getDefiningOp()) + v = tmpOp.getOperand(); + if (auto x = cudaq::opt::factory::getIntIfConstant(v)) + qubitNum = *x; + std::string regNameStr; + if (regName) + regNameStr = regName->str(); + resultQubitVals[resultCount - 1] = std::make_pair(qubitNum, regNameStr); + + return success(); + } + +private: + unsigned &resultCount; + OutputNamesType 
&resultQubitVals; +}; + +struct DiscriminateRewrite : OpConversionPattern { + using Base = OpConversionPattern; + + explicit DiscriminateRewrite(TypeConverter &typeConverter, bool adaptive, + DenseMap &nameMap, + MLIRContext *ctxt, PatternBenefit benefit = 1) + : Base(typeConverter, ctxt, benefit), isAdaptiveProfile(adaptive), + regNameMap(nameMap) {} + + LogicalResult + matchAndRewrite(quake::DiscriminateOp disc, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto loc = disc.getLoc(); + + auto mod = disc->getParentOfType(); + cudaq::IRBuilder irb(rewriter.getContext()); + auto iter = regNameMap.find(disc.getOperation()); + assert(iter != regNameMap.end() && "discriminate must be in map"); + // NB: This is thread safe as it should never do an insertion, just a + // lookup. + auto nameObj = irb.genCStringLiteralAppendNul(loc, mod, iter->second); + auto arrI8Ty = mlir::LLVM::LLVMArrayType::get(rewriter.getI8Type(), + iter->second.size() + 1); + auto ptrArrTy = cudaq::cc::PointerType::get(arrI8Ty); + Value nameVal = rewriter.create(loc, ptrArrTy, + nameObj.getName()); + auto cstrTy = cudaq::cc::PointerType::get(rewriter.getI8Type()); + Value nameValCStr = + rewriter.create(loc, cstrTy, nameVal); + + rewriter.create( + loc, std::nullopt, cudaq::opt::QIRRecordOutput, + ValueRange{adaptor.getMeasurement(), nameValCStr}); + if (isAdaptiveProfile) { + std::string funcName = toQisBodyName(std::string("read_result")); + rewriter.replaceOpWithNewOp( + disc, rewriter.getI1Type(), funcName, + ValueRange{adaptor.getMeasurement()}); + } else { + Value undef = + rewriter.create(loc, rewriter.getI1Type()); + rewriter.replaceOp(disc, undef); + } + return success(); + } + +private: + bool isAdaptiveProfile; + DenseMap ®NameMap; +}; + +struct WireSetToProfileQIRPass + : public cudaq::opt::impl::WireSetToProfileQIRBase< + WireSetToProfileQIRPass> { + using WireSetToProfileQIRBase::WireSetToProfileQIRBase; + + void runOnOperation() override { + auto op = 
getOperation(); + auto *context = &getContext(); + OpBuilder builder(op); + DenseMap regNameMap; + op.walk([&](quake::DiscriminateOp disc) { + auto meas = disc.getMeasurement().getDefiningOp(); + auto name = meas ? meas.getRegisterName() : std::nullopt; + if (name) + regNameMap[disc.getOperation()] = *name; + else + regNameMap[disc.getOperation()] = "?"; + }); + std::optional highestIdentity; + op.walk([&](quake::BorrowWireOp op) { + highestIdentity = highestIdentity + ? std::max(*highestIdentity, op.getIdentity()) + : op.getIdentity(); + }); + if (highestIdentity) + op->setAttr(cudaq::opt::QIRRequiredQubitsAttrName, + builder.getStringAttr(std::to_string(*highestIdentity + 1))); + + RewritePatternSet patterns(context); + QuakeTypeConverter quakeTypeConverter; + unsigned resultCounter = 0; + OutputNamesType resultQubitVals; + patterns.insert, GeneralRewrite, + GeneralRewrite, GeneralRewrite, + GeneralRewrite, GeneralRewrite, + GeneralRewrite, GeneralRewrite, + GeneralRewrite, GeneralRewrite, + GeneralRewrite, GeneralRewrite, + GeneralRewrite, BorrowWireRewrite, + ResetRewrite, ReturnWireRewrite>(quakeTypeConverter, + context); + patterns.insert(quakeTypeConverter, resultCounter, + resultQubitVals, context); + const bool isAdaptiveProfile = convertTo == "qir-adaptive"; + patterns.insert(quakeTypeConverter, isAdaptiveProfile, + regNameMap, context); + ConversionTarget target(*context); + target.addLegalDialect(); + target.addIllegalDialect(); + target.addLegalOp(); + + LLVM_DEBUG(llvm::dbgs() << "Module before:\n"; op.dump()); + if (failed(applyPartialConversion(op, target, std::move(patterns)))) + signalPassFailure(); + + if (resultCounter > 0) { + nlohmann::json resultQubitJSON{resultQubitVals}; + op->setAttr(cudaq::opt::QIROutputNamesAttrName, + builder.getStringAttr(resultQubitJSON.dump())); + } + + if (highestIdentity) + op->setAttr(cudaq::opt::QIRRequiredResultsAttrName, + builder.getStringAttr(std::to_string(resultCounter))); + + LLVM_DEBUG(llvm::dbgs() << 
"Module after:\n"; op.dump()); + } +}; + +// Runs on the module. Prepare the module for conversion to QIR calls. +// We have to add the declarations of the QIR (QIS) functions and preprocess the +// names of the measurements, adding them to the Module as well as creating them +// when they are absent. +struct WireSetToProfileQIRPrepPass + : public cudaq::opt::impl::WireSetToProfileQIRPrepBase< + WireSetToProfileQIRPrepPass> { + using WireSetToProfileQIRPrepBase::WireSetToProfileQIRPrepBase; + + void runOnOperation() override { + ModuleOp op = getOperation(); + auto *ctx = &getContext(); + + OpBuilder builder(ctx); + builder.setInsertionPointToEnd(op.getBody()); + auto loc = builder.getUnknownLoc(); + + auto createNewDecl = [&](const std::string &name, FunctionType ty) { + auto func = builder.create(loc, name, ty); + func.setPrivate(); + }; + auto addNewDecl = [&](std::string &&suffix, FunctionType ty) { + createNewDecl(std::string(qis_prefix) + std::move(suffix), ty); + }; + auto addBodyDecl = [&](std::string &&name, FunctionType ty) { + addNewDecl(std::move(name) + std::string(qis_body_suffix), ty); + }; + auto addCtlDecl = [&](std::string &&name, FunctionType ty) { + addNewDecl(std::move(name) + std::string(qis_ctl_suffix), ty); + }; + auto addDecls = [&](const char *name, FunctionType bodyTy, + FunctionType ctlTy) { + addBodyDecl(name, bodyTy); + addCtlDecl(name, ctlTy); + }; + + LLVM_DEBUG(llvm::dbgs() << "Module before prep:\n"; op.dump()); + // Insert declarations for all the functions we *may* be using. 
+ auto qbTy = cudaq::opt::getQubitType(ctx); + auto targ1Ty = FunctionType::get(ctx, TypeRange{qbTy}, TypeRange{}); + auto targ1CtrlTy = + FunctionType::get(ctx, TypeRange{qbTy, qbTy}, TypeRange{}); + addDecls("h", targ1Ty, targ1CtrlTy); + addDecls("x", targ1Ty, targ1CtrlTy); + addDecls("y", targ1Ty, targ1CtrlTy); + addDecls("z", targ1Ty, targ1CtrlTy); + addDecls("s", targ1Ty, targ1CtrlTy); + addDecls("t", targ1Ty, targ1CtrlTy); + addDecls("sdg", targ1Ty, targ1CtrlTy); + addDecls("tdg", targ1Ty, targ1CtrlTy); + addBodyDecl("reset", targ1Ty); + + auto f64Ty = builder.getF64Type(); + auto param1Targ1Ty = + FunctionType::get(ctx, TypeRange{f64Ty, qbTy}, TypeRange{}); + auto param1Targ1CtrlTy = + FunctionType::get(ctx, TypeRange{f64Ty, qbTy, qbTy}, TypeRange{}); + addDecls("rx", param1Targ1Ty, param1Targ1CtrlTy); + addDecls("ry", param1Targ1Ty, param1Targ1CtrlTy); + addDecls("rz", param1Targ1Ty, param1Targ1CtrlTy); + addDecls("r1", param1Targ1Ty, param1Targ1CtrlTy); + + auto param2Targ1Ty = + FunctionType::get(ctx, TypeRange{f64Ty, f64Ty, qbTy}, TypeRange{}); + auto param2Targ1CtrlTy = FunctionType::get( + ctx, TypeRange{f64Ty, f64Ty, qbTy, qbTy}, TypeRange{}); + addDecls("phased_rx", param2Targ1Ty, param2Targ1CtrlTy); + + auto param3Targ1Ty = FunctionType::get( + ctx, TypeRange{f64Ty, f64Ty, f64Ty, qbTy}, TypeRange{}); + auto param3Targ1CtrlTy = FunctionType::get( + ctx, TypeRange{f64Ty, f64Ty, f64Ty, qbTy, qbTy}, TypeRange{}); + addDecls("u3", param3Targ1Ty, param3Targ1CtrlTy); + + auto targ2Ty = targ1CtrlTy; + auto targ2CtrlTy = + FunctionType::get(ctx, TypeRange{qbTy, qbTy, qbTy}, TypeRange{}); + addDecls("swap", targ2Ty, targ2CtrlTy); + addBodyDecl("cnot", targ2Ty); + + auto resTy = cudaq::opt::getResultType(ctx); + auto measTy = FunctionType::get(ctx, TypeRange{qbTy, resTy}, TypeRange{}); + addBodyDecl("mz", measTy); + auto readResTy = FunctionType::get(ctx, TypeRange{resTy}, + TypeRange{builder.getI1Type()}); + createNewDecl("__quantum__qis__read_result__body", 
readResTy); + + auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); + auto recordTy = + FunctionType::get(ctx, TypeRange{resTy, i8PtrTy}, TypeRange{}); + createNewDecl(cudaq::opt::QIRRecordOutput, recordTy); + + auto invokeCtrlTy = FunctionType::get( + ctx, TypeRange{builder.getI64Type(), i8PtrTy, qbTy, qbTy}, TypeRange{}); + createNewDecl(cudaq::opt::NVQIRInvokeWithControlBits, invokeCtrlTy); + + unsigned counter = 0; + op.walk([&](quake::MzOp meas) { + auto optName = meas.getRegisterName(); + std::string name; + if (optName) { + name = *optName; + } else { + name = std::to_string(counter++); + constexpr std::size_t padTo = 5; + name = std::string(padTo - std::min(padTo, name.length()), '0') + name; + meas.setRegisterName(name); + } + cudaq::IRBuilder irb(builder); + irb.genCStringLiteralAppendNul(meas.getLoc(), op, name); + }); + cudaq::IRBuilder irb(builder); + irb.genCStringLiteralAppendNul(builder.getUnknownLoc(), op, "?"); + + LLVM_DEBUG(llvm::dbgs() << "Module after prep:\n"; op->dump()); + } +}; + +struct WireSetToProfileQIRPostPass + : public cudaq::opt::impl::WireSetToProfileQIRPostBase< + WireSetToProfileQIRPostPass> { + using WireSetToProfileQIRPostBase::WireSetToProfileQIRPostBase; + + /// Apply required QIR function attributes to the entry-point functions. + void addAttributes(ModuleOp moduleOp, MLIRContext *ctx) { + OpBuilder builder(moduleOp); + + // Build the call graph of the module + CallGraph callGraph(moduleOp); + + // Traverse the module looking for entry-point functions. When one is found, + // consult the call graph to find the highest qubit identity and highest QIR + // result number used in the call graph. 
+ for (Operation &op : moduleOp) { + if (auto funcOp = dyn_cast(op)) { + if (op.hasAttr(cudaq::entryPointAttrName)) { + mlir::CallGraphNode *node = + callGraph.lookupNode(funcOp.getCallableRegion()); + std::optional highestIdentity; + std::optional highestResult; + for (auto it = llvm::df_begin(node), itEnd = llvm::df_end(node); + it != itEnd; ++it) { + if (it->isExternal()) + continue; + auto *callableRegion = it->getCallableRegion(); + auto parentFuncOp = + callableRegion->getParentOfType(); + + if (auto reqQubits = + parentFuncOp->getAttr(cudaq::opt::QIRRequiredQubitsAttrName) + .dyn_cast_or_null()) { + std::uint32_t thisFuncReqQubits = 0; + if (!reqQubits.strref().getAsInteger(10, thisFuncReqQubits)) { + auto thisFuncHighestIdentity = thisFuncReqQubits - 1; + highestIdentity = + highestIdentity + ? std::max(*highestIdentity, thisFuncHighestIdentity) + : thisFuncHighestIdentity; + } + } + + if (auto reqResults = + parentFuncOp + ->getAttr(cudaq::opt::QIRRequiredResultsAttrName) + .dyn_cast_or_null()) { + std::uint32_t thisFuncReqResults = 0; + if (!reqResults.strref().getAsInteger(10, thisFuncReqResults)) { + auto thisFuncHighestResult = thisFuncReqResults - 1; + highestResult = highestResult ? 
std::max(*highestResult, + thisFuncHighestResult) + : thisFuncHighestResult; + } + } + } // end call graph traversal + + // Apply the final attribute on the entrypoint function + if (highestIdentity) + funcOp->setAttr( + cudaq::opt::QIRRequiredQubitsAttrName, + builder.getStringAttr(std::to_string(*highestIdentity + 1))); + if (highestResult) + funcOp->setAttr( + cudaq::opt::QIRRequiredResultsAttrName, + builder.getStringAttr(std::to_string(*highestResult + 1))); + } + } + } + } + + void runOnOperation() override { + ModuleOp op = getOperation(); + auto *ctx = &getContext(); + addAttributes(op, ctx); + RewritePatternSet patterns(ctx); + QuakeTypeConverter quakeTypeConverter; + patterns.insert(quakeTypeConverter, ctx); + ConversionTarget target(*ctx); + target.addIllegalDialect(); + + LLVM_DEBUG(llvm::dbgs() << "Module before:\n"; op.dump()); + if (failed(applyPartialConversion(op, target, std::move(patterns)))) + signalPassFailure(); + LLVM_DEBUG(llvm::dbgs() << "Module after:\n"; op.dump()); + } +}; +} // namespace + +void cudaq::opt::addWiresetToProfileQIRPipeline(OpPassManager &pm, + StringRef profile) { + pm.addPass(cudaq::opt::createWireSetToProfileQIRPrep()); + WireSetToProfileQIROptions wopt; + if (!profile.empty()) + wopt.convertTo = profile.str(); + pm.addNestedPass(cudaq::opt::createWireSetToProfileQIR(wopt)); + pm.addPass(cudaq::opt::createWireSetToProfileQIRPost()); + // Perform final cleanup for other dialect conversions (like func.func) + pm.addPass(cudaq::opt::createConvertToQIR()); + if (profile.starts_with("qir")) + cudaq::opt::addQIRProfilePipeline(pm, profile, /*performPrep=*/false); +} + +// Pipeline option: let the user specify the profile name. 
+struct WiresetToProfileQIRPipelineOptions + : public PassPipelineOptions { + PassOptions::Option profile{ + *this, "convert-to", llvm::cl::desc(""), llvm::cl::init("qir-base")}; +}; + +void cudaq::opt::registerWireSetToProfileQIRPipeline() { + PassPipelineRegistration( + "lower-wireset-to-profile-qir", + "Convert quake directly to one of the profiles of QIR.", + [](OpPassManager &pm, const WiresetToProfileQIRPipelineOptions &opt) { + addWiresetToProfileQIRPipeline(pm, opt.profile); + }); +} diff --git a/lib/Optimizer/Dialect/CC/CCOps.cpp b/lib/Optimizer/Dialect/CC/CCOps.cpp index 9bfe2b4e4a..7708a42c36 100644 --- a/lib/Optimizer/Dialect/CC/CCOps.cpp +++ b/lib/Optimizer/Dialect/CC/CCOps.cpp @@ -319,6 +319,9 @@ LogicalResult cudaq::cc::CastOp::verify() { auto iTy2 = cast(outTy); if ((iTy1.getWidth() < iTy2.getWidth()) && !getSint() && !getZint()) return emitOpError("integer extension must be signed or unsigned."); + } else if (isa(inTy) && isa(outTy)) { + // ok: nop + // the indirect callable value is an integer key on the device side. } else if (isa(inTy) && isa(outTy)) { // ok: inttoptr } else if (isa(inTy) && isa(outTy)) { @@ -354,6 +357,9 @@ LogicalResult cudaq::cc::CastOp::verify() { } else if (isa(inTy) && isa(outTy)) { // ok, type conversion of a complex value // NB: use complex.re or complex.im to convert (extract) a fp value. + } else if (isa(inTy) && isa(outTy)) { + // ok, type conversion of a function to an indirect callable + // Folding will remove this. } else { // Could support a bitcast of a float with pointer size bits to/from a // pointer, but that doesn't seem like it would be very common. 
@@ -1945,6 +1951,56 @@ LogicalResult cudaq::cc::CallCallableOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// CallIndirectCallableOp +//===----------------------------------------------------------------------===// + +LogicalResult cudaq::cc::CallIndirectCallableOp::verify() { + FunctionType funcTy = + cast(getCallee().getType()).getSignature(); + + // Check argument types. + auto argTys = funcTy.getInputs(); + if (argTys.size() != getArgOperands().size()) + return emitOpError("call has incorrect arity"); + for (auto [targArg, argVal] : llvm::zip(argTys, getArgOperands())) + if (targArg != argVal.getType()) + return emitOpError("argument type mismatch"); + + // Check return types. + auto resTys = funcTy.getResults(); + if (resTys.size() != getResults().size()) + return emitOpError("call has incorrect coarity"); + for (auto [targRes, callVal] : llvm::zip(resTys, getResults())) + if (targRes != callVal.getType()) + return emitOpError("result type mismatch"); + return success(); +} + +namespace { +struct MakeDirectCall + : public OpRewritePattern { + using Base = OpRewritePattern; + using Base::Base; + + LogicalResult matchAndRewrite(cudaq::cc::CallIndirectCallableOp indCall, + PatternRewriter &rewriter) const override { + if (auto cast = indCall.getCallee().getDefiningOp()) + if (auto fn = cast.getValue().getDefiningOp()) { + rewriter.replaceOpWithNewOp(indCall, fn, + indCall.getArgs()); + return success(); + } + return failure(); + } +}; +} // namespace + +void cudaq::cc::CallIndirectCallableOp::getCanonicalizationPatterns( + RewritePatternSet &patterns, MLIRContext *context) { + patterns.add(context); +} + //===----------------------------------------------------------------------===// // ConditionOp //===----------------------------------------------------------------------===// diff --git a/lib/Optimizer/Dialect/CC/CCTypes.cpp b/lib/Optimizer/Dialect/CC/CCTypes.cpp index b7c0f3cda4..816695e173 
100644 --- a/lib/Optimizer/Dialect/CC/CCTypes.cpp +++ b/lib/Optimizer/Dialect/CC/CCTypes.cpp @@ -177,8 +177,8 @@ CallableType CallableType::getNoSignature(MLIRContext *ctx) { } void CCDialect::registerTypes() { - addTypes(); + addTypes(); } } // namespace cudaq::cc diff --git a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp index a3c6373320..e3d9ab13d1 100644 --- a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp +++ b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp @@ -26,15 +26,6 @@ namespace { #include "cudaq/Optimizer/Dialect/Quake/Canonical.inc" } // namespace -// Is \p op in the Quake dialect? -// TODO: Is this StringRef comparison faster than calling MLIRContext:: -// getLoadedDialect("quake")? -static bool isQuakeOperation(Operation *op) { - if (auto *dialect = op->getDialect()) - return dialect->getNamespace().equals("quake"); - return false; -} - static LogicalResult verifyWireResultsAreLinear(Operation *op) { for (Value v : op->getOpResults()) if (isa(v.getType())) { @@ -506,6 +497,52 @@ LogicalResult quake::ExtractRefOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// GetMemberOp +//===----------------------------------------------------------------------===// + +LogicalResult quake::GetMemberOp::verify() { + std::uint32_t index = getIndex(); + auto strTy = cast(getStruq().getType()); + std::uint32_t size = strTy.getNumMembers(); + if (index >= size) + return emitOpError("invalid index [" + std::to_string(index) + + "] because >= size [" + std::to_string(size) + "]"); + if (getType() != strTy.getMembers()[index]) + return emitOpError("result type does not match member " + + std::to_string(index) + " type"); + return success(); +} + +namespace { +struct BypassMakeStruq : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(quake::GetMemberOp getMem, + PatternRewriter &rewriter) const override { + if (auto makeStruq 
= + getMem.getStruq().getDefiningOp()) { + auto toStrTy = cast(getMem.getStruq().getType()); + std::uint32_t idx = getMem.getIndex(); + Value from = makeStruq.getOperand(idx); + auto toTy = toStrTy.getMembers()[idx]; + if (from.getType() != toTy) { + rewriter.replaceOpWithNewOp(getMem, toTy, from); + } else { + rewriter.replaceOp(getMem, from); + } + return success(); + } + return failure(); + } +}; +} // namespace + +void quake::GetMemberOp::getCanonicalizationPatterns( + RewritePatternSet &patterns, MLIRContext *context) { + patterns.add(context); +} + //===----------------------------------------------------------------------===// // InitializeStateOp //===----------------------------------------------------------------------===// @@ -576,6 +613,26 @@ void quake::InitializeStateOp::getCanonicalizationPatterns( patterns.add(context); } +//===----------------------------------------------------------------------===// +// MakeStruqOp +//===----------------------------------------------------------------------===// + +LogicalResult quake::MakeStruqOp::verify() { + if (getType().getNumMembers() != getNumOperands()) + return emitOpError("result type has different member count than operands"); + for (auto [ty, opnd] : llvm::zip(getType().getMembers(), getOperands())) { + if (ty == opnd.getType()) + continue; + auto veqTy = dyn_cast(ty); + auto veqOpndTy = dyn_cast(opnd.getType()); + if (veqTy && !veqTy.hasSpecifiedSize() && veqOpndTy && + veqOpndTy.hasSpecifiedSize()) + continue; + return emitOpError("member type not compatible with operand type"); + } + return success(); +} + //===----------------------------------------------------------------------===// // RelaxSizeOp //===----------------------------------------------------------------------===// diff --git a/lib/Optimizer/Dialect/Quake/QuakeTypes.cpp b/lib/Optimizer/Dialect/Quake/QuakeTypes.cpp index 606baed123..959a869fe5 100644 --- a/lib/Optimizer/Dialect/Quake/QuakeTypes.cpp +++ 
b/lib/Optimizer/Dialect/Quake/QuakeTypes.cpp @@ -39,7 +39,7 @@ void quake::VeqType::print(AsmPrinter &os) const { Type quake::VeqType::parse(AsmParser &parser) { if (parser.parseLess()) return {}; - std::size_t size; + std::size_t size = 0; if (succeeded(parser.parseOptionalQuestion())) size = 0; else if (parser.parseInteger(size)) @@ -58,6 +58,42 @@ quake::VeqType::verify(llvm::function_ref emitError, //===----------------------------------------------------------------------===// +Type quake::StruqType::parse(AsmParser &parser) { + if (parser.parseLess()) + return {}; + std::string name; + auto *ctx = parser.getContext(); + StringAttr nameAttr; + if (succeeded(parser.parseOptionalString(&name))) { + nameAttr = StringAttr::get(ctx, name); + if (parser.parseColon()) + return {}; + } + SmallVector members; + do { + Type member; + auto optTy = parser.parseOptionalType(member); + if (!optTy.has_value()) + break; + if (!succeeded(*optTy)) + return {}; + members.push_back(member); + } while (succeeded(parser.parseOptionalComma())); + if (parser.parseGreater()) + return {}; + return quake::StruqType::get(ctx, nameAttr, members); +} + +void quake::StruqType::print(AsmPrinter &printer) const { + printer << '<'; + if (getName()) + printer << getName() << ": "; + llvm::interleaveComma(getMembers(), printer); + printer << '>'; +} + +//===----------------------------------------------------------------------===// + void quake::QuakeDialect::registerTypes() { - addTypes(); + addTypes(); } diff --git a/lib/Optimizer/Transforms/AggressiveEarlyInlining.cpp b/lib/Optimizer/Transforms/AggressiveEarlyInlining.cpp index 73fd980c9e..f359412876 100644 --- a/lib/Optimizer/Transforms/AggressiveEarlyInlining.cpp +++ b/lib/Optimizer/Transforms/AggressiveEarlyInlining.cpp @@ -26,15 +26,15 @@ namespace cudaq::opt { using namespace mlir; -static bool isIndirectFunc(llvm::StringRef funcName, - llvm::StringMap indirectMap) { +static bool isIndirectFunc(StringRef funcName, + llvm::StringMap 
indirectMap) { return indirectMap.find(funcName) != indirectMap.end(); } // Return the inverted mangled name map. -static std::optional> +static std::optional> getConversionMap(ModuleOp module) { - llvm::StringMap result; + llvm::StringMap result; if (auto mangledNameMap = module->getAttrOfType( cudaq::runtime::mangledNameMap)) { for (auto namedAttr : mangledNameMap) { @@ -53,25 +53,31 @@ namespace { /// dialect calls and callables as well.] class RewriteCall : public OpRewritePattern { public: - RewriteCall(MLIRContext *ctx, llvm::StringMap &indirectMap) - : OpRewritePattern(ctx), indirectMap(indirectMap) {} + RewriteCall(MLIRContext *ctx, llvm::StringMap &indirectMap, + ModuleOp m) + : OpRewritePattern(ctx), indirectMap(indirectMap), module(m) {} - LogicalResult matchAndRewrite(func::CallOp op, + LogicalResult matchAndRewrite(func::CallOp call, PatternRewriter &rewriter) const override { - if (!isIndirectFunc(op.getCallee(), indirectMap)) + if (!isIndirectFunc(call.getCallee(), indirectMap)) return failure(); - rewriter.startRootUpdate(op); - auto callee = op.getCallee(); - llvm::StringRef directName = indirectMap[callee]; - op.setCalleeAttr(SymbolRefAttr::get(op.getContext(), directName)); + auto callee = call.getCallee(); + StringRef directName = indirectMap[callee]; + auto *ctx = rewriter.getContext(); + auto loc = call.getLoc(); + auto funcTy = call.getCalleeType(); + cudaq::opt::factory::getOrAddFunc(loc, directName, funcTy, module); + rewriter.startRootUpdate(call); + call.setCalleeAttr(SymbolRefAttr::get(ctx, directName)); + rewriter.finalizeRootUpdate(call); LLVM_DEBUG(llvm::dbgs() << "Rewriting " << directName << '\n'); - rewriter.finalizeRootUpdate(op); return success(); } private: - llvm::StringMap &indirectMap; + llvm::StringMap &indirectMap; + ModuleOp module; }; /// Translate indirect calls to direct calls. 
@@ -81,14 +87,13 @@ class ConvertToDirectCalls using ConvertToDirectCallsBase::ConvertToDirectCallsBase; void runOnOperation() override { - auto op = getOperation(); + ModuleOp module = getOperation(); auto *ctx = &getContext(); - auto module = op->template getParentOfType(); if (auto indirectMapOpt = getConversionMap(module)) { - LLVM_DEBUG(llvm::dbgs() << "Processing: " << op << '\n'); + LLVM_DEBUG(llvm::dbgs() << "Processing: " << module << '\n'); RewritePatternSet patterns(ctx); - patterns.insert(ctx, *indirectMapOpt); - if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) + patterns.insert(ctx, *indirectMapOpt, module); + if (failed(applyPatternsAndFoldGreedily(module, std::move(patterns)))) signalPassFailure(); } } @@ -136,7 +141,7 @@ static void defaultInlinerOptPipeline(OpPassManager &pm) { /// graph. void cudaq::opt::addAggressiveEarlyInlining(OpPassManager &pm) { llvm::StringMap opPipelines; - pm.addNestedPass(cudaq::opt::createConvertToDirectCalls()); + pm.addPass(cudaq::opt::createConvertToDirectCalls()); pm.addPass(createInlinerPass(opPipelines, defaultInlinerOptPipeline)); pm.addNestedPass(cudaq::opt::createCheckKernelCalls()); } diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index 7a0aaa4cb6..a6b94d9a59 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -41,6 +41,7 @@ add_cudaq_library(OptTransforms LowerUnwind.cpp Mapping.cpp MemToReg.cpp + DependencyAnalysis.cpp MultiControlDecomposition.cpp ObserveAnsatz.cpp PruneCtrlRelations.cpp @@ -64,3 +65,8 @@ add_cudaq_library(OptTransforms OptimBuilder QuakeDialect ) + +target_include_directories(OptTransforms SYSTEM + PRIVATE ${CMAKE_SOURCE_DIR}/tpls/eigen + PRIVATE ${CMAKE_SOURCE_DIR}/runtime +) diff --git a/lib/Optimizer/Transforms/DecompositionPatterns.cpp b/lib/Optimizer/Transforms/DecompositionPatterns.cpp index 3849329843..54ee23741f 100644 --- 
a/lib/Optimizer/Transforms/DecompositionPatterns.cpp +++ b/lib/Optimizer/Transforms/DecompositionPatterns.cpp @@ -335,18 +335,51 @@ struct ExpPauliDecomposition : public OpRewritePattern { LogicalResult matchAndRewrite(quake::ExpPauliOp expPauliOp, PatternRewriter &rewriter) const override { auto loc = expPauliOp.getLoc(); + auto module = expPauliOp->getParentOfType(); auto qubits = expPauliOp.getQubits(); auto theta = expPauliOp.getParameter(); auto pauliWord = expPauliOp.getPauli(); + std::optional optPauliWordStr; + if (auto defOp = + pauliWord.getDefiningOp()) { + optPauliWordStr = defOp.getStringLiteral(); + } else { + // Get the pauli word string from a constant global string generated + // during argument synthesis. + auto stringOp = expPauliOp.getOperand(2); + auto stringTy = stringOp.getType(); + if (auto charSpanTy = dyn_cast(stringTy)) { + if (auto vecInit = stringOp.getDefiningOp()) { + auto addrOp = vecInit.getOperand(0); + if (auto cast = addrOp.getDefiningOp()) + addrOp = cast.getOperand(); + if (auto addr = addrOp.getDefiningOp()) { + auto globalName = addr.getGlobalName(); + auto symbol = module.lookupSymbol(globalName); + if (auto global = dyn_cast(symbol)) { + auto attr = global.getValue(); + auto strAttr = cast(attr.value()); + optPauliWordStr = strAttr.getValue(); + } + } + } + } + } + // Assert that we have a constant known pauli word - auto defOp = pauliWord.getDefiningOp(); - if (!defOp) + if (!optPauliWordStr.has_value()) return failure(); + auto pauliWordStr = optPauliWordStr.value(); + + // Remove optional last zero character + auto size = pauliWordStr.size(); + if (size > 0 && pauliWordStr[size - 1] == '\0') + size--; + SmallVector qubitSupport; - StringRef pauliWordStr = defOp.getStringLiteral(); - for (std::size_t i = 0; i < pauliWordStr.size(); i++) { + for (std::size_t i = 0; i < size; i++) { Value index = rewriter.create(loc, i, 64); Value qubitI = rewriter.create(loc, qubits, index); if (pauliWordStr[i] != 'I') diff --git 
a/lib/Optimizer/Transforms/DependencyAnalysis.cpp b/lib/Optimizer/Transforms/DependencyAnalysis.cpp new file mode 100644 index 0000000000..eb80d2991c --- /dev/null +++ b/lib/Optimizer/Transforms/DependencyAnalysis.cpp @@ -0,0 +1,3280 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Frontend/nvqpp/AttributeNames.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/Passes.h" + +#define DEBUG_TYPE "dep-analysis" + +using namespace mlir; + +#define RAW(X) quake::X +#define RAW_MEASURE_OPS MEASURE_OPS(RAW) +#define RAW_GATE_OPS GATE_OPS(RAW) +#define RAW_QUANTUM_OPS QUANTUM_OPS(RAW) + +//===----------------------------------------------------------------------===// +// Generated logic +//===----------------------------------------------------------------------===// +namespace cudaq::opt { +#define GEN_PASS_DEF_DEPENDENCYANALYSIS +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + +namespace { +// TODO: Someday, it would probably make sense to make VirtualQIDs and +// PhysicalQIDs be data structures with metadata, not just integer +// identifiers. Some useful metadata would include the lifetime, +// which graph they belong to, where the DependencyNode representing +// their allocation is, etc... 
+ +/// A `PhysicalQID` is an index that will be used when generating +/// `quake.borrow_wire`s. It represents a physical wire. +typedef std::size_t PhysicalQID; + +/// A `VirtualQID` is a unique identifier for a virtual wire. +/// It is a handy way to refer to a specific virtual wire. +typedef std::size_t VirtualQID; + +/// Given a `quake` operation and an result index for a wire result, +/// returns the corresponding operand index for the wire input. +std::size_t getOperandIDXFromResultIDX(std::size_t resultidx, Operation *op) { + // The results for a measure are `(!quake.measure, !quake.wire)` + if (isa(op)) + return 0; + // Currently, all classical operands precede all quantum operands + for (auto type : op->getOperandTypes()) { + if (!quake::isQuantumType(type)) + resultidx++; + else + break; + } + return resultidx; +} + +/// Given a `quake` operation and an operand index for a wire input, +/// returns the corresponding result index for the wire result. +/// This is almost the inverse of `getOperandIDXFromResultIDX`, +/// and is the inverse if `quake.measure` results are ignored. +std::size_t getResultIDXFromOperandIDX(std::size_t operand_idx, Operation *op) { + // The results for a measure are `(!quake.measure, !quake.wire)` + if (isa(op)) + return 1; + std::size_t numPrecedingClassical = 0; + for (auto type : op->getOperandTypes()) { + if (!quake::isQuantumType(type)) + numPrecedingClassical++; + else + break; + } + + // Verify that all classical operands precede all quantum operands + assert(numPrecedingClassical + op->getNumResults() == op->getNumOperands()); + assert(operand_idx >= numPrecedingClassical && "invalid operand index"); + return operand_idx - numPrecedingClassical; +} + +/// Represents a qubit lifetime from the first cycle it is in use +/// to the last cycle it is in use (inclusive). 
+class LifeTime { +protected: + unsigned begin; + unsigned end; + +public: + LifeTime(unsigned begin, unsigned end) : begin(begin), end(end) { + assert(end >= begin && "invalid lifetime"); + }; + + /// Returns true if \p this is entirely after \p other + bool isAfter(LifeTime other) { return begin > other.end; } + + bool isOverlapping(LifeTime other) { + return !isAfter(other) && !other.isAfter(*this); + } + + /// Calculates the distance between \p this and \p other, + /// in terms of the # of cycles between the end of the earlier + /// LifeTime and the beginning of the later LifeTime. + /// Returns 0 if the LifeTimes overlap. + unsigned distance(LifeTime other) { + if (isOverlapping(other)) + return 0; + return std::max(begin, other.begin) - std::min(end, other.end); + } + + /// Modifies \p this LifeTime to be inclusive of \p other + /// and any cycles between \p this and \p other. + void combine(LifeTime other) { + begin = std::min(begin, other.begin); + end = std::max(end, other.end); + } + + unsigned getBegin() { return begin; } + unsigned getEnd() { return end; } +}; + +/// Contains LifeTime information for allocating physical qubits for +/// VirtualQIDs. 
+class LifeTimeAnalysis { +private: + SmallVector> lifetimes; + + /// Given a candidate lifetime, tries to find a qubit to reuse, + /// minimizing the distance between the lifetime of the existing + /// qubit and \p lifetime, and otherwise allocates a new qubit + PhysicalQID allocatePhysical(LifeTime lifetime) { + std::optional best_reuse = std::nullopt; + std::optional empty = std::nullopt; + unsigned best_distance = INT_MAX; + + for (unsigned i = 0; i < lifetimes.size(); i++) { + if (!lifetimes[i]) { + empty = i; + continue; + } + + auto other = lifetimes[i].value(); + auto distance = lifetime.distance(other); + if (!lifetime.isOverlapping(other) && distance < best_distance) { + best_reuse = i; + best_distance = distance; + } + } + + // Reuse a qubit based on its lifetime in the same scope + if (best_reuse) { + auto physical = best_reuse.value(); + lifetimes[physical]->combine(lifetime); + return physical; + } + + // Reuse a qubit without a lifetime (used in a different frame) + if (empty) { + auto physical = empty.value(); + lifetimes[physical] = lifetime; + return physical; + } + + // Fall back: allocate a new qubit + lifetimes.push_back(lifetime); + return lifetimes.size() - 1; + } + +public: + LifeTimeAnalysis() : lifetimes() {} + + /// Given a candidate lifetime, tries to find a qubit to reuse, + /// minimizing the distance between the lifetime of the existing + /// qubit and \p lifetime, and otherwise allocates a new qubit. + PhysicalQID allocatePhysical(VirtualQID qid, LifeTime lifetime) { + auto phys = allocatePhysical(lifetime); + return phys; + } + + /// Clears the lifetime information (erasing in-use lifetimes), + /// and returns a set of all physical qubits currently in use. 
+ /// + /// This is meant to be called by an `IfDependencyNode` after + /// performing qubit allocation in the inner blocks, so that the + /// inner blocks can perform qubit allocation in a clean state, but + /// the parent `IfDependencyNode` can capture the inner allocation + /// information. + SetVector clearFrame() { + SetVector frame; + for (uint i = 0; i < lifetimes.size(); i++) { + if (lifetimes[i]) { + frame.insert(i); + lifetimes[i] = std::nullopt; + } + } + return frame; + } + + /// Sets the lifetime for \p phys to \p lifetime, essentially + /// reallocating \p phys (used by an `IfDependencyNode` to + /// mark a qubit as in use for the entirety of the `if`). + void reallocatePhysical(PhysicalQID phys, LifeTime lifetime) { + assert(phys < lifetimes.size() && "Illegal qubit to reallocate!"); + assert(!lifetimes[phys] && "Cannot reallocate qubit still allocated!"); + lifetimes[phys] = lifetime; + } + + std::size_t getCount() { return lifetimes.size(); } + + void dump() { + llvm::outs() << "# qubits: " << getCount() << ", cycles: "; + for (std::size_t i = 0; i < lifetimes.size(); i++) + if (lifetimes[i]) + llvm::outs() << lifetimes[i].value().getBegin() << " - " + << lifetimes[i].value().getEnd() << " "; + else + llvm::outs() << "unused "; + llvm::outs() << "\n"; + } +}; + +class DependencyGraph; + +/// A DependencyNode represents an MLIR value or operation with attached +/// metadata. Most importantly, it captures dependency relations between quake +/// operations on wires, which is used for scheduling, lifetime analysis, +/// allocating physical qubits, lifting optimizations, and code generation. +/// +/// There is a family of DependencyNodes, based on what types of MLIR +/// values/operations they represent: The most common type of DependencyNode, +/// the OpDependencyNode, represents a quantum gate operation, or a classical +/// operation. ArgDependencyNode represents a block argument. 
InitDependencyNode +/// and RootDependencyNode represent the allocation/de-allocation of a quake +/// wire, respectively. A TerminatorDependencyNode represents a block +/// terminator, so a bit of care must be taken to ensure that it is always the +/// last operation in a block during code generation. An IfDependencyNode +/// represents an if, and therefore contains information about the then and else +/// blocks. An IfDependencyNode is treated as a "rectangle" by the analysis, +/// where the analysis of the outside scope does not look inside the `if` +/// (though optimizations are free to do so, as long as the maintain the +/// boundary afterwards). Finally, a ShadowDependencyNode represents a +/// dependency on a quantum-dependent classical value from a higher scope than +/// the operation that depends on it. This is necessary to ensure that the `if` +/// the dependent operation is in depends on the classical value, and the +/// operation inside the `if` can instead depend on the shadow dependency from +/// the `if`, ensuring that the boundaries of the `if` are properly maintained. +/// +/// There are three types of "containers" for DependencyNodes: +/// A DependencyBlock represents an MLIR block, with ArgDependencyNodes +/// representing the linear block arguments (quake wires), a DependencyGraph +/// representing the block's body, and the terminator for the block. A +/// DependencyGraph is a DAG consisting of DependencyNodes somehow related by +/// interaction. It contains useful metadata and functions for reasoning about +/// and manipulating the DAG. Finally, an IfDependencyNode contains a +/// DependencyBlock each for the then and else branches. 
+class DependencyNode { + // DependencyGraph performs manipulations/analyses over DependencyNodes + friend class DependencyGraph; + // Needs access to successors/dependencies for various uses + friend class OpDependencyNode; + // Needs access to successors/dependencies for lifting/lowering + friend class IfDependencyNode; + +public: + /// A DependencyEdge is a dependency on a specific result from a specific + /// node. It also contains useful metadata, such as the wire qid/qubit the + /// edge represents (if applicable), and the underlying MLIR value the edge + /// represents (through `getValue`). + struct DependencyEdge { + public: + DependencyNode *node; + // If a given dependency appears multiple times, + // (e.g., multiple results of the dependency are used by this node), + // it is important to know which result from the dependency + // corresponds to which operand. + // Otherwise, the dependency will be code gen'ed first, and it will + // be impossible to know (e.g., which result is a control and which is a + // target). Resultidx tracks this information. + std::size_t resultidx; + std::optional qid; + std::optional qubit; + + DependencyEdge() : node(nullptr), resultidx(INT_MAX), qid(std::nullopt) {} + + DependencyEdge(DependencyNode *node, std::size_t resultidx) + : node(node), resultidx(resultidx) { + assert(node && "DependencyEdge: node cannot be null"); + qid = node->getQIDForResult(resultidx); + } + + /// Returns the underlying DependencyNode * without attached metadata + DependencyNode *operator->() { return node; } + + /// Returns the value represented by this DependencyEdge + Value getValue() { return node->getResult(resultidx); } + }; + +protected: + // Currently, successors are unordered, as any operation with non-linear + // results (e.g., `mz`) does not have a known # of successors. 
+ SetVector successors; + // Dependencies are in the order of operands, this ordering is relied upon in + // the optimizations and during code generation + SmallVector dependencies; + // The set of virtual wires flowing through this node. + // TODO: it would probably make sense to have a similar tracking of physical + // wires here. + SetVector qids; + std::optional cycle = std::nullopt; + bool hasCodeGen = false; + unsigned height = 0; + + virtual void dumpNode() = 0; + + void dumpSubGraph(int tabIndex) { + for (int i = 0; i < tabIndex; i++) { + llvm::outs() << "\t"; + } + + dumpNode(); + + for (auto dependency : dependencies) + dependency->dumpSubGraph(tabIndex + 1); + } + + /// Returns the MLIR value representing the result of this node at \p + /// resultidx + virtual Value getResult(unsigned resultidx) = 0; + + /// Returns a name for the node to use for checking equivalence. + // TODO: this is currently a little hacky and could be done a little better by + // adding say an "equivalent node" function and overloading. For example, + // block arguments can be checked by arg number, but currently the arg number + // is part of the OpName for them. Allocs do have qid/qubit alloc info checked + // explicitly in an overload of prefixEquivalent. Arithmetic constants are + // handled by adding the constant value to the string, very inefficient... 
+ virtual std::string getOpName() = 0; + + /// Generates quake code for this node at the current insertion point in \p + /// builder + virtual void codeGen(OpBuilder &builder) = 0; + + /// Recalculates the height of this node + virtual void updateHeight() { + height = 0; + for (auto edge : dependencies) { + if (edge->getHeight() > height) + height = edge->getHeight(); + } + height += numTicks(); + } + +public: + DependencyNode() : successors(), dependencies({}), qids({}), height(0) {} + + virtual ~DependencyNode(){}; + + /// Returns true if \p this is a graph root (has no successors, e.g., a wire + /// de-alloc) + virtual bool isRoot() { return successors.empty(); }; + /// Returns true if \p this is a graph leaf (has no dependencies, e.g., a wire + /// alloc) + virtual bool isLeaf() { return dependencies.empty(); }; + /// Returns true if \p this is not an operation which has an associated cycle + /// cost + virtual bool isSkip() { return numTicks() == 0; }; + /// Returns true if the associated value/operation is a quantum + /// value/operation + virtual bool isQuantumOp() = 0; + /// Returns the number of cycles this node takes + virtual unsigned numTicks() = 0; + /// Returns true if and only if this is an InitDependencyNode + virtual bool isAlloc() { return false; } + /// Returns the height of this dependency node, based on the # of cycles it + /// will take and the heights of its dependencies + unsigned getHeight() { return height; }; + /// Prints this node and its dependencies to llvm::outs() + void dump() { dumpSubGraph(0); } + /// Returns true if this node is a quantum operation/value or has a quantum + /// operation as an ancestor. In fact, after inlining, Canonicalization, and + /// CSE, this should only returns false for arithmetic constants. 
+  /// Returns true if this node is itself a quantum op, or (transitively)
+  /// feeds one of its values into a quantum op.
+  virtual bool isQuantumDependent() {
+    if (isQuantumOp())
+      return true;
+    for (auto dependency : dependencies)
+      if (dependency->isQuantumDependent())
+        return true;
+    return false;
+  };
+  /// Returns true if this node contains more dependency nodes inside of it
+  /// (currently, this is only true of `IfDependencyNode`s).
+  virtual bool isContainer() { return false; }
+
+  /// Returns the index of the dependency for \p qid in this node, if such an
+  /// index exists
+  // NOTE(review): template arguments appear to have been stripped from this
+  // patch text (here and throughout: std::optional, SetVector, SmallVector,
+  // casts). Presumably std::optional<unsigned> here — confirm against the
+  // original file before applying.
+  std::optional getDependencyForQID(VirtualQID qid) {
+    for (unsigned i = 0; i < dependencies.size(); i++)
+      if (dependencies[i].qid == qid)
+        return std::optional(i);
+
+    return std::nullopt;
+  }
+
+  /// Returns the immediate successor node for the wire represented by \p qid
+  ///
+  /// This function assumes that wires are linear types with only one use.
+  /// Otherwise, this function would need to return a list of successors,
+  /// and any users of this function would need to handle all the returned
+  /// successors.
+  virtual DependencyNode *getSuccessorForQID(VirtualQID qid) {
+    assert(qids.contains(qid) &&
+           "Asking for a qid that doesn't flow through this operation!");
+    for (auto successor : successors) {
+      // Special case: ignore patch discriminate for a measure
+      if (!successor->isQuantumOp())
+        continue;
+
+      auto idx = successor->getDependencyForQID(qid);
+      // If the successor has a dependency for the given QID, ensure that the
+      // dependency is actually on this node, otherwise the QID flows through
+      // a different successor first, so this isn't the successor we're looking
+      // for
+      if (idx && successor->dependencies[idx.value()].node == this)
+        return successor;
+    }
+
+    assert(false && "Couldn't find successor for linear type!");
+  }
+
+  /// Recursively find nodes scheduled at a given cycle
+  SetVector
+  getNodesAtCycle(unsigned _cycle, SetVector &seen) {
+    SetVector nodes;
+
+    if (seen.contains(this))
+      return nodes;
+
+    seen.insert(this);
+
+    if (!isSkip()) {
+      assert(cycle.has_value() && "Trying to use cycle of unscheduled node");
+
+      // Nodes strictly earlier than _cycle cannot have relevant dependencies
+      // at _cycle either, so the search stops here.
+      if (cycle.value() < _cycle)
+        return nodes;
+      else if (cycle.value() == _cycle) {
+        nodes.insert(this);
+        return nodes;
+      }
+    }
+
+    for (auto dependency : dependencies)
+      nodes.set_union(dependency->getNodesAtCycle(_cycle, seen));
+
+    return nodes;
+  }
+
+  /// Returns true if \p this and \p other are equivalent nodes with equivalent
+  /// dependencies.
+  virtual bool prefixEquivalentTo(DependencyNode *other) {
+    if (getOpName() != other->getOpName())
+      return false;
+    if (height != other->height)
+      return false;
+    if (dependencies.size() != other->dependencies.size())
+      return false;
+    for (unsigned i = 0; i < dependencies.size(); i++) {
+      if (dependencies[i].qid != other->dependencies[i].qid) {
+        if (!dependencies[i].qubit.has_value())
+          return false;
+        if (dependencies[i].qubit != other->dependencies[i].qubit)
+          return false;
+      }
+      // TODO: I think the above nested check should be the same as in
+      // postfixEquivalentTo, as in the following:
+      /*
+      if (dependencies[i].qubit != other->dependencies[i].qubit)
+        return false;
+      if (dependencies[i].qid != other->dependencies[i].qid)
+        if (dependencies[i].qubit.has_value() ||
+            other->dependencies[i].qubit.has_value())
+          return false;*/
+      if (!dependencies[i].node->prefixEquivalentTo(
+              other->dependencies[i].node))
+        return false;
+    }
+    return true;
+  }
+
+  /// Returns true if \p this and \p other are equivalent nodes with equivalent
+  /// input wires, but without looking at dependencies.
+  /// TODO: Currently, this does not handle classical values, which it should
+  /// really return false for as a first approximation.
+  /// TODO: Arithmetic constants, aka DependencyNodes where
+  /// `isQuantumDependent()` is false, can be tested for equivalence,
+  /// however, testing quantum dependent values for equivalence is really
+  /// difficult in general, unless the classical values are actually
+  /// shadowed values from a higher scope.
+  virtual bool postfixEquivalentTo(DependencyNode *other) {
+    if (getOpName() != other->getOpName())
+      return false;
+    if (dependencies.size() != other->dependencies.size())
+      return false;
+    for (unsigned i = 0; i < dependencies.size(); i++) {
+      if (dependencies[i].qubit != other->dependencies[i].qubit)
+        return false;
+      if (dependencies[i].qid != other->dependencies[i].qid)
+        if (dependencies[i].qubit.has_value() ||
+            other->dependencies[i].qubit.has_value())
+          return false;
+    }
+    return true;
+  }
+
+  /// Returns the qubits that flow through this instruction
+  virtual SetVector getQubits() {
+    return SetVector();
+  }
+
+  /// Replaces every dependency on this node with the DependencyEdge \p other.
+  /// This should only be used if it is known that any other node will have
+  /// exactly one dependency on this node (the only real way to guarantee this
+  /// is if this node has only one result).
+  void replaceWith(DependencyEdge other) {
+    for (auto successor : successors) {
+      for (auto &dependency : successor->dependencies) {
+        if (dependency.node == this) {
+          dependency = other;
+          other->successors.remove(this);
+          other->successors.insert(successor);
+        }
+      }
+    }
+  }
+
+  /// Recursively updates the dependency edges for \p qid to use \p qubit for
+  /// this node and successors
+  virtual void updateWithPhysical(VirtualQID qid, PhysicalQID qubit) {
+    for (auto &dependency : dependencies) {
+      if (dependency.qid && dependency.qid == qid) {
+        dependency.qubit = qubit;
+        break;
+      }
+    }
+
+    for (auto successor : successors)
+      if (successor->qids.contains(qid))
+        successor->updateWithPhysical(qid, qubit);
+  }
+
+  /// Recursively replaces \p old_qid with \p new_qid for this node and its
+  /// successors
+  virtual void updateQID(VirtualQID old_qid, VirtualQID new_qid) {
+    qids.remove(old_qid);
+    qids.insert(new_qid);
+
+    auto idx = getDependencyForQID(old_qid);
+
+    if (idx)
+      dependencies[idx.value()].qid = new_qid;
+
+    for (auto successor : successors)
+      if (successor->qids.contains(old_qid))
+        successor->updateQID(old_qid, new_qid);
+  }
+
+  /// If a wire's first and last use inside a block is in an `if`, move the
+  /// alloc/de-alloc into both the then and else blocks (separately) of the
+  /// `if`, which will make the lifetime analysis more accurate and may provide
+  /// additional lifting opportunities. If the wire is not used in the then or
+  /// else branch it is deleted after being moved in.
+  ///
+  /// This function works recursively outside-in, so wires are moved in
+  /// (contracted) as far as possible.
+  ///
+  /// This function should only be called on DependencyNodes where
+  /// `isContainer()` is true.
+  virtual void contractAllocsPass(unsigned &next_qid) {
+    assert(false &&
+           "contractAllocPass can only be called on an IfDependencyNode");
+  }
+
+  /// Given a virtual wire and corresponding alloc/de-alloc nodes from a parent
+  /// scope, moves the virtual wire into the then and else blocks (separately)
+  /// of the `if`.
+  ///
+  /// This is used by contractAllocsPass, when `this->isContainer()` is true and
+  /// this node is the first and last use of the virtual wire in the parent
+  /// scope.
+  virtual void lowerAlloc(DependencyNode *init, DependencyNode *root,
+                          VirtualQID alloc, unsigned &next_qid) {
+    assert(false && "lowerAlloc can only be called on an IfDependencyNode");
+  }
+
+  /// Recursively schedules nodes and performs lifetime analysis to allocate
+  /// physical qubits for virtual wires, working inside out. For
+  /// `IfDependencyNode`s, this means combining the physical qubit allocations
+  /// of the then and else blocks, and then performing lifting optimizations,
+  /// where common operations in the then and else blocks are lifted to the
+  /// graph containing the `if`, hence the need to pass \p parent_graph
+  ///
+  /// This function should only be called on DependencyNodes where isContainer()
+  /// is true.
+  virtual void performAnalysis(LifeTimeAnalysis &set,
+                               DependencyGraph *parent_graph) {
+    assert(false &&
+           "performAnalysis can only be called on an IfDependencyNode");
+  }
+
+  /// Remove this dependency node from the path for \p qid by replacing
+  /// successor dependencies on \p qid with the relevant dependency from this
+  /// node.
+  virtual void eraseEdgeForQID(VirtualQID qid) = 0;
+
+  virtual std::optional getQIDForResult(std::size_t resultidx) = 0;
+};
+
+/// An InitDependencyNode represents an allocation of virtual wire or physical
+/// qubit (more concretely, a `quake.borrow_wire`). This node will always be a
+/// leaf node, as it will never have dependencies.
+/// TODO: The reason it doesn't derive from OpDependencyNode is historical and
+/// doesn't apply anymore, but it's also not really clear that there would be
+/// any benefit to deriving since it overloads a lot of the functions anyway.
+class InitDependencyNode : public DependencyNode {
+protected:
+  Value wire;
+  // NOTE(review): template argument stripped in this patch text — presumably
+  // std::optional<PhysicalQID> (assigned only once lifetime analysis picks a
+  // physical qubit); confirm against the original file.
+  std::optional qubit = std::nullopt;
+
+  void dumpNode() override {
+    llvm::outs() << "Initial value for QID " << getQID();
+    if (qubit)
+      llvm::outs() << " -> phys: " << qubit.value();
+    llvm::outs() << ": ";
+    wire.dump();
+  }
+
+  Value getResult(unsigned resultidx) override {
+    assert(resultidx == 0 && "Illegal resultidx");
+    return wire;
+  }
+
+  std::string getOpName() override { return "init"; };
+
+  /// Generates quake code for this node at the current insertion point in \p
+  /// builder
+  void codeGen(OpBuilder &builder) override {
+    assert(qubit.has_value() && "Trying to codeGen a virtual allocation "
+                                "without a physical qubit assigned!");
+    auto wirety = quake::WireType::get(builder.getContext());
+    // NOTE(review): the create<...> op template argument was stripped here —
+    // presumably quake::BorrowWireOp given the wireset/identity operands.
+    auto alloc = builder.create(
+        builder.getUnknownLoc(), wirety,
+        cudaq::opt::topologyAgnosticWiresetName, qubit.value());
+    wire = alloc.getResult();
+    hasCodeGen = true;
+  }
+
+public:
+  InitDependencyNode(quake::BorrowWireOp op) : wire(op.getResult()) {
+    // Lookup qid from op
+    auto qid = op.getIdentity();
+    qids.insert(qid);
+  };
+
+  // "Allocation" occurs statically in the base and adaptive profiles, so takes
+  // no cycles
+  unsigned numTicks() override { return 0; }
+  bool isQuantumOp() override { return true; }
+
+  /// Returns the qid for the virtual wire this node allocates
+  VirtualQID getQID() { return qids.front(); }
+
+  /// Returns the qubit for the physical wire this node allocates if assigned
+  std::optional getQubit() { return qubit; }
+
+  ~InitDependencyNode() override {}
+
+  bool isAlloc() override { return true; }
+
+  /// Assigns the physical qubit \p phys to this virtual wire, recursively
+  /// updating all dependencies on this node
+  void assignToPhysical(PhysicalQID phys) {
+    qubit = phys;
+    updateWithPhysical(getQID(), phys);
+  }
+
+  bool prefixEquivalentTo(DependencyNode *other) override {
+    if (!other->isAlloc())
+      return false;
+
+    auto other_init = static_cast(other);
+
+    // Two allocations are equivalent if they represent the same physical qubit.
+    // TODO: with qids now being unique, this test can refer to qids if qubits
+    // are not yet assigned (or even pointer equivalence in the meantime).
+    // However, since allocations are currently always lifted, it does not come
+    // up currently.
+    return qubit && other_init->qubit && qubit == other_init->qubit;
+  }
+
+  void eraseEdgeForQID(VirtualQID qid) override {
+    assert(false && "Can't call eraseEdgeForQID with an InitDependencyNode");
+  }
+
+  SetVector getQubits() override {
+    SetVector qubits;
+    if (qubit)
+      qubits.insert(qubit.value());
+    return qubits;
+  }
+
+  std::optional getQIDForResult(std::size_t resultidx) override {
+    assert(resultidx == 0 && "Invalid resultidx");
+    return std::optional(getQID());
+  }
+};
+
+/// An OpDependencyNode represents a quantum or classical operation.
+class OpDependencyNode : public DependencyNode {
+  friend class IfDependencyNode;
+  friend class ShadowDependencyNode;
+
+protected:
+  // The MLIR operation this node wraps; replaced during codeGen with the
+  // newly created clone.
+  Operation *associated;
+  bool quantumOp;
+
+  virtual void dumpNode() override {
+    llvm::outs() << "QIDs: ";
+    bool printComma = false;
+    for (auto qid : qids) {
+      if (printComma)
+        llvm::outs() << ", ";
+      llvm::outs() << qid;
+      printComma = true;
+    }
+    if (cycle.has_value())
+      llvm::outs() << " @ " << cycle.value();
+    llvm::outs() << " | " << height << ", " << numTicks() << " | ";
+    associated->dump();
+  }
+
+  /// Returns the MLIR value representing the result of this node at \p
+  /// resultidx
+  Value getResult(unsigned resultidx) override {
+    return associated->getResult(resultidx);
+  }
+
+  /// Returns a name for the node to use for checking equivalence.
+  // TODO: this is currently a little hacky and could be done a little better by
+  // adding say an "equivalent node" function and overloading. For example,
+  // block arguments can be checked by arg number, but currently the arg number
+  // is part of the OpName for them. Allocs do have qid/qubit alloc info checked
+  // explicitly in an overload of prefixEquivalent. Arithmetic constants are
+  // handled by adding the constant value to the string, very inefficient...
+  // NOTE(review): the isa<...>/dyn_cast<...>/cast<...> template arguments were
+  // stripped from this patch text — presumably arith::ConstantOp and the
+  // float/index/int constant-op and attribute types; confirm against the
+  // original file.
+  std::string getOpName() override {
+    if (isa(associated)) {
+      if (auto cstf = dyn_cast(associated)) {
+        auto value = cstf.getValue().cast().getValueAsDouble();
+        return std::to_string(value);
+      } else if (auto cstidx = dyn_cast(associated)) {
+        auto value = cstidx.getValue().cast().getInt();
+        return std::to_string(value);
+      } else if (auto cstint = dyn_cast(associated)) {
+        auto value = cstint.getValue().cast().getInt();
+        return std::to_string(value);
+      }
+    }
+    return associated->getName().getStringRef().str();
+  };
+
+  /// A helper to gather the MLIR values that will be used as operands for this
+  /// operation when generating code, based on the dependencies of this node.
+  SmallVector gatherOperands(OpBuilder &builder) {
+    SmallVector operands(dependencies.size());
+    for (std::size_t i = 0; i < dependencies.size(); i++) {
+      auto dependency = dependencies[i];
+
+      // Ensure classical values are available and that any allocs are added
+      if (dependency->isSkip())
+        dependency->codeGen(builder);
+
+      assert(dependency->hasCodeGen &&
+             "Generating code for successor before dependency");
+
+      // Get relevant result from dependency's updated op
+      // to use as the relevant operand
+      operands[i] = dependency->getResult(dependency.resultidx);
+    }
+
+    return operands;
+  }
+
+  /// A helper to generate the quake code for this operation
+  virtual void genOp(OpBuilder &builder) {
+    auto oldOp = associated;
+    auto operands = gatherOperands(builder);
+
+    associated =
+        Operation::create(oldOp->getLoc(), oldOp->getName(),
+                          oldOp->getResultTypes(), operands, oldOp->getAttrs());
+    associated->removeAttr("dnodeid");
+    builder.insert(associated);
+  }
+
+  /// Generates quake code for this node at the current insertion point in \p
+  /// builder using the dependencies of the node as operands.
+  ///
+  /// Classical constants will be duplicated everywhere they are used,
+  /// while all quantum-dependent operations will only have code generated
+  /// once.
+  ///
+  /// If this operation is a quantum operation, then all quantum-dependent
+  /// dependencies must already have code generated for them. If this assumption
+  /// doesn't hold, it is likely something going wrong with scheduling or the
+  /// graph structure, but the error may only show up here.
+  virtual void codeGen(OpBuilder &builder) override {
+    if (hasCodeGen && isQuantumDependent())
+      return;
+
+    // Non-quake operations have code generated aggressively
+    // This ensures that code gen is not too aggressive
+    if (isSkip())
+      for (auto dependency : dependencies)
+        if (!dependency->hasCodeGen && dependency->isQuantumDependent())
+          // Wait for quantum op dependency to be codeGen'ed
+          return;
+
+    genOp(builder);
+    hasCodeGen = true;
+
+    // Ensure classical values are generated
+    for (auto successor : successors)
+      if (successor->isSkip() && isQuantumDependent())
+        successor->codeGen(builder);
+  }
+
+public:
+  // NOTE(review): template arguments (SmallVector, SetVector, DenseMap,
+  // static_cast, isa) were stripped from this patch text throughout the
+  // remainder of this hunk; confirm each against the original file.
+  OpDependencyNode(Operation *op, SmallVector _dependencies)
+      : associated(op) {
+    assert(op && "Cannot make dependency node for null op");
+    assert(_dependencies.size() == op->getNumOperands() &&
+           "Wrong # of dependencies to construct node");
+
+    dependencies = _dependencies;
+
+    quantumOp = isQuakeOperation(op);
+    // TODO: quake.discriminate is currently the only operation in the quake
+    // dialect that doesn't operate on wires. This will need to be updated if
+    // that changes.
+    if (isa(op))
+      quantumOp = false;
+
+    height = 0;
+    // Ingest dependencies, setting up metadata
+    for (std::size_t i = 0; i < dependencies.size(); i++) {
+      auto edge = dependencies[i];
+
+      assert(edge->getResult(edge.resultidx) == associated->getOperand(i) &&
+             "Dependency isn't actually a dependency!");
+      // Add this as a successor to each dependency
+      edge->successors.insert(this);
+
+      // Update metadata
+      if (edge.qid.has_value() && quantumOp)
+        qids.insert(edge.qid.value());
+    }
+
+    updateHeight();
+  };
+
+  virtual ~OpDependencyNode() override {}
+
+  /// Currently, all quantum operations are considered to take 1 cycle.
+  // TODO: make the cycle time configurable per operation.
+  virtual unsigned numTicks() override { return isQuantumOp() ? 1 : 0; }
+  virtual bool isQuantumOp() override { return quantumOp; }
+
+  unsigned getHeight() { return height; }
+
+  virtual std::size_t getResultForDependency(std::size_t operandidx) {
+    return getResultIDXFromOperandIDX(operandidx, associated);
+  }
+
+  virtual void eraseEdgeForQID(VirtualQID qid) override {
+    assert(qids.contains(qid) && "Erasing edge for QID not in node!");
+    auto successor = getSuccessorForQID(qid);
+    auto out_idx = successor->getDependencyForQID(qid).value();
+    auto in_idx = getDependencyForQID(qid).value();
+    auto dependency = dependencies[in_idx];
+    dependencies.erase(dependencies.begin() + in_idx);
+    successor->dependencies[out_idx] = dependency;
+    dependency->successors.insert(successor);
+
+    bool remove = true;
+
+    // Remove successor if it has no other dependencies on this
+    for (auto dependency : successor->dependencies)
+      if (dependency.node == this)
+        remove = false;
+
+    if (remove)
+      successors.remove(successor);
+
+    // Update successor's height after adding a new dependency
+    // This won't fix the height recursively, but is key for lifting
+    // as if a dependency was lifted, now the successor may be liftable
+    successor->updateHeight();
+
+    remove = true;
+    for (auto edge : dependencies)
+      if (edge.node == dependency.node)
+        remove = false;
+
+    // Only remove this as a successor from dependency if this was the last
+    // QID from dependency we depended on
+    if (remove)
+      dependency->successors.remove(this);
+
+    qids.remove(qid);
+  }
+
+  /// Removes this OpDependencyNode from the graph by replacing all successor
+  /// dependencies with the relevant dependency from this node. Also deletes
+  /// this node and any classical values that only this node depends on.
+  ///
+  /// `erase` will not handle classical successors of this operation
+  /// (e.g., a `quake.discriminate` if this operation is a `quake.mz`, or any
+  /// classical results of an `if`). It is the responsibility of the caller
+  /// to cleanup such values. Similarly, it is up to the caller to delete this
+  /// node after it is erased.
+  void erase() {
+    for (auto successor : successors) {
+      bool remove = true;
+      for (auto &edge : successor->dependencies) {
+        if (edge.node == this) {
+          // If the output isn't a linear type, then don't worry about it
+          if (quake::isQuantumType(edge.getValue().getType())) {
+            auto idx = getDependencyForQID(edge.qid.value()).value();
+            auto dependency = dependencies[idx];
+            edge = dependency;
+            dependency->successors.insert(successor);
+          } else {
+            remove = false;
+          }
+        }
+      }
+
+      if (remove) {
+        successors.remove(successor);
+        successor->updateHeight();
+      }
+    }
+
+    // Clean up any now unused constants this node relies on
+    for (auto dependency : dependencies) {
+      dependency->successors.remove(this);
+      if (dependency->successors.empty() && !dependency->isQuantumDependent()) {
+        // TODO: probably not necessary to call erase here, as the dependency
+        // now has no successors, and should be a classical constant, so
+        // wouldn't have any dependencies either
+        static_cast(dependency.node)->erase();
+        delete dependency.node;
+      }
+    }
+  }
+
+  virtual std::optional
+  getQIDForResult(std::size_t resultidx) override {
+    if (!isQuantumOp())
+      return std::nullopt;
+    auto operand = getOperandIDXFromResultIDX(resultidx, associated);
+    if (operand >= dependencies.size())
+      return std::nullopt;
+    return dependencies[operand].qid;
+  }
+};
+
+/// A DependencyGraph is a DAG consisting of DependencyNodes somehow related by
+/// interaction. It contains useful metadata and functions for reasoning about
+/// and manipulating the DAG.
+class DependencyGraph {
+private:
+  // The set of root nodes in the DAG (it's a set for repeatable iteration
+  // order)
+  SetVector roots;
+  // Tracks the node for the alloc of each virtual wire allocated in the DAG
+  DenseMap allocs;
+  // Tracks the leaf node for each virtual wire in the DAG
+  DenseMap leafs;
+  // The set of virtual wires used in the DAG. With the assumption that wires
+  // are linear types, we can assume that each such virtual wire should have
+  // a single related leaf/root.
+  SetVector qids;
+  // Tracks the dependency node introducing each physical qubit in the DAG.
+  // Currently, since physical qubit allocations are always lifted, the
+  // associated DependencyNode will always be an InitDependencyNode. However, if
+  // they were not always lifted, than it may also be a container DependencyNode
+  // somewhere inside of which the qubit is allocated.
+  // TODO: if physical wires are not combined, this needs to not be a single
+  // node, as the same physical qubit can be allocated, used, and de-allocated
+  // multiple times in a graph, which would present problems.
+  DenseMap qubits;
+  unsigned total_height = 0;
+  SetVector containers;
+
+  /// Starting from \p next, searches through \p next's family
+  /// (excluding already seen nodes) to find all the interconnected roots
+  /// that this graph represents.
+  /// Also fills in metadata about the height of the graph, and the qids in the
+  /// graph.
+  void gatherRoots(SetVector &seen, DependencyNode *next) {
+    if (seen.contains(next) || !next->isQuantumDependent())
+      return;
+
+    if (next->isRoot()) {
+      roots.insert(next);
+      if (next->height > total_height)
+        total_height = next->height;
+    }
+
+    seen.insert(next);
+
+    if (next->isLeaf() && next->isQuantumOp()) {
+      leafs.insert({next->qids.front(), next});
+      qids.insert(next->qids.front());
+    }
+
+    if (next->isAlloc()) {
+      auto init = static_cast(next);
+      allocs[init->getQID()] = init;
+    }
+
+    if (next->isContainer())
+      containers.insert(next);
+
+    for (auto successor : next->successors)
+      gatherRoots(seen, successor);
+    for (auto dependency : next->dependencies)
+      gatherRoots(seen, dependency.node);
+  }
+
+  /// Recursively finds all nodes in the graph scheduled at \p cycle
+  SetVector getNodesAtCycle(unsigned cycle) {
+    SetVector nodes;
+    SetVector seen;
+    for (auto root : roots)
+      nodes.set_union(root->getNodesAtCycle(cycle, seen));
+    return nodes;
+  }
+
+  /// Recursively updates the height metadata of dependencies of \p next, and
+  /// then \p next itself, skipping nodes in \p seen. Every updated node is
+  /// added to \p seen. Dependencies are updated first so that the update to \p
+  /// next uses up-to-date information.
+  void updateHeight(SetVector &seen, DependencyNode *next) {
+    if (seen.contains(next))
+      return;
+
+    seen.insert(next);
+
+    for (auto dependency : next->dependencies)
+      updateHeight(seen, dependency.node);
+
+    next->updateHeight();
+  }
+
+  /// Assigns cycles to quantum operations. A node must be scheduled after all
+  /// of its dependencies, and before all of its successors. A node cannot be
+  /// scheduled at a negative cycle, nor can it be scheduled at a cycle greater
+  /// than or equal to the height of the graph to which it belongs.
+  ///
+  /// The scheduling algorithm (as currently implemented) works by always
+  /// following the longest path first.
+  /// The longest path will always be "saturated" with an operation every cycle,
+  /// so we know exactly when to schedule every operation along that path.
+  /// Then, every successor (not on the path) of an operation on the path should
+  /// be scheduled as early as possible, (the earliest an operation can be
+  /// scheduled is determined by its height). Likewise, every dependency (not on
+  /// the path) should be scheduled as late as possible. Because we work
+  /// outwards from the longest path, this ensures that every other path is
+  /// scheduled as "densely" as possible around the connections with the longest
+  /// path, while still having a valid schedule.
+  ///
+  /// Always following the longest path first is essentially an implementation
+  /// of a transitive reduction of the graph. The only auxiliary data structure
+  /// used here is a sorted copy of the dependency list. The length of a path
+  /// is equal to the height of the node which is metadata present from
+  /// construction.
+  ///
+  /// The current implementation of the scheduling algorithm optimizes for
+  /// increased qubit reuse optimizations by minimizing qubit lifetimes.
+  /// An alternative approach could optimize for more circuit-length
+  /// reduction by recognizing lifting opportunities and scheduling operations
+  /// with that in mind.
+  ///
+  /// \p level is essentially the depth from the tallest point in the graph
+  void schedule(SetVector &seen, DependencyNode *next,
+                unsigned level) {
+    // Ignore classical values that don't depend on quantum values
+    if (seen.contains(next) || !next->isQuantumDependent())
+      return;
+
+    seen.insert(next);
+
+    // The height of a node (minus numTicks()) is the earliest a node can be
+    // scheduled
+    if (level < next->height)
+      level = next->height;
+
+    unsigned current = level;
+    if (!next->isSkip()) {
+      current -= next->numTicks();
+      next->cycle = current;
+    }
+
+    // Sort dependencies by height to always follow the longest path first.
+    // Without this, two dependencies may be scheduled at the same cycle,
+    // even if one of the dependencies depends on the other.
+    // This sort of mimics working over a transitive reduction of the graph.
+    SmallVector sorted(next->dependencies);
+    std::sort(sorted.begin(), sorted.end(), [](auto x, auto y) {
+      return x.node->getHeight() > y.node->getHeight();
+    });
+
+    // Schedule dependencies as late as possible
+    for (auto dependency : sorted)
+      if (!dependency->isLeaf())
+        schedule(seen, dependency.node, current);
+
+    // Schedule unscheduled successors as early as possible
+    for (auto successor : next->successors)
+      if (!successor->isRoot())
+        schedule(seen, successor,
+                 current + next->numTicks() + successor->numTicks());
+  }
+
+  /// Replaces the leaf for \p old_qid (if \p old_qid is part of the graph) with
+  /// \p new_leaf which has \p new_qid by removing the old leaf for \p old_qid
+  /// from the graph metadata and replacing the dependency on the old leaf with
+  /// \p new_leaf.
+  ///
+  /// If \p old_qid was not part of the graph, this has the effect of adding \p
+  /// new_leaf to the graph.
+  ///
+  /// Cleaning up the old leaf is the responsibility of the caller.
+  // TODO: replaceLeaf, replaceRoot, and replaceLeafAndRoot have confusing and
+  // overlapping functionality and are used to both replace and add
+  // leafs/roots. This makes them quite fragile. These responsibilities
+  // should be clearly separated into `add` and `replace` functions.
+  // Specifically, replaceLeaf can be used to both add a new leaf (but
+  // then old_qid is still passed with meaning which is bizarre), but also
+  // can be used to actually replace a leaf with a new leaf, as intended.
+  // There should really be a separate mechanism to add a new virtual wire
+  // to a graph along with a corresponding leaf and root (used when
+  // lifting allocations), and then a separate mechanism like here to get
+  // rid of the old leaf, and update the metadata (used for lowering
+  // allocations, to replace the block argument and terminator dependency
+  // with an alloc and de-alloc respectively).
+  void replaceLeaf(VirtualQID old_qid, VirtualQID new_qid,
+                   DependencyNode *new_leaf) {
+    assert(new_leaf->isLeaf() && "Invalid leaf!");
+
+    if (leafs.count(old_qid) == 1) {
+      auto old_leaf = leafs[old_qid];
+
+      auto first_use = old_leaf->getSuccessorForQID(old_qid);
+      auto idx = first_use->getDependencyForQID(old_qid).value();
+
+      first_use->dependencies[idx] =
+          DependencyNode::DependencyEdge(new_leaf, 0);
+      // If new_qid is different from the old_qid, updateQIDs() in
+      // replaceLeafAndRoot will handle updating this
+      first_use->dependencies[idx].qid = old_qid;
+      old_leaf->successors.remove(first_use);
+      new_leaf->successors.insert(first_use);
+      if (old_leaf->isAlloc()) {
+        allocs.erase(allocs.find(old_qid));
+        auto alloc = static_cast(old_leaf);
+        if (alloc->getQubit())
+          qubits.erase(qubits.find(alloc->getQubit().value()));
+      }
+    }
+
+    leafs[new_qid] = new_leaf;
+    if (new_leaf->isAlloc()) {
+      auto alloc = static_cast(new_leaf);
+      allocs[new_qid] = alloc;
+      if (alloc->getQubit())
+        qubits[alloc->getQubit().value()] = alloc;
+    }
+  }
+
+  /// Replaces the root for \p old_qid (if \p old_qid is part of the graph) with
+  /// \p new_root which has \p new_qid by removing the old root for \p old_qid
+  /// from the graph metadata and replacing the old root with \p new_root as the
+  /// successor of the last use of \p old_qid.
+  ///
+  /// If \p old_qid was not part of the graph, this has the effect of adding \p
+  /// new_root to the graph.
+  ///
+  /// Cleaning up the old root is the responsibility of the caller.
+  // TODO: see noted attached to replaceLeaf above
+  void replaceRoot(VirtualQID old_qid, VirtualQID new_qid,
+                   DependencyNode *new_root) {
+    assert(new_root->isRoot() && "Invalid root!");
+
+    if (qids.contains(old_qid)) {
+      auto old_root = getRootForQID(old_qid);
+
+      auto idx = old_root->getDependencyForQID(old_qid).value();
+
+      auto dep = old_root->dependencies[idx];
+      dep->successors.remove(old_root);
+      dep->successors.insert(new_root);
+      // If new_qid is different from the old_qid, updateQIDs() in
+      // replaceLeafAndRoot will handle updating this
+      dep.qid = old_qid;
+
+      new_root->dependencies.push_back(dep);
+      old_root->dependencies.erase(old_root->dependencies.begin() + idx);
+
+      // If the terminator is somehow getting deleted, then the entire block
+      // must be empty, and then it will never be used
+      if (old_root->dependencies.empty())
+        roots.remove(old_root);
+
+      old_root->qids.remove(old_qid);
+    }
+
+    // If new_qid is different from the old_qid, updateQIDs() in
+    // replaceLeafAndRoot will handle updating this
+    new_root->qids.insert(old_qid);
+    roots.insert(new_root);
+  }
+
+  /// Gathers all the nodes in the graph into seen, starting from next
+  void gatherNodes(SetVector &seen, DependencyNode *next) {
+    if (seen.contains(next) || !next->isQuantumDependent())
+      return;
+
+    seen.insert(next);
+
+    for (auto successor : next->successors)
+      gatherNodes(seen, successor);
+    for (auto dependency : next->dependencies)
+      gatherNodes(seen, dependency.node);
+  }
+
+public:
+  DependencyGraph(DependencyNode *root) {
+    total_height = 0;
+    SetVector seen;
+    qids = SetVector();
+    gatherRoots(seen, root);
+  }
+
+  /// Cleans up all nodes in the graph, except for ArgDependencyNodes, which are
+  /// the responsibility of the DependencyBlock that owns this graph
+  ~DependencyGraph() {
+    SetVector nodes;
+    for (auto root : roots)
+      gatherNodes(nodes, root);
+
+    for (auto node : nodes)
+      // ArgDependencyNodes are handled by the block and skipped here.
+      // ShadowDependencyNodes are deleted here. This is safe, because
+      // a new ShadowDependencyNode is created for each use of a
+      // ShadowDependency (which may be undesirable eventually).
+      if (!node->isLeaf() || !node->isQuantumDependent() || node->isAlloc())
+        delete node;
+  }
+
+  /// Returns a set of all roots in the DAG
+  SetVector &getRoots() { return roots; }
+
+  /// Calculates the LifeTime for the virtual wire \p qid.
+  /// The graph must be scheduled, and the wire must be used in at least one
+  /// operation for this function to succeed.
+  LifeTime getLifeTimeForQID(VirtualQID qid) {
+    auto first_use = getFirstUseOfQID(qid);
+    assert(first_use && "Cannot compute LifeTime of unused qid");
+    auto last_use = getLastUseOfQID(qid);
+    assert(last_use && "Cannot compute LifeTime of unused qid");
+    assert(first_use->cycle.has_value() &&
+           "Graph must be scheduled before lifetimes can be ascertained");
+    assert(last_use->cycle.has_value() &&
+           "Graph must be scheduled before lifetimes can be ascertained");
+    unsigned first = first_use->cycle.value();
+    auto last = last_use->cycle.value();
+
+    return LifeTime(first, last);
+  }
+
+  /// Calculates the LifeTime for \p qubit.
+  /// The graph must be scheduled, and \p qubit must be used in at least one
+  /// operation for this function to succeed.
+  LifeTime getLifeTimeForQubit(PhysicalQID qubit) {
+    DependencyNode *first_use = getFirstUseOfQubit(qubit);
+    assert(first_use && "Cannot compute LifeTime of unused qubit");
+    DependencyNode *last_use = getLastUseOfQubit(qubit);
+    assert(last_use && "Cannot compute LifeTime of unused qubit");
+
+    assert(first_use->cycle.has_value() &&
+           "Graph must be scheduled before lifetimes can be ascertained");
+    assert(last_use->cycle.has_value() &&
+           "Graph must be scheduled before lifetimes can be ascertained");
+    unsigned first = first_use->cycle.value();
+    auto last = last_use->cycle.value();
+
+    return LifeTime(first, last);
+  }
+
+  /// Returns the first use of the virtual wire \p qid, or nullptr if \p qid is
+  /// unused in the graph.
+  // TODO: could make this a little safer by having a separate "hasUse" check,
+  // and then asserting that here.
+  OpDependencyNode *getFirstUseOfQID(VirtualQID qid) {
+    assert(qids.contains(qid) && "Given qid not in dependency graph");
+    DependencyNode *firstUse = leafs[qid]->successors[0];
+    if (firstUse->isRoot())
+      return nullptr;
+    // If a node is neither a root or leaf, it must be an OpDependencyNode
+    return static_cast(firstUse);
+  }
+
+  /// Returns the last use of the virtual wire \p qid, or nullptr if \p qid is
+  /// unused in the graph.
+  // TODO: could make this a little safer by having a separate "hasUse" check,
+  // and then asserting that here.
+  OpDependencyNode *getLastUseOfQID(VirtualQID qid) {
+    assert(qids.contains(qid) && "Given qid not in dependency graph");
+    DependencyNode *root = getRootForQID(qid);
+    DependencyNode *lastUse = nullptr;
+    for (auto dependency : root->dependencies) {
+      if (dependency.qid == qid) {
+        lastUse = dependency.node;
+        break;
+      }
+    }
+    if (lastUse && lastUse->isLeaf())
+      return nullptr;
+    // If a node is neither a root or leaf, it must be an OpDependencyNode
+    return static_cast(lastUse);
+  }
+
+  /// Returns the first use of the physical qubit \p qubit, or nullptr if \p
+  /// qubit is unused in the graph.
+  // TODO: could make this a little safer by having a separate "hasUse" check,
+  // and then asserting that here.
+  OpDependencyNode *getFirstUseOfQubit(PhysicalQID qubit) {
+    assert((qubits.count(qubit) == 1) && "Given qubit not in dependency graph");
+    auto defining = qubits[qubit];
+    // Qubit is defined here, return the first use
+    if (defining->isAlloc()) {
+      auto first_use = defining->successors.front();
+      if (first_use->isRoot())
+        return nullptr;
+      return static_cast(first_use);
+    }
+
+    // Qubit is defined in a container which is an OpDependencyNode
+    return static_cast(defining);
+  }
+
+  /// Returns the last use of the physical qubit \p qubit, or nullptr if \p
+  /// qubit is unused in the graph.
+  // TODO: could make this a little safer by having a separate "hasUse" check,
+  // and then asserting that here.
+  OpDependencyNode *getLastUseOfQubit(PhysicalQID qubit) {
+    assert((qubits.count(qubit) == 1) && "Given qubit not in dependency graph");
+    auto defining = qubits[qubit];
+    // Qubit is defined here, return the last use
+    if (defining->isAlloc()) {
+      auto alloc = static_cast(defining);
+      return getLastUseOfQID(alloc->getQID());
+    }
+
+    // Qubit is defined in a container which is an OpDependencyNode
+    return static_cast(defining);
+  }
+
+  /// Returns the alloc node for the virtual wire \p qid, fails if no such node
+  /// is found
+  InitDependencyNode *getAllocForQID(VirtualQID qid) {
+    assert(allocs.count(qid) == 1 && "Given qid not allocated in graph");
+    return allocs[qid];
+  }
+
+  /// Returns the root for the virtual wire \p qid, fails if no such root is
+  /// found
+  DependencyNode *getRootForQID(VirtualQID qid) {
+    assert(qids.contains(qid) && "Given qid not in dependency graph");
+    for (auto root : roots)
+      if (root->qids.contains(qid))
+        return root;
+
+    assert(false && "Could not find root for qid");
+  }
+
+  /// Returns the alloc node for the physical qubit \p qubit, fails if no such
+  /// node is found
+  InitDependencyNode *getAllocForQubit(PhysicalQID qubit) {
+    assert(qubits.count(qubit) == 1 && qubits[qubit]->isAlloc() &&
+           "Given qubit not allocated in graph!");
+    return static_cast(qubits[qubit]);
+  }
+
+  /// Returns the root for the physical qubit \p qubit, fails if no such root is
+  /// found
+  DependencyNode *getRootForQubit(PhysicalQID qubit) {
+    for (auto root : roots)
+      if (root->getQubits().contains(qubit))
+        return root;
+    assert(false && "Could not find root for qubit");
+  }
+
+  /// Generate code for all nodes at the given cycle in the graph,
+  /// as well as all non-quantum nodes relying on those nodes with
+  /// no other dependencies at later cycles.
+  void codeGenAt(unsigned cycle, OpBuilder &builder) {
+    SetVector nodes = getNodesAtCycle(cycle);
+
+    for (auto node : nodes)
+      node->codeGen(builder);
+  }
+
+  unsigned getHeight() { return total_height; }
+
+  /// Returns a set containing all virtual wires used in this DAG
+  SetVector getQIDs() { return SetVector(qids); }
+
+  /// Returns the set of virtual wires allocated in the DAG
+  SetVector getVirtualAllocs() {
+    SetVector allocated;
+    for (auto [qid, leaf] : allocs)
+      if (!leaf->getQubit())
+        allocated.insert(qid);
+    return allocated;
+  }
+
+  /// Returns the set of all physical qubits in the DAG
+  SetVector getQubits() {
+    auto allocated = SetVector();
+    for (auto [qubit, _] : qubits)
+      allocated.insert(qubit);
+    return allocated;
+  }
+
+  /// Returns the set of physical qubits allocated in the DAG
+  SetVector getAllocatedQubits() {
+    auto allocated = SetVector();
+    for (auto [qubit, definining] : qubits)
+      if (definining->isAlloc())
+        allocated.insert(qubit);
+    return allocated;
+  }
+
+  /// Assigns the virtual wire \p qid to the physical qubit \p phys,
+  /// assuming that \p qid is allocated in the graph.
+  void assignToPhysical(VirtualQID qid, PhysicalQID phys) {
+    // Call helper function to perform relevant checks
+    auto alloc = getAllocForQID(qid);
+    qubits[phys] = alloc;
+    alloc->assignToPhysical(phys);
+  }
+
+  /// If a physical wire representing \p phys exists, combines the virtual wire
+  /// \p qid with the physical wire representing \p phys, resulting in a single
+  /// physical wire \p phys. Otherwise, works like `assignToPhysical`.
+  ///
+  /// If combining with an existing physical wire, this function will clean up
+  /// the extra allocation/de-allocation nodes for the physical wire after
+  /// combining.
+  void combineWithPhysicalWire(VirtualQID qid, PhysicalQID phys) {
+    if (qubits.count(phys) != 1) {
+      assignToPhysical(qid, phys);
+      return;
+    }
+
+    assert(allocs.count(qid) == 1 && "Assigning a qid not in DependencyGraph!");
+    auto new_lifetime = getLifeTimeForQID(qid);
+    auto old_lifetime = getLifeTimeForQubit(phys);
+
+    // TODO: can probably clean up a bit
+    if (new_lifetime.isAfter(old_lifetime)) {
+      auto new_alloc = getAllocForQID(qid);
+      auto old_root = getRootForQubit(phys);
+
+      auto successor = new_alloc->getSuccessorForQID(qid);
+      auto idx = successor->getDependencyForQID(qid).value();
+
+      // Replace new allocation with result value for old wire
+      auto dep = old_root->dependencies[0];
+      successor->dependencies[idx] = dep;
+      dep->successors.insert(successor);
+      dep->successors.remove(old_root);
+
+      dep->updateQID(new_alloc->getQID(), dep.qid.value());
+
+      roots.remove(old_root);
+      delete old_root;
+      allocs.erase(allocs.find(new_alloc->getQID()));
+      delete new_alloc;
+
+      successor->updateWithPhysical(dep.qid.value(), phys);
+    } else {
+      auto old_alloc = getAllocForQubit(phys);
+      auto new_root = getRootForQID(qid);
+
+      auto successor = old_alloc->getSuccessorForQID(old_alloc->getQID());
+      auto idx = successor->getDependencyForQID(old_alloc->getQID()).value();
+
+      auto dep = new_root->dependencies[0];
+      successor->dependencies[idx] = dep;
+      dep->successors.insert(successor);
+      dep->successors.remove(new_root);
+
+      dep->updateQID(old_alloc->getQID(), dep.qid.value());
+
+      roots.remove(new_root);
+      allocs.erase(allocs.find(old_alloc->getQID()));
+      delete old_alloc;
+      delete new_root;
+
+      auto new_alloc = getAllocForQID(qid);
+      new_alloc->assignToPhysical(phys);
+      qubits[phys] = new_alloc;
+    }
+  }
+
+  /// Tells the graph that \p qubit is allocated and used inside \p container.
+  ///
+  /// Currently unused as all \p qubit allocations are lifted from containers,
+  /// but necessary if the implementation did not do that.
+ void addPhysicalAllocation(DependencyNode *container, PhysicalQID qubit) { + assert(containers.contains(container) && + "Illegal container in addPhysicalAllocation"); + qubits[qubit] = container; + } + + /// Qubits allocated within a dependency block that are only used inside an + /// `if` in that block, can be moved inside the `if`. + /// + /// Works outside-in, to contract as tightly as possible. + void contractAllocsPass(unsigned &next_qid) { + for (auto container : containers) + container->contractAllocsPass(next_qid); + } + + /// Assigns a cycle to every quantum operation in each dependency graph + /// (including `if`s containing quantum operations). + /// + /// Every node must be assigned a schedule greater than or equal to the height + /// of each of its dependencies + /// + /// The current implementation of the scheduling algorithm can be found in + /// DependencyGraph::schedule + void schedulingPass() { + SetVector seen; + // Schedule from the roots in order of height (starting from the tallest + // root) + auto sorted = SmallVector({roots.begin(), roots.end()}); + std::sort(sorted.begin(), sorted.end(), + [](auto x, auto y) { return x->getHeight() > y->getHeight(); }); + + // Every node visiting during scheduling will be in seen, so + // if the scheduling function has already visited the root then it will be + // skipped + for (auto root : sorted) { + // Can either schedule starting with a level of `root->getHeight()`, which + // will result in more operations at earlier cycles, or `total_height`, + // which will result in more operations at later cycles + schedule(seen, root, total_height); + } + } + + void dump() { + llvm::outs() << "Graph Start\n"; + for (auto root : roots) + root->dump(); + llvm::outs() << "Graph End\n"; + } + + /// Recursively invokes performAnalysis on all container nodes within this DAG + void performAnalysis(LifeTimeAnalysis &set) { + for (auto container : containers) + container->performAnalysis(set, this); + } + + /// Removes 
the alloc/de-alloc nodes for \p qid, assuming \p qid is allocated + /// within this DAG It is the responsibility of the caller to delete the nodes + /// if desired. + // TODO: ensure callers cleanup the nodes properly (it doesn't look like + // contractallocsPass or lowerAlloc do) + void removeVirtualAlloc(VirtualQID qid) { + // TODO: This function does not look right. First, it should ensure that \p + // qid is actually allocated in this graph, to avoid, among other issues, + // removing the TerminatorDependencyNode from the graph. + // Second, it should remove both the alloc and the root together, not + // in separate checks. Third, I don't know about the below comment + // "ignore already removed qid", that should probably be an error if + // you're trying to remove a qid again. This is currently only used by + // contractAllocsPass, so probably was written overly specific for + // that use case. + + // Ignore already removed qid + if (allocs.count(qid) == 1) + allocs.erase(allocs.find(qid)); + + if (qids.contains(qid)) { + auto toRemove = getRootForQID(qid); + roots.remove(toRemove); + } + } + + /// Simultaneously replaces the leaf and root nodes for \p qid, or + /// adds them if \p qid was not present before. The operations are separate, + /// but doing them together makes it harder to produce an invalid graph. + /// + /// Mostly, this function ensures that the graph metadata is properly updated + /// when replacing the leaf and root. In the case that the new_leaf has a + /// different qid than \p qid, this function will remove the metadata for + /// \p qid, and will update the qids of all nodes and edges that were along + /// the path for \p qid. + /// + /// It is assumed that there is a path between \p new_leaf and \p new_root for + /// \p qid, otherwise, the updated metadata is likely to be wrong. + /// + /// It is the responsibility of the caller to delete the replaced leaf/root if + /// desired. 
+ // TODO: See above comment on `replaceLeaf`: this function is pretty fragile + // as currently written and used. + // TODO: Worth checking that callers delete the replaced leaf/root properly + // when applicable. I think lowerAlloc does, which is the main place. + // TODO: I think DependencyGraph::updateQID will be useful when cleaning this + // up. + void replaceLeafAndRoot(VirtualQID qid, DependencyNode *new_leaf, + DependencyNode *new_root) { + auto new_qid = qid; + if (!new_leaf->qids.empty()) + new_qid = new_leaf->qids.front(); + + replaceLeaf(qid, new_qid, new_leaf); + replaceRoot(qid, new_qid, new_root); + + qids.insert(new_qid); + + if (new_qid != qid) { + qids.remove(qid); + new_leaf->updateQID(qid, new_qid); + } + } + + /// Removes \p qid from the metadata for this graph + void removeQID(VirtualQID qid) { + leafs.erase(leafs.find(qid)); + qids.remove(qid); + } + + /// Replaces \p old_qid with \p new_qid in the graph and updates relevant + /// metdata + void updateQID(VirtualQID old_qid, VirtualQID new_qid) { + assert(qids.contains(old_qid) && "Given qid not found in graph!"); + assert(!qids.contains(new_qid) && "Given qid to add already in graph!"); + auto leaf = leafs[old_qid]; + leaf->updateQID(old_qid, new_qid); + + leafs.erase(leafs.find(old_qid)); + leafs[new_qid] = leaf; + if (leaf->isAlloc()) { + allocs.erase(allocs.find(old_qid)); + auto alloc = static_cast(leaf); + allocs[new_qid] = alloc; + // Qubit info will remain intact, no need to update + } + + qids.remove(old_qid); + qids.insert(new_qid); + } + + void updateHeight() { + total_height = 0; + SetVector seen; + for (auto root : roots) { + updateHeight(seen, root); + if (root->height > total_height) + total_height = root->height; + } + } +}; + +/// Represent the deallocation (`quake.return_wire` op) of a virtual/physical +/// wire +// TODO: come up with a better name, since terminators are also roots +class RootDependencyNode : public OpDependencyNode { +protected: + void dumpNode() override 
{ + llvm::outs() << "Dealloc for QID "; + for (auto qid : qids) + llvm::outs() << qid; + llvm::outs() << ": "; + associated->dump(); + } + + void genOp(OpBuilder &builder) override { + auto wire = dependencies[0].getValue(); + auto newOp = + builder.create(builder.getUnknownLoc(), wire); + newOp->setAttrs(associated->getAttrs()); + newOp->removeAttr("dnodeid"); + associated = newOp; + } + +public: + RootDependencyNode(quake::ReturnWireOp op, + SmallVector dependencies) + : OpDependencyNode(op, dependencies) { + // TODO: does this below comment still hold? + // numTicks won't be properly calculated by OpDependencyNode constructor, + // so have to recompute height here + updateHeight(); + }; + + ~RootDependencyNode() override {} + + bool isSkip() override { return true; } + + unsigned numTicks() override { return 0; } + + void eraseEdgeForQID(VirtualQID qid) override { + if (qids.contains(qid)) + dependencies.clear(); + } + + SetVector getQubits() override { + SetVector qubits; + for (auto dependency : dependencies) + if (dependency.qubit.has_value()) + qubits.insert(dependency.qubit.value()); + return qubits; + } +}; + +/// Represents a block argument. Block arguments must have linear types, +/// and therefore will always represent wires. 
+class ArgDependencyNode : public DependencyNode { + friend class DependencyBlock; + +protected: + BlockArgument barg; + unsigned argNum = 0; + + void dumpNode() override { + // TODO: I don't think this can ever be false + if (!qids.empty()) + llvm::outs() << "QID: " << qids.front() << ", "; + llvm::outs() << "argNum: " << argNum << ", "; + barg.dump(); + } + + Value getResult(unsigned resultidx) override { + assert(resultidx == 0 && "Invalid resultidx"); + return barg; + } + + virtual std::string getOpName() override { + return std::to_string(barg.getArgNumber()).append("arg"); + }; + + void codeGen(OpBuilder &builder) override{}; + +public: + ArgDependencyNode(BlockArgument arg) + : barg(arg), argNum(arg.getArgNumber()) {} + + ArgDependencyNode(BlockArgument arg, unsigned num) : barg(arg), argNum(num) {} + + ArgDependencyNode(BlockArgument arg, DependencyEdge val) + : ArgDependencyNode(arg) { + auto qid = val->getQIDForResult(val.resultidx); + if (qid.has_value()) + qids.insert(qid.value()); + } + + ArgDependencyNode(BlockArgument arg, DependencyEdge val, unsigned num) + : barg(arg), argNum(num) { + auto qid = val->getQIDForResult(val.resultidx); + if (qid.has_value()) + qids.insert(qid.value()); + } + + ~ArgDependencyNode() override {} + + bool isRoot() override { return false; } + bool isLeaf() override { return true; } + // TODO: I'm pretty sure this is always true + bool isQuantumOp() override { return quake::isQuantumType(barg.getType()); } + unsigned numTicks() override { return 0; } + + void eraseEdgeForQID(VirtualQID qid) override { + assert(false && "Can't call eraseEdgeForQID with an ArgDependencyNode"); + } + + std::optional getQIDForResult(std::size_t resultidx) override { + assert(resultidx == 0 && "Invalid resultidx"); + if (qids.size() == 1) + return std::optional(qids.front()); + return std::nullopt; + } + + unsigned getArgNumber() { return argNum; } +}; + +/// Wires are linear types and therefore are passed as operands to `if`s, +/// ensuring 
that `if`s act properly as "solid containers" for them. However, +/// this is not the case with quantum-dependent classical values. To ensure the +/// solidity of `if`s, we introduce "shadow dependencies" between `if`s and any +/// quantum-dependent classical values used within the body of the `if`. Then, +/// instead of referring to the value directly within the `if`, we use a +/// ShadowDependencyNode to depend on the value without depending on the node +/// for the value since the node for the value is located in a different graph. +/// +/// A concrete example of where things can go wrong without shadow dependencies +/// is in `test/Quake/dependency-if-bug-classical.qke`. +class ShadowDependencyNode : public DependencyNode { + friend class IfDependencyNode; + +protected: + OpDependencyNode *shadowed; + DependencyEdge shadow_edge; + + void dumpNode() override { + llvm::outs() << "Shadow dependency on: "; + shadowed->dumpNode(); + } + + Value getResult(unsigned resultidx) override { + return shadowed->getResult(resultidx); + } + + virtual std::string getOpName() override { + return shadowed->getOpName().append("shadow"); + }; + + void codeGen(OpBuilder &builder) override { + // Don't generate any code, instead just ensure that the + if (shadowed->hasCodeGen) + hasCodeGen = true; + }; + +public: + // TODO: constructor should ensure that the value from shadowed is not a + // quantum type (but that shadowed is quantumDependent). 
+ ShadowDependencyNode(OpDependencyNode *shadowed, std::size_t resultidx) + : shadowed(shadowed), shadow_edge(shadowed, resultidx) {} + + ~ShadowDependencyNode() override {} + + bool isRoot() override { return false; } + bool isLeaf() override { return true; } + bool isQuantumOp() override { return false; } + unsigned numTicks() override { return 0; } + + void eraseEdgeForQID(VirtualQID qid) override { + assert(false && "Can't call eraseEdgeForQID with an ShadowDependencyNode"); + } + + std::optional getQIDForResult(std::size_t resultidx) override { + return std::nullopt; + } + + DependencyEdge getShadowedEdge() { return shadow_edge; } +}; + +/// Represents a block terminator, usually either a `cc.continue` or a `return`. +/// Importantly, a block terminator should only have code generated for it +/// after all over nodes in the graph have code generated, so that it is always +/// the last operation in the block. +class TerminatorDependencyNode : public OpDependencyNode { +protected: + void dumpNode() override { + llvm::outs() << "Block Terminator With QIDs "; + bool dumpComma = false; + for (auto qid : qids) { + if (dumpComma) + llvm::outs() << ", "; + llvm::outs() << qid; + dumpComma = true; + } + llvm::outs() << ": "; + associated->dump(); + } + + unsigned numTicks() override { return 0; } + + bool isQuantumOp() override { return qids.size() > 0; } + + // If the terminator is not a quantum operation, this could be called + // by dependencies, so do nothing. 
+ void codeGen(OpBuilder &builder) override{}; + +public: + TerminatorDependencyNode(Operation *terminator, + SmallVector dependencies) + : OpDependencyNode(terminator, dependencies) { + assert(terminator->hasTrait() && + "Invalid terminator"); + for (auto dependency : dependencies) + if (dependency.qid.has_value()) + qids.insert(dependency.qid.value()); + } + + ~TerminatorDependencyNode() override {} + + /// This will actually generate code for the terminator, it should only be + /// called after all other operations in the block have code generated. + void genTerminator(OpBuilder &builder) { OpDependencyNode::codeGen(builder); } + + void eraseEdgeForQID(VirtualQID qid) override { + for (unsigned i = 0; i < dependencies.size(); i++) + if (dependencies[i].qid == qid) + dependencies.erase(dependencies.begin() + i); + qids.remove(qid); + } + + std::optional getQIDForResult(std::size_t resultidx) override { + if (resultidx >= dependencies.size() || + !dependencies[resultidx]->isQuantumOp()) + return std::nullopt; + return dependencies[resultidx].qid; + } + + SetVector getQubits() override { + SetVector qubits; + for (auto dependency : dependencies) + if (dependency.qubit.has_value()) + qubits.insert(dependency.qubit.value()); + return qubits; + } +}; + +/// A DependencyBlock represents an mlir::block. +/// It contains a DependencyGraph representing the block body, +/// ArgDependencyNodes for the block arguments, and a TerminatorDependencyNode +/// for the block terminator. 
+class DependencyBlock { +private: + SmallVector argdnodes; + DependencyGraph *graph; + Block *block; + TerminatorDependencyNode *terminator; + +public: + DependencyBlock(SmallVector argdnodes, + DependencyGraph *graph, Block *block, + TerminatorDependencyNode *terminator) + : argdnodes(argdnodes), graph(graph), block(block), + terminator(terminator) {} + + ~DependencyBlock() { + // Terminator is cleaned up by graph since it must be a root + delete graph; + // Arguments are not handled by the graph since they may not show up in the + // graph + for (auto argdnode : argdnodes) + delete argdnode; + } + + unsigned getHeight() { return graph->getHeight(); } + + SetVector getVirtualAllocs() { return graph->getVirtualAllocs(); } + + SetVector getQIDs() { return graph->getQIDs(); } + + DependencyGraph *getBlockGraph() { return graph; } + + TerminatorDependencyNode *getTerminator() { return terminator; } + + /// Allocates physical qubits for all virtual wires + /// allocated within the block, using lifetime information + /// from the DependencyGraph representing the body. + /// + /// Currently, reuse decisions are enforced by coupling virtual wires + /// assigned to the same physical wire, so they become a single physical + /// wire. This is not strictly necessary, but is an effective and simple + /// way to ensure that other analyses/optimizations respect the reuse + /// decisions. 
+ void allocatePhyiscalQubits(LifeTimeAnalysis &set) { + for (auto qubit : graph->getQubits()) { + auto lifetime = graph->getLifeTimeForQubit(qubit); + set.reallocatePhysical(qubit, lifetime); + } + + // New physical qubits will be captured by `set` + for (auto qid : getVirtualAllocs()) { + if (!graph->getFirstUseOfQID(qid)) + continue; + + auto lifetime = graph->getLifeTimeForQID(qid); + LLVM_DEBUG(llvm::dbgs() << "Qid " << qid); + LLVM_DEBUG(llvm::dbgs() + << " is in use from cycle " << lifetime.getBegin()); + LLVM_DEBUG(llvm::dbgs() << " through cycle " << lifetime.getEnd()); + LLVM_DEBUG(llvm::dbgs() << "\n"); + + auto phys = set.allocatePhysical(qid, lifetime); + LLVM_DEBUG(llvm::dbgs() + << "\tIt is mapped to the physical qubit " << phys); + LLVM_DEBUG(llvm::dbgs() << "\n\n"); + + // This will assign the virtual wire qid to the physical wire phys, + // combining with existing uses of phys to thread the wire through. + // This ensures that further optimizations respect the schedule of + // operations on phys, and that all qids mapped to phys remain mapped + // to phys. + + // If this is not desired, use `graph->assignToPhysical` here instead, + // but it is crucial to somehow otherwise ensure that the + // scheduling of operations on phys is respected by further + // optimizations, as there is the potential for incorrect IR output. + + // Also importantly, doing so will require changes to graph metadata, + // to ensure that multiple allocations of the same physical wire within a + // single graph are handled properly. + graph->combineWithPhysicalWire(qid, phys); + } + } + + /// Generates code for the block arguments, body, and terminator. + /// + /// It is up to the caller to move the insertion point of \p builder outside + /// the block after construction. 
+ Block *codeGen(OpBuilder &builder, Region *region) { + Block *newBlock = builder.createBlock(region); + for (unsigned i = 0; i < argdnodes.size(); i++) { + auto old_barg = argdnodes[i]->barg; + argdnodes[i]->barg = + newBlock->addArgument(old_barg.getType(), old_barg.getLoc()); + assert(i == argdnodes[i]->argNum && "Malformed Block Argument!"); + argdnodes[i]->hasCodeGen = true; + } + + builder.setInsertionPointToStart(newBlock); + + for (unsigned cycle = 0; cycle < graph->getHeight(); cycle++) + graph->codeGenAt(cycle, builder); + + terminator->genTerminator(builder); + + block = newBlock; + + return newBlock; + } + + void dump() { + llvm::outs() << "Block with (" << argdnodes.size() << ") args:\n"; + // block->dump(); + // llvm::outs() << "Block graph:\n"; + graph->dump(); + llvm::outs() << "End block\n"; + } + + void updateHeight() { graph->updateHeight(); } + + /// Recursively schedules nodes and performs lifetime analysis to allocate + /// physical qubits for virtual wires, working inside out. For + /// `DependencyBlock`s, this means recurring on any containers inside + /// the body of the block, then performing scheduling, and finally + /// allocating physical qubits based on lifetime information. + void performAnalysis(LifeTimeAnalysis &set) { + // The analysis works inside-out, so first resolve all nested `if`s + graph->performAnalysis(set); + + // Update metadata after the analysis + updateHeight(); + // Schedule the nodes for lifetime analysis + schedulingPass(); + + // Finally, perform lifetime analysis and allocate physical qubits + // Allocations will be captured in `set` + allocatePhyiscalQubits(set); + } + + /// Checks to see if qubits allocated within a block are only used + /// inside an `if` in that block, in which case they can be moved + /// inside the `if`. + /// + /// Works outside-in, to contract as tightly as possible. + /// + /// Assumes \p next_qid is a counter whose value is a VirtualQID + /// that is not already in use in the circuit. 
+ void contractAllocsPass(unsigned &next_qid) { + // Look for contract-able allocations in this block + for (auto alloc : getVirtualAllocs()) { + auto first_use = graph->getFirstUseOfQID(alloc); + assert(first_use && "Unused virtual wire in block!"); + auto last_use = graph->getLastUseOfQID(alloc); + if (first_use == last_use && first_use->isContainer()) { + // Move alloc inside + auto root = graph->getRootForQID(alloc); + auto init = graph->getAllocForQID(alloc); + first_use->lowerAlloc(init, root, alloc, next_qid); + // Qid is no longer used in this block, remove related metadata + graph->removeVirtualAlloc(alloc); + graph->removeQID(alloc); + } + } + + // Outside-in, so recur only after applying pass to this block + graph->contractAllocsPass(next_qid); + } + + /// Moves an alloc/de-alloc pair for the virtual wire \p qid into this block, + /// Replacing the existing block argument and terminator dependencies for the + /// wire. + void lowerAlloc(DependencyNode *init, DependencyNode *root, VirtualQID qid) { + // No need to clean up existing terminator (hopefully) + graph->replaceLeafAndRoot(qid, init, root); + // Clean up old block argument + removeArgument(qid); + // If the qid isn't actually used in the block, remove it + if (!graph->getFirstUseOfQID(qid)) { + // TODO: clean up init and root in this case + graph->removeVirtualAlloc(qid); + graph->removeQID(qid); + } + } + + /// Removes an alloc/de-alloc pair for the virtual wire \p qid from this + /// block, Replacing the pair with a new block argument and terminator + /// dependency for the wire. + /// + /// The caller is responsible for cleaning up the old alloc/de-alloc pair. 
+ void liftAlloc(VirtualQID qid, DependencyNode *lifted_alloc) { + auto new_edge = DependencyNode::DependencyEdge{lifted_alloc, 0}; + auto new_argdnode = addArgument(new_edge); + + graph->replaceLeafAndRoot(qid, new_argdnode, terminator); + } + + void schedulingPass() { graph->schedulingPass(); } + + /// Removes a block argument/terminator dependency pair for a virtual wire \p + /// qid flowing through this block + void removeQID(VirtualQID qid) { + // TODO: ensure that the virtual wire does flow through the block as an + // argument/terminator pair. removeArgument will at least ensure that such + // an argument exists, but terminator->eraseEdgeForQID below won't. + removeArgument(qid); + + terminator->eraseEdgeForQID(qid); + graph->removeQID(qid); + } + + SetVector getQubits() { return graph->getQubits(); } + + SetVector getAllocatedQubits() { + return graph->getAllocatedQubits(); + } + + /// Adds a block argument and corresponding ArgDependencyNode to the block + DependencyNode *addArgument(DependencyNode::DependencyEdge incoming) { + auto new_barg = block->addArgument(incoming.getValue().getType(), + incoming.getValue().getLoc()); + auto new_argdnode = + new ArgDependencyNode(new_barg, incoming, argdnodes.size()); + argdnodes.push_back(new_argdnode); + return new_argdnode; + } + + /// Removes the block argument and cleans up the corresponding + /// ArgDependencyNode for \p qid + void removeArgument(VirtualQID qid) { + unsigned i = 0; + bool found = false; + for (; i < argdnodes.size(); i++) { + if (argdnodes[i]->qids.contains(qid)) { + delete argdnodes[i]; + argdnodes.erase(argdnodes.begin() + i); + found = true; + break; + } + } + + assert(found && "Could not find argument to remove!"); + + // Shift the offset of all arguments after the removed one + for (; i < argdnodes.size(); i++) + argdnodes[i]->argNum--; + } + + std::optional getQIDForResult(std::size_t resultidx) { + return terminator->getQIDForResult(resultidx); + } +}; + +class IfDependencyNode : public 
OpDependencyNode { +protected: + DependencyBlock *then_block; + DependencyBlock *else_block; + SmallVector results; + SetVector freevars; + + // TODO: figure out nice way to display + void dumpNode() override { + this->OpDependencyNode::dumpNode(); + // llvm::outs() << "If with results:\n"; + // for (auto result : results) + // result.dump(); + llvm::outs() << "Then "; + then_block->dump(); + llvm::outs() << "Else "; + else_block->dump(); + } + + /// Checks if \p then_use and \p else_use are prefixEquivalent and have no + /// quantum dependencies, and if so lifts them before the this `if` node. + /// + /// Assumes that \p then_use is from then_block and \p else_use is from + /// else_block, but this is not checked. + bool tryLiftingBefore(OpDependencyNode *then_use, + OpDependencyNode *else_use) { + if (!then_use || !else_use) + return false; + + // The algorithmic logic assumes `if`s are fully resolved once, + // but lifting them to a parent scope will cause them to be resolved + // again, so lifting `if`s is not a good idea. Also, the equivalence + // check currently ignores the body of `if`s. + if (then_use->isContainer()) + return false; + + if (then_use->prefixEquivalentTo(else_use)) { + // If two nodes are equivalent, all their dependencies will be too, + // but we can't lift them until all their dependencies have been lifted, + // so we skip them for now. + if (then_use->height > then_use->numTicks()) + return false; + + liftOpBefore(then_use, else_use); + return true; + } + + return false; + } + + /// Checks if \p then_use and \p else_use are equivalent and have no classical + /// dependencies/results, and if so lifts them before the this `if` node. + /// + /// Assumes that \p then_use is from then_block and \p else_use is from + /// else_block, but this is not checked. 
+ bool tryLiftingAfter(OpDependencyNode *then_use, OpDependencyNode *else_use) { + if (!then_use || !else_use) + return false; + + // TODO: measure ops are a delicate special case because of the classical + // measure result. When lifting before, we can lift the discriminate op as + // well. However, it may have interactions with other classical values, and + // then be "returned" from the `if` + if (isa(then_use->associated)) + return false; + + // The algorithmic logic assumes `if`s are fully resolved once, + // but lifting them to a parent scope will cause them to be resolved + // again, so lifting `if`s is not a good idea. Also, the equivalence + // check currently ignores the body of `if`s. + if (then_use->isContainer()) + return false; + + // TODO: probably shouldn't try lifting containers + // see targettests/execution/qubit_management_bug_lifting_ifs.cpp + + if (then_use->postfixEquivalentTo(else_use)) { + // If two nodes are equivalent, all their successors should be too + // but we can't lift them until all their successors have been lifted, + // so we skip them for now. + for (auto successor : then_use->successors) + if (!successor->isSkip()) + return false; + // TODO: Classical input from within the if scope poses an issue for + // lifting for a similar reason as measures + for (auto dependency : then_use->dependencies) + if (!dependency->isQuantumOp()) + return false; + + liftOpAfter(then_use, else_use); + return true; + } + + return false; + } + + /// Lifts equivalent operations from the then and else blocks to be added as a + /// successor to this node. The lifted operation will have dependencies on the + /// results from this `if` node. Wires that used to flow through + /// then_op/else_op will still flow to the terminator and be returned. + /// + /// Assumes \p then_op and \p else_op are equivalent quantum operations from + /// then_block and else_block respectively, without classical input or + /// results. 
+ /// + /// This function is responsible for cleaning up \p then_use and \p else_use + /// after lifting. + void liftOpAfter(OpDependencyNode *then_op, OpDependencyNode *else_op) { + auto newDeps = SmallVector(); + auto allocated = then_block->getAllocatedQubits(); + + unsigned i = 0; + while (!then_op->dependencies.empty()) { + // Every dependency is erased as it is processed, so we always grab + // the front dependency + auto dependency = then_op->dependencies.front(); + assert(dependency.qid && "Lifting operations with classical input after " + "blocks is not yet supported."); + + // TODO: if allocations are not always lifted, then it is necessary to + // lift allocations then_op depends on, but only if it is safe to lift. + + auto then_qid = dependency.qid.value(); + auto then_qubit_opt = dependency.qubit; + auto resultidx = then_op->getResultForDependency(i); + + // Remove edge in the `if` body, erases the current dependency too + then_op->eraseEdgeForQID(then_qid); + // Update iterator as number of dependencies has changed + + // Add new edge from after this `if` + auto successor = getSuccessorForQID(then_qid); + auto idx = successor->getDependencyForQID(then_qid).value(); + + newDeps.push_back(successor->dependencies[idx]); + successor->dependencies[idx] = DependencyEdge{then_op, resultidx}; + successor->dependencies[idx].qid = then_qid; + successor->dependencies[idx].qubit = then_qubit_opt; + then_op->successors.insert(successor); + + // Readd QID + then_op->qids.insert(then_qid); + i++; + } + + successors.insert(then_op); + then_op->dependencies = newDeps; + else_op->erase(); + delete else_op; + } + + /// Lifts equivalent operations from the then and else blocks to be added as a + /// dependency to this node. The lifted operation will have dependencies on + /// block argument replaced with the relevant dependencies from this `if` + /// node, and the relevant dependencies from this `if` node will be replaced + /// with the results from the lifted operation. 
+ /// + /// Assumes \p then_op and \p else_op are equivalent quantum operations from + /// then_block and else_block respectively, without classical input or + /// results. + /// + /// This function is responsible for cleaning up \p then_use and \p else_use, + /// as well as any unused classical values depending on them after lifting. + void liftOpBefore(OpDependencyNode *then_op, OpDependencyNode *else_op) { + auto newDeps = SmallVector(); + + // Measure ops are a delicate special case because of the classical measure + // result. When lifting before, we can lift the discriminate op as well, + // but, the classical result is now free in the body of the if (assuming it + // was used) so we must add a shadow dependency on it. + // TODO: a similar problem can arise for classical results from lifted + // `if`s. + // This will cause bugs currently. The easy solution is to avoid + // lifting `if`s, and the trickier solution is to add shadow + // dependencies for, and properly clean up, arbitrary classical + // results for lifted operations. + if (isa(then_op->associated)) { + auto then_discriminate = then_op->successors.front()->isQuantumOp() + ? then_op->successors.back() + : then_op->successors.front(); + auto else_discriminate = else_op->successors.front()->isQuantumOp() + ? else_op->successors.back() + : else_op->successors.front(); + auto casted = static_cast(then_discriminate); + // Lifting the classical value requires adding a shadow dependency on it. + // TODO: only do so if the classical value is used (and clean it up if + // not). 
+ auto newfreevar = new ShadowDependencyNode(casted, 0); + auto newEdge = DependencyEdge{newfreevar, 0}; + then_discriminate->replaceWith(newEdge); + else_discriminate->replaceWith(newEdge); + dependencies.push_back(newEdge); + freevars.insert(newfreevar); + + delete else_discriminate; + } + + // Construct new dependencies for then_op based on the dependencies for this + // `if` + for (unsigned i = 0; i < then_op->dependencies.size(); i++) { + auto dependency = then_op->dependencies[i]; + + if (freevars.contains(dependency.node)) { + // If the dependency is a free variable with this `if` as the frontier, + // then we can just use the value directly, instead of the shadowed + // value + auto shadowNode = static_cast(dependency.node); + auto edge = shadowNode->getShadowedEdge(); + newDeps.push_back(edge); + shadowNode->successors.remove(then_op); + // Remove shadowNode if it is no longer needed + if (shadowNode->successors.empty()) { + for (unsigned i = 0; i < dependencies.size(); i++) + if (dependencies[i].node == edge.node && + dependencies[i].resultidx == edge.resultidx) + dependencies.erase(dependencies.begin() + i); + freevars.remove(shadowNode); + delete shadowNode; + } + } else if (dependency->isLeaf() && dependency->isQuantumOp()) { + // The dependency is a block argument, and therefore reflects a + // dependency for this `if` First, find the relevant argument + ArgDependencyNode *arg = + static_cast(dependency.node); + auto num = arg->getArgNumber(); + // Then, get the dependency from this `if` for the relevant argument, + // this will be the new dependency for `then_op` + auto newDep = dependencies[num + 1]; + newDep->successors.remove(this); + newDep->successors.insert(then_op); + newDeps.push_back(newDep); + arg->successors.remove(then_op); + + // Replace the dependency with the relevant result from the lifted node + dependencies[num + 1] = + DependencyEdge{then_op, then_op->getResultForDependency(i)}; + + dependencies[num + 1].qubit = newDep.qubit; + + 
// Remove then_op from the route for then_qid inside the block
+ then_op->eraseEdgeForQID(dependency.qid.value());
+ // Re-add qid
+ then_op->qids.insert(dependency.qid.value());
+ // Update iterator as number of dependencies has changed
+ i--;
+ } else if (!dependency->isQuantumOp()) {
+ newDeps.push_back(dependency);
+ } else {
+ assert(
+ false &&
+ "Trying to lift a quantum operation before dependency was lifted");
+ }
+ }
+
+ else_op->erase();
+ delete else_op;
+
+ // Patch successors
+ then_op->successors.insert(this);
+ then_op->dependencies = newDeps;
+ }
+
+ /// Lifts a qubit allocated in the then/else blocks to be allocated
+ /// in the graph \p parent containing this `if`. Adds a new linear argument,
+ /// depending on the lifted alloc, and corresponding result, depended on by
+ /// the lifted dealloc, to this `if`. If the physical qubit is present in an
+ /// inner block, the inner alloc/de-alloc pair will be removed. Both blocks
+ /// will have the new argument/terminator dependency added, so the wire flows
+ /// through the block properly even if it is not used.
+ ///
+ /// Currently, all allocations are lifted from `if`s, so they can be combined
+ /// in the parent context. This is legal, as `if`s are treated as "solid
+ /// barriers" in the parent graph, so allocating before/after the `if` is
+ /// equivalent to allocating within the `if`. This effectively undoes
+ /// contractAllocsPass once having allocs contracted for the
+ /// analysis is no longer helpful.
+ ///
+ /// This is not necessary to always perform, instead, one could only lift
+ /// allocs when lifting operations on those allocs, but it is very difficult
+ /// to do that safely.
+ ///
+ /// Assumes (and checks) that \p qubit is allocated in either/both the
+ /// then/else blocks.
+ ///
+ /// If the qubit is present in both child blocks, then the extra
+ /// alloc/de-alloc pair is cleaned up here. 
+ void liftAlloc(PhysicalQID qubit, DependencyGraph *parent) { + InitDependencyNode *lifted_alloc = nullptr; + DependencyNode *lifted_root = nullptr; + + bool then_contains = false; + bool else_contains = false; + + auto then_graph = then_block->getBlockGraph(); + auto else_graph = else_block->getBlockGraph(); + + // Remove virtual allocs from inner blocks + if (else_block->getAllocatedQubits().contains(qubit)) { + lifted_alloc = else_graph->getAllocForQubit(qubit); + lifted_root = else_graph->getRootForQubit(qubit); + else_block->liftAlloc(lifted_alloc->getQID(), lifted_alloc); + else_contains = true; + } + + if (then_block->getAllocatedQubits().contains(qubit)) { + auto then_alloc = then_graph->getAllocForQubit(qubit); + auto then_root = then_graph->getRootForQubit(qubit); + // If the qubit is only in one block, use the alloc/dealloc pair + // from that block + if (!else_contains) { + lifted_alloc = then_alloc; + lifted_root = then_root; + } + + // lifted_alloc will be else_alloc if both blocks contain + // the qubit, so the metadata for the then_block graph + // will be updated correctly when replacing the alloc/dealloc + // with a block arg and terminator edge. 
+ then_block->liftAlloc(then_alloc->getQID(), lifted_alloc); + then_contains = true; + + // Clean up extra alloc/root pair if both blocks contain + // the qubit + if (lifted_alloc != then_alloc) { + delete then_alloc; + delete then_root; + } + } + + assert(lifted_alloc && lifted_root && "Illegal qubit to lift!"); + + if (!then_contains) { + auto new_arg = then_block->addArgument(DependencyEdge{lifted_alloc, 0}); + auto terminator = then_block->getTerminator(); + terminator->dependencies.push_back(DependencyEdge{new_arg, 0}); + terminator->qids.insert(lifted_alloc->getQID()); + new_arg->successors.insert(terminator); + then_graph->replaceLeafAndRoot(lifted_alloc->getQID(), new_arg, + terminator); + } + + if (!else_contains) { + auto new_arg = else_block->addArgument(DependencyEdge{lifted_alloc, 0}); + auto terminator = else_block->getTerminator(); + terminator->dependencies.push_back(DependencyEdge{new_arg, 0}); + terminator->qids.insert(lifted_alloc->getQID()); + new_arg->successors.insert(terminator); + else_graph->replaceLeafAndRoot(lifted_alloc->getQID(), new_arg, + terminator); + } + + qids.insert(lifted_alloc->getQID()); + // Hook lifted_root to the relevant result wire from this + this->successors.insert(lifted_root); + auto out_edge = DependencyEdge{this, results.size()}; + out_edge.qid = lifted_alloc->getQID(); + out_edge.qubit = lifted_alloc->getQubit(); + lifted_root->dependencies.push_back(out_edge); + // Hook this to lifted_alloc by adding a new dependency for the lifted wire + DependencyEdge in_edge(lifted_alloc, 0); + in_edge.qid = lifted_alloc->getQID(); + in_edge.qubit = lifted_alloc->getQubit(); + dependencies.push_back(in_edge); + // Add a corresponding result wire for the lifted wire which will flow + // to lifted_root + results.push_back(in_edge.getValue().getType()); + // Hook lifted_alloc to this + lifted_alloc->successors.insert(this); + + // Add virtual alloc to current scope + parent->replaceLeafAndRoot(lifted_alloc->getQID(), lifted_alloc, 
+ lifted_root); + } + + /// Combines physical allocations from the then and else branches + /// by pairing them together and possibly re-indexing while respecting + /// reuse decisions. + void combineAllocs(SetVector then_allocs, + SetVector else_allocs) { + SetVector combined; + combined.set_union(then_allocs); + combined.set_union(else_allocs); + + // Currently, respecting reuse is enforced by combining physical wires. + // TODO: can combine allocs in much smarter ways, possibly with heuristics, + // to do a better job of finding lifting opportunities. + // To do so, would need to implement re-indexing (with the current + // implementation using just a single wire per physical qubit, this + // could be done easily with an updateQubit function like updateQID). + } + + void genOp(OpBuilder &builder) override { + cudaq::cc::IfOp oldOp = dyn_cast(associated); + + auto operands = gatherOperands(builder); + + // Remove operands from shadow dependencies + // First operand must be conditional, skip it + for (unsigned i = 1; i < operands.size(); i++) { + if (!quake::isQuantumType(operands[i].getType())) { + operands.erase(operands.begin() + i); + i--; + } + } + + auto newIf = + builder.create(oldOp->getLoc(), results, operands); + auto *then_region = &newIf.getThenRegion(); + then_block->codeGen(builder, then_region); + + auto *else_region = &newIf.getElseRegion(); + else_block->codeGen(builder, else_region); + + associated = newIf; + builder.setInsertionPointAfter(associated); + } + + std::optional getQIDForResult(std::size_t resultidx) override { + return then_block->getQIDForResult(resultidx); + } + +public: + IfDependencyNode(cudaq::cc::IfOp op, + SmallVector _dependencies, + DependencyBlock *then_block, DependencyBlock *else_block, + SetVector _freevars) + : OpDependencyNode(op.getOperation(), _dependencies), + then_block(then_block), else_block(else_block) { + for (auto freevar : _freevars) { + dependencies.push_back(freevar->getShadowedEdge()); + 
freevars.insert(freevar); + } + + results = SmallVector(op.getResultTypes()); + // Unfortunately, some metadata won't be computed properly by + // OpDependencyNode constructor, so recompute here + height = 0; + for (auto edge : dependencies) { + if (edge->getHeight() > height) + height = edge->getHeight(); + if (edge.qid.has_value() && isQuantumOp()) + qids.insert(edge.qid.value()); + } + height += numTicks(); + } + + ~IfDependencyNode() override { + delete then_block; + delete else_block; + } + + unsigned numTicks() override { + return std::max(then_block->getHeight(), else_block->getHeight()); + } + + bool isSkip() override { return numTicks() == 0; } + + bool isQuantumOp() override { return numTicks() > 0; } + + bool isContainer() override { return true; } + + SetVector getQubits() override { + auto qubits = SetVector(); + qubits.set_union(then_block->getQubits()); + qubits.set_union(else_block->getQubits()); + return qubits; + } + + void contractAllocsPass(unsigned &next_qid) override { + then_block->contractAllocsPass(next_qid); + else_block->contractAllocsPass(next_qid); + } + + /// Removes \p qid (and associated args/terminator dependencies) + /// from the inner blocks of the `if`. Also removes this `if` + /// from the dependency path for \p qid. 
+ /// The expectation is that \p qid flows + /// through both the then and else blocks of this `if` + void eraseEdgeForQID(VirtualQID qid) override { + // First, calculate which result to remove, but don't remove it yet + unsigned offset = 0; + for (; offset < results.size(); offset++) + if (getQIDForResult(offset) == qid) + break; + + // Erase the actual edge with the blocks now set up properly + this->OpDependencyNode::eraseEdgeForQID(qid); + + // Now, remove the QID from the blocks so that the blocks are set up + // properly + then_block->removeQID(qid); + else_block->removeQID(qid); + + // Finally, remove the calculated result, which can no longer be calculated + // because it was removed from the blocks + results.erase(results.begin() + offset); + + // Since we're removing a result, update the result indices of successors + for (auto successor : successors) + for (unsigned j = 0; j < successor->dependencies.size(); j++) + if (successor->dependencies[j].node == this && + successor->dependencies[j].resultidx >= offset) + successor->dependencies[j].resultidx--; + } + + /// Finds and lifts common operations from the then and else branches to the + /// parent scope. This is an optimization that a) potentially reduces the + /// height of `if`s, and b) allows parent graphs to make more informed + /// scheduling and reuse decisions, as information previously hidden by the + /// "solid barrier" abstraction of `if`s is now available to them. + /// + /// Operations are considered equivalent if the operations themselves are + /// equivalent, and all physical/virtual wires passed as operands are + /// equivalent. Since wires from the parent context may still be virtual, it + /// is important to distinguish physical vs virtual wires when checking + /// equivalence. 
+ ///
+ /// Lifting operations will likely change the schedule of the
+ /// then and else blocks, so it is important to ensure that this schedule
+ /// change does not create conflicts where the same physical qubit is now
+ /// used by multiple operations at the same cycle. This can be ensured
+ /// either by avoiding lifting if it would lead to such a conflict, or by
+ /// somehow ensuring that the resulting schedule is still valid.
+ /// Similarly, lifting operations that use physical qubits allocated in
+ /// the then and else blocks requires lifting the physical qubit
+ /// allocation as well, which may present problems if the same physical
+ /// qubit is reused in the block and thus allocated again.
+ ///
+ /// The current implementation solves this by coupling virtual wires to
+ /// form a single physical wire, which means that reusing a physical qubit
+ /// will introduce a dependency on the previous use of the qubit.
+ /// Since scheduling ensures that a node cannot be scheduled at the same
+ /// cycle as its dependencies, this ensures a reused physical wire will
+ /// still only be used AFTER the previous use. Then, because there
+ /// is only one physical wire, even with reuses, lifting the physical
+ /// qubit allocation from the inner blocks is no problem.
+ /// See `DependencyBlock::allocatePhysicalQubits` for more details on the
+ /// current implementation.
+ ///
+ /// This approach prioritizes qubit reuse over potential circuit-length
+ /// reduction from lifting, other approaches with other tradeoffs have
+ /// yet to be explored.
+ void performLiftingPass() {
+ bool lifted = false;
+
+ // Currently, inner allocated qubits are always lifted in `performAnalysis`,
+ // so this code is unnecessary.
+ // If that becomes undesirable, uncomment the following code to allow
+ // lifting of inner allocated qubits. 
for (auto qubit : getQubits()) { + // if (!then_block->getAllocatedQubits().contains(qubit) || + // !else_block->getAllocatedQubits().contains(qubit)) + // continue; + // auto then_use = then_block->getFirstUseOfQubit(qubit); + // auto else_use = else_block->getFirstUseOfQubit(qubit); + + // if (tryLiftingBefore(then_use, else_use, parent)) { + // lifted = true; + // continue; + // } + + // then_use = then_block->getLastUseOfQubit(qubit); + // else_use = else_block->getLastUseOfQubit(qubit); + + // if (tryLiftingAfter(then_use, else_use, parent)) { + // lifted = true; + // continue; + // } + // } + + // All qubits are lifted, so we can focus on lifting the QIDs flowing + // through this `if` + bool run_more = true; + auto unliftableQIDs = SetVector(); + + // Lifting operations may reveal more liftable operations! + while (run_more) { + run_more = false; + auto liftableQIDs = SetVector(qids); + liftableQIDs.set_subtract(unliftableQIDs); + + for (auto qid : liftableQIDs) { + if (!then_block->getQIDs().contains(qid) || + !else_block->getQIDs().contains(qid)) { + unliftableQIDs.insert(qid); + continue; + } + + auto then_graph = then_block->getBlockGraph(); + auto else_graph = else_block->getBlockGraph(); + + auto then_use = then_graph->getFirstUseOfQID(qid); + auto else_use = else_graph->getFirstUseOfQID(qid); + + if (!then_use || !else_use) { + // QID is no longer referenced in the if, erase it + // TODO: if this `if` has no more uses, clean it up + if (!then_use && !else_use) + eraseEdgeForQID(qid); + unliftableQIDs.insert(qid); + continue; + } + + if (tryLiftingBefore(then_use, else_use)) { + lifted = true; + run_more = true; + continue; + } + + then_use = then_graph->getLastUseOfQID(qid); + else_use = else_graph->getLastUseOfQID(qid); + + if (tryLiftingAfter(then_use, else_use)) { + lifted = true; + run_more = true; + continue; + } + } + } + + // Recompute inner block metadata after lifting + if (lifted) { + then_block->updateHeight(); + else_block->updateHeight(); 
+ then_block->schedulingPass(); + else_block->schedulingPass(); + } + } + + /// Performs the analysis and optimizations on this `if` statement inside out: + /// * First, recurs on the then and else blocks, resolving inner `if`s , + /// performing scheduling, and making qubit allocation/reuse decisions. + /// * Physical qubit allocations from the two blocks are combined, respecting + /// reuse but allowing re-indexing. + /// * Equivalent operations are lifted from the beginning/end of the blocks. + /// + /// In the current implementation, after the physical qubit allocations are + /// combined, they are lifted from the inner block to the parent scope of the + /// `if`. This is necessary due to the implementation decision to couple + /// wires that reuse the same physical qubit. Since `if`s are treated as a + /// "solid rectangle" by the parent scope, this does not have any + /// particular downsides at the moment, as it does not change the lifetime + /// of the qubit in the parent scope. + void performAnalysis(LifeTimeAnalysis &set, + DependencyGraph *parent_graph) override { + // Recur first, as analysis works inside-out + then_block->performAnalysis(set); + // Capture allocations from then_block analysis + auto pqids1 = set.clearFrame(); + else_block->performAnalysis(set); + // Capture allocations from else_block analysis + auto pqids2 = set.clearFrame(); + + // Combine then and else allocations + combineAllocs(pqids1, pqids2); + + // Lift all physical allocations out of the if + auto allocs = then_block->getAllocatedQubits(); + allocs.set_union(else_block->getAllocatedQubits()); + + for (auto qubit : allocs) + liftAlloc(qubit, parent_graph); + + // Lift equivalent operations between then and else blocks + performLiftingPass(); + } + + /// Move a virtual wire allocated and de-allocated (but not used!) from an + /// outer scope to be allocated and de-allocated within both the then and else + /// blocks. 
+ /// + /// As a result, removes the dependency on, and result for, \p qid from this + /// node. + void lowerAlloc(DependencyNode *init, DependencyNode *root, VirtualQID qid, + unsigned &next_qid) override { + assert(successors.contains(root) && "Illegal root for contractAlloc"); + assert(init->successors.contains(this) && "Illegal init for contractAlloc"); + root->dependencies.erase(root->dependencies.begin()); + init->successors.clear(); + successors.remove(root); + auto alloc = static_cast(init); + auto alloc_copy = new InitDependencyNode(*alloc); + auto dealloc = static_cast(root); + auto dealloc_copy = new RootDependencyNode(*dealloc); + std::size_t offset = getDependencyForQID(qid).value(); + associated->eraseOperand(offset); + + for (unsigned i = 0; i < results.size(); i++) + if (getQIDForResult(i) == qid) + results.erase(results.begin() + i); + + dependencies.erase(dependencies.begin() + offset); + then_block->lowerAlloc(alloc, root, qid); + else_block->lowerAlloc(alloc_copy, dealloc_copy, qid); + + // If else_block actually uses the qid, update it using the unique qid + // counter next_qid to ensure uniqueness of the qid as we copy it from + // the then block to the else block. + // TODO: only really need to do this if both blocks contain the qid. + if (else_block->getQIDs().contains(qid)) + else_block->getBlockGraph()->updateQID(qid, next_qid++); + qids.remove(qid); + + // Since we're removing a result, update the result indices of successors + for (auto successor : successors) + for (unsigned i = 0; i < successor->dependencies.size(); i++) + if (successor->dependencies[i].node == this && + successor->dependencies[i].resultidx >= offset) + successor->dependencies[i].resultidx--; + } + + /// Recursively replaces \p old_qid with \p new_qid for this node and its + /// successors. For an `if`, this will also perform the replacement in the + /// then and else blocks. 
+ void updateQID(VirtualQID old_qid, VirtualQID new_qid) override { + then_block->getBlockGraph()->updateQID(old_qid, new_qid); + else_block->getBlockGraph()->updateQID(old_qid, new_qid); + this->DependencyNode::updateQID(old_qid, new_qid); + } +}; + +/// Validates that \p op meets the assumptions: +/// * operations are in linear value semantics +/// * control flow operations (except `if`s) are not allowed +/// * memory stores may be rearranged (this is not a hard error) +bool validateOp(Operation *op) { + if (isQuakeOperation(op) && !quake::isLinearValueForm(op) && + !isa(op)) { + op->emitRemark("DependencyAnalysisPass: requires all quake operations to " + "be in value form. Function will be skipped"); + return false; + } + + if (op->getRegions().size() != 0 && !isa(op)) { + op->emitRemark("DependencyAnalysisPass: loops are not supported. Function " + "will be skipped"); + return false; + } + + if (isa(op)) { + op->emitRemark("DependencyAnalysisPass: branching operations are not " + "supported. Function will be skipped"); + return false; + } + + if (isa(op)) { + op->emitRemark("DependencyAnalysisPass: function calls are not supported. " + "Function will be skipped"); + return false; + } + + if (hasEffect(op) && !isQuakeOperation(op)) { + op->emitWarning("DependencyAnalysisPass: memory stores are volatile and " + "may be reordered"); + } + + if (isa(op)) { + op->emitRemark( + "DependencyAnalysisPass: `quake.borrow_wire` is only " + "supported qubit allocation operation. Function will be skipped"); + return false; + } + + return true; +} + +/// Validates that \p func meets the assumptions: +/// * function bodies contain a single block +[[maybe_unused]] bool validateFunc(func::FuncOp func) { + if (func.getBlocks().size() != 1) { + func.emitRemark("DependencyAnalysisPass: multiple blocks are not " + "supported. 
Function will be skipped"); + return false; + } + + // TODO: function arguments aren't really supported properly + // in places like `OpDependencyNode::erase` or when handling + // shadow dependencies, especially classical arguments. + // I think function arguments shouldn't be supported and a + // check should be made here, though the above issues could + // be addressed and then they may be supported ok. + + return true; +} + +class DependencyAnalysisEngine { +private: + SmallVector perOp; + DenseMap argMap; + SmallVector ifStack; + DenseMap> freeClassicals; + unsigned vallocs; + +public: + DependencyAnalysisEngine() + : perOp({}), argMap({}), ifStack({}), freeClassicals({}), vallocs(0) {} + + /// Creates a new dependency block for \p b by constructing a dependency graph + /// for the body of \p b starting from the block terminator. + /// + /// Any operation not somehow connected to the block terminator (this will + /// only happen if the entirety of a wire is irrelevant to the block + /// terminator, in which case it is considered to be dead code) + DependencyBlock * + visitBlock(mlir::Block *b, + SmallVector dependencies) { + SmallVector argdnodes; + for (auto targ : b->getArguments()) { + ArgDependencyNode *dnode; + // Entry block has no argument dependencies + if (dependencies.size() > 0) + dnode = + new ArgDependencyNode(targ, dependencies[targ.getArgNumber() + 1]); + else + dnode = new ArgDependencyNode(targ); + argMap[targ] = dnode; + argdnodes.push_back(dnode); + } + + DenseMap roots; + TerminatorDependencyNode *terminator = nullptr; + for (auto &op : b->getOperations()) { + bool isTerminator = (&op == b->getTerminator()); + auto node = visitOp(&op, isTerminator); + + if (!node) + return nullptr; + + if (isa(&op)) + roots[node] = &op; + + if (isTerminator) { + assert(op.hasTrait() && + "Illegal terminator op!"); + terminator = static_cast(node); + } + } + + DependencyGraph *new_graph = new DependencyGraph(terminator); + auto included = 
new_graph->getRoots(); + + // In debug mode, alert about dead code wires + // TODO: If an unused wire flows through an `if` with a useful wire, + // then the unused wire is considered useful as the parent context + // doesn't know that it doesn't interact with anything inside the if, + // it would be nice to have a "hasInteraction" predicate inside `if`s + // to be able to detect this case and do a better job of removing + // unused wires. + // + // In fact, it may be possible to completely split `if`s into various + // non-interacting sub-graphs, which may make solving this problem + // easier, and may or may not present more optimization opportunities. + // TODO: clean up memory for unused wires. + // Adam: I think this could be done in a silly way by placing the root + // in a new graph, and then deleting the graph should clean up all + // the nodes for the wire. + LLVM_DEBUG(for (auto [root, op] + : roots) { + if (!included.contains(root)) { + llvm::dbgs() + << "DependencyAnalysisPass: Wire is dead code and its " + << "operations will be deleted (did you forget to return a value?)" + << root << "\n"; + } + }); + + return new DependencyBlock(argdnodes, new_graph, b, terminator); + } + + /// Creates and returns a new dependency node for \p op, connecting it to the + /// nodes created for the defining operations of the operands of \p op + DependencyNode *visitOp(Operation *op, bool isTerminator) { + if (!validateOp(op)) + return nullptr; + + SmallVector dependencies; + for (unsigned i = 0; i < op->getNumOperands(); i++) + dependencies.push_back(visitValue(op->getOperand(i))); + + DependencyNode *newNode; + + if (auto init = dyn_cast(op)) { + newNode = new InitDependencyNode(init); + vallocs++; + } else if (auto sink = dyn_cast(op)) { + newNode = new RootDependencyNode(sink, dependencies); + } else if (auto ifop = dyn_cast(op)) { + freeClassicals[op] = SetVector(); + ifStack.push_back(op); + auto then_block = visitBlock(ifop.getThenEntryBlock(), dependencies); + 
auto else_block = visitBlock(ifop.getElseEntryBlock(), dependencies); + if (!then_block || !else_block) + return nullptr; + ifStack.pop_back(); + + SetVector freeIn = freeClassicals[op]; + freeClassicals.erase(freeClassicals.find(op)); + + newNode = new IfDependencyNode(ifop, dependencies, then_block, else_block, + freeIn); + } else if (isTerminator) { + newNode = new TerminatorDependencyNode(op, dependencies); + } else { + newNode = new OpDependencyNode(op, dependencies); + } + + // Dnodeid is the next slot of the dnode vector + auto id = perOp.size(); + + // Add dnodeid attribute + OpBuilder builder(op); + op->setAttr("dnodeid", builder.getUI32IntegerAttr(id)); + perOp.push_back(newNode); + + return newNode; + } + + /// Returns the dependency node for the defining operation of \p v + /// Assumption: defining operation for \p v exists and already has been + /// visited. + /// + /// If \p v a classical value from a different scope, allocates a + /// ShadowDependencyNode, and adds it to the frontier of the parent node + /// of the use of \p v that is at the same scope as \p v + DependencyNode::DependencyEdge visitValue(Value v) { + if (auto barg = dyn_cast(v)) + return DependencyNode::DependencyEdge{argMap[barg], 0}; + + auto defOp = v.getDefiningOp(); + assert(defOp && + "Cannot handle value that is neither a BlockArgument nor OpResult"); + + auto resultidx = dyn_cast(v).getResultNumber(); + + // Since we walk forward through the ast, every value should be defined + // before it is used, so we should have already visited defOp, + // and thus should have a memoized dnode for defOp, fail if not + assert(defOp->hasAttr("dnodeid") && "No dnodeid found for operation"); + + auto id = defOp->getAttr("dnodeid").cast().getUInt(); + auto dnode = perOp[id]; + + if (!ifStack.empty() && defOp->getParentOp() != ifStack.back() && + dnode->isQuantumDependent()) { + auto opdnode = static_cast(dnode); + auto shadow_node = new ShadowDependencyNode{opdnode, resultidx}; + + auto parent 
= ifStack.back(); + + while (parent->getParentOp() != defOp->getParentOp()) + parent = parent->getParentOp(); + + freeClassicals[parent].insert(shadow_node); + + return DependencyNode::DependencyEdge{shadow_node, resultidx}; + } + + return DependencyNode::DependencyEdge{dnode, resultidx}; + } + + /// Cleans up semi-constructed dependency graph when backing out of running + /// DependencyAnalysis because of an encountered error + void clean() { + // TODO: clean up nodes + // Adam: can use perOps, have to be careful about nested nodes though + } + + unsigned getNumVirtualAllocs() { return vallocs; } +}; + +struct DependencyAnalysisPass + : public cudaq::opt::impl::DependencyAnalysisBase { + using DependencyAnalysisBase::DependencyAnalysisBase; + + /// DependencyAnalysis constructs a data structure representing the + /// quake code, performs several analyses/optimizations, and then generates + /// new quake code based on the resulting data structure. + /// + /// First, the quake code is walked, constructing a DependencyBlock for the + /// body of every kernel function. + /// + /// Next, virtual qubit allocations are lowered to the inner-most scope in + /// which they are used (see `contractAllocsPass`). This works outside-in. + /// Lowering virtual qubit allocations opens up more qubit reuse opportunities + /// within inner scopes, and gives the remaining optimizations more + /// flexibility. + /// + /// Then, an inside-out analysis/optimization pass is performed (see + /// `performAnalysis` in `IfDependencyNode` and `DependencyBlock`), assigning + /// physical qubits to virtual wires, and lifting common optimizations. This + /// inside-out analysis/optimization pass works as follows: + /// - Step 1: `if`s inside blocks are resolved (hence inside-out), once an + /// `if` is resolved, the information contained by it will not be changed + /// (with the possible exception of re-indexing physical qubits). + /// - Step 2: blocks are resolved. 
+ /// * First the nodes inside them are scheduled, assigning a cycle to every
+ /// node such that each node is scheduled after all of its dependencies, and
+ /// before all of its successors (see `DependencyGraph::schedule`).
+ /// * Then, physical qubits are allocated and assigned to virtual wires based
+ /// on lifetime information (i.e., which cycles the virtual wire is used
+ /// in). This algorithm treats `if`s as "solid rectangles", where all qubits
+ /// in use anywhere in the `if` are considered in use for the entire `if` by
+ /// the parent scope. In other words, the lifetime analysis does not look
+ /// inside `if`s (see `DependencyBlock::allocatePhysicalQubits`).
+ /// - Step 3: Once its blocks are resolved, then the parent `if` is resolved.
+ /// * First, the allocations from the two inner blocks are combined/matched
+ /// (respecting reuse within the blocks but with re-indexing allowed) (see
+ /// `IfDependencyNode::combineAllocs`).
+ /// * Second, equivalent operations at the beginning/end of the then and else
+ /// blocks are lifted to the parent context, before/after the `if` (see
+ /// `IfDependencyNode::performLiftingPass`).
+ /// - Step 4: return to Step 1 for the parent block the `if` is in, with an
+ /// additional inner `if` resolved
+ ///
+ /// Finally, quake code is re-generated based on the resulting
+ /// `DependencyBlock`. 
+ void runOnOperation() override { + auto mod = getOperation(); + + for (auto &op : mod) { + if (auto func = dyn_cast(op)) { + if (!func->hasAttr("cudaq-kernel")) + continue; + + if (func.getNumResults() == 0) { + func->emitRemark("Function marked 'cudaq-kernel' returns no results, " + "qubit management skipped."); + continue; + } + + validateFunc(func); + + Block *oldBlock = &func.front(); + + auto engine = DependencyAnalysisEngine(); + + // Construct a DependencyBlock for the function body based on the quake + // AST + auto body = engine.visitBlock( + oldBlock, SmallVector()); + + if (!body) { + LLVM_DEBUG( + llvm::dbgs() + << "Encountered issue, backing out and skipping function\n"); + engine.clean(); + continue; + } + + auto vallocs = engine.getNumVirtualAllocs(); + + OpBuilder builder(func); + LifeTimeAnalysis set; + // First, move allocs in as deep as possible. This is outside-in, so it + // is separated from the rest of the analysis passes. + body->contractAllocsPass(vallocs); + // Next, do the scheduling, lifetime analysis/allocation, and lifting + // passes inside-out + body->performAnalysis(set); + // Finally, perform code generation to move back to quake + body->codeGen(builder, &func.getRegion()); + + // TODO: Various pass statistics are accessible via the following: + // * Total number of virtual qubits (included eliminated dead wires): + // `engine.getNumVirtualAllocs()` + // * Total number of physical qubits: + // `set.getCount()` + // * Total number of cycles (make call before contractAllocsPass and + // after performAnalysis to see before/after): + // `body->getHeight()` + + delete body; + // Replace old block + oldBlock->erase(); + } + } + } +}; + +} // namespace diff --git a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp index 1c3ec42a1c..ee12eb4efd 100644 --- a/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp +++ b/lib/Optimizer/Transforms/GenDeviceCodeLoader.cpp @@ -8,65 +8,33 @@ #include 
"PassDetails.h" #include "cudaq/Frontend/nvqpp/AttributeNames.h" -#include "cudaq/Optimizer/Builder/Factory.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" +#include "cudaq/Optimizer/CallGraphFix.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ToolOutputFile.h" -#include "mlir/Analysis/CallGraph.h" #include "mlir/IR/Diagnostics.h" #include "mlir/Transforms/Passes.h" +namespace cudaq::opt { +#define GEN_PASS_DEF_GENERATEDEVICECODELOADER +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + #define DEBUG_TYPE "device-code-loader" using namespace mlir; -namespace llvm { -// FIXME: `GraphTraits` specialization for `const mlir::CallGraphNode *` in -// "mlir/Analysis/CallGraph.h" has a bug. -// In particular, `GraphTraits` typedef'ed `NodeRef -// -> mlir::CallGraphNode *`, (without `const`), causing problems when using -// `mlir::CallGraphNode` with graph iterator (e.g., `llvm::df_iterator`). The -// entry node getter has the signature `NodeRef getEntryNode(NodeRef node)`, -// i.e., `mlir::CallGraphNode * getEntryNode(mlir::CallGraphNode * node)`; but a -// graph iterator for `const mlir::CallGraphNode *` will pass a `const -// mlir::CallGraphNode *` to that `getEntryNode` function => compile error. -// Here, we define a non-const overload, which hasn't been defined, to work -// around that issue. -// -// Note: this isn't an issue for the whole `mlir::CallGraph` graph, i.e., -// `GraphTraits`. `getEntryNode` is defined as -// `getExternalCallerNode`, which is a const method of `mlir::CallGraph`. 
- -template <> -struct GraphTraits { - using NodeRef = mlir::CallGraphNode *; - static NodeRef getEntryNode(NodeRef node) { return node; } - - static NodeRef unwrap(const mlir::CallGraphNode::Edge &edge) { - return edge.getTarget(); - } - using ChildIteratorType = - mapped_iterator; - static ChildIteratorType child_begin(NodeRef node) { - return {node->begin(), &unwrap}; - } - static ChildIteratorType child_end(NodeRef node) { - return {node->end(), &unwrap}; - } -}; -} // namespace llvm - namespace { -class GenerateDeviceCodeLoader - : public cudaq::opt::GenerateDeviceCodeLoaderBase< - GenerateDeviceCodeLoader> { +class GenerateDeviceCodeLoaderPass + : public cudaq::opt::impl::GenerateDeviceCodeLoaderBase< + GenerateDeviceCodeLoaderPass> { public: - GenerateDeviceCodeLoader() = default; - GenerateDeviceCodeLoader(bool genAsQuake) { generateAsQuake = genAsQuake; } + using GenerateDeviceCodeLoaderBase::GenerateDeviceCodeLoaderBase; void runOnOperation() override { auto module = getOperation(); @@ -83,13 +51,16 @@ class GenerateDeviceCodeLoader if (generateAsQuake) { // Add declaration of deviceCodeHolderAdd builder.create( - loc, "deviceCodeHolderAdd", + loc, cudaq::runtime::deviceCodeHolderAdd, LLVM::LLVMFunctionType::get( cudaq::opt::factory::getVoidType(ctx), {cudaq::opt::factory::getPointerType(ctx), cudaq::opt::factory::getPointerType(ctx)})); } + auto mangledNameMap = + module->getAttrOfType(cudaq::runtime::mangledNameMap); + // Collect all function declarations to forward as part of each Module. // These are thrown in so the Module's CallOps are complete. Unused // declarations are just thrown away when the code is JIT compiled. @@ -115,106 +86,140 @@ class GenerateDeviceCodeLoader // Create a call graph to track kernel dependency. mlir::CallGraph callGraph(module); for (auto &op : *module.getBody()) { - // FIXME: May not be a FuncOp in the future. 
- if (auto funcOp = dyn_cast(op)) { - if (!funcOp.getName().startswith(cudaq::runtime::cudaqGenPrefixName)) - continue; - if (funcOp->hasAttr(cudaq::generatorAnnotation)) - continue; - auto className = - funcOp.getName().drop_front(cudaq::runtime::cudaqGenPrefixLength); - LLVM_DEBUG(llvm::dbgs() << "processing function " << className << '\n'); - // Generate LLVM-IR dialect to register the device code loading. - std::string thunkName = className.str() + ".thunk"; - std::string funcCode; - llvm::raw_string_ostream strOut(funcCode); - OpPrintingFlags opf; - opf.enableDebugInfo(/*enable=*/true, - /*pretty=*/false); - strOut << "module attributes " << module->getAttrDictionary() << " { "; - - // We'll also need any non-inlined functions that are - // called by our cudaq kernel - // Set of dependent kernels that we've included. - // Note: the `CallGraphNode` does include 'this' function. - mlir::CallGraphNode *node = - callGraph.lookupNode(funcOp.getCallableRegion()); - // Iterate over all dependent kernels starting at this node. - for (auto it = llvm::df_begin(node), itEnd = llvm::df_end(node); - it != itEnd; ++it) { - // Only consider those that are defined in this module. - if (!it->isExternal()) { - auto *callableRegion = it->getCallableRegion(); - auto parentFuncOp = - callableRegion->getParentOfType(); - LLVM_DEBUG(llvm::dbgs() << " Adding dependent function " - << parentFuncOp->getName() << '\n'); - parentFuncOp.print(strOut, opf); - strOut << '\n'; - } - } + auto funcOp = dyn_cast(op); + if (!funcOp) + continue; + if (!funcOp.getName().startswith(cudaq::runtime::cudaqGenPrefixName)) + continue; + if (funcOp->hasAttr(cudaq::generatorAnnotation) || funcOp.empty()) + continue; + auto className = + funcOp.getName().drop_front(cudaq::runtime::cudaqGenPrefixLength); + LLVM_DEBUG(llvm::dbgs() << "processing function " << className << '\n'); + // Generate LLVM-IR dialect to register the device code loading. 
+ std::string thunkName = className.str() + ".thunk"; + std::string funcCode; + llvm::raw_string_ostream strOut(funcCode); + OpPrintingFlags opf; + opf.enableDebugInfo(/*enable=*/true, + /*pretty=*/false); + strOut << "module attributes " << module->getAttrDictionary() << " { "; - // Include the generated kernel thunk if present since it is on the - // callee side of the launchKernel() callback. - if (auto *thunkFunc = module.lookupSymbol(thunkName)) { - LLVM_DEBUG(llvm::dbgs() << "found thunk function\n"); - strOut << *thunkFunc << '\n'; + // We'll also need any non-inlined functions that are + // called by our cudaq kernel + // Set of dependent kernels that we've included. + // Note: the `CallGraphNode` does include 'this' function. + mlir::CallGraphNode *node = + callGraph.lookupNode(funcOp.getCallableRegion()); + // Iterate over all dependent kernels starting at this node. + for (auto it = llvm::df_begin(node), itEnd = llvm::df_end(node); + it != itEnd; ++it) { + // Only consider those that are defined in this module. + if (!it->isExternal()) { + auto *callableRegion = it->getCallableRegion(); + auto parentFuncOp = + callableRegion->getParentOfType(); + LLVM_DEBUG(llvm::dbgs() << " Adding dependent function " + << parentFuncOp->getName() << '\n'); + parentFuncOp.print(strOut, opf); + strOut << '\n'; } - if (auto *zeroDynRes = - module.lookupSymbol("__nvqpp_zeroDynamicResult")) { - LLVM_DEBUG(llvm::dbgs() << "found zero dyn result function\n"); - strOut << *zeroDynRes << '\n'; + } + + // Include the generated kernel thunk if present since it is on the + // callee side of the launchKernel() callback. 
+ if (auto *thunkFunc = module.lookupSymbol(thunkName)) { + LLVM_DEBUG(llvm::dbgs() << "found thunk function\n"); + strOut << *thunkFunc << '\n'; + } + if (auto *zeroDynRes = module.lookupSymbol("__nvqpp_zeroDynamicResult")) { + LLVM_DEBUG(llvm::dbgs() << "found zero dyn result function\n"); + strOut << *zeroDynRes << '\n'; + } + if (auto *createDynRes = + module.lookupSymbol("__nvqpp_createDynamicResult")) { + LLVM_DEBUG(llvm::dbgs() << "found create dyn result function\n"); + strOut << *createDynRes << '\n'; + } + + // Conservatively, include all declarations. (Unreferenced ones can be + // erased with a symbol DCE.) + for (auto *op : declarations) + strOut << *op << '\n'; + strOut << "\n}\n" << '\0'; + + auto devCode = builder.create( + loc, cudaq::opt::factory::getStringType(ctx, funcCode.size()), + /*isConstant=*/true, LLVM::Linkage::Private, + className.str() + "CodeHolder.extract_device_code", + builder.getStringAttr(funcCode), /*alignment=*/0); + auto devName = builder.create( + loc, cudaq::opt::factory::getStringType(ctx, className.size() + 1), + /*isConstant=*/true, LLVM::Linkage::Private, + className.str() + "CodeHolder.extract_device_name", + builder.getStringAttr(className.str() + '\0'), /*alignment=*/0); + auto initFun = builder.create( + loc, className.str() + ".init_func", + LLVM::LLVMFunctionType::get(cudaq::opt::factory::getVoidType(ctx), + {})); + auto insPt = builder.saveInsertionPoint(); + auto *initFunEntry = initFun.addEntryBlock(); + builder.setInsertionPointToStart(initFunEntry); + auto devRef = builder.create( + loc, cudaq::opt::factory::getPointerType(devName.getType()), + devName.getSymName()); + auto codeRef = builder.create( + loc, cudaq::opt::factory::getPointerType(devCode.getType()), + devCode.getSymName()); + auto castDevRef = builder.create( + loc, cudaq::opt::factory::getPointerType(ctx), devRef); + auto castCodeRef = builder.create( + loc, cudaq::opt::factory::getPointerType(ctx), codeRef); + builder.create(loc, std::nullopt, + 
cudaq::runtime::deviceCodeHolderAdd, + ValueRange{castDevRef, castCodeRef}); + + auto kernName = funcOp.getSymName().str(); + if (!jitTime && mangledNameMap && !mangledNameMap.empty() && + mangledNameMap.contains(kernName)) { + auto hostFuncNameAttr = mangledNameMap.getAs(kernName); + auto hostFuncName = hostFuncNameAttr.getValue(); + auto hostFuncOp = module.lookupSymbol(hostFuncName); + if (!hostFuncOp) { + // Using a fake type. We just want the symbol of an artifact defined + // in host code. We're not calling this function. + hostFuncOp = + cudaq::opt::factory::createFunction(hostFuncName, {}, {}, module); + hostFuncOp.setPrivate(); } - if (auto *createDynRes = - module.lookupSymbol("__nvqpp_createDynamicResult")) { - LLVM_DEBUG(llvm::dbgs() << "found create dyn result function\n"); - strOut << *createDynRes << '\n'; + auto ptrTy = cudaq::cc::PointerType::get(builder.getI8Type()); + auto entryRef = builder.create( + loc, hostFuncOp.getFunctionType(), hostFuncOp.getSymName()); + auto castEntryRef = + builder.create(loc, ptrTy, entryRef); + auto deviceRef = builder.create( + loc, funcOp.getFunctionType(), funcOp.getSymName()); + auto castDeviceRef = + builder.create(loc, ptrTy, deviceRef); + auto castKernNameRef = + builder.create(loc, ptrTy, devRef); + + cudaq::IRBuilder irBuilder(builder); + if (failed(irBuilder.loadIntrinsic( + module, cudaq::runtime::registerLinkableKernel))) { + signalPassFailure(); } - for (auto *op : declarations) - strOut << *op << '\n'; - strOut << "\n}\n" << '\0'; - auto devCode = builder.create( - loc, cudaq::opt::factory::getStringType(ctx, funcCode.size()), - /*isConstant=*/true, LLVM::Linkage::Private, - className.str() + "CodeHolder.extract_device_code", - builder.getStringAttr(funcCode), /*alignment=*/0); - auto devName = builder.create( - loc, cudaq::opt::factory::getStringType(ctx, className.size() + 1), - /*isConstant=*/true, LLVM::Linkage::Private, - className.str() + "CodeHolder.extract_device_name", - 
builder.getStringAttr(className.str() + '\0'), /*alignment=*/0); - auto initFun = builder.create( - loc, className.str() + ".init_func", - LLVM::LLVMFunctionType::get(cudaq::opt::factory::getVoidType(ctx), - {}), - LLVM::Linkage::External); - auto insPt = builder.saveInsertionPoint(); - auto *initFunEntry = initFun.addEntryBlock(); - builder.setInsertionPointToStart(initFunEntry); - auto devRef = builder.create( - loc, cudaq::opt::factory::getPointerType(devName.getType()), - devName.getSymName()); - auto codeRef = builder.create( - loc, cudaq::opt::factory::getPointerType(devCode.getType()), - devCode.getSymName()); - auto castDevRef = builder.create( - loc, cudaq::opt::factory::getPointerType(ctx), devRef); - auto castCodeRef = builder.create( - loc, cudaq::opt::factory::getPointerType(ctx), codeRef); - builder.create(loc, std::nullopt, "deviceCodeHolderAdd", - ValueRange{castDevRef, castCodeRef}); - builder.create(loc, ValueRange{}); - builder.restoreInsertionPoint(insPt); - cudaq::opt::factory::createGlobalCtorCall( - module, mlir::FlatSymbolRefAttr::get(ctx, initFun.getName())); + builder.create( + loc, std::nullopt, cudaq::runtime::registerLinkableKernel, + ValueRange{castEntryRef, castKernNameRef, castDeviceRef}); } + + builder.create(loc, ValueRange{}); + builder.restoreInsertionPoint(insPt); + cudaq::opt::factory::createGlobalCtorCall( + module, mlir::FlatSymbolRefAttr::get(ctx, initFun.getName())); } out.keep(); } }; } // namespace - -std::unique_ptr -cudaq::opt::createGenerateDeviceCodeLoader(bool genAsQuake) { - return std::make_unique(genAsQuake); -} diff --git a/lib/Optimizer/Transforms/GenKernelExecution.cpp b/lib/Optimizer/Transforms/GenKernelExecution.cpp index aa0cbbb8f1..7d693921f1 100644 --- a/lib/Optimizer/Transforms/GenKernelExecution.cpp +++ b/lib/Optimizer/Transforms/GenKernelExecution.cpp @@ -34,14 +34,6 @@ namespace cudaq::opt { using namespace mlir; -// Define some constant function name strings. 
-static constexpr const char CudaqRegisterLambdaName[] = - "cudaqRegisterLambdaName"; -static constexpr const char CudaqRegisterArgsCreator[] = - "cudaqRegisterArgsCreator"; -static constexpr const char CudaqRegisterKernelName[] = - "cudaqRegisterKernelName"; - /// This value is used to indicate that a kernel does not return a result. static constexpr std::uint64_t NoResultOffset = std::numeric_limits::max(); @@ -616,6 +608,8 @@ class GenerateKernelExecution op->erase(); for (std::size_t i = 0, end = funcOp.getNumResults(); i != end; ++i) funcOp.eraseResult(0); + modifiedDevKernels.insert( + std::pair{funcOp.getName(), newFuncTy.getInput(0)}); } /// In the thunk, we need to unpack any `std::vector` objects encoded in the @@ -744,6 +738,12 @@ class GenerateKernelExecution Value trailingData, Value val, Type inTy, std::int64_t off, cudaq::cc::StructType structTy) { + if (isa(inTy)) { + auto i64Ty = builder.getI64Type(); + auto key = + builder.create(loc, i64Ty, val, off); + return {builder.create(loc, inTy, key), trailingData}; + } if (isa(inTy)) return {builder.create(loc, inTy), trailingData}; if (auto stdVecTy = dyn_cast(inTy)) { @@ -1162,6 +1162,21 @@ class GenerateKernelExecution // If the argument is a callable, skip it. if (isa(quakeTy)) continue; + + // Argument is a packaged kernel. In this case, the argument is some + // unknown kernel that may be called. The packaged argument is coming + // from opaque C++ host code, so we need to identify what kernel it + // references and then pass its name as a span of characters to the + // launch kernel. + if (isa(quakeTy)) { + auto kernKey = builder.create( + loc, builder.getI64Type(), cudaq::runtime::getLinkableKernelKey, + ValueRange{arg}); + stVal = builder.create( + loc, stVal.getType(), stVal, kernKey.getResult(0), idx); + continue; + } + // If the argument is an empty struct, skip it. 
if (auto strTy = dyn_cast(quakeTy)) if (strTy.isEmpty()) @@ -1180,7 +1195,7 @@ class GenerateKernelExecution loc, ptrInTy.getElementType()); builder.create(loc, std::nullopt, cudaq::stdvecBoolUnpackToInitList, - ArrayRef{tmp, arg}); + ValueRange{tmp, arg}); arg = blockValues[idx] = tmp; } // FIXME: call the `size` member function. For expediency, assume this @@ -1311,7 +1326,9 @@ class GenerateKernelExecution builder.create(loc, std::nullopt, "free", ArrayRef{heapCast}); } - } else if (auto strTy = dyn_cast(quakeTy)) { + continue; + } + if (auto strTy = dyn_cast(quakeTy)) { if (cudaq::cc::isDynamicType(strTy)) vecToBuffer = encodeDynamicStructData(loc, builder, strTy, arg, temp, vecToBuffer); @@ -1512,11 +1529,12 @@ class GenerateKernelExecution return true; } - LLVM::LLVMFuncOp registerKernelForExecution(Location loc, OpBuilder &builder, - const std::string &classNameStr, - LLVM::GlobalOp kernelNameObj, - func::FuncOp argsCreatorFunc, - StringRef mangledName) { + /// Generate a function to be executed at load-time which will register the + /// kernel with the runtime. 
+ LLVM::LLVMFuncOp registerKernelWithRuntimeForExecution( + Location loc, OpBuilder &builder, const std::string &classNameStr, + LLVM::GlobalOp kernelNameObj, func::FuncOp argsCreatorFunc, + StringRef mangledName) { auto module = getOperation(); auto *ctx = builder.getContext(); auto ptrType = cudaq::cc::PointerType::get(builder.getI8Type()); @@ -1530,7 +1548,8 @@ class GenerateKernelExecution loc, cudaq::opt::factory::getPointerType(kernelNameObj.getType()), kernelNameObj.getSymName()); auto castKernRef = builder.create(loc, ptrType, kernRef); - builder.create(loc, std::nullopt, CudaqRegisterKernelName, + builder.create(loc, std::nullopt, + cudaq::runtime::CudaqRegisterKernelName, ValueRange{castKernRef}); if (isCodegenPackedData(codegenKind)) { @@ -1543,7 +1562,7 @@ class GenerateKernelExecution auto castLoadArgsCreator = builder.create(loc, ptrType, loadArgsCreator); builder.create( - loc, std::nullopt, CudaqRegisterArgsCreator, + loc, std::nullopt, cudaq::runtime::CudaqRegisterArgsCreator, ValueRange{castKernRef, castLoadArgsCreator}); } @@ -1558,15 +1577,6 @@ class GenerateKernelExecution auto insertPoint = builder.saveInsertionPoint(); builder.setInsertionPointToStart(module.getBody()); - // Create the function if it doesn't already exist. 
- if (!module.lookupSymbol(CudaqRegisterLambdaName)) - builder.create( - module.getLoc(), CudaqRegisterLambdaName, - LLVM::LLVMFunctionType::get( - cudaq::opt::factory::getVoidType(ctx), - {cudaq::opt::factory::getPointerType(ctx), - cudaq::opt::factory::getPointerType(ctx)})); - // Create this global name, it is unique for any lambda // bc classNameStr contains the parentFunc + varName auto lambdaName = builder.create( @@ -1585,7 +1595,8 @@ class GenerateKernelExecution loc, cudaq::opt::factory::getPointerType(ctx), lambdaRef); auto castKernelRef = builder.create( loc, cudaq::opt::factory::getPointerType(ctx), castKernRef); - builder.create(loc, std::nullopt, CudaqRegisterLambdaName, + builder.create(loc, std::nullopt, + cudaq::runtime::CudaqRegisterLambdaName, ValueRange{castLambdaRef, castKernelRef}); } } @@ -1597,8 +1608,6 @@ class GenerateKernelExecution // Load the prototypes of runtime functions that we may call into the Module. LogicalResult loadPrototypes() { ModuleOp module = getOperation(); - auto *ctx = module.getContext(); - auto builder = OpBuilder::atBlockEnd(module.getBody()); auto mangledNameMap = module->getAttrOfType(cudaq::runtime::mangledNameMap); if (!mangledNameMap || mangledNameMap.empty()) @@ -1624,15 +1633,9 @@ class GenerateKernelExecution return module.emitError("invalid codegen kind value."); } - auto loc = module.getLoc(); - auto ptrType = cudaq::cc::PointerType::get(builder.getI8Type()); - auto regKern = builder.create( - loc, CudaqRegisterKernelName, FunctionType::get(ctx, {ptrType}, {})); - regKern.setPrivate(); - auto regArgs = builder.create( - loc, CudaqRegisterArgsCreator, - FunctionType::get(ctx, {ptrType, ptrType}, {})); - regArgs.setPrivate(); + if (failed(irBuilder.loadIntrinsic( + module, cudaq::runtime::CudaqRegisterKernelName))) + return module.emitError("could not load kernel registration API"); if (failed(irBuilder.loadIntrinsic(module, "malloc"))) return module.emitError("could not load malloc"); @@ -1766,20 +1769,55 
@@ class GenerateKernelExecution // Generate a function at startup to register this kernel as having // been processed for kernel execution. - auto initFun = - registerKernelForExecution(loc, builder, classNameStr, kernelNameObj, - argsCreatorFunc, mangledName); + auto initFun = registerKernelWithRuntimeForExecution( + loc, builder, classNameStr, kernelNameObj, argsCreatorFunc, + mangledName); // Create a global with a default ctor to be run at program startup. // The ctor will execute the above function, which will register this // kernel as having been processed. cudaq::opt::factory::createGlobalCtorCall( module, FlatSymbolRefAttr::get(ctx, initFun.getName())); - LLVM_DEBUG(llvm::dbgs() << module << '\n'); + + SmallVector deadCalls; + module.walk([&](func::CallOp call) { + if (!call.getResults().empty()) { + auto callee = call.getCallee(); + auto iter = modifiedDevKernels.find(callee); + if (iter != modifiedDevKernels.end()) { + OpBuilder builder(call); + Type ty = call.getResult(0).getType(); + auto loc = call.getLoc(); + auto strTy = cast( + cast(iter->second).getElementType()); + auto buff = builder.create(loc, strTy); + SmallVector args = {buff}; + args.append(call.getOperands().begin(), call.getOperands().end()); + builder.create(loc, TypeRange{}, callee, args); + auto buffPtrPtr = builder.create( + loc, cudaq::cc::PointerType::get(strTy.getMember(0)), buff, + ArrayRef{0}); + auto buffPtr = builder.create(loc, buffPtrPtr); + auto buffSizePtr = builder.create( + loc, cudaq::cc::PointerType::get(strTy.getMember(1)), buff, + ArrayRef{1}); + auto buffSize = builder.create(loc, buffSizePtr); + auto sv = builder.create(loc, ty, buffPtr, + buffSize); + call.getResult(0).replaceAllUsesWith(sv); + deadCalls.push_back(call); + } + } + }); + for (auto *op : deadCalls) + op->erase(); + + LLVM_DEBUG(llvm::dbgs() << "final module:\n" << module << '\n'); } out.keep(); } const DataLayout *dataLayout = nullptr; + DenseMap modifiedDevKernels; }; } // namespace diff --git 
a/lib/Optimizer/Transforms/MultiControlDecomposition.cpp b/lib/Optimizer/Transforms/MultiControlDecomposition.cpp index e21b7bfe22..0bf965920d 100644 --- a/lib/Optimizer/Transforms/MultiControlDecomposition.cpp +++ b/lib/Optimizer/Transforms/MultiControlDecomposition.cpp @@ -98,7 +98,7 @@ Decomposer::extractControls(quake::OperatorInterface op, ArrayRef Decomposer::getAncillas(Location loc, std::size_t numAncillas) { OpBuilder::InsertionGuard g(builder); builder.setInsertionPointToStart(entryBlock); - // If we don't have enough ancillas, allocate new more. + // If we don't have enough ancillas, allocate some more. for (size_t i = allocatedAncillas.size(); i < numAncillas; ++i) allocatedAncillas.push_back(builder.create(loc)); return {allocatedAncillas.begin(), allocatedAncillas.begin() + numAncillas}; diff --git a/lib/Optimizer/Transforms/ObserveAnsatz.cpp b/lib/Optimizer/Transforms/ObserveAnsatz.cpp index 40ce15d2cc..0d2ba3b38a 100644 --- a/lib/Optimizer/Transforms/ObserveAnsatz.cpp +++ b/lib/Optimizer/Transforms/ObserveAnsatz.cpp @@ -29,14 +29,36 @@ enum class MeasureBasis { I, X, Y, Z }; void appendMeasurement(MeasureBasis &basis, OpBuilder &builder, Location &loc, Value &qubit) { SmallVector targets{qubit}; - if (basis == MeasureBasis::X) { - builder.create(loc, ValueRange{}, targets); - } else if (basis == MeasureBasis::Y) { - llvm::APFloat d(M_PI_2); - Value rotation = - builder.create(loc, d, builder.getF64Type()); - SmallVector params{rotation}; - builder.create(loc, params, ValueRange{}, targets); + if (quake::isLinearType(qubit.getType())) { + // Value semantics + auto wireTy = quake::WireType::get(builder.getContext()); + if (basis == MeasureBasis::X) { + auto newOp = builder.create( + loc, TypeRange{wireTy}, /*is_adj=*/false, ValueRange{}, ValueRange{}, + targets, DenseBoolArrayAttr{}); + qubit.replaceAllUsesExcept(newOp.getResult(0), newOp); + qubit = newOp.getResult(0); + } else if (basis == MeasureBasis::Y) { + llvm::APFloat d(M_PI_2); + Value 
rotation = + builder.create(loc, d, builder.getF64Type()); + auto newOp = builder.create( + loc, TypeRange{wireTy}, /*is_adj=*/false, ValueRange{rotation}, + ValueRange{}, ValueRange{qubit}, DenseBoolArrayAttr{}); + qubit.replaceAllUsesExcept(newOp.getResult(0), newOp); + qubit = newOp.getResult(0); + } + } else { + // Reference semantics + if (basis == MeasureBasis::X) { + builder.create(loc, ValueRange{}, targets); + } else if (basis == MeasureBasis::Y) { + llvm::APFloat d(M_PI_2); + Value rotation = + builder.create(loc, d, builder.getF64Type()); + SmallVector params{rotation}; + builder.create(loc, params, ValueRange{}, targets); + } } } @@ -83,6 +105,39 @@ struct AnsatzFunctionAnalysis { AnsatzMetadata data; + funcOp->walk([&](quake::BorrowWireOp op) { + Value wire = op.getResult(); + // Wires are linear types that must be used exactly once, so traverse + // those uses until the end of the linear operators. + // NOTE - if this is ever moved to other passes that have different use + // cases than this one, then it needs to be updated to support ResetOp, + // which is not an operator interface (I don't think). + while (auto gate = + dyn_cast(*wire.getUsers().begin())) { + std::size_t qopNum = 0; + auto controls = gate.getControls(); + for (auto w : controls) { + if (w == wire) + break; + else + qopNum++; + } + if (qopNum >= controls.size()) { + for (auto w : gate.getTargets()) { + if (w == wire) + break; + else + qopNum++; + } + } + wire = gate.getWires()[qopNum]; + if (wire.getUsers().empty()) + break; + } + data.qubitValues.insert({data.nQubits++, wire}); + return WalkResult::advance(); + }); + // walk and find all quantum allocations auto walkResult = funcOp->walk([&](quake::AllocaOp op) { if (auto veq = dyn_cast(op.getResult().getType())) { @@ -186,12 +241,21 @@ struct AppendMeasurements : public OpRewritePattern { // observe kernels, we remove them here since we are adding specific // measurements below. 
for (auto *op : iter->second.measurements) { - if (!op->getUsers().empty()) { + bool safeToRemove = [&]() { + for (auto user : op->getUsers()) + if (!isa(user)) + return false; + return true; + }(); + if (!safeToRemove) { std::string msg = "Cannot observe kernel with non dangling measurements.\n"; funcOp.emitError(msg); return failure(); } + for (auto result : op->getResults()) + if (quake::isLinearType(result.getType())) + result.replaceAllUsesWith(op->getOperand(0)); op->erase(); } @@ -227,20 +291,30 @@ struct AppendMeasurements : public OpRewritePattern { auto qubitVal = seek->second; // append the measurement basis change ops + // Note: when using value semantics, qubitVal will be updated to the new + // wire here. appendMeasurement(basis, builder, loc, qubitVal); if (xElement + zElement != 0) qubitsToMeasure.push_back(qubitVal); } + auto measTy = quake::MeasureType::get(builder.getContext()); + auto wireTy = quake::WireType::get(builder.getContext()); for (auto &[measureNum, qubitToMeasure] : llvm::enumerate(qubitsToMeasure)) { // add the measure char regName[16]; std::snprintf(regName, sizeof(regName), "r%05lu", measureNum); - auto measTy = quake::MeasureType::get(builder.getContext()); - builder.create(loc, measTy, qubitToMeasure, - builder.getStringAttr(regName)); + if (quake::isLinearType(qubitToMeasure.getType())) { + auto newOp = builder.create( + loc, TypeRange{measTy, wireTy}, ValueRange{qubitToMeasure}, + builder.getStringAttr(regName)); + qubitToMeasure.replaceAllUsesExcept(newOp.getResult(1), newOp); + } else { + builder.create(loc, measTy, qubitToMeasure, + builder.getStringAttr(regName)); + } } rewriter.finalizeRootUpdate(funcOp); diff --git a/lib/Optimizer/Transforms/StatePreparation.cpp b/lib/Optimizer/Transforms/StatePreparation.cpp index e2453199e7..be4832c960 100644 --- a/lib/Optimizer/Transforms/StatePreparation.cpp +++ b/lib/Optimizer/Transforms/StatePreparation.cpp @@ -329,6 +329,8 @@ namespace { LogicalResult transform(ModuleOp module, 
func::FuncOp funcOp, double phaseThreshold) { + if (funcOp.empty()) + return success(); auto builder = OpBuilder::atBlockBegin(&funcOp.getBody().front()); auto toErase = std::vector(); auto result = success(); diff --git a/lib/Optimizer/Transforms/UnitarySynthesis.cpp b/lib/Optimizer/Transforms/UnitarySynthesis.cpp index 34f8df1085..503dfcb5de 100644 --- a/lib/Optimizer/Transforms/UnitarySynthesis.cpp +++ b/lib/Optimizer/Transforms/UnitarySynthesis.cpp @@ -7,6 +7,7 @@ ******************************************************************************/ #include "PassDetails.h" +#include "common/EigenDense.h" #include "cudaq/Optimizer/Builder/Factory.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" @@ -19,6 +20,8 @@ #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" +#include +#include namespace cudaq::opt { #define GEN_PASS_DEF_UNITARYSYNTHESIS @@ -28,16 +31,47 @@ namespace cudaq::opt { #define DEBUG_TYPE "unitary-synthesis" using namespace mlir; +using namespace std::complex_literals; namespace { + +constexpr double TOL = 1e-7; + +/// Base class for unitary synthesis, i.e. decomposing an arbitrary unitary +/// matrix into native gate set. The native gate set here includes all the +/// quantum operations supported by CUDA-Q. Additional passes may be required to +/// convert CUDA-Q gate set to hardware specific gate set. +class Decomposer { +private: + Eigen::MatrixXcd targetMatrix; + +public: + /// Function which implements the unitary synthesis algorithm. The result of + /// decomposition which depends on the algorithm must be convertible to + /// quantum operations. For example, result is saved into class member(s) as + /// the parameters to be applied to `Rx`, `Ry`, and `Rz` gates. + virtual void decompose() = 0; + /// Create the replacement function which invokes native quantum operations. 
+ /// The original `quake.custom_op` is replaced by `quake.apply` operation that + /// calls the new replacement function with the same operands as the original + /// operation. The 'control' and 'adjoint' variations are handled by + /// `ApplySpecialization` pass. + virtual void emitDecomposedFuncOp(quake::CustomUnitarySymbolOp customOp, + PatternRewriter &rewriter, + std::string funcName) = 0; + bool isAboveThreshold(double value) { return std::abs(value) > TOL; }; + virtual ~Decomposer() = default; +}; + +/// Result structure for 1-q Euler decomposition in ZYZ basis struct EulerAngles { double alpha; double beta; double gamma; }; -struct BasisZYZ { - std::array, 4> matrix; +struct OneQubitOpZYZ : public Decomposer { + Eigen::Matrix2cd targetMatrix; EulerAngles angles; /// Updates to the global phase double phase; @@ -45,31 +79,357 @@ struct BasisZYZ { /// This logic is based on https://arxiv.org/pdf/quant-ph/9503016 and its /// corresponding explanation in https://threeplusone.com/pubs/on_gates.pdf, /// Section 4. - void decompose() { - using namespace std::complex_literals; + void decompose() override { /// Rescale the input unitary matrix, `u`, to be special unitary. 
/// Extract a phase factor, `phase`, so that /// `determinant(inverse_phase * unitary) = 1` - auto det = (matrix[0] * matrix[3]) - (matrix[1] * matrix[2]); + auto det = targetMatrix.determinant(); phase = 0.5 * std::arg(det); - std::array, 4> specialUnitary; - std::transform( - matrix.begin(), matrix.end(), specialUnitary.begin(), - [&](auto element) { return element * std::exp(-1i * phase); }); - auto abs00 = std::abs(specialUnitary[0]); - auto abs01 = std::abs(specialUnitary[1]); + Eigen::Matrix2cd specialUnitary = std::exp(-1i * phase) * targetMatrix; + auto abs00 = std::abs(specialUnitary(0, 0)); + auto abs01 = std::abs(specialUnitary(0, 1)); if (abs00 >= abs01) - angles.beta = 2 * std::acos(abs00); + angles.beta = 2.0 * std::acos(abs00); else - angles.beta = 2 * std::asin(abs01); - auto sum = std::atan2(specialUnitary[3].imag(), specialUnitary[3].real()); - auto diff = std::atan2(specialUnitary[2].imag(), specialUnitary[2].real()); + angles.beta = 2.0 * std::asin(abs01); + auto sum = + std::atan2(specialUnitary(1, 1).imag(), specialUnitary(1, 1).real()); + auto diff = + std::atan2(specialUnitary(1, 0).imag(), specialUnitary(1, 0).real()); angles.alpha = sum + diff; angles.gamma = sum - diff; } - BasisZYZ(const std::vector> &vec) { - std::copy(vec.begin(), vec.begin() + 4, matrix.begin()); + void emitDecomposedFuncOp(quake::CustomUnitarySymbolOp customOp, + PatternRewriter &rewriter, + std::string funcName) override { + auto parentModule = customOp->getParentOfType(); + Location loc = customOp->getLoc(); + auto targets = customOp.getTargets(); + auto funcTy = + FunctionType::get(parentModule.getContext(), targets[0].getType(), {}); + auto insPt = rewriter.saveInsertionPoint(); + rewriter.setInsertionPointToStart(parentModule.getBody()); + auto func = + rewriter.create(parentModule->getLoc(), funcName, funcTy); + func.setPrivate(); + auto *block = func.addEntryBlock(); + rewriter.setInsertionPointToStart(block); + auto arguments = func.getArguments(); + 
FloatType floatTy = rewriter.getF64Type(); + /// NOTE: Operator notation is right-to-left, whereas circuit notation + /// is left-to-right. Hence, angles are applied as: + /// Rz(gamma)Ry(beta)Rz(alpha) + if (isAboveThreshold(angles.gamma)) { + auto gamma = cudaq::opt::factory::createFloatConstant( + loc, rewriter, angles.gamma, floatTy); + rewriter.create(loc, gamma, ValueRange{}, arguments); + } + if (isAboveThreshold(angles.beta)) { + auto beta = cudaq::opt::factory::createFloatConstant( + loc, rewriter, angles.beta, floatTy); + rewriter.create(loc, beta, ValueRange{}, arguments); + } + if (isAboveThreshold(angles.alpha)) { + auto alpha = cudaq::opt::factory::createFloatConstant( + loc, rewriter, angles.alpha, floatTy); + rewriter.create(loc, alpha, ValueRange{}, arguments); + } + /// NOTE: Typically global phase can be ignored but, if this decomposition + /// is applied in a kernel that is called with `cudaq::control`, the global + /// phase will become a local phase and give a wrong result if we don't keep + /// track of that. 
+ /// NOTE: R1-Rz pair results in a half the applied global phase angle, + /// hence, we need to multiply the angle by 2 + auto globalPhase = 2.0 * phase; + if (isAboveThreshold(globalPhase)) { + auto phase = cudaq::opt::factory::createFloatConstant( + loc, rewriter, globalPhase, floatTy); + Value negPhase = rewriter.create(loc, phase); + rewriter.create(loc, phase, ValueRange{}, arguments[0]); + rewriter.create(loc, negPhase, ValueRange{}, arguments[0]); + } + rewriter.create(loc); + rewriter.restoreInsertionPoint(insPt); + } + + OneQubitOpZYZ(const Eigen::Matrix2cd &vec) { + targetMatrix = vec; + decompose(); + } +}; + +/// Result for 2-q KAK decomposition +struct KAKComponents { + // KAK decomposition allows to express arbitrary 2-qubit unitary (U) in the + // form: U = (a1 ⊗ a0) x exp(i(xXX + yYY + zZZ)) x (b1 ⊗ b0) where, a0, a1, + // b0, b1 are single qubit operations, and the exponential is specified by the + // 3 coefficients of the canonical class vector - x, y, z + Eigen::Matrix2cd a0; + Eigen::Matrix2cd a1; + Eigen::Matrix2cd b0; + Eigen::Matrix2cd b1; + double x; + double y; + double z; +}; + +/// Helper function to convert a matrix into 'magic' basis +/// M = 1 / sqrt(2) * 1 0 0 i +/// 0 i 1 0 +/// 0 i −1 0 +/// 1 0 0 −i +const Eigen::Matrix4cd &MagicBasisMatrix() { + static Eigen::Matrix4cd MagicBasisMatrix; + MagicBasisMatrix << 1.0, 0.0, 0.0, 1i, 0.0, 1i, 1.0, 0, 0, 1i, -1.0, 0, 1.0, + 0, 0, -1i; + MagicBasisMatrix = MagicBasisMatrix * M_SQRT1_2; + return MagicBasisMatrix; +} + +/// Helper function to convert a matrix into 'magic' basis +const Eigen::Matrix4cd &MagicBasisMatrixAdj() { + static Eigen::Matrix4cd MagicBasisMatrixAdj = MagicBasisMatrix().adjoint(); + return MagicBasisMatrixAdj; +} + +/// Helper function to extract the coefficients of canonical vector +/// Gamma matrix = +1 +1 −1 +1 +/// +1 +1 +1 −1 +/// +1 −1 −1 −1 +/// +1 −1 +1 +1 +const Eigen::Matrix4cd &GammaFactor() { + + static Eigen::Matrix4cd GammaT; + GammaT << 1, 1, 1, 1, 1, 1, 
-1, -1, -1, 1, -1, 1, 1, -1, -1, 1; + GammaT /= 4; + return GammaT; +} + +/// Given an input matrix which is unitary, find two orthogonal matrices, 'left' +/// and 'right', and a diagonal unitary matrix, 'diagonal', such that +/// `input_matrix = left * diagonal * right.transpose()`. This function uses QZ +/// decomposition for this purpose. +/// NOTE: This function may not generate accurate diagonal matrix in some corner +/// cases like degenerate matrices. +std::tuple +bidiagonalize(const Eigen::Matrix4cd &matrix) { + Eigen::Matrix4d real = matrix.real(); + Eigen::Matrix4d imag = matrix.imag(); + Eigen::RealQZ qz(4); + qz.compute(real, imag); + Eigen::Matrix4d left = qz.matrixQ(); + Eigen::Matrix4d right = qz.matrixZ(); + if (left.determinant() < 0.0) + left.col(0) *= -1.0; + if (right.determinant() < 0.0) + right.row(0) *= -1.0; + Eigen::Matrix4cd diagonal = left.transpose() * matrix * right.transpose(); + assert(diagonal.isDiagonal(TOL)); + return std::make_tuple(left, diagonal, right); +} + +/// Separate input matrix into local operations. The input matrix must be +/// special orthogonal. Given a map, SU(2) × SU(2) -> SO(4), +/// map(A, B) = M.adjoint() (A ⊗ B∗) M, find A and B. 
+std::tuple> +extractSU2FromSO4(const Eigen::Matrix4cd &matrix) { + /// Verify input matrix is special orthogonal + assert(std::abs(std::abs(matrix.determinant()) - 1.0) < TOL); + assert((matrix * matrix.transpose() - Eigen::Matrix4cd::Identity()).norm() < + TOL); + Eigen::Matrix4cd mb = MagicBasisMatrix() * matrix * MagicBasisMatrixAdj(); + /// Use Kronecker factorization + size_t r = 0; + size_t c = 0; + double largest = std::abs(mb(r, c)); + for (size_t i = 0; i < 4; i++) + for (size_t j = 0; j < 4; j++) { + if (std::abs(mb(i, j)) >= largest) { + largest = std::abs(mb(i, j)); + r = i; + c = j; + } + } + Eigen::Matrix2cd part1 = Eigen::Matrix2cd::Zero(); + Eigen::Matrix2cd part2 = Eigen::Matrix2cd::Zero(); + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + part1((r >> 1) ^ i, (c >> 1) ^ j) = mb(r ^ (i << 1), c ^ (j << 1)); + part2((r & 1) ^ i, (c & 1) ^ j) = mb(r ^ i, c ^ j); + } + } + auto det1 = part1.determinant(); + if (std::abs(det1) > TOL) + part1 /= (std::sqrt(det1)); + auto det2 = part2.determinant(); + if (std::abs(det2) > TOL) + part2 /= (std::sqrt(det2)); + std::complex phase = + mb(r, c) / (part1(r >> 1, c >> 1) * part2(r & 1, c & 1)); + if (phase.real() < 0.0) { + part1 *= -1; + phase = -phase; + } + assert(mb.isApprox(phase * Eigen::kroneckerProduct(part1, part2), TOL)); + assert(part1.isUnitary(TOL) && part2.isUnitary(TOL)); + return std::make_tuple(part1, part2, phase); +} + +/// Compute exp(i(x XX + y YY + z ZZ)) matrix for verification +Eigen::Matrix4cd canonicalVecToMatrix(double x, double y, double z) { + Eigen::Matrix2cd X{Eigen::Matrix2cd::Zero()}; + Eigen::Matrix2cd Y{Eigen::Matrix2cd::Zero()}; + Eigen::Matrix2cd Z{Eigen::Matrix2cd::Zero()}; + X << 0, 1, 1, 0; + Y << 0, -1i, 1i, 0; + Z << 1, 0, 0, -1; + auto XX = Eigen::kroneckerProduct(X, X); + auto YY = Eigen::kroneckerProduct(Y, Y); + auto ZZ = Eigen::kroneckerProduct(Z, Z); + return (1i * (x * XX + y * YY + z * ZZ)).exp(); +} + +struct TwoQubitOpKAK : public Decomposer { 
+ Eigen::Matrix4cd targetMatrix; + KAKComponents components; + /// Updates to the global phase + std::complex phase; + + /// This logic is based on the Cartan's KAK decomposition. + /// Ref: https://arxiv.org/pdf/quant-ph/0507171 + /// Ref: https://arxiv.org/pdf/0806.4015 + void decompose() override { + /// Step0: Convert to special unitary + phase = std::pow(targetMatrix.determinant(), 0.25); + auto specialUnitary = targetMatrix / phase; + /// Step1: Convert into magic basis + Eigen::Matrix4cd matrixMagicBasis = + MagicBasisMatrixAdj() * specialUnitary * MagicBasisMatrix(); + /// Step2: Diagonalize + auto [left, diagonal, right] = bidiagonalize(matrixMagicBasis); + /// Step3: Get the KAK components + auto [a1, a0, aPh] = extractSU2FromSO4(left); + components.a0 = a0; + components.a1 = a1; + phase *= aPh; + auto [b1, b0, bPh] = extractSU2FromSO4(right); + components.b0 = b0; + components.b1 = b1; + phase *= bPh; + /// Step4: Get the coefficients of canonical class vector + if (diagonal.determinant().real() < 0.0) + diagonal(0, 0) *= -1.0; + Eigen::Vector4cd diagonalPhases; + for (size_t i = 0; i < 4; i++) + diagonalPhases(i) = std::arg(diagonal(i, i)); + auto coefficients = GammaFactor() * diagonalPhases; + components.x = coefficients(1).real(); + components.y = coefficients(2).real(); + components.z = coefficients(3).real(); + phase *= std::exp(1i * coefficients(0)); + /// Final check to verify results + auto canVecToMat = + canonicalVecToMatrix(components.x, components.y, components.z); + assert(targetMatrix.isApprox(phase * Eigen::kroneckerProduct(a1, a0) * + canVecToMat * + Eigen::kroneckerProduct(b1, b0), + TOL)); + } + + void emitDecomposedFuncOp(quake::CustomUnitarySymbolOp customOp, + PatternRewriter &rewriter, + std::string funcName) override { + auto a0 = OneQubitOpZYZ(components.a0); + a0.emitDecomposedFuncOp(customOp, rewriter, funcName + "a0"); + auto a1 = OneQubitOpZYZ(components.a1); + a1.emitDecomposedFuncOp(customOp, rewriter, funcName + "a1"); + 
auto b0 = OneQubitOpZYZ(components.b0); + b0.emitDecomposedFuncOp(customOp, rewriter, funcName + "b0"); + auto b1 = OneQubitOpZYZ(components.b1); + b1.emitDecomposedFuncOp(customOp, rewriter, funcName + "b1"); + auto parentModule = customOp->getParentOfType(); + Location loc = customOp->getLoc(); + auto targets = customOp.getTargets(); + auto funcTy = + FunctionType::get(parentModule.getContext(), targets.getTypes(), {}); + auto insPt = rewriter.saveInsertionPoint(); + rewriter.setInsertionPointToStart(parentModule.getBody()); + auto func = + rewriter.create(parentModule->getLoc(), funcName, funcTy); + func.setPrivate(); + auto *block = func.addEntryBlock(); + rewriter.setInsertionPointToStart(block); + auto arguments = func.getArguments(); + FloatType floatTy = rewriter.getF64Type(); + /// NOTE: Operator notation is right-to-left, whereas circuit notation is + /// left-to-right. Hence, operations are applied in reverse order. + rewriter.create( + loc, TypeRange{}, + SymbolRefAttr::get(rewriter.getContext(), funcName + "b0"), false, + ValueRange{}, ValueRange{arguments[1]}); + rewriter.create( + loc, TypeRange{}, + SymbolRefAttr::get(rewriter.getContext(), funcName + "b1"), false, + ValueRange{}, ValueRange{arguments[0]}); + /// TODO: Refactor to use a transformation pass for `quake.exp_pauli` + /// XX + if (isAboveThreshold(components.x)) { + rewriter.create(loc, arguments[0]); + rewriter.create(loc, arguments[1]); + rewriter.create(loc, arguments[1], arguments[0]); + auto xAngle = cudaq::opt::factory::createFloatConstant( + loc, rewriter, -2.0 * components.x, floatTy); + rewriter.create(loc, xAngle, ValueRange{}, arguments[0]); + rewriter.create(loc, arguments[1], arguments[0]); + rewriter.create(loc, arguments[1]); + rewriter.create(loc, arguments[0]); + } + /// YY + if (isAboveThreshold(components.y)) { + auto piBy2 = cudaq::opt::factory::createFloatConstant(loc, rewriter, + M_PI_2, floatTy); + rewriter.create(loc, piBy2, ValueRange{}, arguments[0]); + 
rewriter.create(loc, piBy2, ValueRange{}, arguments[1]); + rewriter.create(loc, arguments[1], arguments[0]); + auto yAngle = cudaq::opt::factory::createFloatConstant( + loc, rewriter, -2.0 * components.y, floatTy); + rewriter.create(loc, yAngle, ValueRange{}, arguments[0]); + rewriter.create(loc, arguments[1], arguments[0]); + Value negPiBy2 = rewriter.create(loc, piBy2); + rewriter.create(loc, negPiBy2, ValueRange{}, arguments[1]); + rewriter.create(loc, negPiBy2, ValueRange{}, arguments[0]); + } + /// ZZ + if (isAboveThreshold(components.z)) { + rewriter.create(loc, arguments[1], arguments[0]); + auto zAngle = cudaq::opt::factory::createFloatConstant( + loc, rewriter, -2.0 * components.z, floatTy); + rewriter.create(loc, zAngle, ValueRange{}, arguments[0]); + rewriter.create(loc, arguments[1], arguments[0]); + } + rewriter.create( + loc, TypeRange{}, + SymbolRefAttr::get(rewriter.getContext(), funcName + "a0"), false, + ValueRange{}, ValueRange{arguments[1]}); + rewriter.create( + loc, TypeRange{}, + SymbolRefAttr::get(rewriter.getContext(), funcName + "a1"), false, + ValueRange{}, ValueRange{arguments[0]}); + auto globalPhase = 2.0 * std::arg(phase); + if (isAboveThreshold(globalPhase)) { + auto phase = cudaq::opt::factory::createFloatConstant( + loc, rewriter, globalPhase, floatTy); + Value negPhase = rewriter.create(loc, phase); + rewriter.create(loc, phase, ValueRange{}, arguments[0]); + rewriter.create(loc, negPhase, ValueRange{}, arguments[0]); + } + rewriter.create(loc); + rewriter.restoreInsertionPoint(insPt); + } + + TwoQubitOpKAK(const Eigen::MatrixXcd &vec) { + targetMatrix = vec; decompose(); } }; @@ -82,9 +442,6 @@ class CustomUnitaryPattern LogicalResult matchAndRewrite(quake::CustomUnitarySymbolOp customOp, PatternRewriter &rewriter) const override { auto parentModule = customOp->getParentOfType(); - Location loc = customOp->getLoc(); - auto targets = customOp.getTargets(); - auto controls = customOp.getControls(); /// Get the global constant 
holding the concrete matrix corresponding to /// this custom operation invocation StringRef generatorName = customOp.getGenerator().getRootReference(); @@ -95,72 +452,34 @@ class CustomUnitaryPattern std::string funcName = pair.first.str() + ".kernel" + pair.second.str(); /// If the replacement function doesn't exist, create it here if (!parentModule.lookupSymbol(funcName)) { - auto unitary = cudaq::opt::factory::readGlobalConstantArray(globalOp); - /// TODO: Expand the logic to decompose upto 4-qubit operations - if (unitary.size() != 4) { - customOp.emitWarning( - "Decomposition of only single qubit custom operations supported."); + auto matrix = cudaq::opt::factory::readGlobalConstantArray(globalOp); + size_t dimension = std::sqrt(matrix.size()); + auto unitary = + Eigen::Map(matrix.data(), dimension, dimension); + unitary.transposeInPlace(); + if (!unitary.isUnitary(TOL)) { + customOp.emitWarning("The custom operation matrix must be unitary."); return failure(); } - /// Controls are handled via apply specialization, hence not included in - /// arguments - auto funcTy = - FunctionType::get(parentModule.getContext(), targets.getTypes(), {}); - auto insPt = rewriter.saveInsertionPoint(); - rewriter.setInsertionPointToStart(parentModule.getBody()); - auto func = rewriter.create(parentModule->getLoc(), - funcName, funcTy); - func.setPrivate(); - auto *block = func.addEntryBlock(); - rewriter.setInsertionPointToStart(block); - /// Use Euler angle decomposition for single qubit operation - auto zyz = BasisZYZ(unitary); - /// For 1-qubit operation, apply on 'all' the targets - auto arguments = func.getArguments(); - FloatType floatTy = rewriter.getF64Type(); - /// Ignore angles less than some threshold - auto isAboveThreshold = [&](auto value) { - const double epsilon = 1e-9; - return std::abs(value) > epsilon; - }; - /// NOTE: Operator notation is right-to-left, whereas circuit notation is - /// left-to-right. 
Hence, angles are applied as Rz(gamma)Ry(beta)Rz(alpha) - if (isAboveThreshold(zyz.angles.gamma)) { - auto gamma = cudaq::opt::factory::createFloatConstant( - loc, rewriter, zyz.angles.gamma, floatTy); - rewriter.create(loc, gamma, ValueRange{}, arguments); - } - if (isAboveThreshold(zyz.angles.beta)) { - auto beta = cudaq::opt::factory::createFloatConstant( - loc, rewriter, zyz.angles.beta, floatTy); - rewriter.create(loc, beta, ValueRange{}, arguments); - } - if (isAboveThreshold(zyz.angles.alpha)) { - auto alpha = cudaq::opt::factory::createFloatConstant( - loc, rewriter, zyz.angles.alpha, floatTy); - rewriter.create(loc, alpha, ValueRange{}, arguments); - } - /// NOTE: Typically global phase can be ignored but, if this decomposition - /// is applied in a kernel that is called with `cudaq::control`, the - /// global phase will become a local phase and give a wrong result if we - /// don't keep track of that. - /// NOTE: R1-Rz pair results in a half the applied global phase angle, - /// hence, we need to multiply the angle by 2 - auto globalPhase = 2 * zyz.phase; - if (isAboveThreshold(globalPhase)) { - auto phase = cudaq::opt::factory::createFloatConstant( - loc, rewriter, globalPhase, floatTy); - Value negPhase = rewriter.create(loc, phase); - rewriter.create(loc, phase, ValueRange{}, arguments[0]); - rewriter.create(loc, negPhase, ValueRange{}, arguments[0]); + switch (dimension) { + case 2: { + auto zyz = OneQubitOpZYZ(unitary); + zyz.emitDecomposedFuncOp(customOp, rewriter, funcName); + } break; + case 4: { + auto kak = TwoQubitOpKAK(unitary); + kak.emitDecomposedFuncOp(customOp, rewriter, funcName); + } break; + default: + customOp.emitWarning( + "Decomposition of only 1 and 2 qubit custom operations supported."); + return failure(); } - rewriter.create(loc); - rewriter.restoreInsertionPoint(insPt); } rewriter.replaceOpWithNewOp( customOp, TypeRange{}, SymbolRefAttr::get(rewriter.getContext(), funcName), customOp.isAdj(), - controls, targets); + 
customOp.getControls(), customOp.getTargets()); return success(); } }; diff --git a/lib/Optimizer/Transforms/WiresToWiresets.cpp b/lib/Optimizer/Transforms/WiresToWiresets.cpp index b010bbb271..0ce96b4b8f 100644 --- a/lib/Optimizer/Transforms/WiresToWiresets.cpp +++ b/lib/Optimizer/Transforms/WiresToWiresets.cpp @@ -75,6 +75,11 @@ struct AssignWireIndicesPass void runOnOperation() override { func::FuncOp func = getOperation(); + // Only run on the entrypoint, the expectation is that inlining has been + // done already, so there should only be one kernel remaining. + if (!func->hasAttr(cudaq::entryPointAttrName)) + return; + // TODO: someday we may want to allow calls to non-quantum functions if (cudaq::opt::hasCallOp(func)) { func.emitRemark( @@ -82,11 +87,6 @@ struct AssignWireIndicesPass return; } - // Only run on the entrypoint, the expectation is that inlining has been - // done already, so there should only be one kernel remaining. - if (!func->hasAttr(cudaq::entryPointAttrName)) - return; - auto *ctx = &getContext(); RewritePatternSet patterns(ctx); unsigned x = 0; diff --git a/lib/Support/Config/TargetConfig.cpp b/lib/Support/Config/TargetConfig.cpp index a75595b655..0814f3f846 100644 --- a/lib/Support/Config/TargetConfig.cpp +++ b/lib/Support/Config/TargetConfig.cpp @@ -32,7 +32,9 @@ static std::unordered_map stringToFeatureFlag{{"fp32", cudaq::config::flagsFP32}, {"fp64", cudaq::config::flagsFP64}, {"mgpu", cudaq::config::flagsMgpu}, - {"mqpu", cudaq::config::flagsMqpu}}; + {"mqpu", cudaq::config::flagsMqpu}, + {"dep-analysis", cudaq::config::flagsDepAnalysis}, + {"qpp", cudaq::config::flagsQPP}}; } /// @brief Convert the backend config entry into nvq++ compatible script. 
@@ -52,6 +54,10 @@ static std::string processSimBackendConfig( output << "PLATFORM_LOWERING_CONFIG=\"" << configValue.PlatformLoweringConfig << "\"\n"; + if (!configValue.TargetPassPipeline.empty()) + output << "TARGET_PASS_PIPELINE=\"" << configValue.TargetPassPipeline + << "\"\n"; + if (!configValue.CodegenEmission.empty()) output << "CODEGEN_EMISSION=" << configValue.CodegenEmission << "\n"; @@ -302,6 +308,7 @@ void MappingTraits::mapping( io.mapOptional("gen-target-backend", info.GenTargetBackend); io.mapOptional("library-mode", info.LibraryMode); io.mapOptional("platform-lowering-config", info.PlatformLoweringConfig); + io.mapOptional("target-pass-pipeline", info.TargetPassPipeline); io.mapOptional("codegen-emission", info.CodegenEmission); io.mapOptional("post-codegen-passes", info.PostCodeGenPasses); io.mapOptional("platform-library", info.PlatformLibrary); diff --git a/python/cudaq/handlers/photonics_kernel.py b/python/cudaq/handlers/photonics_kernel.py index fc676c33a9..d345fd629d 100644 --- a/python/cudaq/handlers/photonics_kernel.py +++ b/python/cudaq/handlers/photonics_kernel.py @@ -13,9 +13,6 @@ from ..mlir._mlir_libs._quakeDialects import cudaq_runtime -# The qudit level must be explicitly defined -globalQuditLevel = None - @dataclass class PyQudit: @@ -32,8 +29,39 @@ class PyQudit: level: int id: int - def __del__(self): - cudaq_runtime.photonics.release_qudit(self.level, self.id) + +class QuditManager(object): + """ + A class to explicitly manage resource allocation for qudits within a + `PhotonicsKernel`. 
+ """ + qudit_level = None + allocated_ids = [] + + @classmethod + def reset(cls): + cls.qudit_level = None + cls.allocated_ids = [] + + @classmethod + def allocate(cls, level: int): + if cls.qudit_level is None: + cls.qudit_level = level + elif level != cls.qudit_level: + raise RuntimeError( + "The qudits must be of same level within a kernel.") + id = cudaq_runtime.photonics.allocate_qudit(cls.qudit_level) + cls.allocated_ids.append(id) + return PyQudit(cls.qudit_level, id) + + def __enter__(cls): + cls.reset() + + def __exit__(cls, exc_type, exc_val, exc_tb): + while cls.allocated_ids: + cudaq_runtime.photonics.release_qudit(cls.allocated_ids.pop(), + cls.qudit_level) + cls.reset() def _is_qudit_type(q: any) -> bool: @@ -63,7 +91,7 @@ def _check_args(q: any): RuntimeError: If the qudit level is not set. Exception: If input argument is not instance of `PyQudit` class. """ - if globalQuditLevel is None: + if QuditManager.qudit_level is None: raise RuntimeError( "Qudit level not set. Define a qudit (`qudit(level=N)`) or list of qudits." ) @@ -89,15 +117,40 @@ def qudit(level: int) -> PyQudit: RuntimeError: If a qudit of level different than one already defined in the kernel is requested. """ - global globalQuditLevel + return QuditManager.allocate(level) - if globalQuditLevel is None: - globalQuditLevel = level - elif level != globalQuditLevel: - raise RuntimeError("The qudits must be of same level within a kernel.") +def create(qudit: PyQudit): + """ + Apply create gate on the input qudit. + U|0> -> |1>, U|1> -> |2>, ..., and U|d> -> |d> - id = cudaq_runtime.photonics.allocate_qudit(globalQuditLevel) - return PyQudit(globalQuditLevel, id) + Args: + qudit: An instance of `PyQudit` class. + + Raises: + RuntimeError: If the qudit level is not set. + Exception: If input argument is not instance of `PyQudit` class. 
+ """ + _check_args(qudit) + cudaq_runtime.photonics.apply_operation("create", [], + [[qudit.level, qudit.id]]) + + +def annihilate(qudit: PyQudit): + """ + Apply annihilate gate on the input qudit. + U|0> -> |0>, U|1> -> |0>, ..., and U|d> -> |d-1> + + Args: + qudit: An instance of `PyQudit` class. + + Raises: + RuntimeError: If the qudit level is not set. + Exception: If input argument is not instance of `PyQudit` class. + """ + _check_args(qudit) + cudaq_runtime.photonics.apply_operation("annihilate", [], + [[qudit.level, qudit.id]]) def plus(qudit: PyQudit): @@ -113,7 +166,7 @@ def plus(qudit: PyQudit): Exception: If input argument is not instance of `PyQudit` class. """ _check_args(qudit) - cudaq_runtime.photonics.apply_operation('plusGate', [], + cudaq_runtime.photonics.apply_operation("plus", [], [[qudit.level, qudit.id]]) @@ -131,7 +184,7 @@ def phase_shift(qudit: PyQudit, phi: float): Exception: If input argument is not instance of `PyQudit` class. """ _check_args(qudit) - cudaq_runtime.photonics.apply_operation('phaseShiftGate', [phi], + cudaq_runtime.photonics.apply_operation("phase_shift", [phi], [[qudit.level, qudit.id]]) @@ -150,7 +203,7 @@ def beam_splitter(q: PyQudit, r: PyQudit, theta: float): Exception: If input argument is not instance of `PyQudit` class. """ _check_args([q, r]) - cudaq_runtime.photonics.apply_operation('beamSplitterGate', [theta], + cudaq_runtime.photonics.apply_operation("beam_splitter", [theta], [[q.level, q.id], [r.level, r.id]]) @@ -182,32 +235,34 @@ def mz(qudits: PyQudit | List[PyQudit], register_name=''): class PhotonicsHandler(object): """ The `PhotonicsHandler` class serves as to process CUDA-Q kernels for the - `photonics` target. - The target must be set to `photonics` prior to invoking a `PhotonicsHandler`. + `photonics-cpu` target. + The target must be set to `photonics-cpu` prior to invoking a `PhotonicsHandler`. The quantum operations in this kernel apply to qudits defined by `qudit(level=N)` or a list of qudits. 
The qudits within a kernel must be of the same level. - Allowed quantum operations are: `plus`, `phase_shift`, `beam_splitter`, and `mz`. + Allowed quantum operations are: `create`, `annihilate`, `plus`, + `phase_shift`, `beam_splitter`, and `mz`. """ def __init__(self, function): if 'photonics' != cudaq_runtime.get_target().name: raise RuntimeError( - "A photonics kernel can only be used with 'photonics' target.") - - global globalQuditLevel - globalQuditLevel = None + "A photonics kernel can only be used with 'photonics-cpu' target.") + QuditManager.reset() self.kernelFunction = function - self.kernelFunction.__globals__['qudit'] = qudit - self.kernelFunction.__globals__['plus'] = plus - self.kernelFunction.__globals__['phase_shift'] = phase_shift - self.kernelFunction.__globals__['beam_splitter'] = beam_splitter - self.kernelFunction.__globals__['mz'] = mz + self.kernelFunction.__globals__["qudit"] = qudit + self.kernelFunction.__globals__["create"] = create + self.kernelFunction.__globals__["annihilate"] = annihilate + self.kernelFunction.__globals__["plus"] = plus + self.kernelFunction.__globals__["phase_shift"] = phase_shift + self.kernelFunction.__globals__["beam_splitter"] = beam_splitter + self.kernelFunction.__globals__["mz"] = mz def __call__(self, *args): - return self.kernelFunction(*args) + with QuditManager(): + return self.kernelFunction(*args) diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py index fa50edaba3..6597b38f5d 100644 --- a/python/cudaq/kernel/ast_bridge.py +++ b/python/cudaq/kernel/ast_bridge.py @@ -251,7 +251,8 @@ def isQuantumType(self, ty): Return True if the given type is quantum (is a `VeqType` or `RefType`). Return False otherwise. 
""" - return quake.RefType.isinstance(ty) or quake.VeqType.isinstance(ty) + return quake.RefType.isinstance(ty) or quake.VeqType.isinstance( + ty) or quake.StruqType.isinstance(ty) def isMeasureResultType(self, ty, value): """ @@ -526,7 +527,10 @@ def getStructMemberIdx(self, memberName, structTy): the index of the variable in the struct and the specific MLIR type for the variable. """ - structName = cc.StructType.getName(structTy) + if cc.StructType.isinstance(structTy): + structName = cc.StructType.getName(structTy) + else: + structName = quake.StruqType.getName(structTy) structIdx = None _, userType = globalRegisteredTypes[structName] for i, (k, _) in enumerate(userType.items()): @@ -665,18 +669,11 @@ def convertArithmeticToSuperiorType(self, values, type): return retValues - def isQuantumStructType(self, structTy): + def isQuantumStructType(self, ty): """ - Return True if the given struct type has one or more quantum member variables. + Return True if the given struct type has only quantum member variables. """ - if not cc.StructType.isinstance(structTy): - self.emitFatalError( - f'isQuantumStructType called on type that is not a struct ({structTy})' - ) - - return True in [ - self.isQuantumType(t) for t in cc.StructType.getTypes(structTy) - ] + return quake.StruqType.isinstance(ty) def mlirTypeFromAnnotation(self, annotation): """ @@ -843,9 +840,6 @@ def needsStackSlot(self, type): function. 
""" # FIXME add more as we need them - if cc.StructType.isinstance(type) and self.isQuantumStructType(type): - # If we have a quantum struct, we don't want to add a stack slot - return False return ComplexType.isinstance(type) or F64Type.isinstance( type) or F32Type.isinstance(type) or IntegerType.isinstance( type) or cc.StructType.isinstance(type) @@ -927,7 +921,7 @@ def visit_FunctionDef(self, node): # Set this kernel as an entry point if the argument types are classical only def isQuantumTy(ty): return quake.RefType.isinstance(ty) or quake.VeqType.isinstance( - ty) + ty) or quake.StruqType.isinstance(ty) areQuantumTypes = [isQuantumTy(ty) for ty in self.argTypes] if True not in areQuantumTypes and not self.disableEntryPointTag: @@ -1179,17 +1173,16 @@ def visit_Attribute(self, node): if isinstance(node.value, ast.Name) and node.value.id in self.symbolTable: value = self.symbolTable[node.value.id] - if cc.StructType.isinstance( - value.type) and self.isQuantumStructType(value.type): + if self.isQuantumStructType(value.type): # Here we have a quantum struct, need to use extract value instead # of load from compute pointer. 
structIdx, memberTy = self.getStructMemberIdx( node.attr, value.type) self.pushValue( - cc.ExtractValueOp( - memberTy, value, [], - DenseI32ArrayAttr.get([structIdx], - context=self.ctx)).result) + quake.GetMemberOp( + memberTy, value, + IntegerAttr.get(self.getIntegerType(32), + structIdx)).result) return if cc.PointerType.isinstance(value.type): @@ -1903,24 +1896,51 @@ def bodyBuilder(iterVal): mlirTypeFromPyType(v, self.ctx) for _, v in annotations.items() ] - structTy = cc.StructType.getNamed(self.ctx, node.func.id, - structTys) + # Ensure we don't use hybrid data types + numQuantumMemberTys = sum( + [1 if self.isQuantumType(ty) else 0 for ty in structTys]) + if numQuantumMemberTys != 0: # we have quantum member types + if numQuantumMemberTys != len(structTys): + self.emitFatalError( + f'hybrid quantum-classical data types not allowed in kernel code', + node) + + isStruq = not (not structTys) + for fieldTy in structTys: + if not self.isQuantumType(fieldTy): + isStruq = False + if isStruq: + structTy = quake.StruqType.getNamed(self.ctx, node.func.id, + structTys) + # Disallow recursive quantum struct types. 
+ for fieldTy in structTys: + if self.isQuantumStructType(fieldTy): + self.emitFatalError( + 'recursive quantum struct types not allowed.', + node) + else: + structTy = cc.StructType.getNamed(self.ctx, node.func.id, + structTys) + + # Disallow user specified methods on structs + if len({ + k: v + for k, v in cls.__dict__.items() + if not (k.startswith('__') and k.endswith('__')) + }) != 0: + self.emitFatalError( + 'struct types with user specified methods are not allowed.', + node) + nArgs = len(self.valueStack) ctorArgs = [self.popValue() for _ in range(nArgs)] ctorArgs.reverse() - if self.isQuantumStructType(structTy): - # If we have a struct with quantum types, we do not - # want to allocate struct memory and load / store pointers - # to quantum memory, so we'll instead use value semantics - # with InsertValue - undefOp = cc.UndefOp(structTy).result - for i, arg in enumerate(ctorArgs): - undefOp = cc.InsertValueOp( - structTy, undefOp, arg, - DenseI64ArrayAttr.get([i], context=self.ctx)).result - - self.pushValue(undefOp) + if isStruq: + # If we have a quantum struct. We cannot allocate classical + # memory and load / store quantum type values to that memory + # space, so use `quake.MakeStruqOp`. + self.pushValue(quake.MakeStruqOp(structTy, ctorArgs).result) return stackSlot = cc.AllocaOp(cc.PointerType.get(self.ctx, structTy), diff --git a/python/cudaq/kernel/kernel_decorator.py b/python/cudaq/kernel/kernel_decorator.py index b43c54a6c8..d83f3208ac 100644 --- a/python/cudaq/kernel/kernel_decorator.py +++ b/python/cudaq/kernel/kernel_decorator.py @@ -355,12 +355,22 @@ def __call__(self, *args): except RuntimeError: target_name = None - if 'photonics' == target_name: + if 'photonics-cpu' == target_name: if self.kernelFunction is None: raise RuntimeError( - "The 'photonics' target must be used with a valid function." + "The 'photonics-cpu' target must be used with a valid function." 
) - PhotonicsHandler(self.kernelFunction)(*args) + # NOTE: Since this handler does not support MLIR mode (yet), just + # invoke the kernel. If calling from a bound function, need to + # unpack the arguments, for example, see `pyGetStateLibraryMode` + try: + context_name = cudaq_runtime.getExecutionContextName() + except RuntimeError: + context_name = None + callable_args = args + if "extract-state" == context_name and len(args) == 1: + callable_args = args[0] + PhotonicsHandler(self.kernelFunction)(*callable_args) return # Prepare captured state storage for the run diff --git a/python/cudaq/kernel/register_op.py b/python/cudaq/kernel/register_op.py index 73ac749c25..8d321b26a5 100644 --- a/python/cudaq/kernel/register_op.py +++ b/python/cudaq/kernel/register_op.py @@ -13,6 +13,7 @@ from .utils import globalRegisteredOperations from .kernel_builder import PyKernel, __generalCustomOperation +from ..mlir._mlir_libs._quakeDialects import cudaq_runtime def register_operation(operation_name: str, unitary): @@ -58,5 +59,8 @@ def kernel(): # Make available to kernel builder object setattr(PyKernel, operation_name, partialmethod(__generalCustomOperation, operation_name)) + # Let the runtime know about this registered operation. + # Note: the matrix generator/construction is not known by the ExecutionManager in this case since we don't expect the ExecutionManager to be involved. + cudaq_runtime.register_custom_operation(operation_name) return diff --git a/python/cudaq/kernel/utils.py b/python/cudaq/kernel/utils.py index 99d9705ea1..f3c0f1e52b 100644 --- a/python/cudaq/kernel/utils.py +++ b/python/cudaq/kernel/utils.py @@ -213,8 +213,37 @@ def emitFatalErrorOverride(msg): # One final check to see if this is a custom data type. 
if id in globalRegisteredTypes: - _, memberTys = globalRegisteredTypes[id] + pyType, memberTys = globalRegisteredTypes[id] structTys = [mlirTypeFromPyType(v, ctx) for _, v in memberTys.items()] + for ty in structTys: + if cc.StructType.isinstance(ty): + localEmitFatalError( + 'recursive struct types are not allowed in kernels.') + + if len({ + k: v + for k, v in pyType.__dict__.items() + if not (k.startswith('__') and k.endswith('__')) + }) != 0: + localEmitFatalError( + 'struct types with user specified methods are not allowed.') + + numQuantumMemberTys = sum([ + 1 if + (quake.RefType.isinstance(ty) or quake.VeqType.isinstance(ty) or + quake.StruqType.isinstance(ty)) else 0 for ty in structTys + ]) + numStruqMemberTys = sum( + [1 if (quake.StruqType.isinstance(ty)) else 0 for ty in structTys]) + if numQuantumMemberTys != 0: # we have quantum member types + if numQuantumMemberTys != len(structTys): + emitFatalError( + f'hybrid quantum-classical data types not allowed in kernel code.' + ) + if numStruqMemberTys != 0: + emitFatalError(f'recursive quantum struct types not allowed.') + return quake.StruqType.getNamed(ctx, id, structTys) + return cc.StructType.getNamed(ctx, id, structTys) localEmitFatalError( @@ -320,19 +349,37 @@ def mlirTypeFromPyType(argType, ctx, **kwargs): argInstance = kwargs['argInstance'] if isinstance(argInstance, Callable): return cc.CallableType.get(ctx, argInstance.argTypes) - else: - if argType == list[int]: - return cc.StdvecType.get(ctx, mlirTypeFromPyType(int, ctx)) - if argType == list[float]: - return cc.StdvecType.get(ctx, mlirTypeFromPyType(float, ctx)) for name, (customTys, memberTys) in globalRegisteredTypes.items(): if argType == customTys: structTys = [ mlirTypeFromPyType(v, ctx) for _, v in memberTys.items() ] + numQuantumMemberTys = sum([ + 1 if + (quake.RefType.isinstance(ty) or quake.VeqType.isinstance(ty) or + quake.StruqType.isinstance(ty)) else 0 for ty in structTys + ]) + numStruqMemberTys = sum([ + 1 if 
(quake.StruqType.isinstance(ty)) else 0 for ty in structTys + ]) + if numQuantumMemberTys != 0: # we have quantum member types + if numQuantumMemberTys != len(structTys): + emitFatalError( + f'hybrid quantum-classical data types not allowed') + if numStruqMemberTys != 0: + emitFatalError( + f'recursive quantum struct types not allowed.') + return quake.StruqType.getNamed(ctx, name, structTys) + return cc.StructType.getNamed(ctx, name, structTys) + if 'argInstance' not in kwargs: + if argType == list[int]: + return cc.StdvecType.get(ctx, mlirTypeFromPyType(int, ctx)) + if argType == list[float]: + return cc.StdvecType.get(ctx, mlirTypeFromPyType(float, ctx)) + emitFatalError( f"Can not handle conversion of python type {argType} to MLIR type.") diff --git a/python/extension/CMakeLists.txt b/python/extension/CMakeLists.txt index b8459aaa0e..de6ba323bd 100644 --- a/python/extension/CMakeLists.txt +++ b/python/extension/CMakeLists.txt @@ -48,6 +48,7 @@ declare_mlir_python_extension(CUDAQuantumPythonSources.Extension ../runtime/common/py_NoiseModel.cpp ../runtime/common/py_ObserveResult.cpp ../runtime/common/py_SampleResult.cpp + ../runtime/common/py_CustomOpRegistry.cpp ../runtime/cudaq/algorithms/py_draw.cpp ../runtime/cudaq/algorithms/py_observe_async.cpp ../runtime/cudaq/algorithms/py_optimizer.cpp @@ -69,10 +70,13 @@ declare_mlir_python_extension(CUDAQuantumPythonSources.Extension ../runtime/utils/PyRemoteSimulatorQPU.cpp ../runtime/utils/PyRestRemoteClient.cpp ../utils/LinkedLibraryHolder.cpp + ../../runtime/common/ArgumentConversion.cpp ../../runtime/cudaq/platform/common/QuantumExecutionQueue.cpp ../../runtime/cudaq/platform/default/rest_server/RemoteRuntimeClient.cpp + ../../runtime/cudaq/platform/orca/OrcaExecutor.cpp ../../runtime/cudaq/platform/orca/OrcaQPU.cpp - ../../runtime/common/ArgumentConversion.cpp + ../../runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp + ../../runtime/cudaq/platform/orca/OrcaServerHelper.cpp EMBED_CAPI_LINK_LIBS CUDAQuantumMLIRCAPI 
diff --git a/python/extension/CUDAQuantumExtension.cpp b/python/extension/CUDAQuantumExtension.cpp index ebace1ab73..dc78fe38de 100644 --- a/python/extension/CUDAQuantumExtension.cpp +++ b/python/extension/CUDAQuantumExtension.cpp @@ -10,6 +10,7 @@ #include "cudaq.h" #include "cudaq/Support/Version.h" #include "cudaq/platform/orca/orca_qpu.h" +#include "runtime/common/py_CustomOpRegistry.h" #include "runtime/common/py_ExecutionContext.h" #include "runtime/common/py_NoiseModel.h" #include "runtime/common/py_ObserveResult.h" @@ -103,9 +104,11 @@ PYBIND11_MODULE(_quakeDialects, m) { cudaq::bindVQE(cudaqRuntime); cudaq::bindAltLaunchKernel(cudaqRuntime); cudaq::bindTestUtils(cudaqRuntime, *holder.get()); + cudaq::bindCustomOpRegistry(cudaqRuntime); cudaqRuntime.def("set_random_seed", &cudaq::set_random_seed, "Provide the seed for backend quantum kernel simulation."); + cudaqRuntime.def("num_available_gpus", &cudaq::num_available_gpus, "The number of available GPUs detected on the system."); @@ -164,20 +167,41 @@ PYBIND11_MODULE(_quakeDialects, m) { orcaSubmodule.def( "sample", py::overload_cast &, std::vector &, - std::vector &, std::vector &, int>( - &cudaq::orca::sample), + std::vector &, std::vector &, int, + std::size_t>(&cudaq::orca::sample), "Performs Time Bin Interferometer (TBI) boson sampling experiments on " "ORCA's backends", py::arg("input_state"), py::arg("loop_lengths"), py::arg("bs_angles"), - py::arg("ps_angles") = nullptr, py::arg("n_samples") = 10000); + py::arg("ps_angles"), py::arg("n_samples") = 10000, + py::arg("qpu_id") = 0); orcaSubmodule.def( "sample", py::overload_cast &, std::vector &, - std::vector &, int>(&cudaq::orca::sample), + std::vector &, int, std::size_t>( + &cudaq::orca::sample), + "Performs Time Bin Interferometer (TBI) boson sampling experiments on " + "ORCA's backends", + py::arg("input_state"), py::arg("loop_lengths"), py::arg("bs_angles"), + py::arg("n_samples") = 10000, py::arg("qpu_id") = 0); + orcaSubmodule.def( + 
"sample_async", + py::overload_cast &, std::vector &, + std::vector &, std::vector &, int, + std::size_t>(&cudaq::orca::sample_async), "Performs Time Bin Interferometer (TBI) boson sampling experiments on " "ORCA's backends", py::arg("input_state"), py::arg("loop_lengths"), py::arg("bs_angles"), - py::arg("n_samples") = 10000); + py::arg("ps_angles"), py::arg("n_samples") = 10000, + py::arg("qpu_id") = 0); + orcaSubmodule.def( + "sample_async", + py::overload_cast &, std::vector &, + std::vector &, int, std::size_t>( + &cudaq::orca::sample_async), + "Performs Time Bin Interferometer (TBI) boson sampling experiments on " + "ORCA's backends", + py::arg("input_state"), py::arg("loop_lengths"), py::arg("bs_angles"), + py::arg("n_samples") = 10000, py::arg("qpu_id") = 0); auto photonicsSubmodule = cudaqRuntime.def_submodule("photonics"); photonicsSubmodule.def( @@ -215,7 +239,6 @@ PYBIND11_MODULE(_quakeDialects, m) { cudaq::getExecutionManager()->returnQudit(cudaq::QuditInfo(level, id)); }, "Release a qudit of given id.", py::arg("level"), py::arg("id")); - cudaqRuntime.def("cloneModule", [](MlirModule mod) { return wrap(unwrap(mod).clone()); }); cudaqRuntime.def("isTerminator", [](MlirOperation op) { diff --git a/python/runtime/common/py_CustomOpRegistry.cpp b/python/runtime/common/py_CustomOpRegistry.cpp new file mode 100644 index 0000000000..224d99bf29 --- /dev/null +++ b/python/runtime/common/py_CustomOpRegistry.cpp @@ -0,0 +1,34 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ +#include "py_CustomOpRegistry.h" +#include "common/CustomOp.h" +#include +#include +#include + +namespace cudaq { +struct py_unitary_operation : public unitary_operation { + std::vector> + unitary(const std::vector ¶meters = + std::vector()) const override { + throw std::runtime_error("Attempt to invoke the placeholder for Python " + "unitary op. This is illegal."); + return {}; + } +}; + +void bindCustomOpRegistry(py::module &mod) { + mod.def( + "register_custom_operation", + [&](const std::string &opName) { + cudaq::customOpRegistry::getInstance() + .registerOperation(opName); + }, + "Register a custom operation"); +} +} // namespace cudaq diff --git a/python/runtime/common/py_CustomOpRegistry.h b/python/runtime/common/py_CustomOpRegistry.h new file mode 100644 index 0000000000..c3a89a96c6 --- /dev/null +++ b/python/runtime/common/py_CustomOpRegistry.h @@ -0,0 +1,16 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include + +namespace py = pybind11; + +namespace cudaq { +/// @brief Bind the custom operation registry to Python. 
+void bindCustomOpRegistry(py::module &mod); +} // namespace cudaq diff --git a/python/runtime/common/py_ExecutionContext.cpp b/python/runtime/common/py_ExecutionContext.cpp index baa1ca518c..b1fc025e1e 100644 --- a/python/runtime/common/py_ExecutionContext.cpp +++ b/python/runtime/common/py_ExecutionContext.cpp @@ -48,5 +48,9 @@ void bindExecutionContext(py::module &mod) { auto &platform = cudaq::get_platform(); return platform.supports_conditional_feedback(); }); + mod.def("getExecutionContextName", []() { + auto &self = cudaq::get_platform(); + return self.get_exec_ctx()->name; + }); } } // namespace cudaq diff --git a/python/runtime/common/py_NoiseModel.cpp b/python/runtime/common/py_NoiseModel.cpp index 9187d15c66..ce71f4d7d3 100644 --- a/python/runtime/common/py_NoiseModel.cpp +++ b/python/runtime/common/py_NoiseModel.cpp @@ -11,6 +11,7 @@ #include "cudaq.h" #include #include +#include #include namespace cudaq { @@ -70,6 +71,36 @@ of the specified quantum operation. qubits (List[int]): The qubit/s to apply the noise channel to. channel (cudaq.KrausChannel): The :class:`KrausChannel` to apply to the specified `operator` on the specified `qubits`.)#") + .def( + "add_channel", + [](noise_model &self, std::string &opName, + const noise_model::PredicateFuncTy &pre) { + self.add_channel(opName, pre); + }, + py::arg("operator"), py::arg("pre"), + R"#(Add the given :class:`KrausChannel` generator callback to be applied after invocation +of the specified quantum operation. + +Args: + operator (str): The quantum operator to apply the noise channel to. 
+ pre (Callable): The callback which takes qubits operands and gate parameters and returns a concrete :class:`KrausChannel` to apply + to the specified `operator`.)#") + .def( + "add_all_qubit_channel", + [](noise_model &self, std::string &opName, kraus_channel &channel, + std::size_t num_controls = 0) { + self.add_all_qubit_channel(opName, channel, num_controls); + }, + py::arg("operator"), py::arg("channel"), py::arg("num_controls") = 0, + + R"#(Add the given :class:`KrausChannel` to be applied after invocation +of the specified quantum operation on arbitrary qubits. + +Args: + operator (str): The quantum operator to apply the noise channel to. + channel (cudaq.KrausChannel): The :class:`KrausChannel` to apply + to the specified `operator` on any arbitrary qubits. + num_controls: Number of control bits. Default is 0 (no control bits).)#") .def( "get_channels", [](noise_model self, const std::string &op, @@ -112,6 +143,7 @@ void bindNoiseChannels(py::module &mod) { "The `KrausChannel` is composed of a list of " ":class:`KrausOperator`'s and " "is applied to a specific qubit or set of qubits.") + .def(py::init<>(), "Create an empty :class:`KrausChannel`") .def(py::init>(), "Create a :class:`KrausChannel` composed of a list of " ":class:`KrausOperator`'s.") @@ -162,7 +194,7 @@ void bindNoiseChannels(py::module &mod) { For `probability = 0.0`, the channel will behave noise-free. For `probability = 0.75`, the channel will fully depolarize the state. 
- For `proability = 1.0`, the channel will be uniform.)#") + For `probability = 1.0`, the channel will be uniform.)#") .def(py::init(), py::arg("probability"), "Initialize the `DepolarizationChannel` with the provided " "`probability`."); diff --git a/python/runtime/cudaq/algorithms/py_state.cpp b/python/runtime/cudaq/algorithms/py_state.cpp index 08665ba78c..fb9bdd5c64 100644 --- a/python/runtime/cudaq/algorithms/py_state.cpp +++ b/python/runtime/cudaq/algorithms/py_state.cpp @@ -58,6 +58,22 @@ state pyGetState(py::object kernel, py::args args) { }); } +state pyGetStateLibraryMode(py::object kernel, py::args args) { + return details::extractState([&]() mutable { + if (0 == args.size()) + cudaq::invokeKernel(std::forward(kernel)); + else { + args = simplifiedValidateInputArguments(args); + std::vector argsData; + for (size_t i = 0; i < args.size(); i++) { + py::object arg = args[i]; + argsData.emplace_back(std::forward(arg)); + } + cudaq::invokeKernel(std::forward(kernel), argsData); + } + }); +} + /// @brief Python implementation of the `RemoteSimulationState`. // Note: Python kernel arguments are wrapped hence need to be unwrapped // accordingly. @@ -137,6 +153,21 @@ state pyGetStateRemote(py::object kernel, py::args args) { size, returnOffset)); } +state pyGetStateLibraryMode(py::object kernel, py::args args) { + return details::extractState([&]() mutable { + if (0 == args.size()) + cudaq::invokeKernel(std::forward(kernel)); + else { + std::vector argsData; + for (size_t i = 0; i < args.size(); i++) { + py::object arg = args[i]; + argsData.emplace_back(std::forward(arg)); + } + cudaq::invokeKernel(std::forward(kernel), argsData); + } + }); +} + /// @brief Bind the get_state cudaq function void bindPyState(py::module &mod, LinkedLibraryHolder &holder) { @@ -629,6 +660,8 @@ index pair. 
if (holder.getTarget().name == "remote-mqpu" || holder.getTarget().name == "nvqc") return pyGetStateRemote(kernel, args); + if (holder.getTarget().name == "photonics-cpu") + return pyGetStateLibraryMode(kernel, args); return pyGetState(kernel, args); }, R"#(Return the :class:`State` of the system after execution of the provided `kernel`. diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 8a87f0e090..5b6c7b5938 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -97,10 +97,11 @@ jitAndCreateArgs(const std::string &name, MlirModule module, PassManager pm(context); pm.addNestedPass( cudaq::opt::createPySynthCallableBlockArgs(names)); - pm.addPass(cudaq::opt::createGenerateDeviceCodeLoader(/*genAsQuake=*/true)); + pm.addPass(cudaq::opt::createGenerateDeviceCodeLoader({.jitTime = true})); pm.addPass(cudaq::opt::createGenerateKernelExecution( {.startingArgIdx = startingArgIdx})); pm.addPass(cudaq::opt::createLambdaLiftingPass()); + pm.addPass(createSymbolDCEPass()); cudaq::opt::addPipelineConvertToQIR(pm); DefaultTimingManager tm; diff --git a/python/runtime/mlir/py_register_dialects.cpp b/python/runtime/mlir/py_register_dialects.cpp index f87d7d479c..3dd5a66ff3 100644 --- a/python/runtime/mlir/py_register_dialects.cpp +++ b/python/runtime/mlir/py_register_dialects.cpp @@ -97,6 +97,50 @@ void registerQuakeDialectAndTypes(py::module &m) { return veqTy.getSize(); }, py::arg("veqTypeInstance")); + + mlir_type_subclass( + quakeMod, "StruqType", + [](MlirType type) { return unwrap(type).isa(); }) + .def_classmethod( + "get", + [](py::object cls, MlirContext ctx, py::list aggregateTypes) { + SmallVector inTys; + for (auto &t : aggregateTypes) + inTys.push_back(unwrap(t.cast())); + + return wrap(quake::StruqType::get(unwrap(ctx), inTys)); + }) + .def_classmethod("getNamed", + [](py::object cls, MlirContext ctx, + const 
std::string &name, py::list aggregateTypes) { + SmallVector inTys; + for (auto &t : aggregateTypes) + inTys.push_back(unwrap(t.cast())); + + return wrap( + quake::StruqType::get(unwrap(ctx), name, inTys)); + }) + .def_classmethod( + "getTypes", + [](py::object cls, MlirType structTy) { + auto ty = dyn_cast(unwrap(structTy)); + if (!ty) + throw std::runtime_error( + "invalid type passed to StruqType.getTypes(), must be a " + "quake.struq"); + std::vector ret; + for (auto &t : ty.getMembers()) + ret.push_back(wrap(t)); + return ret; + }) + .def_classmethod("getName", [](py::object cls, MlirType structTy) { + auto ty = dyn_cast(unwrap(structTy)); + if (!ty) + throw std::runtime_error( + "invalid type passed to StruqType.getName(), must be a " + "quake.struq"); + return ty.getName().getValue().str(); + }); } void registerCCDialectAndTypes(py::module &m) { diff --git a/python/runtime/utils/PyRemoteRESTQPU.cpp b/python/runtime/utils/PyRemoteRESTQPU.cpp index 06a182e446..1ec7cd7c09 100644 --- a/python/runtime/utils/PyRemoteRESTQPU.cpp +++ b/python/runtime/utils/PyRemoteRESTQPU.cpp @@ -19,7 +19,7 @@ // ServerHelper, for example, was not invoked at all. using namespace mlir; -extern "C" void deviceCodeHolderAdd(const char *, const char *); +extern "C" void __cudaq_deviceCodeHolderAdd(const char *, const char *); namespace cudaq { @@ -103,7 +103,7 @@ class PyRemoteRESTQPU : public cudaq::BaseRemoteRESTQPU { } // The remote rest qpu workflow will need the module string in // the internal registry. 
- deviceCodeHolderAdd(kernelName.c_str(), moduleStr.c_str()); + __cudaq_deviceCodeHolderAdd(kernelName.c_str(), moduleStr.c_str()); return std::make_tuple(cloned, context, wrapper->rawArgs); } }; diff --git a/python/runtime/utils/PyRemoteSimulatorQPU.cpp b/python/runtime/utils/PyRemoteSimulatorQPU.cpp index e4a25cdda6..4cc998c363 100644 --- a/python/runtime/utils/PyRemoteSimulatorQPU.cpp +++ b/python/runtime/utils/PyRemoteSimulatorQPU.cpp @@ -51,7 +51,8 @@ launchKernelImpl(cudaq::ExecutionContext *executionContextPtr, std::unique_ptr &m_client, const std::string &m_simName, const std::string &name, void (*kernelFunc)(void *), void *args, - std::uint64_t voidStarSize, std::uint64_t resultOffset) { + std::uint64_t voidStarSize, std::uint64_t resultOffset, + const std::vector &rawArgs) { auto *wrapper = reinterpret_cast(args); auto m_module = wrapper->mod; auto callableNames = wrapper->callableNames; @@ -131,12 +132,14 @@ class PyRemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU { void launchKernel(const std::string &name, void (*kernelFunc)(void *), void *args, std::uint64_t voidStarSize, - std::uint64_t resultOffset) override { + std::uint64_t resultOffset, + const std::vector &rawArgs) override { cudaq::info("PyRemoteSimulatorQPU: Launch kernel named '{}' remote QPU {} " "(simulator = {})", name, qpu_id, m_simName); ::launchKernelImpl(getExecutionContextForMyThread(), m_client, m_simName, - name, kernelFunc, args, voidStarSize, resultOffset); + name, kernelFunc, args, voidStarSize, resultOffset, + rawArgs); } void launchKernel(const std::string &name, @@ -177,12 +180,14 @@ class PyNvcfSimulatorQPU : public cudaq::BaseNvcfSimulatorQPU { void launchKernel(const std::string &name, void (*kernelFunc)(void *), void *args, std::uint64_t voidStarSize, - std::uint64_t resultOffset) override { + std::uint64_t resultOffset, + const std::vector &rawArgs) override { cudaq::info("PyNvcfSimulatorQPU: Launch kernel named '{}' remote QPU {} " "(simulator = {})", name, 
qpu_id, m_simName); ::launchKernelImpl(getExecutionContextForMyThread(), m_client, m_simName, - name, kernelFunc, args, voidStarSize, resultOffset); + name, kernelFunc, args, voidStarSize, resultOffset, + rawArgs); } void launchKernel(const std::string &name, diff --git a/python/tests/backends/test_IQM.py b/python/tests/backends/test_IQM.py index d60f00aa5b..dbc24ade14 100644 --- a/python/tests/backends/test_IQM.py +++ b/python/tests/backends/test_IQM.py @@ -200,7 +200,22 @@ def test_IQM_state_preparation_builder(): assert assert_close(counts["11"], 0., 2) -def test_arbitrary_unitary_synthesis(): +def test_exp_pauli(): + + @cudaq.kernel + def test(): + q = cudaq.qvector(2) + exp_pauli(1.0, q, "XX") + + shots = 10000 + # gives results like { 11:7074 10:0 01:0 00:2926 } + counts = cudaq.sample(test, shots_count=shots) + counts.dump() + assert assert_close(counts["01"], 0., 2) + assert assert_close(counts["10"], 0., 2) + + +def test_1q_unitary_synthesis(): cudaq.register_operation("custom_h", 1. / np.sqrt(2.) 
* np.array([1, 1, 1, -1])) @@ -232,11 +247,43 @@ def bell(): custom_x.ctrl(qubits[0], qubits[1]) counts = cudaq.sample(bell) - counts.dump() # Gives result like { 11:499 10:0 01:0 00:499 } assert counts['01'] == 0 and counts['10'] == 0 +def test_2q_unitary_synthesis(): + + cudaq.register_operation( + "custom_cnot", + np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0])) + + @cudaq.kernel + def bell_pair(): + qubits = cudaq.qvector(2) + h(qubits[0]) + custom_cnot(qubits[0], qubits[1]) + + counts = cudaq.sample(bell_pair) + # Gives result like { 11:499 10:0 01:0 00:499 } + assert counts['01'] == 0 and counts['10'] == 0 + + cudaq.register_operation( + "custom_cz", np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, + -1])) + + @cudaq.kernel + def ctrl_z_kernel(): + qubits = cudaq.qvector(5) + controls = cudaq.qvector(2) + custom_cz(qubits[1], qubits[0]) + x(qubits[2]) + custom_cz(qubits[3], qubits[2]) + x(controls) + + counts = cudaq.sample(ctrl_z_kernel) + assert counts["0010011"] == 999 + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/backends/test_IonQ.py b/python/tests/backends/test_IonQ.py index 90f2537d7a..11b393936c 100644 --- a/python/tests/backends/test_IonQ.py +++ b/python/tests/backends/test_IonQ.py @@ -189,7 +189,21 @@ def test_ionq_state_preparation_builder(): assert not '11' in counts -def test_arbitrary_unitary_synthesis(): +def test_exp_pauli(): + + @cudaq.kernel + def test(): + q = cudaq.qvector(2) + exp_pauli(1.0, q, "XX") + + counts = cudaq.sample(test) + assert '00' in counts + assert '11' in counts + assert not '01' in counts + assert not '10' in counts + + +def test_1q_unitary_synthesis(): cudaq.register_operation("custom_h", 1. / np.sqrt(2.) 
* np.array([1, 1, 1, -1])) @@ -224,6 +238,54 @@ def bell(): assert len(counts) == 2 assert "00" in counts and "11" in counts + cudaq.register_operation("custom_s", np.array([1, 0, 0, 1j])) + cudaq.register_operation("custom_s_adj", np.array([1, 0, 0, -1j])) + + @cudaq.kernel + def kernel(): + q = cudaq.qubit() + h(q) + custom_s.adj(q) + custom_s_adj(q) + h(q) + + counts = cudaq.sample(kernel) + counts.dump() + assert counts["1"] == 1000 + + +def test_2q_unitary_synthesis(): + + cudaq.register_operation( + "custom_cnot", + np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0])) + + @cudaq.kernel + def bell_pair(): + qubits = cudaq.qvector(2) + h(qubits[0]) + custom_cnot(qubits[0], qubits[1]) + + counts = cudaq.sample(bell_pair) + assert len(counts) == 2 + assert "00" in counts and "11" in counts + + cudaq.register_operation( + "custom_cz", np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, + -1])) + + @cudaq.kernel + def ctrl_z_kernel(): + qubits = cudaq.qvector(5) + controls = cudaq.qvector(2) + custom_cz(qubits[1], qubits[0]) + x(qubits[2]) + custom_cz(qubits[3], qubits[2]) + x(controls) + + counts = cudaq.sample(ctrl_z_kernel) + assert counts["0010011"] == 1000 + # leave for gdb debugging if __name__ == "__main__": diff --git a/python/tests/backends/test_OQC.py b/python/tests/backends/test_OQC.py index 3723916abe..55f48de613 100644 --- a/python/tests/backends/test_OQC.py +++ b/python/tests/backends/test_OQC.py @@ -188,7 +188,21 @@ def test_OQC_state_preparation_builder(): assert not '11' in counts -def test_arbitrary_unitary_synthesis(): +def test_exp_pauli(): + + @cudaq.kernel + def test(): + q = cudaq.qvector(2) + exp_pauli(1.0, q, "XX") + + counts = cudaq.sample(test) + assert '00' in counts + assert '11' in counts + assert not '01' in counts + assert not '10' in counts + + +def test_1q_unitary_synthesis(): cudaq.register_operation("custom_h", 1. / np.sqrt(2.) 
* np.array([1, 1, 1, -1])) @@ -223,6 +237,54 @@ def bell(): assert len(counts) == 2 assert "00" in counts and "11" in counts + cudaq.register_operation("custom_s", np.array([1, 0, 0, 1j])) + cudaq.register_operation("custom_s_adj", np.array([1, 0, 0, -1j])) + + @cudaq.kernel + def kernel(): + q = cudaq.qubit() + h(q) + custom_s.adj(q) + custom_s_adj(q) + h(q) + + counts = cudaq.sample(kernel) + counts.dump() + assert counts["1"] == 1000 + + +def test_2q_unitary_synthesis(): + + cudaq.register_operation( + "custom_cnot", + np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0])) + + @cudaq.kernel + def bell_pair(): + qubits = cudaq.qvector(2) + h(qubits[0]) + custom_cnot(qubits[0], qubits[1]) + + counts = cudaq.sample(bell_pair) + assert len(counts) == 2 + assert "00" in counts and "11" in counts + + cudaq.register_operation( + "custom_cz", np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, + -1])) + + @cudaq.kernel + def ctrl_z_kernel(): + qubits = cudaq.qvector(5) + controls = cudaq.qvector(2) + custom_cz(qubits[1], qubits[0]) + x(qubits[2]) + custom_cz(qubits[3], qubits[2]) + x(controls) + + counts = cudaq.sample(ctrl_z_kernel) + assert counts["0010011"] == 1000 + # leave for gdb debugging if __name__ == "__main__": diff --git a/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py b/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py index e62657ea69..71647a9c70 100644 --- a/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py +++ b/python/tests/backends/test_Quantinuum_LocalEmulation_builder.py @@ -149,6 +149,29 @@ def test_quantinuum_state_synthesis(): assert 'Could not successfully apply quake-synth.' 
in repr(e) +def test_exp_pauli(): + test = cudaq.make_kernel() + q = test.qalloc(2) + test.exp_pauli(1.0, q, "XX") + + counts = cudaq.sample(test) + assert '00' in counts + assert '11' in counts + assert not '01' in counts + assert not '10' in counts + + +def test_exp_pauli_param(): + test, w = cudaq.make_kernel(cudaq.pauli_word) + q = test.qalloc(2) + test.exp_pauli(1.0, q, w) + + # FIXME: should work after new launchKernel becomes default. + with pytest.raises(RuntimeError) as e: + counts = cudaq.sample(test, cudaq.pauli_word("XX")) + assert 'Remote rest platform Quake lowering failed.' in repr(e) + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py b/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py index fd5c0f536c..5765da2009 100644 --- a/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py +++ b/python/tests/backends/test_Quantinuum_LocalEmulation_kernel.py @@ -180,7 +180,34 @@ def kernel(state: cudaq.State): assert 'Could not successfully apply quake-synth.' in repr(e) -def test_arbitrary_unitary_synthesis(): +def test_exp_pauli(): + + @cudaq.kernel + def test(): + q = cudaq.qvector(2) + exp_pauli(1.0, q, "XX") + + counts = cudaq.sample(test) + assert '00' in counts + assert '11' in counts + assert not '01' in counts + assert not '10' in counts + + +def test_exp_pauli_param(): + + @cudaq.kernel + def test_param(w: cudaq.pauli_word): + q = cudaq.qvector(2) + exp_pauli(1.0, q, w) + + # FIXME: should work after new launchKernel becomes default. + with pytest.raises(RuntimeError) as e: + counts = cudaq.sample(test_param, cudaq.pauli_word("XX")) + assert 'Remote rest platform Quake lowering failed.' in repr(e) + + +def test_1q_unitary_synthesis(): cudaq.register_operation("custom_h", 1. / np.sqrt(2.) 
* np.array([1, 1, 1, -1])) @@ -192,7 +219,6 @@ def basic_x(): custom_x(qubit) counts = cudaq.sample(basic_x) - counts.dump() assert len(counts) == 1 and "1" in counts @cudaq.kernel @@ -201,7 +227,6 @@ def basic_h(): custom_h(qubit) counts = cudaq.sample(basic_h) - counts.dump() assert "0" in counts and "1" in counts @cudaq.kernel @@ -211,7 +236,6 @@ def bell(): custom_x.ctrl(qubits[0], qubits[1]) counts = cudaq.sample(bell) - counts.dump() assert len(counts) == 2 assert "00" in counts and "11" in counts @@ -227,10 +251,61 @@ def kernel(): h(q) counts = cudaq.sample(kernel) - counts.dump() assert counts["1"] == 1000 +def test_2q_unitary_synthesis(): + + cudaq.register_operation( + "custom_cnot", + np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0])) + + @cudaq.kernel + def bell_pair(): + qubits = cudaq.qvector(2) + h(qubits[0]) + custom_cnot(qubits[0], qubits[1]) + + counts = cudaq.sample(bell_pair) + assert len(counts) == 2 + assert "00" in counts and "11" in counts + + cudaq.register_operation( + "custom_cz", np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, + -1])) + + @cudaq.kernel + def ctrl_z_kernel(): + qubits = cudaq.qvector(5) + controls = cudaq.qvector(2) + custom_cz(qubits[1], qubits[0]) + x(qubits[2]) + custom_cz(qubits[3], qubits[2]) + x(controls) + + counts = cudaq.sample(ctrl_z_kernel) + assert counts["0010011"] == 1000 + + +def test_3q_unitary_synthesis(): + cudaq.register_operation( + "toffoli", + np.array([ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0 + ])) + + @cudaq.kernel + def test_toffoli(): + q = cudaq.qvector(3) + x(q) + toffoli(q[0], q[1], q[2]) + + with pytest.raises(RuntimeError): + cudaq.sample(test_toffoli) + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/backends/test_Quantinuum_builder.py 
b/python/tests/backends/test_Quantinuum_builder.py index 952b2cd582..b13c493178 100644 --- a/python/tests/backends/test_Quantinuum_builder.py +++ b/python/tests/backends/test_Quantinuum_builder.py @@ -45,7 +45,8 @@ def startUpMockServer(): if not check_server_connection(port): p.terminate() - pytest.exit("Mock server did not start in time, skipping tests.", returncode=1) + pytest.exit("Mock server did not start in time, skipping tests.", + returncode=1) yield credsName @@ -163,6 +164,18 @@ def test_quantinuum_state_preparation(): assert not '11' in counts +def test_exp_pauli(): + test = cudaq.make_kernel() + q = test.qalloc(2) + test.exp_pauli(1.0, q, "XX") + + counts = cudaq.sample(test) + assert '00' in counts + assert '11' in counts + assert not '01' in counts + assert not '10' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/backends/test_Quantinuum_kernel.py b/python/tests/backends/test_Quantinuum_kernel.py index 1472a1f3d4..88989be341 100644 --- a/python/tests/backends/test_Quantinuum_kernel.py +++ b/python/tests/backends/test_Quantinuum_kernel.py @@ -45,7 +45,8 @@ def startUpMockServer(): if not check_server_connection(port): p.terminate() - pytest.exit("Mock server did not start in time, skipping tests.", returncode=1) + pytest.exit("Mock server did not start in time, skipping tests.", + returncode=1) yield credsName @@ -189,6 +190,20 @@ def kernel(vec: List[complex]): assert not '11' in counts +def test_exp_pauli(): + + @cudaq.kernel + def test(): + q = cudaq.qvector(2) + exp_pauli(1.0, q, "XX") + + counts = cudaq.sample(test) + assert '00' in counts + assert '11' in counts + assert not '01' in counts + assert not '10' in counts + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/builder/test_NoiseModel.py b/python/tests/builder/test_NoiseModel.py index 1a70031c55..85d3514f94 100644 --- 
a/python/tests/builder/test_NoiseModel.py +++ b/python/tests/builder/test_NoiseModel.py @@ -12,6 +12,7 @@ import numpy as np import cudaq +import random def test_depolarization_channel(): @@ -226,6 +227,7 @@ def test_kraus_channel(): assert ('1' in counts) cudaq.reset_target() + def test_row_major(): cudaq.set_target('density-matrix-cpu') cudaq.set_random_seed(13) @@ -233,10 +235,12 @@ def test_row_major(): error_prob = 0.2 shots = 10000 # Default numpy array is row major - kraus_0=np.array([[1.0,0.0], [0.0,np.sqrt(1-error_prob)]],dtype=np.complex128) - kraus_1=np.array([[0.0,np.sqrt(error_prob)],[0.0,0.0]],dtype=np.complex128) + kraus_0 = np.array([[1.0, 0.0], [0.0, np.sqrt(1 - error_prob)]], + dtype=np.complex128) + kraus_1 = np.array([[0.0, np.sqrt(error_prob)], [0.0, 0.0]], + dtype=np.complex128) # This will throw if the row-column major convention is mixed up - t1_channel=cudaq.KrausChannel([kraus_0,kraus_1]) + t1_channel = cudaq.KrausChannel([kraus_0, kraus_1]) noise = cudaq.NoiseModel() noise.add_channel('x', [0], t1_channel) cudaq.set_noise(noise) @@ -246,9 +250,10 @@ def test_row_major(): noisy_counts = cudaq.sample(circuit, shots_count=shots) noisy_counts.dump() # Decay to |0> ~ error_prob - assert np.isclose(noisy_counts.probability("0"), error_prob, atol=.2) + assert np.isclose(noisy_counts.probability("0"), error_prob, atol=.2) cudaq.reset_target() + def test_column_major(): cudaq.set_target('density-matrix-cpu') cudaq.set_random_seed(13) @@ -257,10 +262,14 @@ def test_column_major(): shots = 10000 # Input data in column major # Note: same data but with order = 'F' => the buffer storage will be in column major - kraus_0=np.array([[1.0,0.0], [0.0,np.sqrt(1-error_prob)]],dtype=np.complex128,order='F') - kraus_1=np.array([[0.0,np.sqrt(error_prob)],[0.0,0.0]],dtype=np.complex128,order='F') + kraus_0 = np.array([[1.0, 0.0], [0.0, np.sqrt(1 - error_prob)]], + dtype=np.complex128, + order='F') + kraus_1 = np.array([[0.0, np.sqrt(error_prob)], [0.0, 0.0]], + 
dtype=np.complex128, + order='F') # This will throw if the row-column major convention is mixed up - t1_channel=cudaq.KrausChannel([kraus_0,kraus_1]) + t1_channel = cudaq.KrausChannel([kraus_0, kraus_1]) noise = cudaq.NoiseModel() noise.add_channel('x', [0], t1_channel) cudaq.set_noise(noise) @@ -270,32 +279,251 @@ def test_column_major(): noisy_counts = cudaq.sample(circuit, shots_count=shots) noisy_counts.dump() # Decay to |0> ~ error_prob - assert np.isclose(noisy_counts.probability("0"), error_prob, atol=.2) + assert np.isclose(noisy_counts.probability("0"), error_prob, atol=.2) cudaq.reset_target() + def test_noise_u3(): cudaq.set_target('density-matrix-cpu') cudaq.set_random_seed(13) # Amplitude damping error_prob = 0.2 shots = 10000 - kraus_0=np.array([[1.0,0.0], [0.0,np.sqrt(1-error_prob)]],dtype=np.complex128) - kraus_1=np.array([[0.0,np.sqrt(error_prob)],[0.0,0.0]],dtype=np.complex128) + kraus_0 = np.array([[1.0, 0.0], [0.0, np.sqrt(1 - error_prob)]], + dtype=np.complex128) + kraus_1 = np.array([[0.0, np.sqrt(error_prob)], [0.0, 0.0]], + dtype=np.complex128) # This will throw if the row-column major convention is mixed up - t1_channel=cudaq.KrausChannel([kraus_0,kraus_1]) + t1_channel = cudaq.KrausChannel([kraus_0, kraus_1]) noise = cudaq.NoiseModel() noise.add_channel('u3', [0], t1_channel) cudaq.set_noise(noise) circuit = cudaq.make_kernel() q = circuit.qalloc() # U3(pi,−pi/2,pi/2) == X - circuit.u3(np.pi, -np.pi/2, np.pi/2, q) + circuit.u3(np.pi, -np.pi / 2, np.pi / 2, q) noisy_counts = cudaq.sample(circuit, shots_count=shots) noisy_counts.dump() # Decay to |0> ~ error_prob - assert np.isclose(noisy_counts.probability("0"), error_prob, atol=.1) + assert np.isclose(noisy_counts.probability("0"), error_prob, atol=.1) + cudaq.reset_target() + + +def test_all_qubit_channel(): + cudaq.set_target('density-matrix-cpu') + cudaq.set_random_seed(13) + noise = cudaq.NoiseModel() + bf = cudaq.BitFlipChannel(1.0) + noise.add_all_qubit_channel('x', bf) + kernel = 
cudaq.make_kernel() + num_qubits = 3 + qubits = kernel.qalloc(num_qubits) + kernel.x(qubits) + kernel.mz(qubits) + shots = 252 + noisy_counts = cudaq.sample(kernel, shots_count=shots, noise_model=noise) + noisy_counts.dump() + # Decay to |000> + assert np.isclose(noisy_counts.probability("0" * num_qubits), 1.0) + cudaq.reset_target() + + +def test_all_qubit_channel_with_control(): + cudaq.set_target('density-matrix-cpu') + cudaq.set_random_seed(13) + noise = cudaq.NoiseModel() + k0 = np.array( + [[0.99498743710662, 0., 0., 0.], [0., 0.99498743710662, 0., 0.], + [0., 0., 0.99498743710662, 0.], [0., 0., 0., 0.99498743710662]], + dtype=np.complex128) + k1 = np.array( + [[0., 0., 0.05773502691896258, 0.], [0., 0., 0., 0.05773502691896258], + [0.05773502691896258, 0., 0., 0.], [0., 0.05773502691896258, 0., 0.]], + dtype=np.complex128) + k2 = np.array([[0., 0., -1j * 0.05773502691896258, 0.], + [0., 0., 0., -1j * 0.05773502691896258], + [1j * 0.05773502691896258, 0., 0., 0.], + [0., 1j * 0.05773502691896258, 0., 0.]], + dtype=np.complex128) + k3 = np.array( + [[0.05773502691896258, 0., 0., 0.], [0., 0.05773502691896258, 0., 0.], + [0., 0., -0.05773502691896258, 0.], [0., 0., 0., -0.05773502691896258] + ], + dtype=np.complex128) + kraus_channel = cudaq.KrausChannel([k0, k1, k2, k3]) + noise.add_all_qubit_channel('x', kraus_channel, num_controls=1) + num_qubits = 5 + num_tests = 4 + for i in range(num_tests): + kernel = cudaq.make_kernel() + qubits = kernel.qalloc(num_qubits) + # Pick a qubit pair + qubit_pair = random.sample(range(num_qubits), 2) + print(f"qubit pair: {qubit_pair}") + q = qubits[qubit_pair[0]] + r = qubits[qubit_pair[1]] + kernel.h(q) + kernel.cx(q, r) + kernel.mz(qubits) + shots = 1024 + noisy_counts = cudaq.sample(kernel, + shots_count=shots, + noise_model=noise) + noisy_counts.dump() + # All tests have some noisy states beside the bell pair. 
+ assert (len(noisy_counts) > 2) cudaq.reset_target() + +def test_all_qubit_channel_with_control_prefix(): + cudaq.set_target('density-matrix-cpu') + cudaq.set_random_seed(13) + noise = cudaq.NoiseModel() + k0 = np.array( + [[0.99498743710662, 0., 0., 0.], [0., 0.99498743710662, 0., 0.], + [0., 0., 0.99498743710662, 0.], [0., 0., 0., 0.99498743710662]], + dtype=np.complex128) + k1 = np.array( + [[0., 0., 0.05773502691896258, 0.], [0., 0., 0., 0.05773502691896258], + [0.05773502691896258, 0., 0., 0.], [0., 0.05773502691896258, 0., 0.]], + dtype=np.complex128) + k2 = np.array([[0., 0., -1j * 0.05773502691896258, 0.], + [0., 0., 0., -1j * 0.05773502691896258], + [1j * 0.05773502691896258, 0., 0., 0.], + [0., 1j * 0.05773502691896258, 0., 0.]], + dtype=np.complex128) + k3 = np.array( + [[0.05773502691896258, 0., 0., 0.], [0., 0.05773502691896258, 0., 0.], + [0., 0., -0.05773502691896258, 0.], [0., 0., 0., -0.05773502691896258] + ], + dtype=np.complex128) + kraus_channel = cudaq.KrausChannel([k0, k1, k2, k3]) + noise.add_all_qubit_channel('cx', kraus_channel) + num_qubits = 5 + num_tests = 4 + for i in range(num_tests): + kernel = cudaq.make_kernel() + qubits = kernel.qalloc(num_qubits) + # Pick a qubit pair + qubit_pair = random.sample(range(num_qubits), 2) + print(f"qubit pair: {qubit_pair}") + q = qubits[qubit_pair[0]] + r = qubits[qubit_pair[1]] + kernel.h(q) + kernel.cx(q, r) + kernel.mz(qubits) + shots = 1024 + noisy_counts = cudaq.sample(kernel, + shots_count=shots, + noise_model=noise) + noisy_counts.dump() + # All tests have some noisy states beside the bell pair. 
+ assert (len(noisy_counts) > 2) + cudaq.reset_target() + + +def test_callback_channel(): + cudaq.set_target('density-matrix-cpu') + cudaq.set_random_seed(13) + + def noise_cb(qubits, params): + if qubits[0] != 2: + return cudaq.BitFlipChannel(1.0) + return cudaq.KrausChannel() + + noise = cudaq.NoiseModel() + noise.add_channel('x', noise_cb) + kernel = cudaq.make_kernel() + num_qubits = 5 + qubits = kernel.qalloc(num_qubits) + kernel.x(qubits) + kernel.mz(qubits) + shots = 252 + noisy_counts = cudaq.sample(kernel, shots_count=shots, noise_model=noise) + noisy_counts.dump() + # All qubits, except q[2], are flipped. + assert np.isclose(noisy_counts.probability("00100"), 1.0) + cudaq.reset_target() + + +def test_callback_channel_with_params(): + cudaq.set_target('density-matrix-cpu') + cudaq.set_random_seed(13) + + def noise_cb(qubits, params): + assert len(params) == 1 + # For testing: only add noise if the angle is positive. + if params[0] > 0: + return cudaq.BitFlipChannel(1.0) + return cudaq.KrausChannel() + + noise = cudaq.NoiseModel() + noise.add_channel('rx', noise_cb) + kernel = cudaq.make_kernel() + qubit = kernel.qalloc() + # Rx(pi) == X + kernel.rx(np.pi, qubit) + kernel.mz(qubit) + shots = 252 + noisy_counts = cudaq.sample(kernel, shots_count=shots, noise_model=noise) + noisy_counts.dump() + # Due to 100% bit-flip, it becomes "0". + assert np.isclose(noisy_counts.probability("0"), 1.0) + + kernel = cudaq.make_kernel() + qubit = kernel.qalloc() + # Rx(-pi) == X + kernel.rx(-np.pi, qubit) + kernel.mz(qubit) + shots = 252 + noisy_counts = cudaq.sample(kernel, shots_count=shots, noise_model=noise) + noisy_counts.dump() + # Due to our custom setup, a negative angle will have no noise. 
+ assert np.isclose(noisy_counts.probability("1"), 1.0) + cudaq.reset_target() + + +def check_custom_op_noise(noise_model): + cudaq.set_random_seed(13) + cudaq.set_target('density-matrix-cpu') + + @cudaq.kernel + def basic(): + q = cudaq.qubit() + custom_x(q) + + shots = 100 + counts = cudaq.sample(basic, shots_count=shots, noise_model=noise_model) + counts.dump() + assert np.isclose(counts.probability("0"), 1.0) + cudaq.reset_target() + + +def test_custom_op(): + cudaq.register_operation("custom_x", np.array([0, 1, 1, 0])) + + # (Gate name + Operand) + noise = cudaq.NoiseModel() + # Bit flip channel with `1.0` probability of the qubit flipping 180 degrees. + bit_flip_one = cudaq.BitFlipChannel(1.0) + noise.add_channel('custom_x', [0], bit_flip_one) + check_custom_op_noise(noise) + + # All-qubit + noise = cudaq.NoiseModel() + # Bit flip channel with `1.0` probability of the qubit flipping 180 degrees. + noise.add_all_qubit_channel('custom_x', bit_flip_one) + check_custom_op_noise(noise) + + # Callback + def noise_cb(qubits, params): + return bit_flip_one + + noise = cudaq.NoiseModel() + noise.add_channel('custom_x', noise_cb) + check_custom_op_noise(noise) + + # leave for gdb debugging if __name__ == "__main__": loc = os.path.abspath(__file__) diff --git a/python/tests/custom/test_unitary_synthesis.py b/python/tests/custom/test_euler_decomposition.py similarity index 100% rename from python/tests/custom/test_unitary_synthesis.py rename to python/tests/custom/test_euler_decomposition.py diff --git a/python/tests/custom/test_kak_decomposition.py b/python/tests/custom/test_kak_decomposition.py new file mode 100644 index 0000000000..e95298dc6b --- /dev/null +++ b/python/tests/custom/test_kak_decomposition.py @@ -0,0 +1,157 @@ +# ============================================================================ # +# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # +# All rights reserved. 
# +# # +# This source code and the accompanying materials are made available under # +# the terms of the Apache License 2.0 which accompanies this distribution. # +# ============================================================================ # + +import pytest +import numpy as np + +import cudaq + +## NOTE: The random operations in this file are generated using +# `scipy.stats.unitary_group.rvs(4)` with `seed=13`. The synthesized +# kernels are generated by running transformation passes on the original +# kernels which use the custom operation. These conversions are covered +# in the `test/Transforms/UnitarySynthesis/random_unitary_*` tests. + + +def check_state(matrix, state): + # state must match the first column of the custom unitary matrix + assert np.isclose(matrix[:, 0], np.array(state), atol=1e-8).all() + + +def test_random_unitary_1(): + # yapf: disable + matrix1 = np.array([ + [-0.25534142 + 0.04562918j, 0.11619328 + 0.7978548j, 0.19980911 - 0.24754117j, 0.05245516 + 0.42272181j], + [ 0.48212336 - 0.35275169j, 0.47307302 + 0.204771j, 0.38804407 + 0.34346751j, -0.30236462 - 0.13199084j], + [ 0.53000373 - 0.05204794j,-0.05546452 + 0.04480838j,-0.39853872 - 0.60358143j, -0.40979785 + 0.1422147j], + [ 0.20174057 + 0.50152752j, 0.04256283 - 0.2780322j, 0.14896845 + 0.29140402j, -0.16938781 + 0.70203793j]]) + # yapf: enable + + cudaq.register_operation("op1", matrix1) + + @cudaq.kernel + def kernel1(): + q = cudaq.qvector(2) + op1(q[1], q[0]) + + cudaq.get_state(kernel1).dump() + check_state(matrix1, cudaq.get_state(kernel1)) + + @cudaq.kernel + def synth_kernel1(): + q = cudaq.qvector(2) + + rz(3.9582625248746566, q[0]) + ry(0.93802610748277016, q[0]) + rz(2.2568237856512323, q[0]) + + rz(-1.9066099708330588, q[1]) + ry(2.783651792391125, q[1]) + rz(0.7280736766746525, q[1]) + + h(q[1]) + h(q[0]) + x.ctrl(q[0], q[1]) + rz(1.1436094116691584, q[1]) + x.ctrl(q[0], q[1]) + h(q[0]) + h(q[1]) + + rx(1.5707963267948966, q[1]) + rx(1.5707963267948966, q[0]) + 
x.ctrl(q[0], q[1]) + rz(0.13346974688431834, q[1]) + x.ctrl(q[0], q[1]) + rx(-1.5707963267948966, q[0]) + rx(-1.5707963267948966, q[1]) + + x.ctrl(q[0], q[1]) + rz(-0.43621539909016882, q[1]) + x.ctrl(q[0], q[1]) + + rz(1.6888584582114208, q[0]) + ry(2.2872369478030228, q[0]) + rz(-3.1401730467170035, q[0]) + + rz(2.088853123967366, q[1]) + ry(2.0186522227162649, q[1]) + rz(-0.20630121734301887, q[1]) + + r1(-1.2996367006005645, q[1]) + rz(1.2996367006005645, q[1]) + + check_state(matrix1, cudaq.get_state(synth_kernel1)) + + +def test_random_unitary_2(): + # yapf: disable + matrix2 = np.array([[ 0.18897759+0.33963024j, 0.12335642-0.48243451j, 0.42873799-0.22386284j, -0.38231687-0.46998072j], + [ 0.26665664+0.31917547j, 0.66539471+0.25221666j,-0.47503402-0.12900718j, -0.26305423+0.09570885j], + [-0.1821702 +0.14533363j, 0.18060332-0.34169107j, 0.00131404-0.64370213j, 0.54215898+0.29670066j], + [-0.30045971+0.72895551j, -0.26715636-0.15790473j,-0.06966553+0.32335977j, -0.13738248+0.39211303j]]) + # yapf: enable + + cudaq.register_operation("op2", matrix2) + + @cudaq.kernel + def kernel2(): + q = cudaq.qvector(2) + op2(q[1], q[0]) + + check_state(matrix2, cudaq.get_state(kernel2)) + + @cudaq.kernel + def synth_kernel2(): + q = cudaq.qvector(2) + + rz(3.3597983877882998, q[0]) + ry(1.1124416939078243, q[0]) + rz(-1.5227607222807453, q[0]) + + rz(1.0022361850018475, q[1]) + ry(2.3499858725474598, q[1]) + rz(0.70669321414482034, q[1]) + + h(q[1]) + h(q[0]) + x.ctrl(q[0], q[1]) + rz(0.41098890378696051, q[1]) + x.ctrl(q[0], q[1]) + h(q[0]) + h(q[1]) + + rx(1.5707963267948966, q[1]) + rx(1.5707963267948966, q[0]) + x.ctrl(q[0], q[1]) + rz(-4.0833361355387012, q[1]) + x.ctrl(q[0], q[1]) + rx(-1.5707963267948966, q[0]) + rx(-1.5707963267948966, q[1]) + + x.ctrl(q[0], q[1]) + rz(1.2323317339216211, q[1]) + x.ctrl(q[0], q[1]) + + rz(-0.57588264019689317, q[0]) + ry(0.45370093726152877, q[0]) + rz(0.63586258232390358, q[0]) + + rz(0.44527705872095541, q[1]) + 
ry(1.7688004823405488, q[1]) + rz(1.0308660415707038, q[1]) + + r1(0.89327181859161264, q[1]) + rz(-0.89327181859161264, q[1]) + + check_state(matrix2, cudaq.get_state(synth_kernel2)) + + +# leave for gdb debugging +if __name__ == "__main__": + loc = os.path.abspath(__file__) + pytest.main([loc, "-rP"]) diff --git a/python/tests/handlers/test_photonics_kernel.py b/python/tests/handlers/test_photonics_kernel.py index ddfbaaf37a..562a483964 100644 --- a/python/tests/handlers/test_photonics_kernel.py +++ b/python/tests/handlers/test_photonics_kernel.py @@ -7,12 +7,15 @@ # ============================================================================ # import pytest + +from typing import List + import cudaq @pytest.fixture(autouse=True) def do_something(): - cudaq.set_target("photonics") + cudaq.set_target("photonics-cpu") yield cudaq.reset_target() cudaq.__clearKernelRegistries() @@ -23,24 +26,28 @@ def test_qudit(): @cudaq.kernel def kernel(): q = qudit(level=4) - plus(q) - plus(q) - plus(q) + create(q) + create(q) + create(q) mz(q) counts = cudaq.sample(kernel) assert len(counts) == 1 assert '3' in counts + state = cudaq.get_state(kernel) + state.dump() + assert 4 == state.__len__() + def test_qudit_list(): @cudaq.kernel def kernel(): - qutrits = [qudit(3) for _ in range(2)] - plus(qutrits[0]) - plus(qutrits[1]) - mz(qutrits) + qumodes = [qudit(3) for _ in range(2)] + create(qumodes[0]) + create(qumodes[1]) + mz(qumodes) counts = cudaq.sample(kernel) assert len(counts) == 1 @@ -52,7 +59,7 @@ def test_qudit_invalid(): @cudaq.kernel def kernel(): q = [i for i in range(2)] - plus(q[0]) + create(q[0]) with pytest.raises(RuntimeError) as e: cudaq.sample(kernel) @@ -63,27 +70,73 @@ def test_supported_gates(): @cudaq.kernel def kernel(): - quds = [qudit(5) for _ in range(3)] + qumodes = [qudit(5) for _ in range(3)] - plus(quds[0]) - plus(quds[1]) - plus(quds[2]) + create(qumodes[0]) + create(qumodes[1]) + create(qumodes[2]) - phase_shift(quds[1], 0.5) - 
beam_splitter(quds[0], quds[1], 1.3) + phase_shift(qumodes[1], 0.5) + beam_splitter(qumodes[0], qumodes[1], 1.3) - mz(quds) + mz(qumodes) counts = cudaq.sample(kernel) counts.dump() +def test_kernel_with_args(): + """Test that `PhotonicsHandler` supports basic arguments. + The check here is that all the test kernels run successfully.""" + + @cudaq.kernel + def kernel_1f(theta: float): + q = qudit(4) + create(q) + phase_shift(q, theta) + mz(q) + + result = cudaq.sample(kernel_1f, 0.5) + result.dump() + + state = cudaq.get_state(kernel_1f, 0.5) + state.dump() + + @cudaq.kernel + def kernel_2f(theta: float, phi: float): + quds = [qudit(3) for _ in range(2)] + create(quds[0]) + phase_shift(quds[0], theta) + beam_splitter(quds[0], quds[1], phi) + mz(quds) + + result = cudaq.sample(kernel_2f, 0.7854, 0.3927) + result.dump() + + state = cudaq.get_state(kernel_2f, 0.7854, 0.3927) + state.dump() + + @cudaq.kernel + def kernel_list(angles: List[float]): + quds = [qudit(2) for _ in range(3)] + create(quds[0]) + phase_shift(quds[1], angles[0]) + phase_shift(quds[2], angles[1]) + mz(quds) + + result = cudaq.sample(kernel_list, [0.5236, 1.0472]) + result.dump() + + state = cudaq.get_state(kernel_list, [0.5236, 1.0472]) + state.dump() + + def test_target_change(): @cudaq.kernel def kernel(): q = qudit(level=2) - plus(q) + create(q) mz(q) res = cudaq.sample(kernel) @@ -100,7 +153,7 @@ def bell_pair(): res = cudaq.sample(bell_pair) assert len(res) == 2 and '00' in res and '11' in res - cudaq.set_target("photonics") + cudaq.set_target("photonics-cpu") res = cudaq.sample(kernel) assert len(res) == 1 and '1' in res @@ -132,7 +185,7 @@ def test_unsupported_types(): @cudaq.kernel def kernel1(): q = cudaq.qubit() - plus(q) + create(q) with pytest.raises(RuntimeError) as e: cudaq.sample(kernel1) @@ -141,7 +194,7 @@ def kernel1(): @cudaq.kernel def kernel2(): q = cudaq.qvector(2) - plus(q[0]) + create(q[0]) with pytest.raises(Exception) as e: cudaq.sample(kernel2) @@ -154,7 +207,7 @@ def 
test_target_handler(): @cudaq.kernel def kernel(): q = qudit(level=3) - plus(q) + create(q) mz(q) with pytest.raises(RuntimeError): diff --git a/python/tests/kernel/test_adjoint_operations.py b/python/tests/kernel/test_adjoint_operations.py index aa41b056af..74064e19b0 100644 --- a/python/tests/kernel/test_adjoint_operations.py +++ b/python/tests/kernel/test_adjoint_operations.py @@ -96,7 +96,7 @@ def sdg_1_state_negate(): # Place qubit in 1-state. x(qubit) - # Superpositoin. + # Superposition. h(qubit) # Rotate around Z by -pi/2, twice. Total rotation of -pi. sdg(qubit) diff --git a/python/tests/kernel/test_kernel_exp_pauli.py b/python/tests/kernel/test_kernel_exp_pauli.py new file mode 100644 index 0000000000..9906ba366a --- /dev/null +++ b/python/tests/kernel/test_kernel_exp_pauli.py @@ -0,0 +1,37 @@ +# ============================================================================ # +# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # +# All rights reserved. # +# # +# This source code and the accompanying materials are made available under # +# the terms of the Apache License 2.0 which accompanies this distribution. 
# +# ============================================================================ # + +import cudaq + + +def test_exp_pauli(): + + @cudaq.kernel + def test(): + q = cudaq.qvector(2) + exp_pauli(1.0, q, "XX") + + counts = cudaq.sample(test) + assert '00' in counts + assert '11' in counts + assert not '01' in counts + assert not '10' in counts + + +def test_exp_pauli_param(): + + @cudaq.kernel + def test_param(w: cudaq.pauli_word): + q = cudaq.qvector(2) + exp_pauli(1.0, q, w) + + counts = cudaq.sample(test_param, cudaq.pauli_word("XX")) + assert '00' in counts + assert '11' in counts + assert not '01' in counts + assert not '10' in counts diff --git a/python/tests/kernel/test_kernel_features.py b/python/tests/kernel/test_kernel_features.py index 52624cf6ed..b6267cb75d 100644 --- a/python/tests/kernel/test_kernel_features.py +++ b/python/tests/kernel/test_kernel_features.py @@ -194,12 +194,12 @@ def grover(N: int, M: int, oracle: Callable[[cudaq.qview], None]): def test_pauli_word_input(): h2_data = [ - 3, 1, 1, 3, 0.0454063, 0, 2, 0, 0, 0, 0.17028, 0, 0, 0, 2, 0, -0.220041, - -0, 1, 3, 3, 1, 0.0454063, 0, 0, 0, 0, 0, -0.106477, 0, 0, 2, 0, 0, - 0.17028, 0, 0, 0, 0, 2, -0.220041, -0, 3, 3, 1, 1, -0.0454063, -0, 2, 2, - 0, 0, 0.168336, 0, 2, 0, 2, 0, 0.1202, 0, 0, 2, 0, 2, 0.1202, 0, 2, 0, - 0, 2, 0.165607, 0, 0, 2, 2, 0, 0.165607, 0, 0, 0, 2, 2, 0.174073, 0, 1, - 1, 3, 3, -0.0454063, -0, 15 + 3, 1, 1, 3, 0.0454063, 0, 2, 0, 0, 0, 0.17028, 0, 0, 0, 2, 0, + -0.220041, -0, 1, 3, 3, 1, 0.0454063, 0, 0, 0, 0, 0, -0.106477, 0, 0, + 2, 0, 0, 0.17028, 0, 0, 0, 0, 2, -0.220041, -0, 3, 3, 1, 1, -0.0454063, + -0, 2, 2, 0, 0, 0.168336, 0, 2, 0, 2, 0, 0.1202, 0, 0, 2, 0, 2, 0.1202, + 0, 2, 0, 0, 2, 0.165607, 0, 0, 2, 2, 0, 0.165607, 0, 0, 0, 2, 2, + 0.174073, 0, 1, 1, 3, 3, -0.0454063, -0, 15 ] h = cudaq.SpinOperator(h2_data, 4) @@ -242,12 +242,12 @@ def test(theta: float, paulis: list[cudaq.pauli_word]): def test_exp_pauli(): h2_data = [ - 3, 1, 1, 3, 0.0454063, 0, 2, 0, 0, 0, 
0.17028, 0, 0, 0, 2, 0, -0.220041, - -0, 1, 3, 3, 1, 0.0454063, 0, 0, 0, 0, 0, -0.106477, 0, 0, 2, 0, 0, - 0.17028, 0, 0, 0, 0, 2, -0.220041, -0, 3, 3, 1, 1, -0.0454063, -0, 2, 2, - 0, 0, 0.168336, 0, 2, 0, 2, 0, 0.1202, 0, 0, 2, 0, 2, 0.1202, 0, 2, 0, - 0, 2, 0.165607, 0, 0, 2, 2, 0, 0.165607, 0, 0, 0, 2, 2, 0.174073, 0, 1, - 1, 3, 3, -0.0454063, -0, 15 + 3, 1, 1, 3, 0.0454063, 0, 2, 0, 0, 0, 0.17028, 0, 0, 0, 2, 0, + -0.220041, -0, 1, 3, 3, 1, 0.0454063, 0, 0, 0, 0, 0, -0.106477, 0, 0, + 2, 0, 0, 0.17028, 0, 0, 0, 0, 2, -0.220041, -0, 3, 3, 1, 1, -0.0454063, + -0, 2, 2, 0, 0, 0.168336, 0, 2, 0, 2, 0, 0.1202, 0, 0, 2, 0, 2, 0.1202, + 0, 2, 0, 0, 2, 0.165607, 0, 0, 2, 2, 0, 0.165607, 0, 0, 0, 2, 2, + 0.174073, 0, 1, 1, 3, 3, -0.0454063, -0, 15 ] h = cudaq.SpinOperator(h2_data, 4) @@ -1725,6 +1725,102 @@ def run(): run() +def test_disallow_hybrid_types(): + from dataclasses import dataclass + # Ensure we don't allow hybrid type s + @dataclass + class hybrid: + q: cudaq.qview + i: int + + with pytest.raises(RuntimeError) as e: + + @cudaq.kernel + def test(): + q = cudaq.qvector(2) + h = hybrid(q, 1) + + test() + + with pytest.raises(RuntimeError) as e: + + @cudaq.kernel + def testtest(h: hybrid): + x(h.q[h.i]) + + testtest.compile() + + +def test_disallow_quantum_struct_return(): + from dataclasses import dataclass + # Ensure we don't allow hybrid type s + @dataclass + class T: + q: cudaq.qview + + with pytest.raises(RuntimeError) as e: + + @cudaq.kernel + def test() -> T: + q = cudaq.qvector(2) + h = T(q) + return h + + test() + +def test_disallow_recursive_quantum_struct(): + from dataclasses import dataclass + @dataclass + class T: + q: cudaq.qview + + @dataclass + class Holder: + t : T + + with pytest.raises(RuntimeError) as e: + + @cudaq.kernel + def test(): + q = cudaq.qvector(2) + t = T(q) + hh = Holder(t) + + print(test) + + with pytest.raises(RuntimeError) as e: + + @cudaq.kernel + def test(hh : Holder): + pass + + print(test) + +def 
test_disallow_struct_with_methods(): + from dataclasses import dataclass + @dataclass + class T: + q: cudaq.qview + def doSomething(self): + pass + + with pytest.raises(RuntimeError) as e: + + @cudaq.kernel + def test(t : T): + pass + + print(test) + + with pytest.raises(RuntimeError) as e: + + @cudaq.kernel + def test(): + q = cudaq.qvector(2) + t = T(q) + print(test) + + @skipIfPythonLessThan39 def test_issue_9(): diff --git a/python/tests/kernel/test_kernel_translate.py b/python/tests/kernel/test_kernel_translate.py index b2028ab102..29fc9f6964 100644 --- a/python/tests/kernel/test_kernel_translate.py +++ b/python/tests/kernel/test_kernel_translate.py @@ -9,6 +9,8 @@ import pytest import cudaq +import numpy as np + @cudaq.kernel def bell_pair(): @@ -19,7 +21,7 @@ def bell_pair(): @cudaq.kernel -def kernel(numQubits: int): +def kernel_loop_params(numQubits: int): q = cudaq.qvector(numQubits) h(q) for i in range(numQubits - 1): @@ -28,6 +30,24 @@ def kernel(numQubits: int): mz(q[i]) +@cudaq.kernel +def kernel_loop(): + numQubits = 5 + q = cudaq.qvector(numQubits) + h(q) + for i in range(4): + cx(q[i], q[i + 1]) + for i in range(numQubits): + mz(q[i]) + + +@cudaq.kernel +def kernel_vector(): + c = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] + q = cudaq.qvector(c) + mz(q) + + @cudaq.kernel def kernel_with_call(): @@ -47,15 +67,25 @@ def test_translate_openqasm_with_ignored_args(): assert "qreg var0[2];" in asm +def test_translate_openqasm_loop(): + asm = cudaq.translate(kernel_loop, format="openqasm2") + assert "qreg var0[5];" in asm + + +def test_translate_openqasm_vector(): + asm = cudaq.translate(kernel_vector, format="openqasm2") + assert "qreg var0[2];" in asm + + def test_translate_openqasm_with_args(): with pytest.raises(RuntimeError) as e: - print(cudaq.translate(kernel, 5, format="openqasm2")) + print(cudaq.translate(kernel_loop_params, 5, format="openqasm2")) assert 'Cannot translate function with arguments to OpenQASM 2.0.' 
in repr( e) def test_translate_openqasm_synth(): - synth = cudaq.synthesize(kernel, 4) + synth = cudaq.synthesize(kernel_loop_params, 4) asm = cudaq.translate(synth, format="openqasm2") assert "measure var0[3] -> var8[0]" in asm @@ -79,7 +109,7 @@ def test_translate_qir_ignored_args(): def test_translate_qir_with_args(): - qir = cudaq.translate(kernel, 5, format="qir") + qir = cudaq.translate(kernel_loop_params, 5, format="qir") assert "%2 = tail call %Array* @__quantum__rt__qubit_allocate_array(i64 %0)" in qir @@ -99,7 +129,7 @@ def test_translate_qir_base_ignored_args(): def test_translate_qir_base_args(): - synth = cudaq.synthesize(kernel, 5) + synth = cudaq.synthesize(kernel_loop_params, 5) qir = cudaq.translate(synth, 5, format="qir-base") assert '"qir_profiles"="base_profile"' in qir @@ -115,6 +145,6 @@ def test_translate_qir_adaptive_ignored_args(): def test_translate_qir_adaptive_args(): - synth = cudaq.synthesize(kernel, 5) + synth = cudaq.synthesize(kernel_loop_params, 5) qir = cudaq.translate(synth, 5, format="qir-adaptive") assert '"qir_profiles"="adaptive_profile"' in qir diff --git a/python/tests/mlir/adjoint.py b/python/tests/mlir/adjoint.py index dad638e49e..3dd5b6af64 100644 --- a/python/tests/mlir/adjoint.py +++ b/python/tests/mlir/adjoint.py @@ -275,6 +275,7 @@ def test_sample_adjoint_qreg(): print(kernel) +# CHECK-LABEL: test_sample_adjoint_qreg # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}( # CHECK-SAME: %[[VAL_0:.*]]: i64) attributes {"cudaq-entrypoint"} { # CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : i64 diff --git a/python/tests/mlir/conditional.py b/python/tests/mlir/conditional.py index 677900564b..15f23c2362 100644 --- a/python/tests/mlir/conditional.py +++ b/python/tests/mlir/conditional.py @@ -113,6 +113,7 @@ def then_function(): print(kernel) +# CHECK-LABEL: test_kernel_conditional_with_sample # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}() attributes {"cudaq-entrypoint"} { # CHECK: 
%[[VAL_0:.*]] = quake.alloca !quake.ref # CHECK: quake.x %[[VAL_0]] : (!quake.ref) -> () @@ -155,6 +156,7 @@ def then(): print(kernel) +# CHECK-LABEL: test_cif_extract_ref_bug # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}() attributes {"cudaq-entrypoint"} { # CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> # CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq<2>) -> !quake.ref diff --git a/python/tests/mlir/control.py b/python/tests/mlir/control.py index ccbe64fd53..bdc7fd16b1 100644 --- a/python/tests/mlir/control.py +++ b/python/tests/mlir/control.py @@ -331,6 +331,7 @@ def test_sample_control_qreg_args(): print(kernel) +# CHECK-LABEL: test_sample_control_qreg_args # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}() attributes {"cudaq-entrypoint"} { # CHECK: %[[VAL_5:.*]] = quake.alloca !quake.veq<2> # CHECK: %[[VAL_6:.*]] = quake.alloca !quake.ref @@ -387,6 +388,7 @@ def test_sample_apply_call_control(): print(kernel) +# CHECK-LABEL: test_sample_apply_call_control # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}() attributes {"cudaq-entrypoint"} { # CHECK: %[[VAL_0:.*]] = quake.alloca !quake.ref # CHECK: %[[VAL_1:.*]] = quake.alloca !quake.ref diff --git a/python/tests/mlir/ctrl_gates.py b/python/tests/mlir/ctrl_gates.py index 08dbb6f920..5b348ac395 100644 --- a/python/tests/mlir/ctrl_gates.py +++ b/python/tests/mlir/ctrl_gates.py @@ -82,6 +82,7 @@ def test_kernel_ctrl_rotation(): print(kernel) +# CHECK-LABEL: test_kernel_ctrl_rotation # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}( # CHECK-SAME: %[[VAL_0:.*]]: !cc.stdvec) attributes {"cudaq-entrypoint"} { # CHECK-DAG: %[[VAL_1:.*]] = arith.constant 3.000000e+00 : f64 @@ -137,6 +138,7 @@ def test_kernel_multi_ctrl(): print(kernel) +# CHECK-LABEL: test_kernel_multi_ctrl # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}() attributes {"cudaq-entrypoint"} { # CHECK: %[[VAL_0:.*]] = 
quake.alloca !quake.veq<3> # CHECK: %[[VAL_1:.*]] = quake.extract_ref %[[VAL_0]][0] : (!quake.veq<3>) -> !quake.ref @@ -175,6 +177,7 @@ def test_kernel_ctrl_register(): print(kernel) +# CHECK-LABEL: test_kernel_ctrl_register # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}() attributes {"cudaq-entrypoint"} { # CHECK-DAG: %[[VAL_0:.*]] = quake.alloca !quake.veq<3> # CHECK-DAG: %[[VAL_1:.*]] = quake.alloca !quake.veq<2> @@ -221,6 +224,7 @@ def test_kernel_rotation_ctrl_register(): print(kernel) +# CHECK-LABEL: test_kernel_rotation_ctrl_register # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}( # CHECK-SAME: %[[VAL_0:.*]]: !cc.stdvec) attributes {"cudaq-entrypoint"} { # CHECK-DAG: %[[VAL_1:.*]] = arith.constant 3 : i64 @@ -286,6 +290,7 @@ def test_ctrl_swap(): print(kernel) +# CHECK-LABEL: test_ctrl_swap # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}() attributes {"cudaq-entrypoint"} { # CHECK-DAG: %[[VAL_0:.*]] = quake.alloca !quake.ref # CHECK-DAG: %[[VAL_1:.*]] = quake.alloca !quake.ref diff --git a/python/tests/mlir/one_qubit.py b/python/tests/mlir/one_qubit.py index 039462865a..530eb0698b 100644 --- a/python/tests/mlir/one_qubit.py +++ b/python/tests/mlir/one_qubit.py @@ -81,6 +81,7 @@ def test_kernel_param_1q(): print(kernel) +# CHECK-LABEL: test_kernel_param_1q # CHECK-LABEL: func.func @__nvqpp__mlirgen____nvqppBuilderKernel_{{.*}}( # CHECK-SAME: %[[VAL_0:.*]]: f64) attributes {"cudaq-entrypoint"} { # CHECK: %0 = quake.alloca !quake.veq<1> diff --git a/python/tests/mlir/quantum_struct.py b/python/tests/mlir/quantum_struct.py new file mode 100644 index 0000000000..8ed8081bbe --- /dev/null +++ b/python/tests/mlir/quantum_struct.py @@ -0,0 +1,39 @@ +# ============================================================================ # +# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # +# All rights reserved. 
# +# # +# This source code and the accompanying materials are made available under # +# the terms of the Apache License 2.0 which accompanies this distribution. # +# ============================================================================ # + +# RUN: PYTHONPATH=../../ pytest -rP %s | FileCheck %s + + +import pytest +import cudaq +from dataclasses import dataclass + +def test_quantum_struct(): + @dataclass + class patch: + q : cudaq.qview + r : cudaq.qview + + @cudaq.kernel + def entry(): + q = cudaq.qvector(2) + r = cudaq.qvector(2) + p = patch(q, r) + h(p.r[0]) + + print(entry) + +# CHECK-LABEL: func.func @__nvqpp__mlirgen__entry() +# CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> +# CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<2> +# The struq type is erased in this example. +# CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_1]][0] : (!quake.veq<2>) -> !quake.ref +# CHECK: quake.h %[[VAL_2]] : (!quake.ref) -> () +# CHECK: return +# CHECK: } + diff --git a/python/tests/mlir/quantum_type.py b/python/tests/mlir/quantum_type.py index b204c10f77..3fec7bbcaf 100644 --- a/python/tests/mlir/quantum_type.py +++ b/python/tests/mlir/quantum_type.py @@ -6,7 +6,8 @@ # the terms of the Apache License 2.0 which accompanies this distribution. # # ============================================================================ # -# RUN: PYTHONPATH=../../ pytest -rP %s | FileCheck %s +# Workaround for kernels that may appear in jumbled order. 
+# RUN: PYTHONPATH=../../ pytest -rP %s > %t && FileCheck %s < %t && FileCheck --check-prefix=NAUGHTY %s < %t && FileCheck --check-prefix=NICE %s < %t import pytest @@ -47,41 +48,35 @@ def run(): # Test here is that it compiles and runs successfully print(run) -# CHECK-LABEL: func.func @__nvqpp__mlirgen__logicalH( -# CHECK-SAME: %[[VAL_0:.*]]: !cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}>) attributes {"cudaq-entrypoint"} { -# CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 -# CHECK: %[[VAL_2:.*]] = arith.constant 0 : i64 -# CHECK: %[[VAL_3:.*]] = cc.extract_value %[[VAL_0]][0] : (!cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}>) -> !quake.veq +# NAUGHTY-LABEL: func.func @__nvqpp__mlirgen__logicalH( +# NAUGHTY-SAME: %[[VAL_0:.*]]: !quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq>) { +# NAUGHTY: %[[VAL_3:.*]] = quake.get_member %[[VAL_0]][0] : (!quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq>) -> !quake.veq +# NAUGHTY: %[[VAL_4:.*]] = quake.veq_size %[[VAL_3]] : (!quake.veq) -> i64 +# NAUGHTY: return +# NAUGHTY: } + +# NICE-LABEL: func.func @__nvqpp__mlirgen__logicalX( +# NICE-SAME: %[[VAL_0:.*]]: !quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq>) { +# NICE: %[[VAL_3:.*]] = quake.get_member %[[VAL_0]][1] : (!quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq>) -> !quake.veq +# NICE: %[[VAL_4:.*]] = quake.veq_size %[[VAL_3]] : (!quake.veq) -> i64 +# NICE: return +# NICE: } + +# CHECK-LABEL: func.func @__nvqpp__mlirgen__logicalZ( +# CHECK-SAME: %[[VAL_0:.*]]: !quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq>) { +# CHECK: %[[VAL_3:.*]] = quake.get_member %[[VAL_0]][2] : (!quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq>) -> !quake.veq # CHECK: %[[VAL_4:.*]] = quake.veq_size %[[VAL_3]] : (!quake.veq) -> i64 -# CHECK: %[[VAL_5:.*]] = cc.loop while ((%[[VAL_6:.*]] = %[[VAL_2]]) -> (i64)) { -# CHECK: %[[VAL_7:.*]] = arith.cmpi slt, %[[VAL_6]], %[[VAL_4]] : i64 -# CHECK: cc.condition %[[VAL_7]](%[[VAL_6]] : i64) 
-# CHECK: } do { -# CHECK: ^bb0(%[[VAL_8:.*]]: i64): -# CHECK: %[[VAL_9:.*]] = quake.extract_ref %[[VAL_3]]{{\[}}%[[VAL_8]]] : (!quake.veq, i64) -> !quake.ref -# CHECK: quake.h %[[VAL_9]] : (!quake.ref) -> () -# CHECK: cc.continue %[[VAL_8]] : i64 -# CHECK: } step { -# CHECK: ^bb0(%[[VAL_10:.*]]: i64): -# CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_10]], %[[VAL_1]] : i64 -# CHECK: cc.continue %[[VAL_11]] : i64 -# CHECK: } {invariant} # CHECK: return # CHECK: } -# CHECK-LABEL: func.func @__nvqpp__mlirgen__run() attributes {"cudaq-entrypoint"} { +# CHECK-LABEL: func.func @__nvqpp__mlirgen__run() # CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> -# CHECK: %[[VAL_1:.*]] = quake.relax_size %[[VAL_0]] : (!quake.veq<2>) -> !quake.veq +# CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<2> # CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<2> -# CHECK: %[[VAL_3:.*]] = quake.relax_size %[[VAL_2]] : (!quake.veq<2>) -> !quake.veq -# CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq<2> -# CHECK: %[[VAL_5:.*]] = quake.relax_size %[[VAL_4]] : (!quake.veq<2>) -> !quake.veq -# CHECK: %[[VAL_6:.*]] = cc.undef !cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}> -# CHECK: %[[VAL_7:.*]] = cc.insert_value %[[VAL_1]], %[[VAL_6]][0] : (!cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}>, !quake.veq) -> !cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}> -# CHECK: %[[VAL_8:.*]] = cc.insert_value %[[VAL_3]], %[[VAL_7]][1] : (!cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}>, !quake.veq) -> !cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}> -# CHECK: %[[VAL_9:.*]] = cc.insert_value %[[VAL_5]], %[[VAL_8]][2] : (!cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}>, !quake.veq) -> !cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}> -# CHECK: call @__nvqpp__mlirgen__logicalH(%[[VAL_9]]) : (!cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}>) -> () -# CHECK: call @__nvqpp__mlirgen__logicalX(%[[VAL_9]]) : (!cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}>) 
-> () -# CHECK: call @__nvqpp__mlirgen__logicalZ(%[[VAL_9]]) : (!cc.struct<"patch" {!quake.veq, !quake.veq, !quake.veq}>) -> () +# CHECK: %[[VAL_3:.*]] = quake.make_struq %[[VAL_0]], %[[VAL_1]], %[[VAL_2]] : (!quake.veq<2>, !quake.veq<2>, !quake.veq<2>) -> !quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq> +# CHECK: call @__nvqpp__mlirgen__logicalH(%[[VAL_3]]) : (!quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq>) -> () +# CHECK: call @__nvqpp__mlirgen__logicalX(%[[VAL_3]]) : (!quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq>) -> () +# CHECK: call @__nvqpp__mlirgen__logicalZ(%[[VAL_3]]) : (!quake.struq<"patch": !quake.veq, !quake.veq, !quake.veq>) -> () # CHECK: return -# CHECK: } \ No newline at end of file +# CHECK: } + diff --git a/python/tests/mlir/test_output_qir.py b/python/tests/mlir/test_output_qir.py index 22f50704d7..eab4ca9171 100644 --- a/python/tests/mlir/test_output_qir.py +++ b/python/tests/mlir/test_output_qir.py @@ -22,9 +22,9 @@ def ghz(numQubits: int): for i, qubitIdx in enumerate(range(numQubits - 1)): x.ctrl(qubits[i], qubits[qubitIdx + 1]) - print(cudaq.to_qir(ghz)) + print(cudaq.translate(ghz, format="qir")) ghz_synth = cudaq.synthesize(ghz, 5) - print(cudaq.to_qir(ghz_synth, profile='qir-base')) + print(cudaq.translate(ghz_synth, format='qir-base')) # CHECK: %[[VAL_0:.*]] = tail call diff --git a/python/utils/LinkedLibraryHolder.cpp b/python/utils/LinkedLibraryHolder.cpp index 5026061b8a..e24084fb86 100644 --- a/python/utils/LinkedLibraryHolder.cpp +++ b/python/utils/LinkedLibraryHolder.cpp @@ -13,6 +13,7 @@ #include "cudaq/platform.h" #include "cudaq/target_control.h" #include "nvqir/CircuitSimulator.h" +#include "nvqir/photonics/PhotonicCircuitSimulator.h" #include #include #include @@ -22,6 +23,7 @@ // Our hook into configuring the NVQIR backend. 
extern "C" { void __nvqir__setCircuitSimulator(nvqir::CircuitSimulator *); +void __nvqir__setPhotonicCircuitSimulator(nvqir::PhotonicCircuitSimulator *); } namespace cudaq { @@ -383,6 +385,17 @@ LinkedLibraryHolder::getSimulator(const std::string &simName) { std::string("getCircuitSimulator_") + simName); } +nvqir::PhotonicCircuitSimulator * +LinkedLibraryHolder::getPhotonicSimulator(const std::string &simName) { + auto end = availableSimulators.end(); + auto iter = std::find(availableSimulators.begin(), end, simName); + if (iter == end) + throw std::runtime_error("Invalid simulator requested: " + simName); + + return getUniquePluginInstance( + std::string("getPhotonicCircuitSimulator_") + simName); +} + quantum_platform * LinkedLibraryHolder::getPlatform(const std::string &platformName) { auto end = availablePlatforms.end(); @@ -454,7 +467,11 @@ void LinkedLibraryHolder::setTarget( cudaq::info("Setting target={} (sim={}, platform={})", targetName, target.simulatorName, target.platformName); - __nvqir__setCircuitSimulator(getSimulator(target.simulatorName)); + if (targetName != "photonics-cpu") + __nvqir__setCircuitSimulator(getSimulator(target.simulatorName)); + else + __nvqir__setPhotonicCircuitSimulator( + getPhotonicSimulator(target.simulatorName)); auto *platform = getPlatform(target.platformName); // Pack the config into the backend string name @@ -477,7 +494,7 @@ void LinkedLibraryHolder::setTarget( setQuantumPlatformInternal(platform); currentTarget = targetName; - if ("photonics" == targetName) { + if ("photonics-cpu" == targetName) { std::filesystem::path libPath = cudaqLibPath / fmt::format("libcudaq-em-photonics.{}", libSuffix); auto *em = getUniquePluginInstance( diff --git a/python/utils/LinkedLibraryHolder.h b/python/utils/LinkedLibraryHolder.h index c630e199a8..1c689aade6 100644 --- a/python/utils/LinkedLibraryHolder.h +++ b/python/utils/LinkedLibraryHolder.h @@ -18,7 +18,8 @@ namespace nvqir { class CircuitSimulator; -} +class 
PhotonicCircuitSimulator; +} // namespace nvqir namespace cudaq { @@ -81,6 +82,10 @@ class LinkedLibraryHolder { /// @brief Return the registered simulator with the given name. nvqir::CircuitSimulator *getSimulator(const std::string &name); + /// @brief Return the registered photonic simulator with the given name. + nvqir::PhotonicCircuitSimulator * + getPhotonicSimulator(const std::string &simName); + /// @brief Return the registered quantum_platform with the given name. quantum_platform *getPlatform(const std::string &name); diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index bcd47a8caf..910d5a0958 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -21,7 +21,9 @@ install (DIRECTORY cudaq DESTINATION include PATTERN "nlopt-src" EXCLUDE) install (DIRECTORY common DESTINATION include FILES_MATCHING PATTERN "*.h") install (FILES nvqir/CircuitSimulator.h + nvqir/photonics/PhotonicCircuitSimulator.h nvqir/QIRTypes.h nvqir/Gates.h + nvqir/photonics/PhotonicGates.h DESTINATION include/nvqir) install (FILES cudaq.h DESTINATION include) diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index a9c12c0fe0..424cbd8873 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -7,13 +7,17 @@ ******************************************************************************/ #include "ArgumentConversion.h" +#include "cudaq.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Todo.h" +#include "cudaq/qis/pauli_word.h" +#include "cudaq/utils/registry.h" #include "llvm/ADT/TypeSwitch.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/IR/BuiltinAttributes.h" +#include "mlir/Parser/Parser.h" using namespace mlir; @@ -199,7 +203,7 @@ Value dispatchSubtype(OpBuilder &builder, Type ty, void *p, ModuleOp substMod, return {}; }) .Case([&](cudaq::cc::CharspanType strTy) { - 
return genConstant(builder, *static_cast(p), + return genConstant(builder, static_cast(p)->str(), substMod); }) .Case([&](cudaq::cc::StdvecType ty) { @@ -224,6 +228,11 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p, auto eleTy = vecTy.getElementType(); auto elePtrTy = cudaq::cc::PointerType::get(eleTy); auto eleSize = cudaq::opt::getDataSize(layout, eleTy); + if (isa(eleTy)) { + // char span type (i.e. pauli word) is a `vector` + eleSize = sizeof(VectorType); + } + assert(eleSize && "element must have a size"); auto loc = builder.getUnknownLoc(); std::int32_t vecSize = delta / eleSize; @@ -284,6 +293,33 @@ Value genConstant(OpBuilder &builder, cudaq::cc::ArrayType arrTy, void *p, return aggie; } +Value genConstant(OpBuilder &builder, cudaq::cc::IndirectCallableType indCallTy, + void *p, ModuleOp sourceMod, ModuleOp substMod, + llvm::DataLayout &layout) { + auto key = cudaq::registry::__cudaq_getLinkableKernelKey(p); + auto *name = cudaq::registry::getLinkableKernelNameOrNull(key); + if (!name) + return {}; + auto code = cudaq::get_quake_by_name(name, /*throwException=*/false); + auto *ctx = builder.getContext(); + auto fromModule = parseSourceString(code, ctx); + OpBuilder cloneBuilder(ctx); + cloneBuilder.setInsertionPointToStart(substMod.getBody()); + for (auto &i : *fromModule->getBody()) { + auto s = dyn_cast_if_present(i); + if (!s || sourceMod.lookupSymbol(s.getNameAttr()) || + substMod.lookupSymbol(s.getNameAttr())) + continue; + auto clone = cloneBuilder.clone(i); + cast(clone).setPrivate(); + } + auto loc = builder.getUnknownLoc(); + auto func = builder.create( + loc, indCallTy.getSignature(), + std::string{cudaq::runtime::cudaqGenPrefixName} + name); + return builder.create(loc, indCallTy, func); +} + //===----------------------------------------------------------------------===// cudaq::opt::ArgumentConverter::ArgumentConverter(StringRef kernelName, @@ -361,7 +397,7 @@ void cudaq::opt::ArgumentConverter::gen(const 
std::vector &arguments) { return {}; }) .Case([&](cc::CharspanType strTy) { - return buildSubst(*static_cast(argPtr), + return buildSubst(static_cast(argPtr)->str(), substModule); }) .Case([&](cc::PointerType ptrTy) -> cc::ArgumentSubstitutionOp { @@ -380,6 +416,10 @@ void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { .Case([&](cc::ArrayType ty) { return buildSubst(ty, argPtr, substModule, dataLayout); }) + .Case([&](cc::IndirectCallableType ty) { + return buildSubst(ty, argPtr, sourceModule, substModule, + dataLayout); + }) .Default({}); if (subst) substitutions.emplace_back(std::move(subst)); @@ -406,8 +446,10 @@ void cudaq::opt::ArgumentConverter::gen_drop_front( if (numDrop >= arguments.size()) return; std::vector partialArgs; + int drop = numDrop; for (void *arg : arguments) { - if (numDrop--) { + if (drop > 0) { + drop--; partialArgs.push_back(nullptr); continue; } diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index f472520882..61c26dc791 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -24,6 +24,7 @@ #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" #include "cudaq/Optimizer/Dialect/CC/CCOps.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "cudaq/Support/Plugin.h" #include "cudaq/Support/TargetConfig.h" @@ -107,6 +108,10 @@ class BaseRemoteRESTQPU : public cudaq::QPU { /// `-mlir-print-ir-after-all` in `cudaq-opt`. bool enablePrintMLIREachPass = false; + /// @brief Flag indicating whether we should enable MLIR pass statistics + /// to be printed. This is similar to `-mlir-pass-statistics` in `cudaq-opt` + bool enablePassStatistics = false; + /// @brief If we are emulating locally, keep track /// of JIT engines for invoking the kernels. 
std::vector jitEngines; @@ -248,6 +253,8 @@ class BaseRemoteRESTQPU : public cudaq::QPU { getEnvBool("CUDAQ_MLIR_DISABLE_THREADING", disableMLIRthreading); enablePrintMLIREachPass = getEnvBool("CUDAQ_MLIR_PRINT_EACH_PASS", enablePrintMLIREachPass); + enablePassStatistics = + getEnvBool("CUDAQ_MLIR_PASS_STATISTICS", enablePassStatistics); // If the very verbose enablePrintMLIREachPass flag is set, then // multi-threading must be disabled. @@ -482,6 +489,9 @@ class BaseRemoteRESTQPU : public cudaq::QPU { // Create a new Module to clone the ansatz into it auto tmpModuleOp = builder.create(); tmpModuleOp.push_back(ansatz.clone()); + moduleOp.walk([&](quake::WireSetOp wireSetOp) { + tmpModuleOp.push_back(wireSetOp.clone()); + }); // Extract the binary symplectic encoding auto [binarySymplecticForm, coeffs] = term.get_raw_data(); @@ -497,7 +507,14 @@ class BaseRemoteRESTQPU : public cudaq::QPU { pm.enableIRPrinting(); if (failed(pm.run(tmpModuleOp))) throw std::runtime_error("Could not apply measurements to ansatz."); - runPassPipeline(passPipelineConfig, tmpModuleOp); + // The full pass pipeline was run above, but the ansatz pass can + // introduce gates that aren't supported by the backend, so we need to + // re-run the gate set mapping if that existed in the original pass + // pipeline. 
+ auto csvSplit = cudaq::split(passPipelineConfig, ','); + for (auto &pass : csvSplit) + if (pass.ends_with("-gate-set-mapping")) + runPassPipeline(pass, tmpModuleOp); modules.emplace_back(term.to_string(false), tmpModuleOp); } } else @@ -527,7 +544,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (disableMLIRthreading) moduleOpI.getContext()->disableMultithreading(); if (failed(translation(moduleOpI, outStr, postCodeGenPasses, printIR, - enablePrintMLIREachPass))) + enablePrintMLIREachPass, enablePassStatistics))) throw std::runtime_error("Could not successfully translate to " + codegenTranslation + "."); } @@ -563,7 +580,8 @@ class BaseRemoteRESTQPU : public cudaq::QPU { /// synchronous invocation. void launchKernel(const std::string &kernelName, void (*kernelFunc)(void *), void *args, std::uint64_t voidStarSize, - std::uint64_t resultOffset) override { + std::uint64_t resultOffset, + const std::vector &rawArgs) override { cudaq::info("launching remote rest kernel ({})", kernelName); // TODO future iterations of this should support non-void return types. @@ -573,7 +591,11 @@ class BaseRemoteRESTQPU : public cudaq::QPU { "cudaq::observe(), or cudaq::draw()."); // Get the Quake code, lowered according to config file. - auto codes = lowerQuakeCode(kernelName, args); + // FIXME: For python, we reach here with rawArgs being empty and args having + // the arguments. Python should be using the streamlined argument synthesis, + // but apparently it isn't. This works around that bug. + auto codes = rawArgs.empty() ? 
lowerQuakeCode(kernelName, args) + : lowerQuakeCode(kernelName, rawArgs); completeLaunchKernel(kernelName, std::move(codes)); } diff --git a/runtime/common/BaseRemoteSimulatorQPU.h b/runtime/common/BaseRemoteSimulatorQPU.h index b1f5ead104..667fba5941 100644 --- a/runtime/common/BaseRemoteSimulatorQPU.h +++ b/runtime/common/BaseRemoteSimulatorQPU.h @@ -112,7 +112,9 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU { void launchKernel(const std::string &name, void (*kernelFunc)(void *), void *args, std::uint64_t voidStarSize, - std::uint64_t resultOffset) override { + std::uint64_t resultOffset, + const std::vector &rawArgs) override { + // Remote simulation cannot deal with rawArgs. Drop them on the floor. launchKernelImpl(name, kernelFunc, args, voidStarSize, resultOffset, nullptr); } diff --git a/runtime/common/CMakeLists.txt b/runtime/common/CMakeLists.txt index 09a6563226..bb8a5ecaba 100644 --- a/runtime/common/CMakeLists.txt +++ b/runtime/common/CMakeLists.txt @@ -11,6 +11,7 @@ set(LIBRARY_NAME cudaq-common) set(COMMON_EXTRA_DEPS "") set(COMMON_RUNTIME_SRC + CustomOp.cpp Environment.cpp Executor.cpp Future.cpp @@ -97,7 +98,6 @@ set_property(GLOBAL APPEND PROPERTY CUDAQ_RUNTIME_LIBS cudaq-mlir-runtime) # Note: JIT.cpp contains throw statements. Should RTTI be enabled? set_source_files_properties( - ArgumentConversion.cpp Environment.cpp JIT.cpp Logger.cpp @@ -133,6 +133,7 @@ target_link_libraries(cudaq-mlir-runtime MLIRLLVMCommonConversion MLIRLLVMToLLVMIRTranslation PRIVATE + cudaq spdlog::spdlog) install(TARGETS cudaq-mlir-runtime DESTINATION lib) diff --git a/runtime/common/CustomOp.cpp b/runtime/common/CustomOp.cpp new file mode 100644 index 0000000000..8c4e19695e --- /dev/null +++ b/runtime/common/CustomOp.cpp @@ -0,0 +1,36 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. 
* + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "CustomOp.h" + +namespace cudaq { +customOpRegistry &customOpRegistry::getInstance() { + static customOpRegistry instance; + return instance; +} + +void customOpRegistry::clearRegisteredOperations() { + std::unique_lock lock(mtx); + registeredOperations.clear(); +} + +bool customOpRegistry::isOperationRegistered(const std::string &name) { + std::shared_lock lock(mtx); + return registeredOperations.find(name) != registeredOperations.end(); +} + +const unitary_operation & +customOpRegistry::getOperation(const std::string &name) { + std::shared_lock lock(mtx); + auto iter = registeredOperations.find(name); + if (iter == registeredOperations.end()) { + throw std::runtime_error("Operation not registered: " + name); + } + return *iter->second; +} +} // namespace cudaq diff --git a/runtime/common/CustomOp.h b/runtime/common/CustomOp.h new file mode 100644 index 0000000000..ecc3e6a5aa --- /dev/null +++ b/runtime/common/CustomOp.h @@ -0,0 +1,77 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cudaq { +/// @brief Define a `unitary_operation` type that exposes +/// a sub-type specific unitary representation of the +/// operation. 
+struct unitary_operation { + /// @brief Given a set of rotation parameters, return + /// a row-major 1D array representing the unitary operation + virtual std::vector> unitary( + const std::vector ¶meters = std::vector()) const = 0; + virtual ~unitary_operation() {} +}; + +/// @brief Singleton class for managing and storing unitary operations. +class customOpRegistry { +public: + /// @brief Get the singleton instance of the `customOpRegistry`. + static customOpRegistry &getInstance(); + +private: + /// @brief Constructor + // Private to prevent direct instantiation. + customOpRegistry() {} + +public: + customOpRegistry(const customOpRegistry &) = delete; + void operator=(const customOpRegistry &) = delete; + + /// @brief Register a new custom unitary operation under the + /// provided operation name. + template + void registerOperation(const std::string &name) { + { + std::shared_lock lock(mtx); + auto iter = registeredOperations.find(name); + if (iter != registeredOperations.end()) + return; + } + std::unique_lock lock(mtx); + registeredOperations.insert({name, std::make_unique()}); + } + + /// Clear the registered operations + void clearRegisteredOperations(); + + /// Returns true if the operation with the given name is registered. + bool isOperationRegistered(const std::string &name); + + /// Get the unitary operation associated with the given name. + /// This will throw an exception if the operation is not registered. + const unitary_operation &getOperation(const std::string &name); + +private: + /// @brief Keep track of a registry of user-provided unitary operations. + std::unordered_map> + registeredOperations; + /// @brief Mutex to protect concurrent access to the registry. 
+ std::shared_mutex mtx; +}; +} // namespace cudaq diff --git a/runtime/common/KernelWrapper.h b/runtime/common/KernelWrapper.h index e0af46e71e..6c30efa58d 100644 --- a/runtime/common/KernelWrapper.h +++ b/runtime/common/KernelWrapper.h @@ -522,7 +522,7 @@ std::invoke_result_t invokeKernel(QuantumKernel &&fn, auto serializedArgsBuffer = serializeArgs(std::forward(args)...); cudaq::get_platform().launchKernel(fn.name(), nullptr, (void *)serializedArgsBuffer.data(), - serializedArgsBuffer.size(), 0); + serializedArgsBuffer.size(), 0, {}); } else { // In library mode, to use the remote simulator platform, we need to pack // the argument and delegate to the platform's launchKernel rather than @@ -538,13 +538,16 @@ std::invoke_result_t invokeKernel(QuantumKernel &&fn, // send on the function pointer to the platform to retrieve the symbol name // since the typeid of a function only contains signature info. if constexpr (std::is_class_v>) + // FIXME: this shouldn't use the serialization code any longer. It should + // build a vector of void* and pass that instead. 
cudaq::get_platform().launchKernel(cudaq::getKernelName(fn), nullptr, (void *)serializedArgsBuffer.data(), - serializedArgsBuffer.size(), 0); + serializedArgsBuffer.size(), 0, {}); else cudaq::get_platform().launchKernel( cudaq::getKernelName(fn), reinterpret_cast(&fn), - (void *)serializedArgsBuffer.data(), serializedArgsBuffer.size(), 0); + (void *)serializedArgsBuffer.data(), serializedArgsBuffer.size(), 0, + {}); } #else return fn(std::forward(args)...); diff --git a/runtime/common/NoiseModel.cpp b/runtime/common/NoiseModel.cpp index 5cb37cfaf8..b0a71ecee8 100644 --- a/runtime/common/NoiseModel.cpp +++ b/runtime/common/NoiseModel.cpp @@ -8,8 +8,8 @@ #include "NoiseModel.h" #include "Logger.h" +#include "common/CustomOp.h" #include "common/EigenDense.h" - namespace cudaq { template @@ -82,7 +82,9 @@ kraus_channel::kraus_channel(std::vector &_ops) : ops(_ops) { validateCompleteness(); } -kraus_channel::kraus_channel(const kraus_channel &other) : ops(other.ops) {} +kraus_channel::kraus_channel(const kraus_channel &other) + : ops(other.ops), noise_type(other.noise_type), + parameters(other.parameters) {} std::size_t kraus_channel::size() const { return ops.size(); } @@ -94,6 +96,8 @@ kraus_op &kraus_channel::operator[](const std::size_t idx) { return ops[idx]; } kraus_channel &kraus_channel::operator=(const kraus_channel &other) { ops = other.ops; + noise_type = other.noise_type; + parameters = other.parameters; return *this; } @@ -105,7 +109,8 @@ void noise_model::add_channel(const std::string &quantumOp, const kraus_channel &channel) { if (std::find(std::begin(availableOps), std::end(availableOps), quantumOp) == - std::end(availableOps)) + std::end(availableOps) && + !customOpRegistry::getInstance().isOperationRegistered(quantumOp)) throw std::runtime_error( "Invalid quantum op for noise_model::add_channel (" + quantumOp + ")."); @@ -135,18 +140,135 @@ void noise_model::add_channel(const std::string &quantumOp, iter->second.push_back(channel); } +void 
noise_model::add_all_qubit_channel(const std::string &quantumOp, + const kraus_channel &channel, + int numControls) { + auto actualGateName = quantumOp; + const bool isCustomOp = + customOpRegistry::getInstance().isOperationRegistered(actualGateName); + if (numControls == 0 && quantumOp.starts_with('c') && !isCustomOp) { + // Infer the number of control bits from gate name (with 'c' prefixes) + // Note: We only support up to 2 control bits using this notation, e.g., + // 'cx', 'ccx'. Users will need to use the numControls parameter for more + // complex cases. + // Note: this convention doesn't apply to custom operations. + numControls = quantumOp.starts_with("cc") ? 2 : 1; + actualGateName = quantumOp.substr(numControls); + if (actualGateName.starts_with('c')) + throw std::runtime_error( + "Controlled gates with more than 2 control bits must be specified " + "using the numControls parameter."); + } + + if (std::find(std::begin(availableOps), std::end(availableOps), + actualGateName) == std::end(availableOps) && + !isCustomOp) + throw std::runtime_error( + "Invalid quantum op for noise_model::add_channel (" + quantumOp + ")."); + GateIdentifier key(actualGateName, numControls); + auto iter = defaultNoiseModel.find(key); + if (iter == defaultNoiseModel.end()) { + cudaq::info("Adding new all-qubit kraus_channel to noise_model ({}, number " + "of control bits = {})", + actualGateName, numControls); + defaultNoiseModel.insert({key, {channel}}); + return; + } + + cudaq::info("kraus_channel existed for {}, adding new kraus_channel to " + "noise_model (number of control bits = {})", + actualGateName, numControls); + + iter->second.push_back(channel); +} + +void noise_model::add_channel(const std::string &quantumOp, + const PredicateFuncTy &pred) { + if (std::find(std::begin(availableOps), std::end(availableOps), quantumOp) == + std::end(availableOps) && + !customOpRegistry::getInstance().isOperationRegistered(quantumOp)) + throw std::runtime_error( + "Invalid quantum op 
for noise_model::add_channel (" + quantumOp + ")."); + auto iter = gatePredicates.find(quantumOp); + if (iter == gatePredicates.end()) { + cudaq::info("Adding new callback kraus_channel to noise_model for {}.", + quantumOp); + gatePredicates.insert({quantumOp, pred}); + return; + } + + throw std::logic_error("An callback kraus_channel has been defined for " + + quantumOp + " gate."); +} + std::vector noise_model::get_channels(const std::string &quantumOp, - const std::vector &qubits) const { + const std::vector &targetQubits, + const std::vector &controlQubits, + const std::vector ¶ms) const { + std::vector qubits{controlQubits.begin(), controlQubits.end()}; + qubits.insert(qubits.end(), targetQubits.begin(), targetQubits.end()); + const auto verifyChannelDimension = + [&](const std::vector &channels) { + auto nQubits = qubits.size(); + auto dim = 1UL << nQubits; + return std::all_of( + channels.begin(), channels.end(), [dim](const auto &channel) { + return channel.empty() || channel.dimension() == dim; + }); + }; + + std::vector resultChannels; + // Search qubit-specific noise settings auto key = std::make_pair(quantumOp, qubits); auto iter = noiseModel.find(key); - if (iter == noiseModel.end()) { - cudaq::info("No kraus_channel available for {} on {}.", quantumOp, qubits); - return {}; + // Note: we've validated the channel dimension in the 'add_channel' method. 
+ if (iter != noiseModel.end()) { + cudaq::info("Found kraus_channel for {} on {}.", quantumOp, qubits); + const auto &krausChannel = iter->second; + resultChannels.insert(resultChannels.end(), krausChannel.begin(), + krausChannel.end()); } - cudaq::info("Found kraus_channel for {} on {}.", quantumOp, qubits); - return iter->second; -} + // Look up default noise channel + auto defaultIter = + defaultNoiseModel.find(GateIdentifier(quantumOp, controlQubits.size())); + if (defaultIter != defaultNoiseModel.end()) { + cudaq::info( + "Found default kraus_channel setting for {} with {} control bits.", + quantumOp, controlQubits.size()); + if (!verifyChannelDimension(defaultIter->second)) + throw std::runtime_error( + fmt::format("Dimension mismatch: all-qubit kraus_channel with for " + "{} with {} control qubits encountered unexpected " + "kraus operator dimension (expecting dimension of {}).", + quantumOp, controlQubits.size(), 1UL << qubits.size())); + + const auto &krausChannel = defaultIter->second; + resultChannels.insert(resultChannels.end(), krausChannel.begin(), + krausChannel.end()); + } + + // Look up predicate-specific noise settings + auto predIter = gatePredicates.find(quantumOp); + if (predIter != gatePredicates.end()) { + cudaq::info("Found callback kraus_channel setting for {}.", quantumOp); + const auto krausChannel = predIter->second(qubits, params); + if (!verifyChannelDimension({krausChannel})) + throw std::runtime_error(fmt::format( + "Dimension mismatch: kraus_channel with for " + "{} on qubits {} with gate parameters {} encountered unexpected " + "kraus operator dimension (expecting dimension of {}, got {}).", + quantumOp, qubits, params, 1UL << qubits.size(), + krausChannel.dimension())); + if (!krausChannel.empty()) + resultChannels.emplace_back(krausChannel); + } + + if (resultChannels.empty()) + cudaq::info("No kraus_channel available for {} on {}.", quantumOp, qubits); + + return resultChannels; +} } // namespace cudaq diff --git 
a/runtime/common/NoiseModel.h b/runtime/common/NoiseModel.h index 4823134faf..2e16258c54 100644 --- a/runtime/common/NoiseModel.h +++ b/runtime/common/NoiseModel.h @@ -12,12 +12,24 @@ #include #include +#include #include #include #include namespace cudaq { +/// @brief Noise model enumerated type that allows downstream simulators of +/// `kraus_channel` objects to apply simulator-specific logic for well-known +/// noise models. +enum class noise_model_type { + unknown, + depolarization_channel, + amplitude_damping_channel, + bit_flip_channel, + phase_flip_channel +}; + /// @brief A kraus_op represents a single Kraus operation, /// described as a complex matrix of specific size. The matrix /// is represented here as a 1d array (specifically a std::vector). @@ -106,6 +118,12 @@ class kraus_channel { } public: + /// @brief Noise type enumeration + noise_model_type noise_type = noise_model_type::unknown; + + /// @brief Noise parameter values + std::vector parameters; + ~kraus_channel() = default; /// @brief The nullary constructor @@ -158,6 +176,13 @@ class kraus_channel { /// to a Kraus channel containing a number of kraus_ops to /// be applied to the density matrix representation of the state. class noise_model { +public: + /// @brief Callback function type for noise channel. + /// Given the qubit operands and gate parameters, this function should return + /// a concrete noise channel. + using PredicateFuncTy = std::function &, const std::vector &)>; + protected: /// @brief Noise Model data map key is a (quantum Op + qubits applied to) using KeyT = std::pair>; @@ -179,20 +204,57 @@ class noise_model { using NoiseModelOpMap = std::unordered_map, KeyTHash>; - static constexpr const char *availableOps[] = { - "x", "y", "z", "h", "s", "t", "rx", "ry", "rz", "r1", "u3"}; - // The noise model is a mapping of quantum operation // names to a Kraus channel applied after the operation is applied. NoiseModelOpMap noiseModel; + /// @brief Gate identity for a match-all condition. 
+ // In this case, it will match an operation with any qubits. + // The controlled versions of a gate are tracked by the number of control + // qubits. + struct GateIdentifier { + std::string name; + std::size_t numControls; + bool operator==(const GateIdentifier &other) const { + return other.name == name && other.numControls == numControls; + }; + }; + + // In addition to specific (gate + operands) map, we have a default map, + // which tracks noise channels attached to all operations of that type. + // This map is keyed by the gate-name + number of control bits, e.g., x(1) + // means cnot. + struct GateIdentifierHash { + std::size_t operator()(const GateIdentifier &p) const { + const std::string fullName = + p.name + "(" + std::to_string(p.numControls) + ")"; + return std::hash{}(fullName); + } + }; + + /// @brief Useful typedef for the noise model data map + using DefaultNoiseModelOpMap = + std::unordered_map, + GateIdentifierHash>; + /// @brief Matched-all noise channel map + DefaultNoiseModelOpMap defaultNoiseModel; + + /// @brief Noise model by callback function map + std::unordered_map gatePredicates; + + static constexpr const char *availableOps[] = { + "x", "y", "z", "h", "s", "t", "rx", "ry", "rz", "r1", "u3"}; + public: /// @brief default constructor noise_model() = default; /// @brief Return true if there are no kraus_channels in this noise model. /// @return - bool empty() const { return noiseModel.empty(); } + bool empty() const { + return noiseModel.empty() && defaultNoiseModel.empty() && + gatePredicates.empty(); + } /// @brief Add the Kraus channel to the specified one-qubit quantum /// operation. 
It applies to the quantumOp operation for the specified @@ -200,11 +262,25 @@ class noise_model { void add_channel(const std::string &quantumOp, const std::vector &qubits, const kraus_channel &channel); - void add_channel(const std::string &quantumOp, - const std::vector &&qubits, - const kraus_channel &channel) { - add_channel(quantumOp, qubits, channel); - } + + /// @brief Add the Kraus channel as a callback to the specified quantum + /// operation. + // The callback function will be called with the gate operands and gate + // parameters whenever the specified quantum operation is executed. The + // callback function should return a concrete noise channel. This can be an + // empty noise channel if no noise is expected. + /// @param quantumOp Quantum operation that the noise channel applies to. + /// @param pred Callback function that generates a noise channel. + void add_channel(const std::string &quantumOp, const PredicateFuncTy &pred); + + /// @brief Add the Kraus channel that applies to a quantum operation on any + /// arbitrary qubits. + /// @param quantumOp Quantum operation that the noise channel applies to. + /// @param channel The Kraus channel to apply. + /// @param numControls Number of control qubits for the gate. Default is 0 + /// (gate without a control modifier). + void add_all_qubit_channel(const std::string &quantumOp, + const kraus_channel &channel, int numControls = 0); /// @brief Add the provided kraus_channel to all /// specified quantum operations. @@ -219,19 +295,48 @@ class noise_model { add_channel(name, qubits, channel); } + /// @brief Add the provided kraus_channel callback to all + /// specified quantum operations. 
+ template + void add_channel(const PredicateFuncTy &pred) { + std::vector names; + std::apply( + [&](const auto &...elements) { (names.push_back(elements.name), ...); }, + std::tuple()); + for (auto &name : names) + add_channel(name, pred); + } + + /// @brief Add the provided kraus_channel to all + /// specified quantum operations applying on arbitrary qubits. + template + void add_all_qubit_channel(const kraus_channel &channel, + int numControls = 0) { + std::vector names; + std::apply( + [&](const auto &...elements) { (names.push_back(elements.name), ...); }, + std::tuple()); + for (auto &name : names) + add_all_qubit_channel(name, channel, numControls); + } + /// @brief Return relevant kraus_channels on the specified qubits for // the given quantum operation. This will merge Kraus channels // that exists for the same quantumOp and qubits. std::vector get_channels(const std::string &quantumOp, - const std::vector &qubits) const; + const std::vector &targetQubits, + const std::vector &controlQubits = {}, + const std::vector ¶ms = {}) const; /// @brief Get all kraus_channels on the given qubits template std::vector - get_channels(const std::vector &qubits) const { + get_channels(const std::vector &targetQubits, + const std::vector &controlQubits = {}, + const std::vector ¶ms = {}) const { QuantumOp op; - return get_channels(op.name, qubits); + return get_channels(op.name, targetQubits, controlQubits, params); } }; @@ -252,6 +357,8 @@ class depolarization_channel : public kraus_channel { k3v{std::sqrt(probability / three), 0, 0, negOne * std::sqrt(probability / three)}; ops = {k0v, k1v, k2v, k3v}; + this->parameters.push_back(probability); + noise_type = noise_model_type::depolarization_channel; validateCompleteness(); } }; @@ -265,6 +372,8 @@ class amplitude_damping_channel : public kraus_channel { std::vector k0v{1, 0, 0, std::sqrt(1 - probability)}, k1v{0, std::sqrt(probability), 0, 0}; ops = {k0v, k1v}; + this->parameters.push_back(probability); + noise_type = 
noise_model_type::amplitude_damping_channel; validateCompleteness(); } }; @@ -279,6 +388,8 @@ class bit_flip_channel : public kraus_channel { std::sqrt(1 - probability)}, k1v{0, std::sqrt(probability), std::sqrt(probability), 0}; ops = {k0v, k1v}; + this->parameters.push_back(probability); + noise_type = noise_model_type::bit_flip_channel; validateCompleteness(); } }; @@ -294,6 +405,8 @@ class phase_flip_channel : public kraus_channel { std::sqrt(1 - probability)}, k1v{std::sqrt(probability), 0, 0, negOne * std::sqrt(probability)}; ops = {k0v, k1v}; + this->parameters.push_back(probability); + noise_type = noise_model_type::phase_flip_channel; validateCompleteness(); } }; diff --git a/runtime/common/RuntimeMLIR.cpp b/runtime/common/RuntimeMLIR.cpp index b4d433080e..4a34b6adf8 100644 --- a/runtime/common/RuntimeMLIR.cpp +++ b/runtime/common/RuntimeMLIR.cpp @@ -84,6 +84,7 @@ std::unique_ptr initializeMLIR() { cudaq::opt::registerAggressiveEarlyInlining(); cudaq::opt::registerUnrollingPipeline(); cudaq::opt::registerTargetPipelines(); + cudaq::opt::registerWireSetToProfileQIRPipeline(); cudaq::opt::registerMappingPipeline(); mlirLLVMInitialized = true; } diff --git a/runtime/common/RuntimeMLIR.h b/runtime/common/RuntimeMLIR.h index e2878192b2..64049875a3 100644 --- a/runtime/common/RuntimeMLIR.h +++ b/runtime/common/RuntimeMLIR.h @@ -28,9 +28,10 @@ namespace cudaq { /// @param additionalPasses Additional passes to run at the end /// @param printIR Print IR to `stderr` /// @param printIntermediateMLIR Print IR in between each pass +/// @param printStats Print pass statistics using TranslateFromMLIRFunction = std::function; + bool, bool)>; /// @brief Initialize MLIR with CUDA-Q dialects and return the /// MLIRContext. 
@@ -61,10 +62,10 @@ class Translation { mlir::LogicalResult operator()(mlir::Operation *op, llvm::raw_string_ostream &output, const std::string &additionalPasses, - bool printIR, - bool printIntermediateMLIR) const { + bool printIR, bool printIntermediateMLIR, + bool printStats) const { return function(op, output, additionalPasses, printIR, - printIntermediateMLIR); + printIntermediateMLIR, printStats); } private: diff --git a/runtime/common/RuntimeMLIRCommonImpl.h b/runtime/common/RuntimeMLIRCommonImpl.h index c396136ce8..e567b1f0a7 100644 --- a/runtime/common/RuntimeMLIRCommonImpl.h +++ b/runtime/common/RuntimeMLIRCommonImpl.h @@ -16,9 +16,11 @@ #include "cudaq/Optimizer/CodeGen/OpenQASMEmitter.h" #include "cudaq/Optimizer/CodeGen/Passes.h" #include "cudaq/Optimizer/CodeGen/Pipelines.h" +#include "cudaq/Optimizer/CodeGen/QIRAttributeNames.h" #include "cudaq/Optimizer/CodeGen/QIRFunctionNames.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" #include "cudaq/Optimizer/Transforms/Passes.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Instructions.h" @@ -356,7 +358,7 @@ mlir::LogicalResult qirProfileTranslationFunction(const char *qirProfile, mlir::Operation *op, llvm::raw_string_ostream &output, const std::string &additionalPasses, bool printIR, - bool printIntermediateMLIR) { + bool printIntermediateMLIR, bool printStats) { ScopedTraceWithContext(cudaq::TIMING_JIT, "qirProfileTranslationFunction"); const std::uint32_t qir_major_version = 1; @@ -369,9 +371,20 @@ qirProfileTranslationFunction(const char *qirProfile, mlir::Operation *op, mlir::PassManager pm(context); if (printIntermediateMLIR) pm.enableIRPrinting(); + if (printStats) + pm.enableStatistics(); std::string errMsg; llvm::raw_string_ostream errOs(errMsg); - cudaq::opt::addPipelineConvertToQIR(pm, qirProfile); + bool containsWireSet = + op->walk([](quake::WireSetOp wireSetOp) { + 
return mlir::WalkResult::interrupt(); + }).wasInterrupted(); + + if (containsWireSet) + cudaq::opt::addWiresetToProfileQIRPipeline(pm, qirProfile); + else + cudaq::opt::addPipelineConvertToQIR(pm, qirProfile); + // Add additional passes if necessary if (!additionalPasses.empty() && failed(parsePassPipeline(additionalPasses, pm, errOs))) @@ -468,10 +481,10 @@ void registerToQIRTranslation() { _profile, "translate from quake to " _profile, \ [](mlir::Operation *op, llvm::raw_string_ostream &output, \ const std::string &additionalPasses, bool printIR, \ - bool printIntermediateMLIR) { \ - return qirProfileTranslationFunction(_profile, op, output, \ - additionalPasses, printIR, \ - printIntermediateMLIR); \ + bool printIntermediateMLIR, bool printStats) { \ + return qirProfileTranslationFunction( \ + _profile, op, output, additionalPasses, printIR, \ + printIntermediateMLIR, printStats); \ }) // Base Profile and Adaptive Profile are very similar, so they use the same @@ -486,11 +499,13 @@ void registerToOpenQASMTranslation() { "qasm2", "translate from quake to openQASM 2.0", [](mlir::Operation *op, llvm::raw_string_ostream &output, const std::string &additionalPasses, bool printIR, - bool printIntermediateMLIR) { + bool printIntermediateMLIR, bool printStats) { ScopedTraceWithContext(cudaq::TIMING_JIT, "qasm2 translation"); mlir::PassManager pm(op->getContext()); if (printIntermediateMLIR) pm.enableIRPrinting(); + if (printStats) + pm.enableStatistics(); cudaq::opt::addPipelineTranslateToOpenQASM(pm); mlir::DefaultTimingManager tm; tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES)); @@ -515,11 +530,13 @@ void registerToIQMJsonTranslation() { "iqm", "translate from quake to IQM's json format", [](mlir::Operation *op, llvm::raw_string_ostream &output, const std::string &additionalPasses, bool printIR, - bool printIntermediateMLIR) { + bool printIntermediateMLIR, bool printStats) { ScopedTraceWithContext(cudaq::TIMING_JIT, "iqm translation"); 
mlir::PassManager pm(op->getContext()); if (printIntermediateMLIR) pm.enableIRPrinting(); + if (printStats) + pm.enableStatistics(); cudaq::opt::addPipelineTranslateToIQMJson(pm); mlir::DefaultTimingManager tm; tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES)); @@ -539,6 +556,85 @@ void registerToIQMJsonTranslation() { }); } +void insertSetupAndCleanupOperations(mlir::Operation *module) { + mlir::OpBuilder modBuilder(module); + auto *context = module->getContext(); + auto arrayQubitTy = cudaq::opt::getArrayType(context); + auto voidTy = mlir::LLVM::LLVMVoidType::get(context); + auto boolTy = modBuilder.getI1Type(); + mlir::FlatSymbolRefAttr allocateSymbol = + cudaq::opt::factory::createLLVMFunctionSymbol( + cudaq::opt::QIRArrayQubitAllocateArray, arrayQubitTy, + {modBuilder.getI64Type()}, dyn_cast(module)); + mlir::FlatSymbolRefAttr releaseSymbol = + cudaq::opt::factory::createLLVMFunctionSymbol( + cudaq::opt::QIRArrayQubitReleaseArray, {voidTy}, {arrayQubitTy}, + dyn_cast(module)); + mlir::FlatSymbolRefAttr isDynamicSymbol = + cudaq::opt::factory::createLLVMFunctionSymbol( + cudaq::opt::QIRisDynamicQubitManagement, {boolTy}, {}, + dyn_cast(module)); + mlir::FlatSymbolRefAttr setDynamicSymbol = + cudaq::opt::factory::createLLVMFunctionSymbol( + cudaq::opt::QIRsetDynamicQubitManagement, {voidTy}, {boolTy}, + dyn_cast(module)); + mlir::FlatSymbolRefAttr clearResultMapsSymbol = + cudaq::opt::factory::createLLVMFunctionSymbol( + cudaq::opt::QIRClearResultMaps, {voidTy}, {}, + dyn_cast(module)); + + // Iterate through all operations in the ModuleOp + mlir::SmallVector funcs; + module->walk([&](mlir::LLVM::LLVMFuncOp func) { funcs.push_back(func); }); + for (auto &func : funcs) { + if (!func->hasAttr(cudaq::entryPointAttrName)) + continue; + std::int64_t num_qubits = -1; + if (auto requiredQubits = func->getAttrOfType( + cudaq::opt::QIRRequiredQubitsAttrName)) + requiredQubits.strref().getAsInteger(10, num_qubits); + + // Further processing on funcOp if 
needed + auto &blocks = func.getBlocks(); + if (blocks.size() < 1 || num_qubits < 0) + continue; + + mlir::Block &block = *blocks.begin(); + mlir::OpBuilder builder(&block, block.begin()); + auto loc = builder.getUnknownLoc(); + + auto origMode = builder.create( + loc, mlir::TypeRange{boolTy}, isDynamicSymbol, mlir::ValueRange{}); + + // Create constant op + auto numQubitsVal = + cudaq::opt::factory::genLlvmI64Constant(loc, builder, num_qubits); + auto falseVal = builder.create( + loc, boolTy, builder.getI16IntegerAttr(false)); + + // Invoke allocate function with constant op + auto qubitAlloc = builder.create( + loc, mlir::TypeRange{arrayQubitTy}, allocateSymbol, + mlir::ValueRange{numQubitsVal.getResult()}); + builder.create(loc, mlir::TypeRange{voidTy}, + setDynamicSymbol, + mlir::ValueRange{falseVal.getResult()}); + + // At the end of the function, deallocate the qubits and restore the + // simulator state. + builder.setInsertionPoint(std::prev(blocks.end())->getTerminator()); + builder.create( + loc, mlir::TypeRange{voidTy}, releaseSymbol, + mlir::ValueRange{qubitAlloc.getResult()}); + builder.create(loc, mlir::TypeRange{voidTy}, + setDynamicSymbol, + mlir::ValueRange{origMode.getResult()}); + builder.create(loc, mlir::TypeRange{voidTy}, + clearResultMapsSymbol, + mlir::ValueRange{}); + } +} + mlir::ExecutionEngine *createQIRJITEngine(mlir::ModuleOp &moduleOp, llvm::StringRef convertTo) { // The "fast" instruction selection compilation algorithm is actually very @@ -567,10 +663,22 @@ mlir::ExecutionEngine *createQIRJITEngine(mlir::ModuleOp &moduleOp, mlir::PassManager pm(context); std::string errMsg; llvm::raw_string_ostream errOs(errMsg); + + bool containsWireSet = + module + ->walk([](quake::WireSetOp wireSetOp) { + return mlir::WalkResult::interrupt(); + }) + .wasInterrupted(); + // Even though we're not lowering all the way to a real QIR profile for this // emulated path, we need to pass in the `convertTo` in order to mimic what // the non-emulated path 
would do. - cudaq::opt::commonPipelineConvertToQIR(pm, convertTo); + if (containsWireSet) + cudaq::opt::addWiresetToProfileQIRPipeline(pm, convertTo); + else + cudaq::opt::commonPipelineConvertToQIR(pm, convertTo); + mlir::DefaultTimingManager tm; tm.setEnabled(cudaq::isTimingTagEnabled(cudaq::TIMING_JIT_PASSES)); auto timingScope = tm.getRootScope(); // starts the timer @@ -579,6 +687,15 @@ mlir::ExecutionEngine *createQIRJITEngine(mlir::ModuleOp &moduleOp, throw std::runtime_error( "[createQIRJITEngine] Lowering to QIR for remote emulation failed."); timingScope.stop(); + + // Insert necessary calls to qubit allocations and qubit releases if the + // original module contained WireSetOp's. This is required because the + // output of the above pipeline will produce IR that uses statically + // allocated qubit IDs in that case, and the simulator needs these + // additional calls in order to operate properly. + if (containsWireSet) + insertSetupAndCleanupOperations(module); + auto llvmModule = translateModuleToLLVMIR(module, llvmContext); if (!llvmModule) throw std::runtime_error( diff --git a/runtime/cudaq/CMakeLists.txt b/runtime/cudaq/CMakeLists.txt index f36d99c7c1..9c08eef354 100644 --- a/runtime/cudaq/CMakeLists.txt +++ b/runtime/cudaq/CMakeLists.txt @@ -9,7 +9,7 @@ add_subdirectory(spin) set(LIBRARY_NAME cudaq) -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ctad-maybe-unsupported") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ctad-maybe-unsupported") set(INTERFACE_POSITION_INDEPENDENT_CODE ON) # Create the CUDA-Q Library diff --git a/runtime/cudaq/builder/kernel_builder.cpp b/runtime/cudaq/builder/kernel_builder.cpp index ee5fe7e078..6961cc547f 100644 --- a/runtime/cudaq/builder/kernel_builder.cpp +++ b/runtime/cudaq/builder/kernel_builder.cpp @@ -962,8 +962,9 @@ jitCode(ImplicitLocOpBuilder &builder, ExecutionEngine *jit, pm.addNestedPass(cudaq::opt::createQuakeAddMetadata()); pm.addNestedPass(createCanonicalizerPass()); pm.addNestedPass(createCSEPass()); - 
pm.addPass(cudaq::opt::createGenerateDeviceCodeLoader(/*genAsQuake=*/true)); + pm.addPass(cudaq::opt::createGenerateDeviceCodeLoader({.jitTime = true})); pm.addPass(cudaq::opt::createGenerateKernelExecution()); + pm.addPass(createSymbolDCEPass()); if (failed(pm.run(module))) throw std::runtime_error( "cudaq::builder failed to JIT compile the Quake representation."); diff --git a/runtime/cudaq/cudaq.cpp b/runtime/cudaq/cudaq.cpp index 5fed83d5e5..e30be22637 100644 --- a/runtime/cudaq/cudaq.cpp +++ b/runtime/cudaq/cudaq.cpp @@ -14,6 +14,7 @@ #include "cuda_runtime_api.h" #endif #include "cudaq/platform.h" +#include "cudaq/qis/qkernel.h" #include "cudaq/utils/registry.h" #include "distributed/mpi_plugin.h" #include @@ -27,6 +28,7 @@ namespace nvqir { void tearDownBeforeMPIFinalize(); void setRandomSeed(std::size_t); +void setPhotonicRandomSeed(std::size_t seed); } // namespace nvqir namespace cudaq::mpi { @@ -210,7 +212,8 @@ static std::shared_mutex globalRegistryMutex; static std::vector> quakeRegistry; -void cudaq::registry::deviceCodeHolderAdd(const char *key, const char *code) { +void cudaq::registry::__cudaq_deviceCodeHolderAdd(const char *key, + const char *code) { std::unique_lock lock(globalRegistryMutex); auto it = std::find_if(quakeRegistry.begin(), quakeRegistry.end(), [&](const auto &pair) { return pair.first == key; }); @@ -233,12 +236,50 @@ static std::vector kernelRegistry; static std::map argsCreators; static std::map lambdaNames; +static std::map> linkableKernelRegistry; void cudaq::registry::cudaqRegisterKernelName(const char *kernelName) { std::unique_lock lock(globalRegistryMutex); kernelRegistry.emplace_back(kernelName); } +void cudaq::registry::__cudaq_registerLinkableKernel(void *hostSideFunc, + const char *kernelName, + void *deviceSideFunc) { + std::unique_lock lock(globalRegistryMutex); + linkableKernelRegistry.insert( + {hostSideFunc, std::pair{kernelName, deviceSideFunc}}); +} + +std::intptr_t 
cudaq::registry::__cudaq_getLinkableKernelKey(void *p) { + if (!p) + throw std::runtime_error("cannot get kernel key, nullptr"); + const auto &qk = *reinterpret_cast *>(p); + return reinterpret_cast(*qk.get_entry_kernel_from_holder()); +} + +const char *cudaq::registry::getLinkableKernelNameOrNull(std::intptr_t key) { + auto iter = linkableKernelRegistry.find(reinterpret_cast(key)); + if (iter != linkableKernelRegistry.end()) + return iter->second.first; + return nullptr; +} + +const char *cudaq::registry::__cudaq_getLinkableKernelName(std::intptr_t key) { + auto *result = getLinkableKernelNameOrNull(key); + if (!result) + throw std::runtime_error("kernel key is not present: kernel name unknown"); + return result; +} + +void * +cudaq::registry::__cudaq_getLinkableKernelDeviceFunction(std::intptr_t key) { + auto iter = linkableKernelRegistry.find(reinterpret_cast(key)); + if (iter != linkableKernelRegistry.end()) + return iter->second.second; + throw std::runtime_error("kernel key is not present: kernel unknown"); +} + void cudaq::registry::cudaqRegisterArgsCreator(const char *name, char *rawFunctor) { std::unique_lock lock(globalRegistryMutex); @@ -362,7 +403,14 @@ thread_local static std::size_t cudaq_random_seed = 0; /// will not be repeatable for those operations. void set_random_seed(std::size_t seed) { cudaq_random_seed = seed; - nvqir::setRandomSeed(seed); + try { + nvqir::setRandomSeed(seed); + } catch (std::exception &e) { + cudaq::info("Failed to set random seed in NVQIR, setting photonic random " + "seed in PhotonicNVQIR"); + nvqir::setPhotonicRandomSeed(seed); + } + auto &platform = cudaq::get_platform(); // Notify the platform that a new random seed value is set. 
platform.onRandomSeedSet(seed); diff --git a/runtime/cudaq/platform/default/CMakeLists.txt b/runtime/cudaq/platform/default/CMakeLists.txt index 39ff674cb9..98bfd4be74 100644 --- a/runtime/cudaq/platform/default/CMakeLists.txt +++ b/runtime/cudaq/platform/default/CMakeLists.txt @@ -42,6 +42,8 @@ if (OPENSSL_FOUND AND CUDAQ_ENABLE_REST) add_subdirectory(rest) add_subdirectory(rest_server) endif() + +add_target_config(opt-test) if (CUSTATEVEC_ROOT AND CUDA_FOUND) add_target_config(nvidia) diff --git a/runtime/cudaq/platform/default/DefaultQuantumPlatform.cpp b/runtime/cudaq/platform/default/DefaultQuantumPlatform.cpp index 6e2ad949c1..1b8d0b1141 100644 --- a/runtime/cudaq/platform/default/DefaultQuantumPlatform.cpp +++ b/runtime/cudaq/platform/default/DefaultQuantumPlatform.cpp @@ -34,7 +34,8 @@ class DefaultQPU : public cudaq::QPU { } void launchKernel(const std::string &name, void (*kernelFunc)(void *), - void *args, std::uint64_t, std::uint64_t) override { + void *args, std::uint64_t, std::uint64_t, + const std::vector &rawArgs) override { ScopedTraceWithContext(cudaq::TIMING_LAUNCH, "QPU::launchKernel"); kernelFunc(args); } diff --git a/runtime/cudaq/platform/default/opt-test.yml b/runtime/cudaq/platform/default/opt-test.yml new file mode 100644 index 0000000000..caa1532c53 --- /dev/null +++ b/runtime/cudaq/platform/default/opt-test.yml @@ -0,0 +1,40 @@ +# ============================================================================ # +# Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. # +# All rights reserved. # +# # +# This source code and the accompanying materials are made available under # +# the terms of the Apache License 2.0 which accompanies this distribution. 
# +# ============================================================================ # + +name: opt-test +description: "Compiler Optimization Test Target" + +target-arguments: + - key: option + required: false + type: option-flags + help-string: "Specify the target options as a comma-separated list.\nSupported options are 'dep-analysis, 'fp32', 'fp64', 'qpp'.\nFor example, the 'dep-analysis,fp32' option combination will activate single-precision simulation with the dep-analysis passes. Not all option combinations are supported." + +configuration-matrix: + - name: dep-analysis-fp32 + option-flags: [dep-analysis, fp32] + default: true + config: + nvqir-simulation-backend: cusvsim-fp32, custatevec-fp32 + preprocessor-defines: ["-D CUDAQ_SIMULATION_SCALAR_FP32"] + target-pass-pipeline: "func.func(unwind-lowering),canonicalize,lambda-lifting,func.func(memtoreg{quantum=0}),canonicalize,apply-op-specialization,kernel-execution,aggressive-early-inlining,func.func(quake-add-metadata),const-prop-complex,lift-array-value,func.func(get-concrete-matrix),device-code-loader{use-quake=1},canonicalize,cse,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg),canonicalize,cse,add-wireset,func.func(assign-wire-indices),dep-analysis,func.func(regtomem),symbol-dce" + library-mode: false + - name: dep-analysis-fp64 + option-flags: [dep-analysis, fp64] + config: + nvqir-simulation-backend: cusvsim-fp64, custatevec-fp64 + preprocessor-defines: ["-D CUDAQ_SIMULATION_SCALAR_FP64"] + target-pass-pipeline: 
"func.func(unwind-lowering),canonicalize,lambda-lifting,func.func(memtoreg{quantum=0}),canonicalize,apply-op-specialization,kernel-execution,aggressive-early-inlining,func.func(quake-add-metadata),const-prop-complex,lift-array-value,func.func(get-concrete-matrix),device-code-loader{use-quake=1},canonicalize,cse,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg),canonicalize,cse,add-wireset,func.func(assign-wire-indices),dep-analysis,func.func(regtomem),symbol-dce" + library-mode: false + - name: dep-analysis-qpp + option-flags: [dep-analysis, qpp] + config: + nvqir-simulation-backend: qpp + preprocessor-defines: ["-D CUDAQ_SIMULATION_SCALAR_FP64"] + target-pass-pipeline: "func.func(unwind-lowering),canonicalize,lambda-lifting,func.func(memtoreg{quantum=0}),canonicalize,apply-op-specialization,kernel-execution,aggressive-early-inlining,func.func(quake-add-metadata),const-prop-complex,lift-array-value,func.func(get-concrete-matrix),device-code-loader{use-quake=1},canonicalize,cse,func.func(add-dealloc,combine-quantum-alloc,canonicalize,factor-quantum-alloc,memtoreg),canonicalize,cse,add-wireset,func.func(assign-wire-indices),dep-analysis,func.func(regtomem),symbol-dce" + library-mode: false diff --git a/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp b/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp index 7ffc1106d6..bdde97a464 100644 --- a/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp +++ b/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp @@ -137,6 +137,16 @@ class MultiQPUQuantumPlatform : public cudaq::quantum_platform { const auto qpuSubType = getQpuType(description); if (!qpuSubType.empty()) { + const auto formatUrl = [](const std::string &url) -> std::string { + auto formatted = url; + // Default to http:// if none provided. 
+ if (!formatted.starts_with("http")) + formatted = std::string("http://") + formatted; + if (!formatted.empty() && formatted.back() != '/') + formatted += '/'; + return formatted; + }; + if (!cudaq::registry::isRegistered(qpuSubType)) throw std::runtime_error( fmt::format("Unable to retrieve {} QPU implementation. Please " @@ -186,6 +196,20 @@ class MultiQPUQuantumPlatform : public cudaq::quantum_platform { platformQPUs.emplace_back(std::move(qpu)); } platformNumQPUs = platformQPUs.size(); + } else if (qpuSubType == "orca") { + auto urls = cudaq::split(getOpt(description, "url"), ','); + platformQPUs.clear(); + for (std::size_t qId = 0; qId < urls.size(); ++qId) { + // Populate the information and add the QPUs + platformQPUs.emplace_back(cudaq::registry::get("orca")); + platformQPUs.back()->setId(qId); + const std::string configStr = + fmt::format("orca;url;{}", formatUrl(urls[qId])); + platformQPUs.back()->setTargetBackend(configStr); + threadToQpuId[std::hash{}( + platformQPUs.back()->getExecutionThreadId())] = qId; + } + platformNumQPUs = platformQPUs.size(); } else { auto urls = cudaq::split(getOpt(description, "url"), ','); auto sims = cudaq::split(getOpt(description, "backend"), ','); @@ -197,15 +221,6 @@ class MultiQPUQuantumPlatform : public cudaq::quantum_platform { description.find("auto_launch") != std::string::npos || urls.empty(); - const auto formatUrl = [](const std::string &url) -> std::string { - auto formatted = url; - // Default to http:// if none provided. 
- if (!formatted.starts_with("http")) - formatted = std::string("http://") + formatted; - if (!formatted.empty() && formatted.back() != '/') - formatted += '/'; - return formatted; - }; if (autoLaunch) { urls.clear(); const auto numInstanceStr = getOpt(description, "auto_launch"); diff --git a/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.cpp b/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.cpp index 5abd45bdf7..1243e9f480 100644 --- a/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.cpp +++ b/runtime/cudaq/platform/mqpu/custatevec/GPUEmulatedQPU.cpp @@ -38,7 +38,8 @@ class GPUEmulatedQPU : public cudaq::QPU { } void launchKernel(const std::string &name, void (*kernelFunc)(void *), - void *args, std::uint64_t, std::uint64_t) override { + void *args, std::uint64_t, std::uint64_t, + const std::vector &rawArgs) override { cudaq::info("QPU::launchKernel GPU {}", qpu_id); cudaSetDevice(qpu_id); kernelFunc(args); diff --git a/runtime/cudaq/platform/orca/CMakeLists.txt b/runtime/cudaq/platform/orca/CMakeLists.txt index 779a2cf794..3610b902a3 100644 --- a/runtime/cudaq/platform/orca/CMakeLists.txt +++ b/runtime/cudaq/platform/orca/CMakeLists.txt @@ -8,8 +8,14 @@ set(LIBRARY_NAME cudaq-orca-qpu) message(STATUS "Building ORCA REST QPU.") +set(ORCA_SRC + OrcaExecutor.cpp + OrcaQPU.cpp + OrcaRemoteRESTQPU.cpp + OrcaServerHelper.cpp +) -add_library(${LIBRARY_NAME} SHARED OrcaQPU.cpp) +add_library(${LIBRARY_NAME} SHARED ${ORCA_SRC}) target_include_directories(${LIBRARY_NAME} PRIVATE . 
PUBLIC @@ -30,9 +36,4 @@ target_link_libraries(${LIBRARY_NAME} install(TARGETS ${LIBRARY_NAME} DESTINATION lib) install(TARGETS ${LIBRARY_NAME} EXPORT cudaq-orca-qpu-targets DESTINATION lib) -# install(EXPORT cudaq-orca-qpu-targets -# FILE CUDAQQPUOrcaTargets.cmake -# NAMESPACE cudaq::orca:: -# DESTINATION lib/cmake/cudaq) - -add_target_config(orca) \ No newline at end of file +add_target_config(orca) diff --git a/runtime/cudaq/platform/orca/OrcaExecutor.cpp b/runtime/cudaq/platform/orca/OrcaExecutor.cpp new file mode 100644 index 0000000000..94413a24c5 --- /dev/null +++ b/runtime/cudaq/platform/orca/OrcaExecutor.cpp @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "OrcaExecutor.h" +#include "OrcaServerHelper.h" +#include "common/Logger.h" + +namespace cudaq { + +details::future OrcaExecutor::execute(cudaq::orca::TBIParameters params, + const std::string &kernelName) { + auto orcaServerHelper = dynamic_cast(serverHelper); + assert(orcaServerHelper); + orcaServerHelper->setShots(shots); + cudaq::info("Executor creating job to execute with the {} helper.", + orcaServerHelper->name()); + // Create the Job Payload, composed of job post path, headers, + // and the job json messages themselves + auto [jobPostPath, headers, jobs] = orcaServerHelper->createJob(params); + auto job = jobs[0]; + auto config = orcaServerHelper->getConfig(); + std::vector ids; + cudaq::info("Job created, posting to {}", jobPostPath); + // Post it, get the response + auto response = client.post(jobPostPath, "", job, headers); + cudaq::info("Job posted, response was {}", response.dump()); + // Add the 
job id and the job name. + auto job_id = orcaServerHelper->extractJobId(response); + if (job_id.empty()) { + nlohmann::json tmp(job.at("job_id")); + orcaServerHelper->constructGetJobPath(tmp[0]); + job_id = tmp[0].at("job_id"); + } + ids.emplace_back(job_id, kernelName); + config["output_names." + job_id] = kernelName; + + config.insert({"shots", std::to_string(shots)}); + std::string name = orcaServerHelper->name(); + return cudaq::details::future(ids, name, config); +} + +} // namespace cudaq \ No newline at end of file diff --git a/runtime/cudaq/platform/orca/OrcaExecutor.h b/runtime/cudaq/platform/orca/OrcaExecutor.h new file mode 100644 index 0000000000..11f0dd76ad --- /dev/null +++ b/runtime/cudaq/platform/orca/OrcaExecutor.h @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#pragma once + +#include "common/Executor.h" +#include "orca_qpu.h" + +namespace cudaq { + +/// @brief The Executor subclass for ORCA target which has a distinct sampling +/// API. +class OrcaExecutor : public Executor { +public: + /// @brief Execute the provided ORCA quantum parameters and return a future + /// object. The caller can make this synchronous by just immediately calling + /// .get(). 
+ details::future execute(cudaq::orca::TBIParameters params, + const std::string &kernelName); +}; +} // namespace cudaq diff --git a/runtime/cudaq/platform/orca/OrcaQPU.cpp b/runtime/cudaq/platform/orca/OrcaQPU.cpp index 8c6a414b5a..63883a7af3 100644 --- a/runtime/cudaq/platform/orca/OrcaQPU.cpp +++ b/runtime/cudaq/platform/orca/OrcaQPU.cpp @@ -7,322 +7,90 @@ * the terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ -#include "common/ExecutionContext.h" -#include "common/FmtCore.h" - -#include "common/Logger.h" -#include "common/RestClient.h" -#include "common/ServerHelper.h" -#include "cudaq.h" -#include "nvqpp_config.h" - -#include "cudaq/platform/qpu.h" -#include "cudaq/platform/quantum_platform.h" -#include "cudaq/qis/qubit_qis.h" -#include "cudaq/spin_op.h" +// #include "common/ExecutionContext.h" +// #include "common/Future.h" +#include "cudaq/platform.h" #include "orca_qpu.h" -#include "llvm/Support/Base64.h" +namespace cudaq::orca { -#include -#include -#include -#include -#include -#include +cudaq::sample_result runSampling(TBIParameters ¶meters, + std::size_t qpu_id = 0) { + std::size_t shots = parameters.n_samples; + auto ctx = std::make_unique("sample", shots); -namespace cudaq::orca { -cudaq::sample_result sample(std::vector &input_state, - std::vector &loop_lengths, - std::vector &bs_angles, - std::vector &ps_angles, int n_samples) { - TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles, - n_samples}; - cudaq::ExecutionContext context("sample", n_samples); - auto &platform = get_platform(); - platform.set_exec_ctx(&context, 0); - cudaq::altLaunchKernel("orca_launch", nullptr, ¶meters, - sizeof(TBIParameters), 0); + auto &platform = cudaq::get_platform(); + platform.set_exec_ctx(ctx.get(), qpu_id); + platform.set_current_qpu(qpu_id); - return context.result; -} -cudaq::sample_result sample(std::vector &input_state, - std::vector 
&loop_lengths, - std::vector &bs_angles, int n_samples) { - std::vector ps_angles = {}; - TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles, - n_samples}; - cudaq::ExecutionContext context("sample", n_samples); - auto &platform = get_platform(); - platform.set_exec_ctx(&context, 0); cudaq::altLaunchKernel("orca_launch", nullptr, ¶meters, sizeof(TBIParameters), 0); - return context.result; -} -} // namespace cudaq::orca - -namespace { - -/// @brief The OrcaRemoteRESTQPU is a subtype of QPU that enables the -/// execution of CUDA-Q kernels on remotely hosted quantum computing -/// services via a REST Client / Server interaction. This type is meant -/// to be general enough to support any remotely hosted service. -/// Moreover, this QPU handles launching kernels under the Execution Context -/// that includs sampling via synchronous client invocations. -class OrcaRemoteRESTQPU : public cudaq::QPU { -protected: - /// The number of shots - std::optional nShots; - - /// @brief the platform file path, CUDAQ_INSTALL/platforms - std::filesystem::path platformPath; - - /// @brief The name of the QPU being targeted - std::string qpuName; - - /// @brief The base URL - std::string baseUrl; - - /// @brief The machine we are targeting - std::string machine = "PT-1"; - - /// @brief Mapping of general key-values for backend - /// configuration. - std::map backendConfig; - - /// @brief Flag indicating whether we should emulate - /// execution locally. - bool emulate = false; - -private: - /// @brief RestClient used for HTTP requests. 
- cudaq::RestClient client; - -public: - /// @brief The constructor - OrcaRemoteRESTQPU() : QPU() { - std::filesystem::path cudaqLibPath{cudaq::getCUDAQLibraryPath()}; - platformPath = cudaqLibPath.parent_path().parent_path() / "targets"; - } - - OrcaRemoteRESTQPU(OrcaRemoteRESTQPU &&) = delete; - - /// @brief The destructor - virtual ~OrcaRemoteRESTQPU() = default; - - /// Enqueue a quantum task on the asynchronous execution queue. - void enqueue(cudaq::QuantumTask &task) override { - execution_queue->enqueue(task); - } - - /// @brief Return true if the current backend is a simulator - bool isSimulator() override { return emulate; } - - /// @brief Return true if the current backend supports conditional feedback - bool supportsConditionalFeedback() override { return false; } - - /// Provide the number of shots - void setShots(int _nShots) override { nShots = _nShots; } - - /// Clear the number of shots - void clearShots() override { nShots = std::nullopt; } - - /// @brief Return true if the current backend is remote - virtual bool isRemote() override { return !emulate; } - - /// Store the execution context for launchKernel - void setExecutionContext(cudaq::ExecutionContext *context) override { - if (!context) - return; - - cudaq::info("Remote Rest QPU setting execution context to {}", - context->name); - - // Execution context is valid - executionContext = context; - } - - /// Reset the execution context - void resetExecutionContext() override { - // do nothing here - executionContext = nullptr; - } - - /// @brief This setTargetBackend override is in charge of reading the - /// specific target backend configuration file. - void setTargetBackend(const std::string &backend) override; - - /// @brief Creates a quantum computation job using the provided kernel - /// executions and returns the corresponding payload. 
- cudaq::ServerJobPayload createJob(cudaq::orca::TBIParameters params); - - /// @brief Given a completed job response, map back to the sample_result - cudaq::sample_result processResults(cudaq::ServerMessage &postJobResponse); - - /// @brief Returns the name of the server helper. - const std::string name() const { return "orca"; } - - /// @brief Returns the headers for the server requests. - cudaq::RestHeaders getHeaders(); - - /// @brief Initializes the server helper with the provided backend - /// configuration. - void initialize(); - - /// @brief Launch the kernel. Handle all pertinent - /// modifications for the execution context. - void launchKernel(const std::string &kernelName, void (*kernelFunc)(void *), - void *args, std::uint64_t voidStarSize, - std::uint64_t resultOffset) override; - void launchKernel(const std::string &kernelName, - const std::vector &rawArgs) override { - throw std::runtime_error("launch kernel on raw args not implemented"); - } -}; - -/// @brief This setTargetBackend override is in charge of reading the -/// specific target backend configuration file. 
-void OrcaRemoteRESTQPU::setTargetBackend(const std::string &backend) { - cudaq::info("Remote REST platform is targeting {}.", backend); - - // First we see if the given backend has extra config params - auto mutableBackend = backend; - if (mutableBackend.find(";") != std::string::npos) { - auto split = cudaq::split(mutableBackend, ';'); - mutableBackend = split[0]; - // Must be key-value pairs, therefore an even number of values here - if ((split.size() - 1) % 2 != 0) - throw std::runtime_error( - "Backend config must be provided as key-value pairs: " + - std::to_string(split.size())); - - // Add to the backend configuration map - for (std::size_t i = 1; i < split.size(); i += 2) { - // No need to decode trivial true/false values - if (split[i + 1].starts_with("base64_")) { - split[i + 1].erase(0, 7); // erase "base64_" - std::vector decoded_vec; - if (auto err = llvm::decodeBase64(split[i + 1], decoded_vec)) - throw std::runtime_error("DecodeBase64 error"); - std::string decodedStr(decoded_vec.data(), decoded_vec.size()); - cudaq::info("Decoded {} parameter from '{}' to '{}'", split[i], - split[i + 1], decodedStr); - backendConfig.insert({split[i], decodedStr}); - } else { - backendConfig.insert({split[i], split[i + 1]}); - } - } - } - - /// Once we know the backend, we should search for the config file - /// from there we can get the URL/PORT and other inforation used in the - /// pipeline. - // Set the qpu name - qpuName = mutableBackend; - initialize(); + platform.reset_exec_ctx(qpu_id); + return ctx->result; } -/// @brief Launch the kernel. -void OrcaRemoteRESTQPU::launchKernel(const std::string &kernelName, - void (*kernelFunc)(void *), void *args, - std::uint64_t voidStarSize, - std::uint64_t resultOffset) { - cudaq::info("launching ORCA remote rest kernel ({})", kernelName); - - // TODO future iterations of this should support non-void return types. 
- if (!executionContext) - throw std::runtime_error("Remote rest execution can only be performed " - "via cudaq::sample() or cudaq::observe()."); - - cudaq::orca::TBIParameters params = - *((struct cudaq::orca::TBIParameters *)args); - std::size_t shots = params.n_samples; +async_sample_result runAsyncSampling(TBIParameters ¶meters, + std::size_t qpu_id = 0) { + std::size_t shots = parameters.n_samples; + auto ctx = std::make_unique("sample", shots); - setShots(shots); - executionContext->shots = shots; + // Indicate that this is an async exec + cudaq::details::future futureResult; + ctx->asyncExec = true; - cudaq::info("Executor creating job to execute with the {} helper.", name()); - - // Create the Job Payload, composed of job post path, headers, - // and the job json messages themselves - auto [jobPostPath, headers, jobs] = createJob(params); - auto job = jobs[0]; - cudaq::info("Job (name={}) created, posting to {}", kernelName, jobPostPath); + auto &platform = get_platform(); + platform.set_exec_ctx(ctx.get(), qpu_id); + platform.set_current_qpu(qpu_id); - // Post it, get the response - auto response = client.post(jobPostPath, "", job, headers); + cudaq::altLaunchKernel("orca_launch", nullptr, ¶meters, + sizeof(TBIParameters), 0); - cudaq::sample_result counts = processResults(response); + // If we have a non-null future, set it + futureResult = ctx->futureResult; - // // return the results synchronously - executionContext->result = counts; + platform.reset_exec_ctx(qpu_id); + return async_sample_result(std::move(futureResult)); } -// Initialize the ORCA server helper with a given backend configuration -void OrcaRemoteRESTQPU::initialize() { - // Set the machine - auto iter = backendConfig.find("machine"); - if (iter != backendConfig.end()) - machine = iter->second; - - // Set a base URL if provided - iter = backendConfig.find("url"); - if (iter != backendConfig.end()) { - baseUrl = iter->second; - } +cudaq::sample_result sample(std::vector &input_state, + 
std::vector &loop_lengths, + std::vector &bs_angles, + std::vector &ps_angles, int n_samples, + std::size_t qpu_id) { + TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles, + n_samples}; + return runSampling(parameters, qpu_id); } -// Create a job for the ORCA QPU -cudaq::ServerJobPayload -OrcaRemoteRESTQPU::createJob(cudaq::orca::TBIParameters params) { - std::vector jobs; - cudaq::ServerMessage job; - - // Construct the job message - job["target"] = machine; - - job["input_state"] = params.input_state; - job["loop_lengths"] = params.loop_lengths; - job["bs_angles"] = params.bs_angles; - job["ps_angles"] = params.ps_angles; - job["n_samples"] = params.n_samples; - - jobs.push_back(job); - - // Return a tuple containing the job path, headers, and the job message - auto ret = std::make_tuple(baseUrl, getHeaders(), jobs); - return ret; +cudaq::sample_result sample(std::vector &input_state, + std::vector &loop_lengths, + std::vector &bs_angles, int n_samples, + std::size_t qpu_id) { + std::vector ps_angles = {}; + TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles, + n_samples}; + return runSampling(parameters, qpu_id); } -// Process the results from a job -cudaq::sample_result -OrcaRemoteRESTQPU::processResults(cudaq::ServerMessage &postJobResponse) { - auto results = postJobResponse.at("results"); - - cudaq::CountsDictionary counts; - // Process the results - for (const auto &key : results) { - counts[key] += 1; - } - - // Create an execution result - cudaq::ExecutionResult executionResult(counts); - // Return a sample result - auto ret = cudaq::sample_result(executionResult); - return ret; +async_sample_result sample_async(std::vector &input_state, + std::vector &loop_lengths, + std::vector &bs_angles, + std::vector &ps_angles, int n_samples, + std::size_t qpu_id) { + TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles, + n_samples}; + return runAsyncSampling(parameters, qpu_id); } -// Get the headers 
for the API requests -cudaq::RestHeaders OrcaRemoteRESTQPU::getHeaders() { - // Construct the headers - cudaq::RestHeaders headers; - headers["Authorization"] = "apiKey "; - headers["Content-Type"] = "application/json"; - // Return the headers - return headers; +async_sample_result sample_async(std::vector &input_state, + std::vector &loop_lengths, + std::vector &bs_angles, int n_samples, + std::size_t qpu_id) { + std::vector ps_angles = {}; + TBIParameters parameters{input_state, loop_lengths, bs_angles, ps_angles, + n_samples}; + return runAsyncSampling(parameters, qpu_id); } -} // namespace - -CUDAQ_REGISTER_TYPE(cudaq::QPU, OrcaRemoteRESTQPU, orca) +} // namespace cudaq::orca diff --git a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp new file mode 100644 index 0000000000..f834136fc4 --- /dev/null +++ b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.cpp @@ -0,0 +1,99 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "OrcaRemoteRESTQPU.h" +#include "common/Logger.h" +#include "llvm/Support/Base64.h" + +namespace cudaq { +/// @brief This setTargetBackend override is in charge of reading the +/// specific target backend configuration file. 
+void OrcaRemoteRESTQPU::setTargetBackend(const std::string &backend) { + cudaq::info("OrcaRemoteRESTQPU platform is targeting {} with qpu_id = {}.", + backend, qpu_id); + + // First we see if the given backend has extra config params + auto mutableBackend = backend; + if (mutableBackend.find(";") != std::string::npos) { + auto split = cudaq::split(mutableBackend, ';'); + mutableBackend = split[0]; + // Must be key-value pairs, therefore an even number of values here + if ((split.size() - 1) % 2 != 0) + throw std::runtime_error( + "Backend config must be provided as key-value pairs: " + + std::to_string(split.size())); + + // Add to the backend configuration map + for (std::size_t i = 1; i < split.size(); i += 2) { + // No need to decode trivial true/false values + if (split[i + 1].starts_with("base64_")) { + split[i + 1].erase(0, 7); // erase "base64_" + std::vector decoded_vec; + if (auto err = llvm::decodeBase64(split[i + 1], decoded_vec)) + throw std::runtime_error("DecodeBase64 error"); + std::string decodedStr(decoded_vec.data(), decoded_vec.size()); + cudaq::info("Decoded {} parameter from '{}' to '{}'", split[i], + split[i + 1], decodedStr); + backendConfig.insert({split[i], decodedStr}); + } else { + backendConfig.insert({split[i], split[i + 1]}); + } + } + } + + /// Once we know the backend, we should search for the config file + /// from there we can get the URL/PORT and other information used in the + /// pipeline. + // Set the qpu name + qpuName = mutableBackend; + serverHelper = registry::get(qpuName); + serverHelper->initialize(backendConfig); + + // Give the server helper to the executor + executor->setServerHelper(serverHelper.get()); +} + +/// @brief Launch the experiment. 
+void OrcaRemoteRESTQPU::launchKernel(const std::string &kernelName, + void (*kernelFunc)(void *), void *args, + std::uint64_t voidStarSize, + std::uint64_t resultOffset, + const std::vector &rawArgs) { + + cudaq::info("OrcaRemoteRESTQPU: Launch kernel named '{}' remote QPU {}", + kernelName, qpu_id); + + auto tid = std::hash{}(std::this_thread::get_id()); + auto ctx = contexts[tid]; + + // TODO future iterations of this should support non-void return types. + if (!ctx) + throw std::runtime_error("Remote rest execution can only be performed " + "via cudaq::sample() or cudaq::observe()."); + + cudaq::orca::TBIParameters params = + *((struct cudaq::orca::TBIParameters *)args); + std::size_t shots = params.n_samples; + + ctx->shots = shots; + + cudaq::details::future future; + future = executor->execute(params, kernelName); + + // Keep this asynchronous if requested + if (ctx->asyncExec) { + ctx->futureResult = future; + return; + } + + // Otherwise make this synchronous + ctx->result = future.get(); +} + +} // namespace cudaq +CUDAQ_REGISTER_TYPE(cudaq::QPU, cudaq::OrcaRemoteRESTQPU, orca) \ No newline at end of file diff --git a/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h new file mode 100644 index 0000000000..80d2df5726 --- /dev/null +++ b/runtime/cudaq/platform/orca/OrcaRemoteRESTQPU.h @@ -0,0 +1,132 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#pragma once + +#include "OrcaExecutor.h" +#include "common/ExecutionContext.h" +#include "common/Future.h" +#include "common/RestClient.h" +#include "common/ServerHelper.h" +#include "cudaq/platform/qpu.h" +#include "orca_qpu.h" + +namespace cudaq { + +/// @brief The OrcaRemoteRESTQPU is a subtype of QPU that enables the +/// execution of CUDA-Q kernels on remotely hosted quantum computing +/// services via a REST Client / Server interaction. This type is meant +/// to be general enough to support any remotely hosted service. +/// Moreover, this QPU handles launching kernels under the Execution Context +/// that includes sampling via synchronous client invocations. +class OrcaRemoteRESTQPU : public cudaq::QPU { +protected: + /// @brief The number of shots + std::optional nShots; + + /// @brief the platform file path, CUDAQ_INSTALL/platforms + std::filesystem::path platformPath; + + /// @brief The name of the QPU being targeted + std::string qpuName; + + /// @brief Flag indicating whether we should emulate + /// execution locally. + bool emulate = false; + + /// @brief Pointer to the concrete Executor for this QPU + std::unique_ptr executor; + + /// @brief Pointer to the concrete ServerHelper, provides + /// specific JSON payloads and POST/GET URL paths. + std::unique_ptr serverHelper; + + /// @brief Mapping of general key-values for backend + /// configuration. + std::map backendConfig; + + /// @brief Mapping of thread and execution context + std::unordered_map contexts; + +private: + /// @brief RestClient used for HTTP requests. 
+ RestClient client; + +public: + /// @brief The constructor + OrcaRemoteRESTQPU() : QPU() { + std::filesystem::path cudaqLibPath{getCUDAQLibraryPath()}; + platformPath = cudaqLibPath.parent_path().parent_path() / "targets"; + // Default is to run sampling via the remote rest call + executor = std::make_unique(); + } + + OrcaRemoteRESTQPU(OrcaRemoteRESTQPU &&) = delete; + + /// @brief The destructor + virtual ~OrcaRemoteRESTQPU() = default; + + /// @brief Get id of the thread this queue executes on. + std::thread::id getExecutionThreadId() const { + return execution_queue->getExecutionThreadId(); + } + + /// @brief Enqueue a quantum task on the asynchronous execution queue. + void enqueue(cudaq::QuantumTask &task) override { + cudaq::info("OrcaRemoteRESTQPU: Enqueue Task on QPU {}", qpu_id); + execution_queue->enqueue(task); + } + + /// @brief Return true if the current backend is a simulator + bool isSimulator() override { return emulate; } + + /// @brief Return true if the current backend supports conditional feedback + bool supportsConditionalFeedback() override { return false; } + + /// @brief Provide the number of shots + void setShots(int _nShots) override { nShots = _nShots; } + + /// @brief Clear the number of shots + void clearShots() override { nShots = std::nullopt; } + + /// @brief Return true if the current backend is remote + virtual bool isRemote() override { return !emulate; } + + /// @brief Store the execution context for launching kernel + void setExecutionContext(cudaq::ExecutionContext *context) override { + cudaq::info("OrcaRemoteRESTQPU::setExecutionContext QPU {}", qpu_id); + auto tid = std::hash{}(std::this_thread::get_id()); + contexts.emplace(tid, context); + cudaq::getExecutionManager()->setExecutionContext(contexts[tid]); + } + + /// @brief Overrides resetExecutionContext to forward to the ExecutionManager + void resetExecutionContext() override { + cudaq::info("OrcaRemoteRESTQPU::resetExecutionContext QPU {}", qpu_id); + auto tid = 
std::hash{}(std::this_thread::get_id()); + cudaq::getExecutionManager()->resetExecutionContext(); + contexts[tid] = nullptr; + contexts.erase(tid); + } + + /// @brief This setTargetBackend override is in charge of reading the + /// specific target backend configuration file. + void setTargetBackend(const std::string &backend) override; + + /// @brief Launch the kernel. Handle all pertinent modifications for the + /// execution context. + void launchKernel(const std::string &kernelName, void (*kernelFunc)(void *), + void *args, std::uint64_t voidStarSize, + std::uint64_t resultOffset, + const std::vector &rawArgs) override; + void launchKernel(const std::string &kernelName, + const std::vector &rawArgs) override { + throw std::runtime_error("launch kernel on raw args not implemented"); + } +}; +} // namespace cudaq diff --git a/runtime/cudaq/platform/orca/OrcaServerHelper.cpp b/runtime/cudaq/platform/orca/OrcaServerHelper.cpp new file mode 100644 index 0000000000..87adff66b9 --- /dev/null +++ b/runtime/cudaq/platform/orca/OrcaServerHelper.cpp @@ -0,0 +1,134 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "OrcaServerHelper.h" +#include "common/Future.h" +#include "common/Logger.h" +#include "common/Registry.h" +#include "orca_qpu.h" + +namespace cudaq { + +// Initialize the ORCA server helper with a given backend configuration +void OrcaServerHelper::initialize(BackendConfig config) { + backendConfig = config; + + // Set the machine + auto iter = backendConfig.find("machine"); + if (iter != backendConfig.end()) + machine = iter->second; + + // Set an alternate base URL if provided + iter = backendConfig.find("url"); + if (iter != backendConfig.end()) { + baseUrl = iter->second; + if (!baseUrl.ends_with("/")) + baseUrl += "/"; + } +} + +// Create a job for the ORCA QPU +ServerJobPayload +OrcaServerHelper::createJob(cudaq::orca::TBIParameters params) { + std::vector jobs; + ServerMessage job; + + // Construct the job message + job["target"] = machine; + + job["input_state"] = params.input_state; + job["loop_lengths"] = params.loop_lengths; + job["bs_angles"] = params.bs_angles; + job["ps_angles"] = params.ps_angles; + job["n_samples"] = params.n_samples; + + jobs.push_back(job); + + // Return a tuple containing the job path, headers, and the job message + return std::make_tuple(baseUrl + "v1/submit", getHeaders(), jobs); +} + +// Process the results from a job +sample_result OrcaServerHelper::processResults(ServerMessage &postJobResponse, + std::string &jobID) { + auto results = postJobResponse.at("results"); + + CountsDictionary counts; + // Process the results + for (const auto &key : results) { + counts[key] += 1; + } + + // Create an execution result + ExecutionResult executionResult(counts); + // Return a sample result + auto ret = sample_result(executionResult); + return ret; +} + +std::map +OrcaServerHelper::generateRequestHeader() const { + std::string token, refreshKey, timeStr; + if (auto auth_token = std::getenv("ORCA_AUTH_TOKEN")) + token = "Bearer " + 
std::string(auth_token); + else + token = "Bearer "; + + std::map headers{ + {"Authorization", token}, + {"Content-Type", "application/json"}, + {"Connection", "keep-alive"}, + {"Accept", "*/*"}}; + return headers; +} + +// Get the headers for the API requests +RestHeaders OrcaServerHelper::getHeaders() { return generateRequestHeader(); } + +// From a server message, extract the job ID +std::string OrcaServerHelper::extractJobId(ServerMessage &postResponse) { + // If the response does not contain the key 'id', throw an exception + if (!postResponse.contains("job_id")) + throw std::runtime_error("ServerMessage doesn't contain 'job_id' key."); + + // Return the job ID from the response + auto ret = postResponse.at("job_id"); + return ret; +} + +std::string OrcaServerHelper::constructGetJobPath(ServerMessage &postResponse) { + return baseUrl + "v1/get_job/" + extractJobId(postResponse); +} + +std::string OrcaServerHelper::constructGetJobPath(std::string &jobId) { + return baseUrl + "v1/get_job/" + jobId; +} + +bool OrcaServerHelper::jobIsDone(ServerMessage &getJobResponse) { + auto error = getJobResponse["error_message"].is_null(); + auto status = getJobResponse["job_status"].is_null(); + if (error & status) { + return true; + } else if (!status) { + auto job_status = getJobResponse["job_status"].get(); + cudaq::info("job_status {}", job_status); + return false; + } else { + auto error_message = getJobResponse["error_message"].get(); + cudaq::info("error_message {}", error_message); + if (error_message == "Job can't be found") { + return false; + } else { + throw std::runtime_error(error_message); + } + } +} + +} // namespace cudaq + +CUDAQ_REGISTER_TYPE(cudaq::ServerHelper, cudaq::OrcaServerHelper, orca) diff --git a/runtime/cudaq/platform/orca/OrcaServerHelper.h b/runtime/cudaq/platform/orca/OrcaServerHelper.h new file mode 100644 index 0000000000..c3c5837ccb --- /dev/null +++ b/runtime/cudaq/platform/orca/OrcaServerHelper.h @@ -0,0 +1,91 @@ 
+/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ +#pragma once + +#include "common/Registry.h" +#include "common/ServerHelper.h" +#include "cudaq/utils/cudaq_utils.h" +#include "orca_qpu.h" + +#include "nlohmann/json.hpp" + +namespace cudaq { + +class OrcaServerHelper : public ServerHelper { + +protected: + /// @brief The base URL + std::string baseUrl = "http://localhost:8080/"; + + /// @brief The machine we are targeting + std::string machine = "PT-1"; + + /// @brief Time string, when the last tokens were retrieved + std::string timeStr = ""; + + /// @brief The refresh token + std::string refreshKey = ""; + + /// @brief ORCA requires the API token be updated every so often, + /// using the provided refresh token. This function will do that. + void refreshTokens(bool force_refresh = false); + + /// @brief Return the headers required for the REST calls + RestHeaders generateRequestHeader() const; + +public: + OrcaServerHelper() = default; + virtual ~OrcaServerHelper() = default; + + /// @brief Return the name of this server helper, must be the + /// same as the QPU configuration file. + const std::string name() const override { return "orca"; } + + /// @brief Return the POST/GET required headers. + /// @return + RestHeaders getHeaders() override; + + /// @brief Set the server configuration. 
+ void initialize(BackendConfig config) override; + + /// @brief Create a job payload for the provided TBI parameters + ServerJobPayload createJob(cudaq::orca::TBIParameters params); + + /// @brief Create a job payload for the provided quantum codes + ServerJobPayload + createJob(std::vector &circuitCodes) override { + std::vector jobs; + ServerMessage job; + jobs.push_back(job); + + std::map headers; + + // Return a tuple containing the job path, headers, and the job message + auto ret = std::make_tuple("", headers, jobs); + return ret; + }; + + /// @brief Return the job id from the previous job post + std::string extractJobId(ServerMessage &postResponse) override; + + /// @brief Return the URL for retrieving job results + std::string constructGetJobPath(ServerMessage &postResponse) override; + std::string constructGetJobPath(std::string &jobId) override; + + /// @brief Return true if the job is done + bool jobIsDone(ServerMessage &getJobResponse) override; + + // /// @brief Given a completed job response, map back to the sample_result + // sample_result processResults(ServerMessage &postJobResponse); + + /// @brief Given a completed job response, map back to the sample_result + sample_result processResults(ServerMessage &postJobResponse, + std::string &jobID) override; +}; + +} // namespace cudaq diff --git a/runtime/cudaq/platform/orca/orca.yml b/runtime/cudaq/platform/orca/orca.yml index 6367600bd3..be9dfffed7 100644 --- a/runtime/cudaq/platform/orca/orca.yml +++ b/runtime/cudaq/platform/orca/orca.yml @@ -17,12 +17,14 @@ config: link-libs: ["-lcudaq-orca-qpu"] # Library mode is only for simulators, physical backends must turn this off library-mode: false + # Allow use of the multi-QPU library + platform-library: mqpu target-arguments: - key: url required: false type: string - platform-arg: url + platform-arg: url help-string: "Specify URL." 
- key: machine required: false diff --git a/runtime/cudaq/platform/orca/orca_qpu.h b/runtime/cudaq/platform/orca/orca_qpu.h index fe95f6aeb0..643a1faf91 100644 --- a/runtime/cudaq/platform/orca/orca_qpu.h +++ b/runtime/cudaq/platform/orca/orca_qpu.h @@ -5,11 +5,13 @@ * This source code and the accompanying materials are made available under * * the terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ - #pragma once -#include "cudaq.h" +#include "common/ExecutionContext.h" +#include "common/Future.h" +#include "common/MeasureCounts.h" #include "cudaq/platform/quantum_platform.h" + #include #include @@ -25,14 +27,30 @@ struct TBIParameters { int n_samples; }; +/// @brief Return type for asynchronous sampling. +using async_sample_result = cudaq::async_result; + /// @brief Implementation of the sample method of the cudaq::orca namespace cudaq::sample_result sample(std::vector &input_state, std::vector &loop_lengths, std::vector &bs_angles, std::vector &ps_angles, - int n_samples = 10000); + int n_samples = 10000, std::size_t qpu_id = 0); + cudaq::sample_result sample(std::vector &input_state, std::vector &loop_lengths, std::vector &bs_angles, - int n_samples = 10000); + int n_samples = 10000, std::size_t qpu_id = 0); + +async_sample_result sample_async(std::vector &input_state, + std::vector &loop_lengths, + std::vector &bs_angles, + std::vector &ps_angles, + int n_samples = 10000, std::size_t qpu_id = 0); + +async_sample_result sample_async(std::vector &input_state, + std::vector &loop_lengths, + std::vector &bs_angles, + int n_samples = 10000, std::size_t qpu_id = 0); + }; // namespace cudaq::orca \ No newline at end of file diff --git a/runtime/cudaq/platform/qpu.h b/runtime/cudaq/platform/qpu.h index b01a6144cb..13a6d7da25 100644 --- a/runtime/cudaq/platform/qpu.h +++ b/runtime/cudaq/platform/qpu.h @@ -173,7 +173,8 @@ class QPU : public registry::RegisteredType { /// 
The raw function pointer is also provided, as are the runtime arguments, /// as a struct-packed void pointer and its corresponding size. virtual void launchKernel(const std::string &name, void (*kernelFunc)(void *), - void *args, std::uint64_t, std::uint64_t) = 0; + void *args, std::uint64_t, std::uint64_t, + const std::vector &rawArgs) = 0; /// Launch the kernel with given name and argument arrays. // This is intended for remote QPUs whereby we need to JIT-compile the kernel diff --git a/runtime/cudaq/platform/quantum_platform.cpp b/runtime/cudaq/platform/quantum_platform.cpp index e31fdcc7f2..00e259c389 100644 --- a/runtime/cudaq/platform/quantum_platform.cpp +++ b/runtime/cudaq/platform/quantum_platform.cpp @@ -81,10 +81,13 @@ void quantum_platform::set_current_qpu(const std::size_t device_id) { throw std::invalid_argument( "QPU device id is not valid (greater than number of available QPUs)."); } - platformCurrentQPU = device_id; - threadToQpuId.emplace( - std::hash{}(std::this_thread::get_id()), device_id); + auto tid = std::hash{}(std::this_thread::get_id()); + auto iter = threadToQpuId.find(tid); + if (iter != threadToQpuId.end()) + iter->second = device_id; + else + threadToQpuId.emplace(tid, device_id); } std::size_t quantum_platform::get_current_qpu() { return platformCurrentQPU; } @@ -151,7 +154,8 @@ quantum_platform::get_remote_capabilities(const std::size_t qpu_id) const { void quantum_platform::launchKernel(std::string kernelName, void (*kernelFunc)(void *), void *args, std::uint64_t voidStarSize, - std::uint64_t resultOffset) { + std::uint64_t resultOffset, + const std::vector &rawArgs) { std::size_t qpu_id = 0; auto tid = std::hash{}(std::this_thread::get_id()); @@ -160,7 +164,8 @@ void quantum_platform::launchKernel(std::string kernelName, qpu_id = iter->second; auto &qpu = platformQPUs[qpu_id]; - qpu->launchKernel(kernelName, kernelFunc, args, voidStarSize, resultOffset); + qpu->launchKernel(kernelName, kernelFunc, args, voidStarSize, resultOffset, 
+ rawArgs); } void quantum_platform::launchKernel(std::string kernelName, @@ -212,7 +217,7 @@ void cudaq::altLaunchKernel(const char *kernelName, void (*kernelFunc)(void *), auto &platform = *cudaq::getQuantumPlatformInternal(); std::string kernName = kernelName; platform.launchKernel(kernName, kernelFunc, kernelArgs, argsSize, - resultOffset); + resultOffset, {}); } void cudaq::streamlinedLaunchKernel(const char *kernelName, @@ -234,5 +239,6 @@ void cudaq::hybridLaunchKernel(const char *kernelName, void (*kernel)(void *), if (platform.is_remote(platform.get_current_qpu())) platform.launchKernel(kernName, rawArgs); else - platform.launchKernel(kernName, kernel, args, argsSize, resultOffset); + platform.launchKernel(kernName, kernel, args, argsSize, resultOffset, + rawArgs); } diff --git a/runtime/cudaq/platform/quantum_platform.h b/runtime/cudaq/platform/quantum_platform.h index 5f972fd7bc..e9598bf051 100644 --- a/runtime/cudaq/platform/quantum_platform.h +++ b/runtime/cudaq/platform/quantum_platform.h @@ -144,7 +144,8 @@ class quantum_platform { // quantum kernels. void launchKernel(std::string kernelName, void (*kernelFunc)(void *), void *args, std::uint64_t voidStarSize, - std::uint64_t resultOffset); + std::uint64_t resultOffset, + const std::vector &rawArgs); void launchKernel(std::string kernelName, const std::vector &); // This method is the hook for executing SerializedCodeExecutionContext diff --git a/runtime/cudaq/qis/execution_manager.h b/runtime/cudaq/qis/execution_manager.h index b77af90757..506bc27f4b 100644 --- a/runtime/cudaq/qis/execution_manager.h +++ b/runtime/cudaq/qis/execution_manager.h @@ -8,6 +8,7 @@ #pragma once +#include "common/CustomOp.h" #include "common/QuditIdTracker.h" #include "cudaq/host_config.h" #include "cudaq/spin_op.h" @@ -61,17 +62,6 @@ class measure_result { using measure_result = bool; #endif -/// @brief Define a `unitary_operation` type that exposes -/// a sub-type specific unitary representation of the -/// operation. 
-struct unitary_operation { - /// @brief Given a set of rotation parameters, return - /// a row-major 1D array representing the unitary operation - virtual std::vector> unitary( - const std::vector ¶meters = std::vector()) const = 0; - virtual ~unitary_operation() {} -}; - /// The ExecutionManager provides a base class describing a concrete sub-system /// for allocating qudits and executing quantum instructions on those qudits. /// This type is templated on the concrete qudit type (`qubit`, `qmode`, etc). @@ -96,10 +86,6 @@ class ExecutionManager { /// Internal - At qudit deallocation, return the qudit index void returnIndex(std::size_t idx) { tracker.returnIndex(idx); } - /// @brief Keep track of a registry of user-provided unitary operations. - std::unordered_map> - registeredOperations; - public: ExecutionManager() = default; @@ -180,14 +166,13 @@ class ExecutionManager { /// provided operation name. template void registerOperation(const std::string &name) { - auto iter = registeredOperations.find(name); - if (iter != registeredOperations.end()) - return; - registeredOperations.insert({name, std::make_unique()}); + customOpRegistry::getInstance().registerOperation(name); } /// Clear the registered operations - virtual void clearRegisteredOperations() { registeredOperations.clear(); } + virtual void clearRegisteredOperations() { + customOpRegistry::getInstance().clearRegisteredOperations(); + } virtual ~ExecutionManager() = default; }; diff --git a/runtime/cudaq/qis/managers/default/DefaultExecutionManager.cpp b/runtime/cudaq/qis/managers/default/DefaultExecutionManager.cpp index 5bb4b14f7f..f0beb4f525 100644 --- a/runtime/cudaq/qis/managers/default/DefaultExecutionManager.cpp +++ b/runtime/cudaq/qis/managers/default/DefaultExecutionManager.cpp @@ -207,9 +207,11 @@ class DefaultExecutionManager : public cudaq::BasicExecutionManager { simulator()->applyExpPauli(parameters[0], localC, localT, op); }) .Default([&]() { - if (auto iter = 
registeredOperations.find(gateName); - iter != registeredOperations.end()) { - auto data = iter->second->unitary(parameters); + if (cudaq::customOpRegistry::getInstance().isOperationRegistered( + gateName)) { + const auto &op = + cudaq::customOpRegistry::getInstance().getOperation(gateName); + auto data = op.unitary(parameters); simulator()->applyCustomOperation(data, localC, localT, gateName); return; } diff --git a/runtime/cudaq/qis/managers/photonics/CMakeLists.txt b/runtime/cudaq/qis/managers/photonics/CMakeLists.txt index af7ba7a18a..7e25abe443 100644 --- a/runtime/cudaq/qis/managers/photonics/CMakeLists.txt +++ b/runtime/cudaq/qis/managers/photonics/CMakeLists.txt @@ -9,29 +9,27 @@ set(LIBRARY_NAME cudaq-em-photonics) add_library(${LIBRARY_NAME} SHARED PhotonicsExecutionManager.cpp) - +set_property(GLOBAL APPEND PROPERTY CUDAQ_RUNTIME_LIBS ${LIBRARY_NAME}) target_include_directories(${LIBRARY_NAME} PUBLIC $ - $ - $) + $ + PRIVATE .) set (PHOTONICS_DEPENDENCIES "") -list(APPEND PHOTONICS_DEPENDENCIES cudaq-common libqpp fmt::fmt-header-only) -add_openmp_configurations(${LIBRARY_NAME} PHOTONICS_DEPENDENCIES) +list(APPEND PHOTONICS_DEPENDENCIES nvqir cudaq-common fmt::fmt-header-only LLVMSupport) target_link_libraries(${LIBRARY_NAME} - PUBLIC cudaq-spin + # PUBLIC cudaq-spin PRIVATE ${PHOTONICS_DEPENDENCIES} + ) -install(TARGETS ${LIBRARY_NAME} - EXPORT cudaq-em-photonics-targets - DESTINATION lib) +install(TARGETS ${LIBRARY_NAME} EXPORT cudaq-em-photonics-targets DESTINATION lib) install(EXPORT cudaq-em-photonics-targets FILE CUDAQEmPhotonicsTargets.cmake NAMESPACE cudaq:: DESTINATION lib/cmake/cudaq) -add_target_config(photonics) +# add_target_config(photonics) diff --git a/runtime/cudaq/qis/managers/photonics/PhotonicsExecutionManager.cpp b/runtime/cudaq/qis/managers/photonics/PhotonicsExecutionManager.cpp index ca0cb35e23..eb62896e74 100644 --- a/runtime/cudaq/qis/managers/photonics/PhotonicsExecutionManager.cpp +++ 
b/runtime/cudaq/qis/managers/photonics/PhotonicsExecutionManager.cpp @@ -6,10 +6,14 @@ * the terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ #include "common/Logger.h" +// #include "common/PluginUtils.h" #include "cudaq/qis/managers/BasicExecutionManager.h" -#include "cudaq/spin_op.h" +#include "cudaq/qis/qudit.h" +// #include "cudaq/spin_op.h" #include "cudaq/utils/cudaq_utils.h" -#include "qpp.h" +#include "nvqir/photonics/PhotonicCircuitSimulator.h" + +#include "llvm/ADT/StringSwitch.h" #include #include #include @@ -17,6 +21,10 @@ #include #include +namespace nvqir { +PhotonicCircuitSimulator *getPhotonicCircuitSimulatorInternal(); +} + namespace cudaq { struct PhotonicsState : public cudaq::SimulationState { @@ -29,8 +37,9 @@ struct PhotonicsState : public cudaq::SimulationState { PhotonicsState(qpp::ket &&data, std::size_t lvl) : state(std::move(data)), levels(lvl) {} + /// TODO: Rename the API to be generic std::size_t getNumQubits() const override { - throw "not supported for this photonics simulator"; + return (std::log2(state.size()) / std::log2(levels)); } std::complex overlap(const cudaq::SimulationState &other) override { @@ -39,7 +48,11 @@ struct PhotonicsState : public cudaq::SimulationState { std::complex getAmplitude(const std::vector &basisState) override { - /// TODO: Check basisState.size() matches qudit count + if (getNumQubits() != basisState.size()) + throw std::runtime_error(fmt::format( + "[photonics] getAmplitude with an invalid number of bits in the " + "basis state: expected {}, provided {}.", + getNumQubits(), basisState.size())); // Convert the basis state to an index value const std::size_t idx = std::accumulate( @@ -50,27 +63,34 @@ struct PhotonicsState : public cudaq::SimulationState { } Tensor getTensor(std::size_t tensorIdx = 0) const override { - throw "not supported for this photonics simulator"; + if (tensorIdx != 0) + throw 
std::runtime_error("[photonics] invalid tensor requested."); + return Tensor{ + reinterpret_cast( + const_cast *>(state.data())), + std::vector{static_cast(state.size())}, + getPrecision()}; } - std::vector getTensors() const override { - throw "not supported for this photonics simulator"; - } + /// @brief Return all tensors that represent this state + std::vector getTensors() const override { return {getTensor()}; } - std::size_t getNumTensors() const override { - throw "not supported for this photonics simulator"; - } + /// @brief Return the number of tensors that represent this state. + std::size_t getNumTensors() const override { return 1; } std::complex operator()(std::size_t tensorIdx, const std::vector &indices) override { - throw "not supported for this photonics simulator"; + if (tensorIdx != 0) + throw std::runtime_error("[photonics] invalid tensor requested."); + if (indices.size() != 1) + throw std::runtime_error("[photonics] invalid element extraction."); + return state[indices[0]]; } std::unique_ptr createFromSizeAndPtr(std::size_t size, void *ptr, std::size_t) override { throw "not supported for this photonics simulator"; - ; } void dump(std::ostream &os) const override { os << state << "\n"; } @@ -87,49 +107,102 @@ struct PhotonicsState : public cudaq::SimulationState { /// @brief The `PhotonicsExecutionManager` implements allocation, deallocation, /// and quantum instruction application for the photonics execution manager. -class PhotonicsExecutionManager : public cudaq::BasicExecutionManager { +class PhotonicsExecutionManager : public BasicExecutionManager { private: - /// @brief Current state - qpp::ket state; + nvqir::PhotonicCircuitSimulator *photonic_simulator() { + return nvqir::getPhotonicCircuitSimulatorInternal(); + } + + /// @brief To improve `qudit` allocation, we defer + /// single `qudit` allocation requests until the first + /// encountered `apply` call. 
+ std::vector requestedAllocations; + + /// @brief Allocate all requested `qudits`. + void flushRequestedAllocations() { + if (requestedAllocations.empty()) + return; - /// @brief Instructions are stored in a map - std::unordered_map> - instructions; + allocateQudits(requestedAllocations); + requestedAllocations.clear(); + } - /// @brief Qudits to be sampled - std::vector sampleQudits; + bool isInTracerMode() { + return executionContext && executionContext->name == "tracer"; + } protected: /// @brief Qudit allocation method: a zeroState is first initialized, the /// following ones are added via kron operators void allocateQudit(const cudaq::QuditInfo &q) override { - if (state.size() == 0) { - // qubit will give [1,0], qutrit will give [1,0,0] and so on... - state = qpp::ket::Zero(q.levels); - state(0) = 1.0; - return; - } - - qpp::ket zeroState = qpp::ket::Zero(q.levels); - zeroState(0) = 1.0; - state = qpp::kron(state, zeroState); + requestedAllocations.emplace_back(q.levels, q.id); } /// @brief Allocate a set of `qudits` with a single call. void allocateQudits(const std::vector &qudits) override { - for (auto &q : qudits) - allocateQudit(q); + photonic_simulator()->setLevels(qudits[0].levels); + photonic_simulator()->allocateQudits(qudits.size()); } void initializeState(const std::vector &targets, const void *state, - simulation_precision precision) override { - throw std::runtime_error("initializeState not implemented."); + cudaq::simulation_precision precision) override { + // Here we have qudits in requestedAllocations + // want to allocate and set state. + // There could be previous 'default' allocations whereby we just cached them + // in requestedAllocations. + // These default allocations need to be dispatched separately. + if (!requestedAllocations.empty() && + targets.size() != requestedAllocations.size()) { + assert(targets.size() < requestedAllocations.size()); + // This assumes no qudit reuse, aka the qudits are allocated in order. 
+ // This is consistent with the Kronecker product assumption in + // CircuitSimulator. + for (std::size_t i = 0; i < requestedAllocations.size() - 1; ++i) { + // Verify this assumption to make sure the simulator set + // the state of appropriate qudits. + const auto &thisAlloc = requestedAllocations[i]; + const auto &nextAlloc = requestedAllocations[i + 1]; + if (nextAlloc.id != (thisAlloc.id + 1)) { + std::stringstream errorMsg; + errorMsg << "Out of order allocation detected. This is not supported " + "by simulator backends. Qudit allocations: [ "; + for (const auto &alloc : requestedAllocations) { + errorMsg << alloc.id << " "; + } + errorMsg << "]"; + throw std::logic_error(errorMsg.str()); + } + } + const auto numDefaultAllocs = + requestedAllocations.size() - targets.size(); + photonic_simulator()->allocateQudits(numDefaultAllocs); + // The targets will be allocated in a specific state. + photonic_simulator()->allocateQudits(targets.size(), state, precision); + } else { + photonic_simulator()->allocateQudits(requestedAllocations.size(), state, + precision); + } + requestedAllocations.clear(); } - virtual void initializeState(const std::vector &targets, - const SimulationState *state) override { - throw std::runtime_error("initializeState not implemented."); + void initializeState(const std::vector &targets, + const cudaq::SimulationState *state) override { + // Note: a void* ptr doesn't provide enough info to the simulators, hence + // need a dedicated code path. + // TODO: simplify/combine the two code paths (raw vector and state). + if (!requestedAllocations.empty() && + targets.size() != requestedAllocations.size()) { + assert(targets.size() < requestedAllocations.size()); + const auto numDefaultAllocs = + requestedAllocations.size() - targets.size(); + photonic_simulator()->allocateQudits(numDefaultAllocs); + // The targets will be allocated in a specific state. 
+ photonic_simulator()->allocateQudits(targets.size(), state); + } else { + photonic_simulator()->allocateQudits(requestedAllocations.size(), state); + } + requestedAllocations.clear(); } /// @brief Qudit deallocation method @@ -139,238 +212,107 @@ class PhotonicsExecutionManager : public cudaq::BasicExecutionManager { void deallocateQudits(const std::vector &qudits) override {} /// @brief Handler for when the photonics execution context changes - void handleExecutionContextChanged() override {} + void handleExecutionContextChanged() override { + requestedAllocations.clear(); + photonic_simulator()->setExecutionContext(executionContext); + } /// @brief Handler for when the current execution context has ended. It /// returns samples to the execution context if it is "sample". void handleExecutionContextEnded() override { - if (executionContext) { - std::vector ids; - for (auto &s : sampleQudits) { - ids.push_back(s.id); - } - if (executionContext->name == "sample") { - auto shots = executionContext->shots; - auto sampleResult = - qpp::sample(shots, state, ids, sampleQudits.begin()->levels); - cudaq::ExecutionResult counts; - for (auto [result, count] : sampleResult) { - std::stringstream bitstring; - for (const auto &quditRes : result) { - bitstring << quditRes; - } - // Add to the sample result - // in mid-circ sampling mode this will append 1 bitstring - counts.appendResult(bitstring.str(), count); - // Reset the string. 
- bitstring.str(""); - bitstring.clear(); - } - executionContext->result.append(counts); - } else if (executionContext->name == "extract-state") { - executionContext->simulationState = - std::make_unique( - std::move(state), sampleQudits.begin()->levels); - } - // Reset the state and qudits - state.resize(0); - sampleQudits.clear(); + if (!requestedAllocations.empty()) { + cudaq::info( + "[PhotonicsExecutionManager] Flushing remaining {} allocations " + "at handleExecutionContextEnded.", + requestedAllocations.size()); + // If there are pending allocations, flush them to the simulator. + // Making sure the simulator's state is consistent with the number of + // allocations even though the circuit might be empty. + photonic_simulator()->allocateQudits(requestedAllocations.size()); + requestedAllocations.clear(); } + photonic_simulator()->resetExecutionContext(); } - /// @brief Method for executing instructions. void executeInstruction(const Instruction &instruction) override { - auto operation = instructions[std::get<0>(instruction)]; - operation(instruction); + flushRequestedAllocations(); + + // Get the data, create the Qudit* targets + auto [gateName, parameters, controls, targets, op] = instruction; + + // Map the Qudits to Qubits + std::vector localT; + std::transform(targets.begin(), targets.end(), std::back_inserter(localT), + [](auto &&el) { return el.id; }); + std::vector localC; + std::transform(controls.begin(), controls.end(), std::back_inserter(localC), + [](auto &&el) { return el.id; }); + + // Apply the gate + llvm::StringSwitch>(gateName) + .Case("create", + [&]() { photonic_simulator()->create(localC, localT[0]); }) + .Case("annihilate", + [&]() { photonic_simulator()->annihilate(localC, localT[0]); }) + .Case("plus", [&]() { photonic_simulator()->plus(localC, localT[0]); }) + .Case("beam_splitter", + [&]() { + photonic_simulator()->beam_splitter(parameters[0], localC, + localT); + }) + .Case("phase_shift", + [&]() { + 
photonic_simulator()->phase_shift(parameters[0], localC, + localT[0]); + }) + .Default([&]() { + throw std::runtime_error("[PhotonicsExecutionManager] invalid gate " + "application requested " + + gateName + "."); + })(); } - /// @brief Method for performing qudit measurement. int measureQudit(const cudaq::QuditInfo &q, const std::string ®isterName) override { - if (executionContext && executionContext->name == "sample") { - sampleQudits.push_back(q); - return 0; - } - - // If here, then we care about the result bit, so compute it. - const auto measurement_tuple = qpp::measure( - state, qpp::cmat::Identity(q.levels, q.levels), {q.id}, - /*qudit dimension=*/q.levels, /*destructive measmt=*/false); - const auto measurement_result = std::get(measurement_tuple); - const auto &post_meas_states = std::get(measurement_tuple); - const auto &collapsed_state = post_meas_states[measurement_result]; - state = Eigen::Map(collapsed_state.data(), - collapsed_state.size()); - - cudaq::info("Measured qubit {} -> {}", q.id, measurement_result); - return measurement_result; + flushRequestedAllocations(); + return photonic_simulator()->mz(q.id, registerName); } - /// @brief Measure the state in the basis described by the given `spin_op`. - void measureSpinOp(const cudaq::spin_op &) override {} - - /// @brief Method for performing qudit reset. 
- void resetQudit(const cudaq::QuditInfo &id) override {} - - /// @brief Returns a precomputed factorial for n up tp 30 - double _fast_factorial(int n) { - static std::vector FACTORIAL_TABLE = { - 1., - 1., - 2., - 6., - 24., - 120., - 720., - 5040., - 40320., - 362880., - 3628800., - 39916800., - 479001600., - 6227020800., - 87178291200., - 1307674368000., - 20922789888000., - 355687428096000., - 6402373705728000., - 121645100408832000., - 2432902008176640000., - 51090942171709440000., - 1124000727777607680000., - 25852016738884976640000., - 620448401733239439360000., - 15511210043330985984000000., - 403291461126605635584000000., - 10888869450418352160768000000., - 304888344611713860501504000000., - 8841761993739701954543616000000., - 265252859812191058636308480000000., - }; - if (n > - 30) { // We do not expect to get 30 photons in the loop at the same time - throw std::invalid_argument("received invalid value, n <= 30"); - } - return FACTORIAL_TABLE[n]; + void flushGateQueue() override { + synchronize(); + flushRequestedAllocations(); + photonic_simulator()->flushGateQueue(); } - /// @brief Computes the kronecker delta of two values - int _kron(int a, int b) { - if (a == b) - return 1; - else + int measure(const cudaq::QuditInfo &target, + const std::string registerName = "") override { + if (isInTracerMode()) return 0; - } - /// @brief Computes if two double values are within some absolute and relative - /// tolerance - bool _isclose(double a, double b, double rtol = 1e-08, double atol = 1e-9) { - return std::fabs(a - b) <= (atol + rtol * std::fabs(b)); - } + // We hit a measure, need to exec / clear instruction queue + synchronize(); - /// @brief Computes a single element in the matrix representing a beam - /// splitter gate - double _calc_beamsplitter_elem(int N1, int N2, int n1, int n2, double theta) { - - const double t = cos(theta); // transmission coeffient - const double r = sin(theta); // reflection coeffient - double sum = 0; - for (int k = 0; k <= n1; 
++k) { - int l = N1 - k; - if (l >= 0 && l <= n2) { - // int term4 = _kron(N1, k + l); //* kron(N1 + N2, n1 + n2); - - double term1 = pow(r, (n1 - k + l)) * pow(t, (n2 + k - l)); - if (term1 == 0) { - continue; - } - double term2 = pow((-1), (l)) * - (sqrt(_fast_factorial(n1)) * sqrt(_fast_factorial(n2)) * - sqrt(_fast_factorial(N1)) * sqrt(_fast_factorial(N2))); - double term3 = (_fast_factorial(k) * _fast_factorial(n1 - k) * - _fast_factorial(l) * _fast_factorial(n2 - l)); - double term = term1 * term2 / term3; - sum += term; - } else { - continue; - } - } - - return sum; + // Instruction executed, run the measure call + return measureQudit(target, registerName); } - /// @brief Computes matrix representing a beam splitter gate - void beamsplitter(const double theta, qpp::cmat &BS) { - int d = sqrt(BS.rows()); - // """Returns a matrix representing a beam splitter - for (int n1 = 0; n1 < d; ++n1) { - for (int n2 = 0; n2 < d; ++n2) { - int nxx = n1 + n2; - int nxd = std::min(nxx + 1, d); - for (int N1 = 0; N1 < nxd; ++N1) { - int N2 = nxx - N1; - if (N2 >= nxd) { - continue; - } else { - - BS(n1 * d + n2, N1 * d + N2) = - _calc_beamsplitter_elem(N1, N2, n1, n2, theta); - } - } - } - } + /// @brief Measure the state in the basis described by the given `spin_op`. 
+ void measureSpinOp(const cudaq::spin_op &) override { + throw "spin_op operation (cudaq::observe()) is not supported for this " + "photonics simulator"; } public: PhotonicsExecutionManager() { - - instructions.emplace("plusGate", [&](const Instruction &inst) { - auto &[gateName, params, controls, qudits, spin_op] = inst; - auto target = qudits[0]; - int d = target.levels; - qpp::cmat u{qpp::cmat::Zero(d, d)}; - u(0, d - 1) = 1; - for (int i = 1; i < d; i++) { - u(i, i - 1) = 1; - } - cudaq::info("Applying plusGate on {}<{}>", target.id, target.levels); - state = qpp::apply(state, u, {target.id}, target.levels); - }); - - instructions.emplace("beamSplitterGate", [&](const Instruction &inst) { - auto &[gateName, params, controls, qudits, spin_op] = inst; - auto target1 = qudits[0]; - auto target2 = qudits[1]; - size_t d = target1.levels; - const double theta = params[0]; - qpp::cmat BS{qpp::cmat::Zero(d * d, d * d)}; - beamsplitter(theta, BS); - cudaq::info("Applying beamSplitterGate on {}<{}> and {}<{}>", target1.id, - target1.levels, target2.id, target2.levels); - state = qpp::apply(state, BS, {target1.id, target2.id}, d); - }); - - instructions.emplace("phaseShiftGate", [&](const Instruction &inst) { - auto &[gateName, params, controls, qudits, spin_op] = inst; - auto target = qudits[0]; - size_t d = target.levels; - const double phi = params[0]; - qpp::cmat PS{qpp::cmat::Identity(d, d)}; - const std::complex i(0.0, 1.0); - for (size_t n = 0; n < d; n++) { - PS(n, n) = std::exp(n * phi * i); - } - cudaq::info("Applying phaseShiftGate on {}<{}>", target.id, - target.levels); - state = qpp::apply(state, PS, {target.id}, target.levels); - }); + cudaq::info("[PhotonicsExecutionManager] Creating the {} backend.", + photonic_simulator()->name()); } virtual ~PhotonicsExecutionManager() = default; - cudaq::SpinMeasureResult measure(cudaq::spin_op &op) override { - throw "spin_op observation (cudaq::observe()) is not supported for this " - "photonics simulator"; + void 
resetQudit(const cudaq::QuditInfo &q) override { + flushRequestedAllocations(); + photonic_simulator()->resetQudit(q.id); } }; // PhotonicsExecutionManager diff --git a/runtime/cudaq/qis/managers/photonics/photonics_qis.h b/runtime/cudaq/qis/managers/photonics/photonics_qis.h index ab4985e09b..71cf323333 100644 --- a/runtime/cudaq/qis/managers/photonics/photonics_qis.h +++ b/runtime/cudaq/qis/managers/photonics/photonics_qis.h @@ -10,44 +10,57 @@ #pragma once #include "common/ExecutionContext.h" -#include "cudaq/qis/qarray.h" +// #include "cudaq/qis/qarray.h" #include "cudaq/qis/qvector.h" #include namespace cudaq { + +/// @brief The `create` gate +// U|0> -> |1>, U|1> -> |2>, ..., and U|d> -> |d> +template +void create(qudit &q) { + getExecutionManager()->apply("create", {}, {}, {{q.n_levels(), q.id()}}); +} + +/// @brief The `annihilate` gate +// U|0> -> |0>, U|1> -> |0>, ..., and U|d> -> |d-1> +template +void annihilate(qudit &q) { + getExecutionManager()->apply("annihilate", {}, {}, {{q.n_levels(), q.id()}}); +} + /// @brief The `plus` gate // U|0> -> |1>, U|1> -> |2>, ..., and U|d> -> |0> template -void plus(cudaq::qudit &q) { - cudaq::getExecutionManager()->apply("plusGate", {}, {}, - {{q.n_levels(), q.id()}}); +void plus(qudit &q) { + getExecutionManager()->apply("plus", {}, {}, {{q.n_levels(), q.id()}}); } /// @brief The `phase shift` gate template -void phase_shift(cudaq::qudit &q, const double &phi) { - cudaq::getExecutionManager()->apply("phaseShiftGate", {phi}, {}, - {{q.n_levels(), q.id()}}); +void phase_shift(qudit &q, const double &angle) { + getExecutionManager()->apply("phase_shift", {angle}, {}, + {{q.n_levels(), q.id()}}); } /// @brief The `beam splitter` gate template -void beam_splitter(cudaq::qudit &q, cudaq::qudit &r, - const double &theta) { - cudaq::getExecutionManager()->apply( - "beamSplitterGate", {theta}, {}, +void beam_splitter(qudit &q, qudit &r, const double &angle) { + getExecutionManager()->apply( + "beam_splitter", {angle}, {}, 
{{q.n_levels(), q.id()}, {r.n_levels(), r.id()}}); } /// @brief Measure a qudit template -int mz(cudaq::qudit &q) { - return cudaq::getExecutionManager()->measure({q.n_levels(), q.id()}); +int mz(qudit &q) { + return getExecutionManager()->measure({q.n_levels(), q.id()}); } /// @brief Measure a vector of qudits template -std::vector mz(cudaq::qvector &q) { +std::vector mz(qvector &q) { std::vector ret; for (auto &qq : q) ret.emplace_back(mz(qq)); diff --git a/runtime/cudaq/qis/qkernel.h b/runtime/cudaq/qis/qkernel.h new file mode 100644 index 0000000000..6f477110fe --- /dev/null +++ b/runtime/cudaq/qis/qkernel.h @@ -0,0 +1,195 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#pragma once + +#include +#include +#include + +#ifdef CUDAQ_LIBRARY_MODE + +namespace cudaq { + +/// In library mode, the quake compiler is not involved. To streamline things, +/// just have `qkernel` alias the std::function template class. +template +using qkernel = std::function; + +} // namespace cudaq + +#else + +namespace cudaq { + +namespace details { +class QKernelDummy; + +template +class QKernelInterface { +public: + virtual ~QKernelInterface() = default; + + virtual R dispatch(As...) 
= 0; + virtual void *getEntry() = 0; +}; + +template +class QKernelHolder : public QKernelInterface { +public: + using EntryType = + std::conditional_t, R (QKernelDummy::*)(As...), + R (*)(As...)>; + + QKernelHolder() : entry{nullptr}, callable{} {} + QKernelHolder(const QKernelHolder &) = default; + QKernelHolder(QKernelHolder &&) = default; + + explicit QKernelHolder(F &&f) : callable(std::forward(f)) { + if constexpr (std::is_same_v) { + entry = f; + } else { + setEntry(&F::operator()); + } + } + explicit QKernelHolder(const F &f) : callable(f) { + if constexpr (std::is_same_v) { + entry = f; + } else { + setEntry(&F::operator()); + } + } + + QKernelHolder &operator=(const QKernelHolder &) = default; + QKernelHolder &operator=(QKernelHolder &&holder) = default; + + R dispatch(As... as) override { return std::invoke(callable, as...); } + + // Copy the bits of the member function pointer, \p mfp, into the member + // function pointer, `entry`. The kernel launcher will use this to convert + // from host-side to device-side. + template + void setEntry(const MFP &mfp) { + memcpy(&entry, &mfp, sizeof(EntryType)); + } + + // This will provide a hook value for the runtime to determine which kernel + // was captured in the C++ host code. + void *getEntry() override { return static_cast(&entry); } + + // We keep a (member) function pointer (specialized by the callable) in order + // to convert from the host-side to the device-side address space. + EntryType entry; + + // The actual callable to dispatch upon. + F callable; +}; + +} // namespace details + +#if CUDAQ_USE_STD20 +template +using remove_cvref_t = std::remove_cvref_t; +#else +template +using remove_cvref_t = std::remove_cv_t>; +#endif + +/// A `qkernel` must be used to wrap `CUDA-Q` kernels (callables annotated +/// with the `__qpu__` attribute) when those kernels are \e referenced other +/// than by a direct call in code outside of quantum kernels proper. 
Supports +/// free functions, classes with call operators, and lambdas. +/// +/// The quake compiler can inspect these wrappers in the C++ code and tweak them +/// to provide information necessary and sufficient for the CUDA-Q runtime to +/// either stitch together execution in a simulation environment and/or JIT +/// compile and re-link these kernels into a cohesive quantum circuit. +template +class qkernel; + +template +class qkernel { +public: + qkernel() {} + qkernel(std::nullptr_t) {} + qkernel(const qkernel &) = default; + qkernel(qkernel &&) = default; + + template , qkernel>> + using DecayType = typename std::enable_if_t>::type; + + template , + typename RES = + std::conditional_t, + std::true_type, std::false_type>> + struct CallableType : RES {}; + + template ::value>> + qkernel(S &&f) { + using PS = remove_cvref_t; + if constexpr (std::is_same_v || + std::is_same_v) { + kernelCallable = + std::make_unique>(f); + } else { + kernelCallable = + std::make_unique>(f); + } + } + + R operator()(As... as) const { + return kernelCallable->dispatch(std::forward(as)...); + } + R operator()(As... as) { + return kernelCallable->dispatch(std::forward(as)...); + } + + void **get_entry_kernel_from_holder() const { + return static_cast(kernelCallable->getEntry()); + } + +private: + std::unique_ptr> kernelCallable; +}; + +#if CUDAQ_USE_STD20 +// Deduction guides for C++20. 
+ +template +qkernel(R (*)(As...)) -> qkernel; + +template +struct qkernel_deduction_guide_helper {}; + +template +struct qkernel_deduction_guide_helper { + using type = R(As...); +}; +template +struct qkernel_deduction_guide_helper { + using type = R(As...); +}; +template +struct qkernel_deduction_guide_helper { + using type = R(As...); +}; +template +struct qkernel_deduction_guide_helper { + using type = R(As...); +}; + +template ::type> +qkernel(F) -> qkernel; + +#endif // CUDAQ_USE_STD20 + +} // namespace cudaq + +#endif // CUDAQ_LIBRARY_MODE diff --git a/runtime/cudaq/qis/qubit_qis.h b/runtime/cudaq/qis/qubit_qis.h index 3d5b0b9960..c83dffe844 100644 --- a/runtime/cudaq/qis/qubit_qis.h +++ b/runtime/cudaq/qis/qubit_qis.h @@ -13,6 +13,7 @@ #include "cudaq/qis/modifiers.h" #include "cudaq/qis/pauli_word.h" #include "cudaq/qis/qarray.h" +#include "cudaq/qis/qkernel.h" #include "cudaq/qis/qreg.h" #include "cudaq/qis/qvector.h" #include "cudaq/spin_op.h" @@ -1198,11 +1199,19 @@ void genericApplicator(const std::string &gateName, Args &&...args) { [[maybe_unused]] std::complex i(0, 1.); \ return __VA_ARGS__; \ } \ + static inline const bool registered_ = []() { \ + cudaq::customOpRegistry::getInstance() \ + .registerOperation(#NAME); \ + return true; \ + }(); \ }; \ CUDAQ_MOD_TEMPLATE \ void NAME(Args &&...args) { \ - cudaq::getExecutionManager()->registerOperation( \ - #NAME); \ + /* Perform registration at call site as well in case the static \ + * initialization was not executed in the same context, e.g., remote \ + * execution.*/ \ + cudaq::customOpRegistry::getInstance() \ + .registerOperation(#NAME); \ details::genericApplicator(#NAME, \ std::forward(args)...); \ } \ diff --git a/runtime/cudaq/qis/remote_state.cpp b/runtime/cudaq/qis/remote_state.cpp index 64aa4e0f40..713a462e46 100644 --- a/runtime/cudaq/qis/remote_state.cpp +++ b/runtime/cudaq/qis/remote_state.cpp @@ -184,7 +184,7 @@ RemoteSimulationState::overlap(const cudaq::SimulationState &other) { 
std::make_pair(static_cast(this), static_cast(&otherState)); platform.set_exec_ctx(&context); - platform.launchKernel(kernelName, nullptr, nullptr, 0, 0); + platform.launchKernel(kernelName, nullptr, nullptr, 0, 0, {}); platform.reset_exec_ctx(); assert(context.overlapResult.has_value()); return context.overlapResult.value(); diff --git a/runtime/cudaq/qis/remote_state.h b/runtime/cudaq/qis/remote_state.h index bcfd2c88cc..878bb098dd 100644 --- a/runtime/cudaq/qis/remote_state.h +++ b/runtime/cudaq/qis/remote_state.h @@ -40,18 +40,28 @@ class RemoteSimulationState : public cudaq::SimulationState { public: template void addArgument(const T &arg) { - if constexpr (std::is_pointer>::value) { - args.push_back(const_cast(static_cast(arg))); - deleters.push_back([](void *ptr) {}); - } else if constexpr (std::is_copy_constructible>::value) { + if constexpr (std::is_pointer_v>) { + if constexpr (std::is_copy_constructible_v< + std::remove_pointer_t>>) { + auto ptr = new std::remove_pointer_t>(*arg); + args.push_back(ptr); + deleters.push_back([](void *ptr) { + delete static_cast> *>(ptr); + }); + } else { + throw std::invalid_argument( + "Unsupported argument type: only pointers to copy-constructible " + "types and copy-constructible types are supported."); + } + } else if constexpr (std::is_copy_constructible_v>) { auto *ptr = new std::decay_t(arg); args.push_back(ptr); deleters.push_back( [](void *ptr) { delete static_cast *>(ptr); }); } else { throw std::invalid_argument( - "Unsupported argument type: only pointers and " - "copy-constructible types are supported."); + "Unsupported argument type: only pointers to copy-constructible " + "types and copy-constructible types are supported."); } } diff --git a/runtime/cudaq/utils/registry.h b/runtime/cudaq/utils/registry.h index 5d2823d6f3..3e339c196b 100644 --- a/runtime/cudaq/utils/registry.h +++ b/runtime/cudaq/utils/registry.h @@ -9,19 +9,36 @@ #pragma once #include -namespace cudaq { -namespace registry { +namespace 
cudaq::registry { extern "C" { -void deviceCodeHolderAdd(const char *, const char *); +void __cudaq_deviceCodeHolderAdd(const char *, const char *); void cudaqRegisterKernelName(const char *); void cudaqRegisterArgsCreator(const char *, char *); void cudaqRegisterLambdaName(const char *, const char *); + +/// Register a kernel with the runtime for kernel runtime stitching. +void __cudaq_registerLinkableKernel(void *, const char *, void *); + +/// Return the kernel key from a `qkernel` object. If \p p is a `nullptr` this +/// will throw a runtime error. +std::intptr_t __cudaq_getLinkableKernelKey(void *p); + +/// Given a kernel key value, return the name of the kernel. If the kernel is +/// not registered, throws a runtime error. +const char *__cudaq_getLinkableKernelName(std::intptr_t); + +/// Given a kernel key value, return the corresponding device-side kernel +/// function. If the kernel is not registered, throws a runtime error. +void *__cudaq_getLinkableKernelDeviceFunction(std::intptr_t); } -} // namespace registry +/// Given a kernel key value, return the name of the kernel. If the kernel is +/// not registered, runs a `nullptr`. Note this function is not exposed to the +/// compiler API as an `extern C` function. +const char *getLinkableKernelNameOrNull(std::intptr_t); +} // namespace cudaq::registry -namespace __internal__ { +namespace cudaq::__internal__ { /// Is the kernel `kernelName` registered? 
bool isKernelGenerated(const std::string &kernelName); -} // namespace __internal__ -} // namespace cudaq +} // namespace cudaq::__internal__ diff --git a/runtime/nvqir/CMakeLists.txt b/runtime/nvqir/CMakeLists.txt index 15f0abfe86..e83049919e 100644 --- a/runtime/nvqir/CMakeLists.txt +++ b/runtime/nvqir/CMakeLists.txt @@ -15,6 +15,7 @@ set(INTERFACE_POSITION_INDEPENDENT_CODE ON) set(NVQIR_RUNTIME_SRC QIRTypes.cpp NVQIR.cpp + ./photonics/PhotonicNVQIR.cpp ../cudaq/qis/state.cpp ) diff --git a/runtime/nvqir/CircuitSimulator.h b/runtime/nvqir/CircuitSimulator.h index e07662fa5b..0611294d4a 100644 --- a/runtime/nvqir/CircuitSimulator.h +++ b/runtime/nvqir/CircuitSimulator.h @@ -739,7 +739,9 @@ class CircuitSimulatorBase : public CircuitSimulator { /// model. Unimplemented on the base class, sub-types can implement noise /// modeling. virtual void applyNoiseChannel(const std::string_view gateName, - const std::vector &qubits) {} + const std::vector &controls, + const std::vector &targets, + const std::vector ¶ms) {} /// @brief Flush the gate queue, run all queued gate /// application tasks. 
@@ -762,11 +764,10 @@ class CircuitSimulatorBase : public CircuitSimulator { throw std::runtime_error("Unknown exception in applyGate"); } if (executionContext && executionContext->noiseModel) { - std::vector noiseQubits{next.controls.begin(), - next.controls.end()}; - noiseQubits.insert(noiseQubits.end(), next.targets.begin(), - next.targets.end()); - applyNoiseChannel(next.operationName, noiseQubits); + std::vector params(next.parameters.begin(), + next.parameters.end()); + applyNoiseChannel(next.operationName, next.controls, next.targets, + params); } gateQueue.pop(); } diff --git a/runtime/nvqir/NVQIR.cpp b/runtime/nvqir/NVQIR.cpp index ca1f19a6d9..45e2a6f642 100644 --- a/runtime/nvqir/NVQIR.cpp +++ b/runtime/nvqir/NVQIR.cpp @@ -37,6 +37,14 @@ thread_local nvqir::CircuitSimulator *simulator; inline static constexpr std::string_view GetCircuitSimulatorSymbol = "getCircuitSimulator"; +// The following maps are used to map Qubits to Results, and Results to boolean +// values. The pointer values may be integers if they are referring to Base +// Profile or Adaptive Profile QIR programs, so it is generally not safe to +// dereference them. 
+static thread_local std::map measQB2Res; +static thread_local std::map measRes2QB; +static thread_local std::map measRes2Val; + /// @brief Provide a holder for externally created /// CircuitSimulator pointers (like from Python) that /// will invoke clone on the simulator when requested, which @@ -123,7 +131,10 @@ std::vector arrayToVectorSizeT(Array *arr) { for (std::size_t i = 0; i < arr->size(); i++) { auto arrayPtr = (*arr)[i]; Qubit *idxVal = *reinterpret_cast(arrayPtr); - ret.push_back(idxVal->idx); + if (qubitPtrIsIndex) + ret.push_back((intptr_t)idxVal); + else + ret.push_back(idxVal->idx); } return ret; } @@ -202,6 +213,16 @@ extern "C" { void print_i64(const char *msg, std::size_t i) { printf(msg, i); } void print_f64(const char *msg, double f) { printf(msg, f); } +/// @brief Return whether or not the NVQIR runtime is running with dynamic qubit +/// management (qubits are pointers) or not (qubits are integers). +bool __quantum__rt__is_dynamic_qubit_management() { return !qubitPtrIsIndex; } + +/// @brief Set whether or not the NVQIR runtime is running with dynamic qubit +/// management (qubits are pointers) or not (qubits are integers). 
+void __quantum__rt__set_dynamic_qubit_management(bool isDynamic) { + qubitPtrIsIndex = !isDynamic; +} + /// @brief QIR Initialization function void __quantum__rt__initialize(int argc, int8_t **argv) { if (!initialized) { @@ -478,6 +499,10 @@ void __quantum__qis__phased_rx(double theta, double phi, Qubit *q) { nvqir::getCircuitSimulatorInternal()->applyCustomOperation(matrix, {}, {qI}); } +void __quantum__qis__phased_rx__body(double theta, double phi, Qubit *q) { + __quantum__qis__phased_rx(theta, phi, q); +} + auto u3_matrix = [](double theta, double phi, double lambda) { std::complex i(0, 1.); std::vector> matrix{ @@ -517,12 +542,22 @@ void __quantum__qis__cnot__body(Qubit *q, Qubit *r) { nvqir::getCircuitSimulatorInternal()->x(controls, rI); } +void __quantum__qis__cz__body(Qubit *q, Qubit *r) { + auto qI = qubitToSizeT(q); + auto rI = qubitToSizeT(r); + ScopedTraceWithContext("NVQIR::cz", qI, rI); + std::vector controls{qI}; + nvqir::getCircuitSimulatorInternal()->z(controls, rI); +} + void __quantum__qis__reset(Qubit *q) { auto qI = qubitToSizeT(q); ScopedTraceWithContext("NVQIR::reset", qI); nvqir::getCircuitSimulatorInternal()->resetQubit(qI); } +void __quantum__qis__reset__body(Qubit *q) { __quantum__qis__reset(q); } + Result *__quantum__qis__mz(Qubit *q) { auto qI = qubitToSizeT(q); ScopedTraceWithContext("NVQIR::mz", qI); @@ -530,19 +565,21 @@ Result *__quantum__qis__mz(Qubit *q) { return b ? ResultOne : ResultZero; } -Result *__quantum__qis__mz__body(Qubit *q) { +Result *__quantum__qis__mz__body(Qubit *q, Result *r) { + measQB2Res[q] = r; + measRes2QB[r] = q; auto qI = qubitToSizeT(q); ScopedTraceWithContext("NVQIR::mz", qI); auto b = nvqir::getCircuitSimulatorInternal()->mz(qI, ""); + measRes2Val[r] = b; return b ? ResultOne : ResultZero; } bool __quantum__qis__read_result__body(Result *result) { - // TODO: implement post-measurement result retrieval. 
This is not needed for - // typical simulator operation (other than to have it defined), but it may be - // useful in the future. - // https://github.com/NVIDIA/cuda-quantum/issues/758 - ScopedTraceWithContext("NVQIR::read_result (stubbed out)"); + ScopedTraceWithContext("NVQIR::read_result"); + auto iter = measRes2Val.find(result); + if (iter != measRes2Val.end()) + return iter->second; return ResultZeroVal; } @@ -567,10 +604,15 @@ void __quantum__qis__exp_pauli(double theta, Array *qubits, char *pauliWord) { return; } -void __quantum__rt__result_record_output(Result *, int8_t *) {} +void __quantum__rt__result_record_output(Result *r, int8_t *name) { + if (name && qubitPtrIsIndex) + __quantum__qis__mz__to__register(measRes2QB[r], + reinterpret_cast(name)); +} void __quantum__qis__custom_unitary(std::complex *unitary, - Array *controls, Array *targets) { + Array *controls, Array *targets, + const char *name) { auto ctrlsVec = arrayToVectorSizeT(controls); auto tgtsVec = arrayToVectorSizeT(targets); auto numQubits = tgtsVec.size(); @@ -580,12 +622,13 @@ void __quantum__qis__custom_unitary(std::complex *unitary, auto numElements = nToPowTwo * nToPowTwo; std::vector> unitaryMatrix(unitary, unitary + numElements); - nvqir::getCircuitSimulatorInternal()->applyCustomOperation(unitaryMatrix, - ctrlsVec, tgtsVec); + nvqir::getCircuitSimulatorInternal()->applyCustomOperation( + unitaryMatrix, ctrlsVec, tgtsVec, name); } void __quantum__qis__custom_unitary__adj(std::complex *unitary, - Array *controls, Array *targets) { + Array *controls, Array *targets, + const char *name) { auto ctrlsVec = arrayToVectorSizeT(controls); auto tgtsVec = arrayToVectorSizeT(targets); @@ -608,8 +651,8 @@ void __quantum__qis__custom_unitary__adj(std::complex *unitary, for (auto const &row : unitaryConj2D) unitaryFlattened.insert(unitaryFlattened.end(), row.begin(), row.end()); - nvqir::getCircuitSimulatorInternal()->applyCustomOperation(unitaryFlattened, - ctrlsVec, tgtsVec); + 
nvqir::getCircuitSimulatorInternal()->applyCustomOperation( + unitaryFlattened, ctrlsVec, tgtsVec, name); } /// @brief Map an Array pointer containing Paulis to a vector of Paulis. @@ -832,6 +875,14 @@ void __quantum__qis__exp__body(Array *paulis, double angle, Array *qubits) { } } +/// @brief Cleanup an result maps at the end of a QIR program to avoid leaking +/// results into the next program. +void __quantum__rt__clear_result_maps() { + measQB2Res.clear(); + measRes2QB.clear(); + measRes2Val.clear(); +} + /// @brief Utility function used by Quake->QIR to pack a single Qubit pointer /// into an Array pointer. Array *packSingleQubitInArray(Qubit *q) { diff --git a/runtime/nvqir/photonics/PhotonicCircuitSimulator.h b/runtime/nvqir/photonics/PhotonicCircuitSimulator.h new file mode 100644 index 0000000000..504d5523b7 --- /dev/null +++ b/runtime/nvqir/photonics/PhotonicCircuitSimulator.h @@ -0,0 +1,1162 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ +#pragma once + +#include "PhotonicGates.h" +#include "PhotonicState.h" + +#include "common/Logger.h" +#include "common/MeasureCounts.h" +#include "common/Timing.h" +#include "cudaq/host_config.h" + +#include +#include +#include +#include +#include +#include + +namespace nvqir { + +enum class QuditOrdering { lsb, msb }; + +// @brief Collect summary data and print upon simulator termination +struct PSSummaryData { + std::size_t gateCount = 0; + std::size_t controlCount = 0; + std::size_t targetCount = 0; + std::size_t svIO = 0; + std::size_t svFLOPs = 0; + bool enabled = false; + std::string name; + PSSummaryData() { + if (cudaq::isTimingTagEnabled(cudaq::TIMING_GATE_COUNT)) + enabled = true; + } + + /// @brief Update state-vector-based statistics for a logic gate + void svGateUpdate(const std::size_t nControls, const std::size_t nTargets, + const std::size_t stateDimension, + const std::size_t stateVectorSizeBytes) { + assert(nControls <= 63); + if (enabled) { + gateCount++; + controlCount += nControls; + targetCount += nTargets; + // Times 2 because operating on the state vector requires both reading + // and writing. + svIO += (2 * stateVectorSizeBytes) / + (1 << nControls); // TODO: update to use qudits + // For each element of the state vector, 2 complex multiplies and 1 + // complex accumulate is needed. This is reduced if there if this is a + // controlled operation. + // Each complex multiply is 6 real ops. + // So 2 complex multiplies and 1 complex addition is 2*6+2 = 14 ops. 
+ svFLOPs += stateDimension * (14 * nTargets) / + (1 << nControls); // TODO: update to use qudits + } + } + + ~PSSummaryData() { + if (enabled) { + cudaq::log( + "PhotonicCircuitSimulator '{}' Total Program Metrics [tag={}]:", name, + cudaq::TIMING_GATE_COUNT); + cudaq::log("Gate Count = {}", gateCount); + cudaq::log("Control Count = {}", controlCount); + cudaq::log("Target Count = {}", targetCount); + cudaq::log("State Vector I/O (GB) = {:.6f}", + static_cast(svIO) / 1e9); + cudaq::log("State Vector GFLOPs = {:.6f}", + static_cast(svFLOPs) / 1e9); + } + } +}; + +/// @brief The PhotonicCircuitSimulator defines a base class for all photonic +/// simulators that are available to CUDA-Q via the PhotonicNVQIR library. +/// This base class handles qudit allocation and deallocation, +/// execution context handling, and defines all quantum operations pure +/// virtual methods that subtypes must implement. Subtypes should be responsible +/// for evolution of the concrete wave function representation (e.g., +/// state vector), sampling, and measurements. +class PhotonicCircuitSimulator { +protected: + /// @brief The number of levels for the qudits + std::size_t levels; + + /// @brief Flush the current queue of gates, i.e. + /// apply them to the state. Internal and meant for + /// subclasses to implement + virtual void flushGateQueueImpl() = 0; + + /// @brief Statistics collected over the life of the simulator. + PSSummaryData summaryData; + +public: + /// @brief The constructor + PhotonicCircuitSimulator() = default; + /// @brief The destructor + virtual ~PhotonicCircuitSimulator() = default; + + /// @brief Set the number of levels for the qudits + void setLevels(std::size_t newLevels) { levels = newLevels; } + + /// @brief Flush the current queue of gates, i.e. apply them to the state. + void flushGateQueue() { flushGateQueueImpl(); } + + /// @brief Provide an opportunity for any tear-down tasks before MPI Finalize + /// is invoked. 
Here we leave this unimplemented, it is meant for subclasses. + virtual void tearDownBeforeMPIFinalize() { + // do nothing + } + + /// @brief Provide a mechanism for simulators to create and return a + /// `PhotonicState` instance from a user-specified data set. + virtual std::unique_ptr + createStateFromData(const cudaq::state_data &) = 0; + + /// @brief Set the current noise model to consider when simulating the state. + /// This should be overridden by simulation strategies that support noise + /// modeling. + virtual void setNoiseModel(cudaq::noise_model &noise) = 0; + + virtual void setRandomSeed(std::size_t seed) { + // do nothing + } + + /// @brief Perform any flushing or synchronization to force that all + /// previously applied gates have truly been applied by the underlying + /// simulator. + virtual void synchronize() {} + + /// @brief Compute the expected value of the given spin op with respect to the + /// current state, . + virtual cudaq::observe_result observe(const cudaq::spin_op &term) = 0; + + /// @brief Allocate a single qudit, return the qudit as a logical index + virtual std::size_t allocateQudit() = 0; + + /// @brief Allocate `count` qudits. + virtual std::vector + allocateQudits(std::size_t count, const void *state = nullptr, + cudaq::simulation_precision precision = + cudaq::simulation_precision::fp32) = 0; + + virtual std::vector + allocateQudits(std::size_t count, const cudaq::PhotonicState *state) = 0; + + /// @brief Deallocate the qudit with give unique index + virtual void deallocate(const std::size_t quditIdx) = 0; + + /// @brief Deallocate all the provided qudits. + virtual void deallocateQudits(const std::vector &qudits) = 0; + + /// @brief Reset the current execution context. 
+ virtual void resetExecutionContext() = 0; + + /// @brief Set the execution context + virtual void setExecutionContext(cudaq::ExecutionContext *context) = 0; + + /// @brief Return the current execution context + virtual cudaq::ExecutionContext *getExecutionContext() = 0; + + /// @brief Whether or not this is a state vector simulator + virtual bool isStateVectorSimulator() const { return false; } + + /// @brief Apply a custom operation described by a matrix of data + /// represented as 1-D vector of elements in row-major order, as well + /// as the the control qudit and target indices + virtual void + applyCustomOperation(const std::vector> &matrix, + const std::vector &controls, + const std::vector &targets, + const std::string_view customUnitaryName = "") = 0; + +#define PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT(NAME) \ + void NAME(const std::size_t quditIdx) { \ + std::vector tmp; \ + NAME(tmp, quditIdx); \ + } \ + virtual void NAME(const std::vector &controls, \ + const std::size_t quditIdx) = 0; + +#define PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_ONE_PARAM(NAME) \ + void NAME(const double angle, const std::size_t quditIdx) { \ + std::vector tmp; \ + NAME(angle, tmp, quditIdx); \ + } \ + virtual void NAME(const double angle, \ + const std::vector &controls, \ + const std::size_t quditIdx) = 0; + +#define PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_TWO_PARAM(NAME) \ + void NAME(const double angle, const std::vector quditIdxs) { \ + std::vector tmp; \ + NAME(angle, tmp, quditIdxs); \ + } \ + virtual void NAME(const double angle, \ + const std::vector &controls, \ + const std::vector quditIdxs) = 0; + + /// @brief The create gate + PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT(create) + /// @brief The annihilate gate + PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT(annihilate) + /// @brief The plus gate + PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT(plus) + /// @brief The phase_shift gate + PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_ONE_PARAM(phase_shift) + /// @brief The beam_splitter gate + 
PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_TWO_PARAM(beam_splitter) + +// Undef those preprocessor defines. +#undef PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT +#undef PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_ONE_PARAM +#undef PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_TWO_PARAM + + /// @brief Measure the qudit with given index + virtual std::size_t mz(const std::size_t quditIdx) = 0; + + /// @brief Measure operation. Here we check what the current execution + /// context is. If the context is sample, then we do nothing but store the + /// measure qudit, which we then use to do full state sampling when + /// flushAnySamplingTask() is called. If the context is sample-conditional, + /// then we have a circuit that contains if (`mz(q)`) and we measure the + /// qudit, collapse the state, and then store the sample qudit for final full + /// state sampling. We also return the digit result. If no execution context, + /// just measure, collapse, and return the digit. + virtual std::size_t mz(const std::size_t quditIdx, + const std::string ®isterName) = 0; + + /// @brief Reset the qudit to the |0> state + virtual void resetQudit(const std::size_t quditIdx) = 0; + + /// @brief Sample the current multi-qudit state on the given qudit indices + /// over a certain number of shots + virtual cudaq::ExecutionResult + sample(const std::vector &quditIdxs, const int shots) = 0; + + /// @brief Return the name of this PhotonicCircuitSimulator + virtual std::string name() const = 0; + + /// @brief Return a thread_local pointer to this PhotonicCircuitSimulator + virtual PhotonicCircuitSimulator *clone() = 0; + + /// Determine the (preferred) precision of the simulator. + virtual bool isSinglePrecision() const = 0; + + bool isDoublePrecision() const { return !isSinglePrecision(); } + +}; // PhotonicCircuitSimulator + +/// @brief The PhotonicCircuitSimulatorBase is the type that is meant to +/// be subclassed for new photonic simulation strategies. 
The separation of +/// PhotonicCircuitSimulator from PhotonicCircuitSimulatorBase allows simulation +/// sub-types to specify the floating point precision for the simulation +template +class PhotonicCircuitSimulatorBase : public PhotonicCircuitSimulator { + +private: + /// @brief Reference to the current circuit name. + std::string currentCircuitName = ""; + + /// @brief Return true if the simulator is in the tracer mode. + bool isInTracerMode() const { + return executionContext && executionContext->name == "tracer"; + } + +protected: + /// @brief The current Execution Context (typically this is null, sampling). + cudaq::ExecutionContext *executionContext = nullptr; + + /// @brief A tracker for qudit allocation + cudaq::QuditIdTracker tracker; + + /// @brief The number of qudits that have been allocated + std::size_t nQuditsAllocated = 0; + + /// @brief The dimension of the multi-qudit state. + std::size_t stateDimension = 0; + + /// @brief Keep track of the previous state dimension + /// as we grow the state. + std::size_t previousStateDimension = 0; + + /// @brief Vector containing qudit ids that are to be sampled + std::vector sampleQudits; + + /// @brief Map of register name to observed digit results for mid-circuit + /// sampling + std::unordered_map> + midCircuitSampleResults; + + /// @brief Store the last observed register name, this will help us + /// know if we are writing to a classical digit vector + std::string lastMidCircuitRegisterName = ""; + + /// @brief Vector storing register names that are digit vectors + std::vector vectorRegisters; + + /// @brief Under certain execution contexts, we'll deallocate + /// before we are actually done with the execution task, + /// this vector keeps track of qudit ids that are to be + /// deallocated at a later time. 
+ std::vector deferredDeallocation; + + /// @brief Map digit register names to the qudits that make it up + std::unordered_map> + registerNameToMeasuredQudit; + + /// @brief Keep track of the current number of qudits in batch mode + std::size_t batchModeCurrentNumQudits = 0; + + /// @brief Environment variable name that allows a programmer to specify how + /// expectation values should be computed. This defaults to true. + static constexpr const char observeSamplingEnvVar[] = + "CUDAQ_OBSERVE_FROM_SAMPLING"; + + /// @brief A GateApplicationTask consists of a matrix describing the quantum + /// operation, a set of possible control qudit indices, and a set of target + /// indices. + struct GateApplicationTask { + const std::string operationName; + const std::vector> matrix; + const std::vector controls; + const std::vector targets; + const std::vector parameters; + GateApplicationTask(const std::string &name, + const std::vector> &m, + const std::vector &c, + const std::vector &t, + const std::vector ¶ms) + : operationName(name), matrix(m), controls(c), targets(t), + parameters(params) {} + }; + + /// @brief The current queue of operations to execute + std::queue gateQueue; + + /// @brief Get the name of the current circuit being executed. + std::string getCircuitName() const { return currentCircuitName; } + + /// @brief Return the current multi-qudit state dimension + virtual std::size_t calculateStateDim(const std::size_t numQudits) { + assert(numQudits < 30); + cudaq::info("[calculateStateDim] levels {}, numQudits {}.", levels, + numQudits); + return std::pow(levels, numQudits); + } + + /// @brief Grow the state vector by one qudit. + virtual void addQuditToState() = 0; + + /// @brief Reset the qudit state. + virtual void deallocateStateImpl() = 0; + + /// @brief Reset the qudit state back to dim = 0. + void deallocateState() { + deallocateStateImpl(); + nQuditsAllocated = 0; + stateDimension = 0; + } + /// @brief Measure the qudit and return the result. 
Collapse the state vector. + virtual int measureQudit(const std::size_t quditIdx) = 0; + + /// @brief Return true if this PhotonicCircuitSimulator can + /// handle instead of PhotonicNVQIR applying measure + /// basis quantum gates to change to the Z basis and sample. + virtual bool canHandleObserve() { return false; } + + /// @brief Return the internal state representation. This + /// is meant for subtypes to override + virtual std::unique_ptr getSimulationState() { + throw std::runtime_error( + "Simulation data not available for this simulator backend."); + } + + /// @brief Handle basic sampling tasks by storing the qudit index for + /// processing in resetExecutionContext. Return true to indicate this is + /// sampling and to exit early. False otherwise. + bool handleBasicSampling(const std::size_t quditIdx, + const std::string ®Name) { + if (executionContext && executionContext->name == "sample" && + !executionContext->hasConditionalsOnMeasureResults) { + // Add the qudit to the sampling list + sampleQudits.push_back(quditIdx); + + auto processForRegName = [&](const std::string ®Str) { + // Insert the sample qudit into the register name map + auto iter = registerNameToMeasuredQudit.find(regStr); + if (iter == registerNameToMeasuredQudit.end()) + registerNameToMeasuredQudit.emplace( + regStr, std::vector{quditIdx}); + else if (std::find(iter->second.begin(), iter->second.end(), + quditIdx) == iter->second.end()) + iter->second.push_back(quditIdx); + }; + + // Insert into global register and named register (if it exists) + processForRegName(cudaq::GlobalRegisterName); + if (!regName.empty()) + processForRegName(regName); + + return true; + } + + return false; + } + + /// @brief This function handles sampling in the presence of conditional + /// statements on qudit measurement results. Specifically, it will keep + /// track of a classical register for all measures encountered in the + /// program and store mid-circuit measures in the corresponding register. 
+ void handleSamplingWithConditionals(const std::size_t quditIdx, + const std::string digitResult, + const std::string ®isterName) { + // We still care about what qudit we are measuring if in the + // sample-conditional context + if (executionContext && executionContext->name == "sample" && + executionContext->hasConditionalsOnMeasureResults) { + std::string mutableRegisterName = registerName; + + // If no registerName, we'll just sample normally + if (registerName.empty()) { + // Either this is library mode and we have register names attached + // to the execution context + if (midCircuitSampleResults.size() < + executionContext->registerNames.size()) { + mutableRegisterName = + executionContext->registerNames[midCircuitSampleResults.size()]; + } else { + // or no register names, in which case we'll just treat it as + // a regular sampled qudit and drop out + sampleQudits.push_back(quditIdx); + return; + } + } + + cudaq::info("Handling Sampling With Conditionals: {}, {}, {}", quditIdx, + digitResult, mutableRegisterName); + // See if we've observed this register before, if not start a vector of + // digit results, if we have, add the digit result to the existing vector + auto iter = midCircuitSampleResults.find(mutableRegisterName); + if (iter == midCircuitSampleResults.end()) + midCircuitSampleResults.emplace(mutableRegisterName, + std::vector{digitResult}); + else + iter->second.push_back(digitResult); + + // If this register is the same as last time, then we are + // writing to a digit vector register (auto var = mz(qreg)) + if (lastMidCircuitRegisterName == mutableRegisterName) + vectorRegisters.push_back(mutableRegisterName); + + // Store the last register name + lastMidCircuitRegisterName = mutableRegisterName; + } + } + + /// @brief Utility function that returns a string-view of the current quantum + /// instruction, intended for logging purposes. 
+ std::string gateToString(const std::string_view gateName, + const std::vector &controls, + const std::vector ¶meters, + const std::vector &targets) { + std::string angleStr = ""; + if (!parameters.empty()) { + angleStr = std::to_string(parameters[0]); + for (std::size_t i = 1; i < parameters.size(); i++) + angleStr += ", " + std::to_string(parameters[i]); + angleStr += ", "; + } + + std::stringstream qudits, ret; + if (!controls.empty()) { + qudits << controls[0]; + for (size_t i = 1; i < controls.size(); i++) { + qudits << ", " << controls[i]; + } + qudits << ", " << targets[0]; + for (size_t i = 1; i < targets.size(); i++) { + qudits << ", " << targets[i]; + } + ret << "(apply) ctrl-" << gateName << "(" << angleStr << qudits.str() + << ")"; + } else { + qudits << targets[0]; + for (size_t i = 1; i < targets.size(); i++) { + qudits << ", " << targets[i]; + } + ret << "(apply) " << gateName << "(" << angleStr << qudits.str() << ")"; + } + return ret.str(); + } + + /// @brief Return true if the current execution is in batch mode + bool isInBatchMode() { + if (!executionContext) + return false; + + if (executionContext->totalIterations == 0) + return false; + + return true; + } + + /// @brief Return true if the current execution is the last execution of batch + /// mode. 
+ bool isLastBatch() { + return executionContext && executionContext->batchIteration > 0 && + executionContext->batchIteration == + executionContext->totalIterations - 1; + } + + /// @brief Override the default sized allocation of qudits + /// here to be a qudit more efficient than the default implementation + virtual void addQuditsToState(std::size_t count, + const void *state = nullptr) { + if (state != nullptr) + throw std::runtime_error("State initialization must be handled by " + "subclasses, override addQuditsToState."); + for (std::size_t i = 0; i < count; i++) + addQuditToState(); + } + + virtual void addQuditsToState(const cudaq::PhotonicState &state) { + throw std::runtime_error("State initialization must be handled by " + "subclasses, override addQuditsToState."); + } + + /// @brief Execute a sampling task with the current set of sample qudits. + void flushAnySamplingTasks(bool force = false) { + if (sampleQudits.empty()) + return; + + if (executionContext->hasConditionalsOnMeasureResults && !force) + return; + + // Sort the qudit indices + std::sort(sampleQudits.begin(), sampleQudits.end()); + auto last = std::unique(sampleQudits.begin(), sampleQudits.end()); + sampleQudits.erase(last, sampleQudits.end()); + + cudaq::info("Sampling the current state, with measure qudits = {}", + sampleQudits); + + // Ask the subtype to sample the current state + auto execResult = + sample(sampleQudits, executionContext->hasConditionalsOnMeasureResults + ? 
1 + : executionContext->shots); + + if (registerNameToMeasuredQudit.empty()) { + executionContext->result.append(execResult); + } else { + + for (auto &[regName, qudits] : registerNameToMeasuredQudit) { + // Measurements are sorted according to qudit allocation order + std::sort(qudits.begin(), qudits.end()); + auto last = std::unique(qudits.begin(), qudits.end()); + qudits.erase(last, qudits.end()); + + // Find the position of the qudits we have in the result digit string + // Create a map of qudit to digit string location + std::unordered_map quditLocMap; + for (std::size_t i = 0; i < qudits.size(); i++) { + auto iter = + std::find(sampleQudits.begin(), sampleQudits.end(), qudits[i]); + auto idx = std::distance(sampleQudits.begin(), iter); + quditLocMap.insert({qudits[i], idx}); + } + + cudaq::ExecutionResult tmp(regName); + for (auto &[digits, count] : execResult.counts) { + std::string b = ""; + for (auto &qb : qudits) + b += digits[quditLocMap[qb]]; + tmp.appendResult(b, count); + } + + executionContext->result.append(tmp); + } + } + deallocateQudits(sampleQudits); + sampleQudits.clear(); + registerNameToMeasuredQudit.clear(); + } + + /// @brief Add a new gate application task to the queue + void enqueueGate(const std::string name, + const std::vector> &matrix, + const std::vector &controls, + const std::vector &targets, + const std::vector ¶ms) { + if (isInTracerMode()) { + std::vector controlsInfo, targetsInfo; + for (auto &c : controls) + controlsInfo.emplace_back(levels, c); + for (auto &t : targets) + targetsInfo.emplace_back(levels, t); + + std::vector anglesProcessed; + if constexpr (std::is_same_v) + anglesProcessed = params; + else { + for (auto &a : params) + anglesProcessed.push_back(static_cast(a)); + } + + executionContext->kernelTrace.appendInstruction( + name, anglesProcessed, controlsInfo, targetsInfo); + return; + } + + gateQueue.emplace(name, matrix, controls, targets, params); + } + + /// @brief This pure method is meant for subtypes to 
implement, and its goal + /// is to apply the gate described by the GateApplicationTask to the + /// subtype-specific state data representation. + virtual void applyGate(const GateApplicationTask &task) = 0; + + /// @brief Provide a base-class method that can be invoked after every gate + /// application and will apply any noise channels after the gate invocation + /// based on a user-provided noise model. Unimplemented on the base class, + /// sub-types can implement noise modeling. + virtual void applyNoiseChannel(const std::string_view gateName, + const std::vector &qudits) {} + + /// @brief Flush the gate queue, run all queued gate application tasks. + void flushGateQueueImpl() override { + while (!gateQueue.empty()) { + auto &next = gateQueue.front(); + if (isStateVectorSimulator() && summaryData.enabled) + summaryData.svGateUpdate( + next.controls.size(), next.targets.size(), stateDimension, + stateDimension * sizeof(std::complex)); + try { + applyGate(next); + } catch (std::exception &e) { + while (!gateQueue.empty()) + gateQueue.pop(); + throw e; + } catch (...) { + while (!gateQueue.empty()) + gateQueue.pop(); + throw std::runtime_error("Unknown exception in applyGate"); + } + gateQueue.pop(); + } + // // For CUDA-based simulators, this calls cudaDeviceSynchronize() + synchronize(); + } + + /// @brief Set the current state to the |0> state, + /// retaining the current number of qudits. + virtual void setToZeroState() = 0; + + /// @brief Return true if expectation values should be computed from + /// sampling + parity of digit strings. + /// Default is to enable observe from sampling, i.e., simulating the + /// change-of-basis circuit for each term. + /// + /// The environment variable "CUDAQ_OBSERVE_FROM_SAMPLING" can be used to + /// turn on or off this setting. 
+ bool shouldObserveFromSampling(bool defaultConfig = true) { + if (auto envVar = std::getenv(observeSamplingEnvVar); envVar) { + std::string asString = envVar; + std::transform(asString.begin(), asString.end(), asString.begin(), + [](auto c) { return std::tolower(c); }); + if (asString == "false" || asString == "off" || asString == "0") + return false; + if (asString == "true" || asString == "on" || asString == "1") + return true; + } + + return defaultConfig; + } + + bool isSinglePrecision() const override { + return std::is_same_v; + } + + /// @brief Return this simulator's qudit ordering. + QuditOrdering getQuditOrdering() const { return QuditOrdering::lsb; } + +public: + /// @brief The constructor + PhotonicCircuitSimulatorBase() = default; + + /// @brief The destructor + virtual ~PhotonicCircuitSimulatorBase() = default; + + /// @brief Create a simulation-specific PhotonicState instance from a + /// user-provided data set. + std::unique_ptr + createStateFromData(const cudaq::state_data &data) override { + return getSimulationState()->createPSFromData(data); + } + + /// @brief Set the current noise model to consider when + /// simulating the state. This should be overridden by + /// simulation strategies that support noise modeling. + void setNoiseModel(cudaq::noise_model &noise) override { + // Fixme consider this as a warning instead of a hard error + throw std::runtime_error( + "The current backend does not support noise modeling."); + } + + /// @brief Compute the expected value of the given spin op + /// with respect to the current state, . 
+ cudaq::observe_result observe(const cudaq::spin_op &term) override { + throw std::runtime_error("This PhotonicCircuitSimulator does not implement " + "observe(const cudaq::spin_op &)."); + } + + /// @brief Allocate a single qudit, return the qudit as a logical index + std::size_t allocateQudit() override { + // Get a new qudit index + auto newIdx = tracker.getNextIndex(); + + if (isInBatchMode()) { + batchModeCurrentNumQudits++; + // In batch mode, we might already have an allocated state that + // has been set to |0..0>. We can reuse it as is, if the next qudit + // index is smaller than number of qudits of this allocated state. + if (newIdx < nQuditsAllocated) + return newIdx; + } + + cudaq::info("Allocating new qudit with idx {} (nQ={}, dim={})", newIdx, + nQuditsAllocated, stateDimension); + + // Increment the number of qudits and set + // the new state dimension + previousStateDimension = stateDimension; + nQuditsAllocated++; + stateDimension = calculateStateDim(nQuditsAllocated); + + if (!isInTracerMode()) + // Tell the subtype to grow the state representation + addQuditToState(); + + // May be that the state grows enough that we want to handle observation via + // sampling + if (executionContext) + executionContext->canHandleObserve = canHandleObserve(); + + // return the new qudit index + return newIdx; + } + + /// @brief Allocate `count` qudits in a specific state. + std::vector + allocateQudits(std::size_t count, const void *state = nullptr, + cudaq::simulation_precision precision = + cudaq::simulation_precision::fp32) override { + + // Make sure if someone gives us state data, that the precision + // is correct for this simulation. + if (state != nullptr) { + if constexpr (std::is_same_v) { + if (precision == cudaq::simulation_precision::fp64) + throw std::runtime_error( + "Invalid user-provided state data. 
Simulator " + "is FP32 but state data is FP64."); + } else { + if (precision == cudaq::simulation_precision::fp32) + throw std::runtime_error( + "Invalid user-provided state data. Simulator " + "is FP64 but state data is FP32."); + } + } + + std::vector qudits; + for (std::size_t i = 0; i < count; i++) + qudits.emplace_back(tracker.getNextIndex()); + + if (isInBatchMode()) { + // Store the current number of qudits requested + batchModeCurrentNumQudits += count; + + // We have an allocated state, it has been set to |0>, + // we want to reuse it as is. If the state needs to grow, then + // we will ask the subtype to add more qudits. + if (qudits.back() < nQuditsAllocated) + count = 0; + else + count = qudits.back() + 1 - nQuditsAllocated; + } + + cudaq::info("Allocating {} new qudits.", count); + + previousStateDimension = stateDimension; + nQuditsAllocated += count; + stateDimension = calculateStateDim(nQuditsAllocated); + + if (!isInTracerMode()) + // Tell the subtype to allocate more qudits + addQuditsToState(count, state); + + // May be that the state grows enough that we + // want to handle observation via sampling + if (executionContext) + executionContext->canHandleObserve = canHandleObserve(); + + return qudits; + } + + /// @brief Allocate `count` qudits in a specific state. + std::vector + allocateQudits(std::size_t count, + const cudaq::PhotonicState *state) override { + if (!state) + return allocateQudits(count); + + if (!isInTracerMode() && count != state->getNumQudits()) + throw std::invalid_argument("Dimension mismatch: the input state doesn't " + "match the number of qudits"); + + std::vector qudits; + for (std::size_t i = 0; i < count; i++) + qudits.emplace_back(tracker.getNextIndex()); + + if (isInBatchMode()) { + // Store the current number of qudits requested + batchModeCurrentNumQudits += count; + + // We have an allocated state, it has been set to |0>, + // we want to reuse it as is. 
If the state needs to grow, then + // we will ask the subtype to add more qudits. + if (qudits.back() < nQuditsAllocated) + count = 0; + else + count = qudits.back() + 1 - nQuditsAllocated; + } + + cudaq::info("Allocating {} new qudits.", count); + + previousStateDimension = stateDimension; + nQuditsAllocated += count; + stateDimension = calculateStateDim(nQuditsAllocated); + + if (!isInTracerMode()) + // Tell the subtype to allocate more qudits + addQuditsToState(*state); + + // May be that the state grows enough that we + // want to handle observation via sampling + if (executionContext) + executionContext->canHandleObserve = canHandleObserve(); + + return qudits; + } + + /// @brief Deallocate the qudit with give index + void deallocate(const std::size_t quditIdx) override { + if (executionContext && executionContext->name != "tracer") { + cudaq::info("Deferring qudit {} deallocation", quditIdx); + deferredDeallocation.push_back(quditIdx); + return; + } + + cudaq::info("Deallocating qudit {}", quditIdx); + + // Reset the qudit + if (!isInTracerMode()) + resetQudit(quditIdx); + + // Return the index to the tracker + tracker.returnIndex(quditIdx); + --nQuditsAllocated; + + // Reset the state if we've deallocated all qudits. + if (tracker.allDeallocated()) { + cudaq::info("Deallocated all qudits, reseting state vector."); + // all qudits deallocated, + deallocateState(); + while (!gateQueue.empty()) + gateQueue.pop(); + } + } + + /// @brief Deallocate all requested qudits. If the number of qudits + /// is equal to the number of allocated qudits, then clear the entire + /// state at once. + void deallocateQudits(const std::vector &qudits) override { + // Do nothing if there are no allocated qudits. 
+ if (nQuditsAllocated == 0) + return; + + if (executionContext) { + for (auto &quditIdx : qudits) { + cudaq::info("Deferring qudit {} deallocation", quditIdx); + deferredDeallocation.push_back(quditIdx); + } + return; + } + + if (qudits.size() == tracker.numAllocated()) { + cudaq::info("Deallocate all qudits."); + deallocateState(); + for (auto &q : qudits) + tracker.returnIndex(q); + return; + } + + for (auto &q : qudits) + deallocate(q); + } + + /// @brief Reset the current execution context. + void resetExecutionContext() override { + // If null, do nothing + if (!executionContext) + return; + + // Get the ExecutionContext name + auto execContextName = executionContext->name; + + // If we are sampling... + if (execContextName.find("sample") != std::string::npos) { + // Sample the state over the specified number of shots + if (sampleQudits.empty()) { + if (isInBatchMode()) + sampleQudits.resize(batchModeCurrentNumQudits); + else + sampleQudits.resize(nQuditsAllocated); + std::iota(sampleQudits.begin(), sampleQudits.end(), 0); + } + + // Flush the queue if there are any gates to apply + flushGateQueue(); + + // Flush any queued up sampling tasks + flushAnySamplingTasks(/*force this*/ true); + + // Handle the processing for any mid circuit measurements + for (auto &m : midCircuitSampleResults) { + // Get the register name and the vector of digit results + auto regName = m.first; + auto digitResults = m.second; + cudaq::ExecutionResult counts(regName); + + if (std::find(vectorRegisters.begin(), vectorRegisters.end(), + regName) != vectorRegisters.end()) { + // this is a vector register + std::string digitStr = ""; + for (std::size_t j = 0; j < digitResults.size(); j++) + digitStr += digitResults[j]; + + counts.appendResult(digitStr, 1); + + } else { + // Not a vector, collate all digits into a 1 qudit counts dict + for (std::size_t j = 0; j < digitResults.size(); j++) { + counts.appendResult(digitResults[j], 1); + } + } + executionContext->result.append(counts); + 
} + + // Reorder the global register (if necessary). This might be necessary + // if the mapping pass had run and we want to undo the shuffle that + // occurred during mapping. + if (!executionContext->reorderIdx.empty()) { + executionContext->result.reorder(executionContext->reorderIdx); + executionContext->reorderIdx.clear(); + } + + // Clear the sample qudits for the next run + sampleQudits.clear(); + midCircuitSampleResults.clear(); + lastMidCircuitRegisterName = ""; + currentCircuitName = ""; + } + + // Set the state data if requested. + if (executionContext->name == "extract-state") { + if (sampleQudits.empty()) { + if (isInBatchMode()) + sampleQudits.resize(batchModeCurrentNumQudits); + else + sampleQudits.resize(nQuditsAllocated); + std::iota(sampleQudits.begin(), sampleQudits.end(), 0); + } + flushGateQueue(); + executionContext->simulationState = getSimulationState(); + + deallocateQudits(sampleQudits); + sampleQudits.clear(); + } + + // Deallocate the deferred qudits, but do so + // without explicit qudit reset. + for (auto &deferred : deferredDeallocation) + tracker.returnIndex(deferred); + + bool shouldSetToZero = isInBatchMode() && !isLastBatch(); + executionContext = nullptr; + + // Reset the state if we've deallocated all qudits. 
+ if (tracker.allDeallocated()) { + if (shouldSetToZero) { + cudaq::info("In batch mode currently, reset state to |0>"); + // Do not deallocate the state, but reset it to |0> + setToZeroState(); + } else { + cudaq::info("Deallocated all qudits, reseting state vector."); + // all qudits deallocated, + deallocateState(); + } + } + + batchModeCurrentNumQudits = 0; + deferredDeallocation.clear(); + } + + /// @brief Set the execution context + void setExecutionContext(cudaq::ExecutionContext *context) override { + executionContext = context; + executionContext->canHandleObserve = canHandleObserve(); + currentCircuitName = context->kernelName; + cudaq::info("Setting current circuit name to {}", currentCircuitName); + } + + /// @brief Return the current execution context + cudaq::ExecutionContext *getExecutionContext() override { + return executionContext; + } + + /// @brief Apply a custom quantum operation + void applyCustomOperation(const std::vector> &matrix, + const std::vector &controls, + const std::vector &targets, + const std::string_view customName) override { + throw std::runtime_error("This PhotonicCircuitSimulator does not implement " + "applyCustomOperation."); + } + + template + void enqueueQuantumOperation(const std::vector &angles, + const std::vector &controls, + const std::vector &targets) { + flushAnySamplingTasks(); + QuantumOperation gate; + // This is a very hot section of code. Don't form the log string unless + // we're actually going to use it. 
+ if (cudaq::details::should_log(cudaq::details::LogLevel::info)) + cudaq::info(gateToString(gate.name(), controls, angles, targets)); + enqueueGate(gate.name(), gate.getGate(levels, angles), controls, targets, + angles); + } + +#define PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT(NAME) \ + using PhotonicCircuitSimulator::NAME; \ + void NAME(const std::vector &controls, \ + const std::size_t quditIdx) override { \ + enqueueQuantumOperation>( \ + {}, controls, std::vector{quditIdx}); \ + } + +#define PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_ONE_PARAM(NAME) \ + using PhotonicCircuitSimulator::NAME; \ + void NAME(const double angle, const std::vector &controls, \ + const std::size_t quditIdx) override { \ + enqueueQuantumOperation>( \ + {static_cast(angle)}, controls, \ + std::vector{quditIdx}); \ + } + +#define PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_TWO_PARAM(NAME) \ + using PhotonicCircuitSimulator::NAME; \ + void NAME(const double angle, const std::vector &controls, \ + const std::vector quditsIdxs) override { \ + enqueueQuantumOperation>( \ + {static_cast(angle)}, controls, quditsIdxs); \ + } + /// @brief The create gate + PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT(create) + /// @brief The annihilate gate + PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT(annihilate) + /// @brief The plus gate + PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT(plus) + /// @brief The phase_shift gate + PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_ONE_PARAM(phase_shift) + /// @brief The beam_splitter gate + PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_TWO_PARAM(beam_splitter) + +// Undef those preprocessor defines. +#undef PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT +#undef PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_ONE_PARAM +#undef PHOTONIC_CIRCUIT_SIMULATOR_ONE_QUDIT_TWO_PARAM + + std::size_t mz(const std::size_t quditIdx) override { + return mz(quditIdx, ""); + } + + /// @brief Measure operation. Here we check what the current execution + /// context is. 
If the context is sample, then we do nothing but store the + /// measure qudit, which we then use to do full state sampling when + /// flushAnySamplingTask() is called. If the context is sample-conditional, + /// then we have a circuit that contains if (`mz(q)`) and we measure the + /// qudit, collapse the state, and then store the sample qudit for final + /// full state sampling. We also return the digit result. If no execution + /// context, just measure, collapse, and return the digit. + std::size_t mz(const std::size_t quditIdx, + const std::string ®isterName) override { + // Flush the Gate Queue + flushGateQueue(); + + // If sampling, just store the digit, do nothing else. + if (handleBasicSampling(quditIdx, registerName)) + return true; + + if (isInTracerMode()) + return true; + + // Get the actual measurement from the subtype measureQudit implementation + auto measureResult = measureQudit(quditIdx); + auto digitResult = std::to_string(measureResult); + // If this CUDA-Q kernel has conditional statements on measure results + // then we want to handle the sampling a digit differently. 
+ handleSamplingWithConditionals(quditIdx, digitResult, registerName); + + // Return the result + return measureResult; + } + +}; // PhotonicCircuitSimulatorBase + +} // namespace nvqir + +#define CONCAT(a, b) CONCAT_INNER(a, b) +#define CONCAT_INNER(a, b) a##b +#define NVQIR_REGISTER_PHOTONIC_SIMULATOR(CLASSNAME, PRINTED_NAME) \ + extern "C" { \ + nvqir::PhotonicCircuitSimulator *getPhotonicCircuitSimulator() { \ + thread_local static std::unique_ptr \ + photonic_simulator = std::make_unique(); \ + return photonic_simulator.get(); \ + } \ + nvqir::PhotonicCircuitSimulator *CONCAT(getPhotonicCircuitSimulator_, \ + PRINTED_NAME)() { \ + thread_local static std::unique_ptr \ + photonic_simulator = std::make_unique(); \ + return photonic_simulator.get(); \ + } \ + } + +#define NVQIR_PHOTONIC_SIMULATOR_CLONE_IMPL(CLASSNAME) \ + nvqir::PhotonicCircuitSimulator *clone() override { \ + thread_local static std::unique_ptr \ + photonic_simulator = std::make_unique(); \ + return photonic_simulator.get(); \ + } diff --git a/runtime/nvqir/photonics/PhotonicGates.h b/runtime/nvqir/photonics/PhotonicGates.h new file mode 100644 index 0000000000..55b301028c --- /dev/null +++ b/runtime/nvqir/photonics/PhotonicGates.h @@ -0,0 +1,241 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#pragma once +#include "nvqir/Gates.h" + +#include + +namespace nvqir { + +/// @brief Enumeration of supported CUDA-Q operations +enum class PhotonicGateName { + CreateGate, + AnnihilateGate, + PlusGate, + BeamSplitterGate, + PhaseShiftGate, +}; + +/// @brief Returns a precomputed factorial for n up to 30 +double _fast_factorial(int n) { + static std::vector FACTORIAL_TABLE = { + 1., + 1., + 2., + 6., + 24., + 120., + 720., + 5040., + 40320., + 362880., + 3628800., + 39916800., + 479001600., + 6227020800., + 87178291200., + 1307674368000., + 20922789888000., + 355687428096000., + 6402373705728000., + 121645100408832000., + 2432902008176640000., + 51090942171709440000., + 1124000727777607680000., + 25852016738884976640000., + 620448401733239439360000., + 15511210043330985984000000., + 403291461126605635584000000., + 10888869450418352160768000000., + 304888344611713860501504000000., + 8841761993739701954543616000000., + 265252859812191058636308480000000., + }; + if (n > 30) // We do not expect to get 30 photons in the loop at the same time + { + throw std::invalid_argument("received invalid value, n <= 30"); + } + return FACTORIAL_TABLE[n]; +} + +/// @brief Computes the Kronecker delta of two values +int _kron(int a, int b) { + if (a == b) + return 1; + return 0; +} + +/// @brief Computes a single element in the matrix representing a beam +/// splitter gate +double _calc_beam_splitter_elem(int N1, int N2, int n1, int n2, double theta) { + const double t = std::cos(theta); // transmission coefficient + const double r = std::sin(theta); // reflection coefficient + double sum = 0; + for (int k = 0; k <= n1; ++k) { + int l = N1 - k; + if (l >= 0 && l <= n2) { + double term1 = std::pow(r, (n1 - k + l)) * std::pow(t, (n2 + k - l)); + if (term1 == 0) { + continue; + } + double term2 = std::pow((-1), (l)) * + (sqrt(_fast_factorial(n1)) * sqrt(_fast_factorial(n2)) * + 
sqrt(_fast_factorial(N1)) * sqrt(_fast_factorial(N2))); + double term3 = (_fast_factorial(k) * _fast_factorial(n1 - k) * + _fast_factorial(l) * _fast_factorial(n2 - l)); + double term = term1 * term2 / term3; + sum += term; + } else { + continue; + } + } // end for k + + return sum; +} + +/// @brief Computes matrix representing a beam splitter gate +template +void _calc_beam_splitter(std::vector> &BS, + const Scalar theta) { + int levels = sqrt(sqrt(BS.size())); + // """Returns a matrix representing a beam splitter + for (int n1 = 0; n1 < levels; ++n1) { + for (int n2 = 0; n2 < levels; ++n2) { + int nxx = n1 + n2; + int nxd = std::min(nxx + 1, levels); + for (int N1 = 0; N1 < nxd; ++N1) { + int N2 = nxx - N1; + if (N2 >= nxd) { + continue; + } else { + + BS.at(n1 * levels * levels * levels + n2 * levels * levels + + N1 * levels + N2) = + _calc_beam_splitter_elem(N1, N2, n1, n2, theta); + } + } // end for N1 + } // end for n2 + } // end for n1 +} + +/// @brief Given the gate name (an element of the GateName enum), +/// return the matrix data, optionally parameterized by a rotation angle. 
+template +std::vector> +getPhotonicGateByName(PhotonicGateName name, const std::size_t levels, + std::vector angles = {}) { + switch (name) { + + case (PhotonicGateName::CreateGate): { + auto length = levels * levels; + std::vector> u(length, 0.0); + u.at(length - 1) = 1.; + for (std::size_t i = 1; i < levels; i++) { + u.at(i * levels + (i - 1)) = 1.; + } + return u; + } + case (PhotonicGateName::AnnihilateGate): { + auto length = levels * levels; + std::vector> u(length, 0.0); + u.at(0) = 1.; + for (std::size_t i = 0; i < levels-1; i++) { + u.at(i * levels + (i + 1)) = 1.; + } + return u; + } + case (PhotonicGateName::PlusGate): { + auto length = levels * levels; + std::vector> u(length, 0.0); + u.at(levels - 1) = 1.; + for (std::size_t i = 1; i < levels; i++) { + u.at(i * levels + (i - 1)) = 1.; + } + return u; + } + case (PhotonicGateName::BeamSplitterGate): { + auto theta = angles[0]; + auto length = levels * levels * levels * levels; + std::vector> BS(length, 0.0); + _calc_beam_splitter(BS, theta); + return BS; + } + case (PhotonicGateName::PhaseShiftGate): { + + auto phi = angles[0]; + auto length = levels * levels; + std::vector> PS(length, 0.0); + // static constexpr std::complex im = std::complex(0, 1.); + for (std::size_t i = 0; i < levels; i++) { + PS.at(i * levels + i) = + std::exp(nvqir::im * static_cast(i) * phi); + } + return PS; + } + } + + throw std::runtime_error("Invalid gate provided to getGateByName."); +} + +/// @brief The create operation as a type. Can instantiate and request +/// its matrix data. +template +struct create { + auto getGate(const std::size_t levels, std::vector angles = {}) { + return getPhotonicGateByName(PhotonicGateName::CreateGate, + levels); + } + const std::string name() const { return "create"; } +}; + +/// @brief The annihilate operation as a type. Can instantiate and request +/// its matrix data. 
+template +struct annihilate { + auto getGate(const std::size_t levels, std::vector angles = {}) { + return getPhotonicGateByName(PhotonicGateName::AnnihilateGate, + levels); + } + const std::string name() const { return "annihilate"; } +}; + +/// @brief The plus operation as a type. Can instantiate and request +/// its matrix data. +template +struct plus { + auto getGate(const std::size_t levels, std::vector angles = {}) { + return getPhotonicGateByName(PhotonicGateName::PlusGate, + levels); + } + const std::string name() const { return "plus"; } +}; + +/// The Beam Splitter Gate +template +struct beam_splitter { + std::vector> + getGate(const std::size_t levels, std::vector angles = {}) { + return getPhotonicGateByName(PhotonicGateName::BeamSplitterGate, + levels, angles); + } + const std::string name() const { return "beam_splitter"; } +}; + +/// The Phase Shift Gate +template +struct phase_shift { + std::vector> + getGate(const std::size_t levels, std::vector angles = {}) { + return getPhotonicGateByName(PhotonicGateName::PhaseShiftGate, + levels, angles); + } + const std::string name() const { return "phase_shift"; } +}; + +} // namespace nvqir diff --git a/runtime/nvqir/photonics/PhotonicNVQIR.cpp b/runtime/nvqir/photonics/PhotonicNVQIR.cpp new file mode 100644 index 0000000000..971bfcaa1d --- /dev/null +++ b/runtime/nvqir/photonics/PhotonicNVQIR.cpp @@ -0,0 +1,68 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ +#include "nvqir/photonics/PhotonicCircuitSimulator.h" +#include "common/PluginUtils.h" + +thread_local nvqir::PhotonicCircuitSimulator *photonic_simulator; +inline static constexpr std::string_view GetPhotonicCircuitSimulatorSymbol = + "getPhotonicCircuitSimulator"; + +/// @brief Provide a holder for externally created +/// PhotonicCircuitSimulator pointers (like from Python) that +/// will invoke clone on the simulator when requested, which +/// in turn will create the simulator if there isn't one on the +/// current thread, otherwise it will reuse the existing one +struct ExternallyProvidedPhotonicSimGenerator { + nvqir::PhotonicCircuitSimulator *simulator; + ExternallyProvidedPhotonicSimGenerator(nvqir::PhotonicCircuitSimulator *sim) + : simulator(sim) {} + auto operator()() { return simulator->clone(); } +}; +static std::unique_ptr + externPhotonicSimGenerator; + +extern "C" { +void __nvqir__setPhotonicCircuitSimulator( + nvqir::PhotonicCircuitSimulator *sim) { + photonic_simulator = sim; + // If we had been given one before, reset the holder + if (externPhotonicSimGenerator) { + auto ptr = externPhotonicSimGenerator.release(); + delete ptr; + } + externPhotonicSimGenerator = + std::make_unique(sim); + cudaq::info("[runtime] Setting the photonic circuit simulator to {}.", + sim->name()); +} +} + +namespace nvqir { + +/// @brief Return the single simulation backend pointer, create if not created +/// already. 
+/// @return +PhotonicCircuitSimulator *getPhotonicCircuitSimulatorInternal() { + if (photonic_simulator) + return photonic_simulator; + + if (externPhotonicSimGenerator) { + photonic_simulator = (*externPhotonicSimGenerator)(); + return photonic_simulator; + } + photonic_simulator = cudaq::getUniquePluginInstance( + GetPhotonicCircuitSimulatorSymbol); + cudaq::info("Creating the {} backend.", photonic_simulator->name()); + return photonic_simulator; +}; + +void setPhotonicRandomSeed(std::size_t seed) { + getPhotonicCircuitSimulatorInternal()->setRandomSeed(seed); +} + +} // namespace nvqir diff --git a/runtime/nvqir/photonics/PhotonicState.h b/runtime/nvqir/photonics/PhotonicState.h new file mode 100644 index 0000000000..5b26a8fee5 --- /dev/null +++ b/runtime/nvqir/photonics/PhotonicState.h @@ -0,0 +1,113 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ +#pragma once + +#include "PhotonicGates.h" + +#include "common/ExecutionContext.h" +#include "common/Logger.h" +#include "common/MeasureCounts.h" +#include "cudaq/host_config.h" + +#include +#include +#include + +namespace cudaq { + +/// @brief PhotonicState provides an implementation of `SimulationState` that +/// encapsulates the state data for the Photonic Circuit Simulators. 
+struct PhotonicState : public SimulationState { +protected: + virtual std::unique_ptr + createPSFromSizeAndPtr(std::size_t, void *, std::size_t dataType) = 0; + + std::unique_ptr + createFromSizeAndPtr(std::size_t size, void *ptr, std::size_t) override { + throw std::runtime_error( + "createFromSizeAndPtr not available for this photonic simulator " + "backend."); + } + +public: + virtual std::unique_ptr + createPSFromData(const state_data &data) { + if (std::holds_alternative(data)) { + if (isArrayLike()) + throw std::runtime_error( + "Cannot initialize state vector/density matrix state by matrix " + "product state tensors. Please use tensor network simulator " + "backends."); + auto &dataCasted = std::get(data); + return createPSFromSizeAndPtr( + dataCasted.size(), + const_cast(dataCasted.data()), + data.index()); + } + // Flat array state data + // Check the precision first. Get the size and data pointer from the input + // data. + if (getPrecision() == precision::fp32) { + auto [size, ptr] = getSizeAndPtr(data); + return createPSFromSizeAndPtr(size, ptr, data.index()); + } + + auto [size, ptr] = getSizeAndPtr(data); + return createPSFromSizeAndPtr(size, ptr, data.index()); + } + + Tensor getTensor(std::size_t tensorIdx = 0) const override { + throw std::runtime_error( + "getTensor not available for this photonic simulator backend."); + } + + std::vector getTensors() const override { + throw std::runtime_error( + "getTensors not available for this photonic simulator backend."); + } + + std::size_t getNumTensors() const override { + throw std::runtime_error( + "getNumTensors not available for this photonic simulator backend."); + } + + std::size_t getNumQubits() const override { + throw std::runtime_error( + "getNumQubits not available for this photonic simulator backend."); + } + + virtual std::size_t getNumQudits() const = 0; + + virtual std::complex overlap(const SimulationState &other) override { + throw std::runtime_error( + "overlap not available for this 
photonic simulator backend."); + } + + std::complex + getAmplitude(const std::vector &basisState) override { + throw std::runtime_error( + "getAmplitude not available for this photonic simulator backend."); + } + + void dump(std::ostream &os) const override { + throw std::runtime_error( + "dump not available for this photonic simulator backend."); + } + + precision getPrecision() const override { + throw std::runtime_error( + "getPrecision not available for this photonic simulator backend."); + } + + void destroyState() override { + throw std::runtime_error( + "destroyState not available for this photonic simulator backend."); + } +}; // PhotonicState + +} // namespace cudaq diff --git a/runtime/nvqir/qpp/CMakeLists.txt b/runtime/nvqir/qpp/CMakeLists.txt index ac20dce330..927d37c229 100644 --- a/runtime/nvqir/qpp/CMakeLists.txt +++ b/runtime/nvqir/qpp/CMakeLists.txt @@ -34,10 +34,10 @@ macro (AddQppBackend LIBRARY_NAME SOURCE_FILE) install(TARGETS ${LIBRARY_NAME} DESTINATION lib) endmacro() - - AddQppBackend(nvqir-qpp QppCircuitSimulator.cpp) AddQppBackend(nvqir-dm QppDMCircuitSimulator.cpp) +AddQppBackend(nvqir-photonics QppPhotonicCircuitSimulator.cpp) add_target_config(qpp-cpu) add_target_config(density-matrix-cpu) +add_target_config(photonics-cpu) diff --git a/runtime/nvqir/qpp/QppDMCircuitSimulator.cpp b/runtime/nvqir/qpp/QppDMCircuitSimulator.cpp index 0a7b6f2129..7777b21e2d 100644 --- a/runtime/nvqir/qpp/QppDMCircuitSimulator.cpp +++ b/runtime/nvqir/qpp/QppDMCircuitSimulator.cpp @@ -138,7 +138,9 @@ class QppNoiseCircuitSimulator : public nvqir::QppCircuitSimulator { /// @param gateName /// @param qubits void applyNoiseChannel(const std::string_view gateName, - const std::vector &qubits) override { + const std::vector &controls, + const std::vector &targets, + const std::vector ¶ms) override { // Do nothing if no execution context if (!executionContext) return; @@ -149,15 +151,16 @@ class QppNoiseCircuitSimulator : public nvqir::QppCircuitSimulator { // Get 
the name as a string std::string gName(gateName); - + std::vector qubits{controls.begin(), controls.end()}; + qubits.insert(qubits.end(), targets.begin(), targets.end()); std::vector casted_qubits; for (auto index : qubits) { casted_qubits.push_back(convertQubitIndex(index)); } // Get the Kraus channels specified for this gate and qubits - auto krausChannels = - executionContext->noiseModel->get_channels(gName, qubits); + auto krausChannels = executionContext->noiseModel->get_channels( + gName, targets, controls, params); // If none, do nothing if (krausChannels.empty()) diff --git a/runtime/nvqir/qpp/QppPhotonicCircuitSimulator.cpp b/runtime/nvqir/qpp/QppPhotonicCircuitSimulator.cpp new file mode 100644 index 0000000000..7405d40105 --- /dev/null +++ b/runtime/nvqir/qpp/QppPhotonicCircuitSimulator.cpp @@ -0,0 +1,323 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "nvqir/photonics/PhotonicCircuitSimulator.h" +#include +#include +#include + +using namespace cudaq; + +namespace nvqir { + +/// @brief QppPhotonicState provides an implementation of `PhotonicState` that +/// encapsulates the state data for the Qpp Circuit Simulator. +struct QppPhotonicState : public cudaq::PhotonicState { + /// @brief The state. This class takes ownership move semantics. 
+ qpp::ket state; + + /// @brief The levels of the qudits + std::size_t levels; + + QppPhotonicState(qpp::ket &&data, std::size_t lvl) + : state(std::move(data)), levels(lvl) {} + QppPhotonicState(const std::vector &shape, + const std::vector> &data, + std::size_t lvl) { + if (shape.size() != 1) + throw std::runtime_error( + "QppPhotonicState must be created from data with 1D shape."); + + state = Eigen::Map( + const_cast *>(data.data()), shape[0]); + + levels = lvl; + } + + std::size_t getNumQudits() const override { + return (std::log2(state.size()) / std::log2(levels)); + } + + std::complex + getAmplitude(const std::vector &basisState) override { + if (getNumQudits() != basisState.size()) + throw std::runtime_error( + fmt::format("[photonic-state] getAmplitude with an invalid number " + "of qudits in the basis state: expected{}, provided{}.", + getNumQudits(), basisState.size())); + + // Convert the basis state to an index value + const std::size_t idx = std::accumulate( + std::make_reverse_iterator(basisState.end()), + std::make_reverse_iterator(basisState.begin()), 0ull, + [&](std::size_t acc, int digit) { return (acc * levels) + digit; }); + return state[idx]; + } + + Tensor getTensor(std::size_t tensorIdx = 0) const override { + if (tensorIdx != 0) + throw std::runtime_error("[qpp-state] invalid tensor requested."); + return Tensor{ + reinterpret_cast( + const_cast *>(state.data())), + std::vector{static_cast(state.size())}, + getPrecision()}; + } + + // /// @brief Return all tensors that represent this state + std::vector getTensors() const override { return {getTensor()}; } + + // /// @brief Return the number of tensors that represent this state. 
+ std::size_t getNumTensors() const override { return 1; } + + std::complex + operator()(std::size_t tensorIdx, + const std::vector &indices) override { + if (tensorIdx != 0) + throw std::runtime_error("[qpp-state] invalid tensor requested."); + if (indices.size() != 1) + throw std::runtime_error("[qpp-state] invalid element extraction."); + + return state[indices[0]]; + } + + std::unique_ptr + createPSFromSizeAndPtr(std::size_t size, void *ptr, + std::size_t dataType) override { + return std::make_unique( + Eigen::Map(reinterpret_cast *>(ptr), + size), + levels); + } + + std::unique_ptr + createFromSizeAndPtr(std::size_t size, void *ptr, std::size_t) override { + return std::make_unique( + Eigen::Map(reinterpret_cast *>(ptr), + size), + levels); + } + + void dump(std::ostream &os) const override { os << state << "\n"; } + + precision getPrecision() const override { + return cudaq::SimulationState::precision::fp64; + } + + void destroyState() override { + qpp::ket k; + state = k; + } +}; // QppPhotonicState + +/// @brief The QppPhotonicCircuitSimulator implements the PhotonicCircuitSimulator +/// base class to provide a simulator delegating to the Q++ library from +/// https://github.com/softwareqinc/qpp. 
+template +class QppPhotonicCircuitSimulator + : public PhotonicCircuitSimulatorBase { +protected: + /// The QPP state representation (qpp::ket or qpp::cmat) + StateType state; + + std::size_t convertQuditIndex(std::size_t quditIndex) { + assert(stateDimension > 0 && "The state is empty, and thus has no qudits"); + return quditIndex; + } + + qpp::cmat toQppMatrix(const std::vector> &data, + std::size_t nTargets) { + + auto nRows = std::pow(levels, nTargets); + assert(data.size() == nRows * nRows && + "Invalid number of gate matrix elements passed to toQppMatrix"); + + // we represent row major, they represent column major + return Eigen::Map, Eigen::Dynamic, + Eigen::Dynamic, Eigen::RowMajor>>( + const_cast *>(data.data()), nRows, nRows); + } + + /// @brief Grow the state vector by one qudit. + void addQuditToState() override { addQuditsToState(1); } + + void addQuditsToState(std::size_t quditCount, + const void *stateDataIn = nullptr) override { + if (quditCount == 0) + return; + + auto *stateData = reinterpret_cast *>( + const_cast(stateDataIn)); + + if (state.size() == 0) { + // If this is the first time, allocate the state + if (stateData == nullptr) { + state = qpp::ket::Zero(stateDimension); + state(0) = 1.0; + } else + state = qpp::ket::Map(stateData, stateDimension); + return; + } + // If we are resizing an existing, allocate + // a zero state on a n qudit, and Kron-prod + // that with the existing state. 
+ if (stateData == nullptr) { + qpp::ket zero_state = qpp::ket::Zero(calculateStateDim(quditCount)); + zero_state(0) = 1.0; + state = qpp::kron(zero_state, state); + } else { + qpp::ket initState = + qpp::ket::Map(stateData, calculateStateDim(quditCount)); + state = qpp::kron(initState, state); + } + return; + } + + void addQuditsToState(const cudaq::PhotonicState &in_state) override { + const QppPhotonicState *const casted = + dynamic_cast(&in_state); + if (!casted) + throw std::invalid_argument( + "[PhotonicCircuitSimulator] Incompatible state input"); + + if (state.size() == 0) + state = casted->state; + else + state = qpp::kron(casted->state, state); + } + + void deallocateStateImpl() override { + qpp::ket tmp; + state = tmp; + } + + void applyGate(const GateApplicationTask &task) override { + auto matrix = toQppMatrix(task.matrix, task.targets.size()); + // First, convert all of the qudit indices to big endian. + std::vector controls; + for (auto index : task.controls) { + controls.push_back(convertQuditIndex(index)); + } + std::vector targets; + for (auto index : task.targets) { + targets.push_back(convertQuditIndex(index)); + } + + if (controls.empty()) { + state = qpp::apply(state, matrix, targets, levels); + return; + } + state = qpp::applyCTRL(state, matrix, controls, targets, levels); + } + + void setToZeroState() override { + state = qpp::ket::Zero(stateDimension); + state(0) = 1.0; + } + + int measureQudit(const std::size_t index) override { + const auto quditIdx = convertQuditIndex(index); + // If here, then we care about the result digit, so compute it. 
+ const auto measurement_tuple = + qpp::measure(state, qpp::cmat::Identity(levels, levels), {quditIdx}, + /*qudit dimension=*/levels, /*destructive measmt=*/false); + const auto measurement_result = std::get(measurement_tuple); + const auto &post_meas_states = std::get(measurement_tuple); + const auto &collapsed_state = post_meas_states[measurement_result]; + if constexpr (std::is_same_v) { + state = Eigen::Map(collapsed_state.data(), + collapsed_state.size()); + } else { + state = Eigen::Map(collapsed_state.data(), + collapsed_state.rows(), + collapsed_state.cols()); + } + cudaq::info("Measured qudit {} -> {}", quditIdx, measurement_result); + return measurement_result; + } + + +public: + QppPhotonicCircuitSimulator() { + // Populate the correct name so it is printed correctly during + // deconstructor. + summaryData.name = name(); + } + + virtual ~QppPhotonicCircuitSimulator() = default; + + void setRandomSeed(std::size_t seed) override { + qpp::RandomDevices::get_instance().get_prng().seed(seed); + } + + bool canHandleObserve() override { return false; } + + void resetQudit(const std::size_t index) override { + flushGateQueue(); + const auto quditIdx = convertQuditIndex(index); + state = qpp::reset(state, {quditIdx}); + } + + cudaq::ExecutionResult sample(const std::vector &qudits, + const int shots) override { + + std::vector measuredDigits; + for (auto index : qudits) { + measuredDigits.push_back(convertQuditIndex(index)); + } + + auto sampleResult = qpp::sample(shots, state, measuredDigits, levels); + // Convert to what we expect + std::stringstream digit_stream; + cudaq::ExecutionResult counts; + + for (auto [result, count] : sampleResult) { + // Push back each term in the vector of digits to the digit_stream. + for (const auto &digit : result) { + digit_stream << digit; + } + + // Add to the sample result + // in mid-circ sampling mode this will append 1 digit_stream + counts.appendResult(digit_stream.str(), count); + // Reset the state. 
+ digit_stream.str(""); + digit_stream.clear(); + } + + return counts; + } + + std::unique_ptr getSimulationState() override { + flushGateQueue(); + return std::make_unique(std::move(state), levels); + } + + bool isStateVectorSimulator() const override { + return std::is_same_v; + } + + /// @brief Primarily used for testing. + auto getStateVector() { + flushGateQueue(); + return state; + } + + std::string name() const override { return "qpp-photonics"; } + + NVQIR_PHOTONIC_SIMULATOR_CLONE_IMPL(QppPhotonicCircuitSimulator) + +}; // QppPhotonicCircuitSimulator + +} // namespace nvqir + +#ifndef __NVQIR_QPP_TOGGLE_CREATE +/// Register this Simulator with NVQIR. +NVQIR_REGISTER_PHOTONIC_SIMULATOR(nvqir::QppPhotonicCircuitSimulator, + photonics) +#endif \ No newline at end of file diff --git a/runtime/cudaq/qis/managers/photonics/photonics.yml b/runtime/nvqir/qpp/photonics-cpu.yml similarity index 74% rename from runtime/cudaq/qis/managers/photonics/photonics.yml rename to runtime/nvqir/qpp/photonics-cpu.yml index 592e105a6e..3e3aaa56ea 100644 --- a/runtime/cudaq/qis/managers/photonics/photonics.yml +++ b/runtime/nvqir/qpp/photonics-cpu.yml @@ -6,7 +6,10 @@ # the terms of the Apache License 2.0 which accompanies this distribution. 
# # ============================================================================ # -name: photonics -description: "Photonics" +name: photonics-cpu +description: "Photonics QPP-based CPU-only backend target" config: library-mode-execution-manager: photonics + nvqir-simulation-backend: photonics + link-libs: ["-lcudaq-em-photonics"] + preprocessor-defines: ["-D CUDAQ_SIMULATION_SCALAR_FP64"] diff --git a/runtime/nvqir/stim/CMakeLists.txt b/runtime/nvqir/stim/CMakeLists.txt index 0436278edc..db5e18f72a 100644 --- a/runtime/nvqir/stim/CMakeLists.txt +++ b/runtime/nvqir/stim/CMakeLists.txt @@ -12,6 +12,11 @@ set(INTERFACE_POSITION_INDEPENDENT_CODE ON) set(STIM_SOURCE_DIR ${CMAKE_SOURCE_DIR}/tpls/Stim) set(STIM_BINARY_DIR ${CMAKE_BINARY_DIR}/tpls/Stim) +if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64") + # Constrain to AVX-2 to keep ourselves compatible with x86-64-v3. + set(SIMD_WIDTH 256 CACHE INTERNAL "Pass SIMD width to Stim subproject") +endif() + # The EXCLUDE_FROM_ALL makes it so that only libstim is built. If other targets # are desired (like the command-line tool), remove EXCLUDE_FROM_ALL below. 
add_subdirectory(${STIM_SOURCE_DIR} ${STIM_BINARY_DIR} EXCLUDE_FROM_ALL) diff --git a/runtime/nvqir/stim/StimCircuitSimulator.cpp b/runtime/nvqir/stim/StimCircuitSimulator.cpp index 623dd2b277..1cc86fcebe 100644 --- a/runtime/nvqir/stim/StimCircuitSimulator.cpp +++ b/runtime/nvqir/stim/StimCircuitSimulator.cpp @@ -45,7 +45,9 @@ class StimCircuitSimulator : public nvqir::CircuitSimulatorBase { /// @brief Apply the noise channel on \p qubits void applyNoiseChannel(const std::string_view gateName, - const std::vector &qubits) override { + const std::vector &controls, + const std::vector &targets, + const std::vector ¶ms) override { // Do nothing if no execution context if (!executionContext) return; @@ -59,13 +61,15 @@ class StimCircuitSimulator : public nvqir::CircuitSimulatorBase { // Cast size_t to uint32_t std::vector stimTargets; - stimTargets.reserve(qubits.size()); - for (auto q : qubits) + stimTargets.reserve(controls.size() + targets.size()); + for (auto q : controls) + stimTargets.push_back(static_cast(q)); + for (auto q : targets) stimTargets.push_back(static_cast(q)); // Get the Kraus channels specified for this gate and qubits - auto krausChannels = - executionContext->noiseModel->get_channels(gName, qubits); + auto krausChannels = executionContext->noiseModel->get_channels( + gName, controls, targets, params); // If none, do nothing if (krausChannels.empty()) diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 8c36fd1389..9fe3d92f8f 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -15,8 +15,10 @@ #include "common/ArgumentConversion.h" #include "cudaq/Optimizer/Dialect/CC/CCDialect.h" #include "cudaq/Optimizer/Dialect/Quake/QuakeDialect.h" +#include "cudaq/qis/pauli_word.h" #include "mlir/InitAllDialects.h" #include "mlir/Parser/Parser.h" +#include void doSimpleTest(mlir::MLIRContext *ctx, const std::string &typeName, std::vector args) { @@ -29,6 
+31,50 @@ func.func @__nvqpp__mlirgen__testy(%0: )#" + typeName + R"#() -> () return })#"; + // Create the Module + auto mod = mlir::parseSourceString(code, ctx); + llvm::outs() << "Source module:\n" << *mod << '\n'; + cudaq::opt::ArgumentConverter ab{"testy", *mod}; + // Create the argument conversions + ab.gen(args); + // Dump the conversions + llvm::outs() << "========================================\n" + "Substitution module:\n" + << ab.getSubstitutionModule() << '\n'; +} + +void doTest(mlir::MLIRContext *ctx, std::vector &typeNames, + std::vector args, std::size_t startingArgIdx = 0) { + + std::string code; + llvm::raw_string_ostream ss(code); + + // Create code + std::vector indices(args.size()); + std::iota(indices.begin(), indices.end(), 0); + auto argPairs = llvm::zip_equal(indices, typeNames); + + ss << "func.func private @callee("; + llvm::interleaveComma(argPairs, ss, [&](auto p) { + ss << "%" << std::get<0>(p) << ": " << std::get<1>(p); + }); + ss << ")\n"; + + ss << "func.func @__nvqpp__mlirgen__testy("; + llvm::interleaveComma(argPairs, ss, [&](auto p) { + ss << "%" << std::get<0>(p) << ": " << std::get<1>(p); + }); + ss << ") {"; + + ss << " call @callee("; + llvm::interleaveComma(indices, ss, [&](auto p) { ss << "%" << p; }); + + ss << "): ("; + llvm::interleaveComma(typeNames, ss, [&](auto t) { ss << t; }); + ss << ") -> ()\n"; + + ss << " return\n"; + ss << "}\n"; // Create the Module auto mod = mlir::parseSourceString(code, ctx); @@ -36,7 +82,7 @@ func.func @__nvqpp__mlirgen__testy(%0: )#" + cudaq::opt::ArgumentConverter ab{"testy", *mod}; // Create the argument conversions - ab.gen(args); + ab.gen_drop_front(args, startingArgIdx); // Dump the conversions llvm::outs() << "========================================\n" @@ -146,7 +192,7 @@ void test_scalars(mlir::MLIRContext *ctx) { // clang-format on { - std::string x = "Hi, there!"; + cudaq::pauli_word x{"XYZ"}; std::vector v = {static_cast(&x)}; doSimpleTest(ctx, "!cc.charspan", v); } @@ -156,12 
+202,12 @@ void test_scalars(mlir::MLIRContext *ctx) { // CHECK: Substitution module: // CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = cc.address_of @cstr.48692C2074686572652100 : !cc.ptr> -// CHECK: %[[VAL_1:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr>) -> !cc.ptr -// CHECK: %[[VAL_2:.*]] = arith.constant 10 : i64 +// CHECK: %[[VAL_0:.*]] = cc.address_of @cstr.58595A00 : !cc.ptr> +// CHECK: %[[VAL_1:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : i64 // CHECK: %[[VAL_3:.*]] = cc.stdvec_init %[[VAL_1]], %[[VAL_2]] : (!cc.ptr, i64) -> !cc.charspan // CHECK: } -// CHECK: llvm.mlir.global private constant @cstr.48692C2074686572652100("Hi, there!\00") {addr_space = 0 : i32} +// CHECK-DAG: llvm.mlir.global private constant @cstr.58595A00("XYZ\00") {addr_space = 0 : i32} // clang-format on } @@ -194,6 +240,34 @@ void test_vectors(mlir::MLIRContext *ctx) { // CHECK: %[[VAL_10:.*]] = cc.stdvec_init %[[VAL_0]], %[[VAL_9]] : (!cc.ptr>, i64) -> !cc.stdvec // CHECK: } // clang-format on + + { + std::vector x = {cudaq::pauli_word{"XX"}, + cudaq::pauli_word{"XY"}}; + std::vector v = {static_cast(&x)}; + doSimpleTest(ctx, "!cc.stdvec", v); + } + // clang-format off +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = cc.alloca !cc.array +// CHECK: %[[VAL_1:.*]] = cc.address_of @cstr.585800 : !cc.ptr> +// CHECK: %[[VAL_2:.*]] = cc.cast %[[VAL_1]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : i64 +// CHECK: %[[VAL_4:.*]] = cc.stdvec_init %[[VAL_2]], %[[VAL_3]] : (!cc.ptr, i64) -> !cc.charspan +// CHECK: %[[VAL_5:.*]] = cc.compute_ptr %[[VAL_0]][0] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_4]], %[[VAL_5]] : !cc.ptr +// CHECK: %[[VAL_6:.*]] = cc.address_of @cstr.585900 : !cc.ptr> +// CHECK: %[[VAL_7:.*]] = cc.cast %[[VAL_6]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_8:.*]] = arith.constant 2 : i64 +// CHECK: %[[VAL_9:.*]] = cc.stdvec_init %[[VAL_7]], %[[VAL_8]] : (!cc.ptr, i64) -> 
!cc.charspan +// CHECK: %[[VAL_10:.*]] = cc.compute_ptr %[[VAL_0]][1] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_9:.*]], %[[VAL_10:.*]] : !cc.ptr +// CHECK: %[[VAL_11:.*]] = arith.constant 2 : i64 +// CHECK: %[[VAL_12:.*]] = cc.stdvec_init %[[VAL_0]], %[[VAL_11]] : (!cc.ptr>, i64) -> !cc.stdvec +// CHECK: } +// CHECK-DAG: llvm.mlir.global private constant @cstr.585800("XX\00") {addr_space = 0 : i32} +// CHECK-DAG: llvm.mlir.global private constant @cstr.585900("XY\00") {addr_space = 0 : i32} + // clang-format on } void test_aggregates(mlir::MLIRContext *ctx) { @@ -304,18 +378,147 @@ void test_state(mlir::MLIRContext *ctx) { // CHECK: func.func private @callee(!cc.ptr) // CHECK: Substitution module: -// CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> -// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> -// CHECK: %[[VAL_2:.*]] = arith.constant 8 : i64 -// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> -// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr x 8>> -// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr -// CHECK: %[[VAL_5:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_4]], %[[VAL_2]]) : (!cc.ptr, i64) -> !cc.ptr -// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr) -> !cc.ptr +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> +// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> +// CHECK: %[[VAL_2:.*]] = arith.constant 8 : i64 +// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> +// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr x 8>> +// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr +// CHECK: %[[VAL_5:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_4]], %[[VAL_2]]) : (!cc.ptr, i64) -> !cc.ptr +// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr) -> !cc.ptr +// CHECK: } +// CHECK-DAG: cc.global constant @[[VAL_GC]] 
(dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> +// CHECK-DAG: func.func private @__nvqpp_cudaq_state_createFromData_fp64(!cc.ptr, i64) -> !cc.ptr + // clang-format on +} + +void test_combinations(mlir::MLIRContext *ctx) { + { + bool x = true; + std::vector v = {static_cast(&x)}; + std::vector t = {"i1"}; + doTest(ctx, t, v); + } + // clang-format off +// CHECK-LABEL: Source module: +// CHECK: func.func private @callee(i1) +// CHECK: Substitution module: + +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = arith.constant true +// CHECK: } + // clang-format on + + { + bool x = true; + bool y = false; + std::vector v = {static_cast(&x), static_cast(&y)}; + std::vector t = {"i1", "i1"}; + doTest(ctx, t, v); + } + // clang-format off +// CHECK: Source module: +// CHECK: func.func private @callee(i1, i1) +// CHECK: Substitution module: + +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = arith.constant true +// CHECK: } +// CHECK-LABEL: cc.arg_subst[1] { +// CHECK: %[[VAL_1:.*]] = arith.constant false +// CHECK: } + // clang-format on + + { + bool x = true; + std::int32_t y = 42; + std::vector v = {static_cast(&x), static_cast(&y)}; + std::vector t = {"i1", "i32"}; + doTest(ctx, t, v, 1); + } + + // clang-format off +// CHECK: Source module: +// CHECK: func.func private @callee(i1, i32) +// CHECK: Substitution module: + +// CHECK-LABEL: cc.arg_subst[1] { +// CHECK: %[[VAL_0:.*]] = arith.constant 42 : i32 +// CHECK: } + // clang-format on + + { + std::vector> data{M_SQRT1_2, M_SQRT1_2, 0., 0., + 0., 0., 0., 0.}; + + std::vector x = {0.5, 0.6}; + cudaq::state y{new FakeSimulationState(data.size(), data.data())}; + std::vector z = { + cudaq::pauli_word{"XX"}, + cudaq::pauli_word{"XY"}, + }; + + std::vector v = 
{static_cast(&x), static_cast(&y), + static_cast(&z)}; + std::vector t = {"!cc.stdvec", "!cc.ptr", + "!cc.stdvec"}; + doTest(ctx, t, v); + } + + // clang-format off +// CHECK: Source module: +// CHECK: func.func private @callee(!cc.stdvec, !cc.ptr, !cc.stdvec) +// CHECK: Substitution module: + +// CHECK-LABEL: cc.arg_subst[0] { +// CHECK: %[[VAL_0:.*]] = cc.alloca !cc.array +// CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_2:.*]] = cc.compute_ptr %[[VAL_0]][0] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_1]], %[[VAL_2]] : !cc.ptr +// CHECK: %[[VAL_3:.*]] = arith.constant 1.750000e+00 : f32 +// CHECK: %[[VAL_4:.*]] = cc.compute_ptr %[[VAL_0]][1] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_3]], %[[VAL_4]] : !cc.ptr +// CHECK: %[[VAL_5:.*]] = arith.constant 4.17232506E-8 : f32 +// CHECK: %[[VAL_6:.*]] = cc.compute_ptr %[[VAL_0]][2] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_5]], %[[VAL_6]] : !cc.ptr +// CHECK: %[[VAL_7:.*]] = arith.constant 1.775000e+00 : f32 +// CHECK: %[[VAL_8:.*]] = cc.compute_ptr %[[VAL_0]][3] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_7]], %[[VAL_8]] : !cc.ptr +// CHECK: %[[VAL_9:.*]] = arith.constant 4 : i64 +// CHECK: %[[VAL_10:.*]] = cc.stdvec_init %[[VAL_0]], %[[VAL_9]] : (!cc.ptr>, i64) -> !cc.stdvec +// CHECK: } +// CHECK-LABEL: cc.arg_subst[1] { +// CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> +// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> +// CHECK: %[[VAL_2:.*]] = arith.constant 8 : i64 +// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> +// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr x 8>> +// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr +// CHECK: %[[VAL_5:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_4]], %[[VAL_2]]) : (!cc.ptr, i64) -> !cc.ptr +// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr) -> !cc.ptr // CHECK: } // CHECK-DAG: cc.global constant @[[VAL_GC]] 
(dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> // CHECK-DAG: func.func private @__nvqpp_cudaq_state_createFromData_fp64(!cc.ptr, i64) -> !cc.ptr +// CHECK-LABEL: cc.arg_subst[2] { +// CHECK: %[[VAL_0:.*]] = cc.alloca !cc.array +// CHECK: %[[VAL_1:.*]] = cc.address_of @cstr.585800 : !cc.ptr> +// CHECK: %[[VAL_2:.*]] = cc.cast %[[VAL_1]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : i64 +// CHECK: %[[VAL_4:.*]] = cc.stdvec_init %[[VAL_2]], %[[VAL_3]] : (!cc.ptr, i64) -> !cc.charspan +// CHECK: %[[VAL_5:.*]] = cc.compute_ptr %[[VAL_0]][0] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_4]], %[[VAL_5]] : !cc.ptr +// CHECK: %[[VAL_6:.*]] = cc.address_of @cstr.585900 : !cc.ptr> +// CHECK: %[[VAL_7:.*]] = cc.cast %[[VAL_6]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_8:.*]] = arith.constant 2 : i64 +// CHECK: %[[VAL_9:.*]] = cc.stdvec_init %[[VAL_7]], %[[VAL_8]] : (!cc.ptr, i64) -> !cc.charspan +// CHECK: %[[VAL_10:.*]] = cc.compute_ptr %[[VAL_0]][1] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_9]], %[[VAL_10]] : !cc.ptr +// CHECK: %[[VAL_11:.*]] = arith.constant 2 : i64 +// CHECK: %[[VAL_12:.*]] = cc.stdvec_init %[[VAL_0]], %[[VAL_11]] : (!cc.ptr>, i64) -> !cc.stdvec +// CHECK: } +// CHECK-DAG: llvm.mlir.global private constant @cstr.585800("XX\00") {addr_space = 0 : i32} +// CHECK-DAG: llvm.mlir.global private constant @cstr.585900("XY\00") {addr_space = 0 : i32} // clang-format on } @@ -330,5 +533,6 @@ int main() { test_aggregates(&context); test_recursive(&context); test_state(&context); + test_combinations(&context); return 0; } diff --git a/scripts/validate_container.sh b/scripts/validate_container.sh index f448e42bea..b37feec885 100644 --- a/scripts/validate_container.sh +++ 
b/scripts/validate_container.sh @@ -70,6 +70,10 @@ available_backends=`\ if grep -q "library-mode-execution-manager: photonics" $file ; then continue fi + # Skip optimization test targets + if [[ $file == *"opt-test.yml" ]]; then + continue + fi if grep -q "nvqir-simulation-backend: stim" $file ; then continue fi @@ -177,7 +181,7 @@ do # Skipped long-running tests (variational optimization loops) for the "remote-mqpu" target to keep CI runtime managable. # A simplified test for these use cases is included in the 'test/Remote-Sim/' test suite. # Skipped tests that require passing kernel callables to entry-point kernels for the "remote-mqpu" target. - if [[ "$ex" == *"vqe_h2"* || "$ex" == *"qaoa_maxcut"* || "$ex" == *"gradients"* || "$ex" == *"grover"* || "$ex" == *"multi_controlled_operations"* || "$ex" == *"phase_estimation"* || "$ex" == *"trotter_kernel"* || "$ex" == *"builder.cpp"* ]]; + if [[ "$ex" == *"vqe_h2"* || "$ex" == *"qaoa_maxcut"* || "$ex" == *"gradients"* || "$ex" == *"grover"* || "$ex" == *"multi_controlled_operations"* || "$ex" == *"phase_estimation"* || "$ex" == *"trotter_kernel_mode"* || "$ex" == *"builder.cpp"* ]]; then let "skipped+=1" echo "Skipping $t target."; diff --git a/targettests/CMakeLists.txt b/targettests/CMakeLists.txt index 2559d89b46..6eea822da0 100644 --- a/targettests/CMakeLists.txt +++ b/targettests/CMakeLists.txt @@ -26,14 +26,22 @@ set(CUDAQ_TEST_PARAMS get_property(test_cudaq_libraries GLOBAL PROPERTY CUDAQ_RUNTIME_LIBS) set(CUDAQ_TEST_DEPENDS + CircuitCheck cudaq-opt cudaq-translate - CircuitCheck FileCheck +) +# We require split-file, which should be installed along with FileCheck, but +# the CI doesn't do it. Comment this out and open a bug. 
+# split-file +if (NOT CUDAQ_DISABLE_CPP_FRONTEND) + set(CUDAQ_TEST_DEPENDS ${CUDAQ_TEST_DEPENDS} cudaq-quake fixup-linkage + nvq++ ${test_cudaq_libraries} -) + ) +endif() add_custom_target(nvqpp-targettest-depends DEPENDS ${CUDAQ_TEST_DEPENDS}) set_target_properties(nvqpp-targettest-depends PROPERTIES FOLDER "TargetTests") diff --git a/targettests/Remote-Sim/qvector_init_from_state.cpp b/targettests/Remote-Sim/qvector_init_from_state.cpp index 7aacd122b8..5899c2f598 100644 --- a/targettests/Remote-Sim/qvector_init_from_state.cpp +++ b/targettests/Remote-Sim/qvector_init_from_state.cpp @@ -16,22 +16,47 @@ #include #include +#include +#include __qpu__ void test_init_state() { cudaq::qvector q(2); - ry(M_PI/2.0, q[0]); + ry(M_PI / 2.0, q[0]); } __qpu__ void test_init_large_state() { cudaq::qvector q(14); - ry(M_PI/2.0, q[0]); + ry(M_PI / 2.0, q[0]); } -__qpu__ void test_state_param(cudaq::state* state) { +__qpu__ void test_state_param(cudaq::state *state) { cudaq::qvector q1(state); + x(q1); } -void printCounts(cudaq::sample_result& result) { +__qpu__ void test_state_param2(cudaq::state *state, cudaq::pauli_word w) { + cudaq::qvector q(state); + cudaq::exp_pauli(1.0, q, w); +} + +__qpu__ void test_state_param3(cudaq::state *initial_state, + std::vector &words) { + cudaq::qvector q(initial_state); + for (std::size_t i = 0; i < words.size(); ++i) { + cudaq::exp_pauli(1.0, q, words[i]); + } +} + +__qpu__ void test_state_param4(cudaq::state *initial_state, + std::vector &coefficients, + std::vector &words) { + cudaq::qvector q(initial_state); + for (std::size_t i = 0; i < words.size(); ++i) { + cudaq::exp_pauli(coefficients[i], q, words[i]); + } +} + +void printCounts(cudaq::sample_result &result) { std::vector values{}; for (auto &&[bits, counts] : result) { values.push_back(bits); @@ -39,47 +64,168 @@ void printCounts(cudaq::sample_result& result) { std::sort(values.begin(), values.end()); for (auto &&bits : values) { - std::cout << bits << '\n'; + std::cout << bits << 
std::endl; } } int main() { std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0., 0., 0., 0., 0.}; - std::vector vec1{0., 0., 0., 0., 0., 0., M_SQRT1_2, M_SQRT1_2}; + std::vector vec1{0., 0., 0., 0., + 0., 0., M_SQRT1_2, M_SQRT1_2}; auto state = cudaq::state::from_data(vec); auto state1 = cudaq::state::from_data(vec1); { - // Passing state created from data as argument (kernel mode) - auto counts = cudaq::sample(test_state_param, &state); - printCounts(counts); + std::cout << "Passing state created from data as argument (kernel mode)" + << std::endl; + auto counts = cudaq::sample(test_state_param, &state); + printCounts(counts); - counts = cudaq::sample(test_state_param, &state1); - printCounts(counts); + counts = cudaq::sample(test_state_param, &state1); + printCounts(counts); } -// CHECK: 000 -// CHECK: 100 - + // clang-format off +// CHECK: Passing state created from data as argument (kernel mode) // CHECK: 011 // CHECK: 111 +// CHECK: 000 +// CHECK: 100 + // clang-format on + { - // Passing state from another kernel as argument (kernel mode) + std::cout << "Passing state from another kernel as argument (kernel mode)" + << std::endl; auto state = cudaq::get_state(test_init_state); auto counts = cudaq::sample(test_state_param, &state); printCounts(counts); } - -// CHECK: 00 -// CHECK: 10 + // clang-format off +// CHECK: Passing state from another kernel as argument (kernel mode) +// CHECK: 01 +// CHECK: 11 + // clang-format on { - // Passing large state from another kernel as argument (kernel mode) + std::cout + << "Passing large state from another kernel as argument (kernel mode)" + << std::endl; auto largeState = cudaq::get_state(test_init_large_state); auto counts = cudaq::sample(test_state_param, &largeState); printCounts(counts); } + // clang-format off +// CHECK: Passing large state from another kernel as argument (kernel mode) +// CHECK: 01111111111111 +// CHECK: 11111111111111 + // clang-format on + + { + std::cout << "Passing state from another kernel as 
argument iteratively " + "(kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state); + for (auto i = 0; i < 4; i++) { + auto counts = cudaq::sample(test_state_param, &state); + std::cout << "Iteration: " << i << std::endl; + printCounts(counts); + state = cudaq::get_state(test_state_param, &state); + } + } + // clang-format off +// CHECK: Passing state from another kernel as argument iteratively (kernel mode) +// CHECK: Iteration: 0 +// CHECK: 01 +// CHECK: 11 +// CHECK: Iteration: 1 +// CHECK: 00 +// CHECK: 10 +// CHECK: Iteration: 2 +// CHECK: 01 +// CHECK: 11 +// CHECK: Iteration: 3 +// CHECK: 00 +// CHECK: 10 + // clang-format on -// CHECK: 00000000000000 -// CHECK: 10000000000000 + { + std::cout << "Passing state from another kernel as argument iteratively " + "with vector args (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state); + auto words = std::vector{cudaq::pauli_word{"XX"}}; + for (auto i = 0; i < 4; i++) { + auto counts = cudaq::sample(test_state_param3, &state, words); + std::cout << "Iteration: " << i << std::endl; + printCounts(counts); + state = cudaq::get_state(test_state_param3, &state, words); + words = std::vector{cudaq::pauli_word{"XY"}}; + } + } + // Passing state from another kernel as argument iteratively with vector args + // (kernel mode) + // clang-format off +// CHECK: Iteration: 0 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 1 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 2 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 3 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 + // clang-format on + + { + std::cout << "Passing state from another kernel as argument iteratively " + "with vector args with 2 elements (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state); + auto words = std::vector{cudaq::pauli_word{"XX"}, + cudaq::pauli_word{"II"}}; + auto coeffs = 
std::vector{1.0, 2.0}; + for (auto i = 0; i < 4; i++) { + auto counts = cudaq::sample(test_state_param4, &state, coeffs, words); + std::cout << "Iteration: " << i << std::endl; + printCounts(counts); + state = cudaq::get_state(test_state_param4, &state, coeffs, words); + words = std::vector{cudaq::pauli_word{"II"}, + cudaq::pauli_word{"XY"}}; + coeffs = std::vector{1.0, 2.0}; + } + } + // clang-format off +// CHECK: Passing state from another kernel as argument iteratively with vector args with 2 elements (kernel mode) +// CHECK: Iteration: 0 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 1 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 2 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 3 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 + // clang-format on } diff --git a/targettests/Remote-Sim/qvector_init_from_state_lazy.cpp b/targettests/Remote-Sim/qvector_init_from_state_lazy.cpp new file mode 100644 index 0000000000..1ee18822db --- /dev/null +++ b/targettests/Remote-Sim/qvector_init_from_state_lazy.cpp @@ -0,0 +1,211 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// REQUIRES: remote-sim +// REQUIRES: c++20 + +// clang-format off +// TODO-FIX-KERNEL-EXEC +// RUN: nvq++ %cpp_std --enable-mlir --target remote-mqpu -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// clang-format on + +#include +#include + +struct test_init_state { + void operator()() __qpu__ { + cudaq::qvector q(2); + ry(M_PI/2.0, q[0]); + } +}; + +struct test_init_large_state { + void operator()() __qpu__ { + cudaq::qvector q(14); + ry(M_PI/2.0, q[0]); + } +}; + +struct test_state_param { + void operator()(cudaq::state *initial_state) __qpu__ { + cudaq::qvector q(initial_state); + x(q); + } +}; + +struct test_state_param2 { + void operator()(cudaq::state *initial_state, cudaq::pauli_word w) __qpu__ { + cudaq::qvector q(initial_state); + cudaq::exp_pauli(1.0, q, w); + } +}; + +struct test_state_param3 { + void operator()(cudaq::state *initial_state, std::vector& words) __qpu__ { + cudaq::qvector q(initial_state); + for (std::size_t i = 0; i < words.size(); ++i) { + cudaq::exp_pauli(1.0, q, words[i]); + } + } +}; + +struct test_state_param4 { + void operator()(cudaq::state *initial_state, std::vector &coefficients, std::vector& words) __qpu__ { + cudaq::qvector q(initial_state); + for (std::size_t i = 0; i < words.size(); ++i) { + cudaq::exp_pauli(coefficients[i], q, words[i]); + } + } +}; + +void printCounts(cudaq::sample_result& result) { + std::vector values{}; + for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { + std::cout << bits << std::endl; + } +} + +int main() { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0., 0., 0., 0., 0.}; + std::vector vec1{0., 0., 0., 0., 0., 0., M_SQRT1_2, M_SQRT1_2}; + auto state = cudaq::state::from_data(vec); + auto state1 = cudaq::state::from_data(vec1); + { + std::cout << "Passing state created from data as argument (kernel mode)" << std::endl; 
+ auto counts = cudaq::sample(test_state_param{}, &state); + printCounts(counts); + + counts = cudaq::sample(test_state_param{}, &state1); + printCounts(counts); + } +// CHECK: Passing state created from data as argument (kernel mode) +// CHECK: 011 +// CHECK: 111 + +// CHECK: 000 +// CHECK: 100 + + { + std::cout << "Passing state from another kernel as argument (kernel mode)" << std::endl; + auto state = cudaq::get_state(test_init_state{}); + auto counts = cudaq::sample(test_state_param{}, &state); + printCounts(counts); + } +// CHECK: Passing state from another kernel as argument (kernel mode) +// CHECK: 01 +// CHECK: 11 + + { + std::cout << "Passing large state from another kernel as argument (kernel mode)" << std::endl; + auto largeState = cudaq::get_state(test_init_large_state{}); + auto counts = cudaq::sample(test_state_param{}, &largeState); + printCounts(counts); + } +// CHECK: Passing large state from another kernel as argument (kernel mode) +// CHECK: 01111111111111 +// CHECK: 11111111111111 + + { + std::cout << "Passing state from another kernel as argument iteratively (kernel mode)" << std::endl; + auto state = cudaq::get_state(test_init_state{}); + for (auto i = 0; i < 4; i++) { + auto counts = cudaq::sample(test_state_param{}, &state); + std::cout << "Iteration: " << i << std::endl; + printCounts(counts); + state = cudaq::get_state(test_state_param{}, &state); + } + } +// CHECK: Passing state from another kernel as argument iteratively (kernel mode) +// CHECK: Iteration: 0 +// CHECK: 01 +// CHECK: 11 +// CHECK: Iteration: 1 +// CHECK: 00 +// CHECK: 10 +// CHECK: Iteration: 2 +// CHECK: 01 +// CHECK: 11 +// CHECK: Iteration: 3 +// CHECK: 00 +// CHECK: 10 + + { + std::cout << "Passing state from another kernel as argument iteratively with vector args (kernel mode)" << std::endl; + auto state = cudaq::get_state(test_init_state{}); + auto words = std::vector{cudaq::pauli_word{"XX"}}; + for (auto i = 0; i < 4; i++) { + auto counts = 
cudaq::sample(test_state_param3{}, &state, words); + std::cout << "Iteration: " << i << std::endl; + printCounts(counts); + state = cudaq::get_state(test_state_param3{}, &state, words); + words = std::vector{cudaq::pauli_word{"XY"}}; + } + } +// CHECK: Passing state from another kernel as argument iteratively with vector args (kernel mode) +// CHECK: Iteration: 0 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 1 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 2 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 3 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 + + { + std::cout << "Passing state from another kernel as argument iteratively with vector args with 2 elements (kernel mode)" << std::endl; + auto state = cudaq::get_state(test_init_state{}); + auto words = std::vector{cudaq::pauli_word{"XX"}, cudaq::pauli_word{"II"}}; + auto coeffs = std::vector{1.0, 2.0}; + for (auto i = 0; i < 4; i++) { + auto counts = cudaq::sample(test_state_param4{}, &state, coeffs, words); + std::cout << "Iteration: " << i << std::endl; + printCounts(counts); + state = cudaq::get_state(test_state_param4{}, &state, coeffs, words); + words = std::vector{cudaq::pauli_word{"II"}, cudaq::pauli_word{"XY"}}; + coeffs = std::vector{1.0, 2.0}; + } + } +// CHECK: Passing state from another kernel as argument iteratively with vector args with 2 elements (kernel mode) +// CHECK: Iteration: 0 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 1 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 2 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +// CHECK: Iteration: 3 +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 +} diff --git a/targettests/Remote-Sim/test_trotter.cpp b/targettests/Remote-Sim/test_trotter.cpp new file mode 100644 index 0000000000..abb6d52363 --- /dev/null +++ b/targettests/Remote-Sim/test_trotter.cpp @@ -0,0 +1,178 
@@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// clang-format off +// TODO-FIX-KERNEL-EXEC +// RUN: nvq++ %cpp_std --enable-mlir --target remote-mqpu -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// clang-format on + +#include +#include +#include +#include + +// Compute magnetization using Suzuki-Trotter approximation. +// This example demonstrates usage of quantum states in kernel mode. +// +// Details +// https://pubs.aip.org/aip/jmp/article-abstract/32/2/400/229229/General-theory-of-fractal-path-integrals-with +// +// Hamiltonian used +// https://en.m.wikipedia.org/wiki/Quantum_Heisenberg_model + +// If you have a NVIDIA GPU you can use this example to see +// that the GPU-accelerated backends can easily handle a +// larger number of qubits compared the CPU-only backend. +// +// Depending on the available memory on your GPU, you can +// set the number of qubits to around 30 qubits, and run +// the execution command with `-target nvidia` option. +// +// Note: Without setting the target to the `nvidia` backend, +// there will be a noticeable decrease in simulation performance. +// This is because the CPU-only backend has difficulty handling +// 30+ qubit simulations. 
+ +int SPINS = 11; // set to around 25 qubits for `nvidia` target +int STEPS = 10; // set to around 100 for `nvidia` target + +// Compile and run with: +// clang-format off +// ``` +// nvq++ --enable-mlir -v trotter_kernel_mode.cpp -o trotter.x --target nvidia && ./trotter.x +// ``` +// clang-format off + +// Alternating up/down spins +struct initState { + void operator()(int num_spins) __qpu__ { + cudaq::qvector q(num_spins); + for (int qId = 0; qId < num_spins; qId += 2) + x(q[qId]); + } +}; + +std::vector term_coefficients(cudaq::spin_op op) { + std::vector result{}; + op.for_each_term([&](cudaq::spin_op &term) { + const auto coeff = term.get_coefficient().real(); + result.push_back(coeff); + }); + return result; +} + +std::vector term_words(cudaq::spin_op op) { + std::vector result{}; + op.for_each_term( + [&](cudaq::spin_op &term) { result.push_back(term.to_string(false)); }); + return result; +} + +struct trotter { + // Note: This performs a single-step Trotter on top of an initial state, e.g., + // result state of the previous Trotter step. 
+ void operator()(cudaq::state *initial_state, + std::vector &coefficients, + std::vector &words, double dt) __qpu__ { + cudaq::qvector q(initial_state); + for (std::size_t i = 0; i < coefficients.size(); ++i) { + cudaq::exp_pauli(coefficients[i] * dt, q, words[i]); + } + } +}; + +int run_steps(int steps, int spins) { + const double g = 1.0; + const double Jx = 1.0; + const double Jy = 1.0; + const double Jz = g; + const double dt = 0.05; + const int n_steps = steps; + const int n_spins = spins; + const double omega = 2 * M_PI; + const auto heisenbergModelHam = [&](double t) -> cudaq::spin_op { + cudaq::spin_op tdOp(n_spins); + for (int i = 0; i < n_spins - 1; ++i) { + tdOp += (Jx * cudaq::spin::x(i) * cudaq::spin::x(i + 1)); + tdOp += (Jy * cudaq::spin::y(i) * cudaq::spin::y(i + 1)); + tdOp += (Jz * cudaq::spin::z(i) * cudaq::spin::z(i + 1)); + } + for (int i = 0; i < n_spins; ++i) + tdOp += (std::cos(omega * t) * cudaq::spin::x(i)); + return tdOp; + }; + // Observe the average magnetization of all spins () + cudaq::spin_op average_magnetization(n_spins); + for (int i = 0; i < n_spins; ++i) + average_magnetization += ((1.0 / n_spins) * cudaq::spin::z(i)); + average_magnetization -= 1.0; + + // Run loop + auto state = cudaq::get_state(initState{}, n_spins); + std::vector expResults; + std::vector runtimeMs; + for (int i = 0; i < n_steps; ++i) { + const auto start = std::chrono::high_resolution_clock::now(); + auto ham = heisenbergModelHam(i * dt); + auto coefficients = term_coefficients(ham); + auto words = term_words(ham); + auto magnetization_exp_val = cudaq::observe( + trotter{}, average_magnetization, &state, coefficients, words, dt); + auto result = magnetization_exp_val.expectation(); + expResults.emplace_back(result); + state = cudaq::get_state(trotter{}, &state, coefficients, words, dt); + const auto stop = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(stop - start); + auto timeInSeconds = duration.count() / 
1000.0 / 1000.0; + runtimeMs.emplace_back(timeInSeconds); + std::cout << "Step " << i << ": time [s]: " << timeInSeconds + << ", result: " << result << std::endl; + } + std::cout << std::endl; + + // Print runtimes and results (useful for plotting). + std::cout << "Step times [s]: ["; + for (const auto &x : runtimeMs) + std::cout << x << ", "; + std::cout << "]" << std::endl; + + std::cout << "Results: ["; + for (const auto &x : expResults) + std::cout << x << ", "; + std::cout << "]" << std::endl; + + std::cout << std::endl; + return 0; +} + +int main() { + const auto start = std::chrono::high_resolution_clock::now(); + run_steps(STEPS, SPINS); + const auto stop = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(stop - start); + std::cout << "Total running time: " << duration.count() / 1000.0 / 1000.0 + << "s" << std::endl; +} + +// CHECK: Step 0: time [s]: [[t0:.*]], result: [[v0:.*]] +// CHECK: Step 1: time [s]: [[t1:.*]], result: [[v1:.*]] +// CHECK: Step 2: time [s]: [[t2:.*]], result: [[v2:.*]] +// CHECK: Step 3: time [s]: [[t3:.*]], result: [[v3:.*]] +// CHECK: Step 4: time [s]: [[t4:.*]], result: [[v4:.*]] +// CHECK: Step 5: time [s]: [[t5:.*]], result: [[v5:.*]] +// CHECK: Step 6: time [s]: [[t6:.*]], result: [[v6:.*]] +// CHECK: Step 7: time [s]: [[t7:.*]], result: [[v7:.*]] +// CHECK: Step 8: time [s]: [[t8:.*]], result: [[v8:.*]] +// CHECK: Step 9: time [s]: [[t9:.*]], result: [[v9:.*]] + +// CHECK: Step times [s]: [[ts:.*]] +// CHECK: Results: [[rs:.*]] + +// CHECK: Total running time: [[tts:.*]]s diff --git a/targettests/SeparateCompilation/basic.cpp b/targettests/SeparateCompilation/basic.cpp new file mode 100644 index 0000000000..eae9d13f3d --- /dev/null +++ b/targettests/SeparateCompilation/basic.cpp @@ -0,0 +1,55 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. 
* + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// REQUIRES: c++20 +// clang-format off +// RUN: if [ command -v split-file ]; then \ +// RUN: split-file %s %t && \ +// RUN: nvq++ --enable-mlir -c %t/baselib.cpp -o %t/baselib.o && \ +// RUN: nvq++ --enable-mlir -c %t/baseuser.cpp -o %t/baseuser.o && \ +// RUN: nvq++ --enable-mlir %t/baselib.o %t/baseuser.o -o %t/base.a.out && \ +// RUN: %t/base.a.out | FileCheck %s ; else \ +// RUN: echo "skipping"; fi +// clang-format on + +//--- baselib.h + +#include "cudaq.h" + +__qpu__ void dunkadee(cudaq::qvector<> &q); + +//--- baselib.cpp + +#include "baselib.h" +#include + +void rollcall() { std::cout << "library function here, sir!\n"; } + +__qpu__ void dunkadee(cudaq::qvector<> &q) { + x(q[0]); + rollcall(); +} + +//--- baseuser.cpp + +#include "baselib.h" +#include + +__qpu__ void userKernel(const cudaq::qkernel &)> &init) { + cudaq::qvector q(2); + init(q); +} + +int main() { + userKernel(dunkadee); + std::cout << "Hello, World!\n"; + return 0; +} + +// CHECK: library function here +// CHECK: Hello, World diff --git a/targettests/SeparateCompilation/class.cpp b/targettests/SeparateCompilation/class.cpp new file mode 100644 index 0000000000..eec15ccedd --- /dev/null +++ b/targettests/SeparateCompilation/class.cpp @@ -0,0 +1,57 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// REQUIRES: c++20 +// clang-format off +// RUN: if [ command -v split-file ]; then \ +// RUN: split-file %s %t && \ +// RUN: nvq++ --enable-mlir -c %t/classlib.cpp -o %t/classlib.o && \ +// RUN: nvq++ --enable-mlir -c %t/classuser.cpp -o %t/classuser.o && \ +// RUN: nvq++ --enable-mlir %t/classlib.o %t/classuser.o -o %t/class.a.out && \ +// RUN: %t/class.a.out | FileCheck %s ; else \ +// RUN: echo "skipping" ; fi +// clang-format on + +//--- classlib.h + +#include "cudaq.h" + +struct HereIsTheThing { + void operator()(cudaq::qvector<> &q) __qpu__; +}; + +//--- classlib.cpp + +#include "classlib.h" +#include + +void rollcall() { std::cout << "library function here, sir!\n"; } + +void HereIsTheThing::operator()(cudaq::qvector<> &q) __qpu__ { + x(q[0]); + rollcall(); +} + +//--- classuser.cpp + +#include "classlib.h" +#include + +__qpu__ void userKernel(const cudaq::qkernel &)> &init) { + cudaq::qvector q(2); + init(q); +} + +int main() { + userKernel(HereIsTheThing{}); + std::cout << "Hello, World!\n"; + return 0; +} + +// CHECK: library function here +// CHECK: Hello, World diff --git a/targettests/SeparateCompilation/deduction_guide.cpp b/targettests/SeparateCompilation/deduction_guide.cpp new file mode 100644 index 0000000000..9f64c5e5d6 --- /dev/null +++ b/targettests/SeparateCompilation/deduction_guide.cpp @@ -0,0 +1,48 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// REQUIRES: c++20 +// clang-format off +// RUN: if [ command -v split-file ]; then \ +// RUN: split-file %s %t && \ +// RUN: nvq++ --enable-mlir -c %t/udedgulib.cpp -o %t/udedgulib.o && \ +// RUN: nvq++ --enable-mlir -c %t/udedguuser.cpp -o %t/udedguuser.o && \ +// RUN: nvq++ --enable-mlir %t/udedgulib.o %t/udedguuser.o -o %t/udedgu.x && \ +// RUN: %t/udedgu.x | FileCheck %s ; else \ +// RUN: echo "skipping" ; fi +// clang-format on + +//--- udedgulib.h + +#include "cudaq.h" + +__qpu__ void dunkadee(cudaq::qvector<> &q); + +//--- udedgulib.cpp + +#include "udedgulib.h" + +__qpu__ void dunkadee(cudaq::qvector<> &q) { x(q[0]); } + +//--- udedguuser.cpp + +#include "udedgulib.h" +#include + +__qpu__ void userKernel(const cudaq::qkernel &)> &init) { + cudaq::qvector q(2); + init(q); +} + +int main() { + cudaq::sample(10, userKernel, dunkadee); + std::cout << "Hello, World!\n"; + return 0; +} + +// CHECK: Hello, World diff --git a/targettests/SeparateCompilation/emulate.cpp b/targettests/SeparateCompilation/emulate.cpp new file mode 100644 index 0000000000..9128a93548 --- /dev/null +++ b/targettests/SeparateCompilation/emulate.cpp @@ -0,0 +1,49 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// clang-format off +// RUN: if [ command -v split-file ]; then \ +// RUN: split-file %s %t && \ +// RUN: nvq++ %cpp_std -target quantinuum -emulate -fno-set-target-backend -c %t/emulib.cpp -o %t/emulibx.o && \ +// RUN: nvq++ %cpp_std -target quantinuum -emulate -c %t/emuuser.cpp -o %t/emuuserx.o && \ +// RUN: nvq++ %cpp_std -target quantinuum -emulate %t/emulibx.o %t/emuuserx.o -o %t/emux.a.out && \ +// RUN: %t/emux.a.out | FileCheck %s ; else \ +// RUN: echo "skipping" ; fi +// clang-format on + +//--- emulib.h + +#include "cudaq.h" + +__qpu__ void dunkadee(cudaq::qvector<> &q); + +//--- emulib.cpp + +#include "emulib.h" +#include + +__qpu__ void dunkadee(cudaq::qvector<> &q) { x(q[0]); } + +//--- emuuser.cpp + +#include "emulib.h" +#include + +__qpu__ void userKernel(const cudaq::qkernel &)> &init) { + cudaq::qvector q(2); + init(q); +} + +int main() { + cudaq::sample(10, userKernel, + cudaq::qkernel &)>{dunkadee}); + std::cout << "Hello, World!\n"; + return 0; +} + +// CHECK: Hello, World diff --git a/targettests/SeparateCompilation/lambda.cpp b/targettests/SeparateCompilation/lambda.cpp new file mode 100644 index 0000000000..724d81d804 --- /dev/null +++ b/targettests/SeparateCompilation/lambda.cpp @@ -0,0 +1,52 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// REQUIRES: c++20 +// clang-format off +// RUN: if [ command -v split-file ]; then \ +// RUN: split-file %s %t && \ +// RUN: nvq++ --enable-mlir -c %t/anonlib.cpp -o %t/anonlib.o && \ +// RUN: nvq++ --enable-mlir -c %t/anonuser.cpp -o %t/anonuser.o && \ +// RUN: nvq++ --enable-mlir %t/anonlib.o %t/anonuser.o -o %t/anon.a.out && \ +// RUN: %t/anon.a.out | FileCheck %s ; else \ +// RUN: echo "skipping" ; fi +// clang-format on + +//--- anonlib.h + +#include "cudaq.h" + +__qpu__ void userKernel(const cudaq::qkernel &)> &); + +//--- anonlib.cpp + +#include "anonlib.h" + +__qpu__ void userKernel(const cudaq::qkernel &)> &init) { + cudaq::qvector q(2); + init(q); +} + +//--- anonuser.cpp + +#include "anonlib.h" +#include + +void rollcall() { std::cout << "elsewhere function here, sir!\n"; } + +int main() { + userKernel([](cudaq::qvector<> &q) __qpu__ { + x(q[0]); + rollcall(); + }); + std::cout << "Hello, World!\n"; + return 0; +} + +// CHECK: elsewhere function here +// CHECK: Hello, World diff --git a/targettests/SeparateCompilation/multiple_callables.cpp b/targettests/SeparateCompilation/multiple_callables.cpp new file mode 100644 index 0000000000..2f8bc5e6a1 --- /dev/null +++ b/targettests/SeparateCompilation/multiple_callables.cpp @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// REQUIRES: c++20 +// RUN: nvq++ --enable-mlir %s -o %t && %t + +#include "cudaq.h" + +__qpu__ void entry(const cudaq::qkernel &)> &o, + const cudaq::qkernel &, int)> &p) { + cudaq::qvector q(2); + o(q); + p(q, 1); +} + +int main() { + auto l = [](cudaq::qvector<> &q) __qpu__ { x(q[0]); }; + auto m = [](cudaq::qvector<> &q, int i) __qpu__ { y(q[i]); }; + + entry(l, m); + return 0; +} diff --git a/targettests/SeparateCompilation/pure_device.cpp b/targettests/SeparateCompilation/pure_device.cpp new file mode 100644 index 0000000000..224311a311 --- /dev/null +++ b/targettests/SeparateCompilation/pure_device.cpp @@ -0,0 +1,50 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// clang-format off +// RUN: if [ command -v split-file ]; then \ +// RUN: split-file %s %t && \ +// RUN: nvq++ %cpp_std --enable-mlir -c %t/pd_lib.cpp -o %t/pd_lib.o && \ +// RUN: nvq++ %cpp_std --enable-mlir -c %t/pd_main.cpp -o %t/pd_main.o && \ +// RUN: nvq++ %cpp_std --enable-mlir %t/pd_lib.o %t/pd_main.o -o %t/pd.a.out && \ +// RUN: %t/pd.a.out | FileCheck %s ; else \ +// RUN: echo "skipping" ; fi +// clang-format on + +//--- pd_lib.h + +#pragma once + +#include "cudaq.h" + +// NB: The __qpu__ here on this declaration cannot be omitted! +__qpu__ void callMe(cudaq::qvector<> &q, int i); + +//--- pd_lib.cpp + +#include "pd_lib.h" + +void send_bat_signal() { std::cout << "na na na na na ... 
BATMAN!\n"; } + +__qpu__ void callMe(cudaq::qvector<> &q, int i) { + ry(2.2, q[0]); + send_bat_signal(); +} + +//--- pd_main.cpp + +#include "pd_lib.h" + +__qpu__ void entry() { + cudaq::qvector q(2); + callMe(q, 5); +} + +int main() { entry(); } + +// CHECK: na ... BATMAN! diff --git a/targettests/TargetConfig/RegressionValidation/photonics.config b/targettests/TargetConfig/RegressionValidation/photonics.config index 6643d3b7e8..f6c7c23e02 100644 --- a/targettests/TargetConfig/RegressionValidation/photonics.config +++ b/targettests/TargetConfig/RegressionValidation/photonics.config @@ -6,6 +6,9 @@ # the terms of the Apache License 2.0 which accompanies this distribution. # # ============================================================================ # -# RUN: cudaq-target-conf -o %t %cudaq_target_dir/photonics.yml && cat %t | FileCheck %s +# RUN: cudaq-target-conf -o %t %cudaq_target_dir/photonics-cpu.yml && cat %t | FileCheck %s # CHECK-DAG: LIBRARY_MODE_EXECUTION_MANAGER=photonics +# CHECK-DAG: NVQIR_SIMULATION_BACKEND="photonics" +# CHECK-DAG: LINKLIBS="${LINKLIBS} -lcudaq-em-photonics" +# CHECK-DAG: PREPROCESSOR_DEFINES="${PREPROCESSOR_DEFINES} -D CUDAQ_SIMULATION_SCALAR_FP64" diff --git a/targettests/execution/cudaq_observe.cpp b/targettests/execution/cudaq_observe.cpp index dcba1964d2..daa10c1425 100644 --- a/targettests/execution/cudaq_observe.cpp +++ b/targettests/execution/cudaq_observe.cpp @@ -8,7 +8,6 @@ // REQUIRES: c++20 // clang-format off -// RUN: nvq++ --target anyon --emulate %s -o %t && %t | FileCheck %s // RUN: nvq++ --target ionq --emulate %s -o %t && %t | FileCheck %s // 2 different IQM machines for 2 different topologies // RUN: nvq++ --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s @@ -49,7 +48,7 @@ int main() { return 0; } -// Note: seeds 2 and 12 will push this to -2 instead of -1. All all other -// seeds in 1-100 range will be -1.x. +// Note: seeds 2 and 12 will push this to -2 instead of -1. 
All other seeds in +// 1-100 range will be -1.x. // CHECK: Energy is -1. diff --git a/targettests/execution/custom_operation_basic.cpp b/targettests/execution/custom_operation_basic.cpp index bb3b58e02f..77c8020a3e 100644 --- a/targettests/execution/custom_operation_basic.cpp +++ b/targettests/execution/custom_operation_basic.cpp @@ -7,7 +7,11 @@ ******************************************************************************/ // RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t %s 2>&1 | FileCheck %s -check-prefix=FAIL +// RUN: nvq++ %cpp_std --target anyon --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target iqm --emulate --iqm-machine Apollo %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s #include @@ -33,4 +37,5 @@ int main() { // CHECK: 11 // CHECK: 00 -// FAIL: failed to legalize operation 'quake.custom_op' +// CHECK-NOT: 01 +// CHECK-NOT: 10 diff --git a/targettests/execution/custom_operation_toffoli.cpp b/targettests/execution/custom_operation_toffoli.cpp new file mode 100644 index 0000000000..869a3ce3aa --- /dev/null +++ b/targettests/execution/custom_operation_toffoli.cpp @@ -0,0 +1,35 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t %s 2>&1 | FileCheck %s -check-prefix=FAIL + +#include + +CUDAQ_REGISTER_OPERATION(toffoli, 3, 0, + {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0}) + +__qpu__ void kernel() { + cudaq::qvector q(3); + x(q); + toffoli(q[0], q[1], q[2]); +} + +int main() { + auto counts = cudaq::sample(kernel); + for (auto &[bits, count] : counts) { + printf("%s\n", bits.data()); + } +} + +// CHECK: 110 + +// FAIL: failed to legalize operation 'quake.custom_op' diff --git a/targettests/execution/exp_pauli.cpp b/targettests/execution/exp_pauli.cpp new file mode 100644 index 0000000000..bf7ed5bac1 --- /dev/null +++ b/targettests/execution/exp_pauli.cpp @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// clang-format off +// Simulators +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --enable-mlir --target remote-mqpu -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// +// Quantum emulators +// RUN: nvq++ %cpp_std --target quantinuum --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target ionq --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// 2 different IQM machines for 2 different topologies +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Adonis --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target oqc --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target anyon --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// clang-format on + +#include +#include + +__qpu__ void test() { + cudaq::qvector q(2); + cudaq::exp_pauli(1.0, q, "XX"); +} + +__qpu__ void test_param(cudaq::pauli_word w) { + cudaq::qvector q(2); + cudaq::exp_pauli(1.0, q, w); +} + +void printCounts(cudaq::sample_result& result) { + std::vector values{}; + for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { + std::cout << bits << '\n'; + } +} + +int main() { + auto counts = cudaq::sample(test); + printCounts(counts); + + counts = cudaq::sample(test_param, cudaq::pauli_word{"XY"}); + printCounts(counts); + return 0; +} + +// CHECK: 00 +// CHECK: 11 + +// CHECK: 00 +// CHECK: 11 diff --git a/targettests/execution/qspan_slices.cpp b/targettests/execution/qspan_slices.cpp index 3086d98f9d..ca5ecce508 100644 --- a/targettests/execution/qspan_slices.cpp +++ b/targettests/execution/qspan_slices.cpp @@ -8,11 +8,12 @@ // 
REQUIRES: c++20 // clang-format off -// RUN: nvq++ --target anyon --emulate %s -o %t && %t | FileCheck %s -// RUN: nvq++ --target ionq --emulate %s -o %t && %t | FileCheck %s -// RUN: nvq++ --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s -// RUN: nvq++ --target oqc --emulate %s -o %t && %t | FileCheck %s -// RUN: nvq++ --target quantinuum --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ --target anyon --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ --target anyon --anyon-machine berkeley-25q --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ --target ionq --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ --target oqc --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ --target quantinuum --emulate %s -o %t && %t | FileCheck %s // Tests for --disable-qubit-mapping: // RUN: nvq++ -v %s -o %t --target oqc --emulate --disable-qubit-mapping && CUDAQ_MLIR_PRINT_EACH_PASS=1 %t |& FileCheck --check-prefix=DISABLE %s // RUN: nvq++ -v %s -o %t --target iqm --iqm-machine Adonis --emulate --disable-qubit-mapping && CUDAQ_MLIR_PRINT_EACH_PASS=1 %t |& FileCheck --check-prefix=DISABLE %s diff --git a/targettests/execution/quantum_struct.cpp b/targettests/execution/quantum_struct.cpp new file mode 100644 index 0000000000..929a2cd615 --- /dev/null +++ b/targettests/execution/quantum_struct.cpp @@ -0,0 +1,70 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// clang-format off +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// clang-format on + +#include +#include + +struct PureQuantumStruct { + cudaq::qview<> view1; + cudaq::qview<> view2; +}; + +struct Fehu { + void operator()(cudaq::qview<> v) __qpu__ { h(v); } +}; + +struct Ansuz { + void operator()(cudaq::qview<> v) __qpu__ { x(v); } +}; + +struct Uruz { + void operator()(PureQuantumStruct group) __qpu__ { + Ansuz{}(group.view1); + Fehu{}(group.view1); + Fehu{}(group.view2); + Ansuz{}(group.view2); + } +}; + +struct Thurisaz { + void operator()() __qpu__ { + cudaq::qvector v1(2); + cudaq::qvector v2(3); + PureQuantumStruct pqs{v1, v2}; + Uruz{}(pqs); + mz(v1); + mz(v2); + } +}; + +int main() { + auto result = cudaq::sample(Thurisaz{}); + int flags[1 << 5] = {0}; + for (auto &&[b, c] : result) { + int off = std::stoi(b, nullptr, 2); + if (off >= (1 << 5) || off < 0) { + std::cout << "Amazingly incorrect: " << b << '\n'; + return 1; + } + flags[off] = 1 + c; + } + for (int i = 0; i < (1 << 5); ++i) { + if (flags[i] == 0) { + std::cout << "FAILED!\n"; + return 1; + } + } + std::cout << "Wahoo!\n"; + return 0; +} + +// CHECK: Wahoo diff --git a/targettests/execution/qubit_management_bug_lifting_ifs.cpp b/targettests/execution/qubit_management_bug_lifting_ifs.cpp new file mode 100644 index 0000000000..9685950e62 --- /dev/null +++ b/targettests/execution/qubit_management_bug_lifting_ifs.cpp @@ -0,0 +1,41 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include + +// RUN: nvq++ --target opt-test --target-option dep-analysis,qpp %s -o %t && %t + +struct run_test { + __qpu__ auto operator()() { + cudaq::qubit q; + + bool b = false; + + // Ifs shouldn't be lifted at all, as it violates assumptions of + // the algorithmic logic + if (true) { + if (true) { + x(q); + b = true; + } + } else { + if (true) { + y(q); + b = true; + } + } + + return b; + } +}; + +int main() { + bool result = run_test{}(); + printf("Result = %b\n", result); + return 0; +} diff --git a/targettests/execution/qubit_management_bug_qids-2.cpp b/targettests/execution/qubit_management_bug_qids-2.cpp new file mode 100644 index 0000000000..afd1058f54 --- /dev/null +++ b/targettests/execution/qubit_management_bug_qids-2.cpp @@ -0,0 +1,43 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include + +// RUN: nvq++ --target opt-test --target-option dep-analysis,qpp %s -o %t && %t + +struct run_test { + __qpu__ auto operator()() { + cudaq::qubit q,p,r; + + h(r); + + // Ensures that updates to qids get correctly propagated to sub-blocks + if (true) { + x(q); + x(r); + x(q,p); + } else { + if (true) + x(p); + else + x(p); + y(q); + x(q,r); + } + + bool b = mz(r); + + return b; + } +}; + +int main() { + bool result = run_test{}(); + printf("Result = %b\n", result); + return 0; +} diff --git a/targettests/execution/qubit_management_bug_qids.cpp b/targettests/execution/qubit_management_bug_qids.cpp new file mode 100644 index 0000000000..a5436bf6f3 --- /dev/null +++ b/targettests/execution/qubit_management_bug_qids.cpp @@ -0,0 +1,43 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include + +// RUN: nvq++ --target opt-test --target-option dep-analysis,qpp %s -o %t && %t + +struct run_test { + __qpu__ auto operator()() { + cudaq::qubit q,p,r; + + h(r); + + // q will be duplicated in the then and else branches, + // but then mapped to two different qubits. + // This test ensures that this case is handled properly, + // with a fresh qid being generated for q when it is split. 
+ if (true) { + x(p); + y(q); + x(q,r); + } else { + y(q); + x(r); + x(q,p); + } + + bool b = mz(r); + + return b; + } +}; + +int main() { + bool result = run_test{}(); + printf("Result = %b\n", result); + return 0; +} diff --git a/targettests/execution/qubit_management_if-1.cpp b/targettests/execution/qubit_management_if-1.cpp new file mode 100644 index 0000000000..df83599c18 --- /dev/null +++ b/targettests/execution/qubit_management_if-1.cpp @@ -0,0 +1,35 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include + +// TODO: filecheck with statistics + +// RUN: nvq++ --target opt-test --target-option dep-analysis,qpp %s -o %t && %t + +// Simple test, shouldn't affect anything +struct run_test { + __qpu__ auto operator()() { + cudaq::qubit q; + + if (true) + x(q); + else + y(q); + + bool b = mz(q); + + return b; + } +}; + +int main() { + bool result = run_test{}(); + printf("Result = %b\n", result); + return 0; +} diff --git a/targettests/execution/qubit_management_if-2.cpp b/targettests/execution/qubit_management_if-2.cpp new file mode 100644 index 0000000000..b9824b1c46 --- /dev/null +++ b/targettests/execution/qubit_management_if-2.cpp @@ -0,0 +1,43 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include + +// RUN: nvq++ --target opt-test --target-option dep-analysis,qpp %s -o %t && %t + +struct run_test { + __qpu__ auto operator()() { + cudaq::qubit q; + + bool res; + + h(q); + bool b = mz(q); + + // Should be able to lift x(p/r) + if (b) { + cudaq::qubit p; + x(p); + y(p); + res = mz(p); + } else { + cudaq::qubit r; + x(r); + z(r); + res = mz(r); + } + + return res; + } +}; + +int main() { + bool result = run_test{}(); + printf("Result = %b\n", result); + return 0; +} diff --git a/targettests/execution/qubit_management_if-3.cpp b/targettests/execution/qubit_management_if-3.cpp new file mode 100644 index 0000000000..4af00e2b27 --- /dev/null +++ b/targettests/execution/qubit_management_if-3.cpp @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include + +// RUN: nvq++ --target opt-test --target-option dep-analysis,qpp %s -o %t && %t + +struct run_test { + __qpu__ auto operator()() { + cudaq::qubit q; + + h(q); + bool b = mz(q); + // Should be able to lift x(q) after + if (b) { + y(q); + x(q); + } else { + h(q); + x(q); + } + + bool res = mz(q); + return res; + } +}; + +int main() { + bool result = run_test{}(); + printf("Result = %b\n", result); + return 0; +} diff --git a/targettests/execution/qubit_management_if-4.cpp b/targettests/execution/qubit_management_if-4.cpp new file mode 100644 index 0000000000..973a68a412 --- /dev/null +++ b/targettests/execution/qubit_management_if-4.cpp @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include + +// RUN: nvq++ --target opt-test --target-option dep-analysis,qpp %s -o %t && %t + +struct run_test { + __qpu__ auto operator()() { + cudaq::qubit q; + + bool res; + // Should be able to lift mz(q) before + if (true) { + x(q); + y(q); + res = true; + } else { + x(q); + y(q); + res = false; + } + + return res; + } +}; + +int main() { + bool result = run_test{}(); + printf("Result = %b\n", result); + return 0; +} diff --git a/targettests/execution/qubit_management_if-5.cpp b/targettests/execution/qubit_management_if-5.cpp new file mode 100644 index 0000000000..213f740231 --- /dev/null +++ b/targettests/execution/qubit_management_if-5.cpp @@ -0,0 +1,41 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. 
* + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include + +// RUN: nvq++ --target opt-test --target-option dep-analysis,qpp %s -o %t && %t + +struct run_test { + __qpu__ auto operator()() { + cudaq::qubit q; + + bool res; + // Should not lift rx(i,p) + if (true) { + cudaq::qubit p; + x(q); + res = mz(q); + auto i = (float)res; + rx(i, p); + } else { + cudaq::qubit p; + y(q); + res = mz(q); + auto i = (float)res; + rx(i, p); + } + + return res; + } +}; + +int main() { + bool result = run_test{}(); + printf("Result = %b\n", result); + return 0; +} diff --git a/targettests/execution/qubit_management_if_classical.cpp b/targettests/execution/qubit_management_if_classical.cpp new file mode 100644 index 0000000000..15296ad5a0 --- /dev/null +++ b/targettests/execution/qubit_management_if_classical.cpp @@ -0,0 +1,34 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include + +// RUN: nvq++ --target opt-test --target-option dep-analysis,qpp %s -o %t && %t + +struct run_test { + __qpu__ auto operator()() { + cudaq::qubit p; + + if (true) { + rx(1., p); + z(p); + rx(1., p); + } else { + rx(1., p); + y(p); + } + auto res = mz(p); + return res; + } +}; + +int main() { + bool result = run_test{}(); + printf("Result = %b\n", result); + return 0; +} diff --git a/targettests/execution/state_init_err_runtime.cpp b/targettests/execution/state_init_err_runtime.cpp index f4c4d5fdc1..6632035f33 100644 --- a/targettests/execution/state_init_err_runtime.cpp +++ b/targettests/execution/state_init_err_runtime.cpp @@ -7,23 +7,19 @@ ******************************************************************************/ // clang-format off -// Note: change |& to 2>&1| if running in bash -// RUN: nvq++ %cpp_std %s -o %t --target quantinuum --emulate && %t |& FileCheck %s -// Note: change |& to 2>&1| if running in bash -// RUN: nvq++ %cpp_std --enable-mlir --target remote-mqpu --remote-mqpu-url localhost:9999 %s -o %t && %t |& FileCheck %s +// RUN: nvq++ %cpp_std -target quantinuum -emulate -fkernel-exec-kind=1 %s -o %t&& %t |& FileCheck %s +// RUN: nvq++ %cpp_std --enable-mlir -target remote-mqpu --remote-mqpu-url localhost:9999 -fkernel-exec-kind=1 %s -o %t && %t |& FileCheck %s // clang-format on #include #include -__qpu__ void test(cudaq::state *inState) { - cudaq::qvector q(inState); -} +__qpu__ void test(cudaq::state *inState) { cudaq::qvector q(inState); } int main() { std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; auto state = cudaq::state::from_data(vec); - { + { auto counts = cudaq::sample(test, &state); counts.dump(); printf("size %zu\n", counts.size()); @@ -31,4 +27,6 @@ int main() { return 0; } +// clang-format off // CHECK: error: 'func.func' op synthesis: unsupported argument type for remote devices and simulators: state* +// clang-format on diff --git 
a/targettests/execution/state_preparation_vector.cpp b/targettests/execution/state_preparation_vector.cpp index 8bce594ee6..e95b068518 100644 --- a/targettests/execution/state_preparation_vector.cpp +++ b/targettests/execution/state_preparation_vector.cpp @@ -7,54 +7,58 @@ ******************************************************************************/ // Simulators -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s // Quantum emulators -// RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std -target quantinuum -emulate %s -o %t && \ +// RUN: %t | FileCheck %s +// RUN: nvq++ %cpp_std -target ionq -emulate %s -o %t && \ +// RUN: %t | FileCheck %s +// RUN: nvq++ %cpp_std -target oqc -emulate %s -o %t && \ +// RUN: %t | FileCheck %s + // 2 different IQM machines for 2 different topologies -// RUN: nvq++ %cpp_std --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std -target iqm --iqm-machine Adonis -emulate %s -o %t && \ +// RUN: %t | FileCheck %s +// RUN: nvq++ %cpp_std -target iqm --iqm-machine Apollo -emulate %s -o %t && \ +// RUN: %t | FileCheck %s #include #include -__qpu__ float test_const_prop_cast() { - return M_SQRT1_2; -} +__qpu__ float test_const_prop_cast() { return M_SQRT1_2; } __qpu__ void test_const_prop_cast_caller() { auto c = test_const_prop_cast(); - cudaq::qvector v(std::vector({ c, c, 0., 0.})); + cudaq::qvector v(std::vector({c, c, 0., 0.})); } __qpu__ void test_complex_constant_array() { - cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v(std::vector({M_SQRT1_2, M_SQRT1_2, 0., 0.})); } 
#ifdef CUDAQ_SIMULATION_SCALAR_FP32 __qpu__ void test_complex_constant_array_floating_point() { - cudaq::qvector v(std::vector>({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v( + std::vector>({M_SQRT1_2, M_SQRT1_2, 0., 0.})); } #else __qpu__ void test_complex_constant_array_floating_point() { - cudaq::qvector v(std::vector>({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v( + std::vector>({M_SQRT1_2, M_SQRT1_2, 0., 0.})); } #endif __qpu__ void test_complex_constant_array2() { - cudaq::qvector v1(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); - cudaq::qvector v2(std::vector({ 0., 0., M_SQRT1_2, M_SQRT1_2})); + cudaq::qvector v1( + std::vector({M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v2( + std::vector({0., 0., M_SQRT1_2, M_SQRT1_2})); } __qpu__ void test_complex_constant_array3() { - cudaq::qvector v({ - cudaq::complex(M_SQRT1_2), - cudaq::complex(M_SQRT1_2), - cudaq::complex(0.0), - cudaq::complex(0.0) - }); + cudaq::qvector v({cudaq::complex(M_SQRT1_2), cudaq::complex(M_SQRT1_2), + cudaq::complex(0.0), cudaq::complex(0.0)}); } __qpu__ void test_complex_array_param(std::vector inState) { @@ -62,26 +66,28 @@ __qpu__ void test_complex_array_param(std::vector inState) { } #ifdef CUDAQ_SIMULATION_SCALAR_FP32 -__qpu__ void test_complex_array_param_floating_point(std::vector> inState) { +__qpu__ void test_complex_array_param_floating_point( + std::vector> inState) { cudaq::qvector q1 = inState; } #else -__qpu__ void test_complex_array_param_floating_point(std::vector> inState) { +__qpu__ void test_complex_array_param_floating_point( + std::vector> inState) { cudaq::qvector q1 = inState; } #endif __qpu__ void test_real_constant_array() { - cudaq::qvector v({ M_SQRT1_2, M_SQRT1_2, 0., 0.}); + cudaq::qvector v({M_SQRT1_2, M_SQRT1_2, 0., 0.}); } #ifdef CUDAQ_SIMULATION_SCALAR_FP32 __qpu__ void test_real_constant_array_floating_point() { - cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v(std::vector({M_SQRT1_2, M_SQRT1_2, 
0., 0.})); } #else __qpu__ void test_real_constant_array_floating_point() { - cudaq::qvector v(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + cudaq::qvector v(std::vector({M_SQRT1_2, M_SQRT1_2, 0., 0.})); } #endif @@ -99,7 +105,7 @@ __qpu__ void test_real_array_param_floating_point(std::vector inState) { } #endif -void printCounts(cudaq::sample_result& result) { +void printCounts(cudaq::sample_result &result) { std::vector values{}; for (auto &&[bits, counts] : result) { values.push_back(bits); @@ -117,107 +123,107 @@ int main() { printCounts(counts); } -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 { auto counts = cudaq::sample(test_complex_constant_array); printCounts(counts); } -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 { auto counts = cudaq::sample(test_complex_constant_array_floating_point); printCounts(counts); } -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 { auto counts = cudaq::sample(test_complex_constant_array2); printCounts(counts); } -// CHECK: 0001 -// CHECK: 0011 -// CHECK: 1001 -// CHECK: 1011 + // CHECK: 0001 + // CHECK: 0011 + // CHECK: 1001 + // CHECK: 1011 { auto counts = cudaq::sample(test_complex_constant_array3); printCounts(counts); } -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 { auto counts = cudaq::sample(test_real_constant_array); printCounts(counts); } -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 { auto counts = cudaq::sample(test_real_constant_array_floating_point); printCounts(counts); } -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 { std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; { - // Passing state data as argument (kernel mode) - auto counts = cudaq::sample(test_complex_array_param, vec); - printCounts(counts); + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_complex_array_param, vec); + printCounts(counts); -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 - 
counts = cudaq::sample(test_complex_array_param, vec1); - printCounts(counts); + counts = cudaq::sample(test_complex_array_param, vec1); + printCounts(counts); -// CHECK: 01 -// CHECK: 11 + // CHECK: 01 + // CHECK: 11 } { - // Passing state data as argument (kernel mode) - auto counts = cudaq::sample(test_complex_array_param_floating_point, vec); - printCounts(counts); + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_complex_array_param_floating_point, vec); + printCounts(counts); -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 - counts = cudaq::sample(test_complex_array_param_floating_point, vec1); - printCounts(counts); + counts = cudaq::sample(test_complex_array_param_floating_point, vec1); + printCounts(counts); -// CHECK: 01 -// CHECK: 11 + // CHECK: 01 + // CHECK: 11 } { - // Passing state data as argument (builder mode) - auto [kernel, v] = cudaq::make_kernel>(); - auto qubits = kernel.qalloc(v); + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); - auto counts = cudaq::sample(kernel, vec); - printCounts(counts); + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 - counts = cudaq::sample(kernel, vec1); - printCounts(counts); + counts = cudaq::sample(kernel, vec1); + printCounts(counts); -// CHECK: 01 -// CHECK: 11 + // CHECK: 01 + // CHECK: 11 } } @@ -225,50 +231,50 @@ int main() { std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0.}; std::vector vec1{0., 0., M_SQRT1_2, M_SQRT1_2}; { - // Passing state data as argument (kernel mode) - auto counts = cudaq::sample(test_real_array_param, vec); - printCounts(counts); + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_real_array_param, vec); + printCounts(counts); -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 - counts = cudaq::sample(test_real_array_param, vec1); - 
printCounts(counts); + counts = cudaq::sample(test_real_array_param, vec1); + printCounts(counts); -// CHECK: 01 -// CHECK: 11 + // CHECK: 01 + // CHECK: 11 } { - // Passing state data as argument (kernel mode) - auto counts = cudaq::sample(test_real_array_param_floating_point, vec); - printCounts(counts); + // Passing state data as argument (kernel mode) + auto counts = cudaq::sample(test_real_array_param_floating_point, vec); + printCounts(counts); -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 - counts = cudaq::sample(test_real_array_param_floating_point, vec1); - printCounts(counts); + counts = cudaq::sample(test_real_array_param_floating_point, vec1); + printCounts(counts); -// CHECK: 01 -// CHECK: 11 + // CHECK: 01 + // CHECK: 11 } { - // Passing state data as argument (builder mode) - auto [kernel, v] = cudaq::make_kernel>(); - auto qubits = kernel.qalloc(v); + // Passing state data as argument (builder mode) + auto [kernel, v] = cudaq::make_kernel>(); + auto qubits = kernel.qalloc(v); - auto counts = cudaq::sample(kernel, vec); - printCounts(counts); + auto counts = cudaq::sample(kernel, vec); + printCounts(counts); -// CHECK: 00 -// CHECK: 10 + // CHECK: 00 + // CHECK: 10 - counts = cudaq::sample(kernel, vec1); - printCounts(counts); + counts = cudaq::sample(kernel, vec1); + printCounts(counts); -// CHECK: 01 -// CHECK: 11 + // CHECK: 01 + // CHECK: 11 } } } diff --git a/test/AST-Quake/base_profile.cpp b/test/AST-Quake/base_profile-0.cpp similarity index 72% rename from test/AST-Quake/base_profile.cpp rename to test/AST-Quake/base_profile-0.cpp index d5e177132e..d0c55bce9f 100644 --- a/test/AST-Quake/base_profile.cpp +++ b/test/AST-Quake/base_profile-0.cpp @@ -7,31 +7,34 @@ ******************************************************************************/ // REQUIRES: c++20 -// RUN: cudaq-quake %s | cudaq-opt --lower-to-cfg | cudaq-translate --convert-to=qir-base -o - | FileCheck %s +// clang-format off +// RUN: cudaq-quake %s | cudaq-opt 
--add-dealloc --lower-to-cfg | cudaq-translate --convert-to=qir-base -o - | FileCheck %s +// clang-format on #include struct kernel { - void operator()() __qpu__ { - cudaq::qarray<3> q; - h(q[1]); - x(q[1],q[2]); + void operator()() __qpu__ { + cudaq::qarray<3> q; + h(q[1]); + x(q[1], q[2]); - x(q[0], q[1]); - h(q[0]); + x(q[0], q[1]); + h(q[0]); - // This scope block is intentionally blank and is used for robustness testing. - {} + // This scope block is intentionally blank and is used for robustness + // testing. + {} - auto b0 = mz(q[0]); - auto b1 = mz(q[1]); - } + auto b0 = mz(q[0]); + auto b1 = mz(q[1]); + } }; +// clang-format off // CHECK-LABEL: define void @__nvqpp__mlirgen__kernel() // CHECK: tail call void @__quantum__qis__mz__body(%{{.*}}* null, %{{.*}}* null) // CHECK: tail call void @__quantum__qis__mz__body(%{{.*}}* nonnull inttoptr (i64 1 to %{{.*}}*), %{{.*}}* nonnull inttoptr (i64 1 to %{{.*}}*)) // CHECK: tail call void @__quantum__rt__result_record_output(%{{.*}}* null, i8* nonnull getelementptr inbounds ([3 x i8], [3 x i8]* @cstr.623000, i64 0, i64 0)) // CHECK: tail call void @__quantum__rt__result_record_output(%{{.*}}* nonnull inttoptr (i64 1 to %{{.*}}*), i8* nonnull getelementptr inbounds ([3 x i8], [3 x i8]* @cstr.623100, i64 0, i64 0)) - - +// clang-format on diff --git a/test/AST-Quake/base_profile-1.cpp b/test/AST-Quake/base_profile-1.cpp new file mode 100644 index 0000000000..e33637e9ff --- /dev/null +++ b/test/AST-Quake/base_profile-1.cpp @@ -0,0 +1,361 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// REQUIRES: c++20 +// clang-format off +// RUN: cudaq-quake %s | cudaq-opt --add-dealloc --expand-measurements --factor-quantum-alloc --expand-control-veqs --cc-loop-unroll --canonicalize --multicontrol-decomposition --lower-to-cfg --cse --decomposition=enable-patterns="CCXToCCZ,CCZToCX" --combine-quantum-alloc --canonicalize | cudaq-translate --convert-to=qir-base | FileCheck --check-prefix=BASE %s +// RUN: cudaq-quake %s | cudaq-opt --add-dealloc --expand-measurements --factor-quantum-alloc --expand-control-veqs --cc-loop-unroll --canonicalize --multicontrol-decomposition --lower-to-cfg --cse --decomposition=enable-patterns="CCXToCCZ,CCZToCX" --combine-quantum-alloc --canonicalize | cudaq-translate --convert-to=qir-adaptive | FileCheck --check-prefix=ADAPT %s +// RUN: cudaq-quake -D FULL %s | cudaq-opt --add-dealloc --expand-measurements --factor-quantum-alloc --expand-control-veqs --cc-loop-unroll --canonicalize --multicontrol-decomposition --lower-to-cfg --cse --combine-quantum-alloc --canonicalize | cudaq-translate --convert-to=qir | FileCheck --check-prefix=FULL %s +// clang-format on + +#include + +#ifdef FULL +#define T(X) X +#else +#define T(X) +#endif + +// This test is meant to exercise more of the NVQIR calls. The ctrl variants are +// commented out because they don't work with qir-base or qir-adaptive. +struct comprehensive { + void operator()() __qpu__ { + cudaq::qubit q0; + cudaq::qarray<1> q1; + cudaq::qarray<2> q2; + cudaq::qarray<3> q3; + + // The basic one target gates. + h(q0); + h(q3); + T(h(q2[0], q2[1], q0)); + x(q0); + x(q3); + T(x(!q2[1], q3[2], q1[0])); + cnot(q2[0], q3[0]); + y(q0); + y(q3); + T(y(q3, q1[0])); + z(q0); + z(q3); + T(z(!q2[1], q3[2], q1[0])); + t(q0); + t(q3); + T(t(!q2[1], q3[2], q1[0])); + t(q3[1]); + s(q0); + s(q3); + T(s(!q2[1], q3[2], q1[0])); + s(q0); + + // Basic rotations. 
+ const double p = 5.6123; + rx(p, q0); + rx(p, q3[1]); + T(rx(p, !q2[1], q3[2], q1[0])); + rx(p, q0); + + ry(p + 1, q0); + ry(p + 1, q3[0]); + T(ry(p + 1, q2[1], q3[0], q1[0])); + ry(p + 1, q0); + + rz(p + 2, q0); + rz(p + 3, q3[1]); + T(rz(p + 4, !q2[1], q3[2], q1[0])); + rz(p + 5, q0); + + r1(p - 1, q0); + r1(p - 2, q3[2]); + T(r1(p - 3, q3[1], q3[2], q1[0])); + r1(p - 4, q0); + + // Misc. gates. + swap(q0, q3[2]); + T(swap(q0, q1[0], q2[1])); + + u3(0.8, 0.5, -1.0, q2[1]); + T(u3(6.2, -3.1, 7.8, q3[1], q3[0], q0)); + + auto singleton = mz(q0); + auto eins = mz(q1); + auto dub = mz(q2); + auto trip = mz(q3); + } +}; + +// clang-format off + +// BASE-LABEL: define void @__nvqpp__mlirgen__comprehensive() +// BASE: tail call void @__quantum__qis__h__body(%Qubit* null) +// BASE: tail call void @__quantum__qis__h__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// BASE: tail call void @__quantum__qis__h__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// BASE: tail call void @__quantum__qis__h__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// BASE: tail call void @__quantum__qis__x__body(%Qubit* null) +// BASE: tail call void @__quantum__qis__x__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// BASE: tail call void @__quantum__qis__x__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// BASE: tail call void @__quantum__qis__x__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// BASE: tail call void @__quantum__qis__cnot__body(%Qubit* nonnull inttoptr (i64 2 to %Qubit*), %Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// BASE: tail call void @__quantum__qis__y__body(%Qubit* null) +// BASE: tail call void @__quantum__qis__y__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// BASE: tail call void @__quantum__qis__y__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// BASE: tail call void @__quantum__qis__y__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// BASE: tail call void @__quantum__qis__z__body(%Qubit* null) +// BASE: tail call void 
@__quantum__qis__z__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// BASE: tail call void @__quantum__qis__z__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// BASE: tail call void @__quantum__qis__z__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// BASE: tail call void @__quantum__qis__t__body(%Qubit* null) +// BASE: tail call void @__quantum__qis__t__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// BASE: tail call void @__quantum__qis__t__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// BASE: tail call void @__quantum__qis__t__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// BASE: tail call void @__quantum__qis__t__adj(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// BASE: tail call void @__quantum__qis__s__body(%Qubit* null) +// BASE: tail call void @__quantum__qis__s__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// BASE: tail call void @__quantum__qis__s__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// BASE: tail call void @__quantum__qis__s__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// BASE: tail call void @__quantum__qis__s__adj(%Qubit* null) +// BASE: tail call void @__quantum__qis__rx__body(double 5.612300e+00, %Qubit* null) +// BASE: tail call void @__quantum__qis__rx__body(double 5.612300e+00, %Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// BASE: tail call void @__quantum__qis__rx__body(double -5.612300e+00, %Qubit* null) +// BASE: tail call void @__quantum__qis__ry__body(double 6.612300e+00, %Qubit* null) +// BASE: tail call void @__quantum__qis__ry__body(double 6.612300e+00, %Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// BASE: tail call void @__quantum__qis__ry__body(double -6.612300e+00, %Qubit* null) +// BASE: tail call void @__quantum__qis__rz__body(double 7.612300e+00, %Qubit* null) +// BASE: tail call void @__quantum__qis__rz__body(double 0x4021397F62B6AE7E, %Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// BASE: tail call void @__quantum__qis__rz__body(double 0xC025397F62B6AE7E, %Qubit* null) +// 
BASE: tail call void @__quantum__qis__r1__body(double 4.612300e+00, %Qubit* null) +// BASE: tail call void @__quantum__qis__r1__body(double 0x400CE5FD8ADAB9F6, %Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// BASE: tail call void @__quantum__qis__r1__body(double 0xBFF9CBFB15B573EC, %Qubit* null) +// BASE: tail call void @__quantum__qis__swap__body(%Qubit* null, %Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// BASE: tail call void @__quantum__qis__u3__body(double 8.000000e-01, double 5.000000e-01, double -1.000000e+00, %Qubit* nonnull inttoptr (i64 3 to %Qubit*)) +// BASE: tail call void @__quantum__qis__mz__body(%Qubit* null, %Result* null) +// BASE: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 1 to %Qubit*), %Result* nonnull inttoptr (i64 1 to %Result*)) +// BASE: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 2 to %Qubit*), %Result* nonnull inttoptr (i64 2 to %Result*)) +// BASE: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 3 to %Qubit*), %Result* nonnull inttoptr (i64 3 to %Result*)) +// BASE: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*), %Result* nonnull inttoptr (i64 4 to %Result*)) +// BASE: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*), %Result* nonnull inttoptr (i64 5 to %Result*)) +// BASE: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*), %Result* nonnull inttoptr (i64 6 to %Result*)) +// BASE: tail call void @__quantum__rt__result_record_output(%Result* null, i8* nonnull getelementptr inbounds ([10 x i8], [10 x i8]* @cstr.73696E676C65746F6E00, i64 0, i64 0)) +// BASE: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 1 to %Result*), i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.65696E7300, i64 0, i64 0)) +// BASE: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 2 to %Result*), i8* 
nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @cstr.64756200, i64 0, i64 0)) +// BASE: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 3 to %Result*), i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @cstr.64756200, i64 0, i64 0)) +// BASE: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 4 to %Result*), i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.7472697000, i64 0, i64 0)) +// BASE: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 5 to %Result*), i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.7472697000, i64 0, i64 0)) +// BASE: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 6 to %Result*), i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.7472697000, i64 0, i64 0)) +// BASE: ret void +// BASE: } + +// ADAPT-LABEL: define void @__nvqpp__mlirgen__comprehensive() +// ADAPT: tail call void @__quantum__qis__h__body(%Qubit* null) +// ADAPT: tail call void @__quantum__qis__h__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__h__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__h__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__x__body(%Qubit* null) +// ADAPT: tail call void @__quantum__qis__x__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__x__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__x__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__cnot__body(%Qubit* nonnull inttoptr (i64 2 to %Qubit*), %Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__y__body(%Qubit* null) +// ADAPT: tail call void @__quantum__qis__y__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// ADAPT: tail 
call void @__quantum__qis__y__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__y__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__z__body(%Qubit* null) +// ADAPT: tail call void @__quantum__qis__z__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__z__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__z__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__t__body(%Qubit* null) +// ADAPT: tail call void @__quantum__qis__t__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__t__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__t__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__t__adj(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__s__body(%Qubit* null) +// ADAPT: tail call void @__quantum__qis__s__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__s__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__s__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__s__adj(%Qubit* null) +// ADAPT: tail call void @__quantum__qis__rx__body(double 5.612300e+00, %Qubit* null) +// ADAPT: tail call void @__quantum__qis__rx__body(double 5.612300e+00, %Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__rx__body(double -5.612300e+00, %Qubit* null) +// ADAPT: tail call void @__quantum__qis__ry__body(double 6.612300e+00, %Qubit* null) +// ADAPT: tail call void @__quantum__qis__ry__body(double 6.612300e+00, %Qubit* nonnull inttoptr (i64 4 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__ry__body(double -6.612300e+00, %Qubit* null) +// ADAPT: tail 
call void @__quantum__qis__rz__body(double 7.612300e+00, %Qubit* null) +// ADAPT: tail call void @__quantum__qis__rz__body(double 0x4021397F62B6AE7E, %Qubit* nonnull inttoptr (i64 5 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__rz__body(double 0xC025397F62B6AE7E, %Qubit* null) +// ADAPT: tail call void @__quantum__qis__r1__body(double 4.612300e+00, %Qubit* null) +// ADAPT: tail call void @__quantum__qis__r1__body(double 0x400CE5FD8ADAB9F6, %Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__r1__body(double 0xBFF9CBFB15B573EC, %Qubit* null) +// ADAPT: tail call void @__quantum__qis__swap__body(%Qubit* null, %Qubit* nonnull inttoptr (i64 6 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__u3__body(double 8.000000e-01, double 5.000000e-01, double -1.000000e+00, %Qubit* nonnull inttoptr (i64 3 to %Qubit*)) +// ADAPT: tail call void @__quantum__qis__mz__body(%Qubit* null, %Result* null) +// ADAPT: tail call void @__quantum__rt__result_record_output(%Result* null, i8* nonnull getelementptr inbounds ([10 x i8], [10 x i8]* @cstr.73696E676C65746F6E00, i64 0, i64 0)) +// ADAPT: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 1 to %Qubit*), %Result* nonnull inttoptr (i64 1 to %Result*)) +// ADAPT: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 1 to %Result*), i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.65696E7300, i64 0, i64 0)) +// ADAPT: %[[VAL_2:.*]] = tail call i1 @__quantum__qis__read_result__body(%Result* nonnull inttoptr (i64 1 to %Result*)) +// ADAPT: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 2 to %Qubit*), %Result* nonnull inttoptr (i64 2 to %Result*)) +// ADAPT: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 2 to %Result*), i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @cstr.64756200, i64 0, i64 0)) +// ADAPT: %[[VAL_3:.*]] = tail call i1 
@__quantum__qis__read_result__body(%Result* nonnull inttoptr (i64 2 to %Result*)) +// ADAPT: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 3 to %Qubit*), %Result* nonnull inttoptr (i64 3 to %Result*)) +// ADAPT: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 3 to %Result*), i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @cstr.64756200, i64 0, i64 0)) +// ADAPT: %[[VAL_4:.*]] = tail call i1 @__quantum__qis__read_result__body(%Result* nonnull inttoptr (i64 3 to %Result*)) +// ADAPT: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 4 to %Qubit*), %Result* nonnull inttoptr (i64 4 to %Result*)) +// ADAPT: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 4 to %Result*), i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.7472697000, i64 0, i64 0)) +// ADAPT: %[[VAL_5:.*]] = tail call i1 @__quantum__qis__read_result__body(%Result* nonnull inttoptr (i64 4 to %Result*)) +// ADAPT: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 5 to %Qubit*), %Result* nonnull inttoptr (i64 5 to %Result*)) +// ADAPT: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 5 to %Result*), i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.7472697000, i64 0, i64 0)) +// ADAPT: %[[VAL_6:.*]] = tail call i1 @__quantum__qis__read_result__body(%Result* nonnull inttoptr (i64 5 to %Result*)) +// ADAPT: tail call void @__quantum__qis__mz__body(%Qubit* nonnull inttoptr (i64 6 to %Qubit*), %Result* nonnull inttoptr (i64 6 to %Result*)) +// ADAPT: tail call void @__quantum__rt__result_record_output(%Result* nonnull inttoptr (i64 6 to %Result*), i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.7472697000, i64 0, i64 0)) +// ADAPT: %[[VAL_7:.*]] = tail call i1 @__quantum__qis__read_result__body(%Result* nonnull inttoptr (i64 6 to %Result*)) +// ADAPT: ret void +// ADAPT: } + +// FULL-LABEL: define 
void @__nvqpp__mlirgen__comprehensive() +// FULL: %[[VAL_0:.*]] = alloca i64, align 8 +// FULL: %[[VAL_1:.*]] = alloca i64, align 8 +// FULL: %[[VAL_2:.*]] = alloca i64, align 8 +// FULL: %[[VAL_3:.*]] = alloca i64, align 8 +// FULL: %[[VAL_4:.*]] = alloca i64, align 8 +// FULL: %[[VAL_5:.*]] = alloca i64, align 8 +// FULL: %[[VAL_6:.*]] = tail call %Array* @__quantum__rt__qubit_allocate_array(i64 9) +// FULL: %[[VAL_8:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%Array* %[[VAL_6]], i64 0) +// FULL: %[[VAL_9:.*]] = bitcast i8* %[[VAL_8]] to %Qubit** +// FULL: %[[VAL_11:.*]] = load %Qubit*, %Qubit** %[[VAL_9]], align 8 +// FULL: %[[VAL_12:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%Array* %[[VAL_6]], i64 1) +// FULL: %[[VAL_13:.*]] = bitcast i8* %[[VAL_12]] to %Qubit** +// FULL: %[[VAL_14:.*]] = load %Qubit*, %Qubit** %[[VAL_13]], align 8 +// FULL: %[[VAL_15:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%Array* %[[VAL_6]], i64 2) +// FULL: %[[VAL_16:.*]] = bitcast i8* %[[VAL_15]] to %Qubit** +// FULL: %[[VAL_17:.*]] = load %Qubit*, %Qubit** %[[VAL_16]], align 8 +// FULL: tail call void @__quantum__qis__h(%Qubit* %[[VAL_17]]) +// FULL: %[[VAL_18:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%Array* %[[VAL_6]], i64 6) +// FULL: %[[VAL_19:.*]] = bitcast i8* %[[VAL_18]] to %Qubit** +// FULL: %[[VAL_20:.*]] = load %Qubit*, %Qubit** %[[VAL_19]], align 8 +// FULL: tail call void @__quantum__qis__h(%Qubit* %[[VAL_20]]) +// FULL: %[[VAL_21:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%Array* %[[VAL_6]], i64 7) +// FULL: %[[VAL_22:.*]] = bitcast i8* %[[VAL_21]] to %Qubit** +// FULL: %[[VAL_23:.*]] = load %Qubit*, %Qubit** %[[VAL_22]], align 8 +// FULL: tail call void @__quantum__qis__h(%Qubit* %[[VAL_23]]) +// FULL: %[[VAL_24:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%Array* %[[VAL_6]], i64 8) +// FULL: %[[VAL_25:.*]] = bitcast i8* %[[VAL_24]] to %Qubit** +// FULL: 
%[[VAL_26:.*]] = load %Qubit*, %Qubit** %[[VAL_25]], align 8 +// FULL: tail call void @__quantum__qis__h(%Qubit* %[[VAL_26]]) +// FULL: %[[VAL_27:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%Array* %[[VAL_6]], i64 4) +// FULL: %[[VAL_28:.*]] = bitcast i8* %[[VAL_27]] to %Qubit** +// FULL: %[[VAL_29:.*]] = load %Qubit*, %Qubit** %[[VAL_28]], align 8 +// FULL: %[[VAL_30:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%Array* %[[VAL_6]], i64 5) +// FULL: %[[VAL_31:.*]] = bitcast i8* %[[VAL_30]] to %Qubit** +// FULL: %[[VAL_32:.*]] = load %Qubit*, %Qubit** %[[VAL_31]], align 8 +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_29]], %Qubit* %[[VAL_32]], %Qubit* %[[VAL_14]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 1, void (%Array*, %Qubit*)* nonnull @__quantum__qis__h__ctl, %Qubit* %[[VAL_14]], %Qubit* %[[VAL_17]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_29]], %Qubit* %[[VAL_32]], %Qubit* %[[VAL_14]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_17]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_20]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_23]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_26]]) +// FULL: %[[VAL_33:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%Array* %[[VAL_6]], i64 3) +// FULL: %[[VAL_34:.*]] = bitcast i8* %[[VAL_33]] to %Qubit** +// FULL: %[[VAL_35:.*]] = load %Qubit*, %Qubit** %[[VAL_34]], align 8 +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) 
@invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_35]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 1, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_29]], %Qubit* %[[VAL_20]]) +// FULL: tail call void @__quantum__qis__y(%Qubit* %[[VAL_17]]) +// FULL: tail call void @__quantum__qis__y(%Qubit* %[[VAL_20]]) +// FULL: tail call void @__quantum__qis__y(%Qubit* %[[VAL_23]]) +// FULL: tail call void @__quantum__qis__y(%Qubit* %[[VAL_26]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_20]], %Qubit* %[[VAL_23]], %Qubit* %[[VAL_14]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]], %Qubit* %[[VAL_11]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 1, void (%Array*, %Qubit*)* nonnull @__quantum__qis__y__ctl, %Qubit* %[[VAL_11]], %Qubit* %[[VAL_35]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]], %Qubit* %[[VAL_11]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) 
@invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_20]], %Qubit* %[[VAL_23]], %Qubit* %[[VAL_14]]) +// FULL: tail call void @__quantum__qis__z(%Qubit* %[[VAL_17]]) +// FULL: tail call void @__quantum__qis__z(%Qubit* %[[VAL_20]]) +// FULL: tail call void @__quantum__qis__z(%Qubit* %[[VAL_23]]) +// FULL: tail call void @__quantum__qis__z(%Qubit* %[[VAL_26]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__z__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_35]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void @__quantum__qis__t(%Qubit* %[[VAL_17]]) +// FULL: tail call void @__quantum__qis__t(%Qubit* %[[VAL_20]]) +// FULL: tail call void @__quantum__qis__t(%Qubit* %[[VAL_23]]) +// FULL: tail call void @__quantum__qis__t(%Qubit* %[[VAL_26]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 1, void (%Array*, %Qubit*)* nonnull @__quantum__qis__t__ctl, %Qubit* %[[VAL_14]], %Qubit* %[[VAL_35]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) 
@invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void @__quantum__qis__t__adj(%Qubit* %[[VAL_23]]) +// FULL: tail call void @__quantum__qis__s(%Qubit* %[[VAL_17]]) +// FULL: tail call void @__quantum__qis__s(%Qubit* %[[VAL_20]]) +// FULL: tail call void @__quantum__qis__s(%Qubit* %[[VAL_23]]) +// FULL: tail call void @__quantum__qis__s(%Qubit* %[[VAL_26]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 1, void (%Array*, %Qubit*)* nonnull @__quantum__qis__s__ctl, %Qubit* %[[VAL_14]], %Qubit* %[[VAL_35]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void @__quantum__qis__s__adj(%Qubit* %[[VAL_17]]) +// FULL: tail call void @__quantum__qis__rx(double 5.612300e+00, %Qubit* %[[VAL_17]]) +// FULL: tail call void @__quantum__qis__rx(double 5.612300e+00, %Qubit* %[[VAL_23]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: tail call void (i64, void (%Array*, %Qubit*)*, ...) 
@invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: tail call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: store i64 0, i64* %[[VAL_5]], align 8 +// FULL: call void (double, i64, i64*, void (double, %Array*, %Qubit*)*, ...) @invokeRotationWithControlQubits(double 5.612300e+00, i64 1, i64* nonnull %[[VAL_5]], void (double, %Array*, %Qubit*)* nonnull @__quantum__qis__rx__ctl, double 5.612300e+00, %Qubit* %[[VAL_14]], %Qubit* %[[VAL_35]]) +// FULL: call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: call void @__quantum__qis__rx(double -5.612300e+00, %Qubit* %[[VAL_17]]) +// FULL: call void @__quantum__qis__ry(double 6.612300e+00, %Qubit* %[[VAL_17]]) +// FULL: call void @__quantum__qis__ry(double 6.612300e+00, %Qubit* %[[VAL_20]]) +// FULL: call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_20]], %Qubit* %[[VAL_14]]) +// FULL: store i64 0, i64* %[[VAL_4]], align 8 +// FULL: call void (double, i64, i64*, void (double, %Array*, %Qubit*)*, ...) @invokeRotationWithControlQubits(double 6.612300e+00, i64 1, i64* nonnull %[[VAL_4]], void (double, %Array*, %Qubit*)* nonnull @__quantum__qis__ry__ctl, double 6.612300e+00, %Qubit* %[[VAL_14]], %Qubit* %[[VAL_35]]) +// FULL: call void (i64, void (%Array*, %Qubit*)*, ...) 
@invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_20]], %Qubit* %[[VAL_14]]) +// FULL: call void @__quantum__qis__ry(double -6.612300e+00, %Qubit* %[[VAL_17]]) +// FULL: call void @__quantum__qis__rz(double 7.612300e+00, %Qubit* %[[VAL_17]]) +// FULL: call void @__quantum__qis__rz(double 0x4021397F62B6AE7E, %Qubit* %[[VAL_23]]) +// FULL: call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: store i64 0, i64* %[[VAL_3]], align 8 +// FULL: call void (double, i64, i64*, void (double, %Array*, %Qubit*)*, ...) @invokeRotationWithControlQubits(double 0x4023397F62B6AE7E, i64 1, i64* nonnull %[[VAL_3]], void (double, %Array*, %Qubit*)* nonnull @__quantum__qis__rz__ctl, double 0x4023397F62B6AE7E, %Qubit* %[[VAL_14]], %Qubit* %[[VAL_35]]) +// FULL: call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_32]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: call void @__quantum__qis__x(%Qubit* %[[VAL_32]]) +// FULL: call void @__quantum__qis__rz(double 0xC025397F62B6AE7E, %Qubit* %[[VAL_17]]) +// FULL: call void @__quantum__qis__r1(double 4.612300e+00, %Qubit* %[[VAL_17]]) +// FULL: call void @__quantum__qis__r1(double 0x400CE5FD8ADAB9F6, %Qubit* %[[VAL_26]]) +// FULL: call void (i64, void (%Array*, %Qubit*)*, ...) 
@invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_23]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: store i64 0, i64* %[[VAL_2]], align 8 +// FULL: call void (double, i64, i64*, void (double, %Array*, %Qubit*)*, ...) @invokeRotationWithControlQubits(double 0x4004E5FD8ADAB9F6, i64 1, i64* nonnull %[[VAL_2]], void (double, %Array*, %Qubit*)* nonnull @__quantum__qis__r1__ctl, double 0x4004E5FD8ADAB9F6, %Qubit* %[[VAL_14]], %Qubit* %[[VAL_35]]) +// FULL: call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_23]], %Qubit* %[[VAL_26]], %Qubit* %[[VAL_14]]) +// FULL: call void @__quantum__qis__r1(double 0xBFF9CBFB15B573EC, %Qubit* %[[VAL_17]]) +// FULL: call void @__quantum__qis__swap(%Qubit* %[[VAL_17]], %Qubit* %[[VAL_26]]) +// FULL: store i64 0, i64* %[[VAL_1]], align 8 +// FULL: call void (i64, i64*, i64, void (%Array*, %Qubit*, %Qubit*)*, ...) @invokeWithControlRegisterOrQubits(i64 1, i64* nonnull %[[VAL_1]], i64 2, void (%Array*, %Qubit*, %Qubit*)* nonnull @__quantum__qis__swap__ctl, %Qubit* %[[VAL_17]], %Qubit* %[[VAL_35]], %Qubit* %[[VAL_32]]) +// FULL: call void @__quantum__qis__u3(double 8.000000e-01, double 5.000000e-01, double -1.000000e+00, %Qubit* %[[VAL_32]]) +// FULL: call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_23]], %Qubit* %[[VAL_20]], %Qubit* %[[VAL_14]]) +// FULL: store i64 0, i64* %[[VAL_0]], align 8 +// FULL: call void (double, double, double, i64, i64*, void (double, double, double, %Array*, %Qubit*)*, ...) 
@invokeU3RotationWithControlQubits(double 6.200000e+00, double -3.100000e+00, double 0x401F333333333333, i64 1, i64* nonnull %[[VAL_0]], void (double, double, double, %Array*, %Qubit*)* nonnull @__quantum__qis__u3__ctl, double 6.200000e+00, double -3.100000e+00, double 0x401F333333333333, %Qubit* %[[VAL_14]], %Qubit* %[[VAL_17]]) +// FULL: call void (i64, void (%Array*, %Qubit*)*, ...) @invokeWithControlQubits(i64 2, void (%Array*, %Qubit*)* nonnull @__quantum__qis__x__ctl, %Qubit* %[[VAL_23]], %Qubit* %[[VAL_20]], %Qubit* %[[VAL_14]]) +// FULL: %[[VAL_36:.*]] = call %Result* @__quantum__qis__mz__to__register(%Qubit* %[[VAL_17]], i8* nonnull getelementptr inbounds ([10 x i8], [10 x i8]* @cstr.73696E676C65746F6E00, i64 0, i64 0)) +// FULL: %[[VAL_38:.*]] = call %Result* @__quantum__qis__mz__to__register(%Qubit* %[[VAL_35]], i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.65696E7300, i64 0, i64 0)) +// FULL: %[[VAL_39:.*]] = call %Result* @__quantum__qis__mz__to__register(%Qubit* %[[VAL_29]], i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @cstr.64756200, i64 0, i64 0)) +// FULL: %[[VAL_40:.*]] = call %Result* @__quantum__qis__mz__to__register(%Qubit* %[[VAL_32]], i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @cstr.64756200, i64 0, i64 0)) +// FULL: %[[VAL_41:.*]] = call %Result* @__quantum__qis__mz__to__register(%Qubit* %[[VAL_20]], i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.7472697000, i64 0, i64 0)) +// FULL: %[[VAL_42:.*]] = call %Result* @__quantum__qis__mz__to__register(%Qubit* %[[VAL_23]], i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.7472697000, i64 0, i64 0)) +// FULL: %[[VAL_43:.*]] = call %Result* @__quantum__qis__mz__to__register(%Qubit* %[[VAL_26]], i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @cstr.7472697000, i64 0, i64 0)) +// FULL: call void @__quantum__rt__qubit_release_array(%Array* %[[VAL_6]]) +// FULL: ret void +// FULL: } diff --git a/test/AST-Quake/ctor-2.cpp 
b/test/AST-Quake/ctor-2.cpp index 390835bf53..5c7027e37c 100644 --- a/test/AST-Quake/ctor-2.cpp +++ b/test/AST-Quake/ctor-2.cpp @@ -22,23 +22,37 @@ void S1::operator()(bool b) { cudaq::qubit q; S2 s2; s2(b); + x(q); } void S2::operator()(bool b) { cudaq::qubit q; + z(q); } // CHECK-LABEL: func.func @__nvqpp__mlirgen__S1( -// CHECK-SAME: %[[VAL_0:.*]]: i1{{.*}}) attributes +// CHECK-SAME: %[[VAL_0:.*]]: i1) // CHECK: %[[VAL_1:.*]] = cc.alloca i1 // CHECK: cc.store %[[VAL_0]], %[[VAL_1]] : !cc.ptr // CHECK: %[[VAL_2:.*]] = quake.alloca !quake.ref // CHECK: %[[VAL_3:.*]] = cc.alloca !cc.struct<"S2" {} [8,1]> -// CHECK: call @_ZN2S2C1Ev(%[[VAL_3]]) : (!cc.ptr>) -> () -// CHECK: %[[VAL_5:.*]] = cc.load %[[VAL_1]] : !cc.ptr -// CHECK: call @_ZN2S2clEb(%[[VAL_5]]) : (i1) -> () -// CHECK: return -// CHECK: } +// CHECK: call @_ZN2S2C1Ev(%[[VAL_3]]) : (!cc.ptr>) -> () +// CHECK: %[[VAL_4:.*]] = cc.load %[[VAL_1]] : !cc.ptr +// CHECK: call @_ZN2S2clEb(%[[VAL_4]]) : (i1) -> () +// CHECK: quake.x %[[VAL_2]] : (!quake.ref) -> () +// CHECK: return +// CHECK: } + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__S2( +// CHECK-SAME: %[[VAL_0:.*]]: i1) +// CHECK: %[[VAL_1:.*]] = cc.alloca i1 +// CHECK: cc.store %[[VAL_0]], %[[VAL_1]] : !cc.ptr +// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.ref +// CHECK: quake.z %[[VAL_2]] : (!quake.ref) -> () +// CHECK: return +// CHECK: } -// CHECK-LABEL: func.func @__nvqpp__mlirgen__S2 +// CHECK: func.func private @_ZN2S2C1Ev(!cc.ptr>) +// CHECK: func.func private @_ZN2S2clEb(i1) +// CHECK: func.func @_ZN2S1clEb( diff --git a/test/AST-Quake/indirect_callable.cpp b/test/AST-Quake/indirect_callable.cpp new file mode 100644 index 0000000000..ba429e9782 --- /dev/null +++ b/test/AST-Quake/indirect_callable.cpp @@ -0,0 +1,48 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. 
* + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// RUN: cudaq-quake %cpp_std %s | cudaq-opt | FileCheck %s + +#include + +__qpu__ int rando_qernel(double); + +__qpu__ void superstar_qernel(const cudaq::qkernel& bob, double dub) { + auto size = bob(dub); + cudaq::qvector q(size); + mz(q); +} + +void meanwhile_on_safari() { + cudaq::qkernel tiger{rando_qernel}; + superstar_qernel(tiger, 11.0); +} + +// clang-format off +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_superstar_qernel._Z16superstar_qernelRKN5cudaq7qkernelIFidEEEd( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.indirect_callable<(f64) -> i32>, +// CHECK-SAME: %[[VAL_1:.*]]: f64) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_2:.*]] = cc.alloca f64 +// CHECK: cc.store %[[VAL_1]], %[[VAL_2]] : !cc.ptr +// CHECK: %[[VAL_3:.*]] = cc.load %[[VAL_2]] : !cc.ptr +// CHECK: %[[VAL_4:.*]] = cc.call_indirect_callable %[[VAL_0]], %[[VAL_3]] : (!cc.indirect_callable<(f64) -> i32>, f64) -> i32 +// CHECK: %[[VAL_5:.*]] = cc.alloca i32 +// CHECK: cc.store %[[VAL_4]], %[[VAL_5]] : !cc.ptr +// CHECK: %[[VAL_6:.*]] = cc.load %[[VAL_5]] : !cc.ptr +// CHECK: %[[VAL_7:.*]] = cc.cast signed %[[VAL_6]] : (i32) -> i64 +// CHECK: %[[VAL_8:.*]] = quake.alloca !quake.veq[%[[VAL_7]] : i64] +// CHECK: %[[VAL_9:.*]] = quake.mz %[[VAL_8]] : (!quake.veq) -> !cc.stdvec +// CHECK: return +// CHECK: } + +// CHECK-LABEL: func.func @_Z16superstar_qernelRKN5cudaq7qkernelIFidEEEd( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_1:.*]]: f64) attributes {no_this} { +// CHECK: return +// CHECK: } +// clang-format on diff --git a/test/AST-Quake/pure_quantum_struct.cpp b/test/AST-Quake/pure_quantum_struct.cpp new file mode 100644 index 0000000000..6f257d4d0d --- /dev/null +++ 
b/test/AST-Quake/pure_quantum_struct.cpp @@ -0,0 +1,157 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// clang-format off +// RUN: cudaq-quake %cpp_std %s | cudaq-opt | FileCheck %s +// RUN: cudaq-quake %cpp_std %s | cudaq-translate --convert-to=qir | FileCheck --check-prefix=QIR %s +// clang-format on + +#include "cudaq.h" + +struct test { + cudaq::qview<> q; + cudaq::qview<> r; +}; + +__qpu__ void applyH(cudaq::qubit &q) { h(q); } +__qpu__ void applyX(cudaq::qubit &q) { x(q); } +__qpu__ void kernel(test t) { + h(t.q); + s(t.r); + + applyH(t.q[0]); + applyX(t.r[0]); +} + +// clang-format off +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_kernel._Z6kernel4test( +// CHECK-SAME: %[[VAL_0:.*]]: !quake.struq, !quake.veq>) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_3:.*]] = quake.get_member %[[VAL_0]][0] : (!quake.struq, !quake.veq>) -> !quake.veq +// CHECK: %[[VAL_4:.*]] = quake.veq_size %[[VAL_3]] : (!quake.veq) -> i64 +// CHECK: %[[VAL_12:.*]] = quake.get_member %[[VAL_0]][1] : (!quake.struq, !quake.veq>) -> !quake.veq +// CHECK: %[[VAL_13:.*]] = quake.veq_size %[[VAL_12]] : (!quake.veq) -> i64 +// CHECK: %[[VAL_21:.*]] = quake.get_member %[[VAL_0]][0] : (!quake.struq, !quake.veq>) -> !quake.veq +// CHECK: %[[VAL_22:.*]] = quake.extract_ref %[[VAL_21]][0] : (!quake.veq) -> !quake.ref +// CHECK: call @__nvqpp__mlirgen__function_applyH._Z6applyHRN5cudaq5quditILm2EEE(%[[VAL_22]]) : (!quake.ref) -> () +// CHECK: %[[VAL_23:.*]] = quake.get_member %[[VAL_0]][1] 
: (!quake.struq, !quake.veq>) -> !quake.veq +// CHECK: %[[VAL_24:.*]] = quake.extract_ref %[[VAL_23]][0] : (!quake.veq) -> !quake.ref +// CHECK: call @__nvqpp__mlirgen__function_applyX._Z6applyXRN5cudaq5quditILm2EEE(%[[VAL_24]]) : (!quake.ref) -> () +// CHECK: return +// CHECK: } +// clang-format on + +__qpu__ void entry_initlist() { + cudaq::qvector q(2), r(2); + test tt{q, r}; + kernel(tt); +} + +// clang-format off +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_entry_initlist._Z14entry_initlistv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_2:.*]] = quake.make_struq %[[VAL_0]], %[[VAL_1]] : (!quake.veq<2>, !quake.veq<2>) -> !quake.struq, !quake.veq> +// CHECK: call @__nvqpp__mlirgen__function_kernel._Z6kernel4test(%[[VAL_2]]) : (!quake.struq, !quake.veq>) -> () +// CHECK: return +// CHECK: } +// clang-format on + +__qpu__ void entry_ctor() { + cudaq::qvector q(2), r(2); + test tt(q, r); + h(tt.r[0]); +} + +// clang-format off +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_entry_ctor._Z10entry_ctorv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_2:.*]] = quake.extract_ref %[[VAL_1]][0] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.h %[[VAL_2]] : (!quake.ref) -> () +// CHECK: return +// CHECK: } +// clang-format on + +// clang-format off +// QIR-LABEL: define void @__nvqpp__mlirgen__function_kernel._Z6kernel4test({ +// QIR-SAME: %[[VAL_0:.*]]*, %[[VAL_0]]* } %[[VAL_1:.*]]) local_unnamed_addr { +// QIR: %[[VAL_2:.*]] = extractvalue { %[[VAL_0]]*, %[[VAL_0]]* } %[[VAL_1]], 0 +// QIR: %[[VAL_3:.*]] = tail call i64 @__quantum__rt__array_get_size_1d(%[[VAL_0]]* %[[VAL_2]]) +// QIR: %[[VAL_4:.*]] = icmp sgt i64 %[[VAL_3]], 0 +// QIR: br i1 %[[VAL_4]], label %[[VAL_5:.*]], 
label %[[VAL_6:.*]] +// QIR: .lr.ph: ; preds = %[[VAL_7:.*]], %[[VAL_5]] +// QIR: %[[VAL_8:.*]] = phi i64 [ %[[VAL_9:.*]], %[[VAL_5]] ], [ 0, %[[VAL_7]] ] +// QIR: %[[VAL_10:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_0]]* %[[VAL_2]], i64 %[[VAL_8]]) +// QIR: %[[VAL_11:.*]] = bitcast i8* %[[VAL_10]] to %[[VAL_12:.*]]** +// QIR: %[[VAL_13:.*]] = load %[[VAL_12]]*, %[[VAL_12]]** %[[VAL_11]], align 8 +// QIR: tail call void @__quantum__qis__h(%[[VAL_12]]* %[[VAL_13]]) +// QIR: %[[VAL_9]] = add nuw nsw i64 %[[VAL_8]], 1 +// QIR: %[[VAL_14:.*]] = icmp eq i64 %[[VAL_9]], %[[VAL_3]] +// QIR: br i1 %[[VAL_14]], label %[[VAL_6]], label %[[VAL_5]] +// QIR: ._crit_edge: ; preds = %[[VAL_5]], %[[VAL_7]] +// QIR: %[[VAL_15:.*]] = extractvalue { %[[VAL_0]]*, %[[VAL_0]]* } %[[VAL_1]], 1 +// QIR: %[[VAL_16:.*]] = tail call i64 @__quantum__rt__array_get_size_1d(%[[VAL_0]]* %[[VAL_15]]) +// QIR: %[[VAL_17:.*]] = icmp sgt i64 %[[VAL_16]], 0 +// QIR: br i1 %[[VAL_17]], label %[[VAL_18:.*]], label %[[VAL_19:.*]] +// QIR: .lr.ph3: ; preds = %[[VAL_6]], %[[VAL_18]] +// QIR: %[[VAL_20:.*]] = phi i64 [ %[[VAL_21:.*]], %[[VAL_18]] ], [ 0, %[[VAL_6]] ] +// QIR: %[[VAL_22:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_0]]* %[[VAL_15]], i64 %[[VAL_20]]) +// QIR: %[[VAL_23:.*]] = bitcast i8* %[[VAL_22]] to %[[VAL_12]]** +// QIR: %[[VAL_24:.*]] = load %[[VAL_12]]*, %[[VAL_12]]** %[[VAL_23]], align 8 +// QIR: tail call void @__quantum__qis__s(%[[VAL_12]]* %[[VAL_24]]) +// QIR: %[[VAL_21]] = add nuw nsw i64 %[[VAL_20]], 1 +// QIR: %[[VAL_25:.*]] = icmp eq i64 %[[VAL_21]], %[[VAL_16]] +// QIR: br i1 %[[VAL_25]], label %[[VAL_19]], label %[[VAL_18]] +// QIR: ._crit_edge4: ; preds = %[[VAL_18]], %[[VAL_6]] +// QIR: %[[VAL_26:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_0]]* %[[VAL_2]], i64 0) +// QIR: %[[VAL_27:.*]] = bitcast i8* %[[VAL_26]] to %[[VAL_12]]** +// QIR: %[[VAL_28:.*]] = load %[[VAL_12]]*, %[[VAL_12]]** %[[VAL_27]], 
align 8 +// QIR: tail call void @__quantum__qis__h(%[[VAL_12]]* %[[VAL_28]]) +// QIR: %[[VAL_29:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_0]]* %[[VAL_15]], i64 0) +// QIR: %[[VAL_30:.*]] = bitcast i8* %[[VAL_29]] to %[[VAL_12]]** +// QIR: %[[VAL_31:.*]] = load %[[VAL_12]]*, %[[VAL_12]]** %[[VAL_30]], align 8 +// QIR: tail call void @__quantum__qis__x(%[[VAL_12]]* %[[VAL_31]]) +// QIR: ret void +// QIR: } + +// QIR-LABEL: define void @__nvqpp__mlirgen__function_entry_initlist._Z14entry_initlistv() local_unnamed_addr { +// QIR: %[[VAL_0:.*]] = tail call %[[VAL_1:.*]]* @__quantum__rt__qubit_allocate_array(i64 4) +// QIR: %[[VAL_2:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_1]]* %[[VAL_0]], i64 0) +// QIR: %[[VAL_3:.*]] = bitcast i8* %[[VAL_2]] to %[[VAL_4:.*]]** +// QIR: %[[VAL_5:.*]] = load %[[VAL_4]]*, %[[VAL_4]]** %[[VAL_3]], align 8 +// QIR: tail call void @__quantum__qis__h(%[[VAL_4]]* %[[VAL_5]]) +// QIR: %[[VAL_6:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_1]]* %[[VAL_0]], i64 1) +// QIR: %[[VAL_7:.*]] = bitcast i8* %[[VAL_6]] to %[[VAL_4]]** +// QIR: %[[VAL_8:.*]] = load %[[VAL_4]]*, %[[VAL_4]]** %[[VAL_7]], align 8 +// QIR: tail call void @__quantum__qis__h(%[[VAL_4]]* %[[VAL_8]]) +// QIR: %[[VAL_9:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_1]]* %[[VAL_0]], i64 2) +// QIR: %[[VAL_10:.*]] = bitcast i8* %[[VAL_9]] to %[[VAL_4]]** +// QIR: %[[VAL_11:.*]] = load %[[VAL_4]]*, %[[VAL_4]]** %[[VAL_10]], align 8 +// QIR: tail call void @__quantum__qis__s(%[[VAL_4]]* %[[VAL_11]]) +// QIR: %[[VAL_12:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_1]]* %[[VAL_0]], i64 3) +// QIR: %[[VAL_13:.*]] = bitcast i8* %[[VAL_12]] to %[[VAL_4]]** +// QIR: %[[VAL_14:.*]] = load %[[VAL_4]]*, %[[VAL_4]]** %[[VAL_13]], align 8 +// QIR: tail call void @__quantum__qis__s(%[[VAL_4]]* %[[VAL_14]]) +// QIR: tail call void @__quantum__qis__h(%[[VAL_4]]* %[[VAL_5]]) +// 
QIR: tail call void @__quantum__qis__x(%[[VAL_4]]* %[[VAL_11]]) +// QIR: tail call void @__quantum__rt__qubit_release_array(%[[VAL_1]]* %[[VAL_0]]) +// QIR: ret void +// QIR: } + +// QIR-LABEL: define void @__nvqpp__mlirgen__function_entry_ctor._Z10entry_ctorv() local_unnamed_addr { +// QIR: %[[VAL_0:.*]] = tail call %[[VAL_1:.*]]* @__quantum__rt__qubit_allocate_array(i64 4) +// QIR: %[[VAL_2:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_1]]* %[[VAL_0]], i64 2) +// QIR: %[[VAL_3:.*]] = bitcast i8* %[[VAL_2]] to %[[VAL_4:.*]]** +// QIR: %[[VAL_5:.*]] = load %[[VAL_4]]*, %[[VAL_4]]** %[[VAL_3]], align 8 +// QIR: tail call void @__quantum__qis__h(%[[VAL_4]]* %[[VAL_5]]) +// QIR: tail call void @__quantum__rt__qubit_release_array(%[[VAL_1]]* %[[VAL_0]]) +// QIR: ret void +// QIR: } +// clang-format on diff --git a/test/AST-Quake/vector.cpp b/test/AST-Quake/vector.cpp index d124a304e7..b75ed287b4 100644 --- a/test/AST-Quake/vector.cpp +++ b/test/AST-Quake/vector.cpp @@ -39,6 +39,21 @@ struct simple_float_rotation { } }; +struct difficult_symphony { + auto operator()(std::vector theta) __qpu__ { + float *firstData = theta.data(); + cudaq::qvector q(1); + rx(firstData[0], q[0]); + mz(q); + } +}; + +// clang-format off +// CHECK-LABEL: func.func @__nvqpp__mlirgen__difficult_symphony( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.stdvec{{.*}}) attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_1:.*]] = cc.stdvec_data %[[VAL_0]] : (!cc.stdvec) -> !cc.ptr> +// clang-format on + int main() { std::vector vec_args = {0.63}; @@ -63,6 +78,9 @@ int main() { printf("Observed: %s, %lu\n", bits.c_str(), count); } + auto bob_counts = cudaq::sample(difficult_symphony{}, float_args); + bob_counts.dump(); + // can get from counts too printf("Exp: %lf\n", float_counts.expectation()); return 0; diff --git a/test/AST-Quake/vector_bool.cpp b/test/AST-Quake/vector_bool.cpp index 84f296416b..b23ddbdf11 100644 --- a/test/AST-Quake/vector_bool.cpp +++ 
b/test/AST-Quake/vector_bool.cpp @@ -6,7 +6,7 @@ * the terms of the Apache License 2.0 which accompanies this distribution. * ******************************************************************************/ -// RUN: cudaq-quake %cpp_std %s | FileCheck %s +// RUN: cudaq-quake %cpp_std %s | cudaq-opt | FileCheck %s // Simple test using a std::vector operator. @@ -20,6 +20,7 @@ struct t1 { } }; +// clang-format off // CHECK-LABEL: func.func @__nvqpp__mlirgen__t1( // CHECK-SAME: %[[VAL_0:.*]]: !cc.stdvec{{.*}}) -> i1 attributes // CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<2> @@ -31,4 +32,47 @@ struct t1 { // CHECK: return %[[VAL_5]] : i1 // CHECK: } // CHECK-NOT: func.func private @_ZNKSt14_Bit_referencecvbEv() -> i1 +// clang-format on +struct VectorBoolReturn { + std::vector operator()() __qpu__ { + cudaq::qvector q(4); + return mz(q); + } +}; + +// clang-format off +// CHECK-LABEL: func.func @__nvqpp__mlirgen__VectorBoolReturn() -> !cc.stdvec attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<4> +// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] : (!quake.veq<4>) -> !cc.stdvec +// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_2]] : (!cc.stdvec) -> !cc.stdvec +// CHECK: %[[VAL_4:.*]] = cc.stdvec_data %[[VAL_3]] : (!cc.stdvec) -> !cc.ptr +// CHECK: %[[VAL_5:.*]] = cc.stdvec_size %[[VAL_3]] : (!cc.stdvec) -> i64 +// CHECK: %[[VAL_6:.*]] = call @__nvqpp_vectorCopyCtor(%[[VAL_4]], %[[VAL_5]], %[[VAL_0]]) : (!cc.ptr, i64, i64) -> !cc.ptr +// CHECK: %[[VAL_7:.*]] = cc.stdvec_init %[[VAL_6]], %[[VAL_5]] : (!cc.ptr, i64) -> !cc.stdvec +// CHECK: return %[[VAL_7]] : !cc.stdvec +// CHECK: } +// clang-format on + +struct VectorBoolResult { + std::vector operator()() __qpu__ { + cudaq::qvector q(4); + std::vector vec = mz(q); + return vec; + } +}; + +// clang-format off +// CHECK-LABEL: func.func @__nvqpp__mlirgen__VectorBoolResult() -> !cc.stdvec attributes 
{"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<4> +// CHECK: %[[VAL_2:.*]] = quake.mz %[[VAL_1]] name "vec" : (!quake.veq<4>) -> !cc.stdvec +// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_2]] : (!cc.stdvec) -> !cc.stdvec +// CHECK: %[[VAL_4:.*]] = cc.stdvec_data %[[VAL_3]] : (!cc.stdvec) -> !cc.ptr +// CHECK: %[[VAL_5:.*]] = cc.stdvec_size %[[VAL_3]] : (!cc.stdvec) -> i64 +// CHECK: %[[VAL_6:.*]] = call @__nvqpp_vectorCopyCtor(%[[VAL_4]], %[[VAL_5]], %[[VAL_0]]) : (!cc.ptr, i64, i64) -> !cc.ptr +// CHECK: %[[VAL_7:.*]] = cc.stdvec_init %[[VAL_6]], %[[VAL_5]] : (!cc.ptr, i64) -> !cc.stdvec +// CHECK: return %[[VAL_7]] : !cc.stdvec +// CHECK: } +// clang-format on diff --git a/test/AST-Quake/vector_int-0.cpp b/test/AST-Quake/vector_int-0.cpp new file mode 100644 index 0000000000..4fcc2c1b75 --- /dev/null +++ b/test/AST-Quake/vector_int-0.cpp @@ -0,0 +1,56 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: cudaq-quake %cpp_std %s | cudaq-opt | FileCheck %s + +#include + +struct VectorIntReturn { + std::vector operator()() __qpu__ { return {142, 243}; } +}; + +// clang-format off +// CHECK-LABEL: func.func @__nvqpp__mlirgen__VectorIntReturn() -> !cc.stdvec attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 4 : i64 +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 142 : i32 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 243 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 2 : i64 +// CHECK-DAG: %[[VAL_4:.*]] = cc.alloca !cc.array +// CHECK: %[[VAL_5:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_1]], %[[VAL_5]] : !cc.ptr +// CHECK: %[[VAL_6:.*]] = cc.compute_ptr %[[VAL_4]][1] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_2]], %[[VAL_6]] : !cc.ptr +// CHECK: %[[VAL_7:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_8:.*]] = call @__nvqpp_vectorCopyCtor(%[[VAL_7]], %[[VAL_3]], %[[VAL_0]]) : (!cc.ptr, i64, i64) -> !cc.ptr +// CHECK: %[[VAL_9:.*]] = cc.stdvec_init %[[VAL_8]], %[[VAL_3]] : (!cc.ptr, i64) -> !cc.stdvec +// CHECK: return %[[VAL_9]] : !cc.stdvec +// CHECK: } +// clang-format on + +struct VectorIntResult { + std::vector operator()() __qpu__ { + std::vector result(2); + result[0] = 42; + return result; + } +}; + +// clang-format off +// CHECK-LABEL: func.func @__nvqpp__mlirgen__VectorIntResult() -> !cc.stdvec attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 2 : i64 +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 4 : i64 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 42 : i32 +// CHECK-DAG: %[[VAL_3:.*]] = cc.alloca !cc.array +// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_2]], %[[VAL_4]] : !cc.ptr +// CHECK: %[[VAL_5:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr>) -> !cc.ptr +// CHECK: 
%[[VAL_6:.*]] = call @__nvqpp_vectorCopyCtor(%[[VAL_5]], %[[VAL_0]], %[[VAL_1]]) : (!cc.ptr, i64, i64) -> !cc.ptr +// CHECK: %[[VAL_7:.*]] = cc.stdvec_init %[[VAL_6]], %[[VAL_0]] : (!cc.ptr, i64) -> !cc.stdvec +// CHECK: return %[[VAL_7]] : !cc.stdvec +// CHECK: } +// clang-format on diff --git a/test/AST-Quake/vector_int-1.cpp b/test/AST-Quake/vector_int-1.cpp new file mode 100644 index 0000000000..3bdfae634f --- /dev/null +++ b/test/AST-Quake/vector_int-1.cpp @@ -0,0 +1,61 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// RUN: cudaq-quake %cpp_std %s | cudaq-opt -kernel-execution | FileCheck %s + +#include + +__qpu__ std::vector doubleDeckerBus() { + std::vector ii(2); + ii[0] = 2; + return ii; +} + +__qpu__ void touringLondon() { + auto ii = doubleDeckerBus(); + cudaq::qvector q(ii[0]); + return; +} + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_doubleDeckerBus._Z15doubleDeckerBusv( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, i64}>> {llvm.sret = !cc.struct<{!cc.ptr, i64}>}) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i64 +// CHECK: %[[VAL_2:.*]] = arith.constant 4 : i64 +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_4:.*]] = cc.alloca !cc.array +// CHECK: %[[VAL_5:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr>) -> !cc.ptr +// CHECK: cc.store %[[VAL_3]], %[[VAL_5]] : !cc.ptr +// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_7:.*]] = call @__nvqpp_vectorCopyCtor(%[[VAL_6]], %[[VAL_1]], %[[VAL_2]]) : (!cc.ptr, i64, i64) -> !cc.ptr +// CHECK: %[[VAL_8:.*]] = cc.stdvec_init 
%[[VAL_7]], %[[VAL_1]] : (!cc.ptr, i64) -> !cc.stdvec +// CHECK: %[[VAL_9:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr, i64}>>) -> !cc.ptr, i64}>> +// CHECK: %[[VAL_10:.*]] = cc.stdvec_data %[[VAL_8]] : (!cc.stdvec) -> !cc.ptr +// CHECK: %[[VAL_11:.*]] = cc.compute_ptr %[[VAL_9]][0] : (!cc.ptr, i64}>>) -> !cc.ptr> +// CHECK: %[[VAL_12:.*]] = cc.cast %[[VAL_11]] : (!cc.ptr>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_10]], %[[VAL_12]] : !cc.ptr> +// CHECK: %[[VAL_13:.*]] = cc.stdvec_size %[[VAL_8]] : (!cc.stdvec) -> i64 +// CHECK: %[[VAL_14:.*]] = cc.compute_ptr %[[VAL_9]][1] : (!cc.ptr, i64}>>) -> !cc.ptr +// CHECK: cc.store %[[VAL_13]], %[[VAL_14]] : !cc.ptr +// CHECK: return +// CHECK: } + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_touringLondon._Z13touringLondonv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = cc.alloca !cc.struct<{!cc.ptr, i64}> +// CHECK: call @__nvqpp__mlirgen__function_doubleDeckerBus._Z15doubleDeckerBusv(%[[VAL_0]]) : (!cc.ptr, i64}>>) -> () +// CHECK: %[[VAL_10:.*]] = cc.compute_ptr %[[VAL_0]][0] : (!cc.ptr, i64}>>) -> !cc.ptr> +// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_10]] : !cc.ptr> +// CHECK: %[[VAL_2:.*]] = cc.compute_ptr %[[VAL_0]][1] : (!cc.ptr, i64}>>) -> !cc.ptr +// CHECK: %[[VAL_3:.*]] = cc.load %[[VAL_2]] : !cc.ptr +// CHECK: %[[VAL_4:.*]] = cc.stdvec_init %[[VAL_1]], %[[VAL_3]] : (!cc.ptr, i64) -> !cc.stdvec +// CHECK: %[[VAL_5:.*]] = cc.stdvec_data %[[VAL_4]] : (!cc.stdvec) -> !cc.ptr> +// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_7:.*]] = cc.load %[[VAL_6]] : !cc.ptr +// CHECK: %[[VAL_8:.*]] = cc.cast signed %[[VAL_7]] : (i32) -> i64 +// CHECK: %[[VAL_9:.*]] = quake.alloca !quake.veq{{\[}}%[[VAL_8]] : i64] +// CHECK: return +// CHECK: } diff --git a/test/AST-error/kernel_invalid_argument-2.cpp b/test/AST-error/kernel_invalid_argument.cpp similarity index 100% rename from test/AST-error/kernel_invalid_argument-2.cpp rename to 
test/AST-error/kernel_invalid_argument.cpp diff --git a/test/AST-error/kernel_with_member_functions.cpp b/test/AST-error/kernel_with_member_functions.cpp new file mode 100644 index 0000000000..7275e15f1c --- /dev/null +++ b/test/AST-error/kernel_with_member_functions.cpp @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// REQUIRES: c++20 +// RUN: cudaq-quake %s -verify + +#include "cudaq.h" + +// expected-error@+1 {{struct with user-defined methods is not allowed}} +struct test { + cudaq::qview<> q; + int myMethod() { return 0; } +}; + +__qpu__ void kernel() { + cudaq::qvector q(2); + test t(q); +} diff --git a/test/AST-error/quantum_struct_declarations.cpp b/test/AST-error/quantum_struct_declarations.cpp new file mode 100644 index 0000000000..9d766b82da --- /dev/null +++ b/test/AST-error/quantum_struct_declarations.cpp @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// REQUIRES: c++20 +// RUN: cudaq-quake %s -verify + +#include "cudaq.h" + +// expected-error@+1 {{quantum struct has invalid member type}} +struct error1 { + cudaq::qvector<4> wrong; +}; + +// expected-error@+1 {{kernel argument type not supported}} +__qpu__ void bug1(error1&); + +// expected-error@+1 {{quantum struct has invalid member type}} +struct error2 { + cudaq::qubit cubit; +}; + +// expected-error@+1 {{kernel argument type not supported}} +__qpu__ void bug2(error2&); + +// expected-error@+2 {{quantum struct has invalid member type}} +// expected-error@+1 {{quantum struct has invalid member type}} +struct error3 { + cudaq::qubit nope; + cudaq::qvector<2> sorry; +}; + +// expected-error@+1 {{kernel argument type not supported}} +__qpu__ void bug3(error3&); + +__qpu__ void funny() { + error1 e1; + error2 e2; + error3 e3; + bug1(e1); + bug2(e2); + bug3(e3); +} diff --git a/test/AST-error/quantum_struct_signature.cpp b/test/AST-error/quantum_struct_signature.cpp new file mode 100644 index 0000000000..88e39f132e --- /dev/null +++ b/test/AST-error/quantum_struct_signature.cpp @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// REQUIRES: c++20 +// RUN: cudaq-quake %s -verify + +#include "cudaq.h" + +struct test { + cudaq::qubit &r; + cudaq::qview<> q; +}; + +// expected-error@+1 {{kernel result type not supported}} +__qpu__ test kernel(cudaq::qubit &q, cudaq::qview<> qq) { + test result(q, qq); + return result; +} diff --git a/test/AST-error/quantum_struct_with_struct_member.cpp b/test/AST-error/quantum_struct_with_struct_member.cpp new file mode 100644 index 0000000000..74fe2024f6 --- /dev/null +++ b/test/AST-error/quantum_struct_with_struct_member.cpp @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// REQUIRES: c++20 +// RUN: cudaq-quake %s -verify + +#include "cudaq.h" + +struct s { + cudaq::qview<> s; +}; +// expected-error@+2{{recursive quantum struct types are not allowed}} +// expected-error@+1{{quantum struct has invalid member type}} +struct test { + cudaq::qview<> q; + cudaq::qview<> r; + s s; +}; +__qpu__ void entry_ctor() { + cudaq::qvector q(2), r(2); + s s(q); + test tt(q, r, s); + h(tt.r[0]); +} diff --git a/test/AST-error/struct_quantum_and_classical.cpp b/test/AST-error/struct_quantum_and_classical.cpp new file mode 100644 index 0000000000..cdbdaf1517 --- /dev/null +++ b/test/AST-error/struct_quantum_and_classical.cpp @@ -0,0 +1,34 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. 
* + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// REQUIRES: c++20 +// RUN: cudaq-quake %s -verify + +#include "cudaq.h" + +// expected-error@+1 {{hybrid quantum-classical struct types are not allowed}} +struct test { + int i; + double d; + cudaq::qview<> q; +}; + +__qpu__ void hello(cudaq::qubit &q) { h(q); } + +__qpu__ void kernel(test t) { + h(t.q); + hello(t.q[0]); +} + +__qpu__ void entry(int i) { + cudaq::qvector q(i); + test tt{1, 2.2, q}; + // this fails non-default ctor ConvertExpr:2899, + // but this is not what we are testing here + // kernel(tt); +} diff --git a/test/AST-error/vector.cpp b/test/AST-error/vector.cpp index 45d217c528..f7eb7cb85e 100644 --- a/test/AST-error/vector.cpp +++ b/test/AST-error/vector.cpp @@ -22,7 +22,6 @@ struct VectorVectorReturner { for (std::size_t j = 0, M = v.size(); j < M; ++j) r[j] = v[j]; } - // expected-error@+1{{C++ constructor (non-default)}} return result; } }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8ab7072818..097456841f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -30,11 +30,10 @@ set(NVQPP_TEST_PARAMS get_property(test_cudaq_libraries GLOBAL PROPERTY CUDAQ_RUNTIME_LIBS) set(NVQPP_TEST_DEPENDS + CircuitCheck cudaq-opt cudaq-translate - CircuitCheck FileCheck - test_argument_conversion CustomPassPlugin ) diff --git a/test/Quake-QIR/argument.qke b/test/Quake-QIR/argument.qke index cbcc2fb451..61d737d5ce 100644 --- a/test/Quake-QIR/argument.qke +++ b/test/Quake-QIR/argument.qke @@ -31,7 +31,7 @@ func.func @test_0(%0: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, !cc.std func.func @test_3(%0: !cc.ptr, %1: !cc.ptr, !cc.ptr, !cc.ptr}>, !cc.struct<{!cc.ptr, !cc.ptr, !cc.ptr}>}>>) { return } +} -// CHECK-LABEL: define void 
@__nvqpp__mlirgen__test_3({ { i16*, i64 }, { float*, i64 } } -// CHECK-SAME: %[[VAL_0:.*]]) local_unnamed_addr { +// CHECK-LABEL: define void @__nvqpp__mlirgen__test_3({ { i16*, i64 }, { float*, i64 } } +// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ // CHECK: %[[VAL_1:.*]] = extractvalue { { i16*, i64 }, { float*, i64 } } %[[VAL_0]], 0 // CHECK: %[[VAL_2:.*]] = extractvalue { i16*, i64 } %[[VAL_1]], 0 // CHECK: %[[VAL_3:.*]] = extractvalue { i16*, i64 } %[[VAL_1]], 1 @@ -205,8 +202,7 @@ func.func @test_3(%0: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, %1: !cc.ptr, !cc.ptr, !cc.ptr} } // CHECK-LABEL: define void @__nvqpp__mlirgen__test_0({ i8*, i64 }* nocapture writeonly sret({ i8*, i64 }) -// CHECK-SAME: %[[VAL_0:.*]], i32 %[[VAL_1:.*]]) local_unnamed_addr { +// CHECK-SAME: %[[VAL_0:.*]], i32 %[[VAL_1:.*]]) {{.*}}{ // CHECK: %[[VAL_2:.*]] = sext i32 %[[VAL_1]] to i64 // CHECK: %[[VAL_3:.*]] = tail call %[[VAL_4:.*]]* @__quantum__rt__qubit_allocate_array(i64 %[[VAL_2]]) // CHECK: %[[VAL_5:.*]] = tail call i64 @__quantum__rt__array_get_size_1d(%[[VAL_4]]* %[[VAL_3]]) @@ -104,7 +104,7 @@ func.func @test_0(%1: !cc.ptr, !cc.ptr, !cc.ptr} // CHECK: } // CHECK-LABEL: define void @test_0({ i8*, i8*, i8* }* sret({ i8*, i8*, i8* }) -// CHECK-SAME: %[[VAL_0:.*]], i8* nocapture readnone %[[VAL_1:.*]], i32 %[[VAL_2:.*]]) local_unnamed_addr { +// CHECK-SAME: %[[VAL_0:.*]], i8* nocapture readnone %[[VAL_1:.*]], i32 %[[VAL_2:.*]]) {{.*}}{ // CHECK: %[[VAL_3:.*]] = alloca { i32, { i1*, i64 } }, align 8 // CHECK: %[[VAL_4:.*]] = bitcast { i32, { i1*, i64 } }* %[[VAL_3]] to i8* // CHECK: %[[VAL_5:.*]] = getelementptr inbounds { i32, { i1*, i64 } }, { i32, { i1*, i64 } }* %[[VAL_3]], i64 0, i32 0 @@ -141,7 +141,7 @@ func.func @test_1(%1: !cc.ptr> {llvm.sret = !cc.struct<{i1, } // CHECK-LABEL: define void @__nvqpp__mlirgen__test_1({ i1, i1 }* nocapture writeonly sret({ i1, i1 }) -// CHECK-SAME: %[[VAL_0:.*]]) 
local_unnamed_addr { +// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ // CHECK: %[[VAL_1:.*]] = tail call %[[VAL_2:.*]]* @__quantum__rt__qubit_allocate_array(i64 2) // CHECK: %[[VAL_3:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_2]]* %[[VAL_1]], i64 0) // CHECK: %[[VAL_4:.*]] = bitcast i8* %[[VAL_3]] to %[[VAL_5:.*]]** @@ -166,8 +166,7 @@ func.func @test_1(%1: !cc.ptr> {llvm.sret = !cc.struct<{i1, // CHECK: } // CHECK-LABEL: define void @test_1({ i1, i1 }* nocapture writeonly sret({ i1, i1 }) -// CHECK-SAME: %[[VAL_0:.*]], i8* nocapture readnone -// CHECK-SAME: %[[VAL_1:.*]]) local_unnamed_addr { +// CHECK-SAME: %[[VAL_0:.*]], i8* nocapture readnone %[[VAL_1:.*]]) {{.*}}{ // CHECK: %[[VAL_2:.*]] = alloca [2 x i8], align 8 // CHECK: %[[VAL_3:.*]] = getelementptr inbounds [2 x i8], [2 x i8]* %[[VAL_2]], i64 0, i64 0 // CHECK: call void @altLaunchKernel(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @test_1.kernelName, i64 0, i64 0), i8* nonnull bitcast ({ i8*, i64 } (i8*, i1)* @test_1.thunk to i8*), i8* nonnull %[[VAL_3]], i64 2, i64 0) @@ -202,14 +201,13 @@ func.func @test_2(%1: !cc.ptr> {llvm.sret = !cc } // CHECK-LABEL: define void @__nvqpp__mlirgen__test_2({ i16, float, double, i64 }* nocapture writeonly sret({ i16, float, double, i64 }) -// CHECK-SAME: %[[VAL_0:.*]]) local_unnamed_addr #{{[0-9]+}} { +// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ // CHECK: store { i16, float, double, i64 } { i16 8, float 0x40159999A0000000, double 3.783000e+01, i64 1479 }, { i16, float, double, i64 }* %[[VAL_0]], align 8 // CHECK: ret void // CHECK: } // CHECK-LABEL: define void @test_2({ i16, float, double, i64 }* nocapture writeonly sret({ i16, float, double, i64 }) -// CHECK-SAME: %[[VAL_0:.*]], i8* nocapture readnone -// CHECK-SAME: %[[VAL_1:.*]]) local_unnamed_addr { +// CHECK-SAME: %[[VAL_0:.*]], i8* nocapture readnone %[[VAL_1:.*]]) {{.*}}{ // CHECK: %[[VAL_2:.*]] = alloca { { i16, float, double, i64 } }, align 8 // CHECK: %[[VAL_3:.*]] = bitcast { { i16, 
float, double, i64 } }* %[[VAL_2]] to i8* // CHECK: call void @altLaunchKernel(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @test_2.kernelName, i64 0, i64 0), i8* nonnull bitcast ({ i8*, i64 } (i8*, i1)* @test_2.thunk to i8*), i8* nonnull %[[VAL_3]], i64 24, i64 0) @@ -249,7 +247,7 @@ func.func @test_3(%1: !cc.ptr> {llvm.sret = !cc.array> {llvm.sret = !cc.array> {llvm.sret = !cc.struct<{i } // CHECK-LABEL: define void @__nvqpp__mlirgen__test_4({ i64, double }* nocapture writeonly sret({ i64, double }) -// CHECK-SAME: %[[VAL_0:.*]]) local_unnamed_addr #{{[0-9]+}} { +// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ // CHECK: %[[VAL_1:.*]] = getelementptr inbounds { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 0 // CHECK: store i64 537892, i64* %[[VAL_1]], align 8 // CHECK: %[[VAL_2:.*]] = getelementptr { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 1 @@ -312,8 +309,7 @@ func.func @test_4(%1: !cc.ptr> {llvm.sret = !cc.struct<{i // CHECK: } // CHECK-LABEL: define void @test_4({ i64, double }* nocapture writeonly sret({ i64, double }) -// CHECK-SAME: %[[VAL_0:.*]], i8* nocapture readnone -// CHECK-SAME: %[[VAL_1:.*]]) local_unnamed_addr { +// CHECK-SAME: %[[VAL_0:.*]], i8* nocapture readnone %[[VAL_1:.*]]) {{.*}}{ // CHECK: %[[VAL_2:.*]] = alloca { i64, double }, align 8 // CHECK: %[[VAL_3:.*]] = bitcast { i64, double }* %[[VAL_2]] to i8* // CHECK: call void @altLaunchKernel(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @test_4.kernelName, i64 0, i64 0), i8* nonnull bitcast ({ i8*, i64 } (i8*, i1)* @test_4.thunk to i8*), i8* nonnull %[[VAL_3]], i64 16, i64 0) @@ -339,7 +335,7 @@ func.func @test_5(%0: !cc.ptr> {llvm.sret = !cc.struct<{i } // CHECK-LABEL: define void @__nvqpp__mlirgen__test_5({ i64, double }* nocapture writeonly sret({ i64, double }) -// CHECK-SAME: %[[VAL_0:.*]]) local_unnamed_addr #{{[0-9]+}} { +// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ // CHECK: %[[VAL_1:.*]] = getelementptr inbounds { i64, double }, { i64, double }* 
%[[VAL_0]], i64 0, i32 0 // CHECK: store i64 537892, i64* %[[VAL_1]], align 8 // CHECK: %[[VAL_2:.*]] = getelementptr { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 1 @@ -348,7 +344,7 @@ func.func @test_5(%0: !cc.ptr> {llvm.sret = !cc.struct<{i // CHECK: } // CHECK-LABEL: define void @test_5({ i64, double }* nocapture writeonly sret({ i64, double }) -// CHECK-SAME: %[[VAL_0:.*]]) local_unnamed_addr { +// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ // CHECK: %[[VAL_1:.*]] = alloca { i64, double }, align 8 // CHECK: %[[VAL_2:.*]] = bitcast { i64, double }* %[[VAL_1]] to i8* // CHECK: call void @altLaunchKernel(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @test_5.kernelName, i64 0, i64 0), i8* nonnull bitcast ({ i8*, i64 } (i8*, i1)* @test_5.thunk to i8*), i8* nonnull %[[VAL_2]], i64 16, i64 0) @@ -365,7 +361,7 @@ func.func @test_5(%0: !cc.ptr> {llvm.sret = !cc.struct<{i } - +//===----------------------------------------------------------------------===// // CHECK-LABEL: define i64 @test_0.returnOffset() // CHECK: ret i64 8 diff --git a/test/Quake/dep_analysis.qke b/test/Quake/dep_analysis.qke new file mode 100644 index 0000000000..aa9a3b76d8 --- /dev/null +++ b/test/Quake/dep_analysis.qke @@ -0,0 +1,141 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. 
// +// ========================================================================== // + +// RUN: cudaq-opt --add-wireset --assign-wire-indices --dep-analysis %s | FileCheck %s + +// A more complete functional test +func.func @test1() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.null_wire + %3 = quake.h %0 : (!quake.wire) -> !quake.wire + %4 = quake.h %2 : (!quake.wire) -> !quake.wire + %5 = quake.h %1 : (!quake.wire) -> !quake.wire + %6:2 = quake.x [%3] %5 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %7 = quake.h %6#0 : (!quake.wire) -> !quake.wire + %8 = quake.y %6#1 : (!quake.wire) -> !quake.wire + %9 = quake.z %8 : (!quake.wire) -> !quake.wire + %10:2 = quake.x [%4] %9 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %11 = quake.h %10#0 : (!quake.wire) -> !quake.wire + %measOut, %wires = quake.mz %10#1 : (!quake.wire) -> (!quake.measure, !quake.wire) + %12 = quake.discriminate %measOut : (!quake.measure) -> i1 + quake.sink %wires : !quake.wire + quake.sink %11 : !quake.wire + quake.sink %7 : !quake.wire + return %12 : i1 +} + +// CHECK-LABEL: quake.wire_set @wires[2147483647] attributes {sym_visibility = "private"} + +// CHECK-LABEL: func.func @test1() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_1:.*]] = quake.h %[[VAL_0]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_2:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK: %[[VAL_3:.*]] = quake.h %[[VAL_2]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_4:.*]]:2 = quake.x {{\[}}%[[VAL_1]]] %[[VAL_3]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: %[[VAL_5:.*]] = quake.h %[[VAL_4]]#0 : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_6:.*]] = quake.y %[[VAL_4]]#1 : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_7:.*]] = quake.h %[[VAL_5]] : (!quake.wire) -> !quake.wire +// CHECK: 
%[[VAL_8:.*]] = quake.z %[[VAL_6]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_9:.*]]:2 = quake.x {{\[}}%[[VAL_7]]] %[[VAL_8]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: %[[VAL_10:.*]], %[[VAL_11:.*]] = quake.mz %[[VAL_9]]#1 : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_12:.*]] = quake.discriminate %[[VAL_10]] : (!quake.measure) -> i1 +// CHECK: quake.return_wire %[[VAL_11]] : !quake.wire +// CHECK: %[[VAL_13:.*]] = quake.h %[[VAL_9]]#0 : (!quake.wire) -> !quake.wire +// CHECK: quake.return_wire %[[VAL_13]] : !quake.wire +// CHECK: return %[[VAL_12]] : i1 +// CHECK: } + +// This a test mostly for scheduling +func.func @test2() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.null_wire + %3 = quake.h %2 : (!quake.wire) -> !quake.wire + %4 = quake.x %3 : (!quake.wire) -> !quake.wire + %5 = quake.y %4 : (!quake.wire) -> !quake.wire + %6 = quake.h %1 : (!quake.wire) -> !quake.wire + %7:2 = quake.x [%0] %6 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %8:2 = quake.x [%7#0] %5 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %9 = quake.z %7#1 : (!quake.wire) -> !quake.wire + %10 = quake.y %9 : (!quake.wire) -> !quake.wire + %11 = quake.x %10 : (!quake.wire) -> !quake.wire + %measOut, %wires = quake.mz %11 : (!quake.wire) -> (!quake.measure, !quake.wire) + quake.sink %8#0 : !quake.wire + quake.sink %8#1 : !quake.wire + quake.sink %wires : !quake.wire + %12 = quake.discriminate %measOut : (!quake.measure) -> i1 + return %12 : i1 +} + +// CHECK-LABEL: func.func @test2() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = quake.borrow_wire @wires[2] : !quake.wire +// CHECK: %[[VAL_1:.*]] = quake.h %[[VAL_0]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_2:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK: %[[VAL_3:.*]] = quake.h %[[VAL_2]] : (!quake.wire) -> !quake.wire +// CHECK: 
%[[VAL_4:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_5:.*]]:2 = quake.x {{\[}}%[[VAL_4]]] %[[VAL_1]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: %[[VAL_6:.*]] = quake.x %[[VAL_3]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_7:.*]] = quake.z %[[VAL_5]]#1 : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_8:.*]] = quake.y %[[VAL_6]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_9:.*]] = quake.y %[[VAL_7]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_10:.*]]:2 = quake.x {{\[}}%[[VAL_5]]#0] %[[VAL_8]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: quake.return_wire %[[VAL_10]]#0 : !quake.wire +// CHECK: quake.return_wire %[[VAL_10]]#1 : !quake.wire +// CHECK: %[[VAL_11:.*]] = quake.x %[[VAL_9]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_12:.*]], %[[VAL_13:.*]] = quake.mz %[[VAL_11]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: quake.return_wire %[[VAL_13]] : !quake.wire +// CHECK: %[[VAL_14:.*]] = quake.discriminate %[[VAL_12]] : (!quake.measure) -> i1 +// CHECK: return %[[VAL_14]] : i1 +// CHECK: } + +// This is a basic dead-code check +func.func @test3() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %0 = quake.null_wire + %1 = quake.null_wire + %measOut, %wires = quake.mz %1 : (!quake.wire) -> (!quake.measure, !quake.wire) + %2 = quake.discriminate %measOut : (!quake.measure) -> i1 + quake.sink %0 : !quake.wire + quake.sink %wires : !quake.wire + return %2 : i1 +} + +// CHECK-LABEL: func.func @test3() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_1:.*]], %[[VAL_2:.*]] = quake.mz %[[VAL_0]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_1]] : (!quake.measure) -> i1 +// CHECK: quake.return_wire %[[VAL_2]] : !quake.wire +// CHECK: return %[[VAL_3]] : i1 +// CHECK: } + +// This a test mostly for scheduling 
+func.func @test4() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %0 = quake.null_wire + %1 = quake.null_wire + %2:2 = quake.x [%0] %1 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %3 = quake.h %2#1 : (!quake.wire) -> !quake.wire + %4:2 = quake.x [%2#0] %3 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %5:2 = quake.mz %4#0 : (!quake.wire) -> (!quake.measure, !quake.wire) + %6 = quake.discriminate %5#0 : (!quake.measure) -> i1 + quake.sink %5#1 : !quake.wire + quake.sink %4#1 : !quake.wire + return %6 : i1 +} + +// CHECK-LABEL: func.func @test4() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK-DAG: %[[VAL_0:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK-DAG: %[[VAL_1:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK: %[[VAL_2:.*]]:2 = quake.x {{\[}}%[[VAL_0]]] %[[VAL_1]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: %[[VAL_3:.*]] = quake.h %[[VAL_2]]#1 : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_4:.*]]:2 = quake.x {{\[}}%[[VAL_2]]#0] %[[VAL_3]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: quake.return_wire %[[VAL_4]]#1 : !quake.wire +// CHECK: %[[VAL_5:.*]], %[[VAL_6:.*]] = quake.mz %[[VAL_4]]#0 : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK-DAG: %[[VAL_7:.*]] = quake.discriminate %[[VAL_5]] : (!quake.measure) -> i1 +// CHECK-DAG: quake.return_wire %[[VAL_6]] : !quake.wire +// CHECK: return %[[VAL_7]] : i1 +// CHECK: } diff --git a/test/Quake/dep_analysis_bug_classical.qke b/test/Quake/dep_analysis_bug_classical.qke new file mode 100644 index 0000000000..8cf89d7edb --- /dev/null +++ b/test/Quake/dep_analysis_bug_classical.qke @@ -0,0 +1,135 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. 
// +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +// RUN: cudaq-opt --add-wireset --assign-wire-indices --dep-analysis -split-input-file %s | FileCheck %s + +// The second cc.if has a shadow dependency on the first if (and therefore first wire) +// This test ensures that the shadow dependency is respected. +func.func @test1() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel", qubitMeasurementFeedback = true} { + %true = arith.constant true + %cst = arith.constant 1.000000e+00 : f32 + %cst_0 = arith.constant 2.000000e+00 : f32 + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.h %0 : (!quake.wire) -> !quake.wire + %3 = quake.x %2 : (!quake.wire) -> !quake.wire + %measOut, %wires = quake.mz %3 : (!quake.wire) -> (!quake.measure, !quake.wire) + %4 = quake.discriminate %measOut : (!quake.measure) -> i1 + %5 = cc.if(%4) -> f32 { + cc.continue %cst : f32 + } else { + cc.continue %cst_0 : f32 + } + %6 = cc.if(%true) ((%arg0 = %1)) -> !quake.wire { + %7 = cc.if(%true) ((%arg1 = %arg0)) -> !quake.wire { + %8 = quake.rx (%5) %arg1 : (f32, !quake.wire) -> !quake.wire + cc.continue %8 : !quake.wire + } else { + cc.continue %arg1 : !quake.wire + } + cc.continue %7 : !quake.wire + } else { + cc.continue %arg0 : !quake.wire + } + %measOut_1, %wires_2 = quake.mz %6 : (!quake.wire) -> (!quake.measure, !quake.wire) + %7 = quake.discriminate %measOut_1 : (!quake.measure) -> i1 + quake.sink %wires : !quake.wire + quake.sink %wires_2 : !quake.wire + return %7 : i1 +} + +// CHECK-LABEL: quake.wire_set @wires[2147483647] attributes {sym_visibility = "private"} + +// CHECK-LABEL: func.func @test1() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel", qubitMeasurementFeedback = true} { +// CHECK: %[[VAL_0:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: 
%[[VAL_1:.*]] = quake.h %[[VAL_0]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_2:.*]] = quake.x %[[VAL_1]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_3:.*]], %[[VAL_4:.*]] = quake.mz %[[VAL_2]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_5:.*]] = quake.discriminate %[[VAL_3]] : (!quake.measure) -> i1 +// CHECK: %[[VAL_6:.*]] = cc.if(%[[VAL_5]]) -> f32 { +// CHECK: %[[VAL_7:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: cc.continue %[[VAL_7]] : f32 +// CHECK: } else { +// CHECK: %[[VAL_8:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: cc.continue %[[VAL_8]] : f32 +// CHECK: } +// CHECK: %[[VAL_9:.*]] = arith.constant true +// CHECK: %[[VAL_10:.*]] = cc.if(%[[VAL_9]]) ((%[[VAL_11:.*]] = %[[VAL_4]])) -> !quake.wire { +// CHECK: %[[VAL_12:.*]] = arith.constant true +// CHECK: %[[VAL_13:.*]] = cc.if(%[[VAL_12]]) ((%[[VAL_14:.*]] = %[[VAL_11]])) -> !quake.wire { +// CHECK: %[[VAL_15:.*]] = quake.rx (%[[VAL_16:.*]]) %[[VAL_14]] : (f32, !quake.wire) -> !quake.wire +// CHECK: cc.continue %[[VAL_15]] : !quake.wire +// CHECK: } else { +// CHECK: cc.continue %[[VAL_17:.*]] : !quake.wire +// CHECK: } +// CHECK: cc.continue %[[VAL_18:.*]] : !quake.wire +// CHECK: } else { +// CHECK: cc.continue %[[VAL_19:.*]] : !quake.wire +// CHECK: } +// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = quake.mz %[[VAL_22:.*]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_23:.*]] = quake.discriminate %[[VAL_20]] : (!quake.measure) -> i1 +// CHECK: quake.return_wire %[[VAL_21]] : !quake.wire +// CHECK: return %[[VAL_23]] : i1 +// CHECK: } + +// ----- + +// The second cc.if has a shadow dependency on the first if (and therefore first wire) +// This test ensures that the shadow dependency is respected, and that the shadow +// dependency is properly handled when lifting the op + +func.func @test2() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel", qubitMeasurementFeedback = true} { + %true = arith.constant true + %cst = arith.constant 
1.000000e+00 : f32 + %cst_0 = arith.constant 2.000000e+00 : f32 + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.h %0 : (!quake.wire) -> !quake.wire + %3 = quake.x %2 : (!quake.wire) -> !quake.wire + %measOut, %wires = quake.mz %3 : (!quake.wire) -> (!quake.measure, !quake.wire) + %4 = quake.discriminate %measOut : (!quake.measure) -> i1 + %5 = cc.if(%4) -> f32 { + cc.continue %cst : f32 + } else { + cc.continue %cst_0 : f32 + } + %6 = cc.if(%true) ((%arg0 = %1)) -> !quake.wire { + %8 = quake.rx (%5) %arg0 : (f32, !quake.wire) -> !quake.wire + cc.continue %8 : !quake.wire + } else { + %8 = quake.rx (%5) %arg0 : (f32, !quake.wire) -> !quake.wire + cc.continue %8 : !quake.wire + } + %measOut_1, %wires_2 = quake.mz %6 : (!quake.wire) -> (!quake.measure, !quake.wire) + %7 = quake.discriminate %measOut_1 : (!quake.measure) -> i1 + quake.sink %wires : !quake.wire + quake.sink %wires_2 : !quake.wire + return %7 : i1 +} + +// CHECK-LABEL: quake.wire_set @wires[2147483647] attributes {sym_visibility = "private"} + +// CHECK-LABEL: func.func @test2() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel", qubitMeasurementFeedback = true} { +// CHECK: %[[VAL_0:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_1:.*]] = quake.h %[[VAL_0]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_2:.*]] = quake.x %[[VAL_1]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_3:.*]], %[[VAL_4:.*]] = quake.mz %[[VAL_2]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_5:.*]] = quake.discriminate %[[VAL_3]] : (!quake.measure) -> i1 +// CHECK: %[[VAL_6:.*]] = cc.if(%[[VAL_5]]) -> f32 { +// CHECK: %[[VAL_7:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: cc.continue %[[VAL_7]] : f32 +// CHECK: } else { +// CHECK: %[[VAL_8:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: cc.continue %[[VAL_8]] : f32 +// CHECK: } +// CHECK: %[[VAL_9:.*]] = quake.rx (%[[VAL_10:.*]]) %[[VAL_4]] : (f32, !quake.wire) -> !quake.wire +// CHECK: %[[VAL_11:.*]], 
%[[VAL_12:.*]] = quake.mz %[[VAL_9]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_13:.*]] = quake.discriminate %[[VAL_11]] : (!quake.measure) -> i1 +// CHECK: quake.return_wire %[[VAL_12]] : !quake.wire +// CHECK: return %[[VAL_13]] : i1 +// CHECK: } diff --git a/test/Quake/dep_analysis_bug_lifting_schedule.qke b/test/Quake/dep_analysis_bug_lifting_schedule.qke new file mode 100644 index 0000000000..ccf96bbd4a --- /dev/null +++ b/test/Quake/dep_analysis_bug_lifting_schedule.qke @@ -0,0 +1,122 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +// RUN: cudaq-opt --add-wireset --assign-wire-indices --dep-analysis %s | FileCheck %s + +// The `quake.h` on `%arg0` will be lifted, which could cause a conflict +// as the same qubit will be reused for r and q. This tests that the +// implementation handles this conflict safely. 
+func.func @__nvqpp__mlirgen__run_test1() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %true = arith.constant true + %0 = quake.null_wire + %1 = cc.if(%true) ((%arg0 = %0)) -> !quake.wire { + %3 = quake.null_wire + %4 = quake.null_wire + %5 = quake.x %4 : (!quake.wire) -> !quake.wire + %6 = quake.h %arg0 : (!quake.wire) -> !quake.wire + %7 = quake.y %5 : (!quake.wire) -> !quake.wire + %8 = quake.x %6 : (!quake.wire) -> !quake.wire + %9:2 = quake.x [%8] %3 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %10 = quake.h %9#1 : (!quake.wire) -> !quake.wire + quake.sink %10 : !quake.wire + quake.sink %7 : !quake.wire + cc.continue %9#0 : !quake.wire + } else { + %3 = quake.h %arg0 : (!quake.wire) -> !quake.wire + cc.continue %3 : !quake.wire + } + %measOut, %wires = quake.mz %1 : (!quake.wire) -> (!quake.measure, !quake.wire) + %2 = quake.discriminate %measOut : (!quake.measure) -> i1 + quake.sink %wires : !quake.wire + return %2 : i1 +} + +// CHECK-LABEL: quake.wire_set @wires[2147483647] attributes {sym_visibility = "private"} + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__run_test1() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK: %[[VAL_1:.*]] = quake.h %[[VAL_0]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_2:.*]] = arith.constant true +// CHECK: %[[VAL_3:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_4:.*]]:2 = cc.if(%[[VAL_2]]) ((%[[VAL_5:.*]] = %[[VAL_1]], %[[VAL_6:.*]] = %[[VAL_3]])) -> (!quake.wire, !quake.wire) { +// CHECK: %[[VAL_7:.*]] = quake.x %[[VAL_5]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_8:.*]]:2 = quake.x {{\[}}%[[VAL_7]]] %[[VAL_6]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: %[[VAL_9:.*]] = quake.h %[[VAL_8]]#1 : (!quake.wire) -> !quake.wire +// CHECK: cc.continue %[[VAL_8]]#0, %[[VAL_9]] : !quake.wire, !quake.wire +// CHECK: } else { +// CHECK: cc.continue %[[VAL_10:.*]], 
%[[VAL_11:.*]] : !quake.wire, !quake.wire +// CHECK: } +// CHECK: quake.return_wire %[[VAL_12:.*]]#1 : !quake.wire +// CHECK: %[[VAL_13:.*]], %[[VAL_14:.*]] = quake.mz %[[VAL_12]]#0 : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_15:.*]] = quake.discriminate %[[VAL_13]] : (!quake.measure) -> i1 +// CHECK: quake.return_wire %[[VAL_14]] : !quake.wire +// CHECK: return %[[VAL_15]] : i1 +// CHECK: } + +func.func @__nvqpp__mlirgen__run_test2() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %true = arith.constant true + %false = arith.constant false + %0 = quake.null_wire + %1:2 = cc.if(%true) ((%arg0 = %0)) -> (i1, !quake.wire) { + %4 = quake.null_wire + %5 = quake.null_wire + %6:2 = quake.x [%arg0] %4 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %7 = quake.h %6#0 : (!quake.wire) -> !quake.wire + %8 = quake.h %5 : (!quake.wire) -> !quake.wire + %9 = quake.y %7 : (!quake.wire) -> !quake.wire + %measOut, %wires = quake.mz %8 : (!quake.wire) -> (!quake.measure, !quake.wire) + %10 = quake.discriminate %measOut : (!quake.measure) -> i1 + quake.sink %6#1 : !quake.wire + quake.sink %wires : !quake.wire + cc.continue %10, %9 : i1, !quake.wire + } else { + %4 = quake.y %arg0 : (!quake.wire) -> !quake.wire + cc.continue %false, %4 : i1, !quake.wire + } + %2 = arith.cmpi eq, %1#0, %false : i1 + %3:2 = cc.if(%2) ((%arg0 = %1#1)) -> (i1, !quake.wire) { + cc.continue %false, %arg0 : i1, !quake.wire + } else { + %measOut, %wires = quake.mz %arg0 : (!quake.wire) -> (!quake.measure, !quake.wire) + %4 = quake.discriminate %measOut : (!quake.measure) -> i1 + cc.continue %4, %wires : i1, !quake.wire + } + quake.sink %3#1 : !quake.wire + return %3#0 : i1 +} + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__run_test2() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = arith.constant true +// CHECK: %[[VAL_1:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK: %[[VAL_2:.*]] = quake.borrow_wire @wires[0] : 
!quake.wire +// CHECK: %[[VAL_3:.*]]:3 = cc.if(%[[VAL_0]]) ((%[[VAL_4:.*]] = %[[VAL_1]], %[[VAL_5:.*]] = %[[VAL_2]])) -> (i1, !quake.wire, !quake.wire) { +// CHECK: %[[VAL_6:.*]]:2 = quake.x {{\[}}%[[VAL_4]]] %[[VAL_5]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: %[[VAL_7:.*]] = quake.h %[[VAL_6]]#1 : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_8:.*]] = quake.h %[[VAL_6]]#0 : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_9:.*]], %[[VAL_10:.*]] = quake.mz %[[VAL_7]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_11:.*]] = quake.discriminate %[[VAL_9]] : (!quake.measure) -> i1 +// CHECK: cc.continue %[[VAL_11]], %[[VAL_8]], %[[VAL_10]] : i1, !quake.wire, !quake.wire +// CHECK: } else { +// CHECK: %[[VAL_12:.*]] = arith.constant false +// CHECK: cc.continue %[[VAL_12]], %[[VAL_13:.*]], %[[VAL_14:.*]] : i1, !quake.wire, !quake.wire +// CHECK: } +// CHECK: %[[VAL_15:.*]] = arith.constant false +// CHECK: %[[VAL_16:.*]] = arith.cmpi eq, %[[VAL_17:.*]]#0, %[[VAL_15]] : i1 +// CHECK: quake.return_wire %[[VAL_17]]#2 : !quake.wire +// CHECK: %[[VAL_18:.*]] = quake.y %[[VAL_17]]#1 : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_19:.*]]:2 = cc.if(%[[VAL_16]]) ((%[[VAL_20:.*]] = %[[VAL_18]])) -> (i1, !quake.wire) { +// CHECK: %[[VAL_21:.*]] = arith.constant false +// CHECK: cc.continue %[[VAL_21]], %[[VAL_20]] : i1, !quake.wire +// CHECK: } else { +// CHECK: %[[VAL_22:.*]], %[[VAL_23:.*]] = quake.mz %[[VAL_24:.*]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_25:.*]] = quake.discriminate %[[VAL_22]] : (!quake.measure) -> i1 +// CHECK: cc.continue %[[VAL_25]], %[[VAL_23]] : i1, !quake.wire +// CHECK: } +// CHECK: quake.return_wire %[[VAL_26:.*]]#1 : !quake.wire +// CHECK: return %[[VAL_26]]#0 : i1 +// CHECK: } diff --git a/test/Quake/dep_analysis_bug_lowering.qke b/test/Quake/dep_analysis_bug_lowering.qke new file mode 100644 index 0000000000..0f71d63c95 --- /dev/null +++ 
b/test/Quake/dep_analysis_bug_lowering.qke @@ -0,0 +1,49 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +// RUN: cudaq-opt --add-wireset --assign-wire-indices --dep-analysis %s | FileCheck %s + +func.func @__nvqpp__mlirgen__run_test() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %true = arith.constant true + %0 = quake.null_wire + %1 = quake.null_wire + %2:2 = cc.if(%true) ((%arg0 = %0, %arg1 = %1)) -> (!quake.wire, !quake.wire) { + %4 = quake.h %arg0 : (!quake.wire) -> !quake.wire + %5 = quake.x %4 : (!quake.wire) -> !quake.wire + %6 = quake.h %arg1 : (!quake.wire) -> !quake.wire + cc.continue %5, %6 : !quake.wire, !quake.wire + } else { + cc.continue %arg0, %arg1 : !quake.wire, !quake.wire + } + %measOut, %wires = quake.mz %2#0 name "b" : (!quake.wire) -> (!quake.measure, !quake.wire) + %3 = quake.discriminate %measOut : (!quake.measure) -> i1 + quake.sink %wires : !quake.wire + quake.sink %2#1 : !quake.wire + return %3 : i1 +} + +// CHECK-LABEL: quake.wire_set @wires[2147483647] attributes {sym_visibility = "private"} + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__run_test() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = arith.constant true +// CHECK: %[[VAL_1:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK: %[[VAL_2:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_3:.*]]:2 = cc.if(%[[VAL_0]]) ((%[[VAL_4:.*]] = %[[VAL_1]], %[[VAL_5:.*]] = %[[VAL_2]])) -> (!quake.wire, !quake.wire) { +// CHECK: %[[VAL_6:.*]] = quake.h %[[VAL_4]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_7:.*]] = quake.x %[[VAL_6]] : 
(!quake.wire) -> !quake.wire +// CHECK: %[[VAL_8:.*]] = quake.h %[[VAL_5]] : (!quake.wire) -> !quake.wire +// CHECK: cc.continue %[[VAL_7]], %[[VAL_8]] : !quake.wire, !quake.wire +// CHECK: } else { +// CHECK: cc.continue %[[VAL_9:.*]], %[[VAL_10:.*]] : !quake.wire, !quake.wire +// CHECK: } +// CHECK: quake.return_wire %[[VAL_11:.*]]#1 : !quake.wire +// CHECK: %[[VAL_12:.*]], %[[VAL_13:.*]] = quake.mz %[[VAL_11]]#0 name "b" : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_14:.*]] = quake.discriminate %[[VAL_12]] : (!quake.measure) -> i1 +// CHECK: quake.return_wire %[[VAL_13]] : !quake.wire +// CHECK: return %[[VAL_14]] : i1 +// CHECK: } \ No newline at end of file diff --git a/test/Quake/dep_analysis_classical.qke b/test/Quake/dep_analysis_classical.qke new file mode 100644 index 0000000000..815b18a7bc --- /dev/null +++ b/test/Quake/dep_analysis_classical.qke @@ -0,0 +1,176 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. 
// +// ========================================================================== // + +// RUN: cudaq-opt --add-wireset --assign-wire-indices --dep-analysis %s | FileCheck %s + +func.func @test1() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst_0 = arith.constant 2.000000e+00 : f64 + %0 = quake.null_wire + %1 = quake.rx (%cst_0) %0 : (f64, !quake.wire) -> !quake.wire + %measOut, %wires = quake.mz %1 : (!quake.wire) -> (!quake.measure, !quake.wire) + quake.sink %wires : !quake.wire + return %measOut : !quake.measure +} + +// CHECK-LABEL: quake.wire_set @wires[2147483647] attributes {sym_visibility = "private" + +// CHECK-LABEL: func.func @test1() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 2.000000e+00 : f64 +// CHECK-DAG: %[[VAL_1:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_2:.*]] = quake.rx (%[[VAL_0]]) %[[VAL_1]] : (f64, !quake.wire) -> !quake.wire +// CHECK: %[[VAL_3:.*]], %[[VAL_4:.*]] = quake.mz %[[VAL_2]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: quake.return_wire %[[VAL_4]] : !quake.wire +// CHECK: return %[[VAL_3]] : !quake.measure +// CHECK: } + +func.func @test2() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst = arith.constant 7.000000e+00 : f64 + %cst_0 = arith.constant 2.000000e+00 : f64 + %0 = quake.null_wire + %measOut, %wires = quake.mz %0 : (!quake.wire) -> (!quake.measure, !quake.wire) + %1 = quake.discriminate %measOut : (!quake.measure) -> i1 + %2 = arith.uitofp %1 : i1 to f64 + %3 = arith.mulf %2, %cst_0 : f64 + %4 = arith.addf %3, %cst : f64 + %5 = quake.rx (%4) %wires : (f64, !quake.wire) -> !quake.wire + quake.sink %5 : !quake.wire + return %measOut : !quake.measure +} + +// CHECK-LABEL: func.func @test2() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_1:.*]], %[[VAL_2:.*]] 
= quake.mz %[[VAL_0]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_1]] : (!quake.measure) -> i1 +// CHECK: %[[VAL_4:.*]] = arith.uitofp %[[VAL_3]] : i1 to f64 +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2.000000e+00 : f64 +// CHECK: %[[VAL_6:.*]] = arith.mulf %[[VAL_4]], %[[VAL_5]] : f64 +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 7.000000e+00 : f64 +// CHECK: %[[VAL_8:.*]] = arith.addf %[[VAL_6]], %[[VAL_7]] : f64 +// CHECK: %[[VAL_9:.*]] = quake.rx (%[[VAL_8]]) %[[VAL_2]] : (f64, !quake.wire) -> !quake.wire +// CHECK: quake.return_wire %[[VAL_9]] : !quake.wire +// CHECK: return %[[VAL_1]] : !quake.measure +// CHECK: } + +func.func @test3() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst_0 = arith.constant 2.000000e+00 : f64 + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.rx (%cst_0) %0 : (f64, !quake.wire) -> !quake.wire + %3 = quake.x %2 : (!quake.wire) -> !quake.wire + %4 = quake.y %3 : (!quake.wire) -> !quake.wire + %5 = quake.rx (%cst_0) %1 : (f64, !quake.wire) -> !quake.wire + %6:2 = quake.x [%4] %5 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %measOut, %wires = quake.mz %6#1 : (!quake.wire) -> (!quake.measure, !quake.wire) + quake.sink %6#0 : !quake.wire + quake.sink %wires : !quake.wire + return %measOut : !quake.measure +} + +// CHECK-LABEL: func.func @test3() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 2.000000e+00 : f64 +// CHECK-DAG: %[[VAL_1:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_2:.*]] = quake.rx (%[[VAL_0]]) %[[VAL_1]] : (f64, !quake.wire) -> !quake.wire +// CHECK: %[[VAL_3:.*]] = quake.x %[[VAL_2]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_4:.*]] = quake.y %[[VAL_3]] : (!quake.wire) -> !quake.wire +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2.000000e+00 : f64 +// CHECK-DAG: %[[VAL_6:.*]] = quake.borrow_wire @wires[1] : !quake.wire 
+// CHECK: %[[VAL_7:.*]] = quake.rx (%[[VAL_5]]) %[[VAL_6]] : (f64, !quake.wire) -> !quake.wire +// CHECK: %[[VAL_8:.*]]:2 = quake.x {{\[}}%[[VAL_4]]] %[[VAL_7]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: quake.return_wire %[[VAL_8]]#0 : !quake.wire +// CHECK: %[[VAL_9:.*]], %[[VAL_10:.*]] = quake.mz %[[VAL_8]]#1 : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: quake.return_wire %[[VAL_10]] : !quake.wire +// CHECK: return %[[VAL_9]] : !quake.measure +// CHECK: } + +func.func @test4() -> f64 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst_0 = arith.constant 2.000000e+00 : f64 + return %cst_0 : f64 +} + +// CHECK-LABEL: func.func @test4() -> f64 attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = arith.constant 2.000000e+00 : f64 +// CHECK: return %[[VAL_0]] : f64 +// CHECK: } + +// Ensures that %cst_0 as a constant is not enough to connect the graphs +func.func @test5() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst_0 = arith.constant 2.000000e+00 : f64 + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.rx (%cst_0) %0 : (f64, !quake.wire) -> !quake.wire + %3 = quake.x %2 : (!quake.wire) -> !quake.wire + %4 = quake.y %3 : (!quake.wire) -> !quake.wire + %5 = quake.rx (%cst_0) %1 : (f64, !quake.wire) -> !quake.wire + %measOut, %wires = quake.mz %5 : (!quake.wire) -> (!quake.measure, !quake.wire) + quake.sink %4 : !quake.wire + quake.sink %wires : !quake.wire + return %measOut : !quake.measure +} + +// CHECK-LABEL: func.func @test5() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = arith.constant 2.000000e+00 : f64 +// CHECK: %[[VAL_1:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_2:.*]] = quake.rx (%[[VAL_0]]) %[[VAL_1]] : (f64, !quake.wire) -> !quake.wire +// CHECK: %[[VAL_3:.*]], %[[VAL_4:.*]] = quake.mz %[[VAL_2]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: quake.return_wire 
%[[VAL_4]] : !quake.wire +// CHECK: return %[[VAL_3]] : !quake.measure +// CHECK: } + +func.func @test6() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.h %1 : (!quake.wire) -> !quake.wire + %measOut, %wires = quake.mz %0 : (!quake.wire) -> (!quake.measure, !quake.wire) + %3 = quake.discriminate %measOut : (!quake.measure) -> i1 + %4 = arith.uitofp %3 : i1 to f64 + %5 = quake.rx (%4) %2 : (f64, !quake.wire) -> !quake.wire + quake.sink %wires : !quake.wire + quake.sink %5 : !quake.wire + return %3 : i1 +} + +// CHECK-LABEL: func.func @test6() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_1:.*]], %[[VAL_2:.*]] = quake.mz %[[VAL_0]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_1]] : (!quake.measure) -> i1 +// CHECK: %[[VAL_4:.*]] = arith.uitofp %[[VAL_3]] : i1 to f64 +// CHECK: quake.return_wire %[[VAL_2]] : !quake.wire +// CHECK: %[[VAL_5:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK: %[[VAL_6:.*]] = quake.h %[[VAL_5]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_7:.*]] = quake.rx (%[[VAL_4]]) %[[VAL_6]] : (f64, !quake.wire) -> !quake.wire +// CHECK: quake.return_wire %[[VAL_7]] : !quake.wire +// CHECK: return %[[VAL_3]] : i1 +// CHECK: } + +func.func @test7() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.null_wire + %measOut, %wires = quake.mz %0 : (!quake.wire) -> (!quake.measure, !quake.wire) + %3 = quake.discriminate %measOut : (!quake.measure) -> i1 + %4 = arith.uitofp %3 : i1 to f64 + %5 = quake.rx (%4) %1 : (f64, !quake.wire) -> !quake.wire + %6 = quake.rx (%4) %2 : (f64, !quake.wire) -> !quake.wire + quake.sink %wires : !quake.wire + quake.sink %5 : !quake.wire + quake.sink %6 : !quake.wire + return %3 : i1 +} + +// CHECK-LABEL: func.func @test7() -> i1 
attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_1:.*]], %[[VAL_2:.*]] = quake.mz %[[VAL_0]] : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: %[[VAL_3:.*]] = quake.discriminate %[[VAL_1]] : (!quake.measure) -> i1 +// CHECK: %[[VAL_4:.*]] = arith.uitofp %[[VAL_3]] : i1 to f64 +// CHECK: %[[VAL_5:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK: %[[VAL_6:.*]] = quake.rx (%[[VAL_4]]) %[[VAL_5]] : (f64, !quake.wire) -> !quake.wire +// CHECK: quake.return_wire %[[VAL_6]] : !quake.wire +// CHECK: %[[VAL_7:.*]] = quake.rx (%[[VAL_4]]) %[[VAL_2]] : (f64, !quake.wire) -> !quake.wire +// CHECK: quake.return_wire %[[VAL_7]] : !quake.wire +// CHECK: return %[[VAL_3]] : i1 +// CHECK: } diff --git a/test/Quake/dep_analysis_lifting.qke b/test/Quake/dep_analysis_lifting.qke new file mode 100644 index 0000000000..44e7a60d41 --- /dev/null +++ b/test/Quake/dep_analysis_lifting.qke @@ -0,0 +1,91 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. 
// +// ========================================================================== // + +// RUN: cudaq-opt --add-wireset --assign-wire-indices --dep-analysis %s | FileCheck %s + +func.func @test1() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %true = arith.constant true + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.null_wire + %3 = quake.h %0 : (!quake.wire) -> !quake.wire + %4:3 = cc.if(%true) ((%arg1 = %3, %arg2 = %1, %arg3 = %2)) -> (!quake.wire, !quake.wire, !quake.wire) { + %5 = quake.h %arg2 : (!quake.wire) -> !quake.wire + %6:2 = quake.x [%5] %arg1 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + cc.continue %6#1, %6#0, %arg3: !quake.wire, !quake.wire, !quake.wire + } else { + %5 = quake.h %arg3 : (!quake.wire) -> !quake.wire + %6:2 = quake.y [%5] %arg1 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + cc.continue %6#1, %arg2, %6#0: !quake.wire, !quake.wire, !quake.wire + } + %measOut, %wires = quake.mz %4#0 : (!quake.wire) -> (!quake.measure, !quake.wire) + quake.sink %wires : !quake.wire + quake.sink %4#1 : !quake.wire + quake.sink %4#2 : !quake.wire + return %measOut : !quake.measure +} + +// CHECK-LABEL: quake.wire_set @wires[2147483647] attributes {sym_visibility = "private"} + +// CHECK-LABEL: func.func @test1() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK-DAG: %[[VAL_0:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK-DAG: %[[VAL_1:.*]] = quake.h %[[VAL_0]] : (!quake.wire) -> !quake.wire +// CHECK-DAG: %[[VAL_2:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK-DAG: %[[VAL_3:.*]] = quake.h %[[VAL_2]] : (!quake.wire) -> !quake.wire +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant true +// CHECK: %[[VAL_5:.*]]:2 = cc.if(%[[VAL_4]]) ((%[[VAL_6:.*]] = %[[VAL_1]], %[[VAL_7:.*]] = %[[VAL_3]])) -> (!quake.wire, !quake.wire) { +// CHECK: %[[VAL_8:.*]]:2 = quake.x {{\[}}%[[VAL_7]]] %[[VAL_6]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// 
CHECK: cc.continue %[[VAL_8]]#1, %[[VAL_8]]#0 : !quake.wire, !quake.wire +// CHECK: } else { +// CHECK: %[[VAL_9:.*]]:2 = quake.y {{\[}}%[[VAL_10:.*]]] %[[VAL_11:.*]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: cc.continue %[[VAL_9]]#1, %[[VAL_9]]#0 : !quake.wire, !quake.wire +// CHECK: } +// CHECK: quake.return_wire %[[VAL_12:.*]]#1 : !quake.wire +// CHECK: %[[VAL_13:.*]], %[[VAL_14:.*]] = quake.mz %[[VAL_12]]#0 : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: quake.return_wire %[[VAL_14]] : !quake.wire +// CHECK: return %[[VAL_13]] : !quake.measure +// CHECK: } + +func.func @test2() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %true = arith.constant true + %0 = quake.null_wire + %1 = quake.null_wire + %2 = quake.h %0 : (!quake.wire) -> !quake.wire + %3:2 = cc.if(%true) ((%arg1 = %2, %arg2 = %1)) -> (!quake.wire, !quake.wire) { + %4 = quake.y %arg2 : (!quake.wire) -> !quake.wire + %5:2 = quake.x [%4] %arg1 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + cc.continue %5#1, %5#0: !quake.wire, !quake.wire + } else { + %4 = quake.z %arg2 : (!quake.wire) -> !quake.wire + %5:2 = quake.x [%4] %arg1 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + cc.continue %5#1, %5#0: !quake.wire, !quake.wire + } + %measOut, %wires = quake.mz %3#0 : (!quake.wire) -> (!quake.measure, !quake.wire) + quake.sink %wires : !quake.wire + quake.sink %3#1 : !quake.wire + return %measOut : !quake.measure +} + +// CHECK-LABEL: func.func @test2() -> !quake.measure attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_0:.*]] = arith.constant true +// CHECK: %[[VAL_1:.*]] = quake.borrow_wire @wires[0] : !quake.wire +// CHECK: %[[VAL_2:.*]] = cc.if(%[[VAL_0]]) ((%[[VAL_3:.*]] = %[[VAL_1]])) -> !quake.wire { +// CHECK: %[[VAL_4:.*]] = quake.y %[[VAL_3]] : (!quake.wire) -> !quake.wire +// CHECK: cc.continue %[[VAL_4]] : !quake.wire +// CHECK: } else { +// CHECK: %[[VAL_5:.*]] = quake.z %[[VAL_6:.*]] : 
(!quake.wire) -> !quake.wire +// CHECK: cc.continue %[[VAL_5]] : !quake.wire +// CHECK: } +// CHECK: %[[VAL_7:.*]] = quake.borrow_wire @wires[1] : !quake.wire +// CHECK: %[[VAL_8:.*]] = quake.h %[[VAL_7]] : (!quake.wire) -> !quake.wire +// CHECK: %[[VAL_9:.*]]:2 = quake.x {{\[}}%[[VAL_10:.*]]] %[[VAL_8]] : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) +// CHECK: quake.return_wire %[[VAL_9]]#0 : !quake.wire +// CHECK: %[[VAL_11:.*]], %[[VAL_12:.*]] = quake.mz %[[VAL_9]]#1 : (!quake.wire) -> (!quake.measure, !quake.wire) +// CHECK: quake.return_wire %[[VAL_12]] : !quake.wire +// CHECK: return %[[VAL_11]] : !quake.measure +// CHECK: } diff --git a/test/Quake/invalid.qke b/test/Quake/invalid.qke new file mode 100644 index 0000000000..1dd79e84c0 --- /dev/null +++ b/test/Quake/invalid.qke @@ -0,0 +1,56 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. 
// +// ========================================================================== // + +// RUN: cudaq-opt %s -split-input-file -verify-diagnostics + +func.func @test_struq() { + %0 = quake.alloca !quake.veq<4> + %1 = arith.constant 1 : i32 + %2 = arith.constant 2.0 : f32 + // expected-error@+1 {{must be non-struct quantum reference type}} + %6 = quake.make_struq %0, %1, %2 : (!quake.veq<4>, i32, f32) -> !quake.struq, i32, f32> + return +} + +// ----- + +func.func @test_struq() { + %0 = quake.alloca !quake.veq<4> + %1 = quake.alloca !quake.veq<7> + // expected-error@+1 {{member type not compatible with operand type}} + %6 = quake.make_struq %0, %1 : (!quake.veq<4>, !quake.veq<7>) -> !quake.struq, !quake.veq<8>> + return +} + +// ----- + +func.func @test_struq() { + %0 = quake.alloca !quake.veq<4> + %1 = quake.alloca !quake.veq<7> + // expected-error@+1 {{result type has different member count than operands}} + %6 = quake.make_struq %0, %1 : (!quake.veq<4>, !quake.veq<7>) -> !quake.struq> + return +} + +// ----- + +func.func @test_struq() { + %0 = quake.alloca !quake.veq<4> + %1 = quake.alloca !quake.veq<7> + // expected-error@+1 {{result type has different member count than operands}} + %6 = quake.make_struq %0, %1 : (!quake.veq<4>, !quake.veq<7>) -> !quake.struq, !quake.veq, !quake.veq> + return +} + +// ----- + +func.func @test_struq(%arg : !quake.struq, !quake.veq<2>, !quake.veq<3>>) { + // expected-error@+1 {{invalid index}} + %6 = quake.get_member %arg[3] : (!quake.struq, !quake.veq<2>, !quake.veq<3>>) -> !quake.veq<1> + return +} diff --git a/test/Quake/lambda_variable-2.qke b/test/Quake/lambda_variable-2.qke index 541e538dee..4d116a79e7 100644 --- a/test/Quake/lambda_variable-2.qke +++ b/test/Quake/lambda_variable-2.qke @@ -7,7 +7,8 @@ // ========================================================================== // // RUN: cudaq-opt --lambda-lifting --canonicalize %s | FileCheck %s -// RUN: cudaq-opt --lambda-lifting --canonicalize %s | cudaq-translate 
--convert-to=qir | FileCheck --check-prefixes=QIR %s +// RUN: cudaq-opt --lambda-lifting --canonicalize %s | \ +// RUN: cudaq-translate --convert-to=qir | FileCheck --check-prefix=QIR %s // CHECK-LABEL: func.func @__nvqpp__mlirgen__kernel_b( // CHECK-SAME: %[[VAL_0:.*]]: !cc.callable<() -> ()>) @@ -61,16 +62,6 @@ // QIR: call {{.*}} @__quantum__rt__qubit_allocate_array(i64 4) // QIR: call void @__quantum__rt__qubit_release_array( -// QIR-LABEL: define void @__nvqpp__lifted.lambda.0( -// QIR: call i8* @__quantum__rt__array_get_element_ptr_1d( -// QIR: call void @__quantum__qis__h( -// QIR: call i8* @__quantum__rt__array_get_element_ptr_1d( -// QIR: call void @__quantum__qis__h( -// QIR: call i8* @__quantum__rt__array_get_element_ptr_1d( -// QIR: call void @__quantum__qis__h( -// QIR: call i8* @__quantum__rt__array_get_element_ptr_1d( -// QIR: call void @__quantum__qis__h( - module attributes {quake.mangled_name_map = {__nvqpp__mlirgen__kernel_a = "_ZN8kernel_aclEv", __nvqpp__mlirgen__kernel_b = "_ZN8kernel_bclEOSt8functionIFvvEE"}} { func.func @__nvqpp__mlirgen__kernel_b(%arg0: !cc.callable<() -> ()>) attributes {"cudaq-entrypoint", "cudaq-kernel"} { return diff --git a/test/Quake/roundtrip-ops.qke b/test/Quake/roundtrip-ops.qke index fe643c5ab5..2d094e4060 100644 --- a/test/Quake/roundtrip-ops.qke +++ b/test/Quake/roundtrip-ops.qke @@ -777,3 +777,65 @@ func.func @offsets() { // CHECK: %[[VAL_3:.*]] = cc.offsetof !cc.array x 100> [86, 1] : i64 // CHECK: %[[VAL_4:.*]] = cc.offsetof !cc.array x 100> [0, 0] : i64 // CHECK: return + +func.func @indirect_callable1(%arg : !cc.indirect_callable<() -> ()>) { + cc.call_indirect_callable %arg : (!cc.indirect_callable<() -> ()>) -> () + return +} + +// CHECK-LABEL: func.func @indirect_callable1( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.indirect_callable<() -> ()>) { +// CHECK: cc.call_indirect_callable %[[VAL_0]] : (!cc.indirect_callable<() -> ()>) -> () +// CHECK: return +// CHECK: } + +func.func @indirect_callable2(%arg : 
!cc.indirect_callable<(i32) -> i64>) -> i64 { + %cst = arith.constant 4 : i32 + %0 = cc.call_indirect_callable %arg, %cst : (!cc.indirect_callable<(i32) -> i64>, i32) -> i64 + return %0 : i64 +} + +// CHECK-LABEL: func.func @indirect_callable2( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.indirect_callable<(i32) -> i64>) -> i64 { +// CHECK: %[[VAL_1:.*]] = arith.constant 4 : i32 +// CHECK: %[[VAL_2:.*]] = cc.call_indirect_callable %[[VAL_0]], %[[VAL_1]] : (!cc.indirect_callable<(i32) -> i64>, i32) -> i64 +// CHECK: return %[[VAL_2]] : i64 +// CHECK: } + +func.func @quantum_product_type() { + %0 = quake.alloca !quake.veq<3> + %1 = quake.alloca !quake.veq<4> + %2 = quake.make_struq %0, %1 : (!quake.veq<3>, !quake.veq<4>) -> !quake.struq, !quake.veq> + %3 = quake.get_member %2[0] : (!quake.struq, !quake.veq>) -> !quake.veq + %4 = quake.get_member %2[1] : (!quake.struq, !quake.veq>) -> !quake.veq + %10 = quake.alloca !quake.veq<5> + %11 = quake.alloca !quake.veq<6> + %12 = quake.make_struq %10, %11 : (!quake.veq<5>, !quake.veq<6>) -> !quake.struq<"gumby": !quake.veq, !quake.veq> + %13 = quake.get_member %12[0] : (!quake.struq<"gumby": !quake.veq, !quake.veq>) -> !quake.veq + %14 = quake.get_member %12[1] : (!quake.struq<"gumby": !quake.veq, !quake.veq>) -> !quake.veq + %20 = quake.alloca !quake.veq<7> + %21 = quake.alloca !quake.veq<8> + %22 = quake.make_struq %20, %21 : (!quake.veq<7>, !quake.veq<8>) -> !quake.struq, !quake.veq<8>> + %23 = quake.get_member %22[0] : (!quake.struq, !quake.veq<8>>) -> !quake.veq<7> + %24 = quake.get_member %22[1] : (!quake.struq, !quake.veq<8>>) -> !quake.veq<8> + return + } + +// CHECK-LABEL: func.func @quantum_product_type() { +// CHECK: %[[VAL_0:.*]] = quake.alloca !quake.veq<3> +// CHECK: %[[VAL_1:.*]] = quake.alloca !quake.veq<4> +// CHECK: %[[VAL_2:.*]] = quake.make_struq %[[VAL_0]], %[[VAL_1]] : (!quake.veq<3>, !quake.veq<4>) -> !quake.struq, !quake.veq> +// CHECK: %[[VAL_3:.*]] = quake.get_member %[[VAL_2]][0] : (!quake.struq, !quake.veq>) 
-> !quake.veq +// CHECK: %[[VAL_4:.*]] = quake.get_member %[[VAL_2]][1] : (!quake.struq, !quake.veq>) -> !quake.veq +// CHECK: %[[VAL_5:.*]] = quake.alloca !quake.veq<5> +// CHECK: %[[VAL_6:.*]] = quake.alloca !quake.veq<6> +// CHECK: %[[VAL_7:.*]] = quake.make_struq %[[VAL_5]], %[[VAL_6]] : (!quake.veq<5>, !quake.veq<6>) -> !quake.struq<"gumby": !quake.veq, !quake.veq> +// CHECK: %[[VAL_8:.*]] = quake.get_member %[[VAL_7]][0] : (!quake.struq<"gumby": !quake.veq, !quake.veq>) -> !quake.veq +// CHECK: %[[VAL_9:.*]] = quake.get_member %[[VAL_7]][1] : (!quake.struq<"gumby": !quake.veq, !quake.veq>) -> !quake.veq +// CHECK: %[[VAL_10:.*]] = quake.alloca !quake.veq<7> +// CHECK: %[[VAL_11:.*]] = quake.alloca !quake.veq<8> +// CHECK: %[[VAL_12:.*]] = quake.make_struq %[[VAL_10]], %[[VAL_11]] : (!quake.veq<7>, !quake.veq<8>) -> !quake.struq, !quake.veq<8>> +// CHECK: %[[VAL_13:.*]] = quake.get_member %[[VAL_12]][0] : (!quake.struq, !quake.veq<8>>) -> !quake.veq<7> +// CHECK: %[[VAL_14:.*]] = quake.get_member %[[VAL_12]][1] : (!quake.struq, !quake.veq<8>>) -> !quake.veq<8> +// CHECK: return +// CHECK: } diff --git a/test/Quake/wireset_codegen.qke b/test/Quake/wireset_codegen.qke new file mode 100644 index 0000000000..5ca9213018 --- /dev/null +++ b/test/Quake/wireset_codegen.qke @@ -0,0 +1,429 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. 
// +// ========================================================================== // + +// clang-format off +// RUN: cudaq-opt --wireset-to-profile-qir-prep --wireset-to-profile-qir --symbol-dce %s | FileCheck --check-prefix=BASE %s +// RUN: cudaq-opt --wireset-to-profile-qir-prep --wireset-to-profile-qir=convert-to=qir-base --symbol-dce %s | FileCheck --check-prefix=BASE %s +// RUN: cudaq-opt --wireset-to-profile-qir-prep --wireset-to-profile-qir=convert-to=qir-adaptive --symbol-dce %s | FileCheck --check-prefix=ADAPT %s +// clang-format on + +quake.wire_set @phys[8] + +func.func @__nvqpp__mlirgen__comprehensive() attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst = arith.constant 1.6123000000000003 : f64 + %cst_0 = arith.constant 3.6123000000000003 : f64 + %cst_1 = arith.constant 4.612300e+00 : f64 + %cst_2 = arith.constant -3.0000000000000009 : f64 + %cst_3 = arith.constant 8.6123000000000012 : f64 + %cst_4 = arith.constant 0.000000e+00 : f64 + %cst_5 = arith.constant 6.612300e+00 : f64 + %cst_6 = arith.constant 5.612300e+00 : f64 + %cst_7 = arith.constant 8.000000e-01 : f64 + %cst_8 = arith.constant 5.000000e-01 : f64 + %cst_9 = arith.constant -1.000000e+00 : f64 + %0 = quake.borrow_wire @phys[0] : !quake.wire + %1 = quake.borrow_wire @phys[1] : !quake.wire + %2 = quake.borrow_wire @phys[2] : !quake.wire + %3 = quake.borrow_wire @phys[3] : !quake.wire + %4 = quake.borrow_wire @phys[4] : !quake.wire + %5 = quake.borrow_wire @phys[5] : !quake.wire + %6 = quake.borrow_wire @phys[6] : !quake.wire + %7 = quake.h %0 : (!quake.wire) -> !quake.wire + %8 = quake.h %4 : (!quake.wire) -> !quake.wire + %9 = quake.h %5 : (!quake.wire) -> !quake.wire + %10 = quake.h %6 : (!quake.wire) -> !quake.wire + %11 = quake.x %7 : (!quake.wire) -> !quake.wire + %12 = quake.x %8 : (!quake.wire) -> !quake.wire + %13 = quake.x %9 : (!quake.wire) -> !quake.wire + %14 = quake.x %10 : (!quake.wire) -> !quake.wire + %15:2 = quake.x [%2] %12 : (!quake.wire, !quake.wire) -> (!quake.wire, 
!quake.wire) + %16 = quake.y %11 : (!quake.wire) -> !quake.wire + %17 = quake.y %15#1 : (!quake.wire) -> !quake.wire + %18 = quake.y %13 : (!quake.wire) -> !quake.wire + %19 = quake.y %14 : (!quake.wire) -> !quake.wire + %20 = quake.z %16 : (!quake.wire) -> !quake.wire + %21 = quake.z %17 : (!quake.wire) -> !quake.wire + %22 = quake.z %18 : (!quake.wire) -> !quake.wire + %23 = quake.z %19 : (!quake.wire) -> !quake.wire + %24 = quake.t %20 : (!quake.wire) -> !quake.wire + %25 = quake.t %21 : (!quake.wire) -> !quake.wire + %26 = quake.t %22 : (!quake.wire) -> !quake.wire + %27 = quake.t %23 : (!quake.wire) -> !quake.wire + %28 = quake.t %26 : (!quake.wire) -> !quake.wire + %29 = quake.s %24 : (!quake.wire) -> !quake.wire + %30 = quake.s %25 : (!quake.wire) -> !quake.wire + %31 = quake.s %28 : (!quake.wire) -> !quake.wire + %32 = quake.s %27 : (!quake.wire) -> !quake.wire + %33 = quake.s %29 : (!quake.wire) -> !quake.wire + %34 = quake.rx (%cst_6) %31 : (f64, !quake.wire) -> !quake.wire + %35 = quake.rx (%cst_4) %33 : (f64, !quake.wire) -> !quake.wire + %36 = quake.ry (%cst_5) %30 : (f64, !quake.wire) -> !quake.wire + %37 = quake.ry (%cst_4) %35 : (f64, !quake.wire) -> !quake.wire + %38 = quake.rz (%cst_3) %34 : (f64, !quake.wire) -> !quake.wire + %39 = quake.rz (%cst_2) %37 : (f64, !quake.wire) -> !quake.wire + %40 = quake.r1 (%cst_1) %39 : (f64, !quake.wire) -> !quake.wire + %41 = quake.r1 (%cst_0) %32 : (f64, !quake.wire) -> !quake.wire + %42 = quake.r1 (%cst) %40 : (f64, !quake.wire) -> !quake.wire + %43:2 = quake.swap %42, %41 : (!quake.wire, !quake.wire) -> (!quake.wire, !quake.wire) + %44 = quake.u3 (%cst_7, %cst_8, %cst_9) %3 : (f64, f64, f64, !quake.wire) -> !quake.wire + %measOut, %wires = quake.mz %43#0 name "singleton" : (!quake.wire) -> (!quake.measure, !quake.wire) + %75 = cc.alloca i8 + %76 = quake.discriminate %measOut : (!quake.measure) -> i1 + %77 = cc.cast unsigned %76 : (i1) -> i8 + cc.store %77, %75 : !cc.ptr + %45 = cc.alloca !cc.array + 
%measOut_10, %wires_11 = quake.mz %1 name "eins" : (!quake.wire) -> (!quake.measure, !quake.wire) + %46 = quake.discriminate %measOut_10 : (!quake.measure) -> i1 + %47 = cc.cast %45 : (!cc.ptr>) -> !cc.ptr + %48 = cc.cast unsigned %46 : (i1) -> i8 + cc.store %48, %47 : !cc.ptr + %49 = cc.alloca !cc.array + %measOut_12, %wires_13 = quake.mz %15#0 name "dub" : (!quake.wire) -> (!quake.measure, !quake.wire) + %50 = quake.discriminate %measOut_12 : (!quake.measure) -> i1 + %51 = cc.cast %49 : (!cc.ptr>) -> !cc.ptr + %52 = cc.cast unsigned %50 : (i1) -> i8 + cc.store %52, %51 : !cc.ptr + %measOut_14, %wires_15 = quake.mz %44 name "dub" : (!quake.wire) -> (!quake.measure, !quake.wire) + %53 = quake.discriminate %measOut_14 : (!quake.measure) -> i1 + %54 = cc.compute_ptr %49[1] : (!cc.ptr>) -> !cc.ptr + %55 = cc.cast unsigned %53 : (i1) -> i8 + cc.store %55, %54 : !cc.ptr + %56 = cc.alloca !cc.array + %measOut_16, %wires_17 = quake.mz %36 name "trip" : (!quake.wire) -> (!quake.measure, !quake.wire) + %57 = quake.discriminate %measOut_16 : (!quake.measure) -> i1 + %58 = cc.cast %56 : (!cc.ptr>) -> !cc.ptr + %59 = cc.cast unsigned %57 : (i1) -> i8 + cc.store %59, %58 : !cc.ptr + %measOut_18, %wires_19 = quake.mz %38 name "trip" : (!quake.wire) -> (!quake.measure, !quake.wire) + %60 = quake.discriminate %measOut_18 : (!quake.measure) -> i1 + %61 = cc.compute_ptr %56[1] : (!cc.ptr>) -> !cc.ptr + %62 = cc.cast unsigned %60 : (i1) -> i8 + cc.store %62, %61 : !cc.ptr + %measOut_20, %wires_21 = quake.mz %43#1 name "trip" : (!quake.wire) -> (!quake.measure, !quake.wire) + %63 = quake.discriminate %measOut_20 : (!quake.measure) -> i1 + %64 = cc.compute_ptr %56[2] : (!cc.ptr>) -> !cc.ptr + %65 = cc.cast unsigned %63 : (i1) -> i8 + cc.store %65, %64 : !cc.ptr + quake.return_wire %wires : !quake.wire + quake.return_wire %wires_11 : !quake.wire + quake.return_wire %wires_13 : !quake.wire + quake.return_wire %wires_15 : !quake.wire + quake.return_wire %wires_17 : !quake.wire + 
quake.return_wire %wires_19 : !quake.wire + quake.return_wire %wires_21 : !quake.wire + return +} + +// BASE-LABEL: func.func @__nvqpp__mlirgen__comprehensive() attributes {"cudaq-entrypoint", "cudaq-kernel", output_names = {{.*}}, requiredQubits = "7", requiredResults = "7"} { +// BASE-DAG: %[[VAL_0:.*]] = arith.constant 1.6123000000000003 : f64 +// BASE-DAG: %[[VAL_1:.*]] = arith.constant 3.6123000000000003 : f64 +// BASE-DAG: %[[VAL_2:.*]] = arith.constant 4.612300e+00 : f64 +// BASE-DAG: %[[VAL_3:.*]] = arith.constant -3.0000000000000009 : f64 +// BASE-DAG: %[[VAL_4:.*]] = arith.constant 8.6123000000000012 : f64 +// BASE-DAG: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f64 +// BASE-DAG: %[[VAL_6:.*]] = arith.constant 6.612300e+00 : f64 +// BASE-DAG: %[[VAL_7:.*]] = arith.constant 5.612300e+00 : f64 +// BASE-DAG: %[[VAL_8:.*]] = arith.constant 8.000000e-01 : f64 +// BASE-DAG: %[[VAL_9:.*]] = arith.constant 5.000000e-01 : f64 +// BASE-DAG: %[[VAL_10:.*]] = arith.constant -1.000000e+00 : f64 +// BASE-DAG: %[[VAL_11:.*]] = arith.constant 0 : i64 +// BASE: %[[VAL_12:.*]] = cc.cast %[[VAL_11]] : (i64) -> !cc.ptr +// BASE: %[[VAL_13:.*]] = cc.cast %[[VAL_12]] : (!cc.ptr) -> !llvm.ptr> +// BASE: %[[VAL_14:.*]] = arith.constant 1 : i64 +// BASE: %[[VAL_15:.*]] = cc.cast %[[VAL_14]] : (i64) -> !cc.ptr +// BASE: %[[VAL_16:.*]] = cc.cast %[[VAL_15]] : (!cc.ptr) -> !llvm.ptr> +// BASE: %[[VAL_17:.*]] = arith.constant 2 : i64 +// BASE: %[[VAL_18:.*]] = cc.cast %[[VAL_17]] : (i64) -> !cc.ptr +// BASE: %[[VAL_19:.*]] = cc.cast %[[VAL_18]] : (!cc.ptr) -> !llvm.ptr> +// BASE: %[[VAL_20:.*]] = arith.constant 3 : i64 +// BASE: %[[VAL_21:.*]] = cc.cast %[[VAL_20]] : (i64) -> !cc.ptr +// BASE: %[[VAL_22:.*]] = cc.cast %[[VAL_21]] : (!cc.ptr) -> !llvm.ptr> +// BASE: %[[VAL_23:.*]] = arith.constant 4 : i64 +// BASE: %[[VAL_24:.*]] = cc.cast %[[VAL_23]] : (i64) -> !cc.ptr +// BASE: %[[VAL_25:.*]] = cc.cast %[[VAL_24]] : (!cc.ptr) -> !llvm.ptr> +// BASE: %[[VAL_26:.*]] = 
arith.constant 5 : i64 +// BASE: %[[VAL_27:.*]] = cc.cast %[[VAL_26]] : (i64) -> !cc.ptr +// BASE: %[[VAL_28:.*]] = cc.cast %[[VAL_27]] : (!cc.ptr) -> !llvm.ptr> +// BASE: %[[VAL_29:.*]] = arith.constant 6 : i64 +// BASE: %[[VAL_30:.*]] = cc.cast %[[VAL_29]] : (i64) -> !cc.ptr +// BASE: %[[VAL_31:.*]] = cc.cast %[[VAL_30]] : (!cc.ptr) -> !llvm.ptr> +// BASE: call @__quantum__qis__h__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__h__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__h__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__h__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__x__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__x__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__x__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__x__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__cnot__body(%[[VAL_19]], %[[VAL_25]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__y__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__y__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__y__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__y__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__z__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__z__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__z__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__z__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__t__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__t__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__t__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__t__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__tdg__body(%[[VAL_28]]) : 
(!llvm.ptr>) -> () +// BASE: call @__quantum__qis__s__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__s__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__s__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__s__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__sdg__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// BASE: call @__quantum__qis__rx__body(%[[VAL_7]], %[[VAL_28]]) : (f64, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__rx__body(%[[VAL_5]], %[[VAL_13]]) : (f64, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__ry__body(%[[VAL_6]], %[[VAL_25]]) : (f64, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__ry__body(%[[VAL_5]], %[[VAL_13]]) : (f64, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__rz__body(%[[VAL_4]], %[[VAL_28]]) : (f64, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__rz__body(%[[VAL_3]], %[[VAL_13]]) : (f64, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__r1__body(%[[VAL_2]], %[[VAL_13]]) : (f64, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__r1__body(%[[VAL_1]], %[[VAL_31]]) : (f64, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__r1__body(%[[VAL_0]], %[[VAL_13]]) : (f64, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__swap__body(%[[VAL_13]], %[[VAL_31]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// BASE: call @__quantum__qis__u3__body(%[[VAL_8]], %[[VAL_9]], %[[VAL_10]], %[[VAL_22]]) : (f64, f64, f64, !llvm.ptr>) -> () +// BASE: %[[VAL_32:.*]] = arith.constant 0 : i64 +// BASE: %[[VAL_33:.*]] = cc.cast %[[VAL_32]] : (i64) -> !cc.ptr +// BASE: %[[VAL_34:.*]] = cc.cast %[[VAL_33]] : (!cc.ptr) -> !llvm.ptr> +// BASE: call @__quantum__qis__mz__body(%[[VAL_13]], %[[VAL_34]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// BASE: %[[VAL_35:.*]] = cc.alloca i8 +// BASE: %[[VAL_36:.*]] = cc.address_of @cstr.73696E676C65746F6E00 : !cc.ptr> +// BASE: %[[VAL_37:.*]] = cc.cast %[[VAL_36]] : (!cc.ptr>) -> !cc.ptr +// BASE: call @__quantum__rt__result_record_output(%[[VAL_34]], 
%[[VAL_37]]) : (!llvm.ptr>, !cc.ptr) -> () +// BASE: %[[VAL_38:.*]] = cc.undef i1 +// BASE: %[[VAL_39:.*]] = cc.cast unsigned %[[VAL_38]] : (i1) -> i8 +// BASE: cc.store %[[VAL_39]], %[[VAL_35]] : !cc.ptr +// BASE: %[[VAL_40:.*]] = cc.alloca !cc.array +// BASE: %[[VAL_41:.*]] = arith.constant 1 : i64 +// BASE: %[[VAL_42:.*]] = cc.cast %[[VAL_41]] : (i64) -> !cc.ptr +// BASE: %[[VAL_43:.*]] = cc.cast %[[VAL_42]] : (!cc.ptr) -> !llvm.ptr> +// BASE: call @__quantum__qis__mz__body(%[[VAL_16]], %[[VAL_43]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// BASE: %[[VAL_44:.*]] = cc.address_of @cstr.65696E7300 : !cc.ptr> +// BASE: %[[VAL_45:.*]] = cc.cast %[[VAL_44]] : (!cc.ptr>) -> !cc.ptr +// BASE: call @__quantum__rt__result_record_output(%[[VAL_43]], %[[VAL_45]]) : (!llvm.ptr>, !cc.ptr) -> () +// BASE: %[[VAL_46:.*]] = cc.undef i1 +// BASE: %[[VAL_47:.*]] = cc.cast %[[VAL_40]] : (!cc.ptr>) -> !cc.ptr +// BASE: %[[VAL_48:.*]] = cc.cast unsigned %[[VAL_46]] : (i1) -> i8 +// BASE: cc.store %[[VAL_48]], %[[VAL_47]] : !cc.ptr +// BASE: %[[VAL_49:.*]] = cc.alloca !cc.array +// BASE: %[[VAL_50:.*]] = arith.constant 2 : i64 +// BASE: %[[VAL_51:.*]] = cc.cast %[[VAL_50]] : (i64) -> !cc.ptr +// BASE: %[[VAL_52:.*]] = cc.cast %[[VAL_51]] : (!cc.ptr) -> !llvm.ptr> +// BASE: call @__quantum__qis__mz__body(%[[VAL_19]], %[[VAL_52]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// BASE: %[[VAL_53:.*]] = cc.address_of @cstr.64756200 : !cc.ptr> +// BASE: %[[VAL_54:.*]] = cc.cast %[[VAL_53]] : (!cc.ptr>) -> !cc.ptr +// BASE: call @__quantum__rt__result_record_output(%[[VAL_52]], %[[VAL_54]]) : (!llvm.ptr>, !cc.ptr) -> () +// BASE: %[[VAL_55:.*]] = cc.undef i1 +// BASE: %[[VAL_56:.*]] = cc.cast %[[VAL_49]] : (!cc.ptr>) -> !cc.ptr +// BASE: %[[VAL_57:.*]] = cc.cast unsigned %[[VAL_55]] : (i1) -> i8 +// BASE: cc.store %[[VAL_57]], %[[VAL_56]] : !cc.ptr +// BASE: %[[VAL_58:.*]] = arith.constant 3 : i64 +// BASE: %[[VAL_59:.*]] = cc.cast %[[VAL_58]] : (i64) -> !cc.ptr +// BASE: %[[VAL_60:.*]] = cc.cast %[[VAL_59]] 
: (!cc.ptr) -> !llvm.ptr> +// BASE: call @__quantum__qis__mz__body(%[[VAL_22]], %[[VAL_60]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// BASE: %[[VAL_61:.*]] = cc.address_of @cstr.64756200 : !cc.ptr> +// BASE: %[[VAL_62:.*]] = cc.cast %[[VAL_61]] : (!cc.ptr>) -> !cc.ptr +// BASE: call @__quantum__rt__result_record_output(%[[VAL_60]], %[[VAL_62]]) : (!llvm.ptr>, !cc.ptr) -> () +// BASE: %[[VAL_63:.*]] = cc.undef i1 +// BASE: %[[VAL_64:.*]] = cc.compute_ptr %[[VAL_49]][1] : (!cc.ptr>) -> !cc.ptr +// BASE: %[[VAL_65:.*]] = cc.cast unsigned %[[VAL_63]] : (i1) -> i8 +// BASE: cc.store %[[VAL_65]], %[[VAL_64]] : !cc.ptr +// BASE: %[[VAL_66:.*]] = cc.alloca !cc.array +// BASE: %[[VAL_67:.*]] = arith.constant 4 : i64 +// BASE: %[[VAL_68:.*]] = cc.cast %[[VAL_67]] : (i64) -> !cc.ptr +// BASE: %[[VAL_69:.*]] = cc.cast %[[VAL_68]] : (!cc.ptr) -> !llvm.ptr> +// BASE: call @__quantum__qis__mz__body(%[[VAL_25]], %[[VAL_69]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// BASE: %[[VAL_70:.*]] = cc.address_of @cstr.7472697000 : !cc.ptr> +// BASE: %[[VAL_71:.*]] = cc.cast %[[VAL_70]] : (!cc.ptr>) -> !cc.ptr +// BASE: call @__quantum__rt__result_record_output(%[[VAL_69]], %[[VAL_71]]) : (!llvm.ptr>, !cc.ptr) -> () +// BASE: %[[VAL_72:.*]] = cc.undef i1 +// BASE: %[[VAL_73:.*]] = cc.cast %[[VAL_66]] : (!cc.ptr>) -> !cc.ptr +// BASE: %[[VAL_74:.*]] = cc.cast unsigned %[[VAL_72]] : (i1) -> i8 +// BASE: cc.store %[[VAL_74]], %[[VAL_73]] : !cc.ptr +// BASE: %[[VAL_75:.*]] = arith.constant 5 : i64 +// BASE: %[[VAL_76:.*]] = cc.cast %[[VAL_75]] : (i64) -> !cc.ptr +// BASE: %[[VAL_77:.*]] = cc.cast %[[VAL_76]] : (!cc.ptr) -> !llvm.ptr> +// BASE: call @__quantum__qis__mz__body(%[[VAL_28]], %[[VAL_77]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// BASE: %[[VAL_78:.*]] = cc.address_of @cstr.7472697000 : !cc.ptr> +// BASE: %[[VAL_79:.*]] = cc.cast %[[VAL_78]] : (!cc.ptr>) -> !cc.ptr +// BASE: call @__quantum__rt__result_record_output(%[[VAL_77]], %[[VAL_79]]) : (!llvm.ptr>, !cc.ptr) -> () +// BASE: %[[VAL_80:.*]] = 
cc.undef i1 +// BASE: %[[VAL_81:.*]] = cc.compute_ptr %[[VAL_66]][1] : (!cc.ptr>) -> !cc.ptr +// BASE: %[[VAL_82:.*]] = cc.cast unsigned %[[VAL_80]] : (i1) -> i8 +// BASE: cc.store %[[VAL_82]], %[[VAL_81]] : !cc.ptr +// BASE: %[[VAL_83:.*]] = arith.constant 6 : i64 +// BASE: %[[VAL_84:.*]] = cc.cast %[[VAL_83]] : (i64) -> !cc.ptr +// BASE: %[[VAL_85:.*]] = cc.cast %[[VAL_84]] : (!cc.ptr) -> !llvm.ptr> +// BASE: call @__quantum__qis__mz__body(%[[VAL_31]], %[[VAL_85]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// BASE: %[[VAL_86:.*]] = cc.address_of @cstr.7472697000 : !cc.ptr> +// BASE: %[[VAL_87:.*]] = cc.cast %[[VAL_86]] : (!cc.ptr>) -> !cc.ptr +// BASE: call @__quantum__rt__result_record_output(%[[VAL_85]], %[[VAL_87]]) : (!llvm.ptr>, !cc.ptr) -> () +// BASE: %[[VAL_88:.*]] = cc.undef i1 +// BASE: %[[VAL_89:.*]] = cc.compute_ptr %[[VAL_66]][2] : (!cc.ptr>) -> !cc.ptr +// BASE: %[[VAL_90:.*]] = cc.cast unsigned %[[VAL_88]] : (i1) -> i8 +// BASE: cc.store %[[VAL_90]], %[[VAL_89]] : !cc.ptr +// BASE: return +// BASE: } + + +// ADAPT-LABEL: func.func @__nvqpp__mlirgen__comprehensive() attributes {"cudaq-entrypoint", "cudaq-kernel", output_names = {{.*}}, requiredQubits = "7", requiredResults = "7"} { +// ADAPT-DAG: %[[VAL_0:.*]] = arith.constant 1.6123000000000003 : f64 +// ADAPT-DAG: %[[VAL_1:.*]] = arith.constant 3.6123000000000003 : f64 +// ADAPT-DAG: %[[VAL_2:.*]] = arith.constant 4.612300e+00 : f64 +// ADAPT-DAG: %[[VAL_3:.*]] = arith.constant -3.0000000000000009 : f64 +// ADAPT-DAG: %[[VAL_4:.*]] = arith.constant 8.6123000000000012 : f64 +// ADAPT-DAG: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f64 +// ADAPT-DAG: %[[VAL_6:.*]] = arith.constant 6.612300e+00 : f64 +// ADAPT-DAG: %[[VAL_7:.*]] = arith.constant 5.612300e+00 : f64 +// ADAPT-DAG: %[[VAL_8:.*]] = arith.constant 8.000000e-01 : f64 +// ADAPT-DAG: %[[VAL_9:.*]] = arith.constant 5.000000e-01 : f64 +// ADAPT-DAG: %[[VAL_10:.*]] = arith.constant -1.000000e+00 : f64 +// ADAPT-DAG: %[[VAL_11:.*]] = arith.constant 0 
: i64 +// ADAPT: %[[VAL_12:.*]] = cc.cast %[[VAL_11]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_13:.*]] = cc.cast %[[VAL_12]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: %[[VAL_14:.*]] = arith.constant 1 : i64 +// ADAPT: %[[VAL_15:.*]] = cc.cast %[[VAL_14]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_16:.*]] = cc.cast %[[VAL_15]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: %[[VAL_17:.*]] = arith.constant 2 : i64 +// ADAPT: %[[VAL_18:.*]] = cc.cast %[[VAL_17]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_19:.*]] = cc.cast %[[VAL_18]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: %[[VAL_20:.*]] = arith.constant 3 : i64 +// ADAPT: %[[VAL_21:.*]] = cc.cast %[[VAL_20]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_22:.*]] = cc.cast %[[VAL_21]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: %[[VAL_23:.*]] = arith.constant 4 : i64 +// ADAPT: %[[VAL_24:.*]] = cc.cast %[[VAL_23]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_25:.*]] = cc.cast %[[VAL_24]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: %[[VAL_26:.*]] = arith.constant 5 : i64 +// ADAPT: %[[VAL_27:.*]] = cc.cast %[[VAL_26]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_28:.*]] = cc.cast %[[VAL_27]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: %[[VAL_29:.*]] = arith.constant 6 : i64 +// ADAPT: %[[VAL_30:.*]] = cc.cast %[[VAL_29]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_31:.*]] = cc.cast %[[VAL_30]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: call @__quantum__qis__h__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__h__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__h__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__h__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__x__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__x__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__x__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__x__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__cnot__body(%[[VAL_19]], %[[VAL_25]]) : (!llvm.ptr>, !llvm.ptr>) -> 
() +// ADAPT: call @__quantum__qis__y__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__y__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__y__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__y__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__z__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__z__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__z__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__z__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__t__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__t__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__t__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__t__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__tdg__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__s__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__s__body(%[[VAL_25]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__s__body(%[[VAL_28]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__s__body(%[[VAL_31]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__sdg__body(%[[VAL_13]]) : (!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__rx__body(%[[VAL_7]], %[[VAL_28]]) : (f64, !llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__rx__body(%[[VAL_5]], %[[VAL_13]]) : (f64, !llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__ry__body(%[[VAL_6]], %[[VAL_25]]) : (f64, !llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__ry__body(%[[VAL_5]], %[[VAL_13]]) : (f64, !llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__rz__body(%[[VAL_4]], %[[VAL_28]]) : (f64, !llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__rz__body(%[[VAL_3]], %[[VAL_13]]) : (f64, !llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__r1__body(%[[VAL_2]], %[[VAL_13]]) : (f64, 
!llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__r1__body(%[[VAL_1]], %[[VAL_31]]) : (f64, !llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__r1__body(%[[VAL_0]], %[[VAL_13]]) : (f64, !llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__swap__body(%[[VAL_13]], %[[VAL_31]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// ADAPT: call @__quantum__qis__u3__body(%[[VAL_8]], %[[VAL_9]], %[[VAL_10]], %[[VAL_22]]) : (f64, f64, f64, !llvm.ptr>) -> () +// ADAPT: %[[VAL_32:.*]] = arith.constant 0 : i64 +// ADAPT: %[[VAL_33:.*]] = cc.cast %[[VAL_32]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_34:.*]] = cc.cast %[[VAL_33]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: call @__quantum__qis__mz__body(%[[VAL_13]], %[[VAL_34]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// ADAPT: %[[VAL_35:.*]] = cc.alloca i8 +// ADAPT: %[[VAL_36:.*]] = cc.address_of @cstr.73696E676C65746F6E00 : !cc.ptr> +// ADAPT: %[[VAL_37:.*]] = cc.cast %[[VAL_36]] : (!cc.ptr>) -> !cc.ptr +// ADAPT: call @__quantum__rt__result_record_output(%[[VAL_34]], %[[VAL_37]]) : (!llvm.ptr>, !cc.ptr) -> () +// ADAPT: %[[VAL_38:.*]] = call @__quantum__qis__read_result__body(%[[VAL_34]]) : (!llvm.ptr>) -> i1 +// ADAPT: %[[VAL_39:.*]] = cc.cast unsigned %[[VAL_38]] : (i1) -> i8 +// ADAPT: cc.store %[[VAL_39]], %[[VAL_35]] : !cc.ptr +// ADAPT: %[[VAL_40:.*]] = cc.alloca !cc.array +// ADAPT: %[[VAL_41:.*]] = arith.constant 1 : i64 +// ADAPT: %[[VAL_42:.*]] = cc.cast %[[VAL_41]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_43:.*]] = cc.cast %[[VAL_42]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: call @__quantum__qis__mz__body(%[[VAL_16]], %[[VAL_43]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// ADAPT: %[[VAL_44:.*]] = cc.address_of @cstr.65696E7300 : !cc.ptr> +// ADAPT: %[[VAL_45:.*]] = cc.cast %[[VAL_44]] : (!cc.ptr>) -> !cc.ptr +// ADAPT: call @__quantum__rt__result_record_output(%[[VAL_43]], %[[VAL_45]]) : (!llvm.ptr>, !cc.ptr) -> () +// ADAPT: %[[VAL_46:.*]] = call @__quantum__qis__read_result__body(%[[VAL_43]]) : (!llvm.ptr>) -> i1 +// ADAPT: %[[VAL_47:.*]] = cc.cast %[[VAL_40]] 
: (!cc.ptr>) -> !cc.ptr +// ADAPT: %[[VAL_48:.*]] = cc.cast unsigned %[[VAL_46]] : (i1) -> i8 +// ADAPT: cc.store %[[VAL_48]], %[[VAL_47]] : !cc.ptr +// ADAPT: %[[VAL_49:.*]] = cc.alloca !cc.array +// ADAPT: %[[VAL_50:.*]] = arith.constant 2 : i64 +// ADAPT: %[[VAL_51:.*]] = cc.cast %[[VAL_50]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_52:.*]] = cc.cast %[[VAL_51]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: call @__quantum__qis__mz__body(%[[VAL_19]], %[[VAL_52]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// ADAPT: %[[VAL_53:.*]] = cc.address_of @cstr.64756200 : !cc.ptr> +// ADAPT: %[[VAL_54:.*]] = cc.cast %[[VAL_53]] : (!cc.ptr>) -> !cc.ptr +// ADAPT: call @__quantum__rt__result_record_output(%[[VAL_52]], %[[VAL_54]]) : (!llvm.ptr>, !cc.ptr) -> () +// ADAPT: %[[VAL_55:.*]] = call @__quantum__qis__read_result__body(%[[VAL_52]]) : (!llvm.ptr>) -> i1 +// ADAPT: %[[VAL_56:.*]] = cc.cast %[[VAL_49]] : (!cc.ptr>) -> !cc.ptr +// ADAPT: %[[VAL_57:.*]] = cc.cast unsigned %[[VAL_55]] : (i1) -> i8 +// ADAPT: cc.store %[[VAL_57]], %[[VAL_56]] : !cc.ptr +// ADAPT: %[[VAL_58:.*]] = arith.constant 3 : i64 +// ADAPT: %[[VAL_59:.*]] = cc.cast %[[VAL_58]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_60:.*]] = cc.cast %[[VAL_59]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: call @__quantum__qis__mz__body(%[[VAL_22]], %[[VAL_60]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// ADAPT: %[[VAL_61:.*]] = cc.address_of @cstr.64756200 : !cc.ptr> +// ADAPT: %[[VAL_62:.*]] = cc.cast %[[VAL_61]] : (!cc.ptr>) -> !cc.ptr +// ADAPT: call @__quantum__rt__result_record_output(%[[VAL_60]], %[[VAL_62]]) : (!llvm.ptr>, !cc.ptr) -> () +// ADAPT: %[[VAL_63:.*]] = call @__quantum__qis__read_result__body(%[[VAL_60]]) : (!llvm.ptr>) -> i1 +// ADAPT: %[[VAL_64:.*]] = cc.compute_ptr %[[VAL_49]][1] : (!cc.ptr>) -> !cc.ptr +// ADAPT: %[[VAL_65:.*]] = cc.cast unsigned %[[VAL_63]] : (i1) -> i8 +// ADAPT: cc.store %[[VAL_65]], %[[VAL_64]] : !cc.ptr +// ADAPT: %[[VAL_66:.*]] = cc.alloca !cc.array +// ADAPT: %[[VAL_67:.*]] = arith.constant 4 : i64 +// ADAPT: 
%[[VAL_68:.*]] = cc.cast %[[VAL_67]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_69:.*]] = cc.cast %[[VAL_68]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: call @__quantum__qis__mz__body(%[[VAL_25]], %[[VAL_69]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// ADAPT: %[[VAL_70:.*]] = cc.address_of @cstr.7472697000 : !cc.ptr> +// ADAPT: %[[VAL_71:.*]] = cc.cast %[[VAL_70]] : (!cc.ptr>) -> !cc.ptr +// ADAPT: call @__quantum__rt__result_record_output(%[[VAL_69]], %[[VAL_71]]) : (!llvm.ptr>, !cc.ptr) -> () +// ADAPT: %[[VAL_72:.*]] = call @__quantum__qis__read_result__body(%[[VAL_69]]) : (!llvm.ptr>) -> i1 +// ADAPT: %[[VAL_73:.*]] = cc.cast %[[VAL_66]] : (!cc.ptr>) -> !cc.ptr +// ADAPT: %[[VAL_74:.*]] = cc.cast unsigned %[[VAL_72]] : (i1) -> i8 +// ADAPT: cc.store %[[VAL_74]], %[[VAL_73]] : !cc.ptr +// ADAPT: %[[VAL_75:.*]] = arith.constant 5 : i64 +// ADAPT: %[[VAL_76:.*]] = cc.cast %[[VAL_75]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_77:.*]] = cc.cast %[[VAL_76]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: call @__quantum__qis__mz__body(%[[VAL_28]], %[[VAL_77]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// ADAPT: %[[VAL_78:.*]] = cc.address_of @cstr.7472697000 : !cc.ptr> +// ADAPT: %[[VAL_79:.*]] = cc.cast %[[VAL_78]] : (!cc.ptr>) -> !cc.ptr +// ADAPT: call @__quantum__rt__result_record_output(%[[VAL_77]], %[[VAL_79]]) : (!llvm.ptr>, !cc.ptr) -> () +// ADAPT: %[[VAL_80:.*]] = call @__quantum__qis__read_result__body(%[[VAL_77]]) : (!llvm.ptr>) -> i1 +// ADAPT: %[[VAL_81:.*]] = cc.compute_ptr %[[VAL_66]][1] : (!cc.ptr>) -> !cc.ptr +// ADAPT: %[[VAL_82:.*]] = cc.cast unsigned %[[VAL_80]] : (i1) -> i8 +// ADAPT: cc.store %[[VAL_82]], %[[VAL_81]] : !cc.ptr +// ADAPT: %[[VAL_83:.*]] = arith.constant 6 : i64 +// ADAPT: %[[VAL_84:.*]] = cc.cast %[[VAL_83]] : (i64) -> !cc.ptr +// ADAPT: %[[VAL_85:.*]] = cc.cast %[[VAL_84]] : (!cc.ptr) -> !llvm.ptr> +// ADAPT: call @__quantum__qis__mz__body(%[[VAL_31]], %[[VAL_85]]) : (!llvm.ptr>, !llvm.ptr>) -> () +// ADAPT: %[[VAL_86:.*]] = cc.address_of @cstr.7472697000 : !cc.ptr> 
+// ADAPT: %[[VAL_87:.*]] = cc.cast %[[VAL_86]] : (!cc.ptr>) -> !cc.ptr +// ADAPT: call @__quantum__rt__result_record_output(%[[VAL_85]], %[[VAL_87]]) : (!llvm.ptr>, !cc.ptr) -> () +// ADAPT: %[[VAL_88:.*]] = call @__quantum__qis__read_result__body(%[[VAL_85]]) : (!llvm.ptr>) -> i1 +// ADAPT: %[[VAL_89:.*]] = cc.compute_ptr %[[VAL_66]][2] : (!cc.ptr>) -> !cc.ptr +// ADAPT: %[[VAL_90:.*]] = cc.cast unsigned %[[VAL_88]] : (i1) -> i8 +// ADAPT: cc.store %[[VAL_90]], %[[VAL_89]] : !cc.ptr +// ADAPT: return +// ADAPT: } diff --git a/test/Transforms/DecompositionPatterns/ExpPauliToHRyRzCX.qke b/test/Transforms/DecompositionPatterns/ExpPauliToHRyRzCX.qke index 3cfe622af9..e7d6454a3b 100644 --- a/test/Transforms/DecompositionPatterns/ExpPauliToHRyRzCX.qke +++ b/test/Transforms/DecompositionPatterns/ExpPauliToHRyRzCX.qke @@ -22,7 +22,6 @@ module attributes {quake.mangled_name_map = {__nvqpp__mlirgen__Z4mainE3$_0 = "_Z quake.exp_pauli %4, %1, %5 : (f64, !quake.veq<4>, !cc.ptr>) -> () return } -} // CHECK-LABEL: func.func @__nvqpp__mlirgen__Z4mainE3$_0( // CHECK-SAME: %[[VAL_0:.*]]: f64) attributes {"cudaq-entrypoint", "cudaq-kernel"} { @@ -66,3 +65,37 @@ module attributes {quake.mangled_name_map = {__nvqpp__mlirgen__Z4mainE3$_0 = "_Z // CHECK: quake.h %[[VAL_20]] : (!quake.ref) -> () // CHECK: return // CHECK: } + + func.func @__nvqpp__mlirgen__function_test_param._Z10test_paramN5cudaq10pauli_wordE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %cst = arith.constant 1.000000e+00 : f64 + %c2_i64 = arith.constant 2 : i64 + %0 = cc.address_of @cstr.585900 : !cc.ptr> + %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr + %2 = cc.stdvec_init %1, %c2_i64 : (!cc.ptr, i64) -> !cc.charspan + %3 = quake.alloca !quake.veq<2> + quake.exp_pauli %cst, %3, %2 : (f64, !quake.veq<2>, !cc.charspan) -> () + return + } + llvm.mlir.global private constant @cstr.585900("XY\00") {addr_space = 0 : i32} + +// CHECK-LABEL: func.func 
@__nvqpp__mlirgen__function_test_param._Z10test_paramN5cudaq10pauli_wordE() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_2:.*]] = arith.constant 1.5707963267948966 : f64 +// CHECK: %[[VAL_3:.*]] = arith.constant -1.5707963267948966 : f64 +// CHECK: %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f64 +// CHECK: %[[VAL_5:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_5]][%[[VAL_0]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: quake.h %[[VAL_6]] : (!quake.ref) -> () +// CHECK: %[[VAL_7:.*]] = quake.extract_ref %[[VAL_5]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: quake.rx (%[[VAL_2]]) %[[VAL_7]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_6]]] %[[VAL_7]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_4]]) %[[VAL_7]] : (f64, !quake.ref) -> () +// CHECK: quake.x [%[[VAL_6]]] %[[VAL_7]] : (!quake.ref, !quake.ref) -> () +// CHECK: %[[VAL_8:.*]] = quake.extract_ref %[[VAL_5]][%[[VAL_1]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: quake.rx (%[[VAL_3]]) %[[VAL_8]] : (f64, !quake.ref) -> () +// CHECK: %[[VAL_9:.*]] = quake.extract_ref %[[VAL_5]][%[[VAL_0]]] : (!quake.veq<2>, i64) -> !quake.ref +// CHECK: quake.h %[[VAL_9]] : (!quake.ref) -> () +// CHECK: return +// CHECK: } +} diff --git a/test/Transforms/UnitarySynthesis/random_unitary_4.qke b/test/Transforms/UnitarySynthesis/random_unitary_4.qke new file mode 100644 index 0000000000..0d75db2ac4 --- /dev/null +++ b/test/Transforms/UnitarySynthesis/random_unitary_4.qke @@ -0,0 +1,77 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. 
// +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +// RUN: cudaq-opt --unitary-synthesis --canonicalize --apply-op-specialization --aggressive-early-inlining %s | FileCheck %s + +module attributes {quake.mangled_name_map = {__nvqpp__mlirgen__kernel1 = "__nvqpp__mlirgen__kernel1_PyKernelEntryPointRewrite"}} { + func.func @__nvqpp__mlirgen__kernel1() attributes {"cudaq-entrypoint"} { + %0 = quake.alloca !quake.veq<2> + %1 = quake.extract_ref %0[1] : (!quake.veq<2>) -> !quake.ref + %2 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref + quake.custom_op @__nvqpp__mlirgen__op1_generator_2.rodata %1, %2 : (!quake.ref, !quake.ref) -> () + return + } + cc.global constant @__nvqpp__mlirgen__op1_generator_2.rodata (dense<[(-0.25534141999999999,0.045629179999999998), (0.11619328,0.79785479999999998), (0.19980911000000001,-0.24754117), (0.052455160000000001,0.42272180999999998), (0.48212336,-0.35275169000000001), (0.47307302000000001,2.047710e-01), (0.38804407000000002,0.34346750999999998), (-0.30236461999999997,-0.13199084), (0.53000373000000001,-0.052047940000000001), (-0.055464520000000003,0.044808380000000002), (-0.39853872000000001,-0.60358142999999997), (-0.40979785000000002,0.1422147), (0.20174057000000001,0.50152752), (0.042562830000000003,-0.27803220000000001), (0.14896845,0.29140401999999999), (-0.16938781,0.70203793000000003)]> : tensor<16xcomplex>) : !cc.array x 16> +} + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__kernel1() attributes {"cudaq-entrypoint"} { +// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 3.95826252{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0.93802610{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2.25682378{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1.14360941{{[0-9]+}} : f64 +// 
CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1.57079632{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0.13346974{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant -0.4362153{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant -1.2996367{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant -1.5707963{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1.29963670{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 0.7280736{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 2.7836517{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_12:.*]] = arith.constant -1.906609{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_13:.*]] = arith.constant -3.140173{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_14:.*]] = arith.constant 2.2872369{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_15:.*]] = arith.constant 1.6888584{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_16:.*]] = arith.constant -0.206301{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_17:.*]] = arith.constant 2.0186522{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_18:.*]] = arith.constant 2.0888531{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_19:.*]] = quake.alloca !quake.veq<2> +// CHECK-DAG: %[[VAL_20:.*]] = quake.extract_ref %[[VAL_19]][1] : (!quake.veq<2>) -> !quake.ref +// CHECK-DAG: %[[VAL_21:.*]] = quake.extract_ref %[[VAL_19]][0] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.rz (%[[VAL_0]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_1]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_2]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_12]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_11]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_10]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.h %[[VAL_20]] : (!quake.ref) -> () +// CHECK: quake.h %[[VAL_21]] : (!quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_3]]) 
%[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.h %[[VAL_21]] : (!quake.ref) -> () +// CHECK: quake.h %[[VAL_20]] : (!quake.ref) -> () +// CHECK: quake.rx (%[[VAL_4]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.rx (%[[VAL_4]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_5]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rx (%[[VAL_8]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rx (%[[VAL_8]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_6]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_15]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_14]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_13]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_18]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_17]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_16]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.r1 (%[[VAL_7]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_9]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: return +// CHECK: } diff --git a/test/Transforms/UnitarySynthesis/random_unitary_5.qke b/test/Transforms/UnitarySynthesis/random_unitary_5.qke new file mode 100644 index 0000000000..0da3aa0392 --- /dev/null +++ b/test/Transforms/UnitarySynthesis/random_unitary_5.qke @@ -0,0 +1,77 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. 
// +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +// RUN: cudaq-opt --unitary-synthesis --canonicalize --apply-op-specialization --aggressive-early-inlining %s | FileCheck %s + +module attributes {quake.mangled_name_map = {__nvqpp__mlirgen__kernel2 = "__nvqpp__mlirgen__kernel2_PyKernelEntryPointRewrite"}} { + func.func @__nvqpp__mlirgen__kernel2() attributes {"cudaq-entrypoint"} { + %0 = quake.alloca !quake.veq<2> + %1 = quake.extract_ref %0[1] : (!quake.veq<2>) -> !quake.ref + %2 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref + quake.custom_op @__nvqpp__mlirgen__op2_generator_2.rodata %1, %2 : (!quake.ref, !quake.ref) -> () + return + } + cc.global constant @__nvqpp__mlirgen__op2_generator_2.rodata (dense<[(0.18897759,0.33963024000000003), (0.12335641999999999,-0.48243450999999998), (0.42873799000000001,-0.22386284000000001), (-0.38231686999999998,-0.46998072000000002), (0.26665664,0.31917547000000002), (0.66539470999999994,0.25221665999999998), (-0.47503402,-0.12900718), (-0.26305423,0.095708849999999998), (-0.1821702,0.14533362999999999), (0.18060332000000001,-0.34169106999999999), (1.314040e-03,-0.64370212999999998), (0.54215897999999996,0.29670066), (-0.30045970999999999,0.72895551000000003), (-0.26715635999999998,-0.15790472999999999), (-0.069665530000000003,0.32335976999999999), (-0.13738248,0.39211302999999997)]> : tensor<16xcomplex>) : !cc.array x 16> +} + +// CHECK-LABEL: func.func @__nvqpp__mlirgen__kernel2() attributes {"cudaq-entrypoint"} { +// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 3.3597983{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1.1124416{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant -1.522760{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 
0.4109889{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1.5707963{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant -4.083336{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1.2323317{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0.8932718{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant -1.570796{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_9:.*]] = arith.constant -0.893271{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 0.706693{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 2.349985{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_12:.*]] = arith.constant 1.002236{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_13:.*]] = arith.constant 0.635862{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_14:.*]] = arith.constant 0.453700{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_15:.*]] = arith.constant -0.57588{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_16:.*]] = arith.constant 1.030866{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_17:.*]] = arith.constant 1.768800{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_18:.*]] = arith.constant 0.445277{{[0-9]+}} : f64 +// CHECK-DAG: %[[VAL_19:.*]] = quake.alloca !quake.veq<2> +// CHECK-DAG: %[[VAL_20:.*]] = quake.extract_ref %[[VAL_19]][1] : (!quake.veq<2>) -> !quake.ref +// CHECK-DAG: %[[VAL_21:.*]] = quake.extract_ref %[[VAL_19]][0] : (!quake.veq<2>) -> !quake.ref +// CHECK: quake.rz (%[[VAL_0]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_1]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_2]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_12]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_11]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_10]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.h %[[VAL_20]] : (!quake.ref) -> () +// CHECK: quake.h %[[VAL_21]] : (!quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rz 
(%[[VAL_3]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.h %[[VAL_21]] : (!quake.ref) -> () +// CHECK: quake.h %[[VAL_20]] : (!quake.ref) -> () +// CHECK: quake.rx (%[[VAL_4]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.rx (%[[VAL_4]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_5]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rx (%[[VAL_8]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rx (%[[VAL_8]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_6]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.x {{\[}}%[[VAL_21]]] %[[VAL_20]] : (!quake.ref, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_15]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_14]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_13]]) %[[VAL_21]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_18]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.ry (%[[VAL_17]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_16]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.r1 (%[[VAL_7]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: quake.rz (%[[VAL_9]]) %[[VAL_20]] : (f64, !quake.ref) -> () +// CHECK: return +// CHECK: } diff --git a/test/Translate/translate_openqasm2_loop.cpp b/test/Translate/translate_openqasm2_loop.cpp new file mode 100644 index 0000000000..8cb1504b2b --- /dev/null +++ b/test/Translate/translate_openqasm2_loop.cpp @@ -0,0 +1,55 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. 
* + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// RUN: cudaq-quake %s | cudaq-translate --convert-to=openqasm2 | FileCheck %s + +#include +#include + +struct adonis_kernel { + // Adonis QPU. + // QB1 + // | + // QB2 - QB3 - QB4 + // | + // QB5 + + void operator()() __qpu__ { + cudaq::qvector q(5); + x(q[0]); + + for (int i = 0; i < 4; i++) { + x(q[i], q[i + 1]); + } + x(q[0], q[2], q[1]); + auto result = mz(q); + } +}; + +int main() { + auto counts = cudaq::sample(adonis_kernel{}); + counts.dump(); +} + +// CHECK: // Code generated by NVIDIA's nvq++ compiler +// CHECK: OPENQASM 2.0; + +// CHECK: include "qelib1.inc"; + +// CHECK: gate ZN13adonis_kernelclEv(param0) { +// CHECK: } + +// CHECK: qreg var0[5]; +// CHECK: x var0[0]; +// CHECK: cx var0[0], var0[1]; +// CHECK: cx var0[1], var0[2]; +// CHECK: cx var0[2], var0[3]; +// CHECK: cx var0[3], var0[4]; +// CHECK: ccx var0[0], var0[2], var0[1]; +// CHECK: creg var12[5]; +// CHECK: measure var0 -> var12; diff --git a/test/Translate/translate_openqasm2_simple.cpp b/test/Translate/translate_openqasm2_simple.cpp new file mode 100644 index 0000000000..1caf22cc9d --- /dev/null +++ b/test/Translate/translate_openqasm2_simple.cpp @@ -0,0 +1,40 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: cudaq-quake %s | cudaq-translate --convert-to=openqasm2 | FileCheck %s + +#include +#include + +struct kernel { + void operator()() __qpu__ { + cudaq::qvector q(2); + h(q[0]); + x(q[0], q[1]); + mz(q); + } +}; + +int main() { + auto counts = cudaq::sample(kernel{}); + counts.dump(); +} + +// CHECK: // Code generated by NVIDIA's nvq++ compiler +// CHECK: OPENQASM 2.0; + +// CHECK: include "qelib1.inc"; + +// CHECK: gate ZN6kernelclEv(param0) { +// CHECK: } + +// CHECK: qreg var0[2]; +// CHECK: h var0[0]; +// CHECK: cx var0[0], var0[1]; +// CHECK: creg var3[2]; +// CHECK: measure var0 -> var3; diff --git a/test/Translate/translate_openqasm2_vector.cpp b/test/Translate/translate_openqasm2_vector.cpp new file mode 100644 index 0000000000..cb47358b95 --- /dev/null +++ b/test/Translate/translate_openqasm2_vector.cpp @@ -0,0 +1,42 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// RUN: cudaq-quake %s | cudaq-translate --convert-to=openqasm2 | FileCheck %s + +#include +#include + +struct kernel { + + void operator()() __qpu__ { + cudaq::qvector q(std::vector({ M_SQRT1_2, M_SQRT1_2, 0., 0.})); + auto result = mz(q); + } +}; + +int main() { + auto counts = cudaq::sample(kernel{}); + counts.dump(); +} + +// CHECK: // Code generated by NVIDIA's nvq++ compiler +// CHECK: OPENQASM 2.0; + +// CHECK: include "qelib1.inc"; + +// CHECK: gate ZN6kernelclEv(param0) { +// CHECK: } + +// CHECK: qreg var0[2]; +// CHECK: ry(0.000000e+00) var0[1]; +// CHECK: ry(7.853982e-01) var0[0]; +// CHECK: cx var0[1], var0[0]; +// CHECK: ry(7.853982e-01) var0[0]; +// CHECK: cx var0[1], var0[0]; +// CHECK: creg var3[2]; +// CHECK: measure var0 -> var3; diff --git a/tools/cudaq-opt/cudaq-opt.cpp b/tools/cudaq-opt/cudaq-opt.cpp index 9457356862..0374a44448 100644 --- a/tools/cudaq-opt/cudaq-opt.cpp +++ b/tools/cudaq-opt/cudaq-opt.cpp @@ -62,6 +62,7 @@ int main(int argc, char **argv) { cudaq::opt::registerUnrollingPipeline(); cudaq::opt::registerToExecutionManagerCCPipeline(); cudaq::opt::registerTargetPipelines(); + cudaq::opt::registerWireSetToProfileQIRPipeline(); cudaq::opt::registerMappingPipeline(); // See if we have been asked to load a pass plugin, diff --git a/tools/cudaq-translate/cudaq-translate.cpp b/tools/cudaq-translate/cudaq-translate.cpp index 180a0870ad..846273871f 100644 --- a/tools/cudaq-translate/cudaq-translate.cpp +++ b/tools/cudaq-translate/cudaq-translate.cpp @@ -63,7 +63,7 @@ static llvm::cl::opt convertTo( "convert-to", llvm::cl::desc( "Specify the translation output to be created. 
[Default: \"qir\"]"), - llvm::cl::value_desc("target dialect [\"qir\", \"qir-adaptive\", " + llvm::cl::value_desc("target assembly format [\"qir\", \"qir-adaptive\", " "\"qir-base\", \"openqasm2\", \"iqm\"]"), llvm::cl::init("qir")); diff --git a/tools/nvqpp/nvq++.in b/tools/nvqpp/nvq++.in index f22959681a..57059a2ddb 100644 --- a/tools/nvqpp/nvq++.in +++ b/tools/nvqpp/nvq++.in @@ -98,6 +98,9 @@ function f_option_handling { -fkernel-exec-kind=*) KERNEL_EXECUTION_KIND="{codegen=${1#*=}}" ;; + -fno-set-target-backend) + SET_TARGET_BACKEND=false + ;; *) # Pass any unrecognized options on to the clang++ tool. ARGS="${ARGS} $1" @@ -347,6 +350,7 @@ DISABLE_QUBIT_MAPPING=false NVQIR_LIBS="-lnvqir -lnvqir-" CPPSTD=-std=c++20 CUDAQ_OPT_EXTRA_PASSES= +SET_TARGET_BACKEND=true # Provide a default backend, user can override NVQIR_SIMULATION_BACKEND="qpp" @@ -638,7 +642,7 @@ if [ -n "${TARGET_CONFIG}" ]; then else error_exit "Invalid Target: ($TARGET_CONFIG)" fi - if ${GEN_TARGET_BACKEND}; then + if ${GEN_TARGET_BACKEND} && ${SET_TARGET_BACKEND}; then # Add a function that will run before main and set the target # backend on the quantum_platform TARGET_CONFIG="${TARGET_CONFIG};emulate;${CUDAQ_EMULATE_REMOTE}" @@ -703,13 +707,12 @@ if ${ENABLE_AGGRESSIVE_EARLY_INLINE}; then if ${DO_LINK}; then OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "aggressive-early-inlining") else - OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "func.func(indirect-to-direct-calls),inline") + OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "indirect-to-direct-calls,inline") fi fi if ${ENABLE_DEVICE_CODE_LOADERS}; then RUN_OPT=true - OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "func.func(quake-add-metadata)") - OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "const-prop-complex,lift-array-value,func.func(get-concrete-matrix),device-code-loader{use-quake=1}") + OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" 
"func.func(quake-add-metadata),const-prop-complex,lift-array-value,func.func(get-concrete-matrix),device-code-loader") fi if ${ENABLE_LOWER_TO_CFG}; then RUN_OPT=true @@ -719,6 +722,14 @@ if ${RUN_OPT}; then OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "canonicalize,cse") fi +# If TARGET_PASS_PIPELINE is set, then use that exact pipeline (while still +# allowing command-line additions like normal). +if [ ! -z "$TARGET_PASS_PIPELINE" ]; then + RUN_OPT=true + # Don't use add_pass_to_pipeline here. + OPT_PASSES="$TARGET_PASS_PIPELINE" +fi + if [ ! -z "$CUDAQ_OPT_EXTRA_PASSES" ]; then OPT_PASSES=$(add_pass_to_pipeline "${OPT_PASSES}" "$CUDAQ_OPT_EXTRA_PASSES") fi diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 8a3ce01b50..bc00564bab 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -238,7 +238,8 @@ target_link_libraries(test_photonics cudaq cudaq-platform-default cudaq-em-photonics - nvqir-qpp + nvqir + nvqir-photonics gtest_main) gtest_discover_tests(test_photonics) @@ -252,7 +253,8 @@ target_link_libraries(test_utils cudaq-platform-default cudaq-em-photonics nvqir - nvqir-qpp fmt::fmt-header-only + nvqir-qpp + fmt::fmt-header-only gtest_main) gtest_discover_tests(test_utils) diff --git a/unittests/integration/noise_tester.cpp b/unittests/integration/noise_tester.cpp index 6d67613950..10cd17164d 100644 --- a/unittests/integration/noise_tester.cpp +++ b/unittests/integration/noise_tester.cpp @@ -8,6 +8,7 @@ #include "CUDAQTestUtils.h" #include +#include #include #ifdef CUDAQ_BACKEND_DM @@ -290,4 +291,283 @@ CUDAQ_TEST(NoiseTest, checkPhaseFlipType) { cudaq::unset_noise(); // clear for subsequent tests } +template +struct xOpAll { + void operator()() __qpu__ { + cudaq::qarray q; + x(q); + } +}; + +CUDAQ_TEST(NoiseTest, checkAllQubitChannel) { + cudaq::set_random_seed(13); + cudaq::bit_flip_channel bf(1.); + cudaq::noise_model noise; + noise.add_all_qubit_channel(bf); + const std::size_t shots = 252; + auto counts = 
cudaq::sample({.shots = shots, .noise = noise}, xOpAll<3>{}); + // Check results + EXPECT_EQ(1, counts.size()); + // Noise is applied to all qubits. + EXPECT_NEAR(counts.probability("000"), 1., .1); + std::size_t totalShots = 0; + for (auto &[bitstr, count] : counts) + totalShots += count; + EXPECT_EQ(totalShots, shots); +} + +static cudaq::kraus_channel create2pNoiseChannel() { + cudaq::kraus_op op0{cudaq::complex{0.99498743710662, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.99498743710662, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.99498743710662, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.99498743710662, 0.0}}, + op1{cudaq::complex{0.0, 0.0}, + {0.0, 0.0}, + {0.05773502691896258, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.05773502691896258, 0.0}, + {0.05773502691896258, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.05773502691896258, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}}, + op2{cudaq::complex{0.0, 0.0}, + {0.0, 0.0}, + {0.0, -0.05773502691896258}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, -0.05773502691896258}, + {0.0, 0.05773502691896258}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.05773502691896258}, + {0.0, 0.0}, + {0.0, 0.0}}, + op3{cudaq::complex{0.05773502691896258, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.05773502691896258, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {-0.05773502691896258, 0.0}, + {-0.0, 0.0}, + {0.0, 0.0}, + {0.0, 0.0}, + {-0.0, 0.0}, + {-0.05773502691896258, 0.0}}; + cudaq::kraus_channel noise2q( + std::vector{op0, op1, op2, op3}); + return noise2q; +} + +template +struct bellRandom { + void operator()(int q, int r) __qpu__ { + cudaq::qarray qubits; + h(qubits[q]); + x(qubits[q], qubits[r]); + } +}; + +CUDAQ_TEST(NoiseTest, checkAllQubitChannelWithControl) { + cudaq::set_random_seed(13); + cudaq::noise_model noise; + 
noise.add_all_qubit_channel(create2pNoiseChannel(), + /*numControls=*/1); + const std::size_t shots = 1024; + constexpr std::size_t numQubits = 5; + std::vector qubitIds(numQubits); + std::iota(qubitIds.begin(), qubitIds.end(), 0); + std::set> runs; + do { + const auto pair = std::make_pair(qubitIds[0], qubitIds[1]); + if (runs.contains(pair)) + continue; + runs.insert(pair); + std::cout << "Testing entangling b/w " << qubitIds[0] << " and " + << qubitIds[1] << "\n"; + auto counts = + cudaq::sample({.shots = shots, .noise = noise}, bellRandom{}, + qubitIds[0], qubitIds[1]); + // More than 2 entangled states due to the noise. + EXPECT_GT(counts.size(), 2); + } while (std::next_permutation(qubitIds.begin(), qubitIds.end())); +} + +CUDAQ_TEST(NoiseTest, checkAllQubitChannelWithControlPrefix) { + cudaq::set_random_seed(13); + cudaq::noise_model noise; + noise.add_all_qubit_channel("cx", create2pNoiseChannel()); + const std::size_t shots = 1024; + constexpr std::size_t numQubits = 5; + std::vector qubitIds(numQubits); + std::iota(qubitIds.begin(), qubitIds.end(), 0); + std::set> runs; + do { + const auto pair = std::make_pair(qubitIds[0], qubitIds[1]); + if (runs.contains(pair)) + continue; + runs.insert(pair); + std::cout << "Testing entangling b/w " << qubitIds[0] << " and " + << qubitIds[1] << "\n"; + auto counts = + cudaq::sample({.shots = shots, .noise = noise}, bellRandom{}, + qubitIds[0], qubitIds[1]); + // More than 2 entangled states due to the noise. 
+ EXPECT_GT(counts.size(), 2); + } while (std::next_permutation(qubitIds.begin(), qubitIds.end())); +} + +CUDAQ_TEST(NoiseTest, checkCallbackChannel) { + cudaq::set_random_seed(13); + cudaq::noise_model noise; + noise.add_channel( + [](const auto &qubits, const auto ¶ms) -> cudaq::kraus_channel { + if (qubits.size() == 1 && qubits[0] != 2) + return cudaq::bit_flip_channel(1.); + return cudaq::kraus_channel(); + }); + const std::size_t shots = 252; + auto counts = cudaq::sample({.shots = shots, .noise = noise}, xOpAll<5>{}); + // Check results + EXPECT_EQ(1, counts.size()); + counts.dump(); + // Noise is applied to all qubits. + // All qubits, except q[2], are flipped. + EXPECT_NEAR(counts.probability("00100"), 1., .1); + std::size_t totalShots = 0; + for (auto &[bitstr, count] : counts) + totalShots += count; + EXPECT_EQ(totalShots, shots); +} + +struct rxOp { + void operator()(double angle) __qpu__ { + cudaq::qubit q; + rx(angle, q); + } +}; + +CUDAQ_TEST(NoiseTest, checkCallbackChannelWithParams) { + cudaq::set_random_seed(13); + cudaq::noise_model noise; + noise.add_channel( + [](const auto &qubits, const auto ¶ms) -> cudaq::kraus_channel { + EXPECT_EQ(1, params.size()); + // For testing: only add noise if the angle is positive. + if (params[0] > 0.0) + return cudaq::bit_flip_channel(1.); + return cudaq::kraus_channel(); + }); + const std::size_t shots = 252; + { + // Rx(pi) == X + auto counts = cudaq::sample({.shots = shots, .noise = noise}, rxOp{}, M_PI); + // Check results + EXPECT_EQ(1, counts.size()); + counts.dump(); + // Due to 100% bit-flip, it becomes "0". + EXPECT_NEAR(counts.probability("0"), 1., .1); + } + { + // Rx(-pi) == X + auto counts = + cudaq::sample({.shots = shots, .noise = noise}, rxOp{}, -M_PI); + // Check results + EXPECT_EQ(1, counts.size()); + counts.dump(); + // Due to our custom setup, a negative angle will have no noise. 
+ EXPECT_NEAR(counts.probability("1"), 1., .1); + } +} + +CUDAQ_REGISTER_OPERATION(CustomX, 1, 0, {0, 1, 1, 0}); +CUDAQ_TEST(NoiseTest, checkCustomOperation) { + auto kernel = []() { + cudaq::qubit q; + CustomX(q); + }; + + // Add channel for custom operation using the (name + operand) API + { + cudaq::set_random_seed(13); + cudaq::bit_flip_channel bf(1.); + cudaq::noise_model noise; + noise.add_channel("CustomX", {0}, bf); + const std::size_t shots = 252; + auto counts = cudaq::sample({.shots = shots, .noise = noise}, kernel); + // Check results + EXPECT_EQ(1, counts.size()); + // Due to bit-flip noise, it becomes "0". + EXPECT_NEAR(counts.probability("0"), 1., .1); + std::size_t totalShots = 0; + for (auto &[bitstr, count] : counts) + totalShots += count; + EXPECT_EQ(totalShots, shots); + } + + // Add channel for custom operation using the all-qubit API + { + cudaq::set_random_seed(13); + cudaq::bit_flip_channel bf(1.); + cudaq::noise_model noise; + noise.add_all_qubit_channel("CustomX", bf); + const std::size_t shots = 252; + auto counts = cudaq::sample({.shots = shots, .noise = noise}, kernel); + // Check results + EXPECT_EQ(1, counts.size()); + // Due to bit-flip noise, it becomes "0". + EXPECT_NEAR(counts.probability("0"), 1., .1); + std::size_t totalShots = 0; + for (auto &[bitstr, count] : counts) + totalShots += count; + EXPECT_EQ(totalShots, shots); + } + // Add channel for custom operation using the callback API + { + cudaq::set_random_seed(13); + cudaq::noise_model noise; + noise.add_channel( + "CustomX", + [](const auto &qubits, const auto ¶ms) -> cudaq::kraus_channel { + return cudaq::bit_flip_channel(1.); + }); + const std::size_t shots = 252; + auto counts = cudaq::sample({.shots = shots, .noise = noise}, kernel); + // Check results + EXPECT_EQ(1, counts.size()); + // Due to bit-flip noise, it becomes "0". 
+ EXPECT_NEAR(counts.probability("0"), 1., .1); + std::size_t totalShots = 0; + for (auto &[bitstr, count] : counts) + totalShots += count; + EXPECT_EQ(totalShots, shots); + } +} #endif diff --git a/unittests/photonics/PhotonicsTester.cpp b/unittests/photonics/PhotonicsTester.cpp index 0fce4b70ad..bd66b2c806 100644 --- a/unittests/photonics/PhotonicsTester.cpp +++ b/unittests/photonics/PhotonicsTester.cpp @@ -15,21 +15,21 @@ TEST(PhotonicsTester, checkSimple) { struct test { auto operator()() __qpu__ { - cudaq::qvector<3> qutrits(2); - plus(qutrits[0]); - plus(qutrits[1]); - plus(qutrits[1]); - return mz(qutrits); + cudaq::qvector<3> qumodes(2); + plus(qumodes[0]); + plus(qumodes[1]); + plus(qumodes[1]); + return mz(qumodes); } }; struct test2 { void operator()() __qpu__ { - cudaq::qvector<3> qutrits(2); - plus(qutrits[0]); - plus(qutrits[1]); - plus(qutrits[1]); - mz(qutrits); + cudaq::qvector<3> qumodes(2); + plus(qumodes[0]); + plus(qumodes[1]); + plus(qumodes[1]); + mz(qumodes); } }; @@ -51,15 +51,15 @@ TEST(PhotonicsTester, checkHOM) { constexpr std::array input_state{1, 1}; - cudaq::qvector<3> quds(2); // |00> + cudaq::qvector<3> qumodes(2); // |00> for (std::size_t i = 0; i < 2; i++) { for (std::size_t j = 0; j < input_state[i]; j++) { - plus(quds[i]); // setting to |11> + plus(qumodes[i]); // setting to |11> } } - beam_splitter(quds[0], quds[1], theta); - mz(quds); + beam_splitter(qumodes[0], qumodes[1], theta); + mz(qumodes); } }; @@ -93,18 +93,18 @@ TEST(PhotonicsTester, checkMZI) { constexpr std::array input_state{1, 0}; - cudaq::qvector<3> quds(2); // |00> + cudaq::qvector<3> qumodes(2); // |00> for (std::size_t i = 0; i < 2; i++) for (std::size_t j = 0; j < input_state[i]; j++) - plus(quds[i]); // setting to |10> + plus(qumodes[i]); // setting to |10> - beam_splitter(quds[0], quds[1], M_PI / 4); - phase_shift(quds[0], M_PI / 3); + beam_splitter(qumodes[0], qumodes[1], M_PI / 4); + phase_shift(qumodes[0], M_PI / 3); - beam_splitter(quds[0], quds[1], 
M_PI / 4); - phase_shift(quds[0], M_PI / 3); + beam_splitter(qumodes[0], qumodes[1], M_PI / 4); + phase_shift(qumodes[0], M_PI / 3); - mz(quds); + mz(qumodes); } };