diff --git a/include/Tensor.old.hpp b/include/Tensor.old.hpp deleted file mode 100644 index ae09fcb3..00000000 --- a/include/Tensor.old.hpp +++ /dev/null @@ -1,1958 +0,0 @@ -#ifndef _H_Tensor_ -#define _H_Tensor_ - -#include "Type.hpp" -#include "cytnx_error.hpp" -#include "backend/Storage.hpp" -#include "Device.hpp" -#include "intrusive_ptr_base.hpp" -#include -#include -#include "utils/vec_range.hpp" -#include "utils/vec_cast.hpp" -#include "utils/dynamic_arg_resolver.hpp" -// #include "linalg.hpp" -#include "Accessor.hpp" -#include -#include -#include -#include -#include "backend/Scalar.hpp" - -namespace cytnx { - - ///@cond - // real implementation - class Tensor_impl : public intrusive_ptr_base { - private: - // Interface: - Storage_init_interface __SII; - - // Memory: - Storage _storage; - - // tensor shape - std::vector _shape; - - // pseudo-perm info - std::vector _mapper; - std::vector _invmapper; - bool _contiguous; - - public: - friend class Tensor; - boost::intrusive_ptr _clone_meta_only() const { - boost::intrusive_ptr out(new Tensor_impl()); - out->_mapper = this->_mapper; - out->_invmapper = this->_invmapper; - out->_shape = this->_shape; - out->_contiguous = this->_contiguous; - return out; - } - Tensor_impl() : _contiguous(true){}; - - void Init(const std::vector &shape, const unsigned int &dtype = Type.Double, - int device = -1, const bool &init_zero = true); - void Init(const Storage &in); - // void Init(const Storage &in, const std::vector &shape, - // const unsigned int &dtype, int device); - /* - template - void From_vec(const T &ndvec){ - cytnx_error_msg(std::string(typeid(T).name()).find("vector") == - std::string::npos,"[ERROR][Tensor][From_vec] the input argument should be a nd vector.%s","\n"); - //dispatch the rank!: - - - - } - */ - // clone&assignment constr., use intrusive_ptr's - Tensor_impl(const Tensor_impl &rhs); - Tensor_impl &operator=(const Tensor_impl &rhs); // add const - - unsigned int dtype() const { return this->_storage.dtype(); } - int device() const { return this->_storage.device(); } - - std::string dtype_str() const { return Type.getname(this->_storage.dtype()); } - std::string device_str() const { return Device.getname(this->_storage.device()); } - - const std::vector &shape() const { return _shape; } - - const bool &is_contiguous() const { return this->_contiguous; } - - const std::vector &mapper() const { return this->_mapper; } - const std::vector &invmapper() const { return this->_invmapper; } - Storage &storage() { return _storage; } - - const Storage &storage() const { return _storage; } - - boost::intrusive_ptr clone() const { - boost::intrusive_ptr out = this->_clone_meta_only(); - out->_storage = this->_storage.clone(); - return out; - } - - void to_(const int &device) { this->_storage.to_(device); } - boost::intrusive_ptr to(const int &device) { - if (this->device() == device) { - // boost::intrusive_ptr out(this); - return this; - } else { - boost::intrusive_ptr out = this->_clone_meta_only(); - out->_storage = this->_storage.to(device); - return out; - } - } - - void permute_(const std::vector &rnks); - - boost::intrusive_ptr permute(const std::vector &rnks); - - template - T &at(const std::vector &locator) const { - cytnx_error_msg(locator.size() != this->_shape.size(), "%s", - "The input index does not match Tensor's rank."); - - cytnx_uint64 RealRank, mtplyr; - // std::vector c_shape(this->_shape.size()); - // std::vector c_loc(this->_shape.size()); - cytnx_uint64 c_shape, c_loc; - - RealRank = 0; - mtplyr = 1; - - for (cytnx_int64 i = this->_shape.size() - 1; i >= 0; i--) { - if (locator[i] >= this->_shape[i]) { - cytnx_error_msg(true, "%s", "Attempting to access out-of-bound index in Tensor."); - } - // c_shape[i] = this->_shape[this->_invmapper[i]]; - // c_loc[i] = locator[this->_invmapper[i]]; - c_shape = this->_shape[this->_invmapper[i]]; - c_loc = locator[this->_invmapper[i]]; - RealRank += mtplyr * c_loc; - mtplyr *= c_shape; - } - return this->_storage.at(RealRank); - } - - const Scalar::Sproxy at(const std::vector &locator) const { - cytnx_error_msg(locator.size() != this->_shape.size(), "%s", - "The input index does not match Tensor's rank."); - - cytnx_uint64 RealRank, mtplyr; - // std::vector c_shape(this->_shape.size()); - // std::vector c_loc(this->_shape.size()); - - cytnx_uint64 c_shape, c_loc; - RealRank = 0; - mtplyr = 1; - - for (cytnx_int64 i = this->_shape.size() - 1; i >= 0; i--) { - if (locator[i] >= this->_shape[i]) { - cytnx_error_msg(true, "%s", "Attempting to access out-of-bound index in Tensor."); - } - // c_shape[i] = this->_shape[this->_invmapper[i]]; - // c_loc[i] = locator[this->_invmapper[i]]; - c_shape = this->_shape[this->_invmapper[i]]; - c_loc = locator[this->_invmapper[i]]; - RealRank += mtplyr * c_loc; - mtplyr *= c_shape; - } - return this->_storage.at(RealRank); - } - - Scalar::Sproxy at(const std::vector &locator) { - cytnx_error_msg(locator.size() != this->_shape.size(), "%s", - "The input index does not match Tensor's rank."); - - cytnx_uint64 RealRank, mtplyr; - // std::vector c_shape(this->_shape.size()); - // std::vector c_loc(this->_shape.size()); - cytnx_uint64 c_shape, c_loc; - - RealRank = 0; - mtplyr = 1; - - for (cytnx_int64 i = this->_shape.size() - 1; i >= 0; i--) { - if (locator[i] >= this->_shape[i]) { - cytnx_error_msg(true, "%s", "Attempting to access out-of-bound index in Tensor."); - } - // c_shape[i] = this->_shape[this->_invmapper[i]]; - // c_loc[i] = locator[this->_invmapper[i]]; - c_shape = this->_shape[this->_invmapper[i]]; - c_loc = locator[this->_invmapper[i]]; - RealRank += mtplyr * c_loc; - mtplyr *= c_shape; - } - return this->_storage.at(RealRank); - } - - boost::intrusive_ptr get(const std::vector &accessors); - boost::intrusive_ptr get_deprecated(const std::vector &accessors); - void set(const std::vector &accessors, - const boost::intrusive_ptr &rhs); - - template - void set(const std::vector &accessors, const T &rc); - - void set(const std::vector &accessors, const Scalar::Sproxy &rc); - - template - void fill(const Tx &val) { - this->storage().fill(val); - } - - boost::intrusive_ptr contiguous() { - // return new instance if act on non-contiguous tensor - // return self if act on contiguous tensor - if (this->_contiguous) { - boost::intrusive_ptr out(this); - // out->_storage = this->_storage; - return out; - } else { - boost::intrusive_ptr out(new Tensor_impl()); - std::vector oldshape(this->_shape.size()); - for (cytnx_uint64 i = 0; i < this->_shape.size(); i++) { - oldshape[i] = this->_shape[this->_invmapper[i]]; - } - - out->_storage._impl = - this->_storage._impl->Move_memory(oldshape, this->_mapper, this->_invmapper); - // this->_storage._impl->Move_memory_(oldshape, this->_mapper, this->_invmapper); - // out->_storage._impl = this->_storage._impl; - // std::cout << out->_storage << std::endl; - out->_invmapper = vec_range(this->_invmapper.size()); - out->_mapper = out->_invmapper; - out->_shape = this->_shape; - out->_contiguous = true; - return out; - } - } - - void contiguous_() { - // return new instance if act on non-contiguous tensor - // return self if act on contiguous tensor - if (!this->_contiguous) { - std::vector oldshape(this->_shape.size()); - for (cytnx_uint64 i = 0; i < this->_shape.size(); i++) { - oldshape[i] = this->_shape[this->_invmapper[i]]; - } - - this->_storage._impl = - this->_storage._impl->Move_memory(oldshape, this->_mapper, this->_invmapper); - // this->_storage._impl->Move_memory_(oldshape, this->_mapper, this->_invmapper); - // this->_mapper = vec_range(this->_invmapper.size()); - vec_range_(this->_mapper, this->invmapper().size()); - this->_invmapper = this->_mapper; - this->_contiguous = true; - } - } - - void reshape_(const std::vector &new_shape) { - if (!this->_contiguous) { - this->contiguous_(); - } - // std::vector result_shape(new_shape.size()); - cytnx_uint64 new_N = 1; - bool has_undetermine = false; - unsigned int Udet_id = 0; - // this->_shape = vec_cast(new_shape); - this->_shape.resize(new_shape.size()); - for (cytnx_uint64 i = 0; i < new_shape.size(); i++) { - this->_shape[i] = new_shape[i]; - } - for (int i = 0; i < new_shape.size(); i++) { - if (new_shape[i] < 0) { - if (new_shape[i] != -1) - cytnx_error_msg( - new_shape[i] != -1, "%s", - "[ERROR] reshape can only have dimension > 0 and one undetermine rank specify as -1"); - if (has_undetermine) - cytnx_error_msg( - new_shape[i] != -1, "%s", - "[ERROR] reshape can only have dimension > 0 and one undetermine rank specify as -1"); - Udet_id = i; - has_undetermine = true; - } else { - new_N *= new_shape[i]; - // result_shape[i] = new_shape[i]; - } - } - - if (has_undetermine) { - cytnx_error_msg(new_N > this->_storage.size(), "%s", - "[ERROR] new shape exceed the total number of elements."); - cytnx_error_msg(this->_storage.size() % new_N, "%s", - "[ERROR] unmatch size when reshape with undetermine dimension"); - // result_shape[Udet_id] = this->_storage.size() / new_N; - this->_shape[Udet_id] = this->_storage.size() / new_N; - } else { - cytnx_error_msg(new_N != this->_storage.size(), "%s", - "[ERROR] new shape does not match the number of elements."); - } - - // this->_shape = result_shape; - // this->_mapper = std::move(vec_range(new_shape.size())); - this->_mapper.resize(new_shape.size()); - vec_range_(this->_mapper, new_shape.size()); - this->_invmapper = this->_mapper; - } - - boost::intrusive_ptr reshape(const std::vector &new_shape) { - boost::intrusive_ptr out(new Tensor_impl()); - if (this->is_contiguous()) { - out = this->_clone_meta_only(); - out->_storage = this->_storage; - } else { - out = this->contiguous(); - } - // out = this->clone(); - - out->reshape_(new_shape); - return out; - } - - boost::intrusive_ptr astype(const int &new_type) { - // boost::intrusive_ptr out(new Tensor_impl()); - // out->_storage = this->_storage.astype(new_type); - if (this->dtype() == new_type) { - return this; - } else { - boost::intrusive_ptr out = this->_clone_meta_only(); - out->_storage = this->_storage.astype(new_type); - return out; - } - } - }; - ///@endcond - - class Tensor; - - ///@cond - // [Note] these are fwd from linalg.hpp - template - Tensor operator+(const Tensor &lhs, const T &rc); - template - Tensor operator-(const Tensor &lhs, const T &rhs); - template - Tensor operator*(const Tensor &lhs, const T &rhs); - template - Tensor operator/(const Tensor &lhs, const T &rhs); - ///@endcond - - /// @brief an tensor (multi-dimensional array) - class Tensor { - private: - public: - /// @cond - // this is a proxy class to allow get/set element using [] as python! - struct Tproxy { - boost::intrusive_ptr _insimpl; - std::vector _accs; - Tproxy(boost::intrusive_ptr _ptr, const std::vector &accs) - : _insimpl(std::move(_ptr)), _accs(accs) {} - - // when used to set elems: - const Tensor &operator=(const Tensor &rhs) { - this->_insimpl->set(_accs, rhs._impl); - return rhs; - } - - template - const T &operator=(const T &rc) { - this->_insimpl->set(_accs, rc); - return rc; - } - const Tproxy &operator=(const Tproxy &rc) { - Tensor tmp = Tensor(rc); - this->_insimpl->set(_accs, tmp._impl); - return rc; - } - - template - Tensor operator+=(const T &rc) { - Tensor self; - self._impl = _insimpl->get(_accs); - self += rc; - _insimpl->set(_accs, self._impl); - self._impl = this->_insimpl; - return self; - } - Tensor operator+=(const Tproxy &rc); - - template - Tensor operator-=(const T &rc) { - Tensor self; - self._impl = _insimpl->get(_accs); - self -= rc; - _insimpl->set(_accs, self._impl); - self._impl = this->_insimpl; - return self; - } - Tensor operator-=(const Tproxy &rc); - - template - Tensor operator/=(const T &rc) { - Tensor self; - self._impl = _insimpl->get(_accs); - self /= rc; - _insimpl->set(_accs, self._impl); - self._impl = this->_insimpl; - return self; - } - Tensor operator/=(const Tproxy &rc); - - template - Tensor operator*=(const T &rc) { - Tensor self; - self._impl = _insimpl->get(_accs); - self *= rc; - _insimpl->set(_accs, self._impl); - self._impl = this->_insimpl; - return self; - } - Tensor operator*=(const Tproxy &rc); - - // alias to resolve conflict with op ovld for rc=Tensor - /* - template - Tensor _operatorADD(const T &rc) const{ - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - */ - Tensor operator+(const cytnx_complex128 &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_complex64 &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_double &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_float &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_uint64 &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_int64 &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_uint32 &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_int32 &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_uint16 &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_int16 &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const cytnx_bool &rc) const; //{return this->_operatorADD(rc);}; - Tensor operator+(const Tproxy &rc) const; - - /* - template - Tensor _operatorSUB(const T &rc) const{ - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - */ - Tensor operator-(const cytnx_complex128 &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_complex64 &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_double &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_float &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_uint64 &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_int64 &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_uint32 &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_int32 &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_uint16 &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_int16 &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const cytnx_bool &rc) const; //{return this->_operatorSUB(rc);}; - Tensor operator-(const Tproxy &rc) const; - - Tensor operator-() const; - - /* - template - Tensor _operatorMUL(const T &rc) const{ - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - */ - Tensor operator*(const cytnx_complex128 &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_complex64 &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_double &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_float &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_uint64 &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_int64 &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_uint32 &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_int32 &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_uint16 &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_int16 &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const cytnx_bool &rc) const; //{return this->_operatorMUL(rc);}; - Tensor operator*(const Tproxy &rc) const; - - /* - template - Tensor _operatorDIV(const T &rc) const{ - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - */ - Tensor operator/(const cytnx_complex128 &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_complex64 &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_double &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_float &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_uint64 &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_int64 &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_uint32 &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_int32 &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_uint16 &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_int16 &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const cytnx_bool &rc) const; //{return this->_operatorDIV(rc);}; - Tensor operator/(const Tproxy &rc) const; - - template - T item() const { - Tensor out; - out._impl = _insimpl->get(_accs); - return out.item(); - } - - Scalar::Sproxy item() const { - Tensor out; - out._impl = _insimpl->get(_accs); - return out.item(); - } - - // when used to get elems: - operator Tensor() const { - Tensor out; - out._impl = _insimpl->get(_accs); - return out; - } - - Storage storage() const { - Tensor out; - out._impl = _insimpl->get(_accs); - return out.storage(); - } - - }; // proxy class of Tensor. - - /// @endcond - - /// @cond - // these two are using the python way! - //---------------------------------------- - template - Tproxy operator()(const std::string &e1, const Ts &...elems) { - // std::cout << e1 << std::endl; - std::vector tmp = Indices_resolver(e1, elems...); - return (*this)[tmp]; - } - template - Tproxy operator()(const cytnx_int64 &e1, const Ts &...elems) { - // std::cout << e1<< std::endl; - std::vector tmp = Indices_resolver(e1, elems...); - return (*this)[tmp]; - } - template - Tproxy operator()(const cytnx::Accessor &e1, const Ts &...elems) { - // std::cout << e1 << std::endl; - std::vector tmp = Indices_resolver(e1, elems...); - return (*this)[tmp]; - } - template - const Tproxy operator()(const std::string &e1, const Ts &...elems) const { - // std::cout << e1 << std::endl; - std::vector tmp = Indices_resolver(e1, elems...); - return (*this)[tmp]; - } - template - const Tproxy operator()(const cytnx_int64 &e1, const Ts &...elems) const { - std::vector tmp = Indices_resolver(e1, elems...); - return (*this)[tmp]; - } - template - const Tproxy operator()(const cytnx::Accessor &e1, const Ts &...elems) const { - std::vector tmp = Indices_resolver(e1, elems...); - return (*this)[tmp]; - } - - //----------------------------------------- - - Tproxy operator[](const std::initializer_list &accs) { - std::vector tmp = accs; - return (*this)[tmp]; - } - Tproxy operator[](const std::vector &accs) { - return Tproxy(this->_impl, accs); - } - - const Tproxy operator[](const std::vector &accs) const { - return Tproxy(this->_impl, accs); - } - const Tproxy operator[](const std::initializer_list &accs) const { - std::vector tmp = accs; - return (*this)[tmp]; - } - - Tproxy operator[](const std::initializer_list &accs) { - std::vector tmp = accs; - return (*this)[tmp]; - } - Tproxy operator[](const std::vector &accs) { - std::vector acc_in; - for (int i = 0; i < accs.size(); i++) { - acc_in.push_back(cytnx::Accessor(accs[i])); - } - return Tproxy(this->_impl, acc_in); - } - const Tproxy operator[](const std::initializer_list &accs) const { - std::vector tmp = accs; - return (*this)[tmp]; - } - const Tproxy operator[](const std::vector &accs) const { - std::vector acc_in; - for (int i = 0; i < accs.size(); i++) { - acc_in.push_back(cytnx::Accessor(accs[i])); - } - return Tproxy(this->_impl, acc_in); - } - const Tproxy operator[](const std::vector &accs) const { - std::vector acc_in; - for (int i = 0; i < accs.size(); i++) { - acc_in.push_back(cytnx::Accessor(accs[i])); - } - return Tproxy(this->_impl, acc_in); - } - ///@endcond - //------------------------------------------- - - /// @cond - void _Save(std::fstream &f) const; - void _Load(std::fstream &f); - - /// @endcond - /** - @brief Save current Tensor to file - @param[in] fname file name (without file extension) - - @details - save the Tensor to file with file path specify with input param \p fname with postfix - ".cytn" - @see Load(const std::string &fname) - */ - void Save(const std::string &fname) const; - /** - * @see Save(const std::string &fname) const - */ - void Save(const char *fname) const; - - /** - * @brief Save current Tensor to the binary file - * @details This function will save the Tensor to the binary file with file - * name \p fname . - * @param fname[in] the file name of the binary file. - * @pre The file name @p fname must be valid. - * @see cytnx::Tensor::Fromfile - */ - void Tofile(const std::string &fname) const; - - /** - * @see Tofile(const std::string &fname) const - */ - void Tofile(const char *fname) const; - - /** - * @see Tofile(const std::string &fname) const - */ - void Tofile(std::fstream &f) const; - - /** - @brief Load current Tensor from file - @param fname[in] file name - @details - load the Storage from file with file path specify with input param 'fname' - @pre the file must be a Tensor object which is saved by cytnx::Tensor::Save. - */ - - static Tensor Load(const std::string &fname); - /** - * @see Load(const std::string &fname) - */ - static Tensor Load(const char *fname); - - /** - * @brief Load current Tensor from the binary file - * @details This function will load the Tensor from the binary file which is saved by - * cytnx::Tensor::Tofile. Given the file name \p fname , data type \p dtype and - * number of elements \p count, this function will load the first \p count elements - * from the binary file \p fname with data type \p dtype. - * @param fname[in] the file name of the binary file. - * @param dtype[in] the data type of the binary file. This can be any of the type defined in - * cytnx::Type. - * @param count[in] the number of elements to be loaded from the binary file. If set to -1, - * all elements in the binary file will be loaded. - * @return Tensor - * @pre - * 1. The @p dtype cannot be Type.Void. - * 2. The @p dtype must be the same as the data type of the binary file. - * 3. The @p Nelem cannot be 0. - * 4. The @p Nelem cannot be larger than the number of elements in the binary file. - * 5. The file name @p fname must be valid. - * @see cytnx::Tensor::Tofile - */ - static Tensor Fromfile(const std::string &fname, const unsigned int &dtype, - const cytnx_int64 &count = -1); - static Tensor Fromfile(const char *fname, const unsigned int &dtype, - const cytnx_int64 &count = -1); - - // static Tensor Frombinary(const std::string &fname); - - ///@cond - boost::intrusive_ptr _impl; - Tensor() : _impl(new Tensor_impl()){}; - Tensor(const Tensor &rhs) { _impl = rhs._impl; } - - /* - template - Tensor(const std::initializer_list &rhs){ - Storage stmp = std::vector(rhs); - boost::intrusive_ptr tmp(new Tensor_impl()); - tmp->Init(stmp); - this->_impl = tmp; - } - */ - - Tensor &operator=(const Tensor &rhs) { - _impl = rhs._impl; - return *this; - } - - void operator=(const Tproxy &rhsp) { // this is used to handle proxy assignment - this->_impl = rhsp._insimpl->get(rhsp._accs); - } - ///@endcond - - //@{ - // default device==Device.cpu (-1) - /** - @brief initialize a Tensor - @param[in] shape the shape of tensor. - @param[in] dtype the dtype of tensor. This can be any of type defined in cytnx::Type - @param[in] device the device that tensor to be created. This can be cytnx::Device.cpu or - @param[in] init_zero if true, the content of Tensor will be initialized to zero. if false, the - content of Tensor will be un-initialize. - cytnx::Device.cuda+, see cytnx::Device for more detail. - - @note - The content of Tensor created will be un-initialize! See \link cytnx::zeros - zeros()\endlink, \link cytnx::ones ones() \endlink or \link cytnx::arange arange() \endlink for - generating an Tensor. - - ## Example: - ### c++ API: - \include example/Tensor/Init.cpp - #### output> - \verbinclude example/Tensor/Init.cpp.out - ### python API: - \include example/Tensor/Init.py - #### output> - \verbinclude example/Tensor/Init.py.out - */ - void Init(const std::vector &shape, const unsigned int &dtype = Type.Double, - const int &device = -1, const bool &init_zero = true) { - boost::intrusive_ptr tmp(new Tensor_impl()); - this->_impl = tmp; - this->_impl->Init(shape, dtype, device, init_zero); - } - // void Init(const Storage& storage) { - // boost::intrusive_ptr tmp(new Tensor_impl()); - // this->_impl = tmp; - // this->_impl->Init(storage); - // } - // void Init(const Storage& storage, const std::vector &shape, - // const unsigned int &dtype = Type.Double, const int &device = -1) { - // boost::intrusive_ptr tmp(new Tensor_impl()); - // this->_impl = tmp; - // this->_impl->Init(storage, shape, dtype, device); - // } - - /** - * @brief Construct a new Tensor object - * @details This is the constructor of Tensor. It will call - * cytnx::Tensor::Init() to initialize the Tensor. - * @param[in] shape the shape of tensor - * @param[in] dtype the dtype of tensor. This can be any of type defined in cytnx::Type. - * @param[in] device the device that tensor to be created. This can be cytnx::Device.cpu or - * cytnx::Device.cuda+, see cytnx::Device for more detail. - * @param[in] init_zero if true, the content of Tensor will be initialized to zero. If false, - * the content of Tensor will be un-initialized. - * @see cytnx::Tensor::Init - */ - Tensor(const std::vector &shape, const unsigned int &dtype = Type.Double, - const int &device = -1, const bool &init_zero = 1) - : _impl(new Tensor_impl()) { - this->Init(shape, dtype, device, init_zero); - } - // Tensor(const Storage& storage) - // : _impl(new Tensor_impl()) { - // this->Init(storage); - // } - // Tensor(const Storage& storage, const std::vector &shape, - // const unsigned int &dtype = Type.Double, const int &device = -1) - // : _impl(new Tensor_impl()) { - // this->Init(storage, shape, dtype, device); - // } - //@} - - /** - @brief Convert a Storage to Tensor - @param[in] in the Storage to be converted - @return [Tensor] a Tensor with the same dtype and device as the input Storage - */ - static Tensor from_storage(const Storage &in) { - Tensor out; - boost::intrusive_ptr tmp(new Tensor_impl()); - out._impl = tmp; - out._impl->Init(in); - return out; - } - - /** - @brief the dtype-id of the Tensor - @see cytnx::Type - @return [unsigned int] the dtype_id of the Tensor - */ - unsigned int dtype() const { return this->_impl->dtype(); } - - /** - @brief the device-id of the Tensor - @see cytnx::Device - @return [int] the device_id of the Tensor - */ - int device() const { return this->_impl->device(); } - - /** - @brief the dtype (in string) of the Tensor - @see cytnx::Type, dtype() const - @return [std::string] the dtype of the Tensor - */ - std::string dtype_str() const { return this->_impl->dtype_str(); } - - /** - @brief the device (in string) of the Tensor - @see cytnx::Device, device() const - @return [std::string] the device of the Tensor - */ - std::string device_str() const { return this->_impl->device_str(); } - - /** - @brief the shape of the Tensor - @return [std::vector] the shape of the Tensor - */ - const std::vector &shape() const { return this->_impl->shape(); } - - /** - @brief the rank of the Tensor - @return [cytnx_uint64] the rank of the Tensor - */ - cytnx_uint64 rank() const { return this->_impl->shape().size(); } - - /** - @brief return a clone of the current Tensor. - @return [Tensor] - @details - In C++ API, the behavior of assignment operator is designed to have same behavior as - python,\n to have a copy of the current tensor, we call clone to return a copy. - - ## Example: - ### c++ API: - \include example/Tensor/clone.cpp - #### output> - \verbinclude example/Tensor/clone.cpp.out - ### python API: - \include example/Tensor/clone.py - #### output> - \verbinclude example/Tensor/clone.py.out - */ - Tensor clone() const { - Tensor out; - out._impl = this->_impl->clone(); - return out; - } - - /** - @brief copy a tensor to new device - @param[in] device the device-id that is moving to. it can be any device defined in cytnx::Device - @return [Tensor] - - description:\n - if the device-id is the same as current Tensor's device, then return self.\n - otherwise, return a copy of instance that located on the target device. \n - see also: \link cytnx::Tensor::to_ Tensor.to_ \endlink \n - - ## Example: - ### c++ API: - \include example/Tensor/to.cpp - #### output> - \verbinclude example/Tensor/to.cpp.out - ### python API: - \include example/Tensor/to.py - #### output> - \verbinclude example/Tensor/to.py.out - */ - Tensor to(const int &device) const { - Tensor out; - out._impl = this->_impl->to(device); - return out; - } - - /** - @brief move the current Tensor to the device. - @param[in] device the device-id that is moving to. it can be any device defined in cytnx::Device - - description:\n - see also: \link cytnx::Tensor::to Tensor.to \endlink\n - - ## Example: - ### c++ API: - \include example/Tensor/to_.cpp - #### output> - \verbinclude example/Tensor/to_.cpp.out - ### python API: - \include example/Tensor/to_.py - #### output> - \verbinclude example/Tensor/to_.py.out - */ - void to_(const int &device) { this->_impl->to_(device); } - - /** - @brief return whether the Tensor is contiguous or not. - @return [bool] true if the Tensor is contiguous, false otherwise. - */ - const bool &is_contiguous() const { return this->_impl->is_contiguous(); } - - Tensor permute_(const std::vector &rnks) { - this->_impl->permute_(rnks); - return *this; - } - /// @cond - template - Tensor permute_(const cytnx_uint64 &e1, const Ts &...elems) { - std::vector argv = dynamic_arg_uint64_resolver(e1, elems...); - this->_impl->permute_(argv); - return *this; - } - /// @endcond - - /** - @brief perform tensor permute on the cytnx::Tensor and return a new instance. - @param[in] rnks the permute indices, should have No. of elements equal to the rank of tensor. - @return [Tensor] a permuted new Tensor - @pre - 1. The size of input and output Tensor should be the same. - 2. \p rnks cannot contain duplicated elements. - - ## Example: - ### c++ API: - \include example/Tensor/permute.cpp - #### output> - \verbinclude example/Tensor/permute.cpp.out - ### python API: - \include example/Tensor/permute.py - #### output> - \verbinclude example/Tensor/permute.py.out - */ - Tensor permute(const std::vector &rnks) const { - Tensor out; - out._impl = this->_impl->permute(rnks); - return out; - } - /// @cond - template - Tensor permute(const cytnx_uint64 &e1, const Ts &...elems) const { - std::vector argv = dynamic_arg_uint64_resolver(e1, elems...); - return this->permute(argv); - } - /// @endcond - - /** - @brief Make the Tensor contiguous by coalescing the memory (storage). - @return [Tensor] a new Tensor that is with contiguous memory (storage). - @see \link Tensor::contiguous_ Tensor::contiguous_() \endlink - - ## Example: - ### c++ API: - \include example/Tensor/contiguous.cpp - #### output> - \verbinclude example/Tensor/contiguous.cpp.out - ### python API: - \include example/Tensor/contiguous.py - #### output> - \verbinclude example/Tensor/contiguous.py.out - */ - Tensor contiguous() const { - Tensor out; - out._impl = this->_impl->contiguous(); - return out; - } - - /** - @brief Make the Tensor contiguous by coalescing the memory (storage), inplacely - @see \link Tensor::contiguous Tensor::contiguous() \endlink - - ## Example: - ### c++ API: - \include example/Tensor/contiguous_.cpp - #### output> - \verbinclude example/Tensor/contiguous_.cpp.out - ### python API: - \include example/Tensor/contiguous_.py - #### output> - \verbinclude example/Tensor/contiguous_.py.out - */ - Tensor contiguous_() { - this->_impl->contiguous_(); - return *this; - } - - /** - @brief reshape the Tensor, inplacely - @param[in] new_shape the new shape of the Tensor. - @pre - 1. The size of input and output Tensor should be the same. - 2. \p new_shape cannot be empty. - @see \link Tensor::reshape Tensor::reshape() \endlink - @note - Compare to reshape(), this function will not create a new Tensor, - but reshape the current Tensor inplacely. - - ## Example: - ### c++ API: - \include example/Tensor/reshape_.cpp - #### output> - \verbinclude example/Tensor/reshape_.cpp.out - ### python API: - \include example/Tensor/reshape_.py - #### output> - \verbinclude example/Tensor/reshape_.py.out - */ - void reshape_(const std::vector &new_shape) { this->_impl->reshape_(new_shape); } - /// @cond - void reshape_(const std::vector &new_shape) { - std::vector shape(new_shape.begin(), new_shape.end()); - this->_impl->reshape_(shape); - } - void reshape_(const std::initializer_list &new_shape) { - std::vector shape = new_shape; - this->_impl->reshape_(shape); - } - template - void reshape_(const cytnx_int64 &e1, const Ts... elems) { - std::vector shape = dynamic_arg_int64_resolver(e1, elems...); - // std::cout << shape << std::endl; - this->_impl->reshape_(shape); - } - /// @endcond - - /** - @brief return a new Tensor that is reshaped. - @param[in] new_shape the new shape of the Tensor. - @return [Tensor] - @pre - 1. The size of input and output Tensor should be the same. - 2. \p new_shape cannot be empty. - @note - 1. This function will not change the original Tensor. - 2. You can use Tensor::reshape_() to reshape the Tensor inplacely. - 3. You can set \p new_shape to -1, which will be automatically determined - by the size of the Tensor. The behavior is the same as numpy.reshape(). - @see \link Tensor::reshape_ Tensor::reshape_() \endlink - - ## Example: - ### c++ API: - \include example/Tensor/reshape.cpp - #### output> - \verbinclude example/Tensor/reshape.cpp.out - ### python API: - \include example/Tensor/reshape.py - #### output> - \verbinclude example/Tensor/reshape.py.out - */ - Tensor reshape(const std::vector &new_shape) const { - Tensor out; - out._impl = this->_impl->reshape(new_shape); - return out; - } - - /** - * @see reshape(const std::vector &new_shape) const - */ - Tensor reshape(const std::vector &new_shape) const { - std::vector tmp(new_shape.size()); - memcpy(&tmp[0], &new_shape[0], sizeof(cytnx_uint64) * new_shape.size()); - Tensor out; - out._impl = this->_impl->reshape(tmp); - return out; - } - - /** - * @see reshape(const std::vector &new_shape) const - */ - Tensor reshape(const std::initializer_list &new_shape) const { - return this->reshape(std::vector(new_shape)); - } - - /// @cond - template - Tensor reshape(const cytnx_int64 &e1, const Ts &...elems) const { - std::vector argv = dynamic_arg_int64_resolver(e1, elems...); - return this->reshape(argv); - } - /// @endcond - - /** - @brief return a new Tensor that cast to different dtype. - @param[in] new_type the new dtype. It can be any type defined in cytnx::Type - @return [Tensor] - @note - If the new_type is the same as dtype of the current Tensor, return self. - @attention - This function cannot convert complex type to real type, please use - Tensor::real() or Tensor::imag() to get the real or imaginary part of - the complex Tensor instead. - - ## Example: - ### c++ API: - \include example/Tensor/astype.cpp - #### output> - \verbinclude example/Tensor/astype.cpp.out - ### python API: - \include example/Tensor/astype.py - #### output> - \verbinclude example/Tensor/astype.py.out - */ - Tensor astype(const int &new_type) const { - Tensor out; - out._impl = this->_impl->astype(new_type); - return out; - } - - // Tensor diagonal(){ - // for(unsigned int i=0;ishape().size();i++){ - // if(this->shape()[i] != this->shape()[0],"[ERROR] Tensor.diagonal() can only be called - // when the subject has equal dimension in each rank.%s","\n"); - // } - // - // } - - /** - @brief Get an element at specific location. - @details This function is used to get an element at specific location. If the template type is - not given, the return will be a Scalar. - @param[in] locator the location of the element - @return [ref] - - @note - 1. This is for C++ API only! - 2. need template instantiation to resolve the type, which should be consist with - the dtype of the Tensor. An error will be issued if the template type is inconsist - with the current dtype of Tensor. - 3. For python API, use [] directly to get element. - - ## Example: - ### c++ API: - \include example/Tensor/at.cpp - #### output> - \verbinclude example/Tensor/at.cpp.out - */ - template - T &at(const std::vector &locator) { - return this->_impl->at(locator); - } - - /** - * @see at(const std::vector &locator) - */ - template - const T &at(const std::vector &locator) const { - return this->_impl->at(locator); - } - /// @cond - template - const T &at(const cytnx_uint64 &e1, const Ts &...elems) const { - std::vector argv = dynamic_arg_uint64_resolver(e1, elems...); - return this->at(argv); - } - template - T &at(const cytnx_uint64 &e1, const Ts &...elems) { - std::vector argv = dynamic_arg_uint64_resolver(e1, elems...); - return this->at(argv); - } - - const Scalar::Sproxy at(const std::vector &locator) const { - return this->_impl->at(locator); - } - - Scalar::Sproxy at(const std::vector &locator) { return this->_impl->at(locator); } - /// @endcond - - /** - @brief get the element from a rank-0 Tensor. - @details This function is used to get the element from a rank-0 Tensor. If the template type is - not given, the return will be a Scalar. - @return [T] - - @note - 1. This can only be called on a rank-0 Tensor (scalar). For C++ API, a template - instantiation of type is needed to resolve the type, which should be connsist with the dtype of - the Tensor. An error will be issued if the template type if inconsist with the current dtype of - Tensor. - 2. Although the return is by reference in C++ part, the return in python is not. - 3. From 2., We recommend user to use at (C++ API) and [] (python API) to modify the value - of the element to have consistant syntax across two languages. - - ## Example: - ### c++ API: - \include example/Tensor/item.cpp - #### output> - \verbinclude example/Tensor/item.cpp.out - ### python API: - \include example/Tensor/item.py - #### output> - \verbinclude example/Tensor/item.py.out - */ - template - T &item() { - cytnx_error_msg(this->_impl->storage().size() != 1, "[ERROR][Tensor.item]%s", - "item can only be called from a Tensor with only one element\n"); - return this->_impl->storage().at(0); - } - - ///@cond - template - const T &item() const { - cytnx_error_msg(this->_impl->storage().size() != 1, "[ERROR][Tensor.item]%s", - "item can only be called from a Tensor with only one element\n"); - return this->_impl->storage().at(0); - } - - const Scalar::Sproxy item() const { - Scalar::Sproxy out(this->storage()._impl, 0); - return out; - } - - Scalar::Sproxy item() { - Scalar::Sproxy out(this->storage()._impl, 0); - return out; - } - - ///@endcond - - /** - @brief get elements using Accessor (C++ API) / slices (python API) - @param[in] accessors the Accessor (C++ API) / slices (python API) to get the elements. - @return [Tensor] - @see \link cytnx::Accessor Accessor\endlink for cordinate with Accessor in C++ API. - @note - 1. the return will be a new Tensor instance, which not share memory with the current Tensor. - - ## Equivalently: - One can also using more intruisive way to get the slice using [] operator. - - ## Example: - ### c++ API: - \include example/Tensor/get.cpp - #### output> - \verbinclude example/Tensor/get.cpp.out - ### python API: - \include example/Tensor/get.py - #### output> - \verbinclude example/Tensor/get.py.out - */ - Tensor get(const std::vector &accessors) const { - Tensor out; - out._impl = this->_impl->get(accessors); - return out; - } - - /* - Tensor get_v2(const std::vector &accessors) const{ - Tensor out; - out._impl = this->_impl->get_v2(accessors); - return out; - } - */ - - /** - @brief set elements with the input Tensor using Accessor (C++ API) / slices (python API) - @param[in] accessors the list(vector) of accessors. - @param rhs [Tensor] - @note: - the shape of the input Tensor should be the same as the shape that indicated using Accessor. - The memory is not shared with the input Tensor. - - ## Example: - ### c++ API: - \include example/Tensor/set.cpp - #### output> - \verbinclude example/Tensor/set.cpp.out - ### python API: - \include example/Tensor/set.py - #### output> - \verbinclude example/Tensor/set.py.out - */ - void set(const std::vector &accessors, const Tensor &rhs) { - this->_impl->set(accessors, rhs._impl); - } - - /** - @brief set elements with the input constant using Accessor (C++ API) / slices (python API) - @param[in] accessors the list(vector) of accessors. - @param rc [Const] - - @see \link cytnx::Tensor::fill Tensor::fill \endlink for filling all elements with assigned - constant. - - ## Example: - ### c++ API: - \include example/Tensor/set.cpp - #### output> - \verbinclude example/Tensor/set.cpp.out - ### python API: - \include example/Tensor/set.py - #### output> - \verbinclude example/Tensor/set.py.out - */ - template - void set(const std::vector &accessors, const T &rc) { - this->_impl->set(accessors, rc); - } - ///@cond - template - void set(const std::initializer_list &accessors, const T &rc) { - std::vector args = accessors; - this->set(args, rc); - } - ///@endcond - - /** - @brief return the storage of current Tensor. - @return [Storage] - - @note - The return storage shares the same instance of the storage of current Tensor. Use - Storage.clone() to create a new instance of the returned Storage. - - */ - Storage &storage() const { return this->_impl->storage(); } - - /** - @brief fill all the element of current Tensor with the value. - @param[in] val the assigned value - - ## Example: - ### c++ API: - \include example/Tensor/fill.cpp - #### output> - \verbinclude example/Tensor/fill.cpp.out - ### python API - \include example/Tensor/fill.py - #### output> - \verbinclude example/Tensor/fill.py.out - */ - template - void fill(const T &val) { - this->_impl->fill(val); - } - - /** - * @brief compare the shape of two tensors. - * @param[in] rhs the tensor to be compared. - */ - bool equivshape(const Tensor &rhs) { - if (this->shape() != rhs.shape()) return false; - return true; - } - - /** - * @brief return the real part of the tensor. - * @return [Tensor] the real part of the tensor. - * @pre the tensor must be complex type (Type.ComplexDouble or - * Type.ComplexFloat). - * @see cytnx::Type - */ - Tensor real(); - - /** - * @brief return the imaginary part of the tensor. - * @return [Tensor] the imaginary part of the tensor. - * @pre the tensor must be complex type (Type.ComplexDouble or - * Type.ComplexFloat). - * @see cytnx::Type - */ - Tensor imag(); - - // Arithmic: - /** - * @brief addition assignment operator with a Tensor or a scalar. - * @details This function will add the template type to the current tensor, inplacely. - * The template can be either a scalar or a tensor. If the template is a - * scalar, then the scalar will be added to all the elements of the - * current tensor. If the template is a tensor, then the shape of the - * template tensor must be the same as the current tensor. The supported - * type of the template are Tensor, Scalar or any scalar type (see - * \ref cytnx_complex128, \ref cytnx_complex64, \ref cytnx_double, \ref cytnx_float, - * \ref cytnx_int64, \ref cytnx_int32, \ref cytnx_int16, - * \ref cytnx_uint64, \ref cytnx_uint32, \ref cytnx_uint16, \ref cytnx_bool). - * @param[in] rc the added Tensor or scalar. - * @pre - * If the template type is Tensor, then the shape of the template tensor - * must be the same as the current tensor. - */ - template - Tensor &operator+=(const T &rc); - - /** - * @brief subtraction assignment operator with a Tensor or a scalar. - * @details This function will subtract the template type to the current tensor, inplacely. - * The template can be either a scalar or a tensor. If the template is a - * scalar, then the scalar will be subtracted to all the elements of the - * current tensor. If the template is a tensor, then the shape of the - * template tensor must be the same as the current tensor. The supported - * type of the template are Tensor, Scalar or any scalar type (see - * \ref cytnx_complex128, \ref cytnx_complex64, \ref cytnx_double, \ref cytnx_float, - * \ref cytnx_int64, \ref cytnx_int32, \ref cytnx_int16, - * \ref cytnx_uint64, \ref cytnx_uint32, \ref cytnx_uint16, \ref cytnx_bool). - * @param[in] rc the subtracted Tensor or scalar. - * @pre - * If the template type is Tensor, then the shape of the template tensor - * must be the same as the current tensor. - */ - template - Tensor &operator-=(const T &rc); - - /** - * @brief multiplication assignment operator with a Tensor or a scalar. - * @details This function will multiply the template type to the current tensor, inplacely. - * The template can be either a scalar or a tensor. If the template is a - * scalar, then the scalar will be multiplied to all the elements of the - * current tensor. If the template is a tensor, then the shape of the - * template tensor must be the same as the current tensor. The supported - * type of the template are Tensor, Scalar or any scalar type (see - * \ref cytnx_complex128, \ref cytnx_complex64, \ref cytnx_double, \ref cytnx_float, - * \ref cytnx_int64, \ref cytnx_int32, \ref cytnx_int16, - * \ref cytnx_uint64, \ref cytnx_uint32, \ref cytnx_uint16, \ref cytnx_bool). - * @param[in] rc the multiplied Tensor or scalar. - * @pre - * If the template type is Tensor, then the shape of the template tensor - * must be the same as the current tensor. - */ - template - Tensor &operator*=(const T &rc); - - /** - * @brief division assignment operator with a Tensor or a scalar. - * @details This function will divide the template type to the current tensor, inplacely. - * The template can be either a scalar or a tensor. If the template is a - * scalar, then the scalar will be divided to all the elements of the - * current tensor. If the template is a tensor, then the shape of the - * template tensor must be the same as the current tensor. The supported - * type of the template are Tensor, Scalar or any scalar type (see - * \ref cytnx_complex128, \ref cytnx_complex64, \ref cytnx_double, \ref cytnx_float, - * \ref cytnx_int64, \ref cytnx_int32, \ref cytnx_int16, - * \ref cytnx_uint64, \ref cytnx_uint32, \ref cytnx_uint16, \ref cytnx_bool). - * @param[in] rc the divided Tensor or scalar. - * @pre - * 1. If the template type is Tensor, then the shape of the template tensor - * must be the same as the current tensor. - * 2. \p rc cannot be zero. - */ - template - Tensor &operator/=(const T &rc); - - // Tensor &operator+=(const Tproxy &rc); - // Tensor &operator-=(const Tproxy &rc); - // Tensor &operator*=(const Tproxy &rc); - // Tensor &operator/=(const Tproxy &rc); - /* - Tensor operator+(const Tproxy &rc){ - return *this + Tensor(rc); - } - Tensor operator-(const Tproxy &rc){ - return *this - Tensor(rc); - } - Tensor operator*(const Tproxy &rc){ - return *this * Tensor(rc); - } - Tensor operator/(const Tproxy &rc){ - return *this / Tensor(rc); - } - */ - /** - * @brief Addition function with a Tensor or a scalar. Same as - * cytnx::operator+(const Tensor &self, const T &rhs). - * @param[in] rhs the added Tensor or scalar. - */ - template - Tensor Add(const T &rhs) { - return *this + rhs; - } - - /** - * @brief Addition function with a Tensor or a scalar, inplacely. - * Same as operator+=(const T &rhs). - * @param[in] rhs the added Tensor or scalar. - */ - template - Tensor &Add_(const T &rhs) { - return *this += rhs; - } - - /** - * @brief Subtraction function with a Tensor or a scalar. Same as - * cytnx::operator-(const Tensor &self, const T &rhs). - * @param[in] rhs the subtracted Tensor or scalar. - */ - template - Tensor Sub(const T &rhs) { - return *this - rhs; - } - - /** - * @brief Subtraction function with a Tensor or a scalar, inplacely. - * Same as operator-=(const T &rhs). - * @param[in] rhs the subtracted Tensor or scalar. - */ - template - Tensor &Sub_(const T &rhs) { - return *this -= rhs; - } - - /** - * @brief Multiplication function with a Tensor or a scalar. Same as - * cytnx::operator*(const Tensor &self, const T &rhs). - * @param[in] rhs the multiplied Tensor or scalar. - */ - template - Tensor Mul(const T &rhs) { - return *this * rhs; - } - - /** - * @brief Multiplication function with a Tensor or a scalar, inplacely. - * Same as operator*=(const T &rhs). - * @param[in] rhs the multiplied Tensor or scalar. - */ - template - Tensor &Mul_(const T &rhs) { - return *this *= rhs; - } - - /** - * @brief Division function with a Tensor or a scalar. Same as - * cytnx::operator/(const Tensor &self, const T &rhs). - * @param[in] rhs the divided Tensor or scalar. - * @attension \p rhs cannot be zero. - */ - template - Tensor Div(const T &rhs) { - return *this / rhs; - } - - /** - * @brief Division function with a Tensor or a scalar, inplacely. - * Same as operator/=(const T &rhs). - * @param[in] rhs the divided Tensor or scalar. - * @attension \p rhs cannot be zero. - */ - template - Tensor &Div_(const T &rhs) { - return *this /= rhs; - } - - /** - * @brief The comparison function. - * @details This function is the comparison function. Same as - * cytnx::operator==(const Tensor &self, const T &rhs). - * @param[in] rhs the compared object. - */ - template - Tensor Cpr(const T &rhs) { - return *this == rhs; - } - - // /** - // * @brief Compare each element of the current tensor with the input tensor. - // * @details This function Compare each element of the current tensor with the input tensor. - // * @param[in] rhs the compared tensor. - // */ - // bool approx_eq(const Tensor &rhs, const cytnx_double tol = 0) { - // if (this->device() != rhs.device()) { - // if (User_debug) - // std::cout << "[approx_eq] Tensor device " << this->device() - // << "not equal to rhs tensor device " << rhs.device() << std::endl; - // return false; - // } - // // if (this->dtype() != rhs.dtype()) { - // // std::cout << "[approx_eq] Tensor dtype " << this->dtype() - // // << "not equal to rhs tensor dtype " << rhs.dtype() << std::endl; - // // return false; - // // } - // if (this->shape() != rhs.shape()) { - // if (User_debug) - // std::cout << "[approx_eq] Tensor shape " << this->shape() - // << "not equal to rhs tensor shape " << rhs.shape() << std::endl; - // return false; - // } - // if (this->is_contiguous() != rhs.is_contiguous()) { - // if (User_debug) - // std::cout << "[AreNearlyEqTensor] Tensor contiguous flag " << this->is_contiguous() - // << "not equal to rhs tensor flag " << rhs.is_contiguous() << std::endl; - // return false; - // } - // return this->_impl->_storage.approx_eq(rhs._impl->_storage._impl, tol); - // } - - // template - // Tensor& Cpr_(const T &rhs){ - // - // return *this == rhs; - // } - - template - Tensor Mod(const T &rhs) { - return *this % rhs; - } - - /** - * @brief The negation function. - * @details This function is the negation function. Namely, if the current - * tensor is \f$A\f$, then the output tensor is \f$-A\f$. - * @return The negation of the current tensor. - */ - Tensor operator-() { return this->Mul(-1.); } - - /** - * @brief The flatten function. - * @details This function is the flatten function. It will clone (deep copy) - * , contiguos the current tensor and reshape it to 1-rank Tensor. - * @note compare to the flatten_() function, this function will return a new - * tensor and the current tensor will not be changed. - */ - Tensor flatten() const { - Tensor out = this->clone(); - out.contiguous_(); - out.reshape_({-1}); - return out; - } - - /** - * @brief The flatten function, inplacely. - * @details This function is the flatten function, inplacely. It will - * contiguos the current tensor and reshape it to 1-rank Tensor. - * @note compare to the flatten() function, this is an inplacely function, - * the current tensor will be changed. - */ - void flatten_() { - this->contiguous_(); - this->reshape_({-1}); - } - - /** - * @brief the append function. - * @details This function is the append function. It will append the \p rhs - * tensor to the current tensor. The \p rhs tensor must have the same shape - * as the current tensor, except the first dimension. For example, if the - * current tensor is \f$A(i,j,k)\f$ and the \p rhs tensor is \f$B(j,k)\f$, then - * the output tensor is \f$C(i,j,k)\f$ where - * \f[ - * C(i,j,k) = \begin{cases} - * A(i,j,k) & \text{if } i \neq N \\ - * B(j,k) & \text{if } i = N - * \end{cases} - * \f] - * where \f$N\f$ is the number of the first dimension of the current tensor. - * Here indices \f$i\f$, \f$j\f$ and \f$k\f$ start from 0. - * @param[in] rhs the appended tensor. - * @return The appended tensor. - * @pre - * 1. The \p rhs tensor and the current tensor cannot be empty. - * 2. The \p rhs tensor must have the same shape as the current tensor, - * except the first dimension. Namely, rhs.shape()[i] == this->shape()[i+1] - * and rhs.shape().size() == this->shape().size()-1. - * @note If the dtype of the \p rhs is different from the current tensor, - * the \p rhs will be casted to the dtype of the current tensor. - * @see append(const Storage &rhs) - */ - void append(const Tensor &rhs) { - // Tensor in; - if (!this->is_contiguous()) this->contiguous_(); - - // check Tensor in shape: - cytnx_error_msg(rhs.shape().size() == 0 || this->shape().size() == 0, - "[ERROR] try to append a null Tensor.%s", "\n"); - cytnx_error_msg(rhs.shape().size() != (this->shape().size() - 1), - "[ERROR] try to append a Tensor with rank not match.%s", "\n"); - cytnx_uint64 Nelem = 1; - for (unsigned int i = 0; i < rhs.shape().size(); i++) { - cytnx_error_msg(rhs.shape()[i] != this->shape()[i + 1], - "[ERROR] dimension mismatch @ rhs.rank: [%d] this: [%d] rhs: [%d]\n", i, - this->shape()[i + 1], rhs.shape()[i]); - Nelem *= rhs.shape()[i]; - } - - // check type: - Tensor in; - if (rhs.dtype() != this->dtype()) { - in = rhs.astype(this->dtype()); - if (!in.is_contiguous()) in.contiguous_(); - } else { - if (!in.is_contiguous()) - in = rhs.contiguous(); - else - in = rhs; - } - this->_impl->_shape[0] += 1; - cytnx_uint64 oldsize = this->_impl->_storage.size(); - this->_impl->_storage.resize(oldsize + Nelem); - memcpy(((char *)this->_impl->_storage.data()) + - oldsize * Type.typeSize(this->dtype()) / sizeof(char), - in._impl->_storage.data(), Type.typeSize(in.dtype()) * Nelem); - } - /** - * @brief the append function of the Storage. - * @details This function is the append function of the Storage. It will - * append the \p srhs Storage to the current tensor. The current tensor must - * be rank-2 and the \p srhs Storage must have the same size as the second - * dimension of the current tensor. For example, if the current tensor is - * \f$A\f$ with size \f$M \times N\f$ and the \p srhs Storage is \f$B\f$ - * with size \f$N\f$, then the output tensor is \f$C\f$ with size \f$M \times - * (N+1)\f$ where - * \f[ - * C(i,j) = \begin{cases} - * A(i,j) & \text{if } j \neq N \\ - * B(i) & \text{if } j = N - * \end{cases} - * \f] - * Here indices \f$i\f$ and \f$j\f$ start from 0. - * @param[in] srhs the appended Storage. - * @return The appended tensor. - * @pre - * 1. The \p srhs Storage and the current tensor cannot be empty. - * 2. The current tensor must be rank-2. - * 3. The \p srhs Storage must have the same size as the second dimension of - * the current tensor. Namely, srhs.size() == this->shape()[1]. - * @note If the dtype of the \p srhs is different from the current tensor, - * the \p srhs will be casted to the dtype of the current tensor. - * @see append(const Tensor &rhs) - */ - void append(const Storage &srhs) { - if (!this->is_contiguous()) this->contiguous_(); - - // check Tensor in shape: - cytnx_error_msg(srhs.size() == 0 || this->shape().size() == 0, - "[ERROR] try to append a null Tensor.%s", "\n"); - cytnx_error_msg((this->shape().size() - 1) != 1, - "[ERROR] append a storage to Tensor can only accept rank-2 Tensor.%s", "\n"); - cytnx_error_msg(this->shape().back() != srhs.size(), "[ERROR] Tensor dmension mismatch!%s", - "\n"); - - // check type: - Storage in; - if (srhs.dtype() != this->dtype()) { - in = srhs.astype(this->dtype()); - } else { - in = srhs; - } - this->_impl->_shape[0] += 1; - cytnx_uint64 oldsize = this->_impl->_storage.size(); - this->_impl->_storage.resize(oldsize + in.size()); - memcpy(((char *)this->_impl->_storage.data()) + - oldsize * Type.typeSize(this->dtype()) / sizeof(char), - in._impl->Mem, Type.typeSize(in.dtype()) * in.size()); - } - /* - void append(const Tensor &rhs){ - // convert to the same type. - Tensor in; - if(rhs.dtype() != this->dtype()){ - in = rhs.astype(this->dtype()); - }else{ - in = rhs; - } - - // 1) check rank - if(this->shape().size()==1){ - // check if rhs is a scalar tensor (only one element) - cytnx_error_msg(!(rhs.shape().size()==1 && rhs.shape()[0]==1),"[ERROR] trying to append - a scalar into multidimentional Tensor is not allow.\n Only rank-1 Tensor can accept scalar - append.%s","\n"); this->_impl->_shape[0]+=1; this->_impl->_storage.append(0); - - }else{ - cytnx_error_msg(rhs.shape().size() != this->shape().size()-1,"[ERROR] try to append a - Tensor with rank not match.%s","\n"); - - } - cytnx_error_msg(!this->is_contiguous(),"[ERROR] append require the Tensor to be contiguous. - suggestion: call contiguous() or contiguous_() first.","\n"); - } - */ - /** - * @brief the append function of the scalar. - * @details This function is the append function of the scalar. It can only append - * scalar into rank-1 Tensor. - * @param[in] rhs the appended scalar. - * @return The appended tensor. - * @pre - * 1. The current Tensor must be rank-1. (1D array) - * 2. The current Tensor must be contiguous. - * 3. \p rhs must be a scalar. - */ - template - void append(const T &rhs) { - cytnx_error_msg(this->shape().size() != 1, - "[ERROR] trying to append a scalar into multidimentional Tensor is not " - "allow.\n Only rank-1 Tensor can accept scalar append.%s", - "\n"); - cytnx_error_msg(!this->is_contiguous(), - "[ERROR] append require the Tensor to be contiguous. suggestion: call " - "contiguous() or contiguous_() first.", - "\n"); - this->_impl->_shape[0] += 1; - this->_impl->_storage.append(rhs); - } - - /** - * @brief Check whether two tensors share the same internal memory. - * @details This function will check whether two tensors share the same - * internal memory. If the two tensors share the same internal memory, then - * the function will return true. Otherwise, it will return false. See user - * guide for more details. - * @param[in] rhs the tensor to be compared. - */ - bool same_data(const Tensor &rhs) const; - - // linalg: - /** - * @brief the SVD member function. Same as - * \ref cytnx::linalg::Svd(const Tensor &Tin, const bool &is_UvT) - * , where \p Tin is the current Tensor. - */ - std::vector Svd(const bool &is_UvT = true) const; - - /** - * @brief the Eigh member function. Same as - * \ref cytnx::linalg::Eigh(const Tensor &Tin, const bool &is_V, const bool &row_v) - * , where \p Tin is the current Tensor. - */ - std::vector Eigh(const bool &is_V = true, const bool &row_v = false) const; - - /** - * @brief the InvM_ member function. Same as - * \ref cytnx::linalg::InvM_(Tensor &Tin), where \p Tin is the current Tensor. - */ - Tensor &InvM_(); - - /** - * @brief the InvM member function. Same as - * \ref cytnx::linalg::InvM(const Tensor &Tin), where \p Tin is the current Tensor. - */ - Tensor InvM() const; - - /** - * @brief the Inv_ member function. Same as - * \ref cytnx::linalg::Inv_(Tensor &Tin, const double &clip) - */ - Tensor &Inv_(const double &clip); - - /** - * @brief the Inv member function. Same as - * \ref cytnx::linalg::Inv(const Tensor &Tin, const double &clip) - */ - Tensor Inv(const double &clip) const; - - /** - * @brief the Conj_ member function. Same as - * \ref cytnx::linalg::Conj_(Tensor &Tin), where \p Tin is the current Tensor. - */ - Tensor &Conj_(); - - /** - * @brief the Conj member function. Same as - * \ref cytnx::linalg::Conj(const Tensor &Tin), where \p Tin is the current Tensor. - */ - Tensor Conj() const; - - /** - * @brief the Exp_ member function. Same as linalg::Exp_(Tensor &Tin), where \p Tin is the - * current Tensor. - */ - Tensor &Exp_(); - - /** - * @brief the Exp member function. Same as linalg::Exp(const Tensor &Tin), where \p Tin is the - * current Tensor. - */ - Tensor Exp() const; - - /** - * @brief the Norm member function. Same as linalg::Norm(const Tensor &Tin), where \p Tin is - * the current Tensor. - */ - Tensor Norm() const; - - /** - * @brief the Pow member function. Same as linalg::Pow(const Tensor &Tin, const cytnx_double - * &p), where \p Tin is the current Tensor. - */ - Tensor Pow(const cytnx_double &p) const; - - /** - * @brief the Pow_ member function. Same as linalg::Pow_(Tensor &Tin, const cytnx_double - * &p), where \p Tin is the current Tensor. - */ - Tensor &Pow_(const cytnx_double &p); - - /** - * @brief the Trace member function. Same as linalg::Trace(const Tensor &Tin, const - * cytnx_uint64 &a, const cytnx_uint64 &b), where \p Tin is the current Tensor. - */ - Tensor Trace(const cytnx_uint64 &a = 0, const cytnx_uint64 &b = 1) const; - - /** - * @brief the Abs member function. Same as linalg::Abs(const Tensor &Tin), where \p Tin is the - * current Tensor. - */ - Tensor Abs() const; - - /** - * @brief the Abs_ member function. Same as linalg::Abs_(Tensor &Tin), where \p Tin is the - * current Tensor. - */ - Tensor &Abs_(); - - /** - * @brief the Max member function. Same as linalg::Max(const Tensor &Tin), - * where \p Tin is the current Tensor. - */ - Tensor Max() const; - - /** - * @brief the Min member function. Same as linalg::Min(const Tensor &Tin), - * where \p Tin is the current Tensor. - */ - Tensor Min() const; - - }; // class Tensor - - Tensor operator+(const Tensor &lhs, const Tensor::Tproxy &rhs); - Tensor operator-(const Tensor &lhs, const Tensor::Tproxy &rhs); - Tensor operator*(const Tensor &lhs, const Tensor::Tproxy &rhs); - Tensor operator/(const Tensor &lhs, const Tensor::Tproxy &rhs); - - Tensor operator+(const Tensor &lhs, const Scalar::Sproxy &rhs); - Tensor operator-(const Tensor &lhs, const Scalar::Sproxy &rhs); - Tensor operator*(const Tensor &lhs, const Scalar::Sproxy &rhs); - Tensor operator/(const Tensor &lhs, const Scalar::Sproxy &rhs); - - ///@cond - std::ostream &operator<<(std::ostream &os, const Tensor &in); - std::ostream &operator<<(std::ostream &os, const Tensor::Tproxy &in); - ///@endcond - //{ os << Tensor(in);}; -} // namespace cytnx - -#endif diff --git a/include/UniTensor.hpp b/include/UniTensor.hpp index 08a94537..9fb3558b 100644 --- a/include/UniTensor.hpp +++ b/include/UniTensor.hpp @@ -480,8 +480,9 @@ namespace cytnx { std::vector _interface_block; // this is serves as interface for get_blocks_(); return this; } else { + // TODO: Do not allocate the memory twice. boost::intrusive_ptr out = this->clone(); - out->to_(device); + out->get_block_() = out->get_block_().to(device); return out; } } @@ -1155,8 +1156,11 @@ namespace cytnx { if (this->device() == device) { return this; } else { + // TODO: Do not allocate the memory twice. boost::intrusive_ptr out = this->clone(); - out->to_(device); + for (cytnx_uint64 i = 0; i < out->get_blocks_(true).size(); i++) { + out->get_blocks_(true)[i] = out->get_blocks_(true)[i].to(device); + } return out; } }; @@ -1891,15 +1895,6 @@ namespace cytnx { const int &device = Device.cpu, const bool &is_diag = false, const std::string &name = "") : _impl(new UniTensor_base()) { - #ifdef UNI_DEBUG - cytnx_warning_msg( - true, - "[DEBUG] message: entry for UniTensor(const std::vector &bonds, const " - "std::vector &in_labels={}, const cytnx_int64 &rowrank=-1, const unsigned " - "int " - "&dtype=Type.Double, const int &device = Device.cpu, const bool &is_diag=false)%s", - "\n"); - #endif this->Init(bonds, in_labels, rowrank, dtype, device, is_diag, name); } diff --git a/include/backend/Scalar.hpp b/include/backend/Scalar.hpp index 6f542e29..58b4f38b 100644 --- a/include/backend/Scalar.hpp +++ b/include/backend/Scalar.hpp @@ -2848,6 +2848,12 @@ namespace cytnx { } // casting + /// @brief The explicit casting operator of the Scalar class to cytnx::cytnx_complex128. + explicit operator cytnx_complex128() const { return this->_impl->to_cytnx_complex128(); } + + /// @brief The explicit casting operator of the Scalar class to cytnx::cytnx_complex64. + explicit operator cytnx_complex64() const { return this->_impl->to_cytnx_complex64(); } + /// @brief The explicit casting operator of the Scalar class to cytnx::cytnx_double. explicit operator cytnx_double() const { return this->_impl->to_cytnx_double(); } diff --git a/include/backend/Storage.hpp b/include/backend/Storage.hpp index f30883ef..db99b353 100644 --- a/include/backend/Storage.hpp +++ b/include/backend/Storage.hpp @@ -1,21 +1,31 @@ -#ifndef _H_Storage_ -#define _H_Storage_ +#ifndef BACKEND_STORAGE_H_ +#define BACKEND_STORAGE_H_ #ifndef BACKEND_TORCH - #include - #include + + #include + #include #include #include #include + #include #include + #include + #include + #include + #include #include + #include #include - #include - #include "Type.hpp" + #include "backend/Scalar.hpp" + #include "backend/utils_internal_gpu/cuAlloc_gpu.hpp" + #include "backend/utils_internal_gpu/cuComplexmem_gpu.hpp" + #include "backend/utils_internal_gpu/cuFill_gpu.hpp" + #include "backend/utils_internal_gpu/cuMovemem_gpu.hpp" + #include "cytnx_error.hpp" #include "Device.hpp" #include "intrusive_ptr_base.hpp" - #include "cytnx_error.hpp" - #include "backend/Scalar.hpp" + #include "Type.hpp" #define STORAGE_DEFT_SZ 2 @@ -46,7 +56,7 @@ namespace cytnx { std::string device_str() const; const unsigned long long &capacity() const { return this->cap; } const unsigned long long &size() const { return this->len; } - ~Storage_base(); + virtual ~Storage_base(); template T &at(const cytnx_uint64 &idx) const; @@ -172,6 +182,7 @@ namespace cytnx { const std::vector &mapper = {}); virtual void print_elems(); + // TODO: only for complex storage virtual boost::intrusive_ptr real(); virtual boost::intrusive_ptr imag(); @@ -221,829 +232,774 @@ namespace cytnx { // virtual bool approx_eq(const boost::intrusive_ptr &rhs, // const cytnx_double tol = 1e-8); }; - ///@endcond + ///@endcond - ///@cond - class FloatStorage : public Storage_base { + #ifdef UNI_GPU + template + class GpuAllocator { public: - FloatStorage() { this->dtype = Type.Float; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); - - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + typedef T value_type; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); - }; - ///@endcond + [[nodiscard]] constexpr T *allocate(std::size_t n) { + return reinterpret_cast(utils_internal::cuMalloc_gpu(n * sizeof(T))); + } - ///@cond - class DoubleStorage : public Storage_base { - public: - DoubleStorage() { this->dtype = Type.Double; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + constexpr void deallocate(T *p, [[maybe_unused]] std::size_t n) { + checkCudaErrors(cudaFree(p)); + } - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + template + bool operator==(const Alloc &rhs) const noexcept { + return std::is_same_v, Alloc>; + } - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); + template + bool operator!=(const Alloc &rhs) const noexcept { + return !operator==(rhs); + } + }; + #else + template + class GpuAllocator {}; + #endif // UNI_GPU + + template + struct TypePair { + using type = T; + static constexpr auto value = v; }; - ///@endcond - ///@cond - class ComplexDoubleStorage : public Storage_base { + template + struct FindTypePair {}; + template + struct FindTypePair + : std::conditional_t, B1, FindTypePair> {}; + + template + inline constexpr auto cytnx_type_id = FindTypePair< + T, TypePair, + TypePair, TypePair, + TypePair, TypePair, + TypePair, TypePair, + TypePair, TypePair, + TypePair, TypePair>::value; + + template > + class StorageImplementation : public Storage_base { public: - ComplexDoubleStorage() { this->dtype = Type.ComplexDouble; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + typedef T value_type; + typedef Allocator allocator_type; + typedef std::vector storage_type; + typedef typename storage_type::size_type size_type; + typedef typename storage_type::iterator iterator; + typedef typename storage_type::const_iterator const_iterator; + + static_assert( + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v); + + StorageImplementation() { + // TODO: initialize it in Storage_base + dtype = cytnx_type_id; + } - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + StorageImplementation(int device) : StorageImplementation() { + // TODO: initialize it in Storage_base + this->device = device; + } - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); - }; - ///@endcond + StorageImplementation(unsigned long long len_in, int device = -1, bool init_zero = true) + : StorageImplementation(device) { + Init(len_in, device, init_zero); + } + StorageImplementation(const StorageImplementation &other) { + dtype = cytnx_type_id; + // To make sure `cap` equals storage_.capacity() and only allocates the memory once, we have + // to reserve and then assign. + cap = other.cap; + storage_.reserve(cap); + storage_ = other.storage_; + len = storage_.size(); + Mem = reinterpret_cast(storage_.data()); + device = other.device; + } - ///@cond - class ComplexFloatStorage : public Storage_base { - public: - ComplexFloatStorage() { this->dtype = Type.ComplexFloat; }; void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); + const bool &init_zero = true) override { + len = len_in; + if (len % STORAGE_DEFT_SZ) { + cap = ((unsigned long long)((len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; + } else { + cap = len; + } + cytnx_error_msg(((device == Device.cpu) == std::is_same_v, Allocator>), "%s", + "[ERROR] Cannot switch between CPU and GPU in Init()." + " device:%d this->device:%d", + device, this->device); + this->device = device; + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaSetDevice(device)); + } + storage_.reserve(cap); + storage_.resize(len); + if constexpr (!std::is_same_v) { + Mem = reinterpret_cast(storage_.data()); + } + }; + + // TODO: Remove this unsafe initializer. void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, + const bool &iscap = false, const unsigned long long &cap_in = 0) override { + cytnx_error_msg(cap_in < len_in, "%s", "[ERROR] _Init_by_ptr cannot have capacity < size."); + cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); + cytnx_error_msg(cap_in % STORAGE_DEFT_SZ != 0, + "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); + cytnx_error_msg(this->device != device, "[ERROR] Cannot change device by the raw pointer.", + STORAGE_DEFT_SZ); + cap = iscap ? cap_in : len_in; + len = len_in; + storage_.reserve(cap); + storage_.resize(len); + Mem = storage_.data(); + auto *source_ptr = reinterpret_cast(rawptr); + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaDeviceSynchronize()); + } + std::copy(source_ptr, source_ptr + len, storage_.begin()); + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaFree(rawptr)); + } else { + free(rawptr); + } + }; + + // will be removed + boost::intrusive_ptr _create_new_sametype() { + return boost::intrusive_ptr(new StorageImplementation(device)); + }; + // will be removed + boost::intrusive_ptr clone() override { + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaSetDevice(device)); + checkCudaErrors(cudaDeviceSynchronize()); + } + return boost::intrusive_ptr(new StorageImplementation(*this)); + }; + + boost::intrusive_ptr Movemem_cpu(boost::intrusive_ptr &in, + const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper); + const std::vector &invmapper, + const bool is_inplace) { + auto old = boost::dynamic_pointer_cast(in); + std::vector offsets(old_shape.size()); + int current_offset = 1; + for (auto idx = mapper.rbegin(); idx != mapper.rend(); ++idx) { + offsets[*idx] = current_offset; + current_offset *= old_shape[*idx]; + } + int actual_size = current_offset; // TODO: get from old.size() + std::vector current_indices(old_shape.size()); + if (!is_inplace) { + auto *out = new StorageImplementation(actual_size, old->device, /* init_zero */ false); + int niddle = old_shape.size() - 1; + int position = 0; + for (int i = 0; i < actual_size - 1; ++i) { + out->storage_[position] = old->storage_[i]; + niddle = old_shape.size() - 1; + while (current_indices[niddle] == old_shape[niddle] - 1) { + position -= current_indices[niddle] * offsets[niddle]; + current_indices[niddle--] = 0; + } + current_indices[niddle] += 1; + position += offsets[niddle]; + } + out->storage_[actual_size - 1] = old->storage_[actual_size - 1]; + return boost::intrusive_ptr(out); + } + std::vector seen(actual_size); + std::vector original_offsets(old_shape.size()); + current_offset = 1; + original_offsets[old_shape.size() - 1] = current_offset; + for (size_t i = old_shape.size() - 1; i > 0; --i) { + current_offset *= old_shape[i]; + original_offsets[i - 1] = current_offset; + } + + for (int i = 0; i < seen.size(); ++i) { + int position = i; + while (!seen[i]) { + seen[i] = true; + int new_position = 0; + for (size_t j = 0; j < old_shape.size(); ++j) { + new_position += position / original_offsets[j] * offsets[j]; + position %= original_offsets[j]; + } + std::swap(old->storage_[i], old->storage_[new_position]); + position = new_position; + } + } + return in; + } + void Move_memory_(const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + const std::vector &invmapper) override { + boost::intrusive_ptr tmp(this); + if constexpr (!std::is_same_v, allocator_type>) { // on CPU + Movemem_cpu(tmp, old_shape, mapper, invmapper, /* is_inplace */ true); + } else { + if constexpr (std::is_same_v) { + utils_internal::cuMovemem_gpu_cd(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { + utils_internal::cuMovemem_gpu_cf(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { + utils_internal::cuMovemem_gpu_d(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { + utils_internal::cuMovemem_gpu_f(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { + utils_internal::cuMovemem_gpu_i64(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { + utils_internal::cuMovemem_gpu_u64(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { + utils_internal::cuMovemem_gpu_i32(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { + utils_internal::cuMovemem_gpu_u32(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { + utils_internal::cuMovemem_gpu_i16(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { + utils_internal::cuMovemem_gpu_u16(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else if (std::is_same_v) { // for cytnx_bool + utils_internal::cuMovemem_gpu_b(tmp, old_shape, mapper, invmapper, + /* is_inplace */ true); + } else { + assert(false); + } + } + }; - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + boost::intrusive_ptr Move_memory( + const std::vector &old_shape, const std::vector &mapper, + const std::vector &invmapper) override { + boost::intrusive_ptr tmp(this); + if constexpr (!std::is_same_v, allocator_type>) { // on CPU + return Movemem_cpu(tmp, old_shape, mapper, invmapper, /* is_inplace */ false); + } else { + if constexpr (std::is_same_v) { + return utils_internal::cuMovemem_gpu_cd(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { + return utils_internal::cuMovemem_gpu_cf(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { + return utils_internal::cuMovemem_gpu_d(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { + return utils_internal::cuMovemem_gpu_f(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { + return utils_internal::cuMovemem_gpu_i64(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { + return utils_internal::cuMovemem_gpu_u64(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { + return utils_internal::cuMovemem_gpu_i32(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { + return utils_internal::cuMovemem_gpu_u32(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { + return utils_internal::cuMovemem_gpu_i16(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { + return utils_internal::cuMovemem_gpu_u16(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else if (std::is_same_v) { // for cytnx_bool + return utils_internal::cuMovemem_gpu_b(tmp, old_shape, mapper, invmapper, + /* is_inplace */ false); + } else { + assert(false); + } + } + } - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); - }; - ///@endcond + // will be removed + void to_(const int &device) override{}; + // will be removed + boost::intrusive_ptr to(const int &device) override { + #ifndef UNI_GPU + cytnx_error_msg(device != Device.cpu, + "[ERROR] Cannot transfer to GPU with the binary built without CUDA." + " device:%d", + device); + return nullptr; + #endif // UNI_GPU + bool to_cpu = device == Device.cpu; + bool this_is_on_cpu = !std::is_same_v, Allocator>; + if (to_cpu == this_is_on_cpu) { // also catches the case transferring between two GPUs + return this; + } + boost::intrusive_ptr dest; + if (device == Device.cpu) { + dest = new StorageImplementation(device); + } else { + dest = new StorageImplementation>(device); + } + dest->Init(storage_.size(), device, /* init_zero */ false); + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaSetDevice(this->device)); + checkCudaErrors(cudaDeviceSynchronize()); + } + std::memcpy(dest->data(), data(), storage_.size() * sizeof(value_type)); + return dest; + }; - ///@cond - class Int64Storage : public Storage_base { - public: - Int64Storage() { this->dtype = Type.Int64; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + const std::vector &mapper = {}) { + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaSetDevice(device)); + checkCudaErrors(cudaDeviceSynchronize()); + } + os << "\nTotal elem: " << size() << '\n'; + os << "type : " << Type.getname(dtype) << '\n'; + os << Device.getname(device) << '\n'; + os << "Shape :("; + if (!shape.empty()) { + os << shape[0]; + for (auto it = shape.begin() + 1; it != shape.end(); ++it) { + os << ", " << *it; + } + } + os << ")\n"; + if (storage_.empty()) { + os << "[]" << std::endl; + return; + } - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + os << "["; + std::vector origin_to_now; + if (mapper.empty()) { + origin_to_now.reserve(shape.size()); + for (cytnx_uint64 i = 0; i < shape.size(); ++i) { + origin_to_now.push_back(i); + } + } else { + origin_to_now = mapper; + } + // The summation of all offsets[i] * current_indices[i] is the position of the number + // represetning by current_indices. + std::vector offsets(shape.size()); + int current_offset = 1; + offsets[origin_to_now[shape.size() - 1]] = current_offset; + for (size_t i = shape.size(); --i;) { + current_offset *= shape[origin_to_now[i]]; + offsets[origin_to_now[i - 1]] = current_offset; + } + std::vector current_indices(shape.size()); + int position = 0; + size_t niddle = 0; + for (const_iterator it = storage_.begin(); it < storage_.end() - 1; ++it) { + // output extra newline when finishing to output a matrix and a array + if (it != storage_.begin()) { + if (shape.size() - niddle > 2) { + os << std::string(2, '\n'); + } else if (shape.size() - niddle == 2) { + os << '\n'; + } + } + if (niddle < shape.size() - 1) { + os << std::string(niddle + 1, ' ') << std::string(shape.size() - niddle - 1, '['); + } else { + os << " "; // the space between numbers in the same row + } + os << storage_[position]; + niddle = shape.size() - 1; + while (current_indices[niddle] == shape[niddle] - 1) { + position -= current_indices[niddle] * offsets[niddle]; + current_indices[niddle--] = 0; + } + current_indices[niddle] += 1; + position += offsets[niddle]; + if (niddle < shape.size() - 1) { + os << std::string(shape.size() - niddle - 1, ']'); + } + } + if (shape.back() > 1) { + os << ' '; + } + os << storage_.back() << std::string(shape.size(), ']') << std::endl; + }; + void print_elems() override { + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaSetDevice(device)); + checkCudaErrors(cudaDeviceSynchronize()); + } + std::cout << "[ "; + for (const auto &value : storage_) { + std::cout << value << " "; + } + std::cout << " ]"; + }; - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); - }; - ///@endcond + iterator begin() noexcept { return storage_.begin(); } + + const_iterator begin() const noexcept { return storage_.begin(); } + + // TODO: real() and imag() should be moved to Tensor + // TODO: The interface should be rewritten to prevent runtime type checking. + boost::intrusive_ptr real() override { + if constexpr (std::is_same_v || std::is_same_v) { + auto *out = new StorageImplementation< + typename T::value_type, + typename std::allocator_traits::template rebind_alloc>( + storage_.size(), device); + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaSetDevice(device)); + if constexpr (std::is_same_v) { + utils_internal::cuComplexmem_gpu_cdtd(out->data(), storage_.data(), storage_.size(), + /* get_real */ true); + } else { + utils_internal::cuComplexmem_gpu_cftf(out->data(), storage_.data(), storage_.size(), + /* get_real */ true); + } + } else { + std::transform( + storage_.begin(), storage_.end(), out->begin(), + [](const auto &c) -> auto{ return c.real(); }); + } + + return boost::intrusive_ptr(out); + } else { + cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", + "\n"); + } + }; - ///@cond - class Uint64Storage : public Storage_base { - public: - Uint64Storage() { this->dtype = Type.Uint64; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + boost::intrusive_ptr imag() override { + if constexpr (std::is_same_v || std::is_same_v) { + auto *out = new StorageImplementation< + typename T::value_type, + typename std::allocator_traits::template rebind_alloc>( + storage_.size(), device); + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaSetDevice(device)); + if constexpr (std::is_same_v) { + utils_internal::cuComplexmem_gpu_cdtd(out->data(), storage_.data(), storage_.size(), + /* get_real */ false); + } else { + utils_internal::cuComplexmem_gpu_cftf(out->data(), storage_.data(), storage_.size(), + /* get_real */ false); + } + } else { + std::transform( + storage_.begin(), storage_.end(), out->begin(), + [](const auto &c) -> auto{ return c.imag(); }); + } + return boost::intrusive_ptr(out); + } else { + cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", + "\n"); + } + }; - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + void fill(const cytnx_complex128 &val) override { Fill(val); }; + void fill(const cytnx_complex64 &val) override { Fill(val); }; + void fill(const cytnx_double &val) override { Fill(val); }; + void fill(const cytnx_float &val) override { Fill(val); }; + void fill(const cytnx_int64 &val) override { Fill(val); }; + void fill(const cytnx_uint64 &val) override { Fill(val); }; + void fill(const cytnx_int32 &val) override { Fill(val); }; + void fill(const cytnx_uint32 &val) override { Fill(val); }; + void fill(const cytnx_int16 &val) override { Fill(val); }; + void fill(const cytnx_uint16 &val) override { Fill(val); }; + void fill(const cytnx_bool &val) override { Fill(val); }; + void set_zeros() override { Fill(0); }; + void resize(const cytnx_uint64 &newsize) override { + if constexpr (std::is_same_v, Allocator>) { + if (newsize > storage_.capacity()) { + checkCudaErrors(cudaSetDevice(device)); + checkCudaErrors(cudaDeviceSynchronize()); + } + } + storage_.resize(newsize); + len = newsize; + Mem = reinterpret_cast(storage_.data()); + }; - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); - }; - ///@endcond - ///@cond - class Int32Storage : public Storage_base { - public: - Int32Storage() { this->dtype = Type.Int32; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + void append(const Scalar &val) override { Append(val); } + void append(const cytnx_complex128 &val) override { Append(val); } + void append(const cytnx_complex64 &val) override { Append(val); } + void append(const cytnx_double &val) override { Append(val); } + void append(const cytnx_float &val) override { Append(val); } + void append(const cytnx_int64 &val) override { Append(val); } + void append(const cytnx_uint64 &val) override { Append(val); } + void append(const cytnx_int32 &val) override { Append(val); } + void append(const cytnx_uint32 &val) override { Append(val); } + void append(const cytnx_int16 &val) override { Append(val); } + void append(const cytnx_uint16 &val) override { Append(val); } + void append(const cytnx_bool &val) override { Append(val); } + Scalar get_item(const cytnx_uint64 &in) const override { + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaSetDevice(device)); + checkCudaErrors(cudaDeviceSynchronize()); + } + return storage_[in]; + }; - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + void set_item(const cytnx_uint64 &idx, const Scalar &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_double &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_float &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { SetItem(idx, val); } + void set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { SetItem(idx, val); } - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); - }; - ///@endcond + private: + template + void Fill(U value) { + if constexpr (std::is_constructible_v) { + if constexpr (std::is_same_v, Allocator>) { + checkCudaErrors(cudaSetDevice(device)); + value_type converted_value{value}; + if constexpr (std::is_same_v) { + utils_internal::cuFill_gpu_cd(Mem, (void *)(&converted_value), len); + } else if (std::is_same_v) { + utils_internal::cuFill_gpu_cf(Mem, (void *)(&converted_value), len); + } else if (std::is_same_v) { + utils_internal::cuFill_gpu_d(Mem, (void *)(&converted_value), len); + } else if (std::is_same_v) { + utils_internal::cuFill_gpu_f(Mem, (void *)(&converted_value), len); + } else if (std::is_same_v) { + utils_internal::cuFill_gpu_i64(Mem, (void *)(&converted_value), len); + } else if (std::is_same_v) { + utils_internal::cuFill_gpu_u64(Mem, (void *)(&converted_value), len); + } else if (std::is_same_v) { + utils_internal::cuFill_gpu_i32(Mem, (void *)(&converted_value), len); + } else if (std::is_same_v) { + utils_internal::cuFill_gpu_u32(Mem, (void *)(&converted_value), len); + } else if (std::is_same_v) { + utils_internal::cuFill_gpu_i16(Mem, (void *)(&converted_value), len); + } else if (std::is_same_v) { + utils_internal::cuFill_gpu_u16(Mem, (void *)(&converted_value), len); + } else { + assert(false); + } + checkCudaErrors(cudaDeviceSynchronize()); + } else { + std::fill(storage_.begin(), storage_.end(), static_cast(value)); + } + } else { + cytnx_error_msg(true, "%s", "[ERROR] Failed to convert the value."); + } + } - ///@cond - class Uint32Storage : public Storage_base { - public: - Uint32Storage() { this->dtype = Type.Uint32; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + template + void Append(U value) { + if constexpr (std::is_constructible_v) { + if constexpr (std::is_same_v, Allocator>) { + // `push_back` may cause reallocation on the CPU side, so we have to wait GPU finishing + // their jobs. + checkCudaErrors(cudaSetDevice(device)); + checkCudaErrors(cudaDeviceSynchronize()); + } + storage_.push_back(static_cast(value)); + ++len; + Mem = reinterpret_cast(storage_.data()); + } else { + cytnx_error_msg(true, "%s", "[ERROR] Failed to convert the value."); + } + } - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + template + void SetItem(size_type idx, U value) { + if constexpr (std::is_constructible_v) { + storage_[idx] = static_cast(value); + } else { + cytnx_error_msg(true, "%s", "[ERROR] Failed to convert the value."); + } + } - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); + storage_type storage_; }; - ///@endcond - ///@cond - class Uint16Storage : public Storage_base { - public: - Uint16Storage() { this->dtype = Type.Uint16; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + template <> + Scalar StorageImplementation::get_item(const cytnx_uint64 &in) const; - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + template <> + Scalar StorageImplementation>::get_item(const cytnx_uint64 &in) const; - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); - }; - ///@endcond + template <> + template + void StorageImplementation::Fill(U value) { + if constexpr (std::is_constructible_v) { + std::fill(storage_.begin(), storage_.end(), + static_cast(static_cast(value))); + } else { + cytnx_error_msg(true, "%s", "[ERROR] Failed to convert the value."); + } + } - ///@cond - class Int16Storage : public Storage_base { - public: - Int16Storage() { this->dtype = Type.Int16; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + template <> + template + void StorageImplementation>::Fill(U value) { + if constexpr (std::is_constructible_v) { + checkCudaErrors(cudaSetDevice(device)); + bool converted_value{value}; + utils_internal::cuFill_gpu_b(this->Mem, (void *)(&converted_value), this->len); + } else { + cytnx_error_msg(true, "%s", "[ERROR] Failed to convert the value."); + } + } - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + template <> + template + void StorageImplementation::Append(U value) { + if constexpr (std::is_constructible_v) { + storage_.push_back(static_cast(static_cast(value))); + ++len; + Mem = reinterpret_cast(storage_.data()); + } else { + cytnx_error_msg(true, "%s", "[ERROR] Failed to convert the value."); + } + } - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); - }; - ///@endcond + template <> + template + void StorageImplementation>::Append(U value) { + if constexpr (std::is_constructible_v) { + // `push_back` may cause reallocation on the CPU side, so we have to wait GPU finishing their + // jobs. + checkCudaErrors(cudaSetDevice(device)); + checkCudaErrors(cudaDeviceSynchronize()); + storage_.push_back(static_cast(static_cast(value))); + ++len; + Mem = reinterpret_cast(storage_.data()); + } else { + cytnx_error_msg(true, "%s", "[ERROR] Failed to convert the value."); + } + } - ///@cond - class BoolStorage : public Storage_base { - public: - BoolStorage() { this->dtype = Type.Bool; }; - void Init(const unsigned long long &len_in, const int &device = -1, - const bool &init_zero = true); - void _Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device = -1, - const bool &iscap = false, const unsigned long long &cap_in = 0); - boost::intrusive_ptr _create_new_sametype(); - boost::intrusive_ptr clone(); - boost::intrusive_ptr Move_memory(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper); - void to_(const int &device); - boost::intrusive_ptr to(const int &device); - void PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper = {}); - void print_elems(); + template <> + template + void StorageImplementation::SetItem(size_type idx, U value) { + if constexpr (std::is_constructible_v) { + storage_[idx] = static_cast(static_cast(value)); + } else { + cytnx_error_msg(true, "%s", "[ERROR] Failed to convert the value."); + } + } - boost::intrusive_ptr real(); - boost::intrusive_ptr imag(); + template <> + template + void StorageImplementation>::SetItem(size_type idx, U value) { + if constexpr (std::is_constructible_v) { + storage_[idx] = static_cast(static_cast(value)); + } else { + cytnx_error_msg(true, "%s", "[ERROR] Failed to convert the value."); + } + } - // generators: - void fill(const cytnx_complex128 &val); - void fill(const cytnx_complex64 &val); - void fill(const cytnx_double &val); - void fill(const cytnx_float &val); - void fill(const cytnx_int64 &val); - void fill(const cytnx_uint64 &val); - void fill(const cytnx_int32 &val); - void fill(const cytnx_uint32 &val); - void fill(const cytnx_int16 &val); - void fill(const cytnx_uint16 &val); - void fill(const cytnx_bool &val); - void set_zeros(); - void resize(const cytnx_uint64 &newsize); - void append(const Scalar &val); - void append(const cytnx_complex128 &val); - void append(const cytnx_complex64 &val); - void append(const cytnx_double &val); - void append(const cytnx_float &val); - void append(const cytnx_int64 &val); - void append(const cytnx_uint64 &val); - void append(const cytnx_int32 &val); - void append(const cytnx_uint32 &val); - void append(const cytnx_int16 &val); - void append(const cytnx_uint16 &val); - void append(const cytnx_bool &val); - Scalar get_item(const cytnx_uint64 &in) const; - - void set_item(const cytnx_uint64 &idx, const Scalar &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_double &val); - void set_item(const cytnx_uint64 &idx, const cytnx_float &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_int16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val); - void set_item(const cytnx_uint64 &idx, const cytnx_bool &val); - - // bool approx_eq(const boost::intrusive_ptr &rhs, const cytnx_double tol = 1e-8); - }; - ///@endcond + using ComplexDoubleStorage = StorageImplementation; + using ComplexFloatStorage = StorageImplementation; + using DoubleStorage = StorageImplementation; + using FloatStorage = StorageImplementation; + using Int64Storage = StorageImplementation; + using Uint64Storage = StorageImplementation; + using Int32Storage = StorageImplementation; + using Uint32Storage = StorageImplementation; + using Int16Storage = StorageImplementation; + using Uint16Storage = StorageImplementation; + // Current implementation depends on std::vector::data(), which is deleted for T = bool. + // We store data in char and specilize member functions if needed. + using BoolStorage = StorageImplementation; + + #ifdef UNI_GPU + using ComplexDoubleGpuStorage = + StorageImplementation>; + using ComplexFloatGpuStorage = + StorageImplementation>; + using DoubleGpuStorage = StorageImplementation>; + using FloatGpuStorage = StorageImplementation>; + using Int64GpuStorage = StorageImplementation>; + using Uint64GpuStorage = StorageImplementation>; + using Int32GpuStorage = StorageImplementation>; + using Uint32GpuStorage = StorageImplementation>; + using Int16GpuStorage = StorageImplementation>; + using Uint16GpuStorage = StorageImplementation>; + // Current implementation depends on std::vector::data(), which is deleted for T = bool. + // We store data in char and specilize member functions if needed. + using BoolGpuStorage = StorageImplementation>; + #endif // UNI_GPU ///@cond - typedef boost::intrusive_ptr (*pStorage_init)(); - inline boost::intrusive_ptr SIInit_cd() { - boost::intrusive_ptr out(new ComplexDoubleStorage()); - return out; + typedef boost::intrusive_ptr (*pStorage_init)(int device); + inline boost::intrusive_ptr SIInit_cd(int device) { + return device == Device.cpu + ? boost::intrusive_ptr(new ComplexDoubleStorage(device)) + : boost::intrusive_ptr(new ComplexDoubleGpuStorage(device)); } - inline boost::intrusive_ptr SIInit_cf() { - boost::intrusive_ptr out(new ComplexFloatStorage()); - return out; + inline boost::intrusive_ptr SIInit_cf(int device) { + return device == Device.cpu + ? boost::intrusive_ptr(new ComplexFloatStorage(device)) + : boost::intrusive_ptr(new ComplexFloatGpuStorage(device)); } - inline boost::intrusive_ptr SIInit_d() { - boost::intrusive_ptr out(new DoubleStorage()); - return out; + inline boost::intrusive_ptr SIInit_d(int device) { + return device == Device.cpu ? boost::intrusive_ptr(new DoubleStorage(device)) + : boost::intrusive_ptr(new DoubleGpuStorage(device)); } - inline boost::intrusive_ptr SIInit_f() { - boost::intrusive_ptr out(new FloatStorage()); - return out; + inline boost::intrusive_ptr SIInit_f(int device) { + return device == Device.cpu ? boost::intrusive_ptr(new FloatStorage(device)) + : boost::intrusive_ptr(new FloatGpuStorage(device)); } - inline boost::intrusive_ptr SIInit_u64() { - boost::intrusive_ptr out(new Uint64Storage()); - return out; + inline boost::intrusive_ptr SIInit_u64(int device) { + return device == Device.cpu ? boost::intrusive_ptr(new Uint64Storage(device)) + : boost::intrusive_ptr(new Uint64GpuStorage(device)); } - inline boost::intrusive_ptr SIInit_i64() { - boost::intrusive_ptr out(new Int64Storage()); - return out; + inline boost::intrusive_ptr SIInit_i64(int device) { + return device == Device.cpu ? boost::intrusive_ptr(new Int64Storage(device)) + : boost::intrusive_ptr(new Int64GpuStorage(device)); } - inline boost::intrusive_ptr SIInit_u32() { - boost::intrusive_ptr out(new Uint32Storage()); - return out; + inline boost::intrusive_ptr SIInit_u32(int device) { + return device == Device.cpu ? boost::intrusive_ptr(new Uint32Storage(device)) + : boost::intrusive_ptr(new Uint32GpuStorage(device)); } - inline boost::intrusive_ptr SIInit_i32() { - boost::intrusive_ptr out(new Int32Storage()); - return out; + inline boost::intrusive_ptr SIInit_i32(int device) { + return device == Device.cpu ? boost::intrusive_ptr(new Int32Storage(device)) + : boost::intrusive_ptr(new Int32GpuStorage(device)); } - inline boost::intrusive_ptr SIInit_u16() { - boost::intrusive_ptr out(new Uint16Storage()); - return out; + inline boost::intrusive_ptr SIInit_u16(int device) { + return device == Device.cpu ? boost::intrusive_ptr(new Uint16Storage(device)) + : boost::intrusive_ptr(new Uint16GpuStorage(device)); } - inline boost::intrusive_ptr SIInit_i16() { - boost::intrusive_ptr out(new Int16Storage()); - return out; + inline boost::intrusive_ptr SIInit_i16(int device) { + return device == Device.cpu ? boost::intrusive_ptr(new Int16Storage(device)) + : boost::intrusive_ptr(new Int16GpuStorage(device)); } - inline boost::intrusive_ptr SIInit_b() { - boost::intrusive_ptr out(new BoolStorage()); - return out; + inline boost::intrusive_ptr SIInit_b(int device) { + return device == Device.cpu ? boost::intrusive_ptr(new BoolStorage(device)) + : boost::intrusive_ptr(new BoolGpuStorage(device)); } ///@endcond ///@cond @@ -1104,7 +1060,7 @@ namespace cytnx { void Init(const unsigned long long &size, const unsigned int &dtype = Type.Double, int device = -1, const bool &init_zero = true) { cytnx_error_msg(dtype >= N_Type, "%s", "[ERROR] invalid argument: dtype"); - this->_impl = __SII.USIInit[dtype](); + this->_impl = __SII.USIInit[dtype](device); this->_impl->Init(size, device, init_zero); } // void _Init_byptr(void *rawptr, const unsigned long long &len_in, const unsigned int &dtype = @@ -1125,8 +1081,7 @@ namespace cytnx { * &init_zero) */ Storage(const unsigned long long &size, const unsigned int &dtype = Type.Double, - int device = -1, const bool &init_zero = true) - : _impl(new Storage_base()) { + int device = -1, const bool &init_zero = true) { Init(size, dtype, device, init_zero); } // Storage(void *rawptr, const unsigned long long &len_in, const unsigned int &dtype = @@ -1502,57 +1457,57 @@ namespace cytnx { } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.ComplexDouble](); + this->_impl = __SII.USIInit[Type.ComplexDouble](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_complex128) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.ComplexFloat](); + this->_impl = __SII.USIInit[Type.ComplexFloat](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_complex64) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.Double](); + this->_impl = __SII.USIInit[Type.Double](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_double) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.Float](); + this->_impl = __SII.USIInit[Type.Float](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_float) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.Uint64](); + this->_impl = __SII.USIInit[Type.Uint64](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_uint64) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.Int64](); + this->_impl = __SII.USIInit[Type.Int64](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_int64) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.Uint32](); + this->_impl = __SII.USIInit[Type.Uint32](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_uint32) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.Int32](); + this->_impl = __SII.USIInit[Type.Int32](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_int32) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.Uint16](); + this->_impl = __SII.USIInit[Type.Uint16](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_uint16) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.Int16](); + this->_impl = __SII.USIInit[Type.Int16](device); this->_impl->Init(vin.size(), device); memcpy(this->_impl->Mem, &vin[0], sizeof(cytnx_int16) * vin.size()); } void _from_vector(const std::vector &vin, const int device = -1) { - this->_impl = __SII.USIInit[Type.Bool](); + this->_impl = __SII.USIInit[Type.Bool](device); this->_impl->Init(vin.size(), device); this->_impl->_cpy_bool(this->_impl->Mem, vin); // memcpy(this->_impl->Mem,vin.data(),sizeof(cytnx_bool)*vin.size()); @@ -1631,5 +1586,5 @@ namespace cytnx { } // namespace cytnx -#endif -#endif +#endif // BACKEND_TORCH +#endif // BACKEND_STORAGE_H_ diff --git a/include/linalg.hpp.old b/include/linalg.hpp.old deleted file mode 100644 index 7697650c..00000000 --- a/include/linalg.hpp.old +++ /dev/null @@ -1,1066 +0,0 @@ -#ifndef _linalg_H_ -#define _linalg_H_ - -#include "Type.hpp" -#include "cytnx_error.hpp" -#include "Tensor.hpp" -#include "backend/Storage.hpp" -#include "UniTensor.hpp" -#include "Scalar.hpp" -#include "LinOp.hpp" -#include - -namespace cytnx { - // class Tensor; //fwd - // class UniTensor; //fwd - // class LinOp; //fwd - - /** - @namespace cytnx::linalg - @brief linear algebra related functions. - */ - namespace linalg { - - // Add: - //================================================== - /** - @brief element-wise add - */ - cytnx::UniTensor Add(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Add(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Add(const cytnx::UniTensor &Lt, const T &rc); - - // Sub: - //================================================== - /** - @brief element-wise subtract - */ - cytnx::UniTensor Sub(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Sub(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Sub(const cytnx::UniTensor &Lt, const T &rc); - - // Mul: - //================================================== - /** - @brief element-wise subtract - */ - cytnx::UniTensor Mul(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Mul(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Mul(const cytnx::UniTensor &Lt, const T &rc); - - // Div: - //================================================== - /** - @brief element-wise divide - */ - cytnx::UniTensor Div(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Div(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Div(const cytnx::UniTensor &Lt, const T &rc); - - // Mod: - //================================================== - /** - @brief element-wise modulo - */ - cytnx::UniTensor Mod(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Mod(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor Mod(const cytnx::UniTensor &Lt, const T &rc); - - std::vector Svd(const cytnx::UniTensor &Tin, const bool &is_U = true, - const bool &is_vT = true); - std::vector Svd_truncate(const cytnx::UniTensor &Tin, - const cytnx_uint64 &keepdim, const double &err = 0, - const bool &is_U = true, const bool &is_vT = true, - const bool &return_err = false); - std::vector Hosvd( - const cytnx::UniTensor &Tin, const std::vector &mode, - const bool &is_core = true, const bool &is_Ls = false, - const std::vector &trucate_dim = std::vector()); - - template - cytnx::UniTensor ExpH(const cytnx::UniTensor &Tin, const T &a, const T &b = 0); - template - cytnx::UniTensor ExpM(const cytnx::UniTensor &Tin, const T &a, const T &b = 0); - - cytnx::UniTensor ExpH(const cytnx::UniTensor &Tin); - cytnx::UniTensor ExpM(const cytnx::UniTensor &Tin); - - - cytnx::UniTensor Trace(const cytnx::UniTensor &Tin, const cytnx_int64 &a = 0, - const cytnx_int64 &b = 1); - cytnx::UniTensor Trace(const cytnx::UniTensor &Tin, const std::string &a, const std::string &b); - cytnx::UniTensor Trace(const cytnx::UniTensor &Tin, const cytnx_int64 &a = 0, - const cytnx_int64 &b = 1, const bool &by_label = false); - std::vector Qr(const cytnx::UniTensor &Tin, const bool &is_tau = false); - std::vector Qdr(const cytnx::UniTensor &Tin, const bool &is_tau = false); - - // Pow: - //================================================== - /** - @brief take power p on all the elements in UniTensor. - @param p, the power - @return - [UniTensor] - - */ - UniTensor Pow(const UniTensor &Tin, const double &p); - - /** - @brief inplace perform power on all the elements in UniTensor. - @param Tin, the input UniTensor. - @param p, the power. - - description: - on return, the elements in Tin will be modified to it's exponetial value. - */ - void Pow_(UniTensor &Tin, const double &p); - - } // namespace linalg - - cytnx::UniTensor operator+(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator+(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator+(const cytnx::UniTensor &Lt, const T &rc); - - cytnx::UniTensor operator-(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator-(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator-(const cytnx::UniTensor &Lt, const T &rc); - - cytnx::UniTensor operator*(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator*(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator*(const cytnx::UniTensor &Lt, const T &rc); - - cytnx::UniTensor operator/(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator/(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator/(const cytnx::UniTensor &Lt, const T &rc); - - cytnx::UniTensor operator%(const cytnx::UniTensor &Lt, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator%(const T &lc, const cytnx::UniTensor &Rt); - template - cytnx::UniTensor operator%(const cytnx::UniTensor &Lt, const T &rc); -} // namespace cytnx - -//==================================================================================== -//==================================================================================== -//==================================================================================== -namespace cytnx { - - namespace linalg { - Tensor Add(const Tensor &Lt, const Tensor &Rt); - template - Tensor Add(const T &lc, const Tensor &Rt); - template - Tensor Add(const Tensor &Lt, const T &rc); - - void iAdd(Tensor &Lt, const Tensor &Rt); - - // Sub: - //================================================== - /** - @brief element-wise subtract - */ - Tensor Sub(const Tensor &Lt, const Tensor &Rt); - template - Tensor Sub(const T &lc, const Tensor &Rt); - template - Tensor Sub(const Tensor &Lt, const T &rc); - - void iSub(Tensor &Lt, const Tensor &Rt); - - // Mul: - //================================================== - /** - @brief element-wise subtract - */ - Tensor Mul(const Tensor &Lt, const Tensor &Rt); - template - Tensor Mul(const T &lc, const Tensor &Rt); - template - Tensor Mul(const Tensor &Lt, const T &rc); - - void iMul(Tensor &Lt, const Tensor &Rt); - - // Div: - //================================================== - /** - @brief element-wise divide - */ - Tensor Div(const Tensor &Lt, const Tensor &Rt); - template - Tensor Div(const T &lc, const Tensor &Rt); - template - Tensor Div(const Tensor &Lt, const T &rc); - - void iDiv(Tensor &Lt, const Tensor &Rt); - - // Mod: - //================================================== - /** - @brief element-wise divide - */ - Tensor Mod(const Tensor &Lt, const Tensor &Rt); - template - Tensor Mod(const T &lc, const Tensor &Rt); - template - Tensor Mod(const Tensor &Lt, const T &rc); - - // Cpr: - //================================================== - /** - @brief element-wise compare - */ - Tensor Cpr(const Tensor &Lt, const Tensor &Rt); - template - Tensor Cpr(const T &lc, const Tensor &Rt); - template - Tensor Cpr(const Tensor &Lt, const T &rc); - - // Norm: - //================================================= - /** - @brief calculate the norm of a tensor. - @param Tl input Tensor - @return Tensor - - [Note] - 1. if the input tensor is rank-1, the frobenius norm is calculated. - 2. if the input tensor is rank-N with N>=2, the tensor will be flatten to 1d first, and - calculate the frobenius norm. - */ - Tensor Norm(const Tensor &Tl); - - // Det: - //================================================= - /** - @brief calculate the determinant of a tensor. - @param Tl input Tensor - @return Tensor - - [Note] - 1. input tensor should be a NxN rank-2 Tensor. - */ - Tensor Det(const Tensor &Tl); - - // Svd: - //================================================== - /** - @brief Perform Singular-Value decomposition on a rank-2 Tensor. - @param Tin a \link cytnx::Tensor Tensor \endlink, it should be a rank-2 tensor (matrix) - @param is_U if return a left uniform matrix. - @param is_vT if return a right uniform matrix. - @return [std::vector] - - 1. the first tensor is a 1-d tensor contanin the singular values - 2. the second tensor is the left uniform matrix [U], a 2-d tensor (matrix). It only return - when is_U=true. - 3. the third tensor is the right uniform matrix [vT], a 2-d tensor (matrix). It only return - when is_vT=true. - */ - std::vector Svd(const Tensor &Tin, const bool &is_U = true, const bool &is_vT = true); - - // Svd_truncate: - //================================================== - std::vector Svd_truncate(const Tensor &Tin, const cytnx_uint64 &keepdim, - const double &err = 0, const bool &is_U = true, - const bool &is_vT = true, const bool &return_err = false); - - // Hosvd: - std::vector Hosvd( - const Tensor &Tin, const std::vector &mode, const bool &is_core = true, - const bool &is_Ls = false, - const std::vector &trucate_dim = std::vector()); - - // Qr: - //================================================== - /** - @brief Perform QR decomposition on a rank-2 Tensor. - @param Tin a \link cytnx::Tensor Tensor \endlink, it should be a rank-2 tensor (matrix) - @param is_tau if return the tau that contains the Householder reflectors that generate q along - with r. The tau array contains scaling factors for the reflectors - @return [std::vector] - - 1. the first tensor is the orthomormal matrix [Q], a 2-d tensor (matrix) - 2. the second tensor is the right-upper triangular matrix [R], a 2-d tensor (matrix). - 3. the third tensor is the Householder reflectors [H], a 1-d tensor (vector). It only return - when is_tau=true. - */ - std::vector Qr(const Tensor &Tin, const bool &is_tau = false); - - // Qdr: - //================================================== - /** - @brief Perform QDR decomposition on a rank-2 Tensor. - @param Tin a \link cytnx::Tensor Tensor \endlink, it should be a rank-2 tensor (matrix) - @param is_tau if return the tau that contains the Householder reflectors that generate q along - with r. The tau array contains scaling factors for the reflectors - @return [std::vector] - - 1. the first tensor is the orthomormal matrix [Q], a 2-d tensor (matrix) - 2. the second tensor is the diagonal matrix [D], a 1-d tensor (matrix). - 3. the third tensor is the right-upper triangular matrix [R], a 2-d tensor (matrix). - 4. the forth tensor is the Householder reflectors [H], a 1-d tensor (matrix). It only return - when is_tau=true. - */ - std::vector Qdr(const Tensor &Tin, const bool &is_tau = false); - - // Eigh: - //================================================== - /** - @brief eigen-value decomposition for Hermitian matrix - @param Tin The Tensor - @param is_V return eigen vectors - @param row_V if set to ture, the return eigen vectors will be row form. - [Note] the Tin should be a rank-2 Tensor. - */ - std::vector Eigh(const Tensor &Tin, const bool &is_V = true, const bool &row_v = false); - - // Eig: - //================================================== - /** - @brief eigen-value decomposition for generic square matrix - @param Tin The Tensor - @param is_V return eigen vectors - @param row_V if set to ture, the return eigen vectors will be row form. - - [Note] the Tin should be a rank-2 Tensor. - */ - std::vector Eig(const Tensor &Tin, const bool &is_V = true, const bool &row_v = false); - - // Trace: - //================================================== - /** - @brief perform trace over index. - - [Note] the Tn should be at-least rank-2 Tensor. - */ - Tensor Trace(const Tensor &Tn, const cytnx_uint64 &axisA = 0, const cytnx_uint64 &axisB = 1); - - // Min: - //================================================== - /** - @brief get the minimum element. - - [Note] For complex TN, only real part is compared. - */ - Tensor Min(const Tensor &Tn); - - // Max: - //================================================== - /** - @brief get the maximum element. - - [Note] For complex TN, only real part is compared. - */ - Tensor Max(const Tensor &Tn); - - // Sum: - //================================================== - /** - @brief get the sum of all the elements. - - */ - Tensor Sum(const Tensor &Tn); - - // Matmul: - //================================================== - /** - @brief perform matrix multiplication on two tensors. - - [Note] the TL and TR should be both rank-2 Tensor. - */ - Tensor Matmul(const Tensor &TL, const Tensor &TR); - - // Matmul: - //================================================== - /** - @brief perform matrix multiplication on two Tensors with one rank-1 and the other rank-2 where - the rank-1 represent the diagonal elements of the specific tensor. - - [Note] the TL and TR one of them should be rank-1 Tensor and the other should be rank-2 Tensor. - */ - Tensor Matmul_dg(const Tensor &Tl, const Tensor &Tr); - - // InvM: - //================================================== - /** - @brief Matrix inverse. - @return - [Tensor] - - [Note] the Tin should be a rank-2 Tensor. - */ - Tensor InvM(const Tensor &Tin); - /** - @brief inplace perform Matrix inverse. - - description: - on return, the Tin will be modified to it's inverse. - - [Note] the Tin should be a rank-2 Tensor. - */ - void InvM_(Tensor &Tin); - - // Inv: - //================================================== - /** - @brief Element-wise inverse with clip. - @return - [Tensor] - - description: - Performs Elementwise inverse with clip. if A[i] < clip, then 1/A[i] = 0 will be set. - - [Note] For complex type Tensors, the square norm is used to determine the clip. - - */ - Tensor Inv(const Tensor &Tin, const double &clip); - - /** - @brief inplace perform Element-wise inverse with clip. - @return - [Tensor] - - description: - 1. Performs Elementwise inverse with clip. if A[i] < clip, then 1/A[i] = 0 will be set. - 2. on return, all the elements will be modified to it's inverse. if Tin is integer type, it - will automatically promote to Type.Double. - - [Note] For complex type Tensors, the square norm is used to determine the clip. - - */ - void Inv_(Tensor &Tin, const double &clip); - - // Conj: - //================================================== - /** - @brief Conjugate all the element in Tensor. - @return - [Tensor] - - [Note] - 1. if the input Tensor is complex, then return a new Tensor with all the elements are - conjugated. - 2. if the input Tensor is real, then return a copy of input Tensor. - */ - Tensor Conj(const Tensor &Tin); - /** - @brief inplace perform Conjugate on all the element in Tensor. - - [Note] - 1. if the input Tensor is complex, the elements of input Tensor will all be conjugated. - 2. if the input Tensor is real, then nothing act. - */ - void Conj_(Tensor &Tin); - - // Exp: - //================================================== - /** - @brief Exponential all the element in Tensor. - @return - [Double Tensor] or [ComplexDouble Tensor] - - */ - Tensor Exp(const Tensor &Tin); - - /** - @brief Exponential all the element in Tensor. - @return - [Float Tensor] or [ComplexFloat Tensor] - - */ - Tensor Expf(const Tensor &Tin); - - /** - @brief inplace perform Exponential on all the element in Tensor. - @param Tin, the input Tensor. - - description: - 1. on return, the elements in Tin will be modified to it's exponetial value. - 2. For Real, if the type is not Double, change the type of the input tensor to Double. - 3. For Complex, if input is ComplexFloat, promote to ComplexDouble. - */ - void Exp_(Tensor &Tin); - - /** - @brief inplace perform Exponential on all the element in Tensor. - @param Tin, the input Tensor. - - description: - 1. on return, the elements in Tin will be modified to it's exponetial value. - 2. For Real, if the type is not Float, change the type of the input tensor to Float. - 3. For Complex, if input is ComplexDouble, promote to ComplexFloat. - */ - void Expf_(Tensor &Tin); - - // Pow: - //================================================== - /** - @brief take power p on all the elements in Tensor. - @param p, the power - @return - [Tensor] - - */ - Tensor Pow(const Tensor &Tin, const double &p); - - /** - @brief inplace perform power on all the elements in Tensor. - @param Tin, the input Tensor. - @param p, the power. - - description: - on return, the elements in Tin will be modified to it's exponetial value. - */ - void Pow_(Tensor &Tin, const double &p); - - // Abs: - //================================================== - /** - @brief Elementwise absolute value. - @param Tin tensor. - @return - [Tensor] - - */ - Tensor Abs(const Tensor &Tin); - - /** - @brief inplace perform elementwiase absolute value. - @param Tin, the input Tensor. - - description: - on return, the elements in Tin will be modified to it's absolute value. Note that if the - input tensor is complex, it will be modified to real type. - */ - void Abs_(Tensor &Tin); - - // Diag: - //================================================== - /** - @brief return a diagonal tensor with diagonal elements provided as Tin. - @return - [Tensor] - - description: - the return Tensor will be rank-2, with shape=(L, L); where L is the number of elements in - Tin. - - - [Note] Tin should be a rank-1 Tensor. - - */ - Tensor Diag(const Tensor &Tin); - - // Tensordot: - //================================================== - /** - @brief perform tensor dot by sum out the indices assigned of two Tensors. - @param Tl Tensor #1 - @param Tr Tensor #2 - @param idxl the indices of rank of Tensor #1 that is going to sum with Tensor #2 - @param idxr the indices of rank of Tensor #2 that is going to sum with Tensor #1 - @param cacheL cache Tensor #1 (See user-guide for details) - @param cacheR cache Tensor #2 (See user-guide for details) - @return - [Tensor] - - [Note] - 1. the elements in idxl and idxr have one to one correspondence. - 2. two tensors should on same device. - */ - Tensor Tensordot(const Tensor &Tl, const Tensor &Tr, const std::vector &idxl, - const std::vector &idxr, const bool &cacheL = false, - const bool &cacheR = false); - - // Tensordot_dg: - //================================================== - /** - @brief perform tensor dot by sum out the indices assigned of two Tensors, with either one of - them to be a rank-2 diagonal tensor represented by a rank-2 tensor. - @param Tl Tensor #1 - @param Tr Tensor #2 - @param idxl the indices of rank of Tensor #1 that is going to sum with Tensor #2 - @param idxr the indices of rank of Tensor #2 that is going to sum with Tensor #1 - @param diag_L if Tl(true)/Tr(false) is a diagnal matrix, represented by a rank-1 tensor. - @return - [Tensor] - - [Note] - 1. the elements in idxl and idxr have one to one correspondence. - 2. two tensors should on same device. - 3. if diag_L=true, Tl should be a rank-1 tensor as the diagonal elements of a diagonal - matrix. if false, Tr should be a rank-1 tensor - */ - Tensor Tensordot_dg(const Tensor &Tl, const Tensor &Tr, const std::vector &idxl, - const std::vector &idxr, const bool &diag_L); - - // Outer: - //================================================== - /** - @brief perform outer produces of two rank-1 Tensor. - @param Tl rank-1 Tensor #1 - @param Tr rank-1 Tensor #2 - @return - [Tensor] - - description: - if the Tensor #1 has [shape_1], and Tensor #2 has [shape_2]; then the return Tensor will - have shape: concate(shape_1,shape_2) - - [Note] - two tensor should on same device. - - */ - Tensor Outer(const Tensor &Tl, const Tensor &Tr); - - // Kron: - //================================================== - /** - @brief perform kronecker produces of two Tensor. - @param Tl rank-n Tensor #1 - @param Tr rank-m Tensor #2 - @param Tl_pad_left The padding scheme for Tl if Tl.rank != Tr.rank - @param Tr_pad_left The padding scheme for Tr if Tl.rank != Tr.rank - @return - [Tensor] - - description: - The function assume two tensor has the same rank. In case where two tensors have different - ranks, the small one will be extend by adding redundant dimension to the beginning of axis - (T_pad_right=true) or by adding redundant dim to the last axis (if T_pad_left=false - [default]). if the Tensor #1 has shape=(i1,j1,k1,l1...), and Tensor #2 has - shape=(i2,j2,k2,l2...); then the return Tensor will have shape=(i1*i2,j1*j2,k1*k2...) - - [Note] - two tensor should on same device. - - */ - Tensor Kron(const Tensor &Tl, const Tensor &Tr, const bool &Tl_pad_left = false, - const bool &Tr_pad_left = false); - - - // Directsum: - //================================================== - /** - @brief perform directsum of two Tensor. - @param T1 rank-n Tensor #1 - @param T2 rank-n Tensor #2 - @param shared_axes The axes that are shared by two tensors - @return - [Tensor] - - description: - The function assume two tensor has the same rank, and axes indicated in are the same for both T1 and T2. - The out put tensors will have same rank as T1 and T2, with the dimension of rest of the axes being the sum of dimensions of T1 and T2. - e.g., the out put shape = (i1+i2,j1+j2, share_axis_1, k1+k2, share_axis_2, ...); where T1.shape = (i1,j1,share_axis_1,k1,share_axis_2 ...) - and T2.shape = (i2,j2,share_axis_1,k2,share_axis_2 ...) - - - [Note] - two tensor should on same device. - - */ - Tensor Directsum(const Tensor &T1, const Tensor &T2, const std::vector &shared_axes); - - - - - // VectorDot: - //================================================= - /** - @brief perform inner product of vectors - @param Tl Tensor #1 - @param Tr Tensor #2 - @param if the Tl should be conjugated (only work for complex. For real Tensor, no function), - default: false - @return - [Tensor] Rank-0 - - description: - two Tensors must be Rank-1, with same length. - - [Note] - performance tune: This function have better performance when two vectors with same types, - and are one of following type: cytnx_double, cytnx_float, cytnx_complex64 or cytnx_complex128. - - */ - Tensor Vectordot(const Tensor &Tl, const Tensor &Tr, const bool &is_conj = false); - - // Dot: - //================================================= - /** - @brief dot product of two arrays. - @param Tl Tensor #1 - @param Tr Tensor #2 - @return - [Tensor] - - description: - 1. if both Tl and Tr are 1d arrays, it is inner product of vectors (no complex conj), it - calls linalg.Vectordot with is_conj=false. - 2. if both Tl and Tr are 2d arrays, it calls linalg.Matmul to compute the matrix - multiplication - 3. if Tl is Nd array (with N>=2, and Tr is 1-D array, it is sum product over the last axis - of a with b - - [Note] - performance tune: This function have better performance when two arrays with same types, and - are one of following type: cytnx_double, cytnx_float, cytnx_complex64 or cytnx_complex128. - - [Python] - In Python API, operator@ is overloaded as a shorthand of linalg::Dot. - */ - Tensor Dot(const Tensor &Tl, const Tensor &Tr); - - // Tridiag: - //=========================================== - /** - @brief perform diagonalization of symmetric tri-diagnoal matrix. - @param Diag Tensor #1 - @param Sub_diag Tensor #2 - @param is_V: if calculate the eigen value. - @param k: Return k lowest eigen vector if is_V=True - @param throw_excp: Whether to throw exception when error occurs in Tridiag internal function - @return - [vector] if is_V = True, the first tensor is the eigen value, and second tensor is - eigenvector of shape [k,L]. - - description: - two Tensors must be Rank-1, with length of Diag = L and Sub_diag length = L-1. - - [Note] - performance tune: This function have better performance when two vectors with same types, - and are one of following type: cytnx_double, cytnx_float. In general all real type can be use as - input, which will be promote to floating point type for calculation. - - */ - std::vector Tridiag(const Tensor &Diag, const Tensor &Sub_diag, const bool &is_V = true, - const bool &is_row = false, bool throw_excp = false); - - // ExpH: - //=========================================== - /** - @brief perform matrix exponential for Hermitian matrix - @param in input Tensor, should be Hermitian - @param a rescale factor - @param b bias - @return - [Tensor] - - description: - perform matrix exponential with \f$O = \exp{aM + b}\f$. - - */ - template - Tensor ExpH(const Tensor &in, const T &a, const T &b =0); - Tensor ExpH(const Tensor &in); - - - - // ExpM: - //=========================================== - /** - @brief perform matrix exponential for generic matrix - @param in input Tensor, should be a square rank-2. - @param a rescale factor - @param b bias - @return - [Tensor] - - description: - perform matrix exponential with \f$O = \exp{aM + b}\f$. - - */ - template - Tensor ExpM(const Tensor &in, const T &a, const T &b = 0); - - Tensor ExpM(const Tensor &in); - - // Lanczos: - //=========================================== - /** - @brief perform Lanczos for hermitian/symmetric matrices or linear function. - @param Hop the Linear Operator defined by LinOp class or it's inheritance (see LinOp). - @param Tin the initial vector, this should be rank-1. - @param method the desired Lanczos method to use, can be 'ER' or 'Gnd'. - @param CvgCrit the convergence criterion of the energy. - @param maxiter the maximum interation steps for each k. - @param k the number of lowest k eigen values. - @param is_V if set to true, the eigen vectors will be returned. - @param is_row whether the return eigen vectors should be in row-major form. - @param max_krydim the maximum krylov subspace dimension for each iteration. - @param verbose print out iteration info. - @return - [eigvals (Tensor), eigvecs (Tensor)(option)] - #description: - This function calculate the eigen value problem using explicitly restarted Lanczos. - #Performance tune: - For small linear dimension, try to reduce max_krydim. - #[Note] - To use, define a linear operator with LinOp class either by assign a custom function or - create a class that inherit LinOp (see LinOp for further details) - */ - std::vector Lanczos(LinOp *Hop, const Tensor &Tin = Tensor(), - const std::string method = "Gnd", const double &CvgCrit = 1.0e-14, - const unsigned int &Maxiter = 10000, const cytnx_uint64 &k = 1, - const bool &is_V = true, const bool &is_row = false, - const cytnx_uint32 &max_krydim = 0, const bool &verbose = false); - - // Lanczos: - //=========================================== - /** - @brief perform Lanczos for hermitian/symmetric matrices or linear function. - @param Hop the Linear Operator defined by LinOp class or it's inheritance (see LinOp). - @param Tin the initial vector, this should be a UniTensor. - @param method the desired Lanczos method to use, can be 'ER' or 'Gnd'. - @param CvgCrit the convergence criterion of the energy. - @param maxiter the maximum interation steps for each k. - @param k the number of lowest k eigen values. - @param is_V if set to true, the eigen vectors will be returned. - @param is_row whether the return eigen vectors should be in row-major form. - @param max_krydim the maximum krylov subspace dimension for each iteration. - @param verbose print out iteration info. - @return - [eigvals (Tensor), eigvecs (Tensor)(option)] - #description: - This function calculate the eigen value problem using explicitly restarted Lanczos. - #Performance tune: - For small linear dimension, try to reduce max_krydim. - #[Note] - To use, define a linear operator with LinOp class either by assign a custom function or - create a class that inherit LinOp (see LinOp for further details) - */ - std::vector Lanczos(LinOp *Hop, const UniTensor &Tin = UniTensor(), - const std::string method = "Gnd", - const double &CvgCrit = 1.0e-14, - const unsigned int &Maxiter = 10000, const cytnx_uint64 &k = 1, - const bool &is_V = true, const bool &is_row = false, - const cytnx_uint32 &max_krydim = 4, const bool &verbose = false); - - - // Lanczos: - //=========================================== - /** - @brief perform Lanczos for hermitian/symmetric matrices or linear function. - @param Hop the Linear Operator defined by LinOp class or it's inheritance (see LinOp). - @param k the number of lowest k eigen values. - @param is_V if set to true, the eigen vectors will be returned. - @param maxiter the maximum interation steps for each k. - @param CvgCrit the convergence criterion of the energy. - @param is_row whether the return eigen vectors should be in row-major form. - @param Tin the initial vector, this should be rank-1 - @param max_krydim the maximum krylov subspace dimension for each iteration. - @param verbose print out iteration info. - @return - [eigvals (Tensor), eigvecs (Tensor)(option)] - - #description: - This function calculate the eigen value problem using explicitly restarted Lanczos. - - #Performance tune: - For small linear dimension, try to reduce max_krydim. - - #[Note] - To use, define a linear operator with LinOp class either by assign a custom function or - create a class that inherit LinOp (see LinOp for further details) - */ - std::vector Lanczos_ER(LinOp *Hop, const cytnx_uint64 &k = 1, const bool &is_V = true, - const cytnx_uint64 &maxiter = 10000, - const double &CvgCrit = 1.0e-14, const bool &is_row = false, - const Tensor &Tin = Tensor(), const cytnx_uint32 &max_krydim = 4, - const bool &verbose = false); - - // Lanczos: - //=========================================== - /** - @brief perform Lanczos for hermitian/symmetric matrices or linear function to get ground state - and lowest eigen value - @param Hop the Linear Operator defined by LinOp class or it's inheritance (see LinOp). - @param CvgCrit the convergence criterion of the energy. - @param is_V if set to true, the eigen vectors will be returned. - @param Tin the initial vector, this should be rank-1 - @param verbose print out iteration info. - @param maxiter the maximum interation steps for each k. - @return - [eigvals (Tensor), eigvecs (Tensor)(option)] - - #description: - This function calculate the eigen value problem using naive Lanczos to get ground state and - lowest eigen value. - - - #[Note] - To use, define a linear operator with LinOp class either by assign a custom function or - create a class that inherit LinOp (see LinOp for further details) - */ - std::vector Lanczos_Gnd(LinOp *Hop, const double &CvgCrit = 1.0e-14, - const bool &is_V = true, const Tensor &Tin = Tensor(), - const bool &verbose = false, - const unsigned int &Maxiter = 100000); - - // Lanczos: - //=============================================== - /** - @brief perform Lanczos for hermitian/symmetric matrices or linear function to get ground state - and lowest eigen value - @param Hop the Linear Operator defined by LinOp class or it's inheritance (see LinOp). - @param CvgCrit the convergence criterion of the energy. - @param is_V if set to true, the eigen vectors will be returned. - @param Tin the initial vector, this should be a UniTensor. - @param verbose print out iteration info. - @param maxiter the maximum interation steps for each k. - @return - [eigvals (UniTensor::Dense), eigvecs (UniTensor)(option)] - - #description: - This function calculate the eigen value problem using naive Lanczos to get ground state and - lowest eigen value. - - - #[Note] - To use, define a linear operator with LinOp class either by assign a custom function or - create a class that inherit LinOp (see LinOp for further details) - */ - std::vector Lanczos_Gnd_Ut(LinOp *Hop, const UniTensor &Tin, - const double &CvgCrit = 1.0e-14, const bool &is_V = true, - const bool &verbose = false, - const unsigned int &Maxiter = 100000); - - // Lstsq: - //=========================================== - /** - @brief Return the least-squares solution to a linear matrix equation. - @param A “Coefficient” matrix, must be two-dimensional. - @param b Ordinate or “dependent variable” values, must be two-dimensional, the least-squares - solution is calculated for each of the K columns of b. - @param rcond Cut-off ratio for small singular values of a. For the purposes of rank - determination, singular values are treated as zero if they are smaller than rcond times the - largest singular value of A, If it is negative, the machine precision is used. - @return [std::vector] - - 1. the first tensor is least-squares solutions in the K columns. - 2. the second tensor is the sums of squared residuals: Squared Euclidean 2-norm for each - column in b - a @ x. If the rank of a is < N or M <= N, this is a zero Tensor. - 3. the third tensor is the rank of matrix A. - 4. the forth tensor is singular values of A. - - #description: - Computes the vector x that approximatively solves the equation A @ x = b. The equation may - be under-, well-, or over-determined independent columns. If a is square and of full rank, then - x (but for round-off error) is the “exact” solution of the equation. Else, x minimizes the - Euclidean 2-norm || b - a x ||. - - [Ke] - */ - std::vector Lstsq(const Tensor &A, const Tensor &b, const float &rcond = -1); - - - /** - @brief Blas Axpy, performing return = a*x + y - @param a Scalar. - @param x Tensor, can be any rank - @param y Tensor, can be any rank - @return - [Tensor] - - #description: - This function performs a*x+y where x,y are Tensor and a is a Scalar. The dtype of return - Tensor will be the strongest among x,y and a. - - If y is not specify, then it performs a*x -> return - - #[Note] - This will return a new tensor. - - */ - Tensor Axpy(const Scalar &a, const Tensor &x, const Tensor &y = Tensor()); - - void Axpy_(const Scalar &a, const Tensor &x, Tensor &y); - - /** - @brief Blas Ger, performing return = a*vec(x)*vec(y)^T - @param x Tensor, rank-1 with size nx - @param y Tensor, rank-1 with size ny - @param a Scalar, if not provided a = 1. - @return - [Tensor with shape (nx,ny)] - - #description: - This function performs a*x*y^T where x,y are rank-1 Tensor with dimension nx and ny respectively; and a is a Scalar. The dtype of return - Tensor will be the strongest among x,y and a. - - - #[Note] - This will return a new tensor. - - */ - Tensor Ger(const Tensor &x, const Tensor &y, const Scalar &a=Scalar()); - - - - - } // namespace linalg - - // operators: - Tensor operator+(const Tensor &Lt, const Tensor &Rt); - template - Tensor operator+(const T &lc, const Tensor &Rt); - template - Tensor operator+(const Tensor &Lt, const T &rc); - - //------------------------------------ - Tensor operator-(const Tensor &Lt, const Tensor &Rt); - template - Tensor operator-(const T &lc, const Tensor &Rt); - template - Tensor operator-(const Tensor &Lt, const T &rc); - - //----------------------------------- - Tensor operator*(const Tensor &Lt, const Tensor &Rt); - template - Tensor operator*(const T &lc, const Tensor &Rt); - template - Tensor operator*(const Tensor &Lt, const T &rc); - - //---------------------------------- - Tensor operator/(const Tensor &Lt, const Tensor &Rt); - template - Tensor operator/(const T &lc, const Tensor &Rt); - template - Tensor operator/(const Tensor &Lt, const T &rc); - - //---------------------------------- - Tensor operator%(const Tensor &Lt, const Tensor &Rt); - template - Tensor operator%(const T &lc, const Tensor &Rt); - template - Tensor operator%(const Tensor &Lt, const T &rc); - - //---------------------------------- - Tensor operator==(const Tensor &Lt, const Tensor &Rt); - template - Tensor operator==(const T &lc, const Tensor &Rt); - template - Tensor operator==(const Tensor &Lt, const T &rc); - -} // namespace cytnx - -#endif diff --git a/src/BlockUniTensor.cpp.old b/src/BlockUniTensor.cpp.old deleted file mode 100644 index 4cda25b9..00000000 --- a/src/BlockUniTensor.cpp.old +++ /dev/null @@ -1,1985 +0,0 @@ -#include "UniTensor.hpp" -#include "Accessor.hpp" -#include "utils/utils.hpp" -#include "utils/utils_internal_interface.hpp" -#include "linalg.hpp" -#include "Generator.hpp" -#include -#include "utils/vec_print.hpp" -#include "utils/vec_concatenate.hpp" -#include -#include -#include -#ifdef UNI_OMP - #include -#endif -#include "backend/lapack_wrapper.hpp" - -using namespace std; -namespace cytnx { - typedef Accessor ac; - void BlockUniTensor::Init(const std::vector &bonds, const std::vector &in_labels, - const cytnx_int64 &rowrank, const unsigned int &dtype, - const int &device, const bool &is_diag, const bool &no_alloc, const std::string &name) { - this->_name = name; - // the entering is already check all the bonds have symmetry. - // need to check: - // 1. the # of symmetry and their type across all bonds - // 2. check if all bonds are non regular: - - // check Symmetry for all bonds - cytnx_uint32 N_symmetry = bonds[0].Nsym(); - vector tmpSyms = bonds[0].syms(); - - cytnx_uint32 N_ket = 0; - for (cytnx_uint64 i = 0; i < bonds.size(); i++) { - // check - cytnx_error_msg( - bonds[i].type() == BD_REG, - "[ERROR][BlockUniTensor] All bonds must be tagged for UniTensor with symmetries.%s", "\n"); - - - cytnx_error_msg( - bonds[i]._impl->_degs.size() == 0, - "[ERROR][BlockUniTensor] All bonds must be in new format for BlockUniTensor!.%s", "\n"); - - // check rank-0 bond: - cytnx_error_msg(bonds[i].dim() == 0, - "[ERROR][BlockUniTensor] All bonds must have dimension >=1%s", "\n"); - // check symmetry and type: - cytnx_error_msg(bonds[i].Nsym() != N_symmetry, - "[ERROR][BlockUniTensor] inconsistant # of symmetry at bond: %d. # of " - "symmetry should be %d\n", - i, N_symmetry); - for (cytnx_uint32 n = 0; n < N_symmetry; n++) { - cytnx_error_msg(bonds[i].syms()[n] != tmpSyms[n], - "[ERROR][BlockUniTensor] symmetry mismatch at bond: %d, %s != %s\n", n, - bonds[i].syms()[n].stype_str().c_str(), tmpSyms[n].stype_str().c_str()); - } - N_ket += cytnx_uint32(bonds[i].type() == bondType::BD_KET); - } - - // check rowrank: - cytnx_error_msg((N_ket < 1) || (N_ket > bonds.size() - 1), - "[ERROR][BlockUniTensor] must have at least one ket-bond and one bra-bond.%s", - "\n"); - - - if (rowrank == -1) { - this->_rowrank = N_ket; - //this->_inner_rowrank = N_ket; - } else { - if(is_diag){ - cytnx_error_msg(rowrank != 1, - "[ERROR][BlockUniTensor] rowrank must be = 1 when is_diag = true.%s", "\n"); - }else{ - cytnx_error_msg((rowrank < 0) || (rowrank > bonds.size() ), - "[ERROR][BlockUniTensor] rowrank must be >=0 and <=rank.%s", "\n"); - } - this->_rowrank = rowrank; - //this->_inner_rowrank = rowrank; - // update braket_form >>> - } - - - // check labels: - if (in_labels.size() == 0) { - for (cytnx_int64 i = 0; i < bonds.size(); i++) this->_labels.push_back(to_string(i)); - - } else { - // check bonds & labels dim - cytnx_error_msg(bonds.size() != in_labels.size(), "%s", - "[ERROR] labels must have same lenth as # of bonds."); - - std::vector tmp = vec_unique(in_labels); - cytnx_error_msg(tmp.size() != in_labels.size(), - "[ERROR] labels cannot contain duplicated elements.%s", "\n"); - this->_labels = in_labels; - } - - //cytnx_error_msg(is_diag,"[ERROR][BlockUniTensor] Cannot set is_diag=true when the UniTensor is with symmetry.%s","\n"); - if(is_diag){ - cytnx_error_msg(bonds.size()!=2,"[ERROR][BlockUniTensor] is_diag = true must be rank-2 with one in-bond and one out-bond.%s","\n"); - cytnx_error_msg(bonds[0].type()== bonds[1].type(), "[ERROR][BlockUniTensor] is_diag=true must have one in-bond and oue out-bond.%s","\n"); - if(rowrank != 1, "[ERROR][BlockUniTensor] is_diag = true must have rowrank=1.%s","\n"); - - //checking basis! - cytnx_error_msg(bonds[0].redirect() != bonds[1],"[ERROR][BlockUniTensor] is_diag=true the in-bond and out-bond basis must match!%s","\n"); - - } - this->_is_diag = is_diag; - - // copy bonds, otherwise it will share objects: - this->_bonds = vec_clone(bonds); - this->_is_braket_form = this->_update_braket(); - - // vector blocklens; - // vector> blocksizes; - // cytnx_uint64 totblocksize = 0; - - if(this->_is_diag){ - for(int b=0;b_bonds[0].qnums().size();b++){ - this->_inner_to_outer_idx.push_back({(cytnx_uint64)b,(cytnx_uint64)b}); - if(!no_alloc){ - this->_blocks.push_back(zeros(this->_bonds[0]._impl->_degs[b],dtype,device)); - }else{ - this->_blocks.push_back(Tensor({this->_bonds[0]._impl->_degs[b]},dtype,device,false)); - } - } - - }else{ - // checking how many blocks are there, and the size: - std::vector Loc(this->_bonds.size(),0); - std::vector tot_qns(this->_bonds[0].Nsym()); // use first bond to determine symmetry size - std::vector size(this->_bonds.size()); - bool fin=false; - while(1){ - - //get elem - //cout << "start!" << endl; - //cytnx::vec_print_simple(std::cout , Loc); - this->_fx_get_total_fluxs(Loc, this->_bonds[0].syms(),tot_qns); - - //std::cout << "Loc: "; - //cytnx::vec_print_simple(std::cout, Loc); - //std::cout << "tot_flx: "; - //cytnx::vec_print_simple(std::cout, tot_qns); - - //if exists: - if( std::all_of(tot_qns.begin(),tot_qns.end(), [](const int &i){return i==0;}) ){ - //get size & init block! - if(!no_alloc){ - // cytnx_uint64 blockNelem = 1; - for(cytnx_int32 i=0;i_bonds[i]._impl->_degs[Loc[i]]; - // blockNelem *= size[i]; - } - this->_blocks.push_back(zeros(size,dtype,device)); - // blocklens.push_back(blockNelem); - // blocksizes.push_back(size); - // totblocksize += blockNelem; - }else{ - for(cytnx_int32 i=0;i_bonds[i]._impl->_degs[Loc[i]]; - } - this->_blocks.push_back(Tensor(size,dtype,device,false)); - } - // push its loc - this->_inner_to_outer_idx.push_back(Loc); - - } - - while(Loc.size()!=0){ - if(Loc.back()==this->_bonds[Loc.size()-1]._impl->_qnums.size()-1){ - Loc.pop_back(); - continue; - } - else{ - Loc.back()+=1; - //cout << "+1 at loc:" << Loc.size()-1 <_bonds.size()){ - Loc.push_back(0); - } - break; - } - } - - if(Loc.size()==0) break; - } - - // if(!no_alloc){ - // cytnx_uint64 offset=0; - - // char* ptr = (char*)utils_internal::Calloc_cpu( - // totblocksize+blocklens.size()*STORAGE_DEFT_SZ, - // Type.typeSize(dtype)); - // for(cytnx_int64 k=0;k_blocks.push_back(Tensor(Storage(ptr+(offset*Type.typeSize(dtype)), - // blocklens[k],dtype,device,true,cap),blocksizes[k],dtype,device)); - // offset+=cap; - // } - // } - }// is_diag? - - } - - void beauty_print_block(std::ostream &os, const cytnx_uint64 &Nin, const cytnx_uint64 &Nout, const std::vector &qn_indices, const std::vector &bonds, const Tensor &block){ - cytnx_uint64 Total_line = Nin < Nout ? Nout:Nin; - - std::vector Lside(Total_line); - std::vector Rside(Total_line); - std::vector MidL(Total_line); - std::vector MidR(Total_line); - cytnx_uint64 Lmax = 0; - cytnx_uint64 mL = 0; - cytnx_uint64 mR = 0; - - for(int i=0;i_syms[s].stype_str() + "(" + to_string(bonds[i]._impl->_qnums[qn_indices[i]][s]) + ")"; - } - if(Lmax < Lside[i].size()) Lmax = Lside[i].size(); - - MidL[i] += to_string(block.shape()[i]); - if(mL < MidL[i].size()) mL = MidL[i].size(); - } - - //Rside: - if(i_syms[s].stype_str() + "(" + to_string(bonds[Nin+i]._impl->_qnums[qn_indices[Nin+i]][s]) + ")"; - } - // check if is_diag = true: - if(block.shape().size()==1 && bonds.size()==2) - MidR[i] += to_string(block.shape()[i]); - else - MidR[i] += to_string(block.shape()[Nin+i]); - if(mR < MidR[i].size()) mR = MidR[i].size(); - } - - } - - //filling space: - for(int i=0;i= this->_blocks.size()),"[ERROR] index [%d] out of bound. should be >0 and < number of available blocks %d\n",idx,this->_blocks.size()); - - std::ostream &os = std::cout; - - os << "========================\n"; - if(this->_is_diag) os << " *is_diag: True\n"; - os << "BLOCK [#" << idx << "]\n"; - /* - os << " |-Qn indices for each axis:\n {\t"; - for(int s=0;s_inner_to_outer_idx[idx].size();s++){ - os << this->_inner_to_outer_idx[idx][s] << "\t"; - } - os << "}" << endl; - os << "\t"; - for(int s=0;s_bonds.size();s++){ - os << ((this->_bonds[s].type()>0)?"OUT":"IN") << "\t"; - } - os << endl; - os << " |-Qn for each axis:\n"; - for(int s=0;s_bonds[0].Nsym();s++){ - os << " " <_bonds[0]._impl->_syms[s].stype_str() << ":\t"; - for(int l=0;l_blocks[idx].shape().size();l++){ - os << std::showpos << this->_bonds[l]._impl->_qnums[this->_inner_to_outer_idx[idx][l]][s] << "\t"; - } - os << std::noshowpos << endl; - } - */ - os << " |- [] : Qn index \n"; - os << " |- Sym(): Qnum of correspond symmetry\n"; - beauty_print_block(os, this->_rowrank, this->_labels.size() - this->_rowrank, this->_inner_to_outer_idx[idx], this->_bonds, this->_blocks[idx]); - - - if(full_info) - os << this->_blocks[idx]; - else{ - os << " |-dtype:\t" << Type.getname(this->_blocks[idx].dtype()) << endl; - os << " |-device:\t" << Device.getname(this->_blocks[idx].device()) << endl; - os << " |-contiguous:\t" << (this->_blocks[idx].is_contiguous()? "True" : "False") << endl; - os << " |-shape:\t"; - vec_print_simple(os,this->_blocks[idx].shape()); - - } - - } - - void BlockUniTensor::print_blocks(const bool &full_info) const{ - std::ostream &os = std::cout; - - os << "-------- start of print ---------\n"; - char *buffer = (char *)malloc(sizeof(char) * 10240); - sprintf(buffer, "Tensor name: %s\n", this->_name.c_str()); - os << std::string(buffer); - if (this->_is_tag) sprintf(buffer, "braket_form : %s\n", this->_is_braket_form ? "True" : "False"); - os << std::string(buffer); - sprintf(buffer, "is_diag : %s\n", this->_is_diag ? "True" : "False"); - os << std::string(buffer); - sprintf(buffer, "[OVERALL] contiguous : %s\n", this->is_contiguous() ? "True" : "False"); - os << std::string(buffer); - - /* - os << "Symmetries: "; - for(int s=0;s_bonds[0].Nsym();s++) - os << this->_bonds[0]._impl->_syms[s].stype_str() << " "; - os << endl; - */ - - // print each blocks with its qnum! - for(int b=0;b_blocks.size();b++){ - this->print_block(b,full_info); - } - - /* - auto tmp_qnums = in.get_blocks_qnums(); - std::vector tmp = in.get_blocks_(true); - sprintf(buffer, "BLOCKS:: %s", "\n"); - os << std::string(buffer); - os << "=============\n"; - - if (!in.is_contiguous()) { - cytnx_warning_msg( - true, - "[WARNING][Symmetric] cout/print UniTensor on a non-contiguous UniTensor. the blocks " - "appears here could be different than the current shape of UniTensor.%s", - "\n"); - } - for (cytnx_uint64 i = 0; i < tmp.size(); i++) { - os << "Qnum:" << tmp_qnums[i] << std::endl; - os << tmp[i] << std::endl; - os << "=============\n"; - } - os << "-------- end of print ---------\n"; - */ - free(buffer); - } - - void BlockUniTensor::print_diagram(const bool &bond_info) { - char *buffer = (char *)malloc(10240 * sizeof(char)); - unsigned int BUFFsize = 100; - - sprintf(buffer, "-----------------------%s", "\n"); - std::cout << std::string(buffer); - sprintf(buffer, "tensor Name : %s\n", this->_name.c_str()); - std::cout << std::string(buffer); - sprintf(buffer, "tensor Rank : %d\n", this->_labels.size()); - std::cout << std::string(buffer); - //sprintf(buffer, "block_form : true%s", "\n"); - //std::cout << std::string(buffer); - sprintf(buffer, "contiguous : %s\n", this->is_contiguous() ? "True" : "False"); - std::cout << std::string(buffer); - sprintf(buffer, "valid blocks : %d\n", this->_blocks.size()); - std::cout << std::string(buffer); - sprintf(buffer, "is diag : %s\n", this->is_diag() ? "True" : "False"); - std::cout << std::string(buffer); - sprintf(buffer, "on device : %s\n", this->device_str().c_str()); - std::cout << std::string(buffer); - - cytnx_uint64 Nin = this->_rowrank; - cytnx_uint64 Nout = this->_labels.size() - this->_rowrank; - cytnx_uint64 vl; - if (Nin > Nout) - vl = Nin; - else - vl = Nout; - - std::string bks; - char *l = (char *)malloc(BUFFsize * sizeof(char)); - char *llbl = (char *)malloc(BUFFsize * sizeof(char)); - char *r = (char *)malloc(BUFFsize * sizeof(char)); - char *rlbl = (char *)malloc(BUFFsize * sizeof(char)); - - int Space_Llabel_max=0, Space_Ldim_max=0, Space_Rdim_max =0; - //quickly checking the size for each line, only check the largest! - - for (cytnx_uint64 i = 0; i < vl; i++) { - if(i_labels[i].size()) Space_Llabel_max = this->_labels[i].size(); - if(Space_Ldim_max < to_string(this->_bonds[i].dim()).size()) Space_Ldim_max = to_string(this->_bonds[i].dim()).size(); - } - if(i_bonds[Nin+i].dim()).size()) Space_Rdim_max = to_string(this->_bonds[Nin+i].dim()).size(); - } - } - string LallSpace = (string(" ")*(Space_Llabel_max+3+1)); - string MallSpace = string(" ")*(1 + Space_Ldim_max + 5 + Space_Rdim_max+1); - string M_dashes = string("-")*(1 + Space_Ldim_max + 5 + Space_Rdim_max+1); - - std::string tmpss; - sprintf(buffer, "%s row %s col %s",LallSpace.c_str(),MallSpace.c_str(),"\n"); - std::cout << std::string(buffer); - sprintf(buffer, "%s -%s- %s",LallSpace.c_str(),M_dashes.c_str(),"\n"); - std::cout << std::string(buffer); - for (cytnx_uint64 i = 0; i < vl; i++) { - sprintf(buffer, "%s |%s| %s",LallSpace.c_str(),MallSpace.c_str(),"\n"); - std::cout << std::string(buffer); - - if (i < Nin) { - if (this->_bonds[i].type() == bondType::BD_KET) - bks = " -->"; - else - bks = "*<--"; - memset(l, 0, sizeof(char) * BUFFsize); - memset(llbl, 0, sizeof(char) * BUFFsize); - tmpss = this->_labels[i] + std::string(" ")*(Space_Llabel_max-this->_labels[i].size()); - sprintf(l, "%s %s", tmpss.c_str(), bks.c_str()); - tmpss = to_string(this->_bonds[i].dim()) + std::string(" ")*(Space_Ldim_max-to_string(this->_bonds[i].dim()).size()); - sprintf(llbl, "%s", tmpss.c_str()); - } else { - memset(l, 0, sizeof(char) * BUFFsize); - memset(llbl, 0, sizeof(char) * BUFFsize); - tmpss = std::string(" ")*(Space_Llabel_max+5); - sprintf(l, "%s",tmpss.c_str()); - tmpss = std::string(" ")*(Space_Ldim_max); - sprintf(llbl, "%s",tmpss.c_str()); - } - if (i < Nout) { - if (this->_bonds[Nin + i].type() == bondType::BD_KET) - bks = "<--*"; - else - bks = "--> "; - memset(r, 0, sizeof(char) * BUFFsize); - memset(rlbl, 0, sizeof(char) * BUFFsize); - - sprintf(r, "%s %s", bks.c_str(), this->_labels[Nin + i].c_str()); - - tmpss = to_string(this->_bonds[Nin+i].dim()) + std::string(" ")*(Space_Rdim_max-to_string(this->_bonds[Nin+i].dim()).size()); - sprintf(rlbl, "%s", tmpss.c_str()); - - } else { - memset(r, 0, sizeof(char) * BUFFsize); - memset(rlbl, 0, sizeof(char) * BUFFsize); - sprintf(r, "%s", " "); - tmpss = std::string(" ")*Space_Rdim_max; - sprintf(rlbl, "%s",tmpss.c_str()); - } - sprintf(buffer, " %s| %s %s |%s\n", l, llbl, rlbl, r); - std::cout << std::string(buffer); - } - sprintf(buffer, "%s |%s| %s",LallSpace.c_str(),MallSpace.c_str(),"\n"); - std::cout << std::string(buffer); - sprintf(buffer, "%s -%s- %s",LallSpace.c_str(),M_dashes.c_str(),"\n"); - std::cout << std::string(buffer); - sprintf(buffer, "%s", "\n"); - std::cout << std::string(buffer); - - if (bond_info) { - for (cytnx_uint64 i = 0; i < this->_bonds.size(); i++) { - // sprintf(buffer, "lbl:%d ", this->_labels[i]); - sprintf(buffer, "lbl:%s ", this->_labels[i].c_str()); - std::cout << std::string(buffer); - std::cout << this->_bonds[i] << std::endl; - } - } - - fflush(stdout); - free(l); - free(llbl); - free(r); - free(rlbl); - free(buffer); - } - - boost::intrusive_ptr BlockUniTensor::contiguous() { - if(this->is_contiguous()){ - boost::intrusive_ptr out(this); - return out; - } else{ - BlockUniTensor *tmp = new BlockUniTensor(); - tmp = this->clone_meta(true,true); - tmp->_blocks.resize(this->_blocks.size()); - for(unsigned int b=0;b_blocks.size();b++){ - if(this->_blocks[b].is_contiguous()){ - tmp->_blocks[b] = this->_blocks[b].clone(); - }else{ - tmp->_blocks[b] = this->_blocks[b].contiguous(); - } - } - boost::intrusive_ptr out(tmp); - return out; - } - } - - std::vector BlockUniTensor::syms() const { return this->_bonds[0].syms(); } - - - boost::intrusive_ptr BlockUniTensor::permute( - const std::vector &mapper, const cytnx_int64 &rowrank, const bool &by_label) { - - BlockUniTensor *out_raw = this->clone_meta(true,true); - out_raw ->_blocks.resize(this->_blocks.size()); - - std::vector mapper_u64; - if (by_label) { - // cytnx_error_msg(true,"[Developing!]%s","\n"); - std::vector::iterator it; - for (cytnx_uint64 i = 0; i < mapper.size(); i++) { - it = std::find(out_raw->_labels.begin(), out_raw->_labels.end(), std::to_string(mapper[i])); - cytnx_error_msg(it == out_raw->_labels.end(), - "[ERROR] label %d does not exist in current UniTensor.\n", mapper[i]); - mapper_u64.push_back(std::distance(out_raw->_labels.begin(), it)); - } - - } else { - mapper_u64 = std::vector(mapper.begin(), mapper.end()); - //checking: - for(int i=0;i= this->rank(), "[ERROR] index %d out of bound!\n",mapper_u64[i]); - } - - } - - - out_raw->_bonds = vec_map(vec_clone(out_raw->bonds()), mapper_u64); // this will check validity - out_raw->_labels = vec_map(out_raw->labels(), mapper_u64); - - - if(out_raw->_is_diag){ - //cytnx_error_msg(true,"[ERROR][BlockUniTensor] currently do not support permute for is_diag=true for BlockUniTensor!%s","\n"); - if(rowrank >= 0) - cytnx_error_msg(rowrank != 1, "[ERROR][BlockUniTensor] is_diag=true must have rowrank=1.%s","\n"); - out_raw->_is_braket_form = out_raw->_update_braket(); - - }else{ - //inner_to_outer permute! - for(cytnx_int64 b=0;b_inner_to_outer_idx.size();b++){ - out_raw->_inner_to_outer_idx[b] = vec_map(out_raw->_inner_to_outer_idx[b], mapper_u64); - out_raw->_blocks[b] = this->_blocks[b].permute(mapper_u64); - } - - if(rowrank >=0){ - cytnx_error_msg((rowrank >= out_raw->_bonds.size()) || (rowrank < 1), - "[ERROR][BlockUniTensor] rowrank cannot exceed the rank of UniTensor-1, and should be >=1.%s", - "\n"); - out_raw->_rowrank = rowrank; - - } - out_raw->_is_braket_form = out_raw->_update_braket(); - } - boost::intrusive_ptr out(out_raw); - - return out; - } - - boost::intrusive_ptr BlockUniTensor::permute( - const std::vector &mapper, const cytnx_int64 &rowrank) { - - BlockUniTensor *out_raw = this->clone_meta(true,true); - out_raw ->_blocks.resize(this->_blocks.size()); - - std::vector mapper_i64; - // cytnx_error_msg(true,"[Developing!]%s","\n"); - std::vector::iterator it; - for (cytnx_int64 i = 0; i < mapper.size(); i++) { - it = std::find(out_raw->_labels.begin(), out_raw->_labels.end(), mapper[i]); - cytnx_error_msg(it == out_raw->_labels.end(), - "[ERROR] label %s does not exist in current UniTensor.\n", mapper[i].c_str()); - mapper_i64.push_back(std::distance(out_raw->_labels.begin(), it)); - } - - return this->permute(mapper_i64,rowrank,false); - - - } - - void BlockUniTensor::permute_(const std::vector &mapper, const cytnx_int64 &rowrank, - const bool &by_label) { - std::vector mapper_u64; - if (by_label) { - // cytnx_error_msg(true,"[Developing!]%s","\n"); - std::vector::iterator it; - for (cytnx_uint64 i = 0; i < mapper.size(); i++) { - it = std::find(this->_labels.begin(), this->_labels.end(), std::to_string(mapper[i])); - cytnx_error_msg(it == this->_labels.end(), - "[ERROR] label %d does not exist in current UniTensor.\n", mapper[i]); - mapper_u64.push_back(std::distance(this->_labels.begin(), it)); - } - - } else { - mapper_u64 = std::vector(mapper.begin(), mapper.end()); - //checking: - for(int i=0;i= this->rank(), "[ERROR] index %d out of bound!\n",mapper_u64[i]); - } - } - - this->_bonds = vec_map(vec_clone(this->bonds()), mapper_u64); // this will check validity - this->_labels = vec_map(this->labels(), mapper_u64); - - if(this->_is_diag){ - - if(rowrank >= 0) - cytnx_error_msg(rowrank != 1, "[ERROR][BlockUniTensor] is_diag=true must have rowrank=1.%s","\n"); - this->_is_braket_form = this->_update_braket(); - - }else{ - //inner_to_outer permute! - for(cytnx_int64 b=0;b_inner_to_outer_idx.size();b++){ - this->_inner_to_outer_idx[b] = vec_map(this->_inner_to_outer_idx[b], mapper_u64); - this->_blocks[b].permute_(mapper_u64); - } - - if (rowrank >= 0) { - cytnx_error_msg((rowrank >= this->_bonds.size()) || (rowrank < 1), - "[ERROR][BlockUniTensor] rowrank cannot exceed the rank of UniTensor-1, and should be >=1.%s", - "\n"); - this->_rowrank = rowrank; - } - this->_is_braket_form = this->_update_braket(); - } - - } - - void BlockUniTensor::permute_(const std::vector &mapper, - const cytnx_int64 &rowrank) { - - std::vector mapper_i64; - // cytnx_error_msg(true,"[Developing!]%s","\n"); - std::vector::iterator it; - for (cytnx_uint64 i = 0; i < mapper.size(); i++) { - it = std::find(this->_labels.begin(), this->_labels.end(), mapper[i]); - cytnx_error_msg(it == this->_labels.end(), - "[ERROR] label %d does not exist in current UniTensor.\n", mapper[i].c_str()); - mapper_i64.push_back(std::distance(this->_labels.begin(), it)); - } - - this->permute_(mapper_i64,rowrank,false); - - } - - boost::intrusive_ptr BlockUniTensor::relabels( - const std::vector &new_labels) { - BlockUniTensor *tmp = this->clone_meta(true, true); - tmp->_blocks = this->_blocks; - tmp->set_labels(new_labels); - boost::intrusive_ptr out(tmp); - return out; - } - boost::intrusive_ptr BlockUniTensor::relabels( - const std::vector &new_labels) { - vector vs(new_labels.size()); - transform(new_labels.begin(), new_labels.end(), vs.begin(), - [](cytnx_int64 x) -> string { return to_string(x); }); - //std::cout << "entry" << endl; - return relabels(vs); - } - - boost::intrusive_ptr BlockUniTensor::relabel(const cytnx_int64 &inx, - const cytnx_int64 &new_label, - const bool &by_label) { - BlockUniTensor *tmp = this->clone_meta(true, true); - tmp->_blocks = this->_blocks; - tmp->set_label(inx, new_label, by_label); - boost::intrusive_ptr out(tmp); - return out; - } - boost::intrusive_ptr BlockUniTensor::relabel(const cytnx_int64 &inx, - const string &new_label) { - BlockUniTensor *tmp = this->clone_meta(true, true); - tmp->_blocks = this->_blocks; - tmp->set_label(inx, new_label); - boost::intrusive_ptr out(tmp); - return out; - } - boost::intrusive_ptr BlockUniTensor::relabel(const string &inx, - const string &new_label) { - BlockUniTensor *tmp = this->clone_meta(true, true); - tmp->_blocks = this->_blocks; - tmp->set_label(inx, new_label); - boost::intrusive_ptr out(tmp); - return out; - } - boost::intrusive_ptr BlockUniTensor::relabel(const cytnx_int64 &inx, - const cytnx_int64 &new_label) { - BlockUniTensor *tmp = this->clone_meta(true, true); - tmp->_blocks = this->_blocks; - tmp->set_label(inx, new_label); - boost::intrusive_ptr out(tmp); - return out; - } - - - - boost::intrusive_ptr BlockUniTensor::contract( - const boost::intrusive_ptr &rhs, const bool &mv_elem_self, - const bool &mv_elem_rhs){ - // checking type - cytnx_error_msg(rhs->uten_type() != UTenType.Block, - "[ERROR] cannot contract symmetry-block UniTensor with other type of UniTensor%s", - "\n"); - - //checking symmetry: - cytnx_error_msg(this->syms() != rhs->syms(), - "[ERROR] two UniTensor have different symmetry type cannot contract.%s", "\n"); - - - // get common labels: - std::vector comm_labels; - std::vector comm_idx1, comm_idx2; - vec_intersect_(comm_labels, this->labels(), rhs->labels(), comm_idx1, comm_idx2); - - - - if (comm_idx1.size() == 0) { - - // output instance; - BlockUniTensor *tmp = new BlockUniTensor(); - BlockUniTensor *Rtn = (BlockUniTensor*)rhs.get(); - std::vector out_labels; - std::vector out_bonds; - cytnx_int64 out_rowrank; - - - //no-common label: - vec_concatenate_(out_labels, this->labels(), rhs->labels()); - for (cytnx_uint64 i = 0; i < this->_bonds.size(); i++) - out_bonds.push_back(this->_bonds[i].clone()); - for (cytnx_uint64 i = 0; i < rhs->_bonds.size(); i++) - out_bonds.push_back(rhs->_bonds[i].clone()); - - out_rowrank = this->rowrank() + rhs->rowrank(); - vec_concatenate_(out_labels, this->_labels, rhs->_labels); - - //cout << out_bonds; - tmp->Init(out_bonds,out_labels, out_rowrank, this->dtype(), this->device(),false); - - //tmp->_name = this->_name + "+" + rhs->_name; - - //check each valid block: - std::vector Lidx(this->_bonds.size()); //buffer - std::vector Ridx(rhs->_bonds.size()); //buffer - for(cytnx_int32 b=0;b_blocks.size();b++){ - memcpy(&Lidx[0], &tmp->_inner_to_outer_idx[b][0],sizeof(cytnx_uint64)*this->_bonds.size()); - memcpy(&Ridx[0], &tmp->_inner_to_outer_idx[b][this->_bonds.size()],sizeof(cytnx_uint64)*rhs->_bonds.size()); - - auto IDL = vec_argwhere(this->_inner_to_outer_idx,Lidx); - auto IDR = vec_argwhere(Rtn->_inner_to_outer_idx,Ridx); - - /* - cout << b << endl; - //vec_print_simple(std::cout,tmp->_inner_to_outer_idx[b]); - //vec_print_simple(std::cout,Lidx); - //vec_print_simple(std::cout,Ridx); - vec_print_simple(std::cout,IDL); - vec_print_simple(std::cout,IDR); - */ - if(User_debug){ - if(IDL.size()==IDR.size()){ - cytnx_error_msg(IDL.size()>1,"[ERROR][BlockUniTensor] IDL has more than two ambiguous location!%s","\n"); - cytnx_error_msg(IDR.size()>1,"[ERROR][BlockUniTensor] IDL has more than two ambiguous location!%s","\n"); - - }else{ - cytnx_error_msg(true,"[ERROR] duplication, something wrong!%s","\n"); - - } - } - if(IDL.size()){ - - auto tmpR = Rtn->is_diag()?linalg::Diag(Rtn->_blocks[IDR[0]]):Rtn->_blocks[IDR[0]]; - auto tmpL = this->is_diag()?linalg::Diag(this->_blocks[IDL[0]]):this->_blocks[IDL[0]]; - std::vector shape_L = - vec_concatenate(tmpL.shape(), std::vector(tmpR.shape().size(), 1)); - - tmpL = tmpL.reshape(shape_L); - auto Ott = linalg::Kron(tmpL,tmpR,false,true); - //checking: - cytnx_error_msg(Ott.shape()!=tmp->_blocks[b].shape(),"[ERROR] mismatching shape!%s","\n"); - tmp->_blocks[b] = Ott; - } - - } - - boost::intrusive_ptr out(tmp); - return out; - }else{ - //first, get common index! - - // check qnums & type: - for (int i = 0; i < comm_labels.size(); i++) { - if (User_debug){ - cytnx_error_msg(this->_bonds[comm_idx1[i]].qnums() != rhs->_bonds[comm_idx2[i]].qnums(), - "[ERROR] contract bond @ label %s have qnum mismatch.\n", comm_labels[i].c_str()); - cytnx_error_msg(this->_bonds[comm_idx1[i]].getDegeneracies() != rhs->_bonds[comm_idx2[i]].getDegeneracies(), - "[ERROR] contract bond @ label %s have degeneracies mismatch.\n", comm_labels[i].c_str()); - } - cytnx_error_msg(this->_bonds[comm_idx1[i]].type() + rhs->_bonds[comm_idx2[i]].type(), - "[ERROR] BRA can only contract with KET. invalid @ label: %s\n", - comm_labels[i].c_str()); - } - - // proc meta, labels: - std::vector non_comm_idx1 = - vec_erase(utils_internal::range_cpu(this->rank()), comm_idx1); - std::vector non_comm_idx2 = - vec_erase(utils_internal::range_cpu(rhs->rank()), comm_idx2); - - if ((non_comm_idx1.size() == 0) && (non_comm_idx2.size() == 0)) { - std::vector _shadow_comm_idx1(comm_idx1.size()), _shadow_comm_idx2(comm_idx2.size()); - memcpy(_shadow_comm_idx1.data(),comm_idx1.data(),sizeof(cytnx_int64)*comm_idx1.size()); - memcpy(_shadow_comm_idx2.data(),comm_idx2.data(),sizeof(cytnx_int64)*comm_idx2.size()); - // All the legs are contracted, the return will be a scalar - - // output instance; - DenseUniTensor *tmp = new DenseUniTensor(); - - boost::intrusive_ptr Lperm = this->permute(_shadow_comm_idx1); - boost::intrusive_ptr Rperm = rhs->permute(_shadow_comm_idx2); - - BlockUniTensor *Lperm_raw = (BlockUniTensor*)Lperm.get(); - BlockUniTensor *Rperm_raw = (BlockUniTensor*)Rperm.get(); - - - //pair the block and contract using vectordot! - // naive way! - for(unsigned int b=0;b_blocks.size();b++){ - for(unsigned int a=0;a_blocks.size();a++){ - if(Lperm_raw->_inner_to_outer_idx[b] == Rperm_raw->_inner_to_outer_idx[a]){ - if(tmp->_block.dtype()==Type.Void) - tmp->_block = linalg::Vectordot(Lperm_raw->_blocks[b].flatten(),Rperm_raw->_blocks[a].flatten()); - else - tmp->_block += linalg::Vectordot(Lperm_raw->_blocks[b].flatten(),Rperm_raw->_blocks[a].flatten()); - - // std::cout << b << " " << a << endl; - - - } - } - } - - tmp->_rowrank = 0; - tmp->_is_tag = false; - /* - if(mv_elem_self){ - // calculate reverse mapper: - std::vector inv_mapperL(comm_idx1.size()); - for (int i = 0; i < comm_idx1.size(); i++) { - inv_mapperL[comm_idx1[i]] = i; - } - for(unsigned int b=0;b_blocks.size();b++){ - this->_blocks[b].permute_(comm_idx1); - this->_blocks[b].contiguous_(); - this->_blocks[b].permute_(inv_mapperL); - } - } - - if(mv_elem_rhs){ - BlockUniTensor *Rtn = (BlockUniTensor*)rhs.get(); - // calculate reverse mapper: - std::vector inv_mapperR(comm_idx2.size()); - for (int i = 0; i < comm_idx2.size(); i++) { - inv_mapperR[comm_idx2[i]] = i; - } - for(unsigned int b=0;b_blocks.size();b++){ - Rtn->_blocks[b].permute_(comm_idx2); - Rtn->_blocks[b].contiguous_(); - Rtn->_blocks[b].permute_(inv_mapperR); - } - } - */ - boost::intrusive_ptr out(tmp); - return out; - - - }else{ - //cytnx_error_msg(true,"developing!%s","\n"); - BlockUniTensor *tmp = new BlockUniTensor(); - BlockUniTensor *Rtn = (BlockUniTensor*)rhs.get(); - std::vector out_labels; - std::vector out_bonds; - cytnx_int64 out_rowrank; - - // these two cannot omp parallel, due to intrusive_ptr - for (cytnx_uint64 i = 0; i < non_comm_idx1.size(); i++) - out_bonds.push_back(this->_bonds[non_comm_idx1[i]].clone()); - for (cytnx_uint64 i = 0; i < non_comm_idx2.size(); i++) - out_bonds.push_back(rhs->_bonds[non_comm_idx2[i]].clone()); - - vec_concatenate_(out_labels, vec_clone(this->_labels, non_comm_idx1), - vec_clone(rhs->_labels, non_comm_idx2)); - - out_rowrank = this->rowrank() + rhs->rowrank(); - for (cytnx_uint64 i = 0; i < comm_idx1.size(); i++) - if (comm_idx1[i] < this->_rowrank) out_rowrank--; - for (cytnx_uint64 i = 0; i < comm_idx2.size(); i++) - if (comm_idx2[i] < rhs->_rowrank) out_rowrank--; - - // Initialize!! - if((this->dtype()!=Type.Double and this->dtype()!=Type.ComplexDouble) and - (this->dtype()!=Type.Float and this->dtype()!=Type.ComplexFloat) or - this->is_diag() or Rtn->is_diag()){ - // cout<<"IM IN!!!"<Init(out_bonds,out_labels, out_rowrank, this->dtype(), this->device(), false, false); - } else { - tmp->Init(out_bonds,out_labels, out_rowrank, this->dtype(), this->device(), false, true); - } - - // now, build the itoi table: - std::vector< std::vector > itoiL_common(this->_blocks.size()), itoiR_common(Rtn->_blocks.size()); - // std::vector< std::vector > Bkk; - - for(cytnx_int64 a=0;a_blocks.size();a++){ - itoiL_common[a] = vec_clone(this->_inner_to_outer_idx[a],comm_idx1); - } - - // std::unordered_map, std::vector, VectorHasher> mp; - // std::unordered_map, cytnx_uint64, VectorHasher> mpC; - boost::unordered_map, std::vector > mp; - boost::unordered_map, cytnx_uint64> mpC; - - for(cytnx_int64 b=0;b_blocks.size();b++){ - itoiR_common[b] = vec_clone(Rtn->_inner_to_outer_idx[b],comm_idx2); - if(!mp[itoiR_common[b]].size()) - mp[itoiR_common[b]] = std::vector(1,b); - else mp[itoiR_common[b]].push_back(b); - } - for(cytnx_int64 b=0;b_blocks.size();b++){ - mpC[tmp->_inner_to_outer_idx[b]] = b; - } - - std::vector Lgbuffer; - std::vector itoiR_idx; - std::vector oldshapeL; - std::vector> oldshapeR(Rtn->_blocks.size(),std::vector()); - std::vector> oldshapeC; - // smallvec reshaped(tmp->_blocks.size(),false); - std::vector reshaped(tmp->_blocks.size(),false); - // smallvec calculated(tmp->_blocks.size(),false); - for(cytnx_int64 a=0;a_blocks.size();a++){ - oldshapeC.push_back(tmp->_blocks[a].shape()); - } - // std::vector non_contract_l,non_contract_r; - std::vector mapperL,inv_mapperL(this->_blocks[0].shape().size()); - std::vector mapperR,inv_mapperR(Rtn->_blocks[0].shape().size()); - vec_concatenate_(mapperL, non_comm_idx1, comm_idx1); - vec_concatenate_(mapperR, comm_idx2, non_comm_idx2); - for (int aa = 0; aa < mapperL.size(); aa++) { - inv_mapperL[mapperL[aa]] = aa; - } - for (int aa = 0; aa < mapperR.size(); aa++) { - inv_mapperR[mapperR[aa]] = aa; - } - // std::vector> inv_mapperR(Rtn->_blocks.size(),std::vector(Rtn->_blocks[0].shape().size())); - - if(this->is_diag()!=Rtn->is_diag()){ - for(cytnx_int64 a=0;a_blocks.size();a++){ - cytnx_int64 comm_dim = 1; - itoiR_idx = mp[itoiL_common[a]]; - for(cytnx_uint64 b : itoiR_idx){ - Lgbuffer.resize(non_comm_idx1.size()+non_comm_idx2.size()); - for(cytnx_uint64 cc=0;cc_inner_to_outer_idx[a][non_comm_idx1[cc]]; - } - for(cytnx_uint64 cc=non_comm_idx1.size();cc_inner_to_outer_idx[b][non_comm_idx2[cc-non_comm_idx1.size()]]; - } - // vec_concatenate_(Lgbuffer, vec_clone(this->_inner_to_outer_idx[a],non_comm_idx1) - // , vec_clone(Rtn->_inner_to_outer_idx[b],non_comm_idx2)); - // auto it = std::find(tmp->_inner_to_outer_idx.begin(),tmp->_inner_to_outer_idx.end(),Lgbuffer); - // cytnx_int64 targ_b = it - tmp->_inner_to_outer_idx.begin(); - cytnx_int64 targ_b = mpC[Lgbuffer]; - tmp->_blocks[targ_b] += linalg::Tensordot_dg(this->_blocks[a], Rtn->_blocks[b], comm_idx1, comm_idx2, this->is_diag()); - } - } - }else{ - // smallvec transs(Rtn->_blocks.size(), 'N'); - // smallvec ms(Rtn->_blocks.size(),0),ns(Rtn->_blocks.size(),0),ks(Rtn->_blocks.size(),0); - // smallvec doublealpha(Rtn->_blocks.size(),1.0); - // smallvec doublebeta(Rtn->_blocks.size(),0.0); - // smallvec floatalpha(Rtn->_blocks.size(),1.0); - // smallvec floatbeta(Rtn->_blocks.size(),0.0); - // smallvec complexalpha(Rtn->_blocks.size(),1.0); - // smallvec complexbeta(Rtn->_blocks.size(),0.0); - // smallvec complexalpha_f(Rtn->_blocks.size(),1.0); - // smallvec complexbeta_f(Rtn->_blocks.size(),0.0); - // smallvec LMems(Rtn->_blocks.size(),0),RMems(Rtn->_blocks.size(),0),CMems(Rtn->_blocks.size(),0); - // smallvec group_size(Rtn->_blocks.size(),1); - - std::vector transs(Rtn->_blocks.size(), 'N'); - std::vector ms(Rtn->_blocks.size(),0),ns(Rtn->_blocks.size(),0),ks(Rtn->_blocks.size(),0); - std::vector doublealpha(Rtn->_blocks.size(),1.0); - std::vector doublebeta(Rtn->_blocks.size(),0.0); - std::vector floatalpha(Rtn->_blocks.size(),1.0); - std::vector floatbeta(Rtn->_blocks.size(),0.0); - std::vector complexalpha(Rtn->_blocks.size(),1.0); - std::vector complexbeta(Rtn->_blocks.size(),0.0); - std::vector complexalpha_f(Rtn->_blocks.size(),1.0); - std::vector complexbeta_f(Rtn->_blocks.size(),0.0); - std::vector LMems(Rtn->_blocks.size(),0),RMems(Rtn->_blocks.size(),0),CMems(Rtn->_blocks.size(),0); - std::vector group_size(Rtn->_blocks.size(),1); - - for(cytnx_int64 a=0;a_blocks.size();a++){ - cytnx_int64 comm_dim = 1; - itoiR_idx = mp[itoiL_common[a]]; - for (cytnx_uint64 aa = 0; aa < comm_idx1.size(); aa++) { - comm_dim *= this->_blocks[a].shape()[comm_idx1[aa]]; - } - // vec_concatenate_(mapperL, non_comm_idx1, comm_idx1); - // for (int aa = 0; aa < mapperL.size(); aa++) { - // inv_mapperL[mapperL[aa]] = aa; - // } - this->_blocks[a].permute_(mapperL); - oldshapeL = this->_blocks[a].shape(); - this->_blocks[a].reshape_({-1, comm_dim}); - - for(cytnx_uint64 binx = 0;binx_blocks[b].permute_(mapperR); - // oldshapeR = Rtn->_blocks[b].shape(); - oldshapeR[b] = Rtn->_blocks[b].shape(); - Rtn->_blocks[b].reshape_({comm_dim, -1}); - Lgbuffer.resize(non_comm_idx1.size()+non_comm_idx2.size()); - for(cytnx_uint64 cc=0;cc_inner_to_outer_idx[a][non_comm_idx1[cc]]; - } - for(cytnx_uint64 cc=non_comm_idx1.size();cc_inner_to_outer_idx[b][non_comm_idx2[cc-non_comm_idx1.size()]]; - } - // vec_concatenate_(Lgbuffer, vec_clone(this->_inner_to_outer_idx[a],non_comm_idx1) - // , vec_clone(Rtn->_inner_to_outer_idx[b],non_comm_idx2)); - - // auto it = std::find(tmp->_inner_to_outer_idx.begin(),tmp->_inner_to_outer_idx.end(),Lgbuffer); - // cytnx_int64 targ_b = it - tmp->_inner_to_outer_idx.begin(); - cytnx_int64 targ_b = mpC[Lgbuffer]; - doublebeta[binx]=1.0; - complexbeta[binx]=1.0; - floatbeta[binx]=1.0; - complexbeta_f[binx]=1.0; - if(!reshaped[targ_b]){ - tmp->_blocks[targ_b].reshape_({(cytnx_int64)this->_blocks[a].shape()[0], (cytnx_int64)Rtn->_blocks[b].shape()[1]}); - reshaped[targ_b] = true; - doublebeta[binx]=0.0; - complexbeta[binx]=0.0; - floatbeta[binx]=0.0; - complexbeta_f[binx]=0.0; - // if(tmp->dtype()==Type.Double and this->dtype()==Type.Double and Rtn->dtype()==Type.Double){ - // doublebeta[binx]=0.0; - // }else if(tmp->dtype()==Type.ComplexDouble and this->dtype()==Type.ComplexDouble and Rtn->dtype()==Type.ComplexDouble){ - // complexbeta[binx]=0.0; - // } - } - if((tmp->dtype()==Type.Double and this->dtype()==Type.Double and Rtn->dtype()==Type.Double) or - (tmp->dtype()==Type.ComplexDouble and this->dtype()==Type.ComplexDouble and Rtn->dtype()==Type.ComplexDouble) or - (tmp->dtype()==Type.Float and this->dtype()==Type.Float and Rtn->dtype()==Type.Float) or - (tmp->dtype()==Type.ComplexFloat and this->dtype()==Type.ComplexFloat and Rtn->dtype()==Type.ComplexFloat) - ){ - ms[binx] = this->_blocks[a].shape()[0]; - ns[binx] = Rtn->_blocks[b].shape()[1]; - ks[binx] = comm_dim; - LMems[binx] = this->_blocks[a].storage()._impl->Mem; - RMems[binx] = Rtn->_blocks[b].storage()._impl->Mem; - CMems[binx] = tmp->_blocks[targ_b].storage()._impl->Mem; - // linalg::d_Matmul(this->_blocks[a], Rtn->_blocks[b], tmp->_blocks[targ_b], 1.0, 1.0, false); - } else { - tmp->_blocks[targ_b] += linalg::Matmul(this->_blocks[a], Rtn->_blocks[b]).reshape(tmp->_blocks[targ_b].shape()); - } - // Rtn->_blocks[b].reshape_(oldshapeR); - // Rtn->_blocks[b].permute_(inv_mapperR); - } - - if(tmp->dtype()==Type.Double and this->dtype()==Type.Double and Rtn->dtype()==Type.Double){ - blas_int group_count = itoiR_idx.size(); - // std::vector group_size(group_count,1); - group_size.resize(group_count,1); - dgemm_batch(transs.data(),transs.data(),ns.data(),ms.data(),ks.data(),doublealpha.data(), - (const cytnx_double**)RMems.data(),ns.data(),(const cytnx_double**)LMems.data(), - ks.data(),doublebeta.data(),(cytnx_double**)CMems.data(),ns.data(),&group_count,group_size.data()); - }else if(tmp->dtype()==Type.ComplexDouble and this->dtype()==Type.ComplexDouble and Rtn->dtype()==Type.ComplexDouble){ - blas_int group_count = itoiR_idx.size(); - // std::vector group_size(group_count,1); - group_size.resize(group_count,1); - zgemm_batch(transs.data(),transs.data(),ns.data(),ms.data(),ks.data(),complexalpha.data(), - (const cytnx_complex128**)RMems.data(),ns.data(),(const cytnx_complex128**)LMems.data(), - ks.data(),complexbeta.data(),(cytnx_complex128**)CMems.data(),ns.data(),&group_count,group_size.data()); - }else if(tmp->dtype()==Type.Float and this->dtype()==Type.Float and Rtn->dtype()==Type.Float){ - blas_int group_count = itoiR_idx.size(); - // std::vector group_size(group_count,1); - group_size.resize(group_count,1); - sgemm_batch(transs.data(),transs.data(),ns.data(),ms.data(),ks.data(),floatalpha.data(), - (const cytnx_float**)RMems.data(),ns.data(),(const cytnx_float**)LMems.data(), - ks.data(),floatbeta.data(),(cytnx_float**)CMems.data(),ns.data(),&group_count,group_size.data()); - }else if(tmp->dtype()==Type.ComplexFloat and this->dtype()==Type.ComplexFloat and Rtn->dtype()==Type.ComplexFloat){ - blas_int group_count = itoiR_idx.size(); - // std::vector group_size(group_count,1); - group_size.resize(group_count,1); - cgemm_batch(transs.data(),transs.data(),ns.data(),ms.data(),ks.data(),complexalpha_f.data(), - (const cytnx_complex64**)RMems.data(),ns.data(),(const cytnx_complex64**)LMems.data(), - ks.data(),complexbeta_f.data(),(cytnx_complex64**)CMems.data(),ns.data(),&group_count,group_size.data()); - } - - for(cytnx_uint64 binx = 0;binx_blocks[b].reshape_(oldshapeR[b]); - Rtn->_blocks[b].permute_(inv_mapperR); - } - - this->_blocks[a].reshape_(oldshapeL); - this->_blocks[a].permute_(inv_mapperL); - } - - for(cytnx_int64 a=0;a_blocks.size();a++){ - tmp->_blocks[a].reshape_(oldshapeC[a]); - if(!reshaped[a]){ - // cout<<"IM ININININ"<_blocks[a].storage().print_info(); - // tmp->_blocks[a].storage().print(); - tmp->_blocks[a].storage().set_zeros(); - // cout<<"-----------"<_blocks[a].storage().print_info(); - // tmp->_blocks[a].storage().print(); - // cout<<"IM OUTOUTOUT"< out(tmp); - return out; - - - - } // does it contract all the bond? - - cytnx_error_msg(true,"something wrong!%s","\n"); - - } // does it contract all the bond? - - - - }; - - - void BlockUniTensor::Transpose_(){ - // modify tag - for (int i = 0; i < this->bonds().size(); i++) { - this->bonds()[i].redirect_(); - // this->bonds()[i].qnums() = this->bonds()[i].calc_reverse_qnums(); - } - - }; - - void BlockUniTensor::normalize_(){ - Scalar out(0,this->dtype()); - for(auto &block: this->_blocks){ - out += Scalar(linalg::Pow(linalg::Norm(block),2).item()); - } - out = sqrt(out); - for(auto &block: this->_blocks){ - block/=out; - } - }; - - void BlockUniTensor::Trace_(const cytnx_int64 &a, const cytnx_int64 &b, const bool &by_label){ - - // 1) from label to indx. - cytnx_int64 ida, idb; - - if (by_label) { - ida = vec_where(this->_labels, std::to_string(a)); - idb = vec_where(this->_labels, std::to_string(b)); - } else { - cytnx_error_msg(a < 0 || b < 0, "[ERROR] invalid index a, b%s", "\n"); - cytnx_error_msg(a >= this->rank() || b >= this->rank(), "[ERROR] index out of bound%s", "\n"); - ida = a; - idb = b; - } - - this->Trace_(ida,idb); - - } - - void BlockUniTensor::Trace_(const std::string &a, const std::string &b){ - // 1) from label to indx. - cytnx_int64 ida, idb; - - ida = vec_where(this->_labels, a); - idb = vec_where(this->_labels, b); - - this->Trace_(ida,idb); - } - void BlockUniTensor::Trace_(const cytnx_int64 &a, const cytnx_int64 &b){ - - cytnx_int64 ida = a; - cytnx_int64 idb = b; - - // check if indices are the same: - cytnx_error_msg(a < 0 || b < 0, "[ERROR] invalid index a, b%s", "\n"); - cytnx_error_msg(a >= this->rank() || b >= this->rank(), "[ERROR] index out of bound%s", "\n"); - - cytnx_error_msg(ida == idb, - "[ERROR][BlockUniTensor::Trace_] index a and index b should not be the same.%s", - "\n"); - - // check if two bonds type are contractable: - cytnx_error_msg(this->_bonds[ida].type() == this->_bonds[idb].type(),"[ERROR] BD_BRA/BD_OUT can only contract with BD_KET/BD_IN%s","\n"); - - // check if two bonds dimension matches: - cytnx_error_msg( - this->_bonds[ida]._impl->_degs != this->_bonds[idb]._impl->_degs, - "[ERROR][BlockUniTensor::Trace_] The dimension of two bond for trace does not match!%s", - "\n"); - - // check if two bonds qnum matches: - cytnx_error_msg( - this->_bonds[ida]._impl->_qnums != this->_bonds[idb]._impl->_qnums, - "[ERROR][BlockUniTensor::Trace_] The quantum numbers of two bond for trace does not match!%s", - "\n"); - - - // update rowrank: - cytnx_int64 tmpRk = this->_rowrank; - if (ida < tmpRk) this->_rowrank--; - if (idb < tmpRk) this->_rowrank--; - - // 1) remove the bond, labels: - if (ida > idb) std::swap(ida, idb); - this->_bonds.erase(this->_bonds.begin() + idb); - this->_bonds.erase(this->_bonds.begin() + ida); - this->_labels.erase(this->_labels.begin() + idb); - this->_labels.erase(this->_labels.begin() + ida); - - //trace the block! - std::vector new_blocks; - vec2d new_itoi; - if(this->_labels.size()==0){ - // if there is no leg left, leaving only one block, and let API to handle the BlockUniTensor->DenseUniTensor! - new_blocks.push_back(zeros(1,this->dtype(),this->device())); - for(cytnx_int64 i=0;i_blocks.size();i++){ - if(this->_inner_to_outer_idx[i][ida] == this->_inner_to_outer_idx[i][idb]){ - if(this->is_diag()) new_blocks.back()+=linalg::Sum(this->_blocks[i]); - else new_blocks.back() += this->_blocks[i].Trace(ida,idb); - } - } - - }else{ - std::map , cytnx_uint64> tmap; - std::map , cytnx_uint64>::iterator itr; - for(cytnx_int64 i=0;i_blocks.size();i++){ - //std::cout << "blk: " << i << std::endl; - if(this->_inner_to_outer_idx[i][ida] == this->_inner_to_outer_idx[i][idb]){ - auto s = this->_inner_to_outer_idx[i]; - s.erase(s.begin() + idb); - s.erase(s.begin() + ida); - auto itr = tmap.find(s); - if(itr!=tmap.end()) - new_blocks[itr->second] += this->_blocks[i].Trace(ida,idb); - else{ - tmap[s] = new_blocks.size(); - new_blocks.push_back(this->_blocks[i].Trace(ida,idb)); - new_itoi.push_back(s); - } - } - } - - } - - this->_blocks = new_blocks; - this->_inner_to_outer_idx = new_itoi; - - } - - - Tensor BlockUniTensor::Norm() const{ - Scalar t; - if (this->_blocks.size()) { - t = linalg::Norm(this->_blocks[0]).item(); - t *= t; - for (int blk = 1; blk < this->_blocks.size(); blk++) { - Scalar tmp = linalg::Norm(this->_blocks[blk]).item(); - t += tmp * tmp; - } - - } else { - t = Scalar(0, Type.Double); - } - - t = sqrt(t); - Tensor R({1}, t.dtype()); - - R(0) = t; - return R; - } - - - // helper function: - void BlockUniTensor::_fx_locate_elem(cytnx_int64 &bidx, std::vector &loc_in_T,const std::vector &locator) const { - // 1. check if out of range: - cytnx_error_msg(locator.size() != this->_bonds.size(), - "[ERROR] len(locator) does not match the rank of tensor.%s", "\n"); - - - for (int i = 0; i < this->_bonds.size(); i++) { - cytnx_error_msg(locator[i] >= this->_bonds[i].dim(), - "[ERROR][BlockUniTensor][elem_exists] locator @index: %d out of range.\n", - i); - } - - // 2. calculate the location is in which qindices: - if(this->is_diag()){ - if(locator[0]!=locator[1]) bidx = -1; - else{ - loc_in_T.push_back(locator[0]); - std::vector qindices(2); - // its diag, so we can just use single bond! - for(int d=0;d_bonds[0]._impl->_degs.size();d++){ - if(loc_in_T[0] >= this->_bonds[0]._impl->_degs[d]) loc_in_T[0] -= this->_bonds[0]._impl->_degs[d]; - else{qindices[0] = qindices[1] = d; break;} - } - auto it = std::find(this->_inner_to_outer_idx.begin(),this->_inner_to_outer_idx.end(),qindices); - if(it == this->_inner_to_outer_idx.end()) bidx = -1; - else bidx = it - this->_inner_to_outer_idx.begin(); - - } - - }else{ - loc_in_T = locator; - std::vector qindices(loc_in_T.size()); - for(int i=0;i_bonds.size();i++){ - for(int d=0;d_bonds[i]._impl->_degs.size();d++){ - if(loc_in_T[i] >= this->_bonds[i]._impl->_degs[d]) loc_in_T[i] -= this->_bonds[i]._impl->_degs[d]; - else{qindices[i] = d; break;} - } - } - - auto it = std::find(this->_inner_to_outer_idx.begin(),this->_inner_to_outer_idx.end(),qindices); - - if(it == this->_inner_to_outer_idx.end()) bidx = -1; - else bidx = it - this->_inner_to_outer_idx.begin(); - } - } - - - - bool BlockUniTensor::elem_exists(const std::vector &locator) const{ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return !(bidx < 0); - } - - //------------------------------------------- - // at_for_sparse - Scalar::Sproxy BlockUniTensor::at_for_sparse(const std::vector &locator){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - if(bidx<0){ - return Scalar::Sproxy(this->NullRefTensor.storage()._impl,0); - }else{ - return this->_blocks[bidx].at(loc_in_T); - } - } - cytnx_complex128 &BlockUniTensor::at_for_sparse(const std::vector &locator, - const cytnx_complex128 &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - cytnx_complex64 &BlockUniTensor::at_for_sparse(const std::vector &locator, - const cytnx_complex64 &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - - } - cytnx_double &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_double &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - - } - cytnx_float &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_float &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - cytnx_uint64 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_uint64 &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - cytnx_int64 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_int64 &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - cytnx_uint32 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_uint32 &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - cytnx_int32 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_int32 &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - cytnx_uint16 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_uint16 &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - cytnx_int16 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_int16 &aux){ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - - - const Scalar::Sproxy BlockUniTensor::at_for_sparse(const std::vector &locator) const{ - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - if(bidx<0){ - return Scalar::Sproxy(this->NullRefTensor.storage()._impl,0); - }else{ - return this->_blocks[bidx].at(loc_in_T); - } - } - const cytnx_complex128 &BlockUniTensor::at_for_sparse(const std::vector &locator, - const cytnx_complex128 &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - const cytnx_complex64 &BlockUniTensor::at_for_sparse(const std::vector &locator, - const cytnx_complex64 &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - - } - const cytnx_double &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_double &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - - } - const cytnx_float &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_float &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - const cytnx_uint64 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_uint64 &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - const cytnx_int64 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_int64 &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - const cytnx_uint32 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_uint32 &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - const cytnx_int32 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_int32 &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - const cytnx_uint16 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_uint16 &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - const cytnx_int16 &BlockUniTensor::at_for_sparse(const std::vector &locator, const cytnx_int16 &aux)const { - cytnx_int64 bidx; - std::vector loc_in_T; - this->_fx_locate_elem(bidx,loc_in_T,locator); - return this->_blocks[bidx].at(loc_in_T); - } - - - void BlockUniTensor::_save_dispatch(std::fstream &f) const { - // cytnx_error_msg(true,"[ERROR] Save for SparseUniTensor is under developing!!%s","\n"); - - cytnx_uint64 Nblocks = this->_blocks.size(); - f.write((char *)&Nblocks, sizeof(cytnx_uint64)); - - // save inner_to_outer_idx: - for(unsigned int b=0;b_inner_to_outer_idx[b][0],sizeof(cytnx_uint64)*this->_bonds.size()); - } - for (unsigned int i = 0; i < this->_blocks.size(); i++) { - this->_blocks[i]._Save(f); - } - } - - void BlockUniTensor::_load_dispatch(std::fstream &f) { - // cytnx_error_msg(true,"[ERROR] Save for SparseUniTensor is under developing!!%s","\n"); - - cytnx_uint64 Nblocks; - f.read((char *)&Nblocks, sizeof(cytnx_uint64)); - - this->_inner_to_outer_idx = std::vector< std::vector >(Nblocks,std::vector(this->_bonds.size())); - // read inner_to_outer_idx: - for(unsigned int b=0;b_inner_to_outer_idx[b][0],sizeof(cytnx_uint64)*this->_bonds.size()); - } - this->_blocks.resize(Nblocks); - - for (unsigned int i = 0; i < this->_blocks.size(); i++) { - this->_blocks[i]._Load(f); - } - } - - - void BlockUniTensor::truncate_(const cytnx_int64 &bond_idx, const cytnx_uint64 &q_index, - const bool &by_label){ - - cytnx_error_msg(this->is_diag(),"[ERROR][BlockUniTensor][truncate_] cannot use truncate_ when is_diag() = true.%s","\n"); - cytnx_int64 bidx = bond_idx; - if(by_label){ - auto it = std::find(this->_labels.begin(), this->_labels.end(), to_string(bond_idx)); - cytnx_error_msg(it == this->_labels.end(), - "[ERROR] label [%d] does not exist in current UniTensor.\n", bond_idx); - bidx = it - this->_labels.begin(); - } - - cytnx_error_msg((bidx>=this->_labels.size())|| (bidx < 0), "[ERROR][BlockUniTensor][truncate_] bond_idx out of bound.%s","\n"); - cytnx_error_msg(q_index >= this->_bonds[bidx].qnums().size(), "[ERROR][BlockUniTensor][truncate_] q_index out of bound @ specify Bond @[%d].\n",bidx); - - cytnx_error_msg(this->_bonds[bidx].qnums().size()==1,"[ERROR][BlockUniTensor][truncate_] cannot remove the only qnums on a given Bond!%s","\n"); - - this->_bonds[bidx]._impl->_rm_qnum(q_index); - - //traversal all blocks, find all blocks that need to remove: - std::vector locs; - for(cytnx_int64 b=0;b_blocks.size();b++){ - if(this->_inner_to_outer_idx[b][bidx] == q_index) locs.push_back(b); - } - - //remove! - vec_erase_(this->_inner_to_outer_idx,locs); - vec_erase_(this->_blocks,locs); - - - - } - void BlockUniTensor::truncate_(const std::string &bond_idx, const cytnx_uint64 &q_index){ - auto it = std::find(this->_labels.begin(), this->_labels.end(), bond_idx); - cytnx_error_msg(it == this->_labels.end(), - "[ERROR] label [%s] does not exist in current UniTensor.\n", bond_idx.c_str()); - - cytnx_int64 idx = it - this->_labels.begin(); - this->truncate_(idx,q_index,false); - } - void BlockUniTensor::truncate_(const cytnx_int64 &bond_idx, const cytnx_uint64 &q_index){ - this->truncate_(bond_idx,q_index,false); - } - - - - void BlockUniTensor::Mul_(const Scalar &rhs) { - // cytnx_error_msg(true,"[ERROR] cannot perform arithmetic on all tagged tensor, @spase - // unitensor%s","\n"); - for (cytnx_int64 i = 0; i < this->_blocks.size(); i++) { - this->_blocks[i] *= rhs; - } - } - - void BlockUniTensor::Div_(const Scalar &rhs) { - // cytnx_error_msg(true,"[ERROR] cannot perform arithmetic on all tagged tensor, @spase - // unitensor%s","\n"); - for (cytnx_int64 i = 0; i < this->_blocks.size(); i++) { - this->_blocks[i] /= rhs; - } - } - - - void BlockUniTensor::Add_(const boost::intrusive_ptr &rhs){ - //checking Type: - cytnx_error_msg(rhs->uten_type()!=UTenType.Block,"[ERROR] cannot add two UniTensor with different type/format.%s","\n"); - - BlockUniTensor* Rtn = (BlockUniTensor*)rhs.get(); - - // 1) check each bond. - cytnx_error_msg(this->_bonds.size()!=Rtn->_bonds.size(),"[ERROR] cannot add two BlockUniTensor with different rank!%s","\n"); - for(cytnx_int64 i=0;i_bonds.size();i++){ - cytnx_error_msg(this->_bonds[i] != Rtn->_bonds[i],"[ERROR] Bond @ index: %d does not match. Therefore cannot perform Add of two UniTensor\n",i); - } - - cytnx_error_msg(this->is_diag()!=Rtn->is_diag(),"[ERROR] cannot add BlockUniTensor with is_diag=true and is_diag=false.%s","\n"); - - // 2) finding the blocks (they might be not in the same order! - for(cytnx_int64 b=0;b_blocks.size();b++){ - for(cytnx_int64 a=0;a_blocks.size();a++){ - if(this->_inner_to_outer_idx[b] == Rtn->_inner_to_outer_idx[(b+a)%Rtn->_blocks.size()]){ - this->_blocks[b] += Rtn->_blocks[(b+a)%Rtn->_blocks.size()]; - break; - } - } - } - - } - - void BlockUniTensor::Mul_(const boost::intrusive_ptr &rhs){ - //checking Type: - cytnx_error_msg(rhs->uten_type()!=UTenType.Block,"[ERROR] cannot add two UniTensor with different type/format.%s","\n"); - - BlockUniTensor* Rtn = (BlockUniTensor*)rhs.get(); - - // 1) check each bond. - cytnx_error_msg(this->_bonds.size()!=Rtn->_bonds.size(),"[ERROR] cannot add two BlockUniTensor with different rank!%s","\n"); - for(cytnx_int64 i=0;i_bonds.size();i++){ - cytnx_error_msg(this->_bonds[i] != Rtn->_bonds[i],"[ERROR] Bond @ index: %d does not match. Therefore cannot perform Add of two UniTensor\n",i); - } - - cytnx_error_msg(this->is_diag()!=Rtn->is_diag(),"[ERROR] cannot add BlockUniTensor with is_diag=true and is_diag=false.%s","\n"); - - // 2) finding the blocks (they might be not in the same order! - for(cytnx_int64 b=0;b_blocks.size();b++){ - for(cytnx_int64 a=0;a_blocks.size();a++){ - if(this->_inner_to_outer_idx[b] == Rtn->_inner_to_outer_idx[(b+a)%Rtn->_blocks.size()]){ - this->_blocks[b] *= Rtn->_blocks[(b+a)%Rtn->_blocks.size()]; - break; - } - } - } - - } - - void BlockUniTensor::Sub_(const boost::intrusive_ptr &rhs){ - //checking Type: - cytnx_error_msg(rhs->uten_type()!=UTenType.Block,"[ERROR] cannot add two UniTensor with different type/format.%s","\n"); - - BlockUniTensor* Rtn = (BlockUniTensor*)rhs.get(); - - // 1) check each bond. - cytnx_error_msg(this->_bonds.size()!=Rtn->_bonds.size(),"[ERROR] cannot add two BlockUniTensor with different rank!%s","\n"); - for(cytnx_int64 i=0;i_bonds.size();i++){ - cytnx_error_msg(this->_bonds[i] != Rtn->_bonds[i],"[ERROR] Bond @ index: %d does not match. Therefore cannot perform Add of two UniTensor\n",i); - } - - cytnx_error_msg(this->is_diag()!=Rtn->is_diag(),"[ERROR] cannot add BlockUniTensor with is_diag=true and is_diag=false.%s","\n"); - - // 2) finding the blocks (they might be not in the same order! - for(cytnx_int64 b=0;b_blocks.size();b++){ - for(cytnx_int64 a=0;a_blocks.size();a++){ - if(this->_inner_to_outer_idx[b] == Rtn->_inner_to_outer_idx[(b+a)%Rtn->_blocks.size()]){ - this->_blocks[b] -= Rtn->_blocks[(b+a)%Rtn->_blocks.size()]; - break; - } - } - } - - } - - void BlockUniTensor::_fx_group_duplicates(const std::vector &dup_bond_idxs, const std::vector > &idx_mappers){ - - //checking the bonds that are duplicates - //auto mod_idxs = dup_bond_idxs; std::sort(mod_idx.begin(),mod_idx.end()); - - //generating new inner_to_outer_idx: - std::vector > tmp_inner_to_outer_idx; - - - - //process one by one: - for(cytnx_int64 bn=0;bn_inner_to_outer_idx; - - for(cytnx_int64 i=0;i_inner_to_outer_idx.size();i++){ - tmp_inner_to_outer_idx[i][dup_bond_idxs[bn]] = idx_mappers[bn][ this->_inner_to_outer_idx[i][dup_bond_idxs[bn]] ]; - } - - std::vector mask(this->_blocks.size()); - std::vector new_blocks; - std::vector > new_inner_to_outer_idx; - - std::vector no_combine; // same for each bond! - for(cytnx_uint64 i=0;irank();i++){ - if(i!=dup_bond_idxs[bn]) no_combine.push_back(i); - } - - for(cytnx_int64 b=0;b_blocks.size();b++){ - if(mask[b]==1) continue; - mask[b] = 1; - new_blocks.push_back(this->_blocks[b]); - new_inner_to_outer_idx.push_back(tmp_inner_to_outer_idx[b]); - for(cytnx_int64 a=b+1;a_blocks.size();a++){ - if(mask[a]==1) continue; - if(tmp_inner_to_outer_idx[a] == tmp_inner_to_outer_idx[b]){ - // need to combine two! - // checking which bonds does not need to combine! - mask[a] = 1; - /* - std::cout << "CALL DS:\n"; - std::cout << no_combine << std::endl; - std::cout << "targ: old/new itoi:\n"; - std::cout << this->_inner_to_outer_idx[b] << std::endl; - std::cout << tmp_inner_to_outer_idx[b] << std::endl; - std::cout << "----------\n" << std::endl; - std::cout << "src: old/new itoi:\n"; - std::cout << this->_inner_to_outer_idx[a] << std::endl; - std::cout << tmp_inner_to_outer_idx[a] << std::endl; - std::cout << "----------\n" << std::endl; - std::cout << new_blocks.back().shape() << std::endl; - std::cout << this->_blocks[a].shape() << std::endl; - std::cout << "=============\n" << std::endl; - */ - new_blocks.back() = linalg::Directsum(new_blocks.back(),this->_blocks[a],no_combine); - - } - - } - }// traversal each block! - - this->_blocks = new_blocks; - this->_inner_to_outer_idx = new_inner_to_outer_idx; - - } - - } - - void BlockUniTensor::group_basis_(){ - - std::vector has_dup; - std::vector > idx_mappers; - for(cytnx_uint64 i=0;i_bonds.size();i++){ - if(this->_bonds[i].has_duplicate_qnums()){ - has_dup.push_back(i); - idx_mappers.push_back(this->_bonds[i].group_duplicates_()); - } - } - - - // this modify _inner_to_outer_idx and blocks! - this->_fx_group_duplicates(has_dup,idx_mappers); - - } - - - void BlockUniTensor::combineBonds(const std::vector &indicators, - const bool &force) { - cytnx_error_msg(this->is_diag(),"[ERROR][BlockUniTensor] cannot combineBonds when is_diag = true!%s","\n"); - - cytnx_error_msg(indicators.size() < 2, "[ERROR] the number of bonds to combine must be > 1%s", - "\n"); - std::vector::iterator it; - std::vector idx_mapper; idx_mapper.reserve(this->rank()); - //std::vector new_shape_aft_perm; new_shape_aft_perm.reserve(this->rank()-indicators.size()+1); - - //idx_mapper = std::vector(indicators.begin(), indicators.end()); - - cytnx_error_msg(this->_is_diag, - "[ERROR] cannot combineBond on a is_diag=True UniTensor. suggestion: try " - "UniTensor.to_dense()/to_dense_() first.%s [NOTE] this is BlockUniTensor, so currently under developing!\n", - "\n"); - - - //get the mapper: - int cnt = 0; - int idor; - for(int i=0;irank();i++){ - if(cnt==indicators.size()){ - idx_mapper.push_back(i); - //new_shape_aft_perm.push_back(0); - }else{ - if(std::find(indicators.begin(),indicators.end(),i)==indicators.end()){ - idx_mapper.push_back(i); - //new_shape_aft_perm.push_back(0); - }else{ - if(i==indicators[0]){ - //new_shape_aft_perm.push_back(-1); - idor = idx_mapper.size(); //new_shape_aft_perm.size(); - for(int j=0;jpermute_(idx_mapper); - this->contiguous_(); - - //group bonds: - std::vector new_bonds; - std::vector cb_stride(indicators.size()); - //std::cout << "idor" << idor << std::endl; - //std::cout << "rank" << this->rank() << std::endl; - for(int i=0;irank();i++){ - if(i==idor){ - Bond tmp = this->_bonds[i]; - cb_stride[0] = this->_bonds[i].qnums().size(); - for(int j=1;j_bonds[i+j].qnums().size(); - if(force) tmp._impl->force_combineBond_(this->_bonds[i+j]._impl,false); // no grouping - else tmp.combineBond_(this->_bonds[i+j],false); // no grouping - } - new_bonds.push_back(tmp); - i += indicators.size()-1; - - }else{ - new_bonds.push_back(this->_bonds[i]); - } - } - - // remove labels: - this->_labels.erase(this->_labels.begin()+idor+1,this->_labels.begin()+idor+1+indicators.size()-1); - this->_bonds = new_bonds; - - - //reshape each blocks, and update_inner_to_outer_idx: - //process stride: - memcpy(&cb_stride[0],&cb_stride[1],sizeof(cytnx_uint64)*(cb_stride.size()-1)); - // for(int i=cb_stride.size()-2;i>=0;i--){ - // cb_stride[i] = cb_stride[i+1]; - // } - cb_stride.back()=1; - for(int i=cb_stride.size()-2;i>=0;i--){ - cb_stride[i]*=cb_stride[i+1]; - } - - std::vector new_shape; new_shape.reserve(this->rank()); - for(int b=0;b_blocks.size();b++){ - new_shape.clear(); - for(int i=0;i_blocks[b].shape().size();i++){ - if(i==idor){ - i+=indicators.size()-1; - new_shape.push_back(-1); - }else{ - new_shape.push_back(this->_blocks[b].shape()[i]); - } - } - this->_blocks[b].reshape_(new_shape); - } - - // cout<<"AAAAAAAAAAAAAAAAAAAAAAA"<get_qindices(2)<bonds()<_blocks.size();b++){ - this->_inner_to_outer_idx[b][idor] *= cb_stride[0]; - for(int i=idor+1;i_inner_to_outer_idx[b][idor]+= this->_inner_to_outer_idx[b][i] * cb_stride[i-idor]; - } - if(idor+indicators.size()_inner_to_outer_idx[b].size()){ - memcpy(&this->_inner_to_outer_idx[b][idor+1],&this->_inner_to_outer_idx[b][idor+indicators.size()],sizeof(cytnx_uint64)*(this->_inner_to_outer_idx[b].size()-idor-indicators.size())); - } - this->_inner_to_outer_idx[b].resize(this->rank()); - } - //std::cout << this->_inner_to_outer_idx << std::endl; - - //check rowrank: - if(this->_rowrank >= this->rank()) this->_rowrank = this->rank(); - - this->_is_braket_form = this->_update_braket(); - - // cout<<"BBBBBBBBBBBBBBBBBBBBBBB"<get_qindices(2)<bonds()<group_basis_(); - } - - - void BlockUniTensor::combineBonds(const std::vector &indicators, - const bool &force) { - cytnx_error_msg(indicators.size() < 2, "[ERROR] the number of bonds to combine must be > 1%s", - "\n"); - std::vector::iterator it; - std::vector idx_mapper; - // find the index of label: - for (cytnx_uint64 i = 0; i < indicators.size(); i++) { - it = std::find(this->_labels.begin(), this->_labels.end(), indicators[i]); - cytnx_error_msg(it == this->_labels.end(), "[ERROR] labels not found in current UniTensor%s", - "\n"); - idx_mapper.push_back(std::distance(this->_labels.begin(), it)); - } - this->combineBonds(idx_mapper,force); - } - - void BlockUniTensor::combineBonds(const std::vector &indicators, - const bool &force, const bool &by_label) { - cytnx_error_msg(indicators.size() < 2, "[ERROR] the number of bonds to combine must be > 1%s", - "\n"); - std::vector::iterator it; - std::vector idx_mapper; - if (by_label) { - // find the index of label: - for (cytnx_uint64 i = 0; i < indicators.size(); i++) { - it = std::find(this->_labels.begin(), this->_labels.end(), std::to_string(indicators[i])); - cytnx_error_msg(it == this->_labels.end(), - "[ERROR] labels not found in current UniTensor%s", "\n"); - idx_mapper.push_back(std::distance(this->_labels.begin(), it)); - } - - } else { - idx_mapper = indicators; - } - this->combineBonds(idx_mapper,force); - - } - - - -} // namespace cytnx diff --git a/src/SparseUniTensor.cpp b/src/SparseUniTensor.cpp index 2382ecd8..70d734ab 100644 --- a/src/SparseUniTensor.cpp +++ b/src/SparseUniTensor.cpp @@ -1,7 +1,6 @@ #include "UniTensor.hpp" #include "Accessor.hpp" #include "utils/utils.hpp" -#include "utils/utils_internal_interface.hpp" #include "linalg.hpp" #include "Generator.hpp" #include diff --git a/src/Tensor.old.cpp b/src/Tensor.old.cpp deleted file mode 100644 index d83e1a38..00000000 --- a/src/Tensor.old.cpp +++ /dev/null @@ -1,1390 +0,0 @@ -#include -#include "Tensor.hpp" -#include "utils/utils_internal_interface.hpp" -#include "linalg.hpp" -#include "utils/is.hpp" -#include "Type.hpp" -using namespace std; - -namespace cytnx { - - //---------------------------------------------- - // Tproxy - - Tensor Tensor::Tproxy::operator+=(const Tensor::Tproxy &rc) { - Tensor self; - self._impl = _insimpl->get(_accs); - // self += Tensor(rc); - cytnx::linalg::iAdd(self, Tensor(rc)); - - _insimpl->set(_accs, self._impl); - self._impl = this->_insimpl; - return self; - } - Tensor Tensor::Tproxy::operator-=(const Tensor::Tproxy &rc) { - Tensor self; - self._impl = _insimpl->get(_accs); - // self += Tensor(rc); - cytnx::linalg::iSub(self, Tensor(rc)); - - _insimpl->set(_accs, self._impl); - self._impl = this->_insimpl; - return self; - } - Tensor Tensor::Tproxy::operator/=(const Tensor::Tproxy &rc) { - Tensor self; - self._impl = _insimpl->get(_accs); - // self += Tensor(rc); - cytnx::linalg::iDiv(self, Tensor(rc)); - - _insimpl->set(_accs, self._impl); - self._impl = this->_insimpl; - return self; - } - Tensor Tensor::Tproxy::operator*=(const Tensor::Tproxy &rc) { - Tensor self; - self._impl = _insimpl->get(_accs); - // self += Tensor(rc); - cytnx::linalg::iMul(self, Tensor(rc)); - - _insimpl->set(_accs, self._impl); - self._impl = this->_insimpl; - return self; - } - - // ADD - Tensor Tensor::Tproxy::operator+( - const cytnx_complex128 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_complex64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_double &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_float &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_uint64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_int64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_uint32 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_int32 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_uint16 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_int16 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - Tensor Tensor::Tproxy::operator+( - const cytnx_bool &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Add(rc); - } - - Tensor Tensor::Tproxy::operator+(const Tproxy &rc) const { - Tensor out; - out._impl = _insimpl->get(_accs); - return cytnx::linalg::Add(out, Tensor(rc)); - } - - // SUB: - Tensor Tensor::Tproxy::operator-( - const cytnx_complex128 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_complex64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_double &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_float &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_uint64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_int64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_uint32 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_int32 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_uint16 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_int16 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-( - const cytnx_bool &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Sub(rc); - } - Tensor Tensor::Tproxy::operator-(const Tproxy &rc) const { - Tensor out; - out._impl = _insimpl->get(_accs); - return cytnx::linalg::Sub(out, Tensor(rc)); - } - Tensor Tensor::Tproxy::operator-() const { - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(-1); - } - - // MUL - Tensor Tensor::Tproxy::operator*( - const cytnx_complex128 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_complex64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_double &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_float &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_uint64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_int64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_uint32 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_int32 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_uint16 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_int16 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*( - const cytnx_bool &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Mul(rc); - } - Tensor Tensor::Tproxy::operator*(const Tproxy &rc) const { - Tensor out; - out._impl = _insimpl->get(_accs); - return cytnx::linalg::Mul(out, Tensor(rc)); - } - - // DIV - Tensor Tensor::Tproxy::operator/( - const cytnx_complex128 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_complex64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_double &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_float &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_uint64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_int64 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_uint32 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_int32 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_uint16 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_int16 &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/( - const cytnx_bool &rc) const { //{return this->_operatorADD(rc);}; - Tensor out; - out._impl = _insimpl->get(_accs); - return out.Div(rc); - } - Tensor Tensor::Tproxy::operator/(const Tproxy &rc) const { - Tensor out; - out._impl = _insimpl->get(_accs); - return cytnx::linalg::Div(out, Tensor(rc)); - } - - //----------------------------------------------- - void Tensor_impl::Init(const std::vector &shape, const unsigned int &dtype, - int device, const bool &init_zero) { - // check: - cytnx_error_msg(dtype >= N_Type, "%s", "[ERROR] invalid argument: dtype"); - cytnx_error_msg(shape.size() == 0, "%s", - "[ERROR] invalid argument: shape. Must at least have one element."); - cytnx_uint64 Nelem = 1; - for (int i = 0; i < shape.size(); i++) { - cytnx_error_msg(shape[i] == 0, "%s", "[ERROR] shape cannot have 0 dimension in any rank."); - Nelem *= shape[i]; - } - // this->_storage = __SII.USIInit[dtype](); - this->_storage.Init(Nelem, dtype, device, init_zero); - this->_shape = shape; - this->_mapper = vec_range(shape.size()); - this->_invmapper = this->_mapper; - this->_contiguous = true; - // cout << shape << endl; - } - void Tensor_impl::Init(const Storage &in) { - cytnx_error_msg(in.dtype() == Type.Void, - "[ERROR] cannot init Tensor using un-initialized Storage%s", "\n"); - this->_storage = in; - this->_shape.clear(); - this->_shape.push_back(in.size()); - this->_mapper.clear(); - this->_mapper.push_back(0); - this->_invmapper = this->_mapper; - this->_contiguous = true; - } - // void Tensor_impl::Init(const Storage &in, const std::vector &shape, - // const unsigned int &dtype, int device) { - // cytnx_error_msg(in.dtype() == Type.Void, - // "[ERROR] cannot init Tensor using un-initialized Storage%s", "\n"); - // // check: - // cytnx_error_msg(dtype >= N_Type, "%s", "[ERROR] invalid argument: dtype"); - // cytnx_error_msg(shape.size() == 0, "%s", - // "[ERROR] invalid argument: shape. Must at least have one element."); - // cytnx_uint64 Nelem = 1; - // for (int i = 0; i < shape.size(); i++) { - // cytnx_error_msg(shape[i] == 0, "%s", "[ERROR] shape cannot have 0 dimension in any rank."); - // Nelem *= shape[i]; - // } - // this->_storage = in; - // // this->_storage = __SII.USIInit[dtype](); - // this->_shape = shape; - // this->_mapper = vec_range(shape.size()); - // this->_invmapper = this->_mapper; - // this->_contiguous = true; - // } - - boost::intrusive_ptr Tensor_impl::permute(const std::vector &rnks) { - // check:: - if (rnks.size() != this->_shape.size()) { - cytnx_error_msg(true, "%s", - "reshape a tensor with a specify shape that does not match with the shape of " - "the incident tensor."); - } - - if (vec_unique(rnks).size() != rnks.size()) { - cytnx_error_msg(true, "%s", "tensor permute with duplicated index.\n"); - } - - std::vector new_fwdmap(this->_shape.size()); - std::vector new_shape(this->_shape.size()); - std::vector new_idxmap(this->_shape.size()); - - // for(int i=0;i_shape.size();i++) - // std::cout << this->_mapper[i] << " " << this->_invmapper[i] << std::endl; - - boost::intrusive_ptr out(new Tensor_impl()); - - for (cytnx_uint32 i = 0; i < rnks.size(); i++) { - if (rnks[i] >= rnks.size()) { - cytnx_error_msg(1, "%s", "reshape a tensor with invalid rank index."); - } - // std::cout << this->_mapper[rnks[i]] << " " << i << std::endl; - new_idxmap[this->_mapper[rnks[i]]] = i; - new_fwdmap[i] = this->_mapper[rnks[i]]; - new_shape[i] = this->_shape[rnks[i]]; - } - - out->_invmapper = std::move(new_idxmap); - out->_shape = std::move(new_shape); - out->_mapper = std::move(new_fwdmap); - - /// checking if permute back to contiguous: - bool iconti = true; - for (cytnx_uint32 i = 0; i < rnks.size(); i++) { - // if (new_fwdmap[i] != new_idxmap[i]) { - // iconti = false; - // break; - // } - if (out->_mapper[i] != i) { - iconti = false; - break; - } - } - out->_contiguous = iconti; - - // ref storage - out->_storage = this->_storage; - return out; - } - - void Tensor_impl::permute_(const std::vector &rnks) { - // check:: - if (rnks.size() != this->_shape.size()) { - cytnx_error_msg(true, "%s", - "reshape a tensor with a specify shape that does not match with the shape of " - "the incident tensor."); - } - - if (vec_unique(rnks).size() != rnks.size()) { - cytnx_error_msg(true, "%s", "tensor permute with duplicated index.\n"); - } - - // std::vector new_fwdmap(this->_shape.size()); - // std::vector new_shape(this->_shape.size()); - // std::vector new_idxmap(this->_shape.size()); - - // smallvec new_fwdmap(this->_shape.size()); - // smallvec new_shape(this->_shape.size()); - // smallvec new_idxmap(this->_shape.size()); - std::vector new_fwdmap(this->_shape.size()); - std::vector new_shape(this->_shape.size()); - std::vector new_idxmap(this->_shape.size()); - - // for(int i=0;i_shape.size();i++) - // std::cout << this->_mapper[i] << " " << this->_invmapper[i] << std::endl; - - for (cytnx_uint32 i = 0; i < rnks.size(); i++) { - if (rnks[i] >= rnks.size()) { - cytnx_error_msg(1, "%s", "reshape a tensor with invalid rank index."); - } - // std::cout << this->_mapper[rnks[i]] << " " << i << std::endl; - // new_idxmap[this->_mapper[rnks[i]]] = i; - this->_invmapper[this->_mapper[rnks[i]]] = i; - new_fwdmap[i] = this->_mapper[rnks[i]]; - new_shape[i] = this->_shape[rnks[i]]; - } - - // this->_invmapper = std::move(new_idxmap); - for (cytnx_uint64 i = 0; i < this->_shape.size(); i++) { - this->_shape[i] = new_shape[i]; - this->_mapper[i] = new_fwdmap[i]; - } - - // this->_shape = std::move(new_shape); - // this->_mapper = std::move(new_fwdmap); - - /// checking if permute back to contiguous: - bool iconti = true; - for (cytnx_uint32 i = 0; i < rnks.size(); i++) { - // if (this->_mapper[i] != this->_invmapper[i]) { - // iconti = false; - // break; - // } - if (this->_mapper[i] != i) { - iconti = false; - break; - } - } - this->_contiguous = iconti; - } - - // shadow new: - // - - boost::intrusive_ptr Tensor_impl::get( - const std::vector &accessors) { - cytnx_error_msg(accessors.size() > this->_shape.size(), "%s", - "The input indexes rank is out of range! (>Tensor's rank)."); - - std::vector acc = accessors; - for (int i = 0; i < this->_shape.size() - accessors.size(); i++) { - acc.push_back(Accessor::all()); - } - - /* - cout << "acc type bef" << endl; - for(int i=0;i_invmapper); // contiguous. - /* - cout << "acc type aft" << endl; - for(int i=0;i_shape, this->_invmapper); - // cout << "curr_shape" << endl; - // cout << curr_shape << endl; - - //[2] from back to front, check until last all: - cytnx_uint64 Nunit = 1; - int tmpidx = 0; - while (tmpidx < curr_shape.size()) { - if (acc.back().type() == Accessor::All) { - Nunit *= curr_shape[curr_shape.size() - 1 - tmpidx]; - tmpidx++; - acc.pop_back(); - } else { - break; - } - } - // cout << "tmpidx" << tmpidx << endl; - // cout << "Nunit" << Nunit << endl; - // cout << acc.size() << endl; - - // acc-> locators - - std::vector get_shape(acc.size()); - std::vector> locators(acc.size()); - for (cytnx_uint32 i = 0; i < acc.size(); i++) { - cytnx_error_msg(acc[i].type() == Accessor::Qns, - "[ERROR] Tensor cannot accept accessor with qnum list.%s", "\n"); - acc[i].get_len_pos(curr_shape[i], get_shape[i], locators[i]); - } - // cout << "get_shape" << endl; - // cout << get_shape << endl; - - // create Tensor: - for (cytnx_uint64 i = 0; i < tmpidx; i++) { - get_shape.push_back(curr_shape[acc.size() + i]); - } - boost::intrusive_ptr out(new Tensor_impl()); - out->Init(get_shape, this->dtype(), this->device()); - // cout << get_shape << endl; - - if (locators.size() == 0) { - locators.resize(1); - locators[0].push_back(0); - } - - // call storage - this->storage()._impl->GetElem_byShape_v2(out->storage()._impl, curr_shape, locators, Nunit); - - // permute back: - std::vector new_mapper(this->_mapper.begin(), this->_mapper.end()); - std::vector new_shape; - std::vector remove_id; - for (unsigned int i = 0; i < out->_shape.size(); i++) { - if (out->shape()[i] == 1 && (acc[i].type() == Accessor::Singl)) - remove_id.push_back(this->_mapper[this->_invmapper[i]]); - else - new_shape.push_back(out->shape()[i]); - } - - // cout << "mapper" << endl; - // cout << new_mapper << endl; - // cout << "inv_mapper" << endl; - // cout << this->_invmapper << endl; - - // cout << "remove_id" << endl; - // cout << remove_id << endl; - // cout << "out shape raw" << endl; - // cout << out->shape() << endl; - - // cout << "perm" << endl; - // cout << perm << endl; - // cout << new_shape << endl; - if (new_shape.size()) { // exclude the case where only single element exists! - - out->reshape_(new_shape); // remove size-1 axis - - std::vector perm; - for (unsigned int i = 0; i < new_mapper.size(); i++) { - perm.push_back(new_mapper[i]); - for (unsigned int j = 0; j < remove_id.size(); j++) { - if (new_mapper[i] > remove_id[j]) - perm.back() -= 1; - else if (new_mapper[i] == remove_id[j]) { - perm.pop_back(); - break; - } - } - } - out->permute_(perm); - } else { - out->reshape_({1}); // if it is only one element. - } - - return out; - } - - boost::intrusive_ptr Tensor_impl::get_deprecated( - const std::vector &accessors) { - cytnx_error_msg(accessors.size() > this->_shape.size(), "%s", - "The input indexes rank is out of range! (>Tensor's rank)."); - - std::vector acc = accessors; - for (int i = 0; i < this->_shape.size() - accessors.size(); i++) { - acc.push_back(Accessor::all()); - } - - vector get_shape(acc.size()); - - // vector new_shape; - std::vector> locators(this->_shape.size()); - for (cytnx_uint32 i = 0; i < acc.size(); i++) { - acc[i].get_len_pos(this->_shape[i], get_shape[i], locators[i]); - // std::cout << this->_shape[i] << " " << get_shape[i] << "|"; - // for(int j=0;j out(new Tensor_impl()); - out->Init(get_shape, this->dtype(), this->device()); - - this->storage()._impl->GetElem_byShape(out->storage()._impl, this->shape(), this->_mapper, - get_shape, locators); - - vector new_shape; - for (cytnx_uint32 i = 0; i < acc.size(); i++) - if (get_shape[i] != 1) new_shape.push_back(get_shape[i]); - - if (new_shape.size() == 0) - out->reshape_({1}); - else - out->reshape_(new_shape); - return out; - } - - void Tensor_impl::set(const std::vector &accessors, - const boost::intrusive_ptr &rhs) { - // cout << "calling set" << endl; - cytnx_error_msg(accessors.size() > this->_shape.size(), "%s", - "The input indexes rank is out of range! (>Tensor's rank)."); - - vector acc = accessors; - for (int i = 0; i < this->_shape.size() - accessors.size(); i++) { - acc.push_back(Accessor::all()); - } - - // vector get_shape(acc.size()); - acc = vec_map(acc, this->_invmapper); // contiguous. - - //[1] curr_shape: - auto curr_shape = vec_map(this->_shape, this->_invmapper); - - //[2] from back to front, check until last all: - cytnx_uint64 Nunit = 1; - int tmpidx = 0; - while (tmpidx < curr_shape.size()) { - if (acc.back().type() == Accessor::All) { - Nunit *= curr_shape[curr_shape.size() - 1 - tmpidx]; - tmpidx++; - acc.pop_back(); - } else { - break; - } - } - - std::vector get_shape(acc.size()); - std::vector> locators(acc.size()); - for (cytnx_uint32 i = 0; i < acc.size(); i++) { - cytnx_error_msg(acc[i].type() == Accessor::Qns, - "[ERROR] Tensor cannot accept accessor with qnum list.%s", "\n"); - acc[i].get_len_pos(curr_shape[i], get_shape[i], locators[i]); - } - - /// checking if its scalar assign! - if (rhs->storage().size() == 1) { - this->storage()._impl->SetElem_byShape_v2(rhs->storage()._impl, curr_shape, locators, Nunit, - true); - // std::cout << "Scalar" << endl; - - } else { - for (cytnx_uint64 i = 0; i < tmpidx; i++) { - get_shape.push_back(curr_shape[acc.size() + i]); - } - - // std::cout << get_shape << endl; - - // permute input to currect pos - std::vector new_mapper(this->_mapper.begin(), this->_mapper.end()); - std::vector new_shape; - std::vector remove_id; - for (unsigned int i = 0; i < get_shape.size(); i++) { - if (acc[i].type() == Accessor::Singl) - remove_id.push_back(this->_mapper[this->_invmapper[i]]); - else - new_shape.push_back(get_shape[i]); - } - - if (new_shape.size() == 0) new_shape.push_back(1); - - // use current size to infer rhs permutation. - std::vector perm; - for (unsigned int i = 0; i < new_mapper.size(); i++) { - perm.push_back(new_mapper[i]); - - for (unsigned int j = 0; j < remove_id.size(); j++) { - if (new_mapper[i] > remove_id[j]) - perm.back() -= 1; - else if (new_mapper[i] == remove_id[j]) { - perm.pop_back(); - break; - } - } - } - - std::vector iperm(perm.size()); - for (unsigned int i = 0; i < iperm.size(); i++) iperm[perm[i]] = i; - - // std::cout << new_shape << endl; - boost::intrusive_ptr tmp; - // std::cout << iperm << std::endl; - tmp = rhs->permute(iperm)->contiguous(); - cytnx_error_msg(new_shape != tmp->shape(), "[ERROR][Tensor.set_elems]%s", - "inconsistent shape"); - this->storage()._impl->SetElem_byShape_v2(tmp->storage()._impl, curr_shape, locators, Nunit, - false); - } - } - - template - void Tensor_impl::set(const std::vector &accessors, const T &rc) { - cytnx_error_msg(accessors.size() > this->_shape.size(), "%s", - "The input indexes rank is out of range! (>Tensor's rank)."); - - std::vector acc = accessors; - for (int i = 0; i < this->_shape.size() - accessors.size(); i++) { - acc.push_back(Accessor::all()); - } - - acc = vec_map(acc, this->_invmapper); // contiguous. - - //[1] curr_shape: - auto curr_shape = vec_map(this->_shape, this->_invmapper); - - //[2] from back to front, check until last all: - cytnx_uint64 Nunit = 1; - int tmpidx = 0; - while (tmpidx < curr_shape.size()) { - if (acc.back().type() == Accessor::All) { - Nunit *= curr_shape[curr_shape.size() - 1 - tmpidx]; - tmpidx++; - acc.pop_back(); - } else { - break; - } - } - // cout << "tmpidx" << tmpidx << endl; - // cout << "Nunit" << Nunit << endl; - // cout << acc.size() << endl; - - // acc-> locators - - std::vector get_shape(acc.size()); - std::vector> locators(acc.size()); - for (cytnx_uint32 i = 0; i < acc.size(); i++) { - cytnx_error_msg(acc[i].type() == Accessor::Qns, - "[ERROR] Tensor cannot accept accessor with qnum list.%s", "\n"); - acc[i].get_len_pos(curr_shape[i], get_shape[i], locators[i]); - } - // cout << "get_shape" << endl; - // cout << get_shape << endl; - - // call storage - Scalar c = rc; - - Storage tmp(1, c.dtype(), this->device()); - tmp.set_item(0, rc); - this->storage()._impl->SetElem_byShape_v2(tmp._impl, curr_shape, locators, Nunit, true); - } - template void Tensor_impl::set(const std::vector &, - const cytnx_complex128 &); - template void Tensor_impl::set(const std::vector &, - const cytnx_complex64 &); - template void Tensor_impl::set(const std::vector &, - const cytnx_double &); - template void Tensor_impl::set(const std::vector &, - const cytnx_float &); - template void Tensor_impl::set(const std::vector &, - const cytnx_int64 &); - template void Tensor_impl::set(const std::vector &, - const cytnx_uint64 &); - template void Tensor_impl::set(const std::vector &, - const cytnx_int32 &); - template void Tensor_impl::set(const std::vector &, - const cytnx_uint32 &); - template void Tensor_impl::set(const std::vector &, - const cytnx_int16 &); - template void Tensor_impl::set(const std::vector &, - const cytnx_uint16 &); - template void Tensor_impl::set(const std::vector &, - const cytnx_bool &); - template void Tensor_impl::set(const std::vector &, const Scalar &); - - void Tensor_impl::set(const std::vector &accessors, const Scalar::Sproxy &rc) { - this->set(accessors, Scalar(rc)); - } - - std::ostream &operator<<(std::ostream &os, const Tensor &in) { - if (in.is_contiguous()) - in._impl->storage()._impl->PrintElem_byShape(os, in.shape()); - else - in._impl->storage()._impl->PrintElem_byShape(os, in.shape(), in._impl->invmapper()); - return os; - } - std::ostream &operator<<(std::ostream &os, const Tensor::Tproxy &in) { - os << Tensor(in) << std::endl; - return os; - } - //=================================================================== - // wrapper - - void Tensor::Tofile(const std::string &fname) const { - if (!this->is_contiguous()) { - auto A = this->contiguous(); - A.storage().Tofile(fname); - } else { - this->_impl->_storage.Tofile(fname); - } - } - void Tensor::Tofile(const char *fname) const { - if (!this->is_contiguous()) { - auto A = this->contiguous(); - A.storage().Tofile(fname); - } else { - this->_impl->_storage.Tofile(fname); - } - } - void Tensor::Tofile(fstream &f) const { - if (!this->is_contiguous()) { - auto A = this->contiguous(); - A.storage().Tofile(f); - } else { - this->_impl->_storage.Tofile(f); - } - } - void Tensor::Save(const std::string &fname) const { - fstream f; - f.open((fname + ".cytn"), ios::out | ios::trunc | ios::binary); - if (!f.is_open()) { - cytnx_error_msg(true, "[ERROR] invalid file path for save.%s", "\n"); - } - this->_Save(f); - f.close(); - } - void Tensor::Save(const char *fname) const { - fstream f; - string ffname = string(fname) + ".cytn"; - f.open(ffname, ios::out | ios::trunc | ios::binary); - if (!f.is_open()) { - cytnx_error_msg(true, "[ERROR] invalid file path for save.%s", "\n"); - } - this->_Save(f); - f.close(); - } - void Tensor::_Save(fstream &f) const { - // header - // check: - cytnx_error_msg(!f.is_open(), "[ERROR] invalid fstream!.%s", "\n"); - - unsigned int IDDs = 888; - f.write((char *)&IDDs, sizeof(unsigned int)); - cytnx_uint64 shp = this->shape().size(); - cytnx_uint64 Conti = this->is_contiguous(); - f.write((char *)&shp, sizeof(cytnx_uint64)); - - f.write((char *)&Conti, sizeof(cytnx_uint64)); - f.write((char *)&this->_impl->_shape[0], sizeof(cytnx_uint64) * shp); - f.write((char *)&this->_impl->_mapper[0], sizeof(cytnx_uint64) * shp); - f.write((char *)&this->_impl->_invmapper[0], sizeof(cytnx_uint64) * shp); - - // pass to storage for save: - this->_impl->_storage._Save(f); - } - - Tensor Tensor::Fromfile(const std::string &fname, const unsigned int &dtype, - const cytnx_int64 &count) { - return Tensor::from_storage(Storage::Fromfile(fname, dtype, count)); - } - Tensor Tensor::Fromfile(const char *fname, const unsigned int &dtype, const cytnx_int64 &count) { - return Tensor::from_storage(Storage::Fromfile(fname, dtype, count)); - } - Tensor Tensor::Load(const std::string &fname) { - Tensor out; - fstream f; - f.open(fname, ios::in | ios::binary); - if (!f.is_open()) { - cytnx_error_msg(true, "[ERROR] invalid file path for load.%s", "\n"); - } - out._Load(f); - f.close(); - return out; - } - Tensor Tensor::Load(const char *fname) { - Tensor out; - fstream f; - f.open(fname, ios::in | ios::binary); - if (!f.is_open()) { - cytnx_error_msg(true, "[ERROR] invalid file path for load.%s", "\n"); - } - out._Load(f); - f.close(); - return out; - } - void Tensor::_Load(fstream &f) { - // header - // check: - cytnx_error_msg(!f.is_open(), "[ERROR] invalid fstream!.%s", "\n"); - - unsigned int tmpIDDs; - f.read((char *)&tmpIDDs, sizeof(unsigned int)); - cytnx_error_msg(tmpIDDs != 888, "[ERROR] the object is not a cytnx tensor!%s", "\n"); - - cytnx_uint64 shp; - cytnx_uint64 Conti; - f.read((char *)&shp, sizeof(cytnx_uint64)); - f.read((char *)&Conti, sizeof(cytnx_uint64)); - this->_impl->_contiguous = Conti; - - this->_impl->_shape.resize(shp); - this->_impl->_mapper.resize(shp); - this->_impl->_invmapper.resize(shp); - f.read((char *)&this->_impl->_shape[0], sizeof(cytnx_uint64) * shp); - f.read((char *)&this->_impl->_mapper[0], sizeof(cytnx_uint64) * shp); - f.read((char *)&this->_impl->_invmapper[0], sizeof(cytnx_uint64) * shp); - - // pass to storage for save: - this->_impl->_storage._Load(f); - } - - Tensor Tensor::real() { - Tensor out; - out._impl = this->_impl->_clone_meta_only(); - out._impl->_storage = this->_impl->_storage.real(); - return out; - }; - - Tensor Tensor::imag() { - Tensor out; - out._impl = this->_impl->_clone_meta_only(); - out._impl->_storage = this->_impl->_storage.imag(); - return out; - } - - ///@cond - // += - template <> - Tensor &Tensor::operator+=(const Tensor &rc) { - cytnx::linalg::iAdd(*this, rc); - return *this; - } - template <> - Tensor &Tensor::operator+=(const Tensor::Tproxy &rc) { - cytnx::linalg::iAdd(*this, Tensor(rc)); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_complex128 &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_complex64 &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_double &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_float &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_int64 &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_uint64 &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_int32 &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_uint32 &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_int16 &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_uint16 &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const cytnx_bool &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const Scalar &rc) { - this->_impl->storage() = cytnx::linalg::Add(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator+=(const Scalar::Sproxy &rc) { - return this->operator+=(Scalar(rc)); - } - // -= - template <> - Tensor &Tensor::operator-=(const Tensor &rc) { - cytnx::linalg::iSub(*this, rc); - return *this; - } - template <> - Tensor &Tensor::operator-=(const Tensor::Tproxy &rc) { - cytnx::linalg::iSub(*this, Tensor(rc)); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_complex128 &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_complex64 &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_double &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_float &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_int64 &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_uint64 &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_int32 &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_uint32 &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_int16 &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_uint16 &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const cytnx_bool &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const Scalar &rc) { - this->_impl->storage() = cytnx::linalg::Sub(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator-=(const Scalar::Sproxy &rc) { - return this->operator-=(Scalar(rc)); - } - // *= - template <> - Tensor &Tensor::operator*=(const Tensor &rc) { - cytnx::linalg::iMul(*this, rc); - return *this; - } - template <> - Tensor &Tensor::operator*=(const Tensor::Tproxy &rc) { - cytnx::linalg::iMul(*this, Tensor(rc)); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_complex128 &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_complex64 &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_double &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_float &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_int64 &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_uint64 &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_int32 &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_uint32 &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_int16 &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_uint16 &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const cytnx_bool &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const Scalar &rc) { - this->_impl->storage() = cytnx::linalg::Mul(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator*=(const Scalar::Sproxy &rc) { - return this->operator*=(Scalar(rc)); - } - - // /= - template <> - Tensor &Tensor::operator/=(const Tensor &rc) { - cytnx::linalg::iDiv(*this, rc); - return *this; - } - template <> - Tensor &Tensor::operator/=(const Tensor::Tproxy &rc) { - cytnx::linalg::iDiv(*this, Tensor(rc)); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_complex128 &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_complex64 &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_double &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_float &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_int64 &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_uint64 &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_int32 &rc) { - // std::cout << "entry /= int32" << std::endl; - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_uint32 &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_int16 &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_uint16 &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const cytnx_bool &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const Scalar &rc) { - this->_impl->storage() = cytnx::linalg::Div(*this, rc)._impl->storage(); - return *this; - } - template <> - Tensor &Tensor::operator/=(const Scalar::Sproxy &rc) { - return this->operator/=(Scalar(rc)); - } - ///@endcond - - // std::vector Tensor::Svd(const bool &is_U, const bool &is_vT) const { - // return linalg::Svd(*this, is_U, is_vT); - // } - std::vector Tensor::Svd(const bool &is_UvT) const { return linalg::Svd(*this, is_UvT); } - std::vector Tensor::Eigh(const bool &is_V, const bool &row_v) const { - return linalg::Eigh(*this, is_V, row_v); - } - - Tensor &Tensor::InvM_() { - linalg::InvM_(*this); - return *this; - } - Tensor Tensor::InvM() const { return linalg::InvM(*this); } - Tensor &Tensor::Inv_(const double &clip) { - linalg::Inv_(*this, clip); - return *this; - } - Tensor Tensor::Inv(const double &clip) const { return linalg::Inv(*this, clip); } - - Tensor &Tensor::Conj_() { - linalg::Conj_(*this); - return *this; - } - Tensor Tensor::Conj() const { return linalg::Conj(*this); } - - Tensor &Tensor::Exp_() { - linalg::Exp_(*this); - return *this; - } - Tensor Tensor::Exp() const { return linalg::Exp(*this); } - Tensor Tensor::Norm() const { return linalg::Norm(*this); } - - Tensor Tensor::Pow(const cytnx_double &p) const { return linalg::Pow(*this, p); } - - Tensor &Tensor::Pow_(const cytnx_double &p) { - linalg::Pow_(*this, p); - return *this; - } - - Tensor &Tensor::Abs_() { - linalg::Abs_(*this); - return *this; - } - Tensor Tensor::Abs() const { return linalg::Abs(*this); } - Tensor Tensor::Max() const { return linalg::Max(*this); } - Tensor Tensor::Min() const { return linalg::Min(*this); } - - Tensor Tensor::Trace(const cytnx_uint64 &a, const cytnx_uint64 &b) const { - Tensor out = linalg::Trace(*this, a, b); - return out; - } - - bool Tensor::same_data(const Tensor &rhs) const { - return is(this->_impl->storage(), rhs.storage()); - } - - //=========================== - // Tensor am Tproxy - Tensor operator+(const Tensor &lhs, const Tensor::Tproxy &rhs) { - return cytnx::linalg::Add(lhs, Tensor(rhs)); - } - Tensor operator-(const Tensor &lhs, const Tensor::Tproxy &rhs) { - return cytnx::linalg::Sub(lhs, Tensor(rhs)); - } - Tensor operator*(const Tensor &lhs, const Tensor::Tproxy &rhs) { - return cytnx::linalg::Mul(lhs, Tensor(rhs)); - } - Tensor operator/(const Tensor &lhs, const Tensor::Tproxy &rhs) { - return cytnx::linalg::Div(lhs, Tensor(rhs)); - } - - //=========================== - // Tensor am Sproxy - Tensor operator+(const Tensor &lhs, const Scalar::Sproxy &rhs) { - return cytnx::linalg::Add(lhs, Scalar(rhs)); - } - Tensor operator-(const Tensor &lhs, const Scalar::Sproxy &rhs) { - return cytnx::linalg::Sub(lhs, Scalar(rhs)); - } - Tensor operator*(const Tensor &lhs, const Scalar::Sproxy &rhs) { - return cytnx::linalg::Mul(lhs, Scalar(rhs)); - } - Tensor operator/(const Tensor &lhs, const Scalar::Sproxy &rhs) { - return cytnx::linalg::Div(lhs, Scalar(rhs)); - } - -} // namespace cytnx diff --git a/src/backend/BoolStorage.cpp b/src/backend/BoolStorage.cpp deleted file mode 100644 index 6285101a..00000000 --- a/src/backend/BoolStorage.cpp +++ /dev/null @@ -1,738 +0,0 @@ -#ifdef UNI_OMP - #include -#endif -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" -using namespace std; -using namespace cytnx; - -namespace cytnx { - //+++++++++++++++++++ - void BoolStorage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "Bool.init" << endl; - // check: - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.Bool; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(bool)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(bool)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(bool)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(bool)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void BoolStorage::_Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device, - const bool &iscap, const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - -#ifdef UNI_DEBUG - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); -#endif - this->dtype = Type.Bool; - this->device = device; - } - - boost::intrusive_ptr BoolStorage::_create_new_sametype() { - boost::intrusive_ptr out(new BoolStorage()); - return out; - } - - boost::intrusive_ptr BoolStorage::clone() { - boost::intrusive_ptr out(new BoolStorage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(bool) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors( - cudaMemcpy(out->Mem, this->Mem, sizeof(bool) * this->len, cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void BoolStorage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_b(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - utils_internal::cuMovemem_gpu_b(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr BoolStorage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_b(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - return utils_internal::cuMovemem_gpu_b(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - void BoolStorage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(bool) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(bool) * this->cap, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(bool) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(bool) * this->cap, cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(bool) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(bool) * this->cap)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr BoolStorage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(bool) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(bool) * this->cap, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new BoolStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(bool) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(bool) * this->cap, cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new BoolStorage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(bool) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(bool) * this->cap)); - boost::intrusive_ptr out(new BoolStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void BoolStorage::PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << string(buffer); - sprintf(buffer, " (%lu", shape[0]); - os << string(buffer); - for (cytnx_size_t i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%lu", shape[i]); - os << string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - cytnx_bool *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - cytnx_uint64 cnt = 0; - while (1) { - for (cytnx_size_t i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << string(buffer); - stk.pop_back(); - } - } - for (cytnx_size_t i = 0; i < shape.back(); i++) { - stk2.back() = i; - if (elem_ptr_[cnt]) { - sprintf(buffer, "True %s", " "); - os << string(buffer); - } else { - sprintf(buffer, "False%s", " "); - os << string(buffer); - } - cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (true) { - for (cytnx_size_t i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - if (elem_ptr_[RealMemPos]) { - sprintf(buffer, "True %s", " "); - os << string(buffer); - } else { - sprintf(buffer, "False%s", " "); - os << string(buffer); - } - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void BoolStorage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - auto *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - if (elem_ptr_[cnt]) { - sprintf(buffer, "True %s", " "); - cout << string(buffer); - } else { - sprintf(buffer, "False%s", " "); - cout << string(buffer); - } - } - cout << "]" << endl; - free(buffer); - } - - void BoolStorage::fill(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void BoolStorage::fill(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void BoolStorage::fill(const cytnx_double &val) { - cytnx_bool tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void BoolStorage::fill(const cytnx_float &val) { - cytnx_bool tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void BoolStorage::fill(const cytnx_int64 &val) { - cytnx_bool tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void BoolStorage::fill(const cytnx_uint64 &val) { - cytnx_bool tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void BoolStorage::fill(const cytnx_int32 &val) { - cytnx_bool tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void BoolStorage::fill(const cytnx_uint32 &val) { - cytnx_bool tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void BoolStorage::fill(const cytnx_int16 &val) { - cytnx_bool tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void BoolStorage::fill(const cytnx_uint16 &val) { - cytnx_bool tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void BoolStorage::fill(const cytnx_bool &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void BoolStorage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_bool) * this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_bool) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void BoolStorage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_bool)); - memcpy(htmp, this->Mem, sizeof(cytnx_bool) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_bool)); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_bool) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void BoolStorage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = bool(val); - } - void BoolStorage::append(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void BoolStorage::append(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void BoolStorage::append(const cytnx_double &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void BoolStorage::append(const cytnx_float &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void BoolStorage::append(const cytnx_int64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void BoolStorage::append(const cytnx_int32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void BoolStorage::append(const cytnx_int16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void BoolStorage::append(const cytnx_uint64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void BoolStorage::append(const cytnx_uint32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void BoolStorage::append(const cytnx_uint16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void BoolStorage::append(const cytnx_bool &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - boost::intrusive_ptr BoolStorage::real() { - cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", "\n"); - } - boost::intrusive_ptr BoolStorage::imag() { - cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", "\n"); - } - - Scalar BoolStorage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void BoolStorage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = cytnx_bool(val); - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - // this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - // this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void BoolStorage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool BoolStorage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } - -} // namespace cytnx diff --git a/src/backend/CMakeLists.txt b/src/backend/CMakeLists.txt index f4b71cf5..adc80a83 100644 --- a/src/backend/CMakeLists.txt +++ b/src/backend/CMakeLists.txt @@ -21,18 +21,6 @@ target_sources_local(cytnx Scalar.cpp Storage.cpp Storage_base.cpp - ComplexDoubleStorage.cpp - ComplexFloatStorage.cpp - DoubleStorage.cpp - FloatStorage.cpp - Uint64Storage.cpp - Int64Storage.cpp - Uint32Storage.cpp - Int32Storage.cpp - Uint16Storage.cpp - Int16Storage.cpp - BoolStorage.cpp - Tensor_impl.cpp ) diff --git a/src/backend/ComplexDoubleStorage.cpp b/src/backend/ComplexDoubleStorage.cpp deleted file mode 100644 index e1f4d1b3..00000000 --- a/src/backend/ComplexDoubleStorage.cpp +++ /dev/null @@ -1,821 +0,0 @@ -#ifdef UNI_OMP - #include -#endif -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" - -using namespace std; - -namespace cytnx { - //+++++++++++++++++++ - void ComplexDoubleStorage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "ComplexDouble.init" << endl; - // check: - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.ComplexDouble; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(complex)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(complex)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - checkCudaErrors(cudaSetDevice(device)); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(complex)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(complex)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void ComplexDoubleStorage::_Init_byptr(void *rawptr, const unsigned long long &len_in, - const int &device, const bool &iscap, - const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - -#ifdef UNI_DEBUG - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); -#endif - this->device = device; - this->dtype = Type.ComplexDouble; - } - - boost::intrusive_ptr ComplexDoubleStorage::_create_new_sametype() { - boost::intrusive_ptr out(new ComplexDoubleStorage()); - return out; - } - boost::intrusive_ptr ComplexDoubleStorage::clone() { - boost::intrusive_ptr out(new ComplexDoubleStorage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(cytnx_complex128) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors(cudaMemcpy(out->Mem, this->Mem, sizeof(cytnx_complex128) * this->len, - cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void ComplexDoubleStorage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_cd(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuMovemem_gpu_cd(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr ComplexDoubleStorage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_cd(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - return utils_internal::cuMovemem_gpu_cd(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - void ComplexDoubleStorage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_complex128) * this->cap); - checkCudaErrors(cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_complex128) * this->len, - cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_complex128) * this->cap); - checkCudaErrors(cudaMemcpy(htmp, this->Mem, sizeof(cytnx_complex128) * this->len, - cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_complex128) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_complex128) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr ComplexDoubleStorage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_complex128) * this->cap); - checkCudaErrors(cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_complex128) * this->len, - cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new ComplexDoubleStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_complex128) * this->cap); - checkCudaErrors(cudaMemcpy(htmp, this->Mem, sizeof(cytnx_complex128) * this->len, - cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new ComplexDoubleStorage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_complex128) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_complex128) * this->len)); - boost::intrusive_ptr out(new ComplexDoubleStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void ComplexDoubleStorage::PrintElem_byShape(std::ostream &os, - const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)calloc(1024, sizeof(char)); - - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << std::string(buffer); - sprintf(buffer, " (%llu", shape[0]); - os << std::string(buffer); - for (cytnx_int32 i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%llu", shape[i]); - os << std::string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - auto *elem_ptr_ = static_cast(this->Mem); - - if (mapper.empty()) { - cytnx_uint64 cnt = 0; - while (true) { - for (cytnx_size_t i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_size_t i = 0; i < shape.back(); i++) { - stk2.back() = i; - - sprintf(buffer, "%.5e%+.5ej ", elem_ptr_[cnt].real(), elem_ptr_[cnt].imag()); - os << std::string(buffer); - cnt++; - } - - s = 0; - while (true) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%.5e%+.5ej ", elem_ptr_[RealMemPos].real(), - elem_ptr_[RealMemPos].imag()); - os << std::string(buffer); - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void ComplexDoubleStorage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - cytnx_complex128 *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%.5e%+.5ej ", elem_ptr_[cnt].real(), elem_ptr_[cnt].imag()); - cout << string(buffer); - } - cout << " ]" << endl; - free(buffer); - } - - void ComplexDoubleStorage::fill(const cytnx_complex128 &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_complex64 &val) { - cytnx_complex128 tmp(val.real(), val.imag()); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_double &val) { - cytnx_complex128 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_float &val) { - cytnx_complex128 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_int64 &val) { - cytnx_complex128 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_uint64 &val) { - cytnx_complex128 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_int32 &val) { - cytnx_complex128 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_uint32 &val) { - cytnx_complex128 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_int16 &val) { - cytnx_complex128 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_uint16 &val) { - cytnx_complex128 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexDoubleStorage::fill(const cytnx_bool &val) { - cytnx_complex128 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void ComplexDoubleStorage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_complex128) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_complex128) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void ComplexDoubleStorage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_complex128)); - memcpy(htmp, this->Mem, sizeof(cytnx_complex128) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_complex128)); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_complex128) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void ComplexDoubleStorage::append(const Scalar &val) { - // cytnx_complex128 tmp(val.real(),val.imag()); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = complex128(val); - } - - void ComplexDoubleStorage::append(const cytnx_complex128 &val) { - // cytnx_complex128 tmp(val.real(),val.imag()); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void ComplexDoubleStorage::append(const cytnx_complex64 &val) { - cytnx_complex128 tmp(val.real(), val.imag()); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexDoubleStorage::append(const cytnx_double &val) { - cytnx_complex128 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexDoubleStorage::append(const cytnx_float &val) { - cytnx_complex128 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexDoubleStorage::append(const cytnx_int64 &val) { - cytnx_complex128 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexDoubleStorage::append(const cytnx_int32 &val) { - cytnx_complex128 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexDoubleStorage::append(const cytnx_int16 &val) { - cytnx_complex128 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexDoubleStorage::append(const cytnx_uint64 &val) { - cytnx_complex128 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexDoubleStorage::append(const cytnx_uint32 &val) { - cytnx_complex128 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexDoubleStorage::append(const cytnx_uint16 &val) { - cytnx_complex128 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexDoubleStorage::append(const cytnx_bool &val) { - cytnx_complex128 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - - boost::intrusive_ptr ComplexDoubleStorage::real() { - if (this->device == Device.cpu) { - boost::intrusive_ptr out(new DoubleStorage()); - void *dtmp = malloc(sizeof(cytnx_double) * this->cap); - utils_internal::Complexmem_cpu_cdtd(dtmp, this->Mem, this->len, true); - out->_Init_byptr(dtmp, this->len, this->device, true, this->cap); - return out; - } else { -#ifdef UNI_GPU - boost::intrusive_ptr out(new DoubleStorage()); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_double) * this->cap); - utils_internal::cuComplexmem_gpu_cdtd(dtmp, this->Mem, this->len, true); - out->_Init_byptr(dtmp, this->len, this->device, true, this->cap); - return out; -#else - cytnx_error_msg( - 1, "%s", "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - boost::intrusive_ptr ComplexDoubleStorage::imag() { - if (this->device == Device.cpu) { - boost::intrusive_ptr out(new DoubleStorage()); - void *dtmp = malloc(sizeof(cytnx_double) * this->cap); - utils_internal::Complexmem_cpu_cdtd(dtmp, this->Mem, this->len, false); - out->_Init_byptr(dtmp, this->len, this->device, true, this->cap); - return out; - } else { -#ifdef UNI_GPU - boost::intrusive_ptr out(new DoubleStorage()); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_double) * this->cap); - utils_internal::cuComplexmem_gpu_cdtd(dtmp, this->Mem, this->len, false); - out->_Init_byptr(dtmp, this->len, this->device, true, this->cap); - return out; -#else - cytnx_error_msg( - 1, "%s", "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - - Scalar ComplexDoubleStorage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = complex128(val); - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void ComplexDoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool ComplexDoubleStorage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } - -} // namespace cytnx diff --git a/src/backend/ComplexFloatStorage.cpp b/src/backend/ComplexFloatStorage.cpp deleted file mode 100644 index 219eb304..00000000 --- a/src/backend/ComplexFloatStorage.cpp +++ /dev/null @@ -1,819 +0,0 @@ -#ifdef UNI_OMP - #include -#endif -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" -using namespace std; - -namespace cytnx { - - //+++++++++++++++++++ - void ComplexFloatStorage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "ComplexFloat.init" << endl; - // check: - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.ComplexFloat; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(complex)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(complex)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(complex)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(complex)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void ComplexFloatStorage::_Init_byptr(void *rawptr, const unsigned long long &len_in, - const int &device, const bool &iscap, - const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - - if (User_debug) { - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); - } - this->dtype = Type.ComplexFloat; - this->device = device; - } - - boost::intrusive_ptr ComplexFloatStorage::_create_new_sametype() { - boost::intrusive_ptr out(new ComplexFloatStorage()); - return out; - } - boost::intrusive_ptr ComplexFloatStorage::clone() { - boost::intrusive_ptr out(new ComplexFloatStorage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(cytnx_complex64) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors(cudaMemcpy(out->Mem, this->Mem, sizeof(cytnx_complex64) * this->len, - cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void ComplexFloatStorage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_cf(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuMovemem_gpu_cf(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr ComplexFloatStorage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_cf(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - return utils_internal::cuMovemem_gpu_cf(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - void ComplexFloatStorage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_complex64) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_complex64) * this->len, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_complex64) * this->cap); - checkCudaErrors(cudaMemcpy(htmp, this->Mem, sizeof(cytnx_complex64) * this->len, - cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_complex64) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_complex64) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr ComplexFloatStorage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_complex64) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_complex64) * this->len, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new ComplexFloatStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_complex64) * this->cap); - checkCudaErrors(cudaMemcpy(htmp, this->Mem, sizeof(cytnx_complex64) * this->len, - cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new ComplexFloatStorage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_complex64) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_complex64) * this->len)); - boost::intrusive_ptr out(new ComplexFloatStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void ComplexFloatStorage::PrintElem_byShape(std::ostream &os, - const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << std::string(buffer); - sprintf(buffer, " (%d", shape[0]); - os << std::string(buffer); - for (cytnx_int32 i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%d", shape[i]); - os << std::string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - cytnx_complex64 *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - cytnx_uint64 cnt = 0; - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - sprintf(buffer, "%.5e%+.5ej ", elem_ptr_[cnt].real(), elem_ptr_[cnt].imag()); - os << std::string(buffer); - cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%.5e%+.5ej ", elem_ptr_[RealMemPos].real(), - elem_ptr_[RealMemPos].imag()); - os << std::string(buffer); - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void ComplexFloatStorage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - cytnx_complex64 *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%.5e%+.5ej ", elem_ptr_[cnt].real(), elem_ptr_[cnt].imag()); - cout << std::string(buffer); - } - cout << " ]" << endl; - free(buffer); - } - - void ComplexFloatStorage::fill(const cytnx_complex128 &val) { - cytnx_complex64 tmp(val.real(), val.imag()); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_complex64 &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_double &val) { - cytnx_complex64 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_float &val) { - cytnx_complex64 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_int64 &val) { - cytnx_complex64 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_uint64 &val) { - cytnx_complex64 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_int32 &val) { - cytnx_complex64 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_uint32 &val) { - cytnx_complex64 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_int16 &val) { - cytnx_complex64 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_uint16 &val) { - cytnx_complex64 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void ComplexFloatStorage::fill(const cytnx_bool &val) { - cytnx_complex64 tmp(val, 0); - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void ComplexFloatStorage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_complex64) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_complex64) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void ComplexFloatStorage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_complex64)); - memcpy(htmp, this->Mem, sizeof(cytnx_complex64) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_complex64)); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_complex64) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void ComplexFloatStorage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = complex64(val); - } - void ComplexFloatStorage::append(const cytnx_complex128 &val) { - cytnx_complex64 tmp(val.real(), val.imag()); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexFloatStorage::append(const cytnx_complex64 &val) { - // cytnx_complex64 tmp(val.real(),val.imag()); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void ComplexFloatStorage::append(const cytnx_double &val) { - cytnx_complex64 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexFloatStorage::append(const cytnx_float &val) { - cytnx_complex64 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexFloatStorage::append(const cytnx_int64 &val) { - cytnx_complex64 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexFloatStorage::append(const cytnx_int32 &val) { - cytnx_complex64 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexFloatStorage::append(const cytnx_int16 &val) { - cytnx_complex64 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexFloatStorage::append(const cytnx_uint64 &val) { - cytnx_complex64 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexFloatStorage::append(const cytnx_uint32 &val) { - cytnx_complex64 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexFloatStorage::append(const cytnx_uint16 &val) { - cytnx_complex64 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - void ComplexFloatStorage::append(const cytnx_bool &val) { - cytnx_complex64 tmp(val, 0); - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = tmp; - } - - boost::intrusive_ptr ComplexFloatStorage::real() { - if (this->device == Device.cpu) { - boost::intrusive_ptr out(new FloatStorage()); - void *dtmp = malloc(sizeof(cytnx_float) * this->cap); - utils_internal::Complexmem_cpu_cftf(dtmp, this->Mem, this->len, true); - out->_Init_byptr(dtmp, this->len, this->device, true, this->cap); - return out; - } else { -#ifdef UNI_GPU - boost::intrusive_ptr out(new FloatStorage()); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_float) * this->cap); - utils_internal::cuComplexmem_gpu_cftf(dtmp, this->Mem, this->len, true); - out->_Init_byptr(dtmp, this->len, this->device, true, this->cap); - return out; -#else - cytnx_error_msg( - 1, "%s", "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - boost::intrusive_ptr ComplexFloatStorage::imag() { - if (this->device == Device.cpu) { - boost::intrusive_ptr out(new FloatStorage()); - void *dtmp = malloc(sizeof(cytnx_float) * this->cap); - utils_internal::Complexmem_cpu_cftf(dtmp, this->Mem, this->len, false); - out->_Init_byptr(dtmp, this->len, this->device, true, this->cap); - return out; - } else { -#ifdef UNI_GPU - boost::intrusive_ptr out(new FloatStorage()); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_float) * this->cap); - utils_internal::cuComplexmem_gpu_cftf(dtmp, this->Mem, this->len, false); - out->_Init_byptr(dtmp, this->len, this->device, true, this->cap); - return out; -#else - cytnx_error_msg( - 1, "%s", "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - Scalar ComplexFloatStorage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = complex64(val); - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void ComplexFloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool ComplexFloatStorage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } - -} // namespace cytnx diff --git a/src/backend/DoubleStorage.cpp b/src/backend/DoubleStorage.cpp deleted file mode 100644 index 69000037..00000000 --- a/src/backend/DoubleStorage.cpp +++ /dev/null @@ -1,742 +0,0 @@ -#ifdef UNI_OMP - #include -#endif -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" - -using namespace std; -namespace cytnx { - - void DoubleStorage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "Double.init" << endl; - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.Double; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(double)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(double)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - checkCudaErrors(cudaSetDevice(device)); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(double)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(double)); - -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void DoubleStorage::_Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device, - const bool &iscap, const unsigned long long &cap_in) { - //[note], this is an internal function, the device should match the device_id that allocate the - // pointer if the pointer is on GPU device. - - this->Mem = rawptr; - this->len = len_in; - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - -#ifdef UNI_DEBUG - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); -#endif - this->dtype = Type.Double; - this->device = device; - } - - boost::intrusive_ptr DoubleStorage::_create_new_sametype() { - boost::intrusive_ptr out(new DoubleStorage()); - return out; - } - - boost::intrusive_ptr DoubleStorage::clone() { - boost::intrusive_ptr out(new DoubleStorage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(double) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors( - cudaMemcpy(out->Mem, this->Mem, sizeof(double) * this->len, cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void DoubleStorage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_d(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuMovemem_gpu_d(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr DoubleStorage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - // cout << this->device << " " << Device.cpu << endl; - if (this->device == Device.cpu) { - // cout << "[OK]" << endl; - return utils_internal::Movemem_cpu_d(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - return utils_internal::cuMovemem_gpu_d(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - - void DoubleStorage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(double) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(double) * this->len, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(double) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(double) * this->len, cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(double) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(double) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr DoubleStorage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(double) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(double) * this->len, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new DoubleStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(double) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(double) * this->len, cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new DoubleStorage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(double) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(double) * this->len)); - boost::intrusive_ptr out(new DoubleStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void DoubleStorage::PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << std::string(buffer); - sprintf(buffer, " (%ld", shape[0]); - os << std::string(buffer); - for (cytnx_int32 i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%ld", shape[i]); - os << std::string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - cytnx_double *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - // cout << shape.size() << endl; - cytnx_uint64 cnt = 0; - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - sprintf(buffer, "%.5e ", elem_ptr_[cnt]); - os << std::string(buffer); - cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%.5e ", elem_ptr_[RealMemPos]); - os << std::string(buffer); - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void DoubleStorage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - cytnx_double *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%.5e ", elem_ptr_[cnt]); - std::cout << std::string(buffer); - } - cout << "]" << endl; - free(buffer); - } - - void DoubleStorage::fill(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void DoubleStorage::fill(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void DoubleStorage::fill(const cytnx_double &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void DoubleStorage::fill(const cytnx_float &val) { - cytnx_double tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void DoubleStorage::fill(const cytnx_int64 &val) { - cytnx_double tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void DoubleStorage::fill(const cytnx_uint64 &val) { - cytnx_double tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void DoubleStorage::fill(const cytnx_int32 &val) { - cytnx_double tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void DoubleStorage::fill(const cytnx_uint32 &val) { - cytnx_double tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void DoubleStorage::fill(const cytnx_int16 &val) { - cytnx_double tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void DoubleStorage::fill(const cytnx_uint16 &val) { - cytnx_double tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void DoubleStorage::fill(const cytnx_bool &val) { - cytnx_double tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void DoubleStorage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_double) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_double) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void DoubleStorage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_double)); - memcpy(htmp, this->Mem, sizeof(cytnx_double) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_double)); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_double) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void DoubleStorage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = double(val); - } - - void DoubleStorage::append(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void DoubleStorage::append(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void DoubleStorage::append(const cytnx_double &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void DoubleStorage::append(const cytnx_float &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void DoubleStorage::append(const cytnx_int64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void DoubleStorage::append(const cytnx_int32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void DoubleStorage::append(const cytnx_int16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void DoubleStorage::append(const cytnx_uint64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void DoubleStorage::append(const cytnx_uint32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void DoubleStorage::append(const cytnx_uint16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void DoubleStorage::append(const cytnx_bool &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - - boost::intrusive_ptr DoubleStorage::real() { - cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", "\n"); - } - boost::intrusive_ptr DoubleStorage::imag() { - cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", "\n"); - } - Scalar DoubleStorage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void DoubleStorage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = cytnx_double(val); - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void DoubleStorage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool DoubleStorage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } - -} // namespace cytnx diff --git a/src/backend/FloatStorage.cpp b/src/backend/FloatStorage.cpp deleted file mode 100644 index ef7239fb..00000000 --- a/src/backend/FloatStorage.cpp +++ /dev/null @@ -1,722 +0,0 @@ -#ifdef UNI_OMP - #include -#endif -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" -using namespace std; -using namespace cytnx; - -namespace cytnx { - //+++++++++++++++++++ - void FloatStorage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "Float.init" << endl; - // check: - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.Float; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(float)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(float)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - checkCudaErrors(cudaSetDevice(device)); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(float)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(float)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void FloatStorage::_Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device, - const bool &iscap, const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - -#ifdef UNI_DEBUG - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); -#endif - this->dtype = Type.Float; - this->device = device; - } - - boost::intrusive_ptr FloatStorage::_create_new_sametype() { - boost::intrusive_ptr out(new FloatStorage()); - return out; - } - - boost::intrusive_ptr FloatStorage::clone() { - boost::intrusive_ptr out(new FloatStorage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(float) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors( - cudaMemcpy(out->Mem, this->Mem, sizeof(float) * this->len, cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void FloatStorage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_f(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - utils_internal::cuMovemem_gpu_f(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr FloatStorage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_f(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - return utils_internal::cuMovemem_gpu_f(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - void FloatStorage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(float) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(float) * this->len, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(float) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(float) * this->len, cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(float) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(float) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr FloatStorage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(float) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(float) * this->len, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new FloatStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(float) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(float) * this->len, cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new FloatStorage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(float) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(float) * this->len)); - boost::intrusive_ptr out(new FloatStorage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void FloatStorage::PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << std::string(buffer); - sprintf(buffer, " (%d", shape[0]); - os << std::string(buffer); - for (cytnx_int32 i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%d", shape[i]); - os << std::string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - cytnx_float *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - cytnx_uint64 cnt = 0; - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - sprintf(buffer, "%.5e ", elem_ptr_[cnt]); - os << std::string(buffer); - cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%.5e ", elem_ptr_[RealMemPos]); - os << std::string(buffer); - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void FloatStorage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - cytnx_float *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%.5e ", elem_ptr_[cnt]); - cout << std::string(buffer); - } - cout << "]" << endl; - free(buffer); - } - - void FloatStorage::fill(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void FloatStorage::fill(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void FloatStorage::fill(const cytnx_double &val) { - cytnx_float tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void FloatStorage::fill(const cytnx_float &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void FloatStorage::fill(const cytnx_int64 &val) { - cytnx_float tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void FloatStorage::fill(const cytnx_uint64 &val) { - cytnx_float tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void FloatStorage::fill(const cytnx_int32 &val) { - cytnx_float tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void FloatStorage::fill(const cytnx_uint32 &val) { - cytnx_float tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void FloatStorage::fill(const cytnx_int16 &val) { - cytnx_float tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void FloatStorage::fill(const cytnx_uint16 &val) { - cytnx_float tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void FloatStorage::fill(const cytnx_bool &val) { - cytnx_float tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void FloatStorage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_float) * this->len); - } else { -#ifdef UNI_GPU - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_float) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void FloatStorage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_float)); - memcpy(htmp, this->Mem, sizeof(cytnx_float) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_float)); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(float) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void FloatStorage::append(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void FloatStorage::append(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - - void FloatStorage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = float(val); - } - void FloatStorage::append(const cytnx_double &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void FloatStorage::append(const cytnx_float &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void FloatStorage::append(const cytnx_int64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void FloatStorage::append(const cytnx_int32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void FloatStorage::append(const cytnx_int16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void FloatStorage::append(const cytnx_uint64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void FloatStorage::append(const cytnx_uint32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void FloatStorage::append(const cytnx_uint16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void FloatStorage::append(const cytnx_bool &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - - boost::intrusive_ptr FloatStorage::real() { - cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", "\n"); - } - boost::intrusive_ptr FloatStorage::imag() { - cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", "\n"); - } - Scalar FloatStorage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void FloatStorage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = cytnx_float(val); - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void FloatStorage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool FloatStorage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } -} // namespace cytnx diff --git a/src/backend/Int16Storage.cpp b/src/backend/Int16Storage.cpp deleted file mode 100644 index e8e909a9..00000000 --- a/src/backend/Int16Storage.cpp +++ /dev/null @@ -1,729 +0,0 @@ -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" - -using namespace std; - -namespace cytnx { - void Int16Storage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "Int16.init" << endl; - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.Int16; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(cytnx_int16)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(cytnx_int16)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(cytnx_int16)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_int16)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void Int16Storage::_Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device, - const bool &iscap, const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - - if (User_debug) { - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); - } - this->dtype = Type.Int16; - this->device = device; - } - - boost::intrusive_ptr Int16Storage::_create_new_sametype() { - boost::intrusive_ptr out(new Int16Storage()); - return out; - } - - boost::intrusive_ptr Int16Storage::clone() { - boost::intrusive_ptr out(new Int16Storage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(cytnx_int16) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors( - cudaMemcpy(out->Mem, this->Mem, sizeof(cytnx_int16) * this->len, cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void Int16Storage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_i16(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuMovemem_gpu_i16(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr Int16Storage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_i16(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - return utils_internal::cuMovemem_gpu_i16(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - void Int16Storage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int16) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_int16) * this->len, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_int16) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_int16) * this->len, cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int16) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_int16) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr Int16Storage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int16) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_int16) * this->len, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new Int16Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_int16) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_int16) * this->len, cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new Int16Storage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int16) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_int16) * this->len)); - boost::intrusive_ptr out(new Int16Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void Int16Storage::PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << std::string(buffer); - sprintf(buffer, " (%d", shape[0]); - os << std::string(buffer); - for (cytnx_int32 i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%d", shape[i]); - os << std::string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - cytnx_int16 *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - cytnx_uint64 cnt = 0; - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - sprintf(buffer, "%+5d ", elem_ptr_[cnt]); - os << std::string(buffer); - cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%+5d ", elem_ptr_[RealMemPos]); - os << std::string(buffer); - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void Int16Storage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - cytnx_int16 *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%+5d ", elem_ptr_[cnt]); - cout << std::string(buffer); - } - cout << " ]" << endl; - free(buffer); - } - - void Int16Storage::fill(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Int16Storage::fill(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Int16Storage::fill(const cytnx_double &val) { - cytnx_int16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int16Storage::fill(const cytnx_float &val) { - cytnx_int16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int16Storage::fill(const cytnx_int64 &val) { - cytnx_int16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int16Storage::fill(const cytnx_uint64 &val) { - cytnx_int16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int16Storage::fill(const cytnx_int32 &val) { - cytnx_int16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int16Storage::fill(const cytnx_uint32 &val) { - cytnx_int16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int16Storage::fill(const cytnx_uint16 &val) { - cytnx_int16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int16Storage::fill(const cytnx_int16 &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int16Storage::fill(const cytnx_bool &val) { - cytnx_int16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void Int16Storage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_int16) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_int16) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void Int16Storage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_int16)); - memcpy(htmp, this->Mem, sizeof(cytnx_int16) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_int16)); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_int16) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void Int16Storage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = cytnx_int16(val); - } - void Int16Storage::append(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Int16Storage::append(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Int16Storage::append(const cytnx_double &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int16Storage::append(const cytnx_float &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int16Storage::append(const cytnx_int64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int16Storage::append(const cytnx_int32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int16Storage::append(const cytnx_int16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int16Storage::append(const cytnx_uint64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int16Storage::append(const cytnx_uint32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int16Storage::append(const cytnx_uint16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int16Storage::append(const cytnx_bool &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - - boost::intrusive_ptr Int16Storage::real() { - cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", "\n"); - } - boost::intrusive_ptr Int16Storage::imag() { - cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", "\n"); - } - - Scalar Int16Storage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void Int16Storage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = cytnx_int16(val); - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void Int16Storage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool Int16Storage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } -} // namespace cytnx diff --git a/src/backend/Int32Storage.cpp b/src/backend/Int32Storage.cpp deleted file mode 100644 index 30088d61..00000000 --- a/src/backend/Int32Storage.cpp +++ /dev/null @@ -1,730 +0,0 @@ -#ifdef UNI_OMP - #include -#endif -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" - -using namespace std; - -namespace cytnx { - void Int32Storage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "Int32.init" << endl; - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.Int32; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(cytnx_int32)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(cytnx_int32)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(cytnx_int32)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_int32)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void Int32Storage::_Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device, - const bool &iscap, const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - -#ifdef UNI_DEBUG - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); -#endif - this->dtype = Type.Int32; - this->device = device; - } - - boost::intrusive_ptr Int32Storage::_create_new_sametype() { - boost::intrusive_ptr out(new Int32Storage()); - return out; - } - - boost::intrusive_ptr Int32Storage::clone() { - boost::intrusive_ptr out(new Int32Storage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(cytnx_int32) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors( - cudaMemcpy(out->Mem, this->Mem, sizeof(cytnx_int32) * this->len, cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void Int32Storage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_i32(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuMovemem_gpu_i32(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr Int32Storage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_i32(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - return utils_internal::cuMovemem_gpu_i32(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - void Int32Storage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int32) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_int32) * this->len, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_int32) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_int32) * this->len, cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int32) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_int32) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr Int32Storage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int32) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_int32) * this->len, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new Int32Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_int32) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_int32) * this->len, cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new Int32Storage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int32) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_int32) * this->len)); - boost::intrusive_ptr out(new Int32Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void Int32Storage::PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << std::string(buffer); - sprintf(buffer, " (%llu", shape[0]); - os << std::string(buffer); - for (cytnx_int32 i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%llu", shape[i]); - os << std::string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - cytnx_int32 *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - cytnx_uint64 cnt = 0; - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - sprintf(buffer, "%+10d ", elem_ptr_[cnt]); - os << std::string(buffer); - cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%+10d ", elem_ptr_[RealMemPos]); - os << std::string(buffer); - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void Int32Storage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - cytnx_int32 *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%+10d ", elem_ptr_[cnt]); - cout << std::string(buffer); - } - cout << " ]" << endl; - free(buffer); - } - - void Int32Storage::fill(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Int32Storage::fill(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Int32Storage::fill(const cytnx_double &val) { - cytnx_int32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int32Storage::fill(const cytnx_float &val) { - cytnx_int32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int32Storage::fill(const cytnx_int64 &val) { - cytnx_int32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int32Storage::fill(const cytnx_uint64 &val) { - cytnx_int32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int32Storage::fill(const cytnx_int32 &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int32Storage::fill(const cytnx_uint32 &val) { - cytnx_int32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int32Storage::fill(const cytnx_int16 &val) { - cytnx_int32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int32Storage::fill(const cytnx_uint16 &val) { - cytnx_int32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int32Storage::fill(const cytnx_bool &val) { - cytnx_int32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void Int32Storage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_int32) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_int32) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void Int32Storage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_int32)); - memcpy(htmp, this->Mem, sizeof(cytnx_int32) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_int32)); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_int32) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void Int32Storage::append(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Int32Storage::append(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Int32Storage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = cytnx_int32(val); - } - void Int32Storage::append(const cytnx_double &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int32Storage::append(const cytnx_float &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int32Storage::append(const cytnx_int64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int32Storage::append(const cytnx_int32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int32Storage::append(const cytnx_int16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int32Storage::append(const cytnx_uint64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int32Storage::append(const cytnx_uint32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int32Storage::append(const cytnx_uint16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int32Storage::append(const cytnx_bool &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - boost::intrusive_ptr Int32Storage::real() { - cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", "\n"); - } - boost::intrusive_ptr Int32Storage::imag() { - cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", "\n"); - } - Scalar Int32Storage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void Int32Storage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = cytnx_int32(val); - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void Int32Storage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool Int32Storage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } -} // namespace cytnx diff --git a/src/backend/Int64Storage.cpp b/src/backend/Int64Storage.cpp deleted file mode 100644 index adbcba8a..00000000 --- a/src/backend/Int64Storage.cpp +++ /dev/null @@ -1,730 +0,0 @@ -#ifdef UNI_OMP - #include -#endif -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" -using namespace std; - -namespace cytnx { - void Int64Storage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "Int64.init" << endl; - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.Int64; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(cytnx_int64)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(cytnx_int64)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(cytnx_int64)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_int64)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void Int64Storage::_Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device, - const bool &iscap, const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - -#ifdef UNI_DEBUG - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); -#endif - this->dtype = Type.Int64; - this->device = device; - } - - boost::intrusive_ptr Int64Storage::_create_new_sametype() { - boost::intrusive_ptr out(new Int64Storage()); - return out; - } - - boost::intrusive_ptr Int64Storage::clone() { - boost::intrusive_ptr out(new Int64Storage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(cytnx_int64) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors( - cudaMemcpy(out->Mem, this->Mem, sizeof(cytnx_int64) * this->len, cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void Int64Storage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_i64(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuMovemem_gpu_i64(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr Int64Storage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_i64(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - return utils_internal::cuMovemem_gpu_i64(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - - void Int64Storage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int64) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_int64) * this->len, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_int64) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_int64) * this->len, cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int64) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_int64) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr Int64Storage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int64) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_int64) * this->len, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new Int64Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_int64) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_int64) * this->len, cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new Int64Storage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_int64) * this->cap); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_int64) * this->len)); - boost::intrusive_ptr out(new Int64Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void Int64Storage::PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << string(buffer); - sprintf(buffer, " (%llu", shape[0]); - os << string(buffer); - for (cytnx_int32 i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%llu", shape[i]); - os << string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - cytnx_int64 *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - cytnx_uint64 cnt = 0; - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - sprintf(buffer, "%+19lld ", elem_ptr_[cnt]); - os << string(buffer); - cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%+19lld ", elem_ptr_[RealMemPos]); - os << string(buffer); - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void Int64Storage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - cytnx_int64 *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%+19lld ", elem_ptr_[cnt]); - cout << string(buffer); - } - cout << " ]" << endl; - free(buffer); - } - - void Int64Storage::fill(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Int64Storage::fill(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Int64Storage::fill(const cytnx_double &val) { - cytnx_int64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int64Storage::fill(const cytnx_float &val) { - cytnx_int64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int64Storage::fill(const cytnx_int64 &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int64Storage::fill(const cytnx_uint64 &val) { - cytnx_int64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int64Storage::fill(const cytnx_int32 &val) { - cytnx_int64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int64Storage::fill(const cytnx_uint32 &val) { - cytnx_int64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int64Storage::fill(const cytnx_uint16 &val) { - cytnx_int64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int64Storage::fill(const cytnx_int16 &val) { - cytnx_int64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Int64Storage::fill(const cytnx_bool &val) { - cytnx_int64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void Int64Storage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_int64) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_int64) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void Int64Storage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_int64)); - memcpy(htmp, this->Mem, sizeof(cytnx_int64) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_int64)); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_int64) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void Int64Storage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = cytnx_int64(val); - } - void Int64Storage::append(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Int64Storage::append(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Int64Storage::append(const cytnx_double &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int64Storage::append(const cytnx_float &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int64Storage::append(const cytnx_int64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int64Storage::append(const cytnx_int32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int64Storage::append(const cytnx_int16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int64Storage::append(const cytnx_uint64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int64Storage::append(const cytnx_uint32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int64Storage::append(const cytnx_uint16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Int64Storage::append(const cytnx_bool &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - boost::intrusive_ptr Int64Storage::real() { - cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", "\n"); - } - boost::intrusive_ptr Int64Storage::imag() { - cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", "\n"); - } - Scalar Int64Storage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void Int64Storage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = cytnx_int64(val); - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void Int64Storage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool Int64Storage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } -} // namespace cytnx diff --git a/src/backend/Storage.cpp b/src/backend/Storage.cpp index e5b6c37a..3c45c213 100644 --- a/src/backend/Storage.cpp +++ b/src/backend/Storage.cpp @@ -279,7 +279,7 @@ namespace cytnx { } } - this->_impl = __SII.USIInit[dt](); + this->_impl = __SII.USIInit[dt](dv); this->_impl->Init(sz, dv); // data: @@ -308,7 +308,7 @@ namespace cytnx { // check: cytnx_error_msg(!f.is_open(), "[ERROR] invalid fstream!.%s", "\n"); - this->_impl = __SII.USIInit[dtype](); + this->_impl = __SII.USIInit[dtype](Device.cpu); this->_impl->Init(Nelem, Device.cpu); f.read((char *)this->_impl->Mem, Type.typeSize(dtype) * Nelem); diff --git a/src/backend/Storage_base.cpp b/src/backend/Storage_base.cpp index 307724c2..db8eb5bf 100644 --- a/src/backend/Storage_base.cpp +++ b/src/backend/Storage_base.cpp @@ -94,7 +94,89 @@ namespace cytnx { } boost::intrusive_ptr Storage_base::astype(const unsigned int &dtype) { - boost::intrusive_ptr out(new Storage_base()); + boost::intrusive_ptr out; + if (device == Device.cpu) { + switch (dtype) { + case Type.ComplexDouble: + out = new ComplexDoubleStorage(device); + break; + case Type.ComplexFloat: + out = new ComplexFloatStorage(device); + break; + case Type.Double: + out = new DoubleStorage(device); + break; + case Type.Float: + out = new FloatStorage(device); + break; + case Type.Int64: + out = new Int64Storage(device); + break; + case Type.Uint64: + out = new Uint64Storage(device); + break; + case Type.Int32: + out = new Int32Storage(device); + break; + case Type.Uint32: + out = new Uint32Storage(device); + break; + case Type.Int16: + out = new Int16Storage(device); + break; + case Type.Uint16: + out = new Uint16Storage(device); + break; + case Type.Bool: + out = new BoolStorage(device); + break; + default: + cytnx_error_msg(true, "[ERROR] Unsupported type:%d", dtype); + break; + } + } else { + switch (dtype) { + case Type.ComplexDouble: + out = new ComplexDoubleGpuStorage(device); + break; + case Type.ComplexFloat: + out = new ComplexFloatGpuStorage(device); + break; + case Type.Double: + out = new DoubleGpuStorage(device); + break; + case Type.Float: + out = new FloatGpuStorage(device); + break; + case Type.Int64: + out = new Int64GpuStorage(device); + break; + case Type.Uint64: + out = new Uint64GpuStorage(device); + break; + case Type.Int32: + out = new Int32GpuStorage(device); + break; + case Type.Uint32: + out = new Uint32GpuStorage(device); + break; + case Type.Int16: + out = new Int16GpuStorage(device); + break; + case Type.Uint16: + out = new Uint16GpuStorage(device); + break; + case Type.Bool: + out = new BoolGpuStorage(device); + break; + default: + cytnx_error_msg(true, "[ERROR] Unsupported type:%d", dtype); + break; + } + } + + cytnx_error_msg(out->device != device, "[ERROR] device not match. out->device:%d device:%d", + out->device, device); if (dtype == this->dtype) return boost::intrusive_ptr(this); if (this->device == Device.cpu) { @@ -125,10 +207,7 @@ namespace cytnx { return nullptr; } - boost::intrusive_ptr Storage_base::clone() { - boost::intrusive_ptr out(new Storage_base()); - return out; - } + boost::intrusive_ptr Storage_base::clone() { return this->clone(); } string Storage_base::dtype_str() const { return Type.getname(this->dtype); } string Storage_base::device_str() const { return Device.getname(this->device); } @@ -137,20 +216,7 @@ namespace cytnx { cytnx_error_msg(1, "%s", "[ERROR] call _Init_byptr in base"); } - Storage_base::~Storage_base() { - // cout << "delet" << endl; - if (Mem != NULL) { - if (this->device == Device.cpu) { - free(Mem); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaFree(Mem)); -#else - cytnx_error_msg(1, "%s", "[ERROR] trying to free an GPU memory without CUDA install"); -#endif - } - } - } + Storage_base::~Storage_base() {} void Storage_base::Move_memory_(const std::vector &old_shape, const std::vector &mapper, @@ -1046,6 +1112,18 @@ namespace cytnx { cytnx_error_msg(true, "[ERROR] trying to call Storage.set_item() from void Storage%s", "\n"); } + template <> + Scalar StorageImplementation::get_item(const cytnx_uint64 &in) const { + return static_cast(storage_[in]); + }; + + template <> + Scalar StorageImplementation>::get_item(const cytnx_uint64 &in) const { + checkCudaErrors(cudaSetDevice(device)); + checkCudaErrors(cudaDeviceSynchronize()); + return static_cast(storage_[in]); + }; + // bool Storage_base::approx_eq(const boost::intrusive_ptr &rhs, // const cytnx_double tol) { // cytnx_error_msg(true, "[ERROR] trying to call Storage.approx_eq() from void Storage%s", diff --git a/src/backend/Tensor_impl.cpp b/src/backend/Tensor_impl.cpp index 74e92b88..a86f4c13 100644 --- a/src/backend/Tensor_impl.cpp +++ b/src/backend/Tensor_impl.cpp @@ -1,6 +1,6 @@ #include #include "backend/Tensor_impl.hpp" -#include "utils_internal_interface.hpp" + #include "linalg.hpp" #include "utils/is.hpp" #include "Type.hpp" diff --git a/src/backend/Uint16Storage.cpp b/src/backend/Uint16Storage.cpp deleted file mode 100644 index e96ad2c9..00000000 --- a/src/backend/Uint16Storage.cpp +++ /dev/null @@ -1,726 +0,0 @@ -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" - -using namespace std; - -namespace cytnx { - void Uint16Storage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "Uint16.init" << endl; - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.Uint16; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(cytnx_uint16)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(cytnx_uint16)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(cytnx_uint16)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_uint16)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void Uint16Storage::_Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device, - const bool &iscap, const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - -#ifdef UNI_DEBUG - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); -#endif - this->dtype = Type.Uint16; - this->device = device; - } - - boost::intrusive_ptr Uint16Storage::_create_new_sametype() { - boost::intrusive_ptr out(new Uint16Storage()); - return out; - } - - boost::intrusive_ptr Uint16Storage::clone() { - boost::intrusive_ptr out(new Uint16Storage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(cytnx_uint16) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors(cudaMemcpy(out->Mem, this->Mem, sizeof(cytnx_uint16) * this->len, - cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void Uint16Storage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_u16(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuMovemem_gpu_u16(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr Uint16Storage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_u16(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - return utils_internal::cuMovemem_gpu_u16(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - - void Uint16Storage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint16) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_uint16) * this->len, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_uint16) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_uint16) * this->len, cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint16) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_uint16) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr Uint16Storage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint16) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_uint16) * this->len, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new Uint16Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_uint16) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_uint16) * this->len, cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new Uint16Storage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint16) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_uint16) * this->len)); - boost::intrusive_ptr out(new Uint16Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void Uint16Storage::PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << std::string(buffer); - sprintf(buffer, " (%d", shape[0]); - os << std::string(buffer); - for (cytnx_int32 i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%d", shape[i]); - os << string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - cytnx_uint16 *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - cytnx_uint64 cnt = 0; - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - sprintf(buffer, "%5d ", elem_ptr_[cnt]); - os << std::string(buffer); - cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << std::string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << std::string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%5d ", elem_ptr_[RealMemPos]); - os << std::string(buffer); - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << std::string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void Uint16Storage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - cytnx_uint16 *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%5d ", elem_ptr_[cnt]); - cout << std::string(buffer); - } - cout << " ]" << endl; - free(buffer); - } - - void Uint16Storage::fill(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Uint16Storage::fill(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Uint16Storage::fill(const cytnx_double &val) { - cytnx_uint16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint16Storage::fill(const cytnx_float &val) { - cytnx_uint16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint16Storage::fill(const cytnx_int64 &val) { - cytnx_uint16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint16Storage::fill(const cytnx_uint64 &val) { - cytnx_uint16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint16Storage::fill(const cytnx_int32 &val) { - cytnx_uint16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint16Storage::fill(const cytnx_uint32 &val) { - cytnx_uint16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint16Storage::fill(const cytnx_uint16 &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint16Storage::fill(const cytnx_int16 &val) { - cytnx_uint16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint16Storage::fill(const cytnx_bool &val) { - cytnx_uint16 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void Uint16Storage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_uint16) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_uint16) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - void Uint16Storage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_uint16)); - memcpy(htmp, this->Mem, sizeof(cytnx_uint16) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_uint16)); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_uint16) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - void Uint16Storage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = cytnx_uint16(val); - } - void Uint16Storage::append(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Uint16Storage::append(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Uint16Storage::append(const cytnx_double &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint16Storage::append(const cytnx_float &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint16Storage::append(const cytnx_int64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint16Storage::append(const cytnx_int32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint16Storage::append(const cytnx_int16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint16Storage::append(const cytnx_uint64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint16Storage::append(const cytnx_uint32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint16Storage::append(const cytnx_uint16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint16Storage::append(const cytnx_bool &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - boost::intrusive_ptr Uint16Storage::real() { - cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", "\n"); - } - boost::intrusive_ptr Uint16Storage::imag() { - cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", "\n"); - } - Scalar Uint16Storage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void Uint16Storage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = cytnx_uint16(val); - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void Uint16Storage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool Uint16Storage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } -} // namespace cytnx diff --git a/src/backend/Uint32Storage.cpp b/src/backend/Uint32Storage.cpp deleted file mode 100644 index f7157c12..00000000 --- a/src/backend/Uint32Storage.cpp +++ /dev/null @@ -1,734 +0,0 @@ -#ifdef UNI_OMP - #include -#endif -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" - -using namespace std; - -namespace cytnx { - void Uint32Storage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "Uint32.init" << endl; - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.Uint32; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(cytnx_uint32)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(cytnx_uint32)); - - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(cytnx_uint32)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_uint32)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void Uint32Storage::_Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device, - const bool &iscap, const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - -#ifdef UNI_DEBUG - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); - -#endif - this->dtype = Type.Uint32; - this->device = device; - } - - boost::intrusive_ptr Uint32Storage::_create_new_sametype() { - boost::intrusive_ptr out(new Uint32Storage()); - return out; - } - - boost::intrusive_ptr Uint32Storage::clone() { - boost::intrusive_ptr out(new Uint32Storage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(cytnx_uint32) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors(cudaMemcpy(out->Mem, this->Mem, sizeof(cytnx_uint32) * this->len, - cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void Uint32Storage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_u32(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuMovemem_gpu_u32(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr Uint32Storage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_u32(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - return utils_internal::cuMovemem_gpu_u32(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - - void Uint32Storage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint32) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_uint32) * this->len, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_uint32) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_uint32) * this->len, cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint32) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_uint32) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr Uint32Storage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint32) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_uint32) * this->len, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new Uint32Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_uint32) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_uint32) * this->len, cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new Uint32Storage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint32) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_uint32) * this->len)); - boost::intrusive_ptr out(new Uint32Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void Uint32Storage::PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, buffer, "%s", "Shape :"); - os << std::string(buffer); - sprintf(buffer, buffer, " (%d", shape[0]); - os << std::string(buffer); - for (cytnx_int32 i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%d", shape[i]); - os << std::string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - cytnx_uint32 *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - cytnx_uint64 cnt = 0; - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - sprintf(buffer, "%10d ", elem_ptr_[cnt]); - os << string(buffer); - cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (1) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%10d ", elem_ptr_[RealMemPos]); - os << string(buffer); - // cnt++; - } - - s = 0; - while (1) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void Uint32Storage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - cytnx_uint32 *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%10d ", elem_ptr_[cnt]); - cout << string(buffer); - } - cout << " ]" << endl; - free(buffer); - } - - void Uint32Storage::fill(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Uint32Storage::fill(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Uint32Storage::fill(const cytnx_double &val) { - cytnx_uint32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint32Storage::fill(const cytnx_float &val) { - cytnx_uint32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint32Storage::fill(const cytnx_int64 &val) { - cytnx_uint32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint32Storage::fill(const cytnx_uint64 &val) { - cytnx_uint32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint32Storage::fill(const cytnx_int32 &val) { - cytnx_uint32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint32Storage::fill(const cytnx_uint32 &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint32Storage::fill(const cytnx_uint16 &val) { - cytnx_uint32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint32Storage::fill(const cytnx_int16 &val) { - cytnx_uint32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint32Storage::fill(const cytnx_bool &val) { - cytnx_uint32 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - - void Uint32Storage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_uint32) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_uint32) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void Uint32Storage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_uint32)); - memcpy(htmp, this->Mem, sizeof(cytnx_uint32) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_uint32)); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_uint32) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void Uint32Storage::append(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Uint32Storage::append(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Uint32Storage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = cytnx_uint32(val); - } - void Uint32Storage::append(const cytnx_double &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint32Storage::append(const cytnx_float &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint32Storage::append(const cytnx_int64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint32Storage::append(const cytnx_int32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint32Storage::append(const cytnx_int16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint32Storage::append(const cytnx_uint64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint32Storage::append(const cytnx_uint32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint32Storage::append(const cytnx_uint16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint32Storage::append(const cytnx_bool &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - boost::intrusive_ptr Uint32Storage::real() { - cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", "\n"); - } - boost::intrusive_ptr Uint32Storage::imag() { - cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", "\n"); - } - Scalar Uint32Storage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void Uint32Storage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = cytnx_uint32(val); - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void Uint32Storage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool Uint32Storage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } -} // namespace cytnx diff --git a/src/backend/Uint64Storage.cpp b/src/backend/Uint64Storage.cpp deleted file mode 100644 index 231a8cf9..00000000 --- a/src/backend/Uint64Storage.cpp +++ /dev/null @@ -1,729 +0,0 @@ -#ifdef UNI_OMP - #include -#endif -#include "backend/Storage.hpp" -#include "utils_internal_interface.hpp" -using namespace std; - -namespace cytnx { - void Uint64Storage::Init(const unsigned long long &len_in, const int &device, - const bool &init_zero) { - // cout << "Uint64.init" << endl; - this->len = len_in; - - // check: - // cytnx_error_msg(len_in < 1, "%s", "[ERROR] cannot init a Storage with zero element"); - this->dtype = Type.Uint64; - - if (this->len % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((this->len) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = this->len; - } - - if (device == Device.cpu) { - if (init_zero) - this->Mem = utils_internal::Calloc_cpu(this->cap, sizeof(cytnx_uint64)); - else - this->Mem = utils_internal::Malloc_cpu(this->cap * sizeof(cytnx_uint64)); - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - // this->Mem = utils_internal::cuMalloc_gpu(this->cap*sizeof(cytnx_uint64)); - this->Mem = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_uint64)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot init a Storage on gpu without CUDA support."); -#endif - } - this->device = device; - } - - void Uint64Storage::_Init_byptr(void *rawptr, const unsigned long long &len_in, const int &device, - const bool &iscap, const unsigned long long &cap_in) { - this->Mem = rawptr; - this->len = len_in; - if (iscap) { - this->cap = cap_in; - } else { - this->cap = len_in; - } - cytnx_error_msg(this->cap % STORAGE_DEFT_SZ != 0, - "[ERROR] _Init_by_ptr cannot have not %dx cap_in.", STORAGE_DEFT_SZ); - -#ifdef UNI_DEBUG - cytnx_error_msg(len_in < 1, "%s", "[ERROR] _Init_by_ptr cannot have len_in < 1."); - cytnx_error_msg(this->cap < this->len, "%s", - "[ERROR] _Init_by_ptr cannot have capacity < size."); -#endif - this->dtype = Type.Uint64; - this->device = device; - } - - boost::intrusive_ptr Uint64Storage::_create_new_sametype() { - boost::intrusive_ptr out(new Uint64Storage()); - return out; - } - - boost::intrusive_ptr Uint64Storage::clone() { - boost::intrusive_ptr out(new Uint64Storage()); - out->Init(this->len, this->device); - if (this->device == Device.cpu) { - memcpy(out->Mem, this->Mem, sizeof(cytnx_uint64) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - checkCudaErrors(cudaMemcpy(out->Mem, this->Mem, sizeof(cytnx_uint64) * this->len, - cudaMemcpyDeviceToDevice)); -#else - cytnx_error_msg(1, "%s", "[ERROR] cannot clone a Storage on gpu without CUDA support."); -#endif - } - return out; - } - - void Uint64Storage::Move_memory_(const std::vector &old_shape, - const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - utils_internal::Movemem_cpu_u64(tmp, old_shape, mapper, invmapper, 1); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuMovemem_gpu_u64(tmp, old_shape, mapper, invmapper, 1); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); -#endif - } - } - - boost::intrusive_ptr Uint64Storage::Move_memory( - const std::vector &old_shape, const std::vector &mapper, - const std::vector &invmapper) { - boost::intrusive_ptr tmp(this); - if (this->device == Device.cpu) { - return utils_internal::Movemem_cpu_u64(tmp, old_shape, mapper, invmapper, 0); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - return utils_internal::cuMovemem_gpu_u64(tmp, old_shape, mapper, invmapper, 0); -#else - cytnx_error_msg(1, "%s", "[ERROR][Internal] try to call GPU section without CUDA support"); - return nullptr; -#endif - } - } - void Uint64Storage::to_(const int &device) { - if (this->device != device) { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint64) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_uint64) * this->len, cudaMemcpyHostToDevice)); - free(this->Mem); - this->Mem = dtmp; - this->device = device; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_uint64) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_uint64) * this->len, cudaMemcpyDeviceToHost)); - cudaFree(this->Mem); - this->Mem = htmp; - this->device = device; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint64) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_uint64) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; - this->device = device; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); -#endif - } - } - } - boost::intrusive_ptr Uint64Storage::to(const int &device) { - // Here, we follow pytorch scheme. if the device is the same as this->device, then return this - // (python self) otherwise, return a clone on different device. - if (this->device == device) { - return this; - } else { - if (this->device == Device.cpu) { -// here, cpu->gpu with gid=device -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint64) * this->cap); - checkCudaErrors( - cudaMemcpy(dtmp, this->Mem, sizeof(cytnx_uint64) * this->len, cudaMemcpyHostToDevice)); - boost::intrusive_ptr out(new Uint64Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; -#else - cytnx_error_msg(1, "%s", "[ERROR] try to move from cpu(Host) to gpu without CUDA support."); - return nullptr; -#endif - } else { -#ifdef UNI_GPU - if (device == Device.cpu) { - // here, gpu->cpu - cudaSetDevice(this->device); - void *htmp = malloc(sizeof(cytnx_uint64) * this->cap); - checkCudaErrors( - cudaMemcpy(htmp, this->Mem, sizeof(cytnx_uint64) * this->len, cudaMemcpyDeviceToHost)); - boost::intrusive_ptr out(new Uint64Storage()); - out->_Init_byptr(htmp, this->len, device, true, this->cap); - return out; - } else { - // here, gpu->gpu - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuMalloc_gpu(sizeof(cytnx_uint64) * this->cap); - checkCudaErrors(cudaMemcpyPeer(dtmp, device, this->Mem, this->device, - sizeof(cytnx_uint64) * this->len)); - boost::intrusive_ptr out(new Uint64Storage()); - out->_Init_byptr(dtmp, this->len, device, true, this->cap); - return out; - } -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.to_. the Storage is as GPU but without CUDA support."); - return nullptr; -#endif - } - } - } - - void Uint64Storage::PrintElem_byShape(std::ostream &os, const std::vector &shape, - const std::vector &mapper) { - char *buffer = (char *)malloc(sizeof(char) * 256); - // checking: - cytnx_uint64 Ne = 1; - for (cytnx_uint64 i = 0; i < shape.size(); i++) { - Ne *= shape[i]; - } - if (Ne != this->len) { - cytnx_error_msg(1, "%s", - "PrintElem_byShape, the number of shape not match with the No. of elements."); - } - - if (len == 0) { - os << "[ "; - os << "\nThe Storage has not been allocated or linked.\n"; - os << "]\n"; - } else { - os << std::endl << "Total elem: " << this->len << "\n"; - - os << "type : " << Type.getname(this->dtype) << std::endl; - - int atDevice = this->device; - os << Device.getname(this->device) << std::endl; - - sprintf(buffer, "%s", "Shape :"); - os << string(buffer); - sprintf(buffer, " (%llu", shape[0]); - os << string(buffer); - for (cytnx_size_t i = 1; i < shape.size(); i++) { - sprintf(buffer, ",%llu", shape[i]); - os << string(buffer); - } - os << ")" << std::endl; - - // temporary move to cpu for printing. - if (this->device != Device.cpu) { - this->to_(Device.cpu); - } - - std::vector stk(shape.size(), 0), stk2; - - cytnx_uint64 s; - auto *elem_ptr_ = static_cast(this->Mem); - - if (mapper.size() == 0) { - cytnx_uint64 cnt = 0; - while (true) { - for (cytnx_size_t i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - sprintf(buffer, "%19llu ", elem_ptr_[cnt]); - os << string(buffer); - cnt++; - } - - s = 0; - while (true) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } else { - /// This is for non-contiguous Tensor printing; - // cytnx_error_msg(1,"%s","print for a non-contiguous Storage is under developing"); - // cytnx_uint64 cnt=0; - std::vector c_offj(shape.size()); - std::vector c_shape(shape.size()); - - cytnx_uint64 accu = 1; - cytnx_uint64 RealMemPos; - for (cytnx_uint32 i = 0; i < shape.size(); i++) { - c_shape[i] = shape[mapper[i]]; - } - for (cytnx_int64 i = c_shape.size() - 1; i >= 0; i--) { - c_offj[i] = accu; - accu *= c_shape[i]; - } - - while (true) { - for (cytnx_int32 i = 0; i < shape.size(); i++) { - if (i < shape.size() - stk.size()) { - sprintf(buffer, "%s", " "); - os << string(buffer); - } else { - stk2.push_back(0); - sprintf(buffer, "%s", "["); - os << string(buffer); - stk.pop_back(); - } - } - for (cytnx_uint64 i = 0; i < shape.back(); i++) { - stk2.back() = i; - - /// Calculate the Memory reflection: - RealMemPos = 0; - for (cytnx_uint64 n = 0; n < shape.size(); n++) { - RealMemPos += c_offj[n] * stk2[mapper[n]]; // mapback + backmap = normal-map - } - sprintf(buffer, "%19llu ", elem_ptr_[RealMemPos]); - os << string(buffer); - // cnt++; - } - - s = 0; - while (true) { - if (stk2.empty()) { - break; - } - if (stk2.back() == *(&shape.back() - s) - 1) { - stk.push_back(*(&shape.back() - s)); - s++; - stk2.pop_back(); - sprintf(buffer, "%s", "]"); - os << string(buffer); - } else { - stk2.back() += 1; - break; - } - } - os << "\n"; - - if (stk2.empty()) break; - } - os << std::endl; - - } // check if need mapping - - if (atDevice != Device.cpu) { - this->to_(atDevice); - } - - } // len==0 - free(buffer); - } - - void Uint64Storage::print_elems() { - char *buffer = (char *)malloc(sizeof(char) * 256); - auto *elem_ptr_ = static_cast(this->Mem); - cout << "[ "; - for (unsigned long long cnt = 0; cnt < this->len; cnt++) { - sprintf(buffer, "%19llu ", elem_ptr_[cnt]); - cout << string(buffer); - } - cout << " ]" << endl; - free(buffer); - } - - void Uint64Storage::fill(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Uint64Storage::fill(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot fill complex value into real container"); - } - void Uint64Storage::fill(const cytnx_double &val) { - cytnx_uint64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint64Storage::fill(const cytnx_float &val) { - cytnx_uint64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint64Storage::fill(const cytnx_int64 &val) { - cytnx_uint64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint64Storage::fill(const cytnx_uint64 &val) { - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&val), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&val), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint64Storage::fill(const cytnx_int32 &val) { - cytnx_uint64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint64Storage::fill(const cytnx_uint32 &val) { - cytnx_uint64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint64Storage::fill(const cytnx_uint16 &val) { - cytnx_uint64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint64Storage::fill(const cytnx_int16 &val) { - cytnx_uint64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint64Storage::fill(const cytnx_bool &val) { - cytnx_uint64 tmp = val; - if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); -#else - cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", - "storage is on gpu without CUDA support\n"); -#endif - } - } - void Uint64Storage::set_zeros() { - if (this->device == Device.cpu) { - utils_internal::SetZeros(this->Mem, sizeof(cytnx_uint64) * this->len); - } else { -#ifdef UNI_GPU - checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuSetZeros(this->Mem, sizeof(cytnx_uint64) * this->len); -#else - cytnx_error_msg(1, "[ERROR][set_zeros] fatal, the storage is on gpu without CUDA support.%s", - "\n"); -#endif - } - } - - void Uint64Storage::resize(const cytnx_uint64 &newsize) { - // cytnx_error_msg(newsize < 1,"[ERROR]resize should have size > 0%s","\n"); - - if (newsize > this->cap) { - if (newsize % STORAGE_DEFT_SZ) { - this->cap = ((unsigned long long)((newsize) / STORAGE_DEFT_SZ) + 1) * STORAGE_DEFT_SZ; - } else { - this->cap = newsize; - } - if (this->device == Device.cpu) { - void *htmp = calloc(this->cap, sizeof(cytnx_uint64)); - memcpy(htmp, this->Mem, sizeof(cytnx_uint64) * this->len); - free(this->Mem); - this->Mem = htmp; - } else { -#ifdef UNI_GPU - cytnx_error_msg(device >= Device.Ngpus, "%s", "[ERROR] invalid device."); - cudaSetDevice(device); - void *dtmp = utils_internal::cuCalloc_gpu(this->cap, sizeof(cytnx_uint64)); - checkCudaErrors( - cudaMemcpyPeer(dtmp, device, this->Mem, this->device, sizeof(cytnx_uint64) * this->len)); - cudaFree(this->Mem); - this->Mem = dtmp; -#else - cytnx_error_msg( - 1, "%s", - "[ERROR][Internal] Storage.resize. the Storage is as GPU but without CUDA support."); -#endif - } - } - this->len = newsize; - } - - void Uint64Storage::append(const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Uint64Storage::append(const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR]%s", " cannot append complex value into real container"); - } - void Uint64Storage::append(const Scalar &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = cytnx_uint64(val); - } - void Uint64Storage::append(const cytnx_double &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint64Storage::append(const cytnx_float &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint64Storage::append(const cytnx_int64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint64Storage::append(const cytnx_int32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint64Storage::append(const cytnx_int16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint64Storage::append(const cytnx_uint64 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint64Storage::append(const cytnx_uint32 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint64Storage::append(const cytnx_uint16 &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - void Uint64Storage::append(const cytnx_bool &val) { - if (this->len + 1 > this->cap) { - this->resize(this->len + 1); - } else { - this->len += 1; - } - this->at(this->len - 1) = val; - } - - boost::intrusive_ptr Uint64Storage::real() { - cytnx_error_msg(true, "[ERROR] Storage.real() can only be called from complex type.%s", "\n"); - } - boost::intrusive_ptr Uint64Storage::imag() { - cytnx_error_msg(true, "[ERROR] Storage.imag() can only be called from complex type.%s", "\n"); - } - Scalar Uint64Storage::get_item(const cytnx_uint64 &idx) const { - return Scalar(this->at(idx)); - } - - void Uint64Storage::set_item(const cytnx_uint64 &idx, const Scalar &val) { - this->at(idx) = cytnx_uint64(val); - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex128 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_complex64 &val) { - cytnx_error_msg(true, "[ERROR] cannot set complex to real.%s", "\n"); - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_double &val) { - this->at(idx) = val; - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_float &val) { - this->at(idx) = val; - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_int64 &val) { - this->at(idx) = val; - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint64 &val) { - this->at(idx) = val; - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_int32 &val) { - this->at(idx) = val; - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint32 &val) { - this->at(idx) = val; - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_int16 &val) { - this->at(idx) = val; - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_uint16 &val) { - this->at(idx) = val; - } - void Uint64Storage::set_item(const cytnx_uint64 &idx, const cytnx_bool &val) { - this->at(idx) = val; - } - - // bool Uint64Storage::approx_eq(const boost::intrusive_ptr &rhs, - // const cytnx_double tol) { - // boost::intrusive_ptr _lhs, _rhs; - // if (rhs->dtype == this->dtype) { - // _lhs = this; - // _rhs = rhs; - // } else if (rhs->dtype > this->dtype) { - // _lhs = this; - // _rhs = rhs->astype(this->dtype); - // } else { - // _lhs = this->astype(rhs->dtype); - // _rhs = rhs; - // } - // if (_rhs->size() != _lhs->size()) { - // if (User_debug) std::cout << "different tensor size." << std::endl; - // return false; - // } - // for (cytnx_uint64 i = 0; i < this->len; i++) { - // if (_lhs->get_item(i).approx_eq(_rhs->get_item(i), tol) == false) { - // if (User_debug) - // std::cout << "tensor different at idx:" << i << "\n" - // << "lhs:" << _lhs->get_item(i) << " rhs:" << _rhs->get_item(i) << "\n"; - // return false; - // } - // } - // return true; - // } -} // namespace cytnx diff --git a/src/backend/linalg_internal_cpu/Add_internal.cpp b/src/backend/linalg_internal_cpu/Add_internal.cpp index 85600977..9e4be124 100644 --- a/src/backend/linalg_internal_cpu/Add_internal.cpp +++ b/src/backend/linalg_internal_cpu/Add_internal.cpp @@ -1,5 +1,5 @@ #include "Add_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_cpu/Cpr_internal.cpp b/src/backend/linalg_internal_cpu/Cpr_internal.cpp index 78c469ae..3793c5e3 100644 --- a/src/backend/linalg_internal_cpu/Cpr_internal.cpp +++ b/src/backend/linalg_internal_cpu/Cpr_internal.cpp @@ -1,5 +1,5 @@ #include "Cpr_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_cpu/Det_internal.cpp b/src/backend/linalg_internal_cpu/Det_internal.cpp index c507059e..471fdbc9 100644 --- a/src/backend/linalg_internal_cpu/Det_internal.cpp +++ b/src/backend/linalg_internal_cpu/Det_internal.cpp @@ -4,7 +4,7 @@ #include #include "backend/lapack_wrapper.hpp" -#include "backend/utils_internal_interface.hpp" + #include "utils/utils.hpp" #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_cpu/Div_internal.cpp b/src/backend/linalg_internal_cpu/Div_internal.cpp index db8f1190..f63b6983 100644 --- a/src/backend/linalg_internal_cpu/Div_internal.cpp +++ b/src/backend/linalg_internal_cpu/Div_internal.cpp @@ -1,5 +1,5 @@ #include "Div_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #ifdef UNI_OMP #include diff --git a/src/backend/linalg_internal_cpu/Kron_internal.cpp b/src/backend/linalg_internal_cpu/Kron_internal.cpp index 8634f296..fe117e0e 100644 --- a/src/backend/linalg_internal_cpu/Kron_internal.cpp +++ b/src/backend/linalg_internal_cpu/Kron_internal.cpp @@ -1,7 +1,7 @@ #include "backend/linalg_internal_cpu/Kron_internal.hpp" #include "utils/complex_arithmetic.hpp" -#include "../utils_internal_interface.hpp" + #include // #include "backend/lapack_wrapper.hpp" #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_cpu/Mod_internal.cpp b/src/backend/linalg_internal_cpu/Mod_internal.cpp index 39148d70..e7b9d742 100644 --- a/src/backend/linalg_internal_cpu/Mod_internal.cpp +++ b/src/backend/linalg_internal_cpu/Mod_internal.cpp @@ -1,5 +1,5 @@ #include "Mod_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #ifdef UNI_OMP #include diff --git a/src/backend/linalg_internal_cpu/Mul_internal.cpp b/src/backend/linalg_internal_cpu/Mul_internal.cpp index b3497f21..d69d3109 100644 --- a/src/backend/linalg_internal_cpu/Mul_internal.cpp +++ b/src/backend/linalg_internal_cpu/Mul_internal.cpp @@ -1,5 +1,5 @@ #include "Mul_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #ifdef UNI_OMP #include diff --git a/src/backend/linalg_internal_cpu/Norm_internal.cpp b/src/backend/linalg_internal_cpu/Norm_internal.cpp index 4f316b24..5fe72b96 100644 --- a/src/backend/linalg_internal_cpu/Norm_internal.cpp +++ b/src/backend/linalg_internal_cpu/Norm_internal.cpp @@ -1,5 +1,5 @@ #include "Norm_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include "backend/lapack_wrapper.hpp" #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_cpu/Outer_internal.cpp b/src/backend/linalg_internal_cpu/Outer_internal.cpp index 004d1128..fb3aeaa2 100644 --- a/src/backend/linalg_internal_cpu/Outer_internal.cpp +++ b/src/backend/linalg_internal_cpu/Outer_internal.cpp @@ -1,6 +1,6 @@ #include "Outer_internal.hpp" #include "Ger_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/complex_arithmetic.hpp" #include "backend/lapack_wrapper.hpp" diff --git a/src/backend/linalg_internal_cpu/Sub_internal.cpp b/src/backend/linalg_internal_cpu/Sub_internal.cpp index f2881797..059b2912 100644 --- a/src/backend/linalg_internal_cpu/Sub_internal.cpp +++ b/src/backend/linalg_internal_cpu/Sub_internal.cpp @@ -1,5 +1,5 @@ #include "Sub_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #ifdef UNI_OMP #include diff --git a/src/backend/linalg_internal_cpu/Vectordot_internal.cpp b/src/backend/linalg_internal_cpu/Vectordot_internal.cpp index 15c4b739..8d089c0b 100644 --- a/src/backend/linalg_internal_cpu/Vectordot_internal.cpp +++ b/src/backend/linalg_internal_cpu/Vectordot_internal.cpp @@ -1,5 +1,5 @@ #include "Vectordot_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include "backend/lapack_wrapper.hpp" #include diff --git a/src/backend/linalg_internal_cpu/iAdd_internal.cpp b/src/backend/linalg_internal_cpu/iAdd_internal.cpp index 59937fda..576abb5b 100644 --- a/src/backend/linalg_internal_cpu/iAdd_internal.cpp +++ b/src/backend/linalg_internal_cpu/iAdd_internal.cpp @@ -1,5 +1,5 @@ #include "iAdd_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_cpu/iDiv_internal.cpp b/src/backend/linalg_internal_cpu/iDiv_internal.cpp index 35d19a66..55e77c50 100644 --- a/src/backend/linalg_internal_cpu/iDiv_internal.cpp +++ b/src/backend/linalg_internal_cpu/iDiv_internal.cpp @@ -1,5 +1,5 @@ #include "iDiv_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_cpu/iMul_internal.cpp b/src/backend/linalg_internal_cpu/iMul_internal.cpp index b2ce1027..cfca82ce 100644 --- a/src/backend/linalg_internal_cpu/iMul_internal.cpp +++ b/src/backend/linalg_internal_cpu/iMul_internal.cpp @@ -1,5 +1,5 @@ #include "iMul_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_cpu/iSub_internal.cpp b/src/backend/linalg_internal_cpu/iSub_internal.cpp index b1cc6893..27d61ef9 100644 --- a/src/backend/linalg_internal_cpu/iSub_internal.cpp +++ b/src/backend/linalg_internal_cpu/iSub_internal.cpp @@ -1,5 +1,5 @@ #include "iSub_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_gpu/cuAbs_internal.cu b/src/backend/linalg_internal_gpu/cuAbs_internal.cu index 5cf92815..934235fc 100644 --- a/src/backend/linalg_internal_gpu/cuAbs_internal.cu +++ b/src/backend/linalg_internal_gpu/cuAbs_internal.cu @@ -1,5 +1,4 @@ #include "cuAbs_internal.hpp" -#include "../utils_internal_interface.hpp" // #include "cytnx_error.hpp" // #include "utils/backend/lapack_wrapper.hpp" diff --git a/src/backend/linalg_internal_gpu/cuAdd_internal.cu b/src/backend/linalg_internal_gpu/cuAdd_internal.cu index a48352cd..45879452 100644 --- a/src/backend/linalg_internal_gpu/cuAdd_internal.cu +++ b/src/backend/linalg_internal_gpu/cuAdd_internal.cu @@ -1,5 +1,4 @@ #include "cuAdd_internal.hpp" -#include "../utils_internal_interface.hpp" #ifdef UNI_OMP #include diff --git a/src/backend/linalg_internal_gpu/cuCpr_internal.cu b/src/backend/linalg_internal_gpu/cuCpr_internal.cu index 77c8f09f..9b7a6db8 100644 --- a/src/backend/linalg_internal_gpu/cuCpr_internal.cu +++ b/src/backend/linalg_internal_gpu/cuCpr_internal.cu @@ -1,5 +1,5 @@ #include "cuCpr_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/cucomplex_arithmetic.hpp" #ifdef UNI_OMP #include diff --git a/src/backend/linalg_internal_gpu/cuDiag_internal.cu b/src/backend/linalg_internal_gpu/cuDiag_internal.cu index 54fb69a6..d0e2a69a 100644 --- a/src/backend/linalg_internal_gpu/cuDiag_internal.cu +++ b/src/backend/linalg_internal_gpu/cuDiag_internal.cu @@ -1,5 +1,4 @@ #include "cuDiag_internal.hpp" -#include "../utils_internal_interface.hpp" // #ifdef UNI_OMP // #include diff --git a/src/backend/linalg_internal_gpu/cuDiv_internal.cu b/src/backend/linalg_internal_gpu/cuDiv_internal.cu index de5b1343..2800d2c2 100644 --- a/src/backend/linalg_internal_gpu/cuDiv_internal.cu +++ b/src/backend/linalg_internal_gpu/cuDiv_internal.cu @@ -1,5 +1,4 @@ #include "cuDiv_internal.hpp" -#include "../utils_internal_interface.hpp" #ifdef UNI_OMP #include diff --git a/src/backend/linalg_internal_gpu/cuExp_internal.cu b/src/backend/linalg_internal_gpu/cuExp_internal.cu index d1d1aee3..e3711dd8 100644 --- a/src/backend/linalg_internal_gpu/cuExp_internal.cu +++ b/src/backend/linalg_internal_gpu/cuExp_internal.cu @@ -1,5 +1,4 @@ #include "cuExp_internal.hpp" -#include "../utils_internal_interface.hpp" // #ifdef UNI_OMP // #include diff --git a/src/backend/linalg_internal_gpu/cuGer_internal.cu b/src/backend/linalg_internal_gpu/cuGer_internal.cu index 661209a8..8f929563 100644 --- a/src/backend/linalg_internal_gpu/cuGer_internal.cu +++ b/src/backend/linalg_internal_gpu/cuGer_internal.cu @@ -1,5 +1,4 @@ #include "cuGer_internal.hpp" -#include "../utils_internal_interface.hpp" #include "backend/lapack_wrapper.hpp" diff --git a/src/backend/linalg_internal_gpu/cuInv_inplace_internal.cu b/src/backend/linalg_internal_gpu/cuInv_inplace_internal.cu index d7f0c5d5..9581ed36 100644 --- a/src/backend/linalg_internal_gpu/cuInv_inplace_internal.cu +++ b/src/backend/linalg_internal_gpu/cuInv_inplace_internal.cu @@ -1,5 +1,4 @@ #include "cuInv_inplace_internal.hpp" -#include "../utils_internal_interface.hpp" // #ifdef UNI_OMP // #include diff --git a/src/backend/linalg_internal_gpu/cuKron_internal.cu b/src/backend/linalg_internal_gpu/cuKron_internal.cu index 9d02fd75..bbd5bfab 100644 --- a/src/backend/linalg_internal_gpu/cuKron_internal.cu +++ b/src/backend/linalg_internal_gpu/cuKron_internal.cu @@ -1,5 +1,5 @@ #include "cuKron_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include "../utils_internal_gpu/cuAlloc_gpu.hpp" #include diff --git a/src/backend/linalg_internal_gpu/cuMod_internal.cu b/src/backend/linalg_internal_gpu/cuMod_internal.cu index 8b914604..ce1b28fd 100644 --- a/src/backend/linalg_internal_gpu/cuMod_internal.cu +++ b/src/backend/linalg_internal_gpu/cuMod_internal.cu @@ -1,5 +1,5 @@ #include "cuMod_internal.hpp" -#include "../utils_internal_interface.hpp" + #include #ifdef UNI_OMP #include diff --git a/src/backend/linalg_internal_gpu/cuMul_internal.cu b/src/backend/linalg_internal_gpu/cuMul_internal.cu index 3f70fc15..feb3950b 100644 --- a/src/backend/linalg_internal_gpu/cuMul_internal.cu +++ b/src/backend/linalg_internal_gpu/cuMul_internal.cu @@ -1,5 +1,4 @@ #include "cuMul_internal.hpp" -#include "../utils_internal_interface.hpp" #ifdef UNI_OMP #include diff --git a/src/backend/linalg_internal_gpu/cuNorm_internal.cu b/src/backend/linalg_internal_gpu/cuNorm_internal.cu index a9927aed..2342e596 100644 --- a/src/backend/linalg_internal_gpu/cuNorm_internal.cu +++ b/src/backend/linalg_internal_gpu/cuNorm_internal.cu @@ -1,5 +1,5 @@ #include "cuNorm_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/utils.hpp" #include "cytnx_error.hpp" #include "backend/lapack_wrapper.hpp" diff --git a/src/backend/linalg_internal_gpu/cuOuter_internal.cu b/src/backend/linalg_internal_gpu/cuOuter_internal.cu index 59ec9607..38d7c8ee 100644 --- a/src/backend/linalg_internal_gpu/cuOuter_internal.cu +++ b/src/backend/linalg_internal_gpu/cuOuter_internal.cu @@ -2,7 +2,7 @@ #include #include "cuOuter_internal.hpp" #include "cuGer_internal.hpp" -#include "../utils_internal_interface.hpp" + #include "utils/cucomplex_arithmetic.hpp" #ifdef UNI_OMP diff --git a/src/backend/linalg_internal_gpu/cuPow_internal.cu b/src/backend/linalg_internal_gpu/cuPow_internal.cu index 71771d15..3387de69 100644 --- a/src/backend/linalg_internal_gpu/cuPow_internal.cu +++ b/src/backend/linalg_internal_gpu/cuPow_internal.cu @@ -1,5 +1,4 @@ #include "cuPow_internal.hpp" -#include "../utils_internal_interface.hpp" // #ifdef UNI_OMP // #include diff --git a/src/backend/linalg_internal_gpu/cuSub_internal.cu b/src/backend/linalg_internal_gpu/cuSub_internal.cu index c5029bf9..06acc065 100644 --- a/src/backend/linalg_internal_gpu/cuSub_internal.cu +++ b/src/backend/linalg_internal_gpu/cuSub_internal.cu @@ -1,5 +1,4 @@ #include "cuSub_internal.hpp" -#include "../utils_internal_interface.hpp" #ifdef UNI_OMP #include diff --git a/src/backend/utils_internal_cpu/MvElems_blks_cpu.cpp b/src/backend/utils_internal_cpu/MvElems_blks_cpu.cpp index c3eef813..34e4280e 100644 --- a/src/backend/utils_internal_cpu/MvElems_blks_cpu.cpp +++ b/src/backend/utils_internal_cpu/MvElems_blks_cpu.cpp @@ -4,7 +4,7 @@ #include "utils/vec_map.hpp" #include "utils/cartesian.hpp" #include "Tensor.hpp" -// #include "../utils_internal_interface.hpp" + #include using namespace std; diff --git a/src/backend/utils_internal_cpu/SetElems_contiguous_cpu.cpp b/src/backend/utils_internal_cpu/SetElems_contiguous_cpu.cpp index b31036cf..c4b6b17b 100644 --- a/src/backend/utils_internal_cpu/SetElems_contiguous_cpu.cpp +++ b/src/backend/utils_internal_cpu/SetElems_contiguous_cpu.cpp @@ -1,5 +1,5 @@ #include "SetElems_contiguous_cpu.hpp" -#include "../utils_internal_interface.hpp" + #ifdef UNI_OMP #include #endif diff --git a/src/backend/utils_internal_cpu/SetElems_cpu.cpp b/src/backend/utils_internal_cpu/SetElems_cpu.cpp index 0b13aa4d..72bf05e6 100644 --- a/src/backend/utils_internal_cpu/SetElems_cpu.cpp +++ b/src/backend/utils_internal_cpu/SetElems_cpu.cpp @@ -1,5 +1,5 @@ #include "SetElems_cpu.hpp" -#include "../utils_internal_interface.hpp" + #ifdef UNI_OMP #include #endif diff --git a/src/backend/utils_internal_cpu/blocks_mvelems_cpu.cpp b/src/backend/utils_internal_cpu/blocks_mvelems_cpu.cpp index 59843c30..0c6b4441 100644 --- a/src/backend/utils_internal_cpu/blocks_mvelems_cpu.cpp +++ b/src/backend/utils_internal_cpu/blocks_mvelems_cpu.cpp @@ -4,7 +4,7 @@ #include "utils/vec_map.hpp" #include "utils/cartesian.hpp" #include "Tensor.hpp" -// #include "../utils_internal_interface.hpp" + #include using namespace std; diff --git a/src/backend/utils_internal_gpu/cuAlloc_gpu.cu b/src/backend/utils_internal_gpu/cuAlloc_gpu.cu index e4919455..3061ad8e 100644 --- a/src/backend/utils_internal_gpu/cuAlloc_gpu.cu +++ b/src/backend/utils_internal_gpu/cuAlloc_gpu.cu @@ -1,4 +1,7 @@ -#include "cuAlloc_gpu.hpp" +#include "backend/utils_internal_gpu/cuAlloc_gpu.hpp" + +#include "cytnx_error.hpp" +#include "Type.hpp" using namespace std; diff --git a/src/backend/utils_internal_gpu/cuAlloc_gpu.hpp b/src/backend/utils_internal_gpu/cuAlloc_gpu.hpp index 0a6da914..c7cec5dc 100644 --- a/src/backend/utils_internal_gpu/cuAlloc_gpu.hpp +++ b/src/backend/utils_internal_gpu/cuAlloc_gpu.hpp @@ -1,12 +1,8 @@ -#ifndef _H_cuAlloc_gpu_ -#define _H_cuAlloc_gpu_ +#ifndef CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUALLOC_GPU_H_ +#define CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUALLOC_GPU_H_ -#include -#include -#include -#include #include "Type.hpp" -#include "cytnx_error.hpp" + namespace cytnx { namespace utils_internal { @@ -16,4 +12,4 @@ namespace cytnx { #endif } // namespace utils_internal } // namespace cytnx -#endif +#endif // CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUALLOC_GPU_H_ diff --git a/src/backend/utils_internal_gpu/cuCast_gpu.cu b/src/backend/utils_internal_gpu/cuCast_gpu.cu index 48c6a821..b969622c 100644 --- a/src/backend/utils_internal_gpu/cuCast_gpu.cu +++ b/src/backend/utils_internal_gpu/cuCast_gpu.cu @@ -50,7 +50,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -61,7 +60,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } @@ -77,7 +75,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cuFloatComplex* _in = static_cast(in->Mem); @@ -92,7 +89,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -104,7 +100,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -120,7 +115,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -136,7 +130,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new DoubleStorage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -146,7 +139,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new FloatStorage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -160,7 +152,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int64Storage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -173,7 +164,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint64Storage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -187,7 +177,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int32Storage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -201,7 +190,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -215,7 +203,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int16Storage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -229,7 +216,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint16Storage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -243,7 +229,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new BoolStorage()); out->Init(len_in, alloc_device); } cytnx_double* _in = static_cast(in->Mem); @@ -258,7 +243,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -273,7 +257,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -287,7 +270,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new DoubleStorage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -301,7 +283,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new FloatStorage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -311,7 +292,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int64Storage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -325,7 +305,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint64Storage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -339,7 +318,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int32Storage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -353,7 +331,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -367,7 +344,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int16Storage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -381,7 +357,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint16Storage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -395,7 +370,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new BoolStorage()); out->Init(len_in, alloc_device); } cytnx_float* _in = static_cast(in->Mem); @@ -411,7 +385,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -425,7 +398,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -440,7 +412,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new DoubleStorage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -454,7 +425,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new FloatStorage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -468,7 +438,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int64Storage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -478,7 +447,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint64Storage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -492,7 +460,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int32Storage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -506,7 +473,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -520,7 +486,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int16Storage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -534,7 +499,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint16Storage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -548,7 +512,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new BoolStorage()); out->Init(len_in, alloc_device); } cytnx_int64* _in = static_cast(in->Mem); @@ -564,7 +527,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -578,7 +540,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -592,7 +553,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new DoubleStorage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -606,7 +566,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new FloatStorage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -620,7 +579,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int64Storage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -634,7 +592,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint64Storage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -644,7 +601,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int32Storage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -658,7 +614,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -672,7 +627,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int16Storage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -686,7 +640,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -700,7 +653,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new BoolStorage()); out->Init(len_in, alloc_device); } cytnx_uint64* _in = static_cast(in->Mem); @@ -716,7 +668,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -730,7 +681,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -744,7 +694,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new DoubleStorage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -758,7 +707,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new FloatStorage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -772,7 +720,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int64Storage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -786,7 +733,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint64Storage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -800,7 +746,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int32Storage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -810,7 +755,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -824,7 +768,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint16Storage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -838,7 +781,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int16Storage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -852,7 +794,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new BoolStorage()); out->Init(len_in, alloc_device); } cytnx_int32* _in = static_cast(in->Mem); @@ -867,7 +808,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -881,7 +821,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -895,7 +834,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new DoubleStorage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -909,7 +847,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new FloatStorage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -923,7 +860,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int64Storage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -937,7 +873,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint64Storage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -951,7 +886,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int32Storage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -965,7 +899,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -975,7 +908,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint16Storage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -989,7 +921,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int16Storage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -1003,7 +934,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new BoolStorage()); out->Init(len_in, alloc_device); } cytnx_uint32* _in = static_cast(in->Mem); @@ -1018,7 +948,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1032,7 +961,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1046,7 +974,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new DoubleStorage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1060,7 +987,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new FloatStorage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1074,7 +1000,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int64Storage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1088,7 +1013,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint64Storage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1102,7 +1026,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int32Storage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1116,7 +1039,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1130,7 +1052,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint16Storage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -1140,7 +1061,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int16Storage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1154,7 +1074,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new BoolStorage()); out->Init(len_in, alloc_device); } cytnx_uint16* _in = static_cast(in->Mem); @@ -1169,7 +1088,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1183,7 +1101,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1197,7 +1114,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new DoubleStorage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1211,7 +1127,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new FloatStorage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1225,7 +1140,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int64Storage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1239,7 +1153,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint64Storage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1253,7 +1166,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int32Storage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1267,7 +1179,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1281,7 +1192,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint16Storage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1295,7 +1205,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int16Storage()); out->Init(len_in, alloc_device); } checkCudaErrors( @@ -1305,7 +1214,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new BoolStorage()); out->Init(len_in, alloc_device); } cytnx_int16* _in = static_cast(in->Mem); @@ -1320,7 +1228,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexDoubleStorage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1334,7 +1241,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new ComplexFloatStorage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1348,7 +1254,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new DoubleStorage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1362,7 +1267,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new FloatStorage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1376,7 +1280,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int64Storage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1390,7 +1293,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint64Storage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1404,7 +1306,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int32Storage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1418,7 +1319,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint32Storage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1432,7 +1332,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Uint16Storage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1446,7 +1345,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new Int16Storage()); out->Init(len_in, alloc_device); } cytnx_bool* _in = static_cast(in->Mem); @@ -1460,7 +1358,6 @@ namespace cytnx { boost::intrusive_ptr& out, const unsigned long long& len_in, const int& alloc_device) { if (alloc_device >= 0) { - out = boost::intrusive_ptr(new BoolStorage()); out->Init(len_in, alloc_device); } checkCudaErrors( diff --git a/src/backend/utils_internal_gpu/cuComplexmem_gpu.cu b/src/backend/utils_internal_gpu/cuComplexmem_gpu.cu index d4dca871..45f0181c 100644 --- a/src/backend/utils_internal_gpu/cuComplexmem_gpu.cu +++ b/src/backend/utils_internal_gpu/cuComplexmem_gpu.cu @@ -1,11 +1,4 @@ #include "cuComplexmem_gpu.hpp" -#include "cuAlloc_gpu.hpp" -#include "backend/Storage.hpp" -#ifdef UNI_OMP - #include -#endif - -using namespace std; namespace cytnx { namespace utils_internal { diff --git a/src/backend/utils_internal_gpu/cuComplexmem_gpu.hpp b/src/backend/utils_internal_gpu/cuComplexmem_gpu.hpp index 7159da68..978efefa 100644 --- a/src/backend/utils_internal_gpu/cuComplexmem_gpu.hpp +++ b/src/backend/utils_internal_gpu/cuComplexmem_gpu.hpp @@ -1,13 +1,7 @@ -#ifndef _H_cuComplexmem_gpu_ -#define _H_cuComplexmem_gpu_ +#ifndef BACKEND_UTILS_INTERNAL_GPU_CUCOMPLEXMEM_GPU_H_ +#define BACKEND_UTILS_INTERNAL_GPU_CUCOMPLEXMEM_GPU_H_ -#include -#include -#include -#include #include "Type.hpp" -#include "backend/Storage.hpp" -#include "cytnx_error.hpp" namespace cytnx { namespace utils_internal { @@ -18,4 +12,4 @@ namespace cytnx { } // namespace utils_internal } // namespace cytnx -#endif +#endif // BACKEND_UTILS_INTERNAL_GPU_CUCOMPLEXMEM_GPU_H_ diff --git a/src/backend/utils_internal_gpu/cuFill_gpu.cu b/src/backend/utils_internal_gpu/cuFill_gpu.cu index 680c97c4..fe39a728 100644 --- a/src/backend/utils_internal_gpu/cuFill_gpu.cu +++ b/src/backend/utils_internal_gpu/cuFill_gpu.cu @@ -1,10 +1,7 @@ -#include "cuFill_gpu.hpp" -#include "backend/Storage.hpp" -#ifdef UNI_OMP - #include -#endif +#include "backend/utils_internal_gpu/cuFill_gpu.hpp" + +#include "Type.hpp" -using namespace std; namespace cytnx { namespace utils_internal { diff --git a/src/backend/utils_internal_gpu/cuFill_gpu.hpp b/src/backend/utils_internal_gpu/cuFill_gpu.hpp index 4d30bdcd..51220efa 100644 --- a/src/backend/utils_internal_gpu/cuFill_gpu.hpp +++ b/src/backend/utils_internal_gpu/cuFill_gpu.hpp @@ -1,13 +1,7 @@ -#ifndef _H_cuFill_gpu_ -#define _H_cuFill_gpu_ +#ifndef CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUFILL_GPU_H_ +#define CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUFILL_GPU_H_ -#include -#include -#include -#include #include "Type.hpp" -#include "backend/Storage.hpp" -#include "cytnx_error.hpp" namespace cytnx { namespace utils_internal { @@ -25,4 +19,4 @@ namespace cytnx { } // namespace utils_internal } // namespace cytnx -#endif +#endif // CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUFILL_GPU_H_ diff --git a/src/backend/utils_internal_gpu/cuMovemem_gpu.cu b/src/backend/utils_internal_gpu/cuMovemem_gpu.cu index 413d7bf0..73335e3b 100644 --- a/src/backend/utils_internal_gpu/cuMovemem_gpu.cu +++ b/src/backend/utils_internal_gpu/cuMovemem_gpu.cu @@ -1,7 +1,11 @@ #include "cuMovemem_gpu.hpp" -#include "cuAlloc_gpu.hpp" -#include "backend/Storage.hpp" + #include +#include + +#include "backend/Storage.hpp" +#include "backend/utils_internal_gpu/cuAlloc_gpu.hpp" +#include "Type.hpp" #include "utils/vec_print.hpp" #ifdef UNI_GPU @@ -137,7 +141,7 @@ namespace cytnx { checkCudaErrors(cudaFree(dshifter_old)); checkCudaErrors(cudaFree(dperm_shifter_new)); - boost::intrusive_ptr out = __SII.USIInit[dtype_T](); + boost::intrusive_ptr out = __SII.USIInit[dtype_T](in->device); if (is_inplace) { /// cpy back: checkCudaErrors(cudaMemcpy(in->Mem, dtmp, sizeof(T) * Nelem, cudaMemcpyDeviceToDevice)); @@ -182,7 +186,7 @@ namespace cytnx { cuttDestroy(plan); - boost::intrusive_ptr out = __SII.USIInit[dtype_T](); + boost::intrusive_ptr out = __SII.USIInit[dtype_T](in->device); if (is_inplace) { /// cpy back: checkCudaErrors(cudaMemcpy(in->Mem, dtmp, sizeof(T) * Nelem, cudaMemcpyDeviceToDevice)); @@ -269,7 +273,7 @@ namespace cytnx { checkCudaErrors(cutensorDestroyPlan(plan)); checkCudaErrors(cutensorDestroy(handle)); - boost::intrusive_ptr out = __SII.USIInit[dtype_T](); + boost::intrusive_ptr out = __SII.USIInit[dtype_T](in->device); if (is_inplace) { /// cpy back: checkCudaErrors(cudaMemcpy(in->Mem, dtmp, sizeof(T) * Nelem, cudaMemcpyDeviceToDevice)); diff --git a/src/backend/utils_internal_gpu/cuMovemem_gpu.hpp b/src/backend/utils_internal_gpu/cuMovemem_gpu.hpp index 14e31909..84ef96bd 100644 --- a/src/backend/utils_internal_gpu/cuMovemem_gpu.hpp +++ b/src/backend/utils_internal_gpu/cuMovemem_gpu.hpp @@ -1,15 +1,16 @@ -#ifndef _H_cuMovemem_gpu_ -#define _H_cuMovemem_gpu_ +#ifndef CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUMOVEMEM_GPU_H_ +#define CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUMOVEMEM_GPU_H_ + +#include + +#include "boost/smart_ptr/intrusive_ptr.hpp" -#include -#include -#include -#include #include "Type.hpp" -#include "backend/Storage.hpp" -#include "cytnx_error.hpp" namespace cytnx { + // TODO: Remove the dependency of Storage. + class Storage_base; + namespace utils_internal { #ifdef UNI_GPU boost::intrusive_ptr cuMovemem_gpu_cd(boost::intrusive_ptr &in, @@ -80,4 +81,4 @@ namespace cytnx { } // namespace utils_internal } // namespace cytnx -#endif +#endif // CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUMOVEMEM_GPU_H_ diff --git a/src/backend/utils_internal_interface.cpp b/src/backend/utils_internal_interface.cpp index 0b41ad90..b3c966ac 100644 --- a/src/backend/utils_internal_interface.cpp +++ b/src/backend/utils_internal_interface.cpp @@ -1,5 +1,22 @@ #include "utils_internal_interface.hpp" + #include + +#include "backend/utils_internal_cpu/blocks_mvelems_cpu.hpp" +#include "backend/utils_internal_cpu/Cast_cpu.hpp" +#include "backend/utils_internal_cpu/GetElems_contiguous_cpu.hpp" +#include "backend/utils_internal_cpu/GetElems_cpu.hpp" +#include "backend/utils_internal_cpu/SetArange_cpu.hpp" +#include "backend/utils_internal_cpu/SetElems_contiguous_cpu.hpp" +#include "backend/utils_internal_cpu/SetElems_cpu.hpp" +#include "backend/utils_internal_gpu/cuCast_gpu.hpp" +#include "backend/utils_internal_gpu/cuGetElems_contiguous_gpu.hpp" +#include "backend/utils_internal_gpu/cuGetElems_gpu.hpp" +#include "backend/utils_internal_gpu/cuSetArange_gpu.hpp" +#include "backend/utils_internal_gpu/cuSetElems_contiguous_gpu.hpp" +#include "backend/utils_internal_gpu/cuSetElems_contiguous_gpu.hpp" +#include "backend/utils_internal_gpu/cuSetElems_gpu.hpp" + using namespace std; namespace cytnx { namespace utils_internal { diff --git a/src/backend/utils_internal_interface.hpp b/src/backend/utils_internal_interface.hpp index d0d5fe45..fe046bff 100644 --- a/src/backend/utils_internal_interface.hpp +++ b/src/backend/utils_internal_interface.hpp @@ -1,43 +1,51 @@ #ifndef _H_utils_internal_ #define _H_utils_internal_ -#include "utils_internal_cpu/Cast_cpu.hpp" -#include "utils_internal_cpu/Movemem_cpu.hpp" -#include "utils_internal_cpu/Alloc_cpu.hpp" -#include "utils_internal_cpu/SetZeros_cpu.hpp" -#include "utils_internal_cpu/Range_cpu.hpp" -#include "utils_internal_cpu/Fill_cpu.hpp" -#include "utils_internal_cpu/SetArange_cpu.hpp" -#include "utils_internal_cpu/GetElems_cpu.hpp" -#include "utils_internal_cpu/GetElems_contiguous_cpu.hpp" -#include "utils_internal_cpu/SetElems_cpu.hpp" -#include "utils_internal_cpu/SetElems_contiguous_cpu.hpp" -#include "utils_internal_cpu/Complexmem_cpu.hpp" - -#include "utils_internal_cpu/blocks_mvelems_cpu.hpp" +// #include "utils_internal_cpu/Cast_cpu.hpp" +// #include "utils_internal_cpu/Movemem_cpu.hpp" +// #include "utils_internal_cpu/Alloc_cpu.hpp" +// #include "utils_internal_cpu/SetZeros_cpu.hpp" +// #include "utils_internal_cpu/Range_cpu.hpp" +// #include "utils_internal_cpu/Fill_cpu.hpp" +// #include "utils_internal_cpu/SetArange_cpu.hpp" +// #include "utils_internal_cpu/GetElems_cpu.hpp" +// #include "utils_internal_cpu/GetElems_contiguous_cpu.hpp" +// #include "utils_internal_cpu/SetElems_cpu.hpp" +// #include "utils_internal_cpu/SetElems_contiguous_cpu.hpp" +// #include "utils_internal_cpu/Complexmem_cpu.hpp" -#ifdef UNI_GPU - #include "utils_internal_gpu/cuCast_gpu.hpp" - #include "utils_internal_gpu/cuAlloc_gpu.hpp" - #include "utils_internal_gpu/cuMovemem_gpu.hpp" - #include "utils_internal_gpu/cuReduce_gpu.hpp" - #include "utils_internal_gpu/cuSetZeros_gpu.hpp" - #include "utils_internal_gpu/cuFill_gpu.hpp" - #include "utils_internal_gpu/cuSetArange_gpu.hpp" - #include "utils_internal_gpu/cuSetElems_gpu.hpp" - #include "utils_internal_gpu/cuSetElems_contiguous_gpu.hpp" - #include "utils_internal_gpu/cuGetElems_gpu.hpp" - #include "utils_internal_gpu/cuGetElems_contiguous_gpu.hpp" - #include "utils_internal_gpu/cuComplexmem_gpu.hpp" -#endif +// #include "utils_internal_cpu/blocks_mvelems_cpu.hpp" -#include "Type.hpp" -#include "utils/complex_arithmetic.hpp" -#ifdef UNI_GPU - #include "utils/cucomplex_arithmetic.hpp" -#endif +// #ifdef UNI_GPU +// #include "utils_internal_gpu/cuCast_gpu.hpp" +// #include "utils_internal_gpu/cuAlloc_gpu.hpp" +// #include "utils_internal_gpu/cuMovemem_gpu.hpp" +// #include "utils_internal_gpu/cuReduce_gpu.hpp" +// #include "utils_internal_gpu/cuSetZeros_gpu.hpp" +// #include "utils_internal_gpu/cuFill_gpu.hpp" +// #include "utils_internal_gpu/cuSetArange_gpu.hpp" +// #include "utils_internal_gpu/cuSetElems_gpu.hpp" +// #include "utils_internal_gpu/cuSetElems_contiguous_gpu.hpp" +// #include "utils_internal_gpu/cuGetElems_gpu.hpp" +// #include "utils_internal_gpu/cuGetElems_contiguous_gpu.hpp" +// #include "utils_internal_gpu/cuComplexmem_gpu.hpp" +// #endif + +// #include "utils/complex_arithmetic.hpp" +// #ifdef UNI_GPU +// #include "utils/cucomplex_arithmetic.hpp" +// #endif + +#include +#include +#include + +#include "boost/intrusive_ptr.hpp" #include "backend/Storage.hpp" +#include "Tensor.hpp" +#include "Type.hpp" + namespace cytnx { namespace utils_internal { @@ -70,14 +78,14 @@ namespace cytnx { // pair> &, const vector &, const // vector &, const cytnx_uint64 &, const cytnx_uint64 &); - typedef void (*blocks_mvelems_io)(vector &, const vector &, - const vector &, - const vector> &, - const vector> &, - map> &, - map> &, - const vector &, const vector &, - const cytnx_uint64 &, const cytnx_uint64 &); + typedef void (*blocks_mvelems_io)( + std::vector &, const std::vector &, const std::vector &, + const std::vector> &, + const std::vector> &, + std::map> &, + std::map> &, + const std::vector &, const std::vector &, const cytnx_uint64 &, + const cytnx_uint64 &); class utils_internal_interface { public: diff --git a/src/linalg/Trace.cpp.old b/src/linalg/Trace.cpp.old deleted file mode 100644 index 765de434..00000000 --- a/src/linalg/Trace.cpp.old +++ /dev/null @@ -1,392 +0,0 @@ -#include "linalg.hpp" -#include "utils/utils.hpp" -#include "Tensor.hpp" -#include "UniTensor.hpp" -#include "cytnx.hpp" -#ifdef UNI_OMP - #include -#endif - -using namespace std; -namespace cytnx { - namespace linalg { - cytnx::UniTensor Trace(const cytnx::UniTensor &Tin, const cytnx_int64 &a, - const cytnx_int64 &b) { - return Tin.Trace(a, b); - } - cytnx::UniTensor Trace(const cytnx::UniTensor &Tin, const std::string &a, - const std::string &b) { - return Tin.Trace(a, b); - } - cytnx::UniTensor Trace(const cytnx::UniTensor &Tin, const cytnx_int64 &a, const cytnx_int64 &b, - const bool &by_label) { - return Tin.Trace(a, b, by_label); - } - } // namespace linalg -} // namespace cytnx - -namespace cytnx { - - template - void _trace_2d(Tensor &out, const Tensor &Tn, const cytnx_uint64 &Ndiag) { - T a = 0; - T *rawdata = Tn.storage().data(); - cytnx_uint64 Ldim = Tn.shape()[1]; - for (cytnx_uint64 i = 0; i < Ndiag; i++) a += rawdata[i * Ldim + i]; - out.storage().at(0) = a; - } - - template - void _trace_nd(Tensor &out, const Tensor &Tn, const cytnx_uint64 &Ndiag, - const cytnx_uint64 &Nelem, const vector &accu, - const vector &remain_rank_id, const vector &shape, - const cytnx_uint64 &ax1, const cytnx_uint64 &ax2) { - UniTensor I = UniTensor(eye(Ndiag), false, -1); - I.set_labels({"0", "1"}); - UniTensor UTn = UniTensor(Tn, false, 2); - UTn.set_labels(vec_cast(vec_range(100, 100 + UTn.labels().size()))); - // UTn.set_label(ax1, "0"); - // UTn.set_label(ax2, "1"); - UTn._impl->_labels[ax1]="0"; - UTn._impl->_labels[ax2]="1"; - out = Contract(I, UTn).get_block_(); - - // vector indexer(Tn.shape().size(), 0); - // cytnx_uint64 tmp; - // for (cytnx_uint64 i = 0; i < Nelem; i++) { - // tmp = i; - // // calculate indexer - // for (int x = 0; x < shape.size(); x++) { - // indexer[remain_rank_id[x]] = cytnx_uint64(tmp / accu[x]); - // tmp %= accu[x]; - // } - - // for (cytnx_uint64 d = 0; d < Ndiag; d++) { - // indexer[ax1] = indexer[ax2] = d; - // out.storage().at(i) += Tn.at(indexer); - // } - // } - } - -#ifdef UNI_OMP - template - void _trace_2d_para(Tensor &out, const Tensor &Tn, const cytnx_uint64 &Ndiag, const int &Nomp) { - T a = 0; - vector buffer(Nomp); - - #pragma omp parallel for schedule(dynamic) - for (cytnx_uint64 i = 0; i < Ndiag; i++) buffer[omp_get_thread_num()] += Tn.at({i, i}); - - for (int i = 1; i < Nomp; i++) buffer[0] += buffer[i]; - out.storage().at({0}) = buffer[0]; - } - - template - void _trace_nd_para(Tensor &out, const Tensor &Tn, const cytnx_uint64 &Ndiag, - const cytnx_uint64 &Nelem, const vector &accu, - const vector &remain_rank_id, const vector &shape, - const cytnx_uint64 &ax1, const cytnx_uint64 &ax2, const int &Nomp) { - // decide parallel Nelem or Ndiag: - if (false and Nelem < Ndiag) { - // each thread need it's own indexer: - vector> indexers(Nomp, vector(Tn.shape().size(), 0)); - // cout << "Ne < Nd" << endl; - #pragma omp parallel for schedule(dynamic) - for (cytnx_uint64 i = 0; i < Nelem; i++) { - cytnx_uint64 tmp = i; - // calculate indexer - for (int x = 0; x < shape.size(); x++) { - indexers[omp_get_thread_num()][remain_rank_id[x]] = cytnx_uint64(tmp / accu[x]); - tmp %= accu[x]; - } - - for (cytnx_uint64 d = 0; d < Ndiag; d++) { - indexers[omp_get_thread_num()][ax1] = indexers[omp_get_thread_num()][ax2] = d; - out.storage().at(i) += Tn.at(indexers[omp_get_thread_num()]); - } - } - - } else { - #pragma omp parallel - { - vector indexers(Tn.shape().size(), 0); - #pragma omp for schedule(static) - for (cytnx_uint64 i = 0; i < Nelem; i++) { - cytnx_uint64 tmp; - tmp = i; - // calculate indexer - for (int x = 0; x < shape.size(); x++) { - indexers[remain_rank_id[x]] = cytnx_uint64(tmp / accu[x]); - tmp %= accu[x]; - } - - for (cytnx_uint64 d = 0; d < Ndiag; d++) { - indexers[ax1] = indexers[ax2] = d; - out.storage().at(i) += Tn.at(indexers); - } - } - } - } - } -#endif - - namespace linalg { - // dtype -1: default - // device -2: default. - Tensor Trace(const Tensor &Tn, const cytnx_uint64 &axisA, const cytnx_uint64 &axisB) { - // checking: - cytnx_error_msg(Tn.shape().size() < 2, "[ERROR] Tensor must have at least rank-2.%s", "\n"); - cytnx_error_msg(axisA >= Tn.shape().size(), "[ERROR] axisA out of bound.%s", "\n"); - cytnx_error_msg(axisB >= Tn.shape().size(), "[ERROR] axisB out of bound.%s", "\n"); - cytnx_error_msg(axisA == axisB, "[ERROR] axisB cannot be the same as axisA.%s", "\n"); - // cytnx_error_msg(dtype == Type.Void,"[ERROR] cannot have output type to be - // Type.Void.%s","\n"); vector indexer(Tn.shape().size()); - - cytnx_uint64 ax1, ax2; - if (axisA < axisB) { - ax1 = axisA; - ax2 = axisB; - } else { - ax1 = axisB; - ax2 = axisA; - } - - // int out_dtype = dtype==-1?Tn.dtype():dtype; - // int out_device = device==-2?Tn.device():device; - - // 1) get redundant rank: - vector shape(Tn.shape().begin(), Tn.shape().end()); - vector accu; - shape.erase(shape.begin() + ax2); - shape.erase(shape.begin() + ax1); - // 2) get out put elementsize. - cytnx_uint64 Nelem = 1; - for (int i = 0; i < shape.size(); i++) Nelem *= shape[i]; - // 3) get diagonal element numbers: - cytnx_uint64 Ndiag = Tn.shape()[ax1] < Tn.shape()[ax2] ? Tn.shape()[ax1] : Tn.shape()[ax2]; - - Tensor out = Tensor({Nelem}, Tn.dtype(), Tn.device()); - out.storage().set_zeros(); - -#ifdef UNI_OMP - int Nomp = 1; - #pragma omp parallel - { - if (omp_get_thread_num() == 0) Nomp = omp_get_num_threads(); - } - // std::cout << Nomp <(out, Tn, Ndiag, Nomp); - break; - case Type.ComplexFloat: - _trace_2d_para(out, Tn, Ndiag, Nomp); - break; - case Type.Double: - _trace_2d_para(out, Tn, Ndiag, Nomp); - break; - case Type.Float: - _trace_2d_para(out, Tn, Ndiag, Nomp); - break; - case Type.Uint64: - _trace_2d_para(out, Tn, Ndiag, Nomp); - break; - case Type.Int64: - _trace_2d_para(out, Tn, Ndiag, Nomp); - break; - case Type.Uint32: - _trace_2d_para(out, Tn, Ndiag, Nomp); - break; - case Type.Int32: - _trace_2d_para(out, Tn, Ndiag, Nomp); - break; - case Type.Int16: - _trace_2d_para(out, Tn, Ndiag, Nomp); - break; - case Type.Uint16: - _trace_2d_para(out, Tn, Ndiag, Nomp); - break; - case Type.Bool: - //_trace_2d_para(out,Tn,Ndiag,Nomp); - cytnx_error_msg( - true, - "[ERROR][Trace] Bool type cannot perform Trace, use .astype() to promote first.%s", - "\n"); - break; - default: - cytnx_error_msg(true, "[ERROR][Trace] invalid Type.%s", "\n"); - break; - } - } else { - vector remain_rank_id; - vector accu(shape.size()); - accu.back() = 1; - for (int i = shape.size() - 1; i > 0; i--) accu[i - 1] = accu[i] * shape[i]; - - for (cytnx_uint64 i = 0; i < Tn.shape().size(); i++) { - if (i != ax1 && i != ax2) remain_rank_id.push_back(i); - } - - switch (Tn.dtype()) { - case Type.ComplexDouble: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, - ax1, ax2, Nomp); - break; - case Type.ComplexFloat: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2, Nomp); - break; - case Type.Double: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2, Nomp); - break; - case Type.Float: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2, Nomp); - break; - case Type.Uint64: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2, Nomp); - break; - case Type.Int64: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2, Nomp); - break; - case Type.Int32: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2, Nomp); - break; - case Type.Uint32: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2, Nomp); - break; - case Type.Uint16: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2, Nomp); - break; - case Type.Int16: - _trace_nd_para(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2, Nomp); - break; - case Type.Bool: - cytnx_error_msg( - true, - "[ERROR][Trace] Bool type cannot perform Trace, use .astype() to promote first.%s", - "\n"); - break; - default: - cytnx_error_msg(true, "[ERROR][Trace] Invalid Type.%s", "\n"); - break; - } // switch - out.reshape_(shape); - } - -#else - - if (shape.size() == 0) { - switch (Tn.dtype()) { - case Type.ComplexDouble: - _trace_2d(out, Tn, Ndiag); - break; - case Type.ComplexFloat: - _trace_2d(out, Tn, Ndiag); - break; - case Type.Double: - _trace_2d(out, Tn, Ndiag); - break; - case Type.Float: - _trace_2d(out, Tn, Ndiag); - break; - case Type.Uint64: - _trace_2d(out, Tn, Ndiag); - break; - case Type.Int64: - _trace_2d(out, Tn, Ndiag); - break; - case Type.Uint32: - _trace_2d(out, Tn, Ndiag); - break; - case Type.Int32: - _trace_2d(out, Tn, Ndiag); - break; - case Type.Int16: - _trace_2d(out, Tn, Ndiag); - break; - case Type.Uint16: - _trace_2d(out, Tn, Ndiag); - break; - case Type.Bool: - cytnx_error_msg( - true, - "[ERROR][Trace] Bool type cannot perform Trace, use .astype() to promote first.%s", - "\n"); - break; - default: - cytnx_error_msg(true, "[ERROR][Trace] invalid Type.%s", "\n"); - break; - } - } else { - vector remain_rank_id; - vector accu(shape.size()); - accu.back() = 1; - for (int i = shape.size() - 1; i > 0; i--) accu[i - 1] = accu[i] * shape[i]; - - for (cytnx_uint64 i = 0; i < Tn.shape().size(); i++) { - if (i != ax1 && i != ax2) remain_rank_id.push_back(i); - } - - switch (Tn.dtype()) { - case Type.ComplexDouble: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2); - break; - case Type.ComplexFloat: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, - ax2); - break; - case Type.Double: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, ax2); - break; - case Type.Float: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, ax2); - break; - case Type.Uint64: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, ax2); - break; - case Type.Int64: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, ax2); - break; - case Type.Int32: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, ax2); - break; - case Type.Uint32: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, ax2); - break; - case Type.Uint16: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, ax2); - break; - case Type.Int16: - _trace_nd(out, Tn, Ndiag, Nelem, accu, remain_rank_id, shape, ax1, ax2); - break; - case Type.Bool: - cytnx_error_msg( - true, - "[ERROR][Trace] Bool type cannot perform Trace, use .astype() to promote first.%s", - "\n"); - break; - default: - cytnx_error_msg(true, "[ERROR][Trace] Invalid Type.%s", "\n"); - break; - } // switch - out.reshape_(shape); - } - -#endif - - return out; - } - - } // namespace linalg -} // namespace cytnx diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1c1b1a40..55d0faad 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -36,6 +36,11 @@ add_executable( algo_test/Vstack_test.cpp ) +target_include_directories( + test_main + PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../src +) + if(USE_CUDA) add_subdirectory(gpu) endif() diff --git a/tests/Storage_test.cpp b/tests/Storage_test.cpp index d07cd33c..4d331b3d 100644 --- a/tests/Storage_test.cpp +++ b/tests/Storage_test.cpp @@ -1,7 +1,14 @@ #include "Storage_test.h" -#include "test_tools.h" + +#include #include +#include "backend/Storage.hpp" +#include "utils/is.hpp" + +using namespace cytnx; +using namespace std; + TEST_F(StorageTest, dtype_str) { std::vector vcd = {cytnx_complex128(1, 2), cytnx_complex128(3, 4), cytnx_complex128(5, 6)}; @@ -254,3 +261,5 @@ TYPED_TEST(vector_suite, storage_fill_b) { EXPECT_EQ(sd.at(0), TypeParam(fille)); EXPECT_EQ(sd.at(1), TypeParam(fille)); } + +// TODO: add Storage tests for GPU diff --git a/tests/Storage_test.h b/tests/Storage_test.h index 7bcf347b..b97be06f 100644 --- a/tests/Storage_test.h +++ b/tests/Storage_test.h @@ -1,11 +1,8 @@ #ifndef _H_STORAGE_TEST #define _H_STORAGE_TEST -#include "cytnx.hpp" #include -using namespace cytnx; -using namespace std; class StorageTest : public ::testing::Test { public: protected: diff --git a/tests/gpu/BlockUniTensor_test.cpp b/tests/gpu/BlockUniTensor_test.cpp index f43a7975..a424ae22 100644 --- a/tests/gpu/BlockUniTensor_test.cpp +++ b/tests/gpu/BlockUniTensor_test.cpp @@ -221,8 +221,8 @@ TEST_F(BlockUniTensorTest, gpu_Conj) { } tmp = UT_diag_cplx.Conj(); - for (size_t i = 0; i < UT_diag.bonds()[0].qnums().size(); i++) { - cytnx_uint64 deg = UT_diag.bonds()[0]._impl->_degs[i]; + for (size_t i = 0; i < UT_diag_cplx.bonds()[0].qnums().size(); i++) { + cytnx_uint64 deg = UT_diag_cplx.bonds()[0]._impl->_degs[i]; for (size_t j = 0; j < deg; j++) { EXPECT_DOUBLE_EQ(double(tmp.get_block_(i).at({j}).real()), double(UT_diag_cplx.get_block_(i).at({j}).real())); diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index a76fdc6a..b5242435 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -28,6 +28,12 @@ add_executable( algo_test/Vstack_test.cpp ) +target_include_directories( + gpu_test_main + PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/../../src +) + target_link_libraries( gpu_test_main gtest_main