bev-project/mmdet3d/ops/spconv/include/tensorview/tensorview.h

1164 lines
38 KiB
C++

// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cuda_runtime_api.h>
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <memory>
// #include <prettyprint.h>
#include <sstream>
#include <type_traits>
#include <vector>
namespace tv {
#if defined(__NVCC__) || defined(__HIP__)
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__ __host__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#define TV_ASSERT(expr) assert(expr)
#elif defined(__CUDACC_RTC__)
#define TV_ASSERT(expr) assert(expr)
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#else
#define TV_ASSERT(x) assert(x)
#define TV_HOST_DEVICE_INLINE inline
#define TV_HOST_DEVICE
#endif
#define TV_REQUIRE(expr, ...) \
{ \
if (!(expr)) { \
printf(__VA_ARGS__); \
assert(expr); \
} \
}
#define TV_DEVICE_REQUIRE(expr, ...) \
{ \
if (!(expr) && threadIdx.x == 0) printf(__VA_ARGS__); \
assert(expr); \
}
template <class SStream, class T>
void sstream_print(SStream &ss, T val) {
ss << val;
}
template <class SStream, class T, class... TArgs>
void sstream_print(SStream &ss, T val, TArgs... args) {
ss << val << " ";
sstream_print(ss, args...);
}
#define TV_ASSERT_RT_ERR(expr, ...) \
{ \
if (!(expr)) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << #expr << " assert faild. "; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::runtime_error(__macro_s.str()); \
} \
}
#define TV_ASSERT_INVALID_ARG(expr, ...) \
{ \
if (!(expr)) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << #expr << " assert faild. "; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::invalid_argument(__macro_s.str()); \
} \
}
#define TV_CHECK_CUDA_ERR() \
{ \
auto err = cudaGetLastError(); \
if (err != cudaSuccess) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << "cuda execution failed with error " << err; \
throw std::runtime_error(__macro_s.str()); \
} \
}
struct GPU {
GPU(cudaStream_t s = 0) : mStream(s) {}
virtual cudaStream_t getStream() const { return mStream; }
cudaStream_t mStream = 0;
};
struct CPU {};
#define TV_MAX_DIM 6
/*
template <typename T>
constexpr size_t calc_align(size_t ndim)
{
if (ndim * sizeof(T) == 1)
return 1;
else if (ndim * sizeof(T) == 2)
return 2;
else if (ndim * sizeof(T) <= 4 && ndim * sizeof(T) > 2)
return 4;
else if (ndim * sizeof(T) <= 8 && ndim * sizeof(T) > 4)
return 8;
else if (ndim * sizeof(T) <= 16 && ndim * sizeof(T) > 8)
return 16;
else if (ndim * sizeof(T) <= 32 && ndim * sizeof(T) > 16)
return 32;
else
return 64;
}
*/
template <typename T, size_t MaxDim = TV_MAX_DIM>
struct /*alignas(calc_align<T>(MaxDim))*/ SimpleVector {
public:
TV_HOST_DEVICE_INLINE SimpleVector(){};
TV_HOST_DEVICE_INLINE SimpleVector(std::initializer_list<T> q) {
TV_ASSERT(q.size() <= MaxDim);
mSize = 0;
for (T s : q) {
mArray[mSize++] = s;
}
mSize = q.size();
}
SimpleVector(const std::vector<T> &arr) {
TV_ASSERT(arr.size() <= MaxDim);
for (size_t i = 0; i < arr.size(); ++i) {
mArray[i] = arr[i];
}
mSize = arr.size();
}
TV_HOST_DEVICE_INLINE SimpleVector(const SimpleVector<T, MaxDim> &arr) {
TV_ASSERT(arr.size() <= MaxDim);
for (size_t i = 0; i < arr.size(); ++i) {
mArray[i] = arr[i];
}
mSize = arr.size();
}
TV_HOST_DEVICE_INLINE T &operator[](int idx) {
#ifdef TV_DEBUG
TV_ASSERT(idx >= 0 && idx < mSize);
#endif
return mArray[idx];
}
TV_HOST_DEVICE_INLINE const T &operator[](int idx) const {
#ifdef TV_DEBUG
TV_ASSERT(idx >= 0 && idx < mSize);
#endif
return mArray[idx];
}
TV_HOST_DEVICE_INLINE void push_back(T s) {
#ifdef TV_DEBUG
TV_ASSERT(mSize < MaxDim);
#endif
mArray[mSize] = s;
mSize++;
}
TV_HOST_DEVICE_INLINE void pop_back() {
#ifdef TV_DEBUG
TV_ASSERT(mSize > 0);
#endif
mSize--;
}
TV_HOST_DEVICE_INLINE size_t size() const { return mSize; }
TV_HOST_DEVICE_INLINE const T *data() const { return mArray; }
TV_HOST_DEVICE_INLINE size_t empty() const { return mSize == 0; }
typedef size_t size_type;
class iterator {
public:
typedef iterator self_type;
typedef T value_type;
typedef T &reference;
typedef T *pointer;
typedef std::forward_iterator_tag iterator_category;
typedef std::ptrdiff_t difference_type;
TV_HOST_DEVICE_INLINE iterator(pointer ptr) : ptr_(ptr) {}
TV_HOST_DEVICE_INLINE self_type operator++(int junk) {
self_type i = *this;
ptr_++;
return i;
}
TV_HOST_DEVICE_INLINE self_type operator++() {
ptr_++;
return *this;
}
TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }
TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }
TV_HOST_DEVICE_INLINE bool operator==(const self_type &rhs) {
return ptr_ == rhs.ptr_;
}
TV_HOST_DEVICE_INLINE bool operator!=(const self_type &rhs) {
return ptr_ != rhs.ptr_;
}
private:
pointer ptr_;
};
class const_iterator {
public:
typedef const_iterator self_type;
typedef T value_type;
typedef const T &reference;
typedef const T *pointer;
typedef std::ptrdiff_t difference_type;
typedef std::forward_iterator_tag iterator_category;
TV_HOST_DEVICE_INLINE const_iterator(pointer ptr) : ptr_(ptr) {}
TV_HOST_DEVICE_INLINE self_type operator++(int junk) {
self_type i = *this;
ptr_++;
return i;
}
TV_HOST_DEVICE_INLINE self_type operator++() {
ptr_++;
return *this;
}
TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }
TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }
TV_HOST_DEVICE_INLINE bool operator==(const self_type &rhs) {
return ptr_ == rhs.ptr_;
}
TV_HOST_DEVICE_INLINE bool operator!=(const self_type &rhs) {
return ptr_ != rhs.ptr_;
}
private:
pointer ptr_;
};
TV_HOST_DEVICE_INLINE iterator begin() { return iterator(mArray); }
TV_HOST_DEVICE_INLINE iterator end() { return iterator(mArray + mSize); }
TV_HOST_DEVICE_INLINE const_iterator begin() const {
return const_iterator(mArray);
}
TV_HOST_DEVICE_INLINE const_iterator end() const {
return const_iterator(mArray + mSize);
}
TV_HOST_DEVICE_INLINE const_iterator cbegin() const {
return const_iterator(mArray);
}
TV_HOST_DEVICE_INLINE const_iterator cend() const {
return const_iterator(mArray + mSize);
}
protected:
T mArray[MaxDim];
size_t mSize = 0;
};
template <typename T, size_t MaxDim>
bool operator==(const SimpleVector<T, MaxDim> &lfs,
const SimpleVector<T, MaxDim> &rfs) {
if (lfs.size() != rfs.size()) return false;
for (size_t i = 0; i < lfs.size(); ++i) {
if (lfs[i] != rfs[i]) return false;
}
return true;
}
template <typename T, size_t MaxDim>
bool operator!=(const SimpleVector<T, MaxDim> &lfs,
const SimpleVector<T, MaxDim> &rfs) {
return !(lfs == rfs);
}
struct Slice {
template <class... Integers>
TV_HOST_DEVICE_INLINE Slice(Integers... ints) {
static_assert(sizeof...(ints) <= 3, "slice init must smaller than 3");
SimpleVector<int, 3> slices{int(ints)...};
mSlices[0] = -1;
mSlices[1] = -1;
mSlices[2] = -1;
for (size_t i = 0; i < slices.size(); ++i) {
mSlices[i] = slices[i];
}
}
TV_HOST_DEVICE_INLINE Slice() {
mSlices[0] = -1;
mSlices[1] = -1;
mSlices[2] = -1;
}
template <typename T>
TV_HOST_DEVICE_INLINE Slice(std::initializer_list<T> slice) {
mSlices[0] = -1;
mSlices[1] = -1;
mSlices[2] = -1;
TV_ASSERT(slice.size() <= 3);
int idx = 0;
for (T s : slice) {
mSlices[idx] = int(s);
++idx;
}
}
TV_HOST_DEVICE_INLINE int &operator[](int idx) {
#ifdef TV_DEBUG
TV_ASSERT(idx >= 0 && idx < 3);
#endif
return mSlices[idx];
}
TV_HOST_DEVICE_INLINE const int &operator[](int idx) const {
#ifdef TV_DEBUG
TV_ASSERT(idx >= 0 && idx < 3);
#endif
return mSlices[idx];
}
protected:
int mSlices[3];
};
template <size_t MaxDim = TV_MAX_DIM>
struct ShapeBase : public SimpleVector<int, MaxDim> {
TV_HOST_DEVICE_INLINE ShapeBase() : SimpleVector<int, MaxDim>(){};
TV_HOST_DEVICE_INLINE ShapeBase(std::initializer_list<int> shape)
: SimpleVector<int, MaxDim>(shape) {}
template <typename T, template <class...> class Container>
ShapeBase(Container<T> shape) : SimpleVector<int, MaxDim>(shape) {}
TV_HOST_DEVICE_INLINE ShapeBase(const ShapeBase<MaxDim> &shape)
: SimpleVector<int, MaxDim>(shape) {}
ShapeBase(const std::vector<int> &arr) : SimpleVector<int, MaxDim>(arr) {}
ShapeBase<MaxDim> &operator=(const ShapeBase<MaxDim> &shape) = default;
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start, int end) const {
#ifdef TV_DEBUG
TV_ASSERT(start >= 0 && end < this->mSize && end > start);
#endif
ShapeBase<MaxDim> shape;
for (int i = start; i < end; ++i) {
shape.push_back(this->mArray[i]);
}
return shape;
}
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start) const {
#ifdef TV_DEBUG
TV_ASSERT(start >= 0 && start <= this->mSize);
#endif
ShapeBase<MaxDim> shape;
for (int i = start; i < this->mSize; ++i) {
shape.push_back(this->mArray[i]);
}
return shape;
}
TV_HOST_DEVICE_INLINE size_t size() const {
if (this->mSize == 0) return 0;
size_t s = 1;
for (int i = 0; i < int(this->mSize); ++i) {
s *= this->mArray[i];
}
return s;
}
TV_HOST_DEVICE_INLINE size_t ndim() const { return this->mSize; }
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze() const {
ShapeBase<MaxDim> shape;
for (int i = 0; i < this->mSize; ++i) {
if (this->mArray[i] != 1) shape.push_back(this->mArray[i]);
}
return shape;
}
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze(int dim) const {
ShapeBase<MaxDim> shape;
for (int i = 0; i < this->mSize; ++i) {
if (i != dim || this->mArray[i] != 1) shape.push_back(this->mArray[i]);
}
return shape;
}
};
using Shape = ShapeBase<TV_MAX_DIM>;
template <class... Inds>
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int> &shape,
Inds... indexes) {
unsigned offset = 0;
unsigned m = 1;
int indexes_vec[sizeof...(indexes)] = {indexes...};
#ifdef TV_DEBUG
TV_ASSERT(sizeof...(indexes) == shape.size());
#endif
#pragma unroll
for (int i = sizeof...(indexes) - 1; i >= 0; --i) {
offset += m * indexes_vec[i];
m *= shape[i];
}
return offset;
}
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int> &shape,
std::vector<int> &indexes_vec) {
unsigned offset = 0;
unsigned m = 1;
for (int i = shape.size() - 1; i >= 0; --i) {
offset += m * indexes_vec[i];
m *= shape[i];
}
return offset;
}
template <class... Inds>
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape &shape,
Inds... indexes) {
unsigned offset = 0;
unsigned m = 1;
int indexes_vec[sizeof...(indexes)] = {indexes...};
#pragma unroll
for (int i = sizeof...(indexes) - 1; i >= 0; --i) {
offset += m * indexes_vec[i];
m *= shape[i];
}
return offset;
}
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape &shape,
const Shape &indexes_vec) {
unsigned offset = 0;
unsigned m = 1;
for (int i = indexes_vec.ndim() - 1; i >= 0; --i) {
offset += m * indexes_vec[i];
m *= shape[i];
}
return offset;
}
template <typename Index, unsigned NDim>
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Index *indexes,
const Index *shape) {
unsigned offset = 0;
unsigned m = 1;
#pragma unroll
for (int i = NDim - 1; i >= 0; --i) {
offset += m * indexes[i];
m *= shape[i];
}
return offset;
}
template <typename Index, unsigned NDim>
TV_HOST_DEVICE_INLINE Index rowArrayIdxInv(Index index, Index *output,
const Index *shape) {
#pragma unroll
for (int i = NDim - 1; i >= 0; --i) {
output[i] = index % shape[i];
index -= output[i];
index /= shape[i];
}
return index;
}
template <int N>
struct ArrayIndexRowMajor {
// mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,
const Shape &indexes) {
return indexes[N - 1] +
shape[N - 1] * ArrayIndexRowMajor<N - 1>::run(shape, indexes);
}
};
template <>
struct ArrayIndexRowMajor<0> {
TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,
const Shape &indexes) {
return 0;
}
};
namespace detail {
template <typename T>
constexpr const char *simpleTypeName(T val = T());
template <>
constexpr const char *simpleTypeName(float val) {
return "float32";
}
template <>
constexpr const char *simpleTypeName(double val) {
return "float64";
}
template <>
constexpr const char *simpleTypeName(int val) {
return "int32";
}
template <>
constexpr const char *simpleTypeName(unsigned val) {
return "uint32";
}
template <>
constexpr const char *simpleTypeName(long val) {
return "int64";
}
template <>
constexpr const char *simpleTypeName(unsigned long val) {
return "uint64";
}
}; // namespace detail
template <typename T, int Rank = -1>
struct TensorView {
TV_HOST_DEVICE_INLINE TensorView() {}
explicit TV_HOST_DEVICE_INLINE TensorView(T *ptr, Shape shape)
: mPtr(ptr), mShape(shape) {}
// explicit TV_HOST_DEVICE_INLINE TensorView(const
// TensorView<std::remove_const_t<T>> &tview) : mPtr(tview.data()),
// mShape(tview.shape()) {}
template <class... Integers>
explicit TV_HOST_DEVICE_INLINE TensorView(T *ptr, Integers... shapes)
: mPtr(ptr) {
mShape = {int(shapes)...};
}
TV_HOST_DEVICE_INLINE TensorView<T, Rank> &assign(
const TensorView<T, Rank> &tensor) {
TV_REQUIRE(tensor.shape() == shape(), "you must provide same input size%s",
"\n");
T *ptr = mPtr;
const T *other_ptr = tensor.data();
for (size_t i = 0; i < size(); ++i) *(ptr++) = *(other_ptr++);
return *this;
}
template <typename T1>
TV_HOST_DEVICE_INLINE TensorView<T, Rank> &assign(
std::initializer_list<T1> seq) {
TV_REQUIRE(seq.size() == size(), "you must provide same input size%s",
"\n");
T *ptr = mPtr;
for (const T1 &s : seq) *(ptr++) = T(s);
return *this;
}
template <class... Inds>
TV_HOST_DEVICE_INLINE T &operator()(Inds... inds) {
#ifdef TV_DEBUG
int idxes[sizeof...(Inds)]{int(inds)...};
TV_REQUIRE(sizeof...(inds) == mShape.ndim(),
"you provide %d indexes, but dim is %d\n", sizeof...(inds),
mShape.ndim());
for (int i = 0; i < sizeof...(inds); ++i) {
TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],
"index-%d(%d) out-of-range: [0, %d)\n", i, idxes[i],
mShape[i]);
}
#endif
return mPtr[rowArrayIdx(mShape, int(inds)...)];
}
template <class... Inds>
TV_HOST_DEVICE_INLINE const T &operator()(Inds... inds) const {
#ifdef TV_DEBUG
int idxes[sizeof...(Inds)]{int(inds)...};
TV_REQUIRE(sizeof...(inds) == mShape.ndim(),
"you provide %d indexes, but dim is %d\n", sizeof...(inds),
mShape.ndim());
for (int i = 0; i < sizeof...(inds); ++i) {
TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],
"index-%d(%d) out-of-range: [0, %d)\n", i, idxes[i],
mShape[i]);
}
#endif
return mPtr[rowArrayIdx(mShape, int(inds)...)];
}
TV_HOST_DEVICE_INLINE T &operator()() {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mPtr != nullptr,
"you want get value but the view is empty.%s", "\n");
TV_DEVICE_REQUIRE(mShape.ndim() == 0,
"you provide 0 indexes, but dim is %ld\n", mShape.ndim());
#else
TV_REQUIRE(mPtr != nullptr, "you want get value but the view is empty.%s",
"\n");
TV_REQUIRE(mShape.ndim() == 0, "you provide 0 indexes, but dim is %ld\n",
mShape.ndim());
#endif
#endif
return mPtr[0];
}
TV_HOST_DEVICE_INLINE const T &operator()() const {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mPtr != nullptr,
"you want get value but the view is empty.%s", "\n");
TV_DEVICE_REQUIRE(mShape.ndim() == 0,
"you provide 0 indexes, but dim is %ld\n", mShape.ndim());
#else
TV_REQUIRE(mPtr != nullptr, "you want get value but the view is empty.%s",
"\n");
TV_REQUIRE(mShape.ndim() == 0, "you provide 0 indexes, but dim is %ld\n",
mShape.ndim());
#endif
#endif
return mPtr[0];
}
template <class T1>
TV_HOST_DEVICE_INLINE T &operator()(T1 i1) {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 1,
"you provide 1 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, i1, mShape[0]);
#else
TV_REQUIRE(mShape.ndim() == 1, "you provide 1 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, i1, mShape[0]);
#endif
#endif
return mPtr[i1];
}
template <class T1, class T2>
TV_HOST_DEVICE_INLINE T &operator()(T1 i1, T2 i2) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 2,
"you provide 2 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
#else
TV_REQUIRE(mShape.ndim() == 2, "you provide 2 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
#endif
#endif
return mPtr[i1 * mShape[1] + i2];
}
template <class T1, class T2, class T3>
TV_HOST_DEVICE_INLINE T &operator()(T1 i1, T2 i2, T3 i3) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 3,
"you provide 3 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
mShape[2]);
#else
TV_REQUIRE(mShape.ndim() == 3, "you provide 3 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
#endif
#endif
return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];
}
template <class T1, class T2, class T3, class T4>
TV_HOST_DEVICE_INLINE T &operator()(T1 i1, T2 i2, T3 i3, T4 i4) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 4,
"you provide 4 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
mShape[2]);
TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4),
mShape[3]);
#else
TV_REQUIRE(mShape.ndim() == 4, "you provide 4 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
TV_REQUIRE(i4 >= 0 && i4 < mShape[3],
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4), mShape[3]);
#endif
#endif
return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
}
template <class T1>
TV_HOST_DEVICE_INLINE const T &operator()(T1 i1) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 1,
"you provide 1 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
#else
TV_REQUIRE(mShape.ndim() == 1, "you provide 1 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
#endif
#endif
return mPtr[i1];
}
template <class T1, class T2>
TV_HOST_DEVICE_INLINE const T &operator()(T1 i1, T2 i2) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 2,
"you provide 2 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
#else
TV_REQUIRE(mShape.ndim() == 2, "you provide 2 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
#endif
#endif
return mPtr[i1 * mShape[1] + i2];
}
template <class T1, class T2, class T3>
TV_HOST_DEVICE_INLINE const T &operator()(T1 i1, T2 i2, T3 i3) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 3,
"you provide 3 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
mShape[2]);
#else
TV_REQUIRE(mShape.ndim() == 3, "you provide 3 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
#endif
#endif
return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];
}
template <class T1, class T2, class T3, class T4>
TV_HOST_DEVICE_INLINE const T &operator()(T1 i1, T2 i2, T3 i3, T4 i4) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 4,
"you provide 4 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
mShape[2]);
TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4),
mShape[3]);
#else
TV_REQUIRE(mShape.ndim() == 4, "you provide 4 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
TV_REQUIRE(i4 >= 0 && i4 < mShape[3],
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4), mShape[3]);
#endif
#endif
return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
}
TV_HOST_DEVICE_INLINE T &operator[](int idx) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(idx >= 0 && idx < size(),
"index(%d) out-of-range: [0, %ld)\n", int(idx), size());
#else
TV_REQUIRE(idx >= 0 && idx < size(), "index(%d) out-of-range: [0, %ld)\n",
int(idx), size());
#endif
#endif
return mPtr[idx];
}
// TODO: this is conflcit with operator[](SimpleVector<Slice> slice_vec).
/*TV_HOST_DEVICE_INLINE T &operator[](const Shape index) {
int idx = rowArrayIdx(mShape, index);
#ifdef TV_DEBUG
TV_REQUIRE(idx >= 0 && idx < size(), "index(%d) out-of-range: [0, %ld)\n",
int(idx), size());
#endif
return mPtr[idx];
}
TV_HOST_DEVICE_INLINE const T &operator[](const Shape index) const {
int idx = rowArrayIdx(mShape, index);
#ifdef TV_DEBUG
TV_REQUIRE(idx >= 0 && idx < size(), "index(%d) out-of-range: [0, %ld)\n",
int(idx), size());
#endif
return mPtr[idx];
}*/
TV_HOST_DEVICE_INLINE TensorView<T, Rank> operator[](
SimpleVector<Slice> slice_vec) {
return _subview(slice_vec);
}
TV_HOST_DEVICE_INLINE const TensorView<T, Rank> operator[](
SimpleVector<Slice> slice_vec) const {
return _subview(slice_vec);
}
TV_HOST_DEVICE_INLINE bool empty() const { return mPtr == nullptr; }
TV_HOST_DEVICE_INLINE T *data() { return mPtr; }
TV_HOST_DEVICE_INLINE const T *data() const { return mPtr; }
TV_HOST_DEVICE_INLINE const Shape &shape() const { return mShape; }
TV_HOST_DEVICE_INLINE int dim(int idx) const { return mShape[idx]; }
TV_HOST_DEVICE_INLINE int ndim() const { return mShape.ndim(); }
template <class... Inds>
TV_HOST_DEVICE_INLINE TensorView<T, Rank> &reshape(Inds... newShapes) {
Shape shapes{int(newShapes)...};
TV_ASSERT(shapes.size() == size());
mShape = shapes;
return *this;
}
TV_HOST_DEVICE_INLINE TensorView<T, Rank> &reshape(Shape shapes) {
TV_ASSERT(shapes.size() == size());
mShape = shapes;
return *this;
}
template <class... Inds>
TV_HOST_DEVICE_INLINE TensorView<T, Rank> view(Inds... newShapes) const {
Shape shapes{int(newShapes)...};
for (size_t i = 0; i < shapes.ndim(); ++i) {
if (shapes[i] == -1) {
shapes[i] = 1;
shapes[i] = size() / shapes.size();
break;
}
}
TV_ASSERT(shapes.size() == size());
return TensorView<T, Rank>(mPtr, shapes);
}
TV_HOST_DEVICE_INLINE TensorView<T, Rank> view(Shape shapes) const {
TV_ASSERT(shapes.size() == size());
return TensorView<T, Rank>(mPtr, shapes);
}
TV_HOST_DEVICE_INLINE TensorView<T, Rank> squeeze() const {
return TensorView<T, Rank>(mPtr, mShape.squeeze());
}
TV_HOST_DEVICE_INLINE TensorView<T, Rank> squeeze(int dim) const {
return TensorView<T, Rank>(mPtr, mShape.squeeze(dim));
}
TV_HOST_DEVICE_INLINE size_t size() const { return mShape.size(); }
template <class... Slices>
TV_HOST_DEVICE_INLINE TensorView<T, Rank> subview(Slice slice,
Slices... slices) const {
return subview<float, Slice, Slices...>(slice, slices...);
}
template <class T2 = float, class... Slices>
TV_HOST_DEVICE_INLINE TensorView<T, Rank> subview(Slices... slices) const {
Slice slice_vec[sizeof...(Slices)] = {to_slice(slices)...};
Shape new_shape{to_slice(slices)[0]...};
Shape start{to_slice(slices)[0]...};
TV_ASSERT(new_shape.ndim() <= mShape.ndim());
TV_ASSERT(new_shape.ndim() != 0);
size_t idxsize = new_shape.ndim();
for (size_t i = idxsize; i < mShape.ndim(); ++i) {
new_shape.push_back(0);
start.push_back(0);
}
#pragma unroll
for (size_t i = 0; i < sizeof...(Slices); ++i) {
if (slice_vec[i][1] != -1) {
new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
TV_ASSERT(new_shape[i] >= 0);
} else {
new_shape[i] = 1; // reduce dim
}
}
auto offset = rowArrayIdx(mShape, start);
#pragma unroll
for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {
new_shape[i] = mShape[i];
TV_ASSERT(new_shape[i] >= 0);
}
Shape reduced_shape;
#pragma unroll
for (size_t i = 0; i < sizeof...(Slices); ++i) {
if (slice_vec[i][1] != -1) {
reduced_shape.push_back(new_shape[i]);
}
}
#pragma unroll
for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {
reduced_shape.push_back(new_shape[i]);
}
return TensorView<T, Rank>(mPtr + offset, reduced_shape);
}
template <class... Integers>
TV_HOST_DEVICE_INLINE TensorView<T, Rank> subview(int id, Integers... ints) {
Shape start = {id, ints...};
for (int i = 1 + sizeof...(ints); i < ndim(); ++i) {
start.push_back(0);
}
return TensorView<T, Rank>(mPtr + rowArrayIdx(mShape, start),
mShape.subshape(sizeof...(ints) + 1));
}
std::string repr() const {
std::ostringstream ss;
if (empty()) return "";
if (mShape.ndim() == 0) {
ss << *mPtr;
// ss << fmt::format("\nTensor: shape={}, dtype={}", mShape,
// detail::simpleTypeName<T>());
ss << "Tensor: dtype=" << detail::simpleTypeName<T>();
return ss.str();
}
Shape counter = mShape;
auto tensor_flat = this->view(-1);
for (int i = 0; i < counter.ndim(); ++i) {
counter[i] = 0;
ss << "[";
}
for (size_t i = 0; i < this->size(); ++i) {
ss << tensor_flat(rowArrayIdx(mShape, counter));
counter[counter.ndim() - 1] += 1;
int inc_count = 0;
bool print_comma = true;
for (int c = counter.ndim() - 1; c >= 0; --c) {
if (counter[c] == this->dim(c) && c > 0) {
++inc_count;
counter[c - 1] += 1;
counter[c] = 0;
print_comma = false;
}
}
if (print_comma && i != this->size() - 1) ss << ", ";
for (int j = 0; j < inc_count; ++j) {
ss << "]";
}
if (i != this->size() - 1) {
if (inc_count != 0) ss << "\n";
for (int j = 0; j < inc_count; ++j) {
ss << "[";
}
}
}
ss << "]";
// ss << fmt::format("\nTensor: shape={}, dtype={}", mShape,
// detail::simpleTypeName<T>());
ss << "Tensor: dtype=" << detail::simpleTypeName<T>();
return ss.str();
}
protected:
// TODO: make this function public.
// currently this function is called unexpectedly when using subview({0, 0}).
TV_HOST_DEVICE_INLINE TensorView<T, Rank> _subview(
SimpleVector<Slice> slice_vec) {
Shape new_shape;
for (int i = 0; i < slice_vec.size(); ++i) {
new_shape.push_back(slice_vec[i][0]);
}
Shape start = new_shape;
TV_ASSERT(new_shape.ndim() <= mShape.ndim());
TV_ASSERT(new_shape.ndim() != 0);
size_t idxsize = new_shape.ndim();
for (size_t i = idxsize; i < mShape.ndim(); ++i) {
new_shape.push_back(0);
start.push_back(0);
}
for (size_t i = 0; i < slice_vec.size(); ++i) {
if (slice_vec[i][1] != -1) {
new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
TV_ASSERT(new_shape[i] >= 0);
} else {
new_shape[i] = 1; // reduce dim
}
}
auto offset = rowArrayIdx(mShape, start);
for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {
new_shape[i] = mShape[i];
TV_ASSERT(new_shape[i] >= 0);
}
Shape reduced_shape;
for (size_t i = 0; i < slice_vec.size(); ++i) {
if (slice_vec[i][1] != -1) {
reduced_shape.push_back(new_shape[i]);
}
}
for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {
reduced_shape.push_back(new_shape[i]);
}
return TensorView<T, Rank>(mPtr + offset, reduced_shape);
}
template <typename T1>
TV_HOST_DEVICE_INLINE Slice to_slice(T1 s) const {
return Slice{int(s), -1, -1};
}
TV_HOST_DEVICE_INLINE Slice to_slice(Slice s) const { return Slice(s); }
T *mPtr = nullptr;
Shape mShape;
};
template <typename Os, typename T, int Rank>
Os &operator<<(Os &os, const TensorView<T, Rank> &dt) {
os << dt.repr();
return os;
}
template <typename Os, typename T, int Rank>
Os &operator<<(Os &os, const TensorView<const T, Rank> &dt) {
os << dt.repr();
return os;
}
namespace detail {
template <typename T>
constexpr const char *printfTypeFormat(T val = T());
template <>
constexpr const char *printfTypeFormat(float val) {
return "%.2f";
}
template <>
constexpr const char *printfTypeFormat(double val) {
return "%.2f";
}
template <>
constexpr const char *printfTypeFormat(int val) {
return "%d";
}
template <>
constexpr const char *printfTypeFormat(unsigned val) {
return "%u";
}
template <>
constexpr const char *printfTypeFormat(long val) {
return "%ld";
}
template <>
constexpr const char *printfTypeFormat(unsigned long val) {
return "%lu";
}
}; // namespace detail
template <typename T>
TV_HOST_DEVICE void printTensorView(const TensorView<T> tensor,
const char *format) {
if (tensor.empty()) return;
if (tensor.ndim() == 0) {
printf(format, tensor());
printf("\n");
return;
}
Shape counter = tensor.shape();
auto tensor_flat = tensor.view(-1);
for (int i = 0; i < counter.ndim(); ++i) {
counter[i] = 0;
printf("[");
}
for (size_t i = 0; i < tensor.size(); ++i) {
printf(format, tensor_flat(rowArrayIdx(tensor.shape(), counter)));
counter[counter.ndim() - 1] += 1;
int inc_count = 0;
bool print_comma = true;
for (int c = counter.ndim() - 1; c >= 0; --c) {
if (counter[c] == tensor.dim(c) && c > 0) {
++inc_count;
counter[c - 1] += 1;
counter[c] = 0;
print_comma = false;
}
}
if (print_comma && i != tensor.size() - 1) printf(", ");
for (int j = 0; j < inc_count; ++j) {
printf("]");
}
if (i != tensor.size() - 1) {
if (inc_count != 0) printf("\n");
for (int j = 0; j < inc_count; ++j) {
printf("[");
}
}
}
printf("]\n");
}
template <typename T>
TV_HOST_DEVICE void printTensorView(TensorView<T> tensor) {
using Traw = typename std::remove_const<T>::type;
return printTensorView(tensor, detail::printfTypeFormat<Traw>());
}
template <typename T>
TV_HOST_DEVICE void printTensorView(const T *ptr, Shape shape) {
using Traw = typename std::remove_const<T>::type;
return printTensorView(TensorView<const T>(ptr, shape),
detail::printfTypeFormat<Traw>());
}
template <typename T>
TV_HOST_DEVICE void printTensorView(const T *ptr, Shape shape,
const char *format) {
return printTensorView(TensorView<const T>(ptr, shape), format);
}
} // namespace tv