1164 lines
38 KiB
C++
1164 lines
38 KiB
C++
// Copyright 2019 Yan Yan
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#pragma once
|
|
#include <cuda_runtime_api.h>
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstdlib>
|
|
#include <iostream>
|
|
#include <memory>
|
|
// #include <prettyprint.h>
|
|
#include <sstream>
|
|
#include <type_traits>
|
|
#include <vector>
|
|
|
|
namespace tv {
|
|
|
|
// Function decorators and the assert wrapper, selected by the compiler in use.
#if defined(__NVCC__) || defined(__HIP__)
// nvcc / HIP: decorated functions are compiled for host and device.
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__ __host__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#define TV_ASSERT(expr) assert(expr)
#elif defined(__CUDACC_RTC__)
// NVRTC: TV_HOST_DEVICE_INLINE expands to a device-only inline function here.
#define TV_ASSERT(expr) assert(expr)
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#else
// Plain host compiler: the decorators collapse to `inline` or to nothing.
#define TV_ASSERT(x) assert(x)
#define TV_HOST_DEVICE_INLINE inline
// Defined here as well so that every branch provides the same set of macros.
#define TV_DEVICE_INLINE inline
#define TV_HOST_DEVICE
#endif
|
|
|
|
// TV_REQUIRE(expr, fmt, ...): when `expr` is false, print the printf-style
// message and then run assert(expr). Expands to a brace-enclosed block.
// `expr` is evaluated a second time inside assert() on the failure path.
#define TV_REQUIRE(expr, ...) \
  {                           \
    if (!(expr)) {            \
      printf(__VA_ARGS__);    \
      assert(expr);           \
    }                         \
  }
|
|
|
|
// TV_DEVICE_REQUIRE(expr, fmt, ...): device-side check. Only the thread with
// threadIdx.x == 0 prints the printf-style message when `expr` is false;
// assert(expr) is then executed by every thread that reaches the macro.
#define TV_DEVICE_REQUIRE(expr, ...)                      \
  {                                                       \
    if (!(expr) && threadIdx.x == 0) printf(__VA_ARGS__); \
    assert(expr);                                         \
  }
|
|
|
|
// Streams a single value into `ss`. Terminates the variadic overload below.
template <class SStream, class T>
void sstream_print(SStream &ss, T val) {
  ss << val;
}

// Streams every argument into `ss` in order, separated by one space
// (no trailing space after the last argument).
template <class SStream, class T, class... TArgs>
void sstream_print(SStream &ss, T val, TArgs... args) {
  ss << val;
  ss << " ";
  sstream_print(ss, args...);
}
|
|
|
|
// TV_ASSERT_RT_ERR(expr, ...): throws std::runtime_error when `expr` is false.
// The message holds "<file> <line>", the stringified expression, and the
// extra arguments streamed space-separated through tv::sstream_print.
// At least one extra argument is required by the sstream_print call.
#define TV_ASSERT_RT_ERR(expr, ...)                     \
  {                                                     \
    if (!(expr)) {                                      \
      std::stringstream __macro_s;                      \
      __macro_s << __FILE__ << " " << __LINE__ << "\n"; \
      __macro_s << #expr << " assert failed. ";         \
      tv::sstream_print(__macro_s, __VA_ARGS__);        \
      throw std::runtime_error(__macro_s.str());        \
    }                                                   \
  }
|
|
|
|
// TV_ASSERT_INVALID_ARG(expr, ...): throws std::invalid_argument when `expr`
// is false. The message holds "<file> <line>", the stringified expression,
// and the extra arguments streamed space-separated through
// tv::sstream_print. At least one extra argument is required by that call.
#define TV_ASSERT_INVALID_ARG(expr, ...)                \
  {                                                     \
    if (!(expr)) {                                      \
      std::stringstream __macro_s;                      \
      __macro_s << __FILE__ << " " << __LINE__ << "\n"; \
      __macro_s << #expr << " assert failed. ";         \
      tv::sstream_print(__macro_s, __VA_ARGS__);        \
      throw std::invalid_argument(__macro_s.str());     \
    }                                                   \
  }
|
|
|
|
// TV_CHECK_CUDA_ERR(): reads (and clears) the last CUDA runtime error via
// cudaGetLastError() and throws std::runtime_error if it is not cudaSuccess.
// The message carries "<file> <line>", the numeric error code, and the
// description returned by cudaGetErrorString(). The local uses a
// macro-specific name so it cannot shadow a variable named `err` at the
// call site.
#define TV_CHECK_CUDA_ERR()                                           \
  {                                                                   \
    auto __macro_err = cudaGetLastError();                            \
    if (__macro_err != cudaSuccess) {                                 \
      std::stringstream __macro_s;                                    \
      __macro_s << __FILE__ << " " << __LINE__ << "\n";               \
      __macro_s << "cuda execution failed with error " << __macro_err \
                << " (" << cudaGetErrorString(__macro_err) << ")";    \
      throw std::runtime_error(__macro_s.str());                      \
    }                                                                 \
  }
|
|
|
|
struct GPU {
|
|
GPU(cudaStream_t s = 0) : mStream(s) {}
|
|
virtual cudaStream_t getStream() const { return mStream; }
|
|
cudaStream_t mStream = 0;
|
|
};
|
|
struct CPU {};
|
|
|
|
#define TV_MAX_DIM 6
|
|
/*
|
|
template <typename T>
|
|
constexpr size_t calc_align(size_t ndim)
|
|
{
|
|
if (ndim * sizeof(T) == 1)
|
|
return 1;
|
|
else if (ndim * sizeof(T) == 2)
|
|
return 2;
|
|
else if (ndim * sizeof(T) <= 4 && ndim * sizeof(T) > 2)
|
|
return 4;
|
|
else if (ndim * sizeof(T) <= 8 && ndim * sizeof(T) > 4)
|
|
return 8;
|
|
else if (ndim * sizeof(T) <= 16 && ndim * sizeof(T) > 8)
|
|
return 16;
|
|
else if (ndim * sizeof(T) <= 32 && ndim * sizeof(T) > 16)
|
|
return 32;
|
|
else
|
|
return 64;
|
|
}
|
|
*/
|
|
template <typename T, size_t MaxDim = TV_MAX_DIM>
|
|
struct /*alignas(calc_align<T>(MaxDim))*/ SimpleVector {
|
|
public:
|
|
TV_HOST_DEVICE_INLINE SimpleVector(){};
|
|
TV_HOST_DEVICE_INLINE SimpleVector(std::initializer_list<T> q) {
|
|
TV_ASSERT(q.size() <= MaxDim);
|
|
mSize = 0;
|
|
for (T s : q) {
|
|
mArray[mSize++] = s;
|
|
}
|
|
mSize = q.size();
|
|
}
|
|
SimpleVector(const std::vector<T> &arr) {
|
|
TV_ASSERT(arr.size() <= MaxDim);
|
|
for (size_t i = 0; i < arr.size(); ++i) {
|
|
mArray[i] = arr[i];
|
|
}
|
|
mSize = arr.size();
|
|
}
|
|
TV_HOST_DEVICE_INLINE SimpleVector(const SimpleVector<T, MaxDim> &arr) {
|
|
TV_ASSERT(arr.size() <= MaxDim);
|
|
for (size_t i = 0; i < arr.size(); ++i) {
|
|
mArray[i] = arr[i];
|
|
}
|
|
mSize = arr.size();
|
|
}
|
|
TV_HOST_DEVICE_INLINE T &operator[](int idx) {
|
|
#ifdef TV_DEBUG
|
|
TV_ASSERT(idx >= 0 && idx < mSize);
|
|
#endif
|
|
return mArray[idx];
|
|
}
|
|
TV_HOST_DEVICE_INLINE const T &operator[](int idx) const {
|
|
#ifdef TV_DEBUG
|
|
TV_ASSERT(idx >= 0 && idx < mSize);
|
|
#endif
|
|
return mArray[idx];
|
|
}
|
|
TV_HOST_DEVICE_INLINE void push_back(T s) {
|
|
#ifdef TV_DEBUG
|
|
TV_ASSERT(mSize < MaxDim);
|
|
#endif
|
|
mArray[mSize] = s;
|
|
mSize++;
|
|
}
|
|
TV_HOST_DEVICE_INLINE void pop_back() {
|
|
#ifdef TV_DEBUG
|
|
TV_ASSERT(mSize > 0);
|
|
#endif
|
|
mSize--;
|
|
}
|
|
|
|
TV_HOST_DEVICE_INLINE size_t size() const { return mSize; }
|
|
TV_HOST_DEVICE_INLINE const T *data() const { return mArray; }
|
|
TV_HOST_DEVICE_INLINE size_t empty() const { return mSize == 0; }
|
|
|
|
typedef size_t size_type;
|
|
|
|
class iterator {
|
|
public:
|
|
typedef iterator self_type;
|
|
typedef T value_type;
|
|
typedef T &reference;
|
|
typedef T *pointer;
|
|
typedef std::forward_iterator_tag iterator_category;
|
|
typedef std::ptrdiff_t difference_type;
|
|
TV_HOST_DEVICE_INLINE iterator(pointer ptr) : ptr_(ptr) {}
|
|
TV_HOST_DEVICE_INLINE self_type operator++(int junk) {
|
|
self_type i = *this;
|
|
ptr_++;
|
|
return i;
|
|
}
|
|
TV_HOST_DEVICE_INLINE self_type operator++() {
|
|
ptr_++;
|
|
return *this;
|
|
}
|
|
TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }
|
|
TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }
|
|
TV_HOST_DEVICE_INLINE bool operator==(const self_type &rhs) {
|
|
return ptr_ == rhs.ptr_;
|
|
}
|
|
TV_HOST_DEVICE_INLINE bool operator!=(const self_type &rhs) {
|
|
return ptr_ != rhs.ptr_;
|
|
}
|
|
|
|
private:
|
|
pointer ptr_;
|
|
};
|
|
|
|
class const_iterator {
|
|
public:
|
|
typedef const_iterator self_type;
|
|
typedef T value_type;
|
|
typedef const T &reference;
|
|
typedef const T *pointer;
|
|
typedef std::ptrdiff_t difference_type;
|
|
typedef std::forward_iterator_tag iterator_category;
|
|
TV_HOST_DEVICE_INLINE const_iterator(pointer ptr) : ptr_(ptr) {}
|
|
TV_HOST_DEVICE_INLINE self_type operator++(int junk) {
|
|
self_type i = *this;
|
|
ptr_++;
|
|
return i;
|
|
}
|
|
TV_HOST_DEVICE_INLINE self_type operator++() {
|
|
ptr_++;
|
|
return *this;
|
|
}
|
|
TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }
|
|
TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }
|
|
TV_HOST_DEVICE_INLINE bool operator==(const self_type &rhs) {
|
|
return ptr_ == rhs.ptr_;
|
|
}
|
|
TV_HOST_DEVICE_INLINE bool operator!=(const self_type &rhs) {
|
|
return ptr_ != rhs.ptr_;
|
|
}
|
|
|
|
private:
|
|
pointer ptr_;
|
|
};
|
|
|
|
TV_HOST_DEVICE_INLINE iterator begin() { return iterator(mArray); }
|
|
|
|
TV_HOST_DEVICE_INLINE iterator end() { return iterator(mArray + mSize); }
|
|
|
|
TV_HOST_DEVICE_INLINE const_iterator begin() const {
|
|
return const_iterator(mArray);
|
|
}
|
|
|
|
TV_HOST_DEVICE_INLINE const_iterator end() const {
|
|
return const_iterator(mArray + mSize);
|
|
}
|
|
TV_HOST_DEVICE_INLINE const_iterator cbegin() const {
|
|
return const_iterator(mArray);
|
|
}
|
|
|
|
TV_HOST_DEVICE_INLINE const_iterator cend() const {
|
|
return const_iterator(mArray + mSize);
|
|
}
|
|
|
|
protected:
|
|
T mArray[MaxDim];
|
|
size_t mSize = 0;
|
|
};
|
|
|
|
template <typename T, size_t MaxDim>
|
|
bool operator==(const SimpleVector<T, MaxDim> &lfs,
|
|
const SimpleVector<T, MaxDim> &rfs) {
|
|
if (lfs.size() != rfs.size()) return false;
|
|
for (size_t i = 0; i < lfs.size(); ++i) {
|
|
if (lfs[i] != rfs[i]) return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template <typename T, size_t MaxDim>
|
|
bool operator!=(const SimpleVector<T, MaxDim> &lfs,
|
|
const SimpleVector<T, MaxDim> &rfs) {
|
|
return !(lfs == rfs);
|
|
}
|
|
|
|
struct Slice {
|
|
template <class... Integers>
|
|
TV_HOST_DEVICE_INLINE Slice(Integers... ints) {
|
|
static_assert(sizeof...(ints) <= 3, "slice init must smaller than 3");
|
|
SimpleVector<int, 3> slices{int(ints)...};
|
|
mSlices[0] = -1;
|
|
mSlices[1] = -1;
|
|
mSlices[2] = -1;
|
|
for (size_t i = 0; i < slices.size(); ++i) {
|
|
mSlices[i] = slices[i];
|
|
}
|
|
}
|
|
|
|
TV_HOST_DEVICE_INLINE Slice() {
|
|
mSlices[0] = -1;
|
|
mSlices[1] = -1;
|
|
mSlices[2] = -1;
|
|
}
|
|
template <typename T>
|
|
TV_HOST_DEVICE_INLINE Slice(std::initializer_list<T> slice) {
|
|
mSlices[0] = -1;
|
|
mSlices[1] = -1;
|
|
mSlices[2] = -1;
|
|
TV_ASSERT(slice.size() <= 3);
|
|
int idx = 0;
|
|
for (T s : slice) {
|
|
mSlices[idx] = int(s);
|
|
++idx;
|
|
}
|
|
}
|
|
TV_HOST_DEVICE_INLINE int &operator[](int idx) {
|
|
#ifdef TV_DEBUG
|
|
TV_ASSERT(idx >= 0 && idx < 3);
|
|
#endif
|
|
return mSlices[idx];
|
|
}
|
|
TV_HOST_DEVICE_INLINE const int &operator[](int idx) const {
|
|
#ifdef TV_DEBUG
|
|
TV_ASSERT(idx >= 0 && idx < 3);
|
|
#endif
|
|
return mSlices[idx];
|
|
}
|
|
|
|
protected:
|
|
int mSlices[3];
|
|
};
|
|
|
|
template <size_t MaxDim = TV_MAX_DIM>
|
|
struct ShapeBase : public SimpleVector<int, MaxDim> {
|
|
TV_HOST_DEVICE_INLINE ShapeBase() : SimpleVector<int, MaxDim>(){};
|
|
TV_HOST_DEVICE_INLINE ShapeBase(std::initializer_list<int> shape)
|
|
: SimpleVector<int, MaxDim>(shape) {}
|
|
|
|
template <typename T, template <class...> class Container>
|
|
ShapeBase(Container<T> shape) : SimpleVector<int, MaxDim>(shape) {}
|
|
TV_HOST_DEVICE_INLINE ShapeBase(const ShapeBase<MaxDim> &shape)
|
|
: SimpleVector<int, MaxDim>(shape) {}
|
|
ShapeBase(const std::vector<int> &arr) : SimpleVector<int, MaxDim>(arr) {}
|
|
|
|
ShapeBase<MaxDim> &operator=(const ShapeBase<MaxDim> &shape) = default;
|
|
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start, int end) const {
|
|
#ifdef TV_DEBUG
|
|
TV_ASSERT(start >= 0 && end < this->mSize && end > start);
|
|
#endif
|
|
ShapeBase<MaxDim> shape;
|
|
for (int i = start; i < end; ++i) {
|
|
shape.push_back(this->mArray[i]);
|
|
}
|
|
return shape;
|
|
}
|
|
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start) const {
|
|
#ifdef TV_DEBUG
|
|
TV_ASSERT(start >= 0 && start <= this->mSize);
|
|
#endif
|
|
ShapeBase<MaxDim> shape;
|
|
for (int i = start; i < this->mSize; ++i) {
|
|
shape.push_back(this->mArray[i]);
|
|
}
|
|
return shape;
|
|
}
|
|
|
|
TV_HOST_DEVICE_INLINE size_t size() const {
|
|
if (this->mSize == 0) return 0;
|
|
size_t s = 1;
|
|
for (int i = 0; i < int(this->mSize); ++i) {
|
|
s *= this->mArray[i];
|
|
}
|
|
return s;
|
|
}
|
|
TV_HOST_DEVICE_INLINE size_t ndim() const { return this->mSize; }
|
|
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze() const {
|
|
ShapeBase<MaxDim> shape;
|
|
for (int i = 0; i < this->mSize; ++i) {
|
|
if (this->mArray[i] != 1) shape.push_back(this->mArray[i]);
|
|
}
|
|
return shape;
|
|
}
|
|
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze(int dim) const {
|
|
ShapeBase<MaxDim> shape;
|
|
for (int i = 0; i < this->mSize; ++i) {
|
|
if (i != dim || this->mArray[i] != 1) shape.push_back(this->mArray[i]);
|
|
}
|
|
return shape;
|
|
}
|
|
};
|
|
|
|
using Shape = ShapeBase<TV_MAX_DIM>;
|
|
|
|
template <class... Inds>
|
|
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int> &shape,
|
|
Inds... indexes) {
|
|
unsigned offset = 0;
|
|
unsigned m = 1;
|
|
int indexes_vec[sizeof...(indexes)] = {indexes...};
|
|
#ifdef TV_DEBUG
|
|
TV_ASSERT(sizeof...(indexes) == shape.size());
|
|
#endif
|
|
#pragma unroll
|
|
for (int i = sizeof...(indexes) - 1; i >= 0; --i) {
|
|
offset += m * indexes_vec[i];
|
|
m *= shape[i];
|
|
}
|
|
return offset;
|
|
}
|
|
|
|
// Row-major linear offset of the index held in `indexes_vec` for a tensor
// whose extents are `shape`. One index is read per entry of `shape`.
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int> &shape,
                                           std::vector<int> &indexes_vec) {
  unsigned linear = 0;
  unsigned stride = 1;  // product of the extents to the right of `dim`
  for (int dim = shape.size() - 1; dim >= 0; --dim) {
    linear += stride * indexes_vec[dim];
    stride *= shape[dim];
  }
  return linear;
}
|
|
|
|
template <class... Inds>
|
|
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape &shape,
|
|
Inds... indexes) {
|
|
unsigned offset = 0;
|
|
unsigned m = 1;
|
|
int indexes_vec[sizeof...(indexes)] = {indexes...};
|
|
#pragma unroll
|
|
for (int i = sizeof...(indexes) - 1; i >= 0; --i) {
|
|
offset += m * indexes_vec[i];
|
|
m *= shape[i];
|
|
}
|
|
return offset;
|
|
}
|
|
|
|
// Row-major linear offset of the index stored in `indexes_vec` (one entry
// per dimension, taken from indexes_vec.ndim()) within a tensor of extents
// `shape`.
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape &shape,
                                           const Shape &indexes_vec) {
  unsigned linear = 0;
  unsigned stride = 1;  // product of the extents to the right of `dim`
  for (int dim = indexes_vec.ndim() - 1; dim >= 0; --dim) {
    linear += stride * indexes_vec[dim];
    stride *= shape[dim];
  }
  return linear;
}
|
|
|
|
template <typename Index, unsigned NDim>
|
|
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Index *indexes,
|
|
const Index *shape) {
|
|
unsigned offset = 0;
|
|
unsigned m = 1;
|
|
#pragma unroll
|
|
for (int i = NDim - 1; i >= 0; --i) {
|
|
offset += m * indexes[i];
|
|
m *= shape[i];
|
|
}
|
|
return offset;
|
|
}
|
|
|
|
template <typename Index, unsigned NDim>
|
|
TV_HOST_DEVICE_INLINE Index rowArrayIdxInv(Index index, Index *output,
|
|
const Index *shape) {
|
|
#pragma unroll
|
|
for (int i = NDim - 1; i >= 0; --i) {
|
|
output[i] = index % shape[i];
|
|
index -= output[i];
|
|
index /= shape[i];
|
|
}
|
|
return index;
|
|
}
|
|
|
|
template <int N>
|
|
struct ArrayIndexRowMajor {
|
|
// mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
|
|
TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,
|
|
const Shape &indexes) {
|
|
return indexes[N - 1] +
|
|
shape[N - 1] * ArrayIndexRowMajor<N - 1>::run(shape, indexes);
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct ArrayIndexRowMajor<0> {
|
|
TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,
|
|
const Shape &indexes) {
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
namespace detail {
// Maps an arithmetic type to a short dtype name such as "float32". Only the
// specializations below are defined; the argument value is never read.
template <typename T>
constexpr const char *simpleTypeName(T val = T());

template <>
constexpr const char *simpleTypeName(float) {
  return "float32";
}
template <>
constexpr const char *simpleTypeName(double) {
  return "float64";
}
template <>
constexpr const char *simpleTypeName(int) {
  return "int32";
}
template <>
constexpr const char *simpleTypeName(unsigned) {
  return "uint32";
}
// `long` is 64-bit on LP64 platforms but 32-bit on LLP64 ones, so the name
// is derived from the actual width instead of being hard-coded.
template <>
constexpr const char *simpleTypeName(long) {
  return sizeof(long) == 8 ? "int64" : "int32";
}
template <>
constexpr const char *simpleTypeName(unsigned long) {
  return sizeof(unsigned long) == 8 ? "uint64" : "uint32";
}
// `long long` is a distinct type from `long` and needs its own entries.
template <>
constexpr const char *simpleTypeName(long long) {
  return "int64";
}
template <>
constexpr const char *simpleTypeName(unsigned long long) {
  return "uint64";
}
}  // namespace detail
|
|
|
|
template <typename T, int Rank = -1>
|
|
struct TensorView {
|
|
// Default constructor: members keep whatever their declarations give them.
TV_HOST_DEVICE_INLINE TensorView() {}

// Wraps `ptr` as a tensor with the given shape. No data is copied.
explicit TV_HOST_DEVICE_INLINE TensorView(T *ptr, Shape shape)
    : mPtr(ptr), mShape(shape) {}

// explicit TV_HOST_DEVICE_INLINE TensorView(const
// TensorView<std::remove_const_t<T>> &tview) : mPtr(tview.data()),
// mShape(tview.shape()) {}

// Wraps `ptr` as a tensor whose extents are the given integers, each
// converted to int.
template <class... Integers>
explicit TV_HOST_DEVICE_INLINE TensorView(T *ptr, Integers... shapes)
    : mPtr(ptr) {
  mShape = Shape{int(shapes)...};
}
|
|
|
|
// Copies every element of `tensor` into this view, in flat order. Both
// views must have equal shapes (checked with TV_REQUIRE). Returns *this.
TV_HOST_DEVICE_INLINE TensorView<T, Rank> &assign(
    const TensorView<T, Rank> &tensor) {
  TV_REQUIRE(tensor.shape() == shape(), "you must provide same input size%s",
             "\n");
  const T *src = tensor.data();
  T *dst = mPtr;
  for (size_t copied = 0; copied < size(); ++copied) {
    *dst = *src;
    ++dst;
    ++src;
  }
  return *this;
}
|
|
|
|
template <typename T1>
|
|
TV_HOST_DEVICE_INLINE TensorView<T, Rank> &assign(
|
|
std::initializer_list<T1> seq) {
|
|
TV_REQUIRE(seq.size() == size(), "you must provide same input size%s",
|
|
"\n");
|
|
T *ptr = mPtr;
|
|
for (const T1 &s : seq) *(ptr++) = T(s);
|
|
return *this;
|
|
}
|
|
|
|
template <class... Inds>
|
|
TV_HOST_DEVICE_INLINE T &operator()(Inds... inds) {
|
|
#ifdef TV_DEBUG
|
|
int idxes[sizeof...(Inds)]{int(inds)...};
|
|
TV_REQUIRE(sizeof...(inds) == mShape.ndim(),
|
|
"you provide %d indexes, but dim is %d\n", sizeof...(inds),
|
|
mShape.ndim());
|
|
for (int i = 0; i < sizeof...(inds); ++i) {
|
|
TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", i, idxes[i],
|
|
mShape[i]);
|
|
}
|
|
#endif
|
|
return mPtr[rowArrayIdx(mShape, int(inds)...)];
|
|
}
|
|
template <class... Inds>
|
|
TV_HOST_DEVICE_INLINE const T &operator()(Inds... inds) const {
|
|
#ifdef TV_DEBUG
|
|
int idxes[sizeof...(Inds)]{int(inds)...};
|
|
TV_REQUIRE(sizeof...(inds) == mShape.ndim(),
|
|
"you provide %d indexes, but dim is %d\n", sizeof...(inds),
|
|
mShape.ndim());
|
|
for (int i = 0; i < sizeof...(inds); ++i) {
|
|
TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", i, idxes[i],
|
|
mShape[i]);
|
|
}
|
|
#endif
|
|
return mPtr[rowArrayIdx(mShape, int(inds)...)];
|
|
}
|
|
// Access with no indexes: returns the first element. Under TV_DEBUG the
// view must be non-empty and have zero dimensions.
TV_HOST_DEVICE_INLINE T &operator()() {
#if defined TV_DEBUG
  // ndim() is size_t; cast to long so it matches the %ld conversion.
#if defined(__CUDA_ARCH__)
  TV_DEVICE_REQUIRE(mPtr != nullptr,
                    "you want get value but the view is empty.%s", "\n");
  TV_DEVICE_REQUIRE(mShape.ndim() == 0,
                    "you provide 0 indexes, but dim is %ld\n",
                    long(mShape.ndim()));
#else
  TV_REQUIRE(mPtr != nullptr, "you want get value but the view is empty.%s",
             "\n");
  TV_REQUIRE(mShape.ndim() == 0, "you provide 0 indexes, but dim is %ld\n",
             long(mShape.ndim()));
#endif
#endif
  return mPtr[0];
}
|
|
// Read-only access with no indexes: returns the first element. Under
// TV_DEBUG the view must be non-empty and have zero dimensions.
TV_HOST_DEVICE_INLINE const T &operator()() const {
#if defined TV_DEBUG
  // ndim() is size_t; cast to long so it matches the %ld conversion.
#if defined(__CUDA_ARCH__)
  TV_DEVICE_REQUIRE(mPtr != nullptr,
                    "you want get value but the view is empty.%s", "\n");
  TV_DEVICE_REQUIRE(mShape.ndim() == 0,
                    "you provide 0 indexes, but dim is %ld\n",
                    long(mShape.ndim()));
#else
  TV_REQUIRE(mPtr != nullptr, "you want get value but the view is empty.%s",
             "\n");
  TV_REQUIRE(mShape.ndim() == 0, "you provide 0 indexes, but dim is %ld\n",
             long(mShape.ndim()));
#endif
#endif
  return mPtr[0];
}
|
|
|
|
template <class T1>
|
|
TV_HOST_DEVICE_INLINE T &operator()(T1 i1) {
|
|
#if defined TV_DEBUG
|
|
#if defined(__CUDA_ARCH__)
|
|
TV_DEVICE_REQUIRE(mShape.ndim() == 1,
|
|
"you provide 1 indexes, but dim is %ld\n", mShape.ndim());
|
|
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, i1, mShape[0]);
|
|
#else
|
|
TV_REQUIRE(mShape.ndim() == 1, "you provide 1 indexes, but dim is %ld\n",
|
|
mShape.ndim());
|
|
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, i1, mShape[0]);
|
|
#endif
|
|
#endif
|
|
return mPtr[i1];
|
|
}
|
|
template <class T1, class T2>
|
|
TV_HOST_DEVICE_INLINE T &operator()(T1 i1, T2 i2) {
|
|
#ifdef TV_DEBUG
|
|
#if defined(__CUDA_ARCH__)
|
|
TV_DEVICE_REQUIRE(mShape.ndim() == 2,
|
|
"you provide 2 indexes, but dim is %ld\n", mShape.ndim());
|
|
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
|
|
mShape[0]);
|
|
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
|
|
mShape[1]);
|
|
#else
|
|
TV_REQUIRE(mShape.ndim() == 2, "you provide 2 indexes, but dim is %ld\n",
|
|
mShape.ndim());
|
|
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
|
|
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
|
|
#endif
|
|
#endif
|
|
return mPtr[i1 * mShape[1] + i2];
|
|
}
|
|
template <class T1, class T2, class T3>
|
|
TV_HOST_DEVICE_INLINE T &operator()(T1 i1, T2 i2, T3 i3) {
|
|
#ifdef TV_DEBUG
|
|
#if defined(__CUDA_ARCH__)
|
|
TV_DEVICE_REQUIRE(mShape.ndim() == 3,
|
|
"you provide 3 indexes, but dim is %ld\n", mShape.ndim());
|
|
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
|
|
mShape[0]);
|
|
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
|
|
mShape[1]);
|
|
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
|
|
mShape[2]);
|
|
#else
|
|
TV_REQUIRE(mShape.ndim() == 3, "you provide 3 indexes, but dim is %ld\n",
|
|
mShape.ndim());
|
|
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
|
|
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
|
|
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
|
|
#endif
|
|
#endif
|
|
return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];
|
|
}
|
|
template <class T1, class T2, class T3, class T4>
|
|
TV_HOST_DEVICE_INLINE T &operator()(T1 i1, T2 i2, T3 i3, T4 i4) {
|
|
#ifdef TV_DEBUG
|
|
#if defined(__CUDA_ARCH__)
|
|
TV_DEVICE_REQUIRE(mShape.ndim() == 4,
|
|
"you provide 4 indexes, but dim is %ld\n", mShape.ndim());
|
|
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
|
|
mShape[0]);
|
|
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
|
|
mShape[1]);
|
|
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
|
|
mShape[2]);
|
|
TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4),
|
|
mShape[3]);
|
|
#else
|
|
TV_REQUIRE(mShape.ndim() == 4, "you provide 4 indexes, but dim is %ld\n",
|
|
mShape.ndim());
|
|
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
|
|
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
|
|
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
|
|
TV_REQUIRE(i4 >= 0 && i4 < mShape[3],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4), mShape[3]);
|
|
#endif
|
|
#endif
|
|
return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
|
|
}
|
|
|
|
template <class T1>
|
|
TV_HOST_DEVICE_INLINE const T &operator()(T1 i1) const {
|
|
#ifdef TV_DEBUG
|
|
#if defined(__CUDA_ARCH__)
|
|
TV_DEVICE_REQUIRE(mShape.ndim() == 1,
|
|
"you provide 1 indexes, but dim is %ld\n", mShape.ndim());
|
|
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
|
|
mShape[0]);
|
|
#else
|
|
TV_REQUIRE(mShape.ndim() == 1, "you provide 1 indexes, but dim is %ld\n",
|
|
mShape.ndim());
|
|
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
|
|
#endif
|
|
#endif
|
|
return mPtr[i1];
|
|
}
|
|
template <class T1, class T2>
|
|
TV_HOST_DEVICE_INLINE const T &operator()(T1 i1, T2 i2) const {
|
|
#ifdef TV_DEBUG
|
|
#if defined(__CUDA_ARCH__)
|
|
TV_DEVICE_REQUIRE(mShape.ndim() == 2,
|
|
"you provide 2 indexes, but dim is %ld\n", mShape.ndim());
|
|
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
|
|
mShape[0]);
|
|
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
|
|
mShape[1]);
|
|
#else
|
|
TV_REQUIRE(mShape.ndim() == 2, "you provide 2 indexes, but dim is %ld\n",
|
|
mShape.ndim());
|
|
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
|
|
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
|
|
|
|
#endif
|
|
#endif
|
|
return mPtr[i1 * mShape[1] + i2];
|
|
}
|
|
template <class T1, class T2, class T3>
|
|
TV_HOST_DEVICE_INLINE const T &operator()(T1 i1, T2 i2, T3 i3) const {
|
|
#ifdef TV_DEBUG
|
|
#if defined(__CUDA_ARCH__)
|
|
TV_DEVICE_REQUIRE(mShape.ndim() == 3,
|
|
"you provide 3 indexes, but dim is %ld\n", mShape.ndim());
|
|
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
|
|
mShape[0]);
|
|
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
|
|
mShape[1]);
|
|
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
|
|
mShape[2]);
|
|
#else
|
|
TV_REQUIRE(mShape.ndim() == 3, "you provide 3 indexes, but dim is %ld\n",
|
|
mShape.ndim());
|
|
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
|
|
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
|
|
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
|
|
#endif
|
|
#endif
|
|
return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];
|
|
}
|
|
template <class T1, class T2, class T3, class T4>
|
|
TV_HOST_DEVICE_INLINE const T &operator()(T1 i1, T2 i2, T3 i3, T4 i4) const {
|
|
#ifdef TV_DEBUG
|
|
#if defined(__CUDA_ARCH__)
|
|
TV_DEVICE_REQUIRE(mShape.ndim() == 4,
|
|
"you provide 4 indexes, but dim is %ld\n", mShape.ndim());
|
|
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
|
|
mShape[0]);
|
|
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
|
|
mShape[1]);
|
|
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
|
|
mShape[2]);
|
|
TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4),
|
|
mShape[3]);
|
|
#else
|
|
TV_REQUIRE(mShape.ndim() == 4, "you provide 4 indexes, but dim is %ld\n",
|
|
mShape.ndim());
|
|
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
|
|
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
|
|
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
|
|
TV_REQUIRE(i4 >= 0 && i4 < mShape[3],
|
|
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4), mShape[3]);
|
|
#endif
|
|
#endif
|
|
return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
|
|
}
|
|
|
|
// Flat element access by linear index. Under TV_DEBUG `idx` must lie in
// [0, size()).
TV_HOST_DEVICE_INLINE T &operator[](int idx) {
#ifdef TV_DEBUG
  // size() is size_t; cast to long so it matches the %ld conversion.
#if defined(__CUDA_ARCH__)
  TV_DEVICE_REQUIRE(idx >= 0 && idx < size(),
                    "index(%d) out-of-range: [0, %ld)\n", int(idx),
                    long(size()));
#else
  TV_REQUIRE(idx >= 0 && idx < size(), "index(%d) out-of-range: [0, %ld)\n",
             int(idx), long(size()));
#endif
#endif
  return mPtr[idx];
}
|
|
// TODO: this conflicts with operator[](SimpleVector<Slice> slice_vec).
|
|
/*TV_HOST_DEVICE_INLINE T &operator[](const Shape index) {
|
|
int idx = rowArrayIdx(mShape, index);
|
|
#ifdef TV_DEBUG
|
|
TV_REQUIRE(idx >= 0 && idx < size(), "index(%d) out-of-range: [0, %ld)\n",
|
|
int(idx), size());
|
|
#endif
|
|
return mPtr[idx];
|
|
}
|
|
TV_HOST_DEVICE_INLINE const T &operator[](const Shape index) const {
|
|
int idx = rowArrayIdx(mShape, index);
|
|
#ifdef TV_DEBUG
|
|
TV_REQUIRE(idx >= 0 && idx < size(), "index(%d) out-of-range: [0, %ld)\n",
|
|
int(idx), size());
|
|
#endif
|
|
return mPtr[idx];
|
|
}*/
|
|
// Slicing access: returns the sub-view that _subview() selects for
// `slice_vec`.
TV_HOST_DEVICE_INLINE TensorView<T, Rank> operator[](
    SimpleVector<Slice> slice_vec) {
  return this->_subview(slice_vec);
}
|
|
// Slicing access on a const view: returns the sub-view that _subview()
// selects for `slice_vec`.
// NOTE(review): the _subview declaration visible in this file is not
// const-qualified, so this overload may fail to compile once instantiated;
// confirm whether a const _subview exists.
TV_HOST_DEVICE_INLINE const TensorView<T, Rank> operator[](
    SimpleVector<Slice> slice_vec) const {
  return this->_subview(slice_vec);
}
|
|
// True when the view has no data pointer.
TV_HOST_DEVICE_INLINE bool empty() const { return nullptr == mPtr; }

// Raw pointer to the first element (mutable and read-only flavours).
TV_HOST_DEVICE_INLINE T *data() { return mPtr; }
TV_HOST_DEVICE_INLINE const T *data() const { return mPtr; }

// The shape of the view.
TV_HOST_DEVICE_INLINE const Shape &shape() const { return mShape; }

// Extent of dimension `idx`.
TV_HOST_DEVICE_INLINE int dim(int idx) const { return mShape[idx]; }

// Number of dimensions, converted to int.
TV_HOST_DEVICE_INLINE int ndim() const { return mShape.ndim(); }
|
|
template <class... Inds>
|
|
TV_HOST_DEVICE_INLINE TensorView<T, Rank> &reshape(Inds... newShapes) {
|
|
Shape shapes{int(newShapes)...};
|
|
TV_ASSERT(shapes.size() == size());
|
|
mShape = shapes;
|
|
return *this;
|
|
}
|
|
// Replaces this view's shape in place with `shapes`, which must describe
// the same number of elements (asserted). Returns *this.
TV_HOST_DEVICE_INLINE TensorView<T, Rank> &reshape(Shape shapes) {
  const size_t requested_elems = shapes.size();
  TV_ASSERT(requested_elems == size());
  mShape = shapes;
  return *this;
}
|
|
template <class... Inds>
|
|
TV_HOST_DEVICE_INLINE TensorView<T, Rank> view(Inds... newShapes) const {
|
|
Shape shapes{int(newShapes)...};
|
|
for (size_t i = 0; i < shapes.ndim(); ++i) {
|
|
if (shapes[i] == -1) {
|
|
shapes[i] = 1;
|
|
shapes[i] = size() / shapes.size();
|
|
break;
|
|
}
|
|
}
|
|
TV_ASSERT(shapes.size() == size());
|
|
return TensorView<T, Rank>(mPtr, shapes);
|
|
}
|
|
// Returns a new view over the same data with shape `shapes`, which must
// describe the same number of elements (asserted).
TV_HOST_DEVICE_INLINE TensorView<T, Rank> view(Shape shapes) const {
  const size_t requested_elems = shapes.size();
  TV_ASSERT(requested_elems == size());
  TensorView<T, Rank> reshaped(mPtr, shapes);
  return reshaped;
}
|
|
// Returns a view over the same data whose shape is mShape.squeeze().
TV_HOST_DEVICE_INLINE TensorView<T, Rank> squeeze() const {
  const Shape squeezed = mShape.squeeze();
  return TensorView<T, Rank>(mPtr, squeezed);
}

// Returns a view over the same data whose shape is mShape.squeeze(dim).
TV_HOST_DEVICE_INLINE TensorView<T, Rank> squeeze(int dim) const {
  const Shape squeezed = mShape.squeeze(dim);
  return TensorView<T, Rank>(mPtr, squeezed);
}

// Total number of elements, as reported by the shape.
TV_HOST_DEVICE_INLINE size_t size() const { return mShape.size(); }
|
|
|
|
template <class... Slices>
|
|
TV_HOST_DEVICE_INLINE TensorView<T, Rank> subview(Slice slice,
|
|
Slices... slices) const {
|
|
return subview<float, Slice, Slices...>(slice, slices...);
|
|
}
|
|
template <class T2 = float, class... Slices>
|
|
TV_HOST_DEVICE_INLINE TensorView<T, Rank> subview(Slices... slices) const {
|
|
Slice slice_vec[sizeof...(Slices)] = {to_slice(slices)...};
|
|
Shape new_shape{to_slice(slices)[0]...};
|
|
Shape start{to_slice(slices)[0]...};
|
|
TV_ASSERT(new_shape.ndim() <= mShape.ndim());
|
|
TV_ASSERT(new_shape.ndim() != 0);
|
|
size_t idxsize = new_shape.ndim();
|
|
for (size_t i = idxsize; i < mShape.ndim(); ++i) {
|
|
new_shape.push_back(0);
|
|
start.push_back(0);
|
|
}
|
|
#pragma unroll
|
|
for (size_t i = 0; i < sizeof...(Slices); ++i) {
|
|
if (slice_vec[i][1] != -1) {
|
|
new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
|
|
TV_ASSERT(new_shape[i] >= 0);
|
|
} else {
|
|
new_shape[i] = 1; // reduce dim
|
|
}
|
|
}
|
|
auto offset = rowArrayIdx(mShape, start);
|
|
#pragma unroll
|
|
for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {
|
|
new_shape[i] = mShape[i];
|
|
TV_ASSERT(new_shape[i] >= 0);
|
|
}
|
|
Shape reduced_shape;
|
|
#pragma unroll
|
|
for (size_t i = 0; i < sizeof...(Slices); ++i) {
|
|
if (slice_vec[i][1] != -1) {
|
|
reduced_shape.push_back(new_shape[i]);
|
|
}
|
|
}
|
|
#pragma unroll
|
|
for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {
|
|
reduced_shape.push_back(new_shape[i]);
|
|
}
|
|
return TensorView<T, Rank>(mPtr + offset, reduced_shape);
|
|
}
|
|
|
|
template <class... Integers>
|
|
TV_HOST_DEVICE_INLINE TensorView<T, Rank> subview(int id, Integers... ints) {
|
|
Shape start = {id, ints...};
|
|
for (int i = 1 + sizeof...(ints); i < ndim(); ++i) {
|
|
start.push_back(0);
|
|
}
|
|
return TensorView<T, Rank>(mPtr + rowArrayIdx(mShape, start),
|
|
mShape.subshape(sizeof...(ints) + 1));
|
|
}
|
|
|
|
std::string repr() const {
|
|
std::ostringstream ss;
|
|
if (empty()) return "";
|
|
if (mShape.ndim() == 0) {
|
|
ss << *mPtr;
|
|
// ss << fmt::format("\nTensor: shape={}, dtype={}", mShape,
|
|
// detail::simpleTypeName<T>());
|
|
ss << "Tensor: dtype=" << detail::simpleTypeName<T>();
|
|
return ss.str();
|
|
}
|
|
Shape counter = mShape;
|
|
auto tensor_flat = this->view(-1);
|
|
for (int i = 0; i < counter.ndim(); ++i) {
|
|
counter[i] = 0;
|
|
ss << "[";
|
|
}
|
|
for (size_t i = 0; i < this->size(); ++i) {
|
|
ss << tensor_flat(rowArrayIdx(mShape, counter));
|
|
counter[counter.ndim() - 1] += 1;
|
|
int inc_count = 0;
|
|
bool print_comma = true;
|
|
for (int c = counter.ndim() - 1; c >= 0; --c) {
|
|
if (counter[c] == this->dim(c) && c > 0) {
|
|
++inc_count;
|
|
counter[c - 1] += 1;
|
|
counter[c] = 0;
|
|
print_comma = false;
|
|
}
|
|
}
|
|
if (print_comma && i != this->size() - 1) ss << ", ";
|
|
for (int j = 0; j < inc_count; ++j) {
|
|
ss << "]";
|
|
}
|
|
if (i != this->size() - 1) {
|
|
if (inc_count != 0) ss << "\n";
|
|
for (int j = 0; j < inc_count; ++j) {
|
|
ss << "[";
|
|
}
|
|
}
|
|
}
|
|
ss << "]";
|
|
// ss << fmt::format("\nTensor: shape={}, dtype={}", mShape,
|
|
// detail::simpleTypeName<T>());
|
|
ss << "Tensor: dtype=" << detail::simpleTypeName<T>();
|
|
return ss.str();
|
|
}
|
|
|
|
protected:
|
|
// TODO: make this function public.
|
|
// currently this function is called unexpectedly when using subview({0, 0}).
|
|
TV_HOST_DEVICE_INLINE TensorView<T, Rank> _subview(
|
|
SimpleVector<Slice> slice_vec) {
|
|
Shape new_shape;
|
|
for (int i = 0; i < slice_vec.size(); ++i) {
|
|
new_shape.push_back(slice_vec[i][0]);
|
|
}
|
|
Shape start = new_shape;
|
|
TV_ASSERT(new_shape.ndim() <= mShape.ndim());
|
|
TV_ASSERT(new_shape.ndim() != 0);
|
|
size_t idxsize = new_shape.ndim();
|
|
for (size_t i = idxsize; i < mShape.ndim(); ++i) {
|
|
new_shape.push_back(0);
|
|
start.push_back(0);
|
|
}
|
|
for (size_t i = 0; i < slice_vec.size(); ++i) {
|
|
if (slice_vec[i][1] != -1) {
|
|
new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
|
|
TV_ASSERT(new_shape[i] >= 0);
|
|
} else {
|
|
new_shape[i] = 1; // reduce dim
|
|
}
|
|
}
|
|
auto offset = rowArrayIdx(mShape, start);
|
|
for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {
|
|
new_shape[i] = mShape[i];
|
|
TV_ASSERT(new_shape[i] >= 0);
|
|
}
|
|
Shape reduced_shape;
|
|
for (size_t i = 0; i < slice_vec.size(); ++i) {
|
|
if (slice_vec[i][1] != -1) {
|
|
reduced_shape.push_back(new_shape[i]);
|
|
}
|
|
}
|
|
for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {
|
|
reduced_shape.push_back(new_shape[i]);
|
|
}
|
|
return TensorView<T, Rank>(mPtr + offset, reduced_shape);
|
|
}
|
|
template <typename T1>
|
|
TV_HOST_DEVICE_INLINE Slice to_slice(T1 s) const {
|
|
return Slice{int(s), -1, -1};
|
|
}
|
|
|
|
// Overload for arguments that already are a Slice: hands back a copy.
TV_HOST_DEVICE_INLINE Slice to_slice(Slice s) const {
  return s;
}
|
|
|
|
// Pointer to the first element of the view (null by default). Sub-views are
// formed by adding an element offset to this pointer.
T *mPtr = nullptr;
|
|
// Extent of each dimension of the view; passed to rowArrayIdx together with
// a multi-index to compute element offsets.
Shape mShape;
|
|
};
|
|
|
|
template <typename Os, typename T, int Rank>
|
|
Os &operator<<(Os &os, const TensorView<T, Rank> &dt) {
|
|
os << dt.repr();
|
|
return os;
|
|
}
|
|
|
|
template <typename Os, typename T, int Rank>
|
|
Os &operator<<(Os &os, const TensorView<const T, Rank> &dt) {
|
|
os << dt.repr();
|
|
return os;
|
|
}
|
|
|
|
namespace detail {

// Maps an element type to the printf conversion specification used when
// printing a value of that type.
//
// Call as printfTypeFormat<T>(); the parameter exists only to carry the type
// and its value is never read. Only the primary declaration is provided, so
// requesting a type without a specialization below has no definition to
// call.
//
// Supported types: float, double ("%.2f"), int ("%d"), unsigned ("%u"),
// long ("%ld"), unsigned long ("%lu"), long long ("%lld") and
// unsigned long long ("%llu"). The two long long variants cover 64-bit
// integer element types on platforms where those are not aliases of long.
template <typename T>
constexpr const char *printfTypeFormat(T val = T());

template <>
constexpr const char *printfTypeFormat(float) {
  return "%.2f";
}

template <>
constexpr const char *printfTypeFormat(double) {
  return "%.2f";
}

template <>
constexpr const char *printfTypeFormat(int) {
  return "%d";
}

template <>
constexpr const char *printfTypeFormat(unsigned) {
  return "%u";
}

template <>
constexpr const char *printfTypeFormat(long) {
  return "%ld";
}

template <>
constexpr const char *printfTypeFormat(unsigned long) {
  return "%lu";
}

template <>
constexpr const char *printfTypeFormat(long long) {
  return "%lld";
}

template <>
constexpr const char *printfTypeFormat(unsigned long long) {
  return "%llu";
}

}  // namespace detail
|
|
|
|
template <typename T>
|
|
TV_HOST_DEVICE void printTensorView(const TensorView<T> tensor,
|
|
const char *format) {
|
|
if (tensor.empty()) return;
|
|
if (tensor.ndim() == 0) {
|
|
printf(format, tensor());
|
|
printf("\n");
|
|
return;
|
|
}
|
|
Shape counter = tensor.shape();
|
|
auto tensor_flat = tensor.view(-1);
|
|
for (int i = 0; i < counter.ndim(); ++i) {
|
|
counter[i] = 0;
|
|
printf("[");
|
|
}
|
|
for (size_t i = 0; i < tensor.size(); ++i) {
|
|
printf(format, tensor_flat(rowArrayIdx(tensor.shape(), counter)));
|
|
counter[counter.ndim() - 1] += 1;
|
|
int inc_count = 0;
|
|
bool print_comma = true;
|
|
for (int c = counter.ndim() - 1; c >= 0; --c) {
|
|
if (counter[c] == tensor.dim(c) && c > 0) {
|
|
++inc_count;
|
|
counter[c - 1] += 1;
|
|
counter[c] = 0;
|
|
print_comma = false;
|
|
}
|
|
}
|
|
if (print_comma && i != tensor.size() - 1) printf(", ");
|
|
for (int j = 0; j < inc_count; ++j) {
|
|
printf("]");
|
|
}
|
|
if (i != tensor.size() - 1) {
|
|
if (inc_count != 0) printf("\n");
|
|
for (int j = 0; j < inc_count; ++j) {
|
|
printf("[");
|
|
}
|
|
}
|
|
}
|
|
printf("]\n");
|
|
}
|
|
|
|
template <typename T>
|
|
TV_HOST_DEVICE void printTensorView(TensorView<T> tensor) {
|
|
using Traw = typename std::remove_const<T>::type;
|
|
return printTensorView(tensor, detail::printfTypeFormat<Traw>());
|
|
}
|
|
template <typename T>
|
|
TV_HOST_DEVICE void printTensorView(const T *ptr, Shape shape) {
|
|
using Traw = typename std::remove_const<T>::type;
|
|
return printTensorView(TensorView<const T>(ptr, shape),
|
|
detail::printfTypeFormat<Traw>());
|
|
}
|
|
template <typename T>
|
|
TV_HOST_DEVICE void printTensorView(const T *ptr, Shape shape,
|
|
const char *format) {
|
|
return printTensorView(TensorView<const T>(ptr, shape), format);
|
|
}
|
|
|
|
} // namespace tv
|