Welcome to MShadow-Notes’s documentation!¶
Tensor.h¶
./tensor.h
defines the tensor data structure and its related functions:
- CPU and GPU
- Shape
- Stream
- TRValue
:public expr::RValueExp<Container, DType>
- Tensor
:public TRValue<Tensor<Device, dimension, DType>, Device, dimension, DType>
- Tensor<Device, 1, DType>
:public TRValue<Tensor<Device, 1, DType>, Device, 1, DType>
CPU and GPU¶
The structs cpu and gpu are device tags used to guarantee that device checks succeed before the final evaluation.
// device name CPU
struct cpu {
// whether this device is CPU or not
static const bool kDevCPU = true;
// device flag number, identifies this device
static const int kDevMask = 1 << 0;
};
// device name GPU
struct gpu {
// whether this device is CPU or not
static const bool kDevCPU = false;
// device flag number, identifies this device
static const int kDevMask = 1 << 1;
};
Shape¶
The struct Shape
here is to provide a flexible component for the construction of class Tensor.
Member variables¶
kDimension stores the dimensionality of the current shape, while kSubdim (one less than kDimension) is used to refer to the lowest dimension of a Tensor.
static const int kDimension = dimension;
static const int kSubdim = dimension - 1;
With kDimension known at compile time, an unsigned array shape_ can be declared to store the shape of the Tensor.
// typedef unsigned index_t;
index_t shape_[kDimension];
Constructor¶
After the definition of these three member variables, we can write the constructors that create a struct Shape.
// default constructor, do nothing
MSHADOW_XINLINE Shape(void) {}
// copy constructor
MSHADOW_XINLINE Shape(const Shape<kDimension> &s) {
#pragma unroll
for (int i = 0; i < kDimension; ++i) {
this->shape_[i] = s[i];
}
}
Taking const Shape<kDimension> &s as the constructor parameter ensures that only a Shape with the same kDimension can be used to initialize a new one.
Overloaded Operator¶
Operator [] is overloaded to return the size of the requested dimension.
MSHADOW_XINLINE index_t &operator[](index_t idx) {
return shape_[idx];
}
// the returned value is constant
MSHADOW_XINLINE const index_t &operator[](index_t idx) const {
return shape_[idx];
}
Operators == and != are overloaded to check whether two Shape instances are the same.
// Shape<kDimension> implicitly check whether the input variable has the same size
MSHADOW_XINLINE bool operator==(const Shape<kDimension> &s) const {
#pragma unroll
for (int i = 0; i < kDimension; ++i) {
if (s.shape_[i] != this->shape_[i]) return false;
}
return true;
}
// return whether two shape not equal
// s: the shape to compare against
MSHADOW_XINLINE bool operator!=(const Shape<kDimension> &s) const {
return !(*this == s);
}
Operator <<
is overloaded to output the shape_
of Shape
.
template<int dim>
friend std::ostream &operator<<(std::ostream &os, const Shape<dim> &shape);
The above is only a declaration; its definition is in ./tensor_cpu-inl.h.
template<int ndim>
inline std::ostream &operator<<(std::ostream &os, const Shape<ndim> &shape) {
os << '(';
for (int i = 0; i < ndim; ++i) {
if (i != 0) os << ',';
os << shape[i];
}
// python style tuple
if (ndim == 1) os << ',';
os << ')';
return os;
}
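For a quick illustration, here is a small usage sketch of these operators (it uses the Shape3 helper introduced later in this file):
Shape<3> a = Shape3(5, 3, 6);
Shape<3> b = Shape3(5, 3, 6);
a[2] = 7;                   // operator[] gives writable access to one dimension
cout << (a == b) << endl;   // prints 0, the shapes now differ
cout << a << endl;          // prints (5,3,7)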
Member Functions¶
Size¶
The Size function returns the product of all dimensions, e.g. (5,3,6) -> 90
MSHADOW_XINLINE size_t Size(void) const {
size_t size = this->shape_[0];
#pragma unroll
for (int i = 1; i < kDimension; ++i) {
size *= this->shape_[i];
}
return size;
}
FlatTo1D¶
The FlatTo1D function returns a Shape with kDimension=1, whose single dimension equals the product of all dimensions of the original Shape, e.g. (5,3,6) -> (90)
MSHADOW_XINLINE Shape<1> FlatTo1D(void) const {
Shape<1> s;
s[0] = this->Size();
return s;
}
FlatTo2D¶
The FlatTo2D function returns a Shape with kDimension=2: the lowest dimension of the original Shape becomes its second dimension, and the product of all remaining dimensions becomes its first dimension.
The reason for this layout will be considered and explained later.
MSHADOW_XINLINE Shape<2> FlatTo2D(void) const {
Shape<2> s;
s.shape_[1] = this->shape_[kDimension - 1];
index_t ymax = 1;
#pragma unroll
for (int i = 0; i < kDimension - 1; ++i) {
ymax *= this->shape_[i];
}
s.shape_[0] = ymax;
return s;
}
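As a sketch of the three functions above (again using the Shape3 helper defined later in this file):
Shape<3> s = Shape3(5, 3, 6);
s.Size();       // 90
s.FlatTo1D();   // (90)
s.FlatTo2D();   // (15,6): the lowest dimension is kept, the others are multiplied together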
ProdShape¶
The function ProdShape
returns the product of shape in range [dimstart, dimend)
.
MSHADOW_XINLINE index_t ProdShape(int dimstart, int dimend) const {
index_t num = 1;
#pragma unroll
for (int i = dimstart; i < dimend; ++i) {
num *= this->shape_[i];
}
return num;
}
SubShape¶
The function SubShape returns a new shape whose kDimension is one less than the original's, dropping the highest dimension, e.g. (3,2,6,4) -> (2,6,4).
Since it is mainly built for CUDA, its usefulness will be considered and explained later.
MSHADOW_XINLINE Shape<kSubdim> SubShape(void) const {
Shape<kSubdim> s;
// for cuda
#pragma unroll
for (int i = 0; i < kSubdim; ++i) {
s.shape_[i] = this->shape_[i + 1];
}
return s;
}
Slice¶
The function Slice returns a new shape whose kDimension is dimend - dimstart, taking the dimensions of the original shape in that range.
template<int dimstart, int dimend>
MSHADOW_XINLINE Shape<dimend - dimstart> Slice(void) const {
Shape<dimend - dimstart> s;
#pragma unroll
for (int i = dimstart; i < dimend; ++i) {
s[i - dimstart] = this->shape_[i];
}
return s;
}
It can be used in the following way, e.g. (3,4,5,6,7) -> (5,6,7).
// usage
Shape<5> ss = Shape5(3,4,5,6,7);
Shape<3> sss = ss.Slice<2,5>();
cout << sss <<endl;
// output (5,6,7)
Useful Construction¶
As already seen in the usage above, several construction helpers are provided as the public API for creating a struct Shape.
// useful construction functions to generate shape
MSHADOW_XINLINE Shape<1> Shape1(index_t s0) {
Shape<1> s; s[0] = s0;
return s;
}
MSHADOW_XINLINE Shape<2> Shape2(index_t s0, index_t s1) {
Shape<2> s; s[0] = s0; s[1] = s1;
return s;
}
MSHADOW_XINLINE Shape<3> Shape3(index_t s0, index_t s1, index_t s2) {
Shape<3> s;
s[0] = s0; s[1] = s1; s[2] = s2;
return s;
}
MSHADOW_XINLINE Shape<4> Shape4(index_t s0, index_t s1,
index_t s2, index_t s3) {
Shape<4> s;
s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
return s;
}
MSHADOW_XINLINE Shape<5> Shape5(index_t s0, index_t s1, index_t s2,
index_t s3, index_t s4) {
Shape<5> s;
s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; s[4] = s4;
return s;
}
Stream¶
The Stream here is only a dummy implementation for the CPU; we leave it for further discussion when we reach the GPU implementation.
template<typename Device>
struct Stream {
// this is only a dummy implementation for CPU
// for GPU, the actual implementation will be specialized in tensor_gpu-inl.h
//wait for all the computation associated with this stream to complete
inline void Wait(void) {}
// query whether the stream is idle
// return true if the stream is idle and all the job have been completed
inline bool CheckIdle(void) {
return true;
}
// create a blas handle
inline void CreateBlasHandle() {}
};
TRValue¶
:public expr::RValueExp<Container, DType>
This is the Tensor RValue, the super type of all kinds of possible tensors. Its role is essential for understanding the MShadow framework, so we postpone its discussion to the core part of this tutorial.
template<typename Container, typename Device, int dimension, typename DType>
struct TRValue: public expr::RValueExp<Container, DType> {
};
Tensor¶
:public TRValue<Tensor<Device, dimension, DType>, Device, dimension, DType>
The struct Tensor
is the key element in MShadow.
//in /mshadow/base.h
//#ifndef MSHADOW_DEFAULT_DTYPE
//#define MSHADOW_DEFAULT_DTYPE = default_real_t
//#endif
//typedef float default_real_t;
//as a result, the default data type of Tensor is float
template<typename Device, int dimension, typename DType MSHADOW_DEFAULT_DTYPE>
struct Tensor: public TRValue<Tensor<Device, dimension, DType>, Device, dimension, DType> {
};
// Trivial usage
Tensor<cpu, 3> ts(data, Shape3(2,5,2));
Member Variables¶
The variable kDevCPU indicates on which type of device the data is stored, and kSubdim has the same meaning as in struct Shape.
static const bool kDevCPU = Device::kDevCPU;
static const int kSubdim = dimension - 1;
The pointer dptr_ points to the data wherever it is stored, while the shape of the data is described by a struct Shape, which keeps it flexible to handle.
DType *dptr_;
Shape<dimension> shape_;
Finally, the stride_ variable deals with pitch allocation on the GPU or with SSE (aligning the x dimension to 64 bit) for efficiency.
The concept of a stream matters mainly for GPU devices, so we defer its discussion until then.
index_t stride_;
// stream where the computation lies
// stream is a device dependency concept
Stream<Device> *stream_;
Constructor¶
It is worth noting that `stride_` is initialized by default to the lowest dimension of `Shape`. The reason for this remains to be examined and discussed.
// default constructor
MSHADOW_XINLINE Tensor(void) : stream_(NULL) {}
// constructor from shape
MSHADOW_XINLINE Tensor(const Shape<dimension> &shape): shape_(shape), stream_(NULL) {}
// constructor from data pointer and shape, without stride
MSHADOW_XINLINE Tensor(DType *dptr, const Shape<dimension> &shape)
: dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(NULL) {}
// constructor from data pointer, shape and stream, without stride
MSHADOW_XINLINE Tensor(DType *dptr, const Shape<dimension> &shape,Stream<Device> *stream)
: dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(stream) {}
// constructor from data pointer, shape, stride and stream
MSHADOW_XINLINE Tensor(DType *dptr, const Shape<dimension> &shape, index_t stride, Stream<Device> *stream)
: dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
Member Functions¶
MSize and MemSize¶
The function MSize returns the memory cost (in elements) of the specified tensor, including the aligned x dimension (so the product starts from stride_ instead of the lowest dimension), while the function MemSize<startdim> returns the memory cost starting from dimension startdim.
MSHADOW_XINLINE size_t MSize(void) const {
return this->MemSize<0>();
}
template<int startdim>
MSHADOW_XINLINE size_t MemSize(void) const {
size_t memsz = this->stride_;
#pragma unroll
for (int i = startdim; i < kSubdim; ++i) {
memsz *= this->shape_[i];
}
return memsz;
}
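For example, assuming the default stride (equal to the lowest dimension), a hypothetical (5,3) tensor would give:
float data[15];
Tensor<cpu, 2> mat(data, Shape2(5, 3));  // stride_ = 3 by default
mat.MSize();       // 15 = stride_ * shape_[0]
mat.MemSize<1>();  // 3  = stride_, the memory cost of one row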
size¶
The function size returns the size of the specified dimension.
MSHADOW_XINLINE index_t size(index_t idx) const {
return shape_[idx];
}
FlatTo1D and FlatTo2D¶
The functions FlatTo1D and FlatTo2D return a new tensor with the same data (unchanged dptr_) but a different shape (see FlatTo1D and FlatTo2D of struct Shape above).
MSHADOW_XINLINE Tensor<Device, 1, DType> FlatTo1D(void) const {
return Tensor<Device, 1, DType>(dptr_, shape_.FlatTo1D(), stride_, stream_);
}
MSHADOW_XINLINE Tensor<Device, 2, DType> FlatTo2D(void) const {
return Tensor<Device, 2, DType>(dptr_, shape_.FlatTo2D(), stride_, stream_);
}
Slice¶
The function Slice returns a new Tensor that is a subset along the highest dimension, e.g. (128,3,224,224) -> (64,3,224,224).
MSHADOW_XINLINE Tensor<Device, dimension, DType>
Slice(index_t begin, index_t end) const {
Shape<dimension> s = this->shape_;
s[0] = end - begin;
return Tensor<Device, dimension, DType>(dptr_ + this->MemSize<1>() * begin, s, stride_, stream_);
}
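A usage sketch of Slice (here data is assumed to be an externally allocated buffer of the right size):
Tensor<cpu, 4> batch(data, Shape4(128, 3, 224, 224));
Tensor<cpu, 4> half = batch.Slice(0, 64);  // shape (64,3,224,224), shares the same memory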
Overloaded Operators¶
Operator []¶
The operator [] is overloaded to return the sub-tensor at the given index along the highest dimension of the Tensor.
The expression dptr_ + this->MemSize<1>() * idx fetches the idx-th sub-tensor, e.g. (128,3,224,224)[5] -> the 5th (3,224,224).
MSHADOW_XINLINE Tensor<Device, kSubdim, DType> operator[](index_t idx) const {
return Tensor<Device, kSubdim, DType>(dptr_ + this->MemSize<1>() * idx, shape_.SubShape(), stride_, stream_);
}
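Chaining operator [] down to the 1D specialization gives element access, e.g. (a sketch reusing the trivial usage above):
Tensor<cpu, 3> ts(data, Shape3(2, 5, 2));
Tensor<cpu, 2> sub = ts[1];  // the 2nd (5,2) sub-tensor, no copy
ts[1][2][0] = 42.0f;         // element access via repeated indexing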
Operator =¶
The operator = acts as a plain assignment operator when the rhs (right-hand side) is also a Tensor.
// implement the assignment of same type
inline Tensor<Device, dimension, DType> &
operator=(const Tensor<Device, dimension, DType> &exp) {
dptr_ = exp.dptr_;
shape_ = exp.shape_;
stride_ = exp.stride_;
stream_ = exp.stream_;
return *this;
}
However, if the rhs is a scalar, e.g. 3.0f, or an Exp (expression) type, the overloaded operator = triggers the computation by calling the __assign function defined in its grandparent class RValueExp.
// we trigger computation at here
template<typename E, int etype>
inline Tensor<Device, dimension, DType> &
operator=(const expr::Exp<E, DType, etype> &exp) {
return this->__assign(exp);
}
inline Tensor<Device, dimension, DType> &
operator=(const DType &exp) {
return this->__assign(exp);
}
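A small sketch of the two assignment paths:
Tensor<cpu, 2> a(data,  Shape2(5, 2));
Tensor<cpu, 2> b(data2, Shape2(5, 2));
a = b;     // same-type assignment: only copies dptr_/shape_/stride_/stream_
a = 1.0f;  // scalar rhs: goes through __assign and fills every element with 1.0f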
It is worth noting that several other assignment-related operators are also overloaded, but in the grandparent class RValueExp; we defer their analysis until we reach it.
Missing Explanations¶
The usage of #pragma unroll¶
If the following for loop has a constant trip count, it will be unrolled at compile time to speed up execution, e.g. for(i = 1; i < 10; i++){...} will be unrolled.
Otherwise, if the trip count is not known at compile time, the loop is left as an ordinary loop, e.g. for(i = 1; i < n; i++){...} stays unchanged, since the compiler cannot know the exact value of n until run time.
Missing Component¶
Shape::ConvertLayout¶
ConvertLayout is left to the discussion of MXNet, which uses it to handle convolution layouts.
// Convert shape in src_layout to shape in dst_layout
inline Shape<4> ConvertLayout(const Shape<4>& src, int src_layout, int dst_layout) {
Shape<4> dst;
switch (src_layout) {
case kNCHW:
dst = src;
break;
case kNHWC:
dst[0] = src[0];
dst[2] = src[1];
dst[3] = src[2];
dst[1] = src[3];
break;
default:
LOG(FATAL) << "Invalid layout for 4d shape " << src_layout;
}
Shape<4> dst2;
switch (dst_layout) {
case kNCHW:
return dst;
case kNHWC:
dst2[0] = dst[0];
dst2[1] = dst[2];
dst2[2] = dst[3];
dst2[3] = dst[1];
break;
default:
LOG(FATAL) << "Invalid layout for 4d shape " << src_layout;
}
return dst2;
}
// Convert shape in src_layout to shape in dst_layout
inline Shape<5> ConvertLayout(const Shape<5>& src, int src_layout, int dst_layout) {
Shape<5> dst;
switch (src_layout) {
case kNCDHW:
dst = src;
break;
case kNDHWC:
dst[0] = src[0];
dst[2] = src[1];
dst[3] = src[2];
dst[4] = src[3];
dst[1] = src[4];
break;
default:
LOG(FATAL) << "Invalid layout for 5d shape " << src_layout;
}
Shape<5> dst2;
switch (dst_layout) {
case kNCDHW:
return dst;
case kNDHWC:
dst2[0] = dst[0];
dst2[1] = dst[2];
dst2[2] = dst[3];
dst2[3] = dst[4];
dst2[4] = dst[1];
break;
default:
LOG(FATAL) << "Invalid layout for 5d shape " << src_layout;
}
return dst2;
}
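A usage sketch, assuming the layout flags such as kNCHW and kNHWC are defined in ./base.h:
Shape<4> nchw = Shape4(2, 3, 4, 5);                // (N,C,H,W)
Shape<4> nhwc = ConvertLayout(nchw, kNCHW, kNHWC); // (2,4,5,3), i.e. (N,H,W,C)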
Tensor::set_stream and Tensor::CheckContiguous¶
These two functions are heavily related to GPU implementation, so they are left for further discussion.
inline void set_stream(Stream<Device> *stream) {
this->stream_ = stream;
}
MSHADOW_XINLINE bool CheckContiguous(void) const {
return this->shape_[dimension - 1] == stride_;
}
Tensor<Device, 1, DType>¶
We must specialize struct Tensor for the 1D case, since the implementation of the overloaded operator [] is different.
It can also serve as a review of the member variables and functions of the general Tensor.
template<typename Device, typename DType>
struct Tensor<Device, 1, DType>:
public TRValue<Tensor<Device, 1, DType>, Device, 1, DType> {
public:
DType *dptr_;
Shape<1> shape_;
index_t stride_;
Stream<Device> *stream_;
// constructor
MSHADOW_XINLINE Tensor(void) : stream_(NULL) {}
MSHADOW_XINLINE Tensor(const Shape<1> &shape)
: shape_(shape), stream_(NULL) {}
MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape)
: dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(NULL) {}
MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape, Stream<Device> *stream)
: dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(stream) {}
MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape,
index_t stride, Stream<Device> *stream)
: dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
inline void set_stream(Stream<Device> *stream) {
this->stream_ = stream;
}
MSHADOW_XINLINE Tensor<Device, 1, DType> FlatTo1D(void) const {
return *this;
}
MSHADOW_XINLINE Tensor<Device, 2, DType> FlatTo2D(void) const {
return Tensor<Device, 2, DType>(dptr_, shape_.FlatTo2D(), stride_, stream_);
}
MSHADOW_XINLINE Tensor<Device, 1, DType> Slice(index_t begin, index_t end) const {
Shape<1> s;
s[0] = end - begin;
return Tensor<Device, 1, DType>(dptr_ + begin, s, s[0], stream_);
}
MSHADOW_XINLINE bool CheckContiguous(void) const {
return true;
}
MSHADOW_XINLINE size_t MSize(void) const {
return shape_[0];
}
MSHADOW_XINLINE index_t size(index_t i) const {
return shape_[0];
}
MSHADOW_XINLINE DType &operator[](index_t idx) {
return dptr_[idx];
}
MSHADOW_XINLINE const DType &operator[](index_t idx) const {
return dptr_[idx];
}
// implement the assignment of same type
inline Tensor<Device, 1, DType> &
operator=(const Tensor<Device, 1, DType> &exp) {
dptr_ = exp.dptr_;
shape_ = exp.shape_;
stride_ = exp.stride_;
stream_ = exp.stream_;
return *this;
}
template<typename E, int etype>
inline Tensor<Device, 1, DType> &
operator=(const expr::Exp<E, DType, etype> &exp) {
return this->__assign(exp);
}
inline Tensor<Device, 1, DType> &operator=(const DType &exp) {
return this->__assign(exp);
}
};
Expression.h¶
./expression.h
contains the definitions of the abstract expressions and their related expression templates:
- Type
- Exp
- ScalarExp
: public Exp<ScalarExp<DType>, DType, type::kMapper>
- TypecastExp
: public Exp<TypecastExp<DstDType, SrcDType, EType, etype>, DstDType, etype>
- TransposeExp
: public Exp<TransposeExp<EType, DType>, DType, type::kChainer>
- ExpEngine
- RValueExp
: public Exp<Container, DType, type::kRValue>
- DotExp
: public Exp<DotExp<TA, TB, ltrans, rtrans, DType>, DType, type::kComplex>
- TernaryMapExp
: public Exp<TernaryMapExp<OP, TA, TB, TC, DType, etype>, DType, etype>
- UnaryMapExp
: public Exp<UnaryMapExp<OP, TA, DType, etype>, DType, etype>
- BinaryMapExp
: public Exp<BinaryMapExp<OP, TA, TB, DType, etype>, DType, etype>
Type¶
Every expression in MShadow must declare its own type.
- kRValue: the expression directly corresponds to a data class and can be used to assign data.
- kMapper: the expression contains element-wise tensor operations and can be mapped onto an expression of the same shape.
- kChainer: the expression can be chained with other expressions; most functions in ./extension/ are of this type.
- kComplex: other cases, e.g. dot product.
namespace type {
// type expression type are defined as bitmask
// subtype relationshop kRValue < kMapper < kPull < kComplex
const int kRValue = 0;
const int kMapper = 1;
const int kChainer = 3;
const int kComplex = 7;
}
Exp¶
The struct Exp
is the base class for every expression.
Each class that inherits from Exp passes its own type as SubType, while DType is the type of the data being manipulated and exp_type indicates the expression type.
template<typename SubType, typename DType, int exp_type>
struct Exp {
public:
// return subtype instance of current class
inline const SubType& self(void) const {
return *static_cast<const SubType*>(this);
}
// return reference of subtype instance of current class
inline SubType* ptrself(void) {
return static_cast<SubType*>(this);
}
};
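This is the curiously recurring template pattern (CRTP): every concrete expression passes itself as SubType, so self() / ptrself() can recover the derived type without virtual dispatch. A minimal sketch with a hypothetical subclass:
// hypothetical expression type, for illustration only
template<typename DType>
struct MyExp : public Exp<MyExp<DType>, DType, type::kMapper> {
  DType value_;
  explicit MyExp(DType value) : value_(value) {}
};
// given a reference to the Exp base, self() returns the concrete MyExp<float> again:
// const MyExp<float> &real = base_ref.self();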
ScalarExp¶
: public Exp<ScalarExp<DType>, DType, type::kMapper>
ScalarExp wraps a scalar value into a subclass of the expression type Exp.
For now, recall that an explicit constructor is mainly used to prevent implicit construction of a class. The constructor cannot be explicit here, because if it were, a scalar appearing on either the lhs or the rhs of an expression would not be implicitly converted through this constructor.
template<typename DType>
struct ScalarExp: public Exp<ScalarExp<DType>, DType, type::kMapper> {
// scalar value
DType scalar_;
// implicit constructor, MUST NOT BE explicit
ScalarExp(DType scalar) : scalar_(scalar) {}
};
// this is a type cast function that turns the scalar s with type Dtype into type ScalarExp
template<typename DType>
inline ScalarExp<DType> scalar(DType s) {
return ScalarExp<DType>(s); // turn the scalar s with type Dtype into type ScalarExp
}
// usage
scalar<float>(10.0f)
// it is worth noting that this process is generally performed implicitly
// (by overloaded operator in struct RValueExp)
TypecastExp¶
: public Exp<TypecastExp<DstDType, SrcDType, EType, etype>, DstDType, etype>
The TypecastExp explicitly changes the DType of an expression of type Exp.
template<typename DstDType, typename SrcDType, typename EType, int etype>
struct TypecastExp:
public Exp<TypecastExp<DstDType, SrcDType, EType, etype>, DstDType, etype> {
// expression to be typecasted
const EType &exp;
// constructor
explicit TypecastExp(const EType &e) : exp(e) {}
};
// create a Typecast expression
template<typename DstDType, typename SrcDType, typename EType, int etype>
inline TypecastExp<DstDType, SrcDType, EType, (etype|type::kMapper)>
tcast(const Exp<EType, SrcDType, etype> &exp) {
return TypecastExp<DstDType, SrcDType, EType, (etype|type::kMapper)>(exp.self());
}
// usage
float data[10];
int data1[10];
Tensor<cpu,2> mat(data,Shape2(5,2));
Tensor<cpu,2,int> mat1(data1,Shape2(5,2));
mat = 3.2f;
mat1 = tcast<int>(mat);
for (index_t i = 0; i < mat.size(0); ++i) {
for (index_t j = 0; j < mat.size(1); ++j) {
printf("%.2f ", mat[i][j]);
}
printf("\n");
}
for (index_t i = 0; i < mat1.size(0); ++i) {
for (index_t j = 0; j < mat1.size(1); ++j) {
printf("%d ", mat1[i][j]);
}
printf("\n");
}
// output:
// 3.20 3.20
// 3.20 3.20
// 3.20 3.20
// 3.20 3.20
// 3.20 3.20
// 3 3
// 3 3
// 3 3
// 3 3
// 3 3
TransposeExp¶
: public Exp<TransposeExp<EType, DType>, DType, type::kChainer>
A TransposeExp is generated by the .T() function of class RValueExp; its constructor merely keeps a reference to the original expression.
The actual transpose happens in the `Eval()` function. Whether its own member function `T()` is useful remains to be checked.
template<typename EType, typename DType>
struct TransposeExp: public Exp<TransposeExp<EType, DType>,
DType, type::kChainer> {
// expression to be transposed, the generated one is same
const EType &exp;
// constructor
explicit TransposeExp(const EType &e) : exp(e) {}
// transpose expression
inline const EType &T(void) const {
return exp;
}
};
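A usage sketch: the transpose is lazy, and the index swap only happens when the expression is evaluated (see the Transpose Plan later):
Tensor<cpu, 2> src(data,  Shape2(3, 4));
Tensor<cpu, 2> dst(data2, Shape2(4, 3));
dst = src.T();  // dst[i][j] = src[j][i]; T() itself copies nothing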
ExpEngine¶
ExpEngine is the struct that starts interpreting all expressions.
It is invoked by the member functions of RValueExp, while its implementation lives in ./expr_engine-inl.h and calls MapExp in ./tensor_cpu-inl.h or ./tensor_gpu-inl.h to begin the interpretation of expressions.
The detailed discussion is left to its implementation code.
template<typename Saver, typename RValue, typename DType>
struct ExpEngine;
RValueExp¶
: public Exp<Container, DType, type::kRValue>
RValueExp is a special class inherited by TRValue (Tensor RValue), the super class of all kinds of tensors.
Since it explicitly sets the exp_type (expression type) of Exp to kRValue during inheritance, it covers every situation of the form dst (op) src, where dst is the destination that receives the result of the computation, (op) is one of += / -= / *= / /= / =, and src can be a value or an expression.
As a result, this class has eight overloaded operators and two assign functions, plus an extra transpose function.
template<typename Container, typename DType>
class RValueExp: public Exp<Container, DType, type::kRValue> {
public:
// transpose of a matrix, return transpose of current expression
// usage: *.T()
inline const TransposeExp<Container, DType> T(void) const {
return TransposeExp<Container, DType>(this->self());
}
// operator overload
// += for scalar
inline Container &operator+=(DType s) {
ExpEngine<sv::plusto, Container, DType>::Eval(this->ptrself(), scalar<DType>(s));
return *(this->ptrself());
}
// -= for scalar
inline Container &operator-=(DType s) {
ExpEngine<sv::minusto, Container, DType>::Eval(this->ptrself(), scalar<DType>(s));
return *(this->ptrself());
}
// *= for scalar
inline Container &operator*=(DType s) {
ExpEngine<sv::multo, Container, DType>::Eval(this->ptrself(), scalar<DType>(s));
return *(this->ptrself());
}
// /= for scalar
inline Container &operator/=(DType s) {
ExpEngine<sv::divto, Container, DType>::Eval(this->ptrself(), scalar<DType>(s));
return *(this->ptrself());
}
// assign for scalar
inline Container &__assign(DType s) {
ExpEngine<sv::saveto, Container, DType>::Eval(this->ptrself(), scalar<DType>(s));
return *(this->ptrself());
}
// assign for expression
template<typename E, int etype>
inline Container &__assign(const Exp<E, DType, etype> &exp) {
ExpEngine<sv::saveto, Container, DType>::Eval(this->ptrself(), exp.self());
return *(this->ptrself());
}
// not sure the difference to above function
inline Container &__assign(const Exp<Container, DType, type::kRValue> &exp);
// += for expression
template<typename E, int etype>
inline Container &operator+=(const Exp<E, DType, etype> &exp) {
ExpEngine<sv::plusto, Container, DType>::Eval(this->ptrself(), exp.self());
return *(this->ptrself());
}
// -= for expression
template<typename E, int etype>
inline Container &operator-=(const Exp<E, DType, etype> &exp) {
ExpEngine<sv::minusto, Container, DType>::Eval(this->ptrself(), exp.self());
return *(this->ptrself());
}
// *= for expression
template<typename E, int etype>
inline Container &operator*=(const Exp<E, DType, etype> &exp) {
ExpEngine<sv::multo, Container, DType>::Eval(this->ptrself(), exp.self());
return *(this->ptrself());
}
// /= for expression
template<typename E, int etype>
inline Container &operator/=(const Exp<E, DType, etype> &exp) {
ExpEngine<sv::divto, Container, DType>::Eval(this->ptrself(), exp.self());
return *(this->ptrself());
}
};
In every overloaded operator, ExpEngine is used to trigger the Eval function, which uses the MapExp template together with MakePlan (discussed in the next chapter).
Finally, the namespace sv, defined in ./base.h, performs the final assignment.
DotExp¶
: public Exp<DotExp<TA, TB, ltrans, rtrans, DType>, DType, type::kComplex>
The DotExp is an expression that performs matrix multiplication between two Tensors.
template<typename TA, typename TB, bool ltrans, bool rtrans, typename DType>
struct DotExp: public Exp<DotExp<TA, TB, ltrans, rtrans, DType>, DType, type::kComplex> {
const TA &lhs_;
const TB &rhs_;
DType scale_; // scale over result
explicit DotExp(const TA &lhs, const TB &rhs, DType scale)
: lhs_(lhs), rhs_(rhs), scale_(scale) {}
};
A struct DotExp is created by the function dot(), which comes in the following four variants depending on which operands are transposed.
- both lhs and rhs are without transpose
template<typename TA, typename TB, typename DType>
inline DotExp<TA, TB, false, false, DType>
dot(const RValueExp<TA, DType> &lhs, const RValueExp<TB, DType> &rhs) {
return DotExp<TA, TB, false, false, DType>(lhs.self(), rhs.self(), DType(1.0f));
}
- lhs is with transpose, while rhs is not
template<typename TA, typename TB, typename DType>
inline DotExp<TA, TB, true, false, DType>
dot(const TransposeExp<TA, DType> &lhs, const RValueExp<TB, DType> &rhs) {
return DotExp<TA, TB, true, false, DType>(lhs.exp, rhs.self(), DType(1.0f));
}
- rhs is with transpose, while lhs is not
template<typename TA, typename TB, typename DType>
inline DotExp<TA, TB, false, true, DType>
dot(const RValueExp<TA, DType> &lhs, const TransposeExp<TB, DType> &rhs) {
return DotExp<TA, TB, false, true, DType>(lhs.self(), rhs.exp, DType(1.0f));
}
- both lhs and rhs are with transpose
template<typename TA, typename TB, typename DType>
inline DotExp<TA, TB, true, true, DType>
dot(const TransposeExp<TA, DType> &lhs, const TransposeExp<TB, DType> &rhs) {
return DotExp<TA, TB, true, true, DType>(lhs.exp, rhs.exp, DType(1.0f));
}
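A usage sketch combining the variants (the shapes must match after the requested transposes):
Tensor<cpu, 2> lhs(d0, Shape2(4, 3));
Tensor<cpu, 2> rhs(d1, Shape2(4, 5));
Tensor<cpu, 2> out(d2, Shape2(3, 5));
out = dot(lhs.T(), rhs);  // (3,4) * (4,5) -> (3,5)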
the usage of batch_dot is unclear yet!
template<bool transpose_left, bool transpose_right, typename TA, typename TB, typename DType>
inline DotExp<TA, TB, transpose_left, transpose_right, DType>
batch_dot(const RValueExp<TA, DType> &lhs, const RValueExp<TB, DType> &rhs) {
return DotExp<TA, TB, transpose_left, transpose_right, DType>(
lhs.self(), rhs.self(), DType(1.0f));
}
TernaryMapExp¶
: public Exp<TernaryMapExp<OP, TA, TB, TC, DType, etype>, DType, etype>
TernaryMapExp is designed to handle ternary operation expressions. Its member variables hold the three operand expressions in their original types.
template<typename OP, typename TA, typename TB, typename TC, typename DType, int etype>
struct TernaryMapExp: public Exp<TernaryMapExp<OP, TA, TB, TC, DType, etype>, DType, etype> {
const TA &item1_;
const TB &item2_;
const TC &item3_;
explicit TernaryMapExp(const TA &item1, const TB &item2, const TC &item3)
:item1_(item1), item2_(item2), item3_(item3) {}
};
The struct TernaryMapExp
is generated by the function MakeExp
.
template<typename OP, typename TA, typename TB, typename TC, typename DType, int ta, int tb, int tc>
inline TernaryMapExp<OP, TA, TB, TC, DType, (ta|tb|tc|type::kMapper)>
MakeExp(const Exp<TA, DType, ta> &item1, const Exp<TB, DType, tb> &item2,
const Exp<TC, DType, tc> &item3) {
return TernaryMapExp<OP, TA, TB, TC, DType,
(ta|tb|tc|type::kMapper)>(item1.self(), item2.self(), item3.self());
}
The function F<op>() provides a shorthand for MakeExp(), with an operator defined in namespace op that provides a Map() function to guide the evaluation.
template<typename OP, typename TA, typename TB, typename TC, typename DType, int ta, int tb, int tc>
inline TernaryMapExp<OP, TA, TB, TC, DType, (ta|tb|tc|type::kMapper)>
F(const Exp<TA, DType, ta> &item1, const Exp<TB, DType, tb> &item2,
const Exp<TC, DType, tc> &item3) {
return MakeExp<OP>(item1, item2, item3);
}
UnaryMapExp¶
: public Exp<UnaryMapExp<OP, TA, DType, etype>, DType, etype>
UnaryMapExp is designed to handle unary operation expressions. Its single member variable holds the operand expression in its original type.
template<typename OP, typename TA, typename DType, int etype>
struct UnaryMapExp: public Exp<UnaryMapExp<OP, TA, DType, etype>, DType, etype> {
const TA &src_;
explicit UnaryMapExp(const TA &src) : src_(src) {}
};
The struct UnaryMapExp
is also generated by the function MakeExp
.
template<typename OP, typename TA, typename DType, int ta>
inline UnaryMapExp<OP, TA, DType, (ta|type::kMapper)>
MakeExp(const Exp<TA, DType, ta> &src) {
return UnaryMapExp<OP, TA, DType, (ta|type::kMapper)>(src.self());
}
Similarly, UnaryMapExp has its own F<op>() overload providing a shorthand for MakeExp(), with an operator defined in namespace op that provides a Map() function to guide the evaluation.
template<typename OP, typename TA, typename DType, int ta>
inline UnaryMapExp<OP, TA, DType, (ta|type::kMapper)>
F(const Exp<TA, DType, ta> &src) {
return MakeExp<OP>(src);
}
BinaryMapExp¶
: public Exp<BinaryMapExp<OP, TA, TB, DType, etype>, DType, etype>
BinaryMapExp is designed to handle binary operation expressions. Its member variables hold the two operand expressions in their original types.
template<typename OP, typename TA, typename TB, typename DType, int etype>
struct BinaryMapExp: public Exp<BinaryMapExp<OP, TA, TB, DType, etype>,
DType, etype> {
const TA &lhs_;
const TB &rhs_;
explicit BinaryMapExp(const TA &lhs, const TB &rhs): lhs_(lhs), rhs_(rhs) {}
};
The struct BinaryMapExp
is also generated by the function MakeExp
.
template<typename OP, typename TA, typename TB, typename DType, int ta, int tb>
inline BinaryMapExp<OP, TA, TB, DType, (ta|tb|type::kMapper)>
MakeExp(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
return BinaryMapExp<OP, TA, TB, DType, (ta|tb|type::kMapper)>(lhs.self(), rhs.self());
}
Similarly, BinaryMapExp has its own F<op>() overload providing a shorthand for MakeExp(), with an operator defined in namespace op that provides a Map() function to guide the evaluation.
template<typename OP, typename TA, typename TB, typename DType, int ta, int tb>
inline BinaryMapExp<OP, TA, TB, DType, (ta|tb|type::kMapper)>
F(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
return MakeExp<OP>(lhs, rhs);
}
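As a sketch of how a user-defined operator plugs into F<op>(), following the same pattern as the operators in namespace op (the maximum struct below is hypothetical):
// only a static Map() is required from an operator
struct maximum {
  template<typename DType>
  MSHADOW_XINLINE static DType Map(DType a, DType b) {
    return a > b ? a : b;
  }
};
// element-wise maximum of two tensors of the same shape
out = F<maximum>(lhs, rhs);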
Moreover, the four basic operators + / - / * / / are also overloaded, so that an Exp type can be combined with a built-in scalar type, e.g. a + 3.0f where a is a Tensor.
Since the constructor of ScalarExp is not explicit, the 3.0f is implicitly converted to a ScalarExp.
template<typename TA, typename TB, typename DType, int ta, int tb>
inline BinaryMapExp<op::plus, TA, TB, DType, (ta|tb|type::kMapper)>
operator+(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
return MakeExp<op::plus>(lhs, rhs);
}
template<typename TA, typename TB, typename DType, int ta, int tb>
inline BinaryMapExp<op::minus, TA, TB, DType, (ta|tb|type::kMapper)>
operator-(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
return MakeExp<op::minus>(lhs, rhs);
}
template<typename TA, typename TB, typename DType, int ta, int tb>
inline BinaryMapExp<op::mul, TA, TB, DType, (ta|tb|type::kMapper)>
operator*(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
return MakeExp<op::mul>(lhs, rhs);
}
template<typename TA, typename TB, typename DType, int ta, int tb>
inline BinaryMapExp<op::div, TA, TB, DType, (ta|tb|type::kMapper)>
operator/(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
return MakeExp<op::div>(lhs, rhs);
}
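Put together, these overloads let a whole expression be written on the right-hand side; the operators only build a BinaryMapExp tree, and the evaluation happens once the tree is assigned to a tensor (a sketch, with a, b, c tensors of the same shape):
c = a * b + 3.0f;  // element-wise multiply, then add the scalar to every element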
Expr_engine-inl.h¶
./expr_engine-inl.h
provides methods to evaluate all of the expressions.
- MakeTensorExp
- Plan
- MakePlan
- ExpInfo
- TypeCheck
- StreamInfo
- ShapeCheck
- ExpEngine
MakeTensorExp¶
: public Exp<MakeTensorExp<SubType, SrcExp, dim, DType>, DType, type::kChainer>
MakeTensorExp is a base class inherited by most functions in ./extension/; details will be provided when those functions are discussed.
template<typename SubType, typename SrcExp, int dim, typename DType>
struct MakeTensorExp : public Exp<MakeTensorExp<SubType, SrcExp, dim, DType>, DType, type::kChainer> {
Shape<dim> shape_;
inline const SubType& real_self(void) const{
return *static_cast<const SubType*>(this);
}
};
Plan¶
The class Plan provides the Eval() function, which defines how the actual evaluation is carried out.
Plan Declaration¶
The Plan template is parameterized by the ExpType, since its main purpose is to evaluate an expression, and by DType, the data type used during evaluation.
template<typename ExpType, typename DType>
class Plan {
public:
MSHADOW_XINLINE DType Eval(index_t y, index_t x) const;
};
Tensor Plan¶
The tensor Plan keeps two member variables, dptr_ and stride_, so that the expected value can be fetched as dptr_[y * stride_ + x].
Since we never expect to modify a value through Eval(), that function is always marked const (and in the tensor Plan it also returns a const reference).
The tensor Plan is special in that it can appear on the left-hand side of an assignment; it therefore owns an extra function REval(), whose return value must not be const.
Moreover, because a tensor value lives in pre-allocated memory, REval() returns a reference; for consistency, Eval() in the tensor Plan also returns a reference, but a const one.
template <typename Device, int dim, typename DType>
class Plan<Tensor<Device, dim, DType>, DType> {
public:
explicit Plan(const Tensor<Device, dim, DType> &t) : dptr_(t.dptr_), stride_(t.stride_) {}
MSHADOW_XINLINE DType &REval(index_t y, index_t x) {
return dptr_[y * stride_ + x];
}
MSHADOW_XINLINE const DType &Eval(index_t y, index_t x) const {
return dptr_[y * stride_ + x];
}
private:
DType *dptr_;
index_t stride_;
};
Because indexing a 1D tensor does not need stride_, the tensor Plan for the 1D case is defined separately.
template <typename Device, typename DType>
class Plan<Tensor<Device, 1, DType>, DType> {
public:
explicit Plan(const Tensor<Device, 1, DType> &t) : dptr_(t.dptr_) {}
MSHADOW_XINLINE DType &REval(index_t y, index_t x) {
return dptr_[x];
}
MSHADOW_XINLINE const DType &Eval(index_t y, index_t x) const {
return dptr_[x];
}
private:
DType *dptr_;
};
Scalar Plan¶
The scalar Plan works as follows:
- It has an explicit constructor that takes a built-in scalar value and stores it, with type DType, in its private member variable.
- It has a public member function Eval() that returns its private member variable scalar_, no matter what the indices are.
template<typename DType>
class Plan<ScalarExp<DType>, DType> {
public:
explicit Plan(DType scalar) : scalar_(scalar) {}
MSHADOW_XINLINE DType Eval(index_t y, index_t x) const {
return scalar_;
}
private:
DType scalar_;
};
Typecast Plan¶
The typecast Plan works as follows:
- It serves the type cast expression TypecastExp, e.g. tcast(x).
- It has an explicit constructor that stores a Plan of the source expression as its private member variable; the original plan is used for the evaluation, and only the returned type is converted at the end.
- It has a function Eval(), where the type cast happens by calling DstDType(src_.Eval(y, x)).
template<typename DstDType, typename SrcDType, typename EType, int etype>
class Plan<TypecastExp<DstDType, SrcDType, EType, etype>, DstDType> {
public:
explicit Plan(const Plan<EType, SrcDType> &src) : src_(src) {}
MSHADOW_XINLINE DstDType Eval(index_t y, index_t x) const {
return DstDType(src_.Eval(y, x));
}
private:
Plan<EType, SrcDType> src_;
};
Ternary Plan¶
The ternary Plan works as follows:
- It has an explicit constructor that takes the Plans of its three operand expressions as private member variables.
- It has a function Eval(), where the computation happens by calling OP::Map(item1_.Eval(y, x), item2_.Eval(y, x), item3_.Eval(y, x)).
template<typename OP, typename TA, typename TB, typename TC, int etype, typename DType>
class Plan<TernaryMapExp<OP, TA, TB, TC, DType, etype>, DType> {
public:
explicit Plan(const Plan<TA, DType> &item1, const Plan<TB, DType> &item2, const Plan<TC, DType> &item3)
: item1_(item1), item2_(item2), item3_(item3) {}
MSHADOW_XINLINE DType Eval(index_t y, index_t x) const {
return OP::Map(item1_.Eval(y, x), item2_.Eval(y, x), item3_.Eval(y, x));
}
private:
Plan<TA, DType> item1_;
Plan<TB, DType> item2_;
Plan<TC, DType> item3_;
};
Binary Plan¶
The binary Plan works as follows:
- It has an explicit constructor that takes the Plans of its two operand expressions as private member variables.
- It has a function Eval(), where the computation happens by calling OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x)).
template<typename OP, typename TA, typename TB, int etype, typename DType>
class Plan<BinaryMapExp<OP, TA, TB, DType, etype>, DType> {
public:
explicit Plan(const Plan<TA, DType> &lhs, const Plan<TB, DType> &rhs)
: lhs_(lhs), rhs_(rhs) {}
MSHADOW_XINLINE DType Eval(index_t y, index_t x) const {
return OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x));
}
private:
Plan<TA, DType> lhs_;
Plan<TB, DType> rhs_;
};
Unary Plan¶
The unary Plan works as follows:
- It has an explicit constructor that takes the Plan of its single operand expression as a private member variable.
- It has a function Eval(), where the computation happens by calling OP::Map(src_.Eval(y, x)).
template<typename OP, typename TA, int etype, typename DType>
class Plan<UnaryMapExp<OP, TA, DType, etype>, DType> {
public:
explicit Plan(const Plan<TA, DType> &src) : src_(src) {}
MSHADOW_XINLINE DType Eval(index_t y, index_t x) const {
return OP::Map(src_.Eval(y, x));
}
private:
Plan<TA, DType> src_;
};
MakeTensorExp Plan¶
Since the expression type MakeTensorExp is only a base class of several extension functions (to be discussed in ./extensions/), the MakeTensorExp Plan simply keeps the input Plan as its member variable and uses the input plan's Eval() as its own Eval().
Normally an expression is produced by some constructing call, e.g. `MakeExp()`, but `MakeTensorExp` lacks one, since it is only a general base class. Thus, I suspect this Plan will never actually be invoked. Remains unproven!!!
template<typename SubType, typename SrcExp, int dim, typename DType>
struct Plan<MakeTensorExp<SubType, SrcExp, dim, DType>, DType> {
public:
Plan(const Plan<SubType, DType> &src) : src_(src) {}
MSHADOW_XINLINE DType Eval(index_t y, index_t x) const {
return src_.Eval(y, x);
}
private:
Plan<SubType, DType> src_;
};
Transpose Plan¶
The usage of transpose plan includes
- it has an explicit constructor that takes in a Plan
src
to initiate a same one as its private member variable. Since it is only a transpose expression, it still need the originalsrc
to do the evaluation, and only show the transpose effect in the special design in theEval()
function (the interchage ofy
andx
). - it has a member function called
Eval()
, which do the transpose in the computation by callingsrc_.Eval(x, y)
, instead ofsrc_.Eval(y, x)
;
template<typename EType, typename DType>
class Plan<TransposeExp<EType, DType>, DType> {
public:
explicit Plan(const Plan<EType, DType> &src) : src_(src) {}
MSHADOW_XINLINE DType Eval(index_t y, index_t x) const {
return src_.Eval(x, y);
}
private:
Plan<EType, DType> src_;
};
MakePlan¶
The major purpose of MakePlan
is to transform an Expression to its corresponding Plan
.
Tensor MakePlan¶
The parameter of the Tensor MakePlan is RValueExp<T, DType> &e, the grandparent class of Tensor. The reason for this design choice is compatibility, since a reference to a parent class can safely bind to a child-class object.
template<typename T, typename DType>
inline Plan<T, DType> MakePlan(const RValueExp<T, DType> &e) {
return Plan<T, DType>(e.self());
}
Scalar MakePlan¶
The Scalar MakePlan takes a ScalarExp and uses its member variable scalar_ to construct a Plan<ScalarExp<DType>, DType>.
template<typename DType>
inline Plan<ScalarExp<DType>, DType> MakePlan(const ScalarExp<DType> &e) {
return Plan<ScalarExp<DType>, DType>(e.scalar_);
}
Typecast MakePlan¶
Since the typecast Plan only uses the Eval() of the original plan for its own Eval(), with an enforced type cast at the end, the only thing the Typecast MakePlan has to do is pass in a plan of the original expression.
template<typename DstDType, typename SrcDType, typename EType, int etype>
inline Plan<TypecastExp<DstDType, SrcDType, EType, etype>, DstDType>
MakePlan(const TypecastExp<DstDType, SrcDType, EType, etype> &e) {
return Plan<TypecastExp<DstDType, SrcDType, EType, etype>, DstDType>(MakePlan(e.exp));
}
Ternary MakePlan¶
Since the ternary Plan requires three Plans to do the evaluation, the Ternary MakePlan simply provides the plans of its three components.
// declaration
template<typename OP, typename TA, typename TB, typename TC, typename DType, int etype>
inline Plan<TernaryMapExp<OP, TA, TB, TC, DType, etype>, DType>
MakePlan(const TernaryMapExp<OP, TA, TB, TC, DType, etype> &e);
// definition
template<typename OP, typename TA, typename TB, typename TC, typename DType, int etype>
inline Plan<TernaryMapExp<OP, TA, TB, TC, DType, etype>, DType>
MakePlan(const TernaryMapExp<OP, TA, TB, TC, DType, etype> &e) {
return Plan<TernaryMapExp<OP, TA, TB, TC, DType, etype>,
DType>(MakePlan(e.item1_), MakePlan(e.item2_), MakePlan(e.item3_));
}
Binary MakePlan¶
Since the binary Plan requires two Plans to do the evaluation, the Binary MakePlan simply provides the plans of its two components.
// declaration
template<typename OP, typename TA, typename TB, typename DType, int etype>
inline Plan<BinaryMapExp<OP, TA, TB, DType, etype>, DType>
MakePlan(const BinaryMapExp<OP, TA, TB, DType, etype> &e);
// definition
template<typename OP, typename TA, typename TB, typename DType, int etype>
inline Plan<BinaryMapExp<OP, TA, TB, DType, etype>, DType>
MakePlan(const BinaryMapExp<OP, TA, TB, DType, etype> &e) {
return Plan<BinaryMapExp<OP, TA, TB, DType, etype>,
DType>(MakePlan(e.lhs_), MakePlan(e.rhs_));
}
Unary MakePlan¶
Since the unary Plan requires one Plan to do the evaluation, the Unary MakePlan simply provides the plan of its component.
template<typename OP, typename TA, typename DType, int etype>
inline Plan<UnaryMapExp<OP, TA, DType, etype>, DType>
MakePlan(const UnaryMapExp<OP, TA, DType, etype> &e) {
return Plan<UnaryMapExp<OP, TA, DType, etype>, DType>(MakePlan(e.src_));
}
MakeTensorExp MakePlan¶
In 2.8 we claimed that the expression type MakeTensorExp is only a base class of several extension functions (to be discussed in ./extensions/), and that its Plan uses the input plan's Eval() as its own Eval().
Therefore, in the MakeTensorExp MakePlan, the input MakeTensorExp<T, SrcExp, dim, DType> &e plays a role similar to RValueExp<T, DType> &e, since both are parent classes of other classes. Like the Tensor MakePlan, it returns Plan<T, DType>(e.real_self()) as a Plan of its SubType.
template<typename T, typename SrcExp, int dim, typename DType>
inline Plan<T, DType>
MakePlan(const MakeTensorExp<T, SrcExp, dim, DType> &e) {
return Plan<T, DType>(e.real_self());
}
Transpose MakePlan¶
Since the transpose Plan only uses the Eval() of the original plan for its own Eval(), with an interchange of y and x, the only thing the Transpose MakePlan has to do is pass in a plan of the original expression.
template<typename T, typename DType>
inline Plan<TransposeExp<T, DType>, DType>
MakePlan(const TransposeExp<T, DType> &e) {
return Plan<TransposeExp<T, DType>, DType>(MakePlan(e.exp));
}
ExpInfo¶
ExpInfo is a template struct providing expression information to support the TypeCheck.
- kDim: the dimension of the variables involved in the expression.
- kDevMask: identifies the device on which the variables involved in the expression are stored.
The default configuration is:
template<typename E>
struct ExpInfo {
static const int kDim = -1;
static const int kDevMask = 0;
};
For Tensor ExpInfo, information can be directly extracted from the Tensor
:
template<typename Device, int dim, typename DType>
struct ExpInfo<Tensor<Device, dim, DType> > {
static const int kDim = dim;
static const int kDevMask = Device::kDevMask;
// kDevMask = 1 for cpu (01), = 2 for gpu (10)
};
For the Scalar ExpInfo, kDim is always 0 and kDevMask is always 0xffff, so that a scalar never restricts the device mask of the other operands.
template<typename DType>
struct ExpInfo< ScalarExp<DType> > {
static const int kDim = 0;
static const int kDevMask = 0xffff;
};
For the Typecast ExpInfo, the information is copied directly from that of the original expression:
template<typename DstDType, typename SrcDType, typename EType, int etype>
struct ExpInfo<TypecastExp<DstDType, SrcDType, EType, etype> > {
static const int kDim = ExpInfo<EType>::kDim;
static const int kDevMask = ExpInfo<EType>::kDevMask;
};
For the Ternary ExpInfo, the information is combined from its three operand expressions:
template<typename OP, typename TA, typename TB, typename TC, typename DType, int etype>
struct ExpInfo<TernaryMapExp<OP, TA, TB, TC, DType, etype> > {
static const int kDimItem1 = ExpInfo<TA>::kDim;
static const int kDimItem2 = ExpInfo<TB>::kDim;
static const int kDimItem3 = ExpInfo<TC>::kDim;
static const int kDim = kDimItem1;
static const int kDevMask = ExpInfo<TA>::kDevMask & ExpInfo<TB>::kDevMask & ExpInfo<TC>::kDevMask;
};
For the Binary ExpInfo, the information is combined from its two operand expressions:
template<typename OP, typename TA, typename TB, typename DType, int etype>
struct ExpInfo<BinaryMapExp<OP, TA, TB, DType, etype> > {
static const int kDimLhs = ExpInfo<TA>::kDim;
static const int kDimRhs = ExpInfo<TB>::kDim;
static const int kDim = (kDimLhs >= 0 && kDimRhs >= 0) ?\
(kDimLhs == 0 ?\
kDimRhs :\
((kDimRhs == 0 || kDimLhs == kDimRhs) ? kDimLhs : -1)) : -1;
static const int kDevMask = ExpInfo<TA>::kDevMask & ExpInfo<TB>::kDevMask;
};
For the Unary and Transpose ExpInfo, the information is again simply copied from the operand expression:
template<typename OP, typename TA, typename DType, int etype>
struct ExpInfo<UnaryMapExp<OP, TA, DType, etype> > {
static const int kDim = ExpInfo<TA>::kDim;
static const int kDevMask = ExpInfo<TA>::kDevMask;
};
template<typename E, typename DType>
struct ExpInfo<TransposeExp<E, DType> > {
static const int kDim = ExpInfo<E>::kDim;
static const int kDevMask = ExpInfo<E>::kDevMask;
};
Finally, the MakeTensorExp ExpInfo is determined by SrcExp, the source expression of its SubType.
The reason for this design is unclear for now and remains to be checked !!!
template<typename T, typename SrcExp, int dim, typename DType>
struct ExpInfo<MakeTensorExp<T, SrcExp, dim, DType> > {
static const int kDimSrc = ExpInfo<SrcExp>::kDim;
static const int kDim = kDimSrc >= 0 ? dim : -1;
static const int kDevMask = ExpInfo<SrcExp>::kDevMask;
};
TypeCheck¶
TypeCheck is a template that performs type checking at compile time.
- kExpDim: the dimension of the expression.
- kDevPass: checks whether the expression's device matches the provided device type, making sure that all variables involved in the expression live on the one specified device.
- kMapPass: checks whether the expression can be mapped to an expression of dimension dim.
- kRedPass: checks whether the expression can be reduced to an expression of dimension dim.
The usage of kRedPass is unclear yet!!!
template<typename Device, int dim, typename DType, typename E>
struct TypeCheck {
static const int kExpDim = ExpInfo<E>::kDim;
static const bool kDevPass = (ExpInfo<E>::kDevMask & Device::kDevMask) != 0;
static const bool kMapPass = (kExpDim == 0 || kExpDim == dim) && kDevPass;
static const bool kRedPass = (kExpDim > dim) && kDevPass;
};
TypeCheck is used like expr::TypeCheckPass<expr::TypeCheck<cpu, dim, DType, E>::kMapPass>::Error_All_Tensor_in_Exp_Must_Have_Same_Type();.
If TypeCheck<cpu, dim, DType, E>::kMapPass evaluates to false, the function Error_All_Tensor_in_Exp_Must_Have_Same_Type() does not exist in TypeCheckPass<false>, which makes the compilation fail.
template<bool kPass>
struct TypeCheckPass;
template<>
struct TypeCheckPass<false> {};
template<>
struct TypeCheckPass<true> {
inline static void Error_All_Tensor_in_Exp_Must_Have_Same_Type(void) {}
inline static void Error_TypeCheck_Not_Pass_For_Reduce_Exp(void) {}
inline static void Error_Expression_Does_Not_Meet_Dimension_Req(void) {}
};
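As a sketch of the mechanism: mixing tensors of different dimensionality in one element-wise expression makes kMapPass false, so the call resolves to TypeCheckPass<false>, which has no such member function, and compilation fails with that function name in the error message:
Tensor<cpu, 2> mat(data,  Shape2(5, 2));
Tensor<cpu, 3> ts(data2, Shape3(5, 2, 2));
// mat = mat + ts;  // would not compile:
// 'Error_All_Tensor_in_Exp_Must_Have_Same_Type' is not a member of TypeCheckPass<false>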
StreamInfo¶
The StreamInfo returns the stream of a Tensor on the device where it is stored.
Its usage is unclear yet !!!
template<typename Device, typename E>
struct StreamInfo {
inline static Stream<Device> *Get(const E &t);
};
template<int dim, typename Device, typename DType>
struct StreamInfo<Device, Tensor<Device, dim, DType> > {
inline static Stream<Device> *Get(const Tensor<Device, dim, DType> &t) {
return t.stream_;
}
};
ShapeCheck¶
ShapeCheck is a runtime shape-checking template that obtains the shape of an expression and reports an error if the shapes mismatch.
Declaration¶
template<int dim, typename E>
struct ShapeCheck {
inline static Shape<dim> Check(const E &t);
};
Tensor ShapeCheck¶
The Tensor ShapeCheck simply returns its shape.
template<int dim, typename Device, typename DType>
struct ShapeCheck<dim, Tensor<Device, dim, DType> > {
inline static Shape<dim> Check(const Tensor<Device, dim, DType> &t) {
return t.shape_;
}
};
Scalar ShapeCheck¶
The Scalar ShapeCheck returns a Shape of the requested dim whose dimensions are all set to 0.
template<int dim, typename DType>
struct ShapeCheck<dim, ScalarExp<DType> > {
inline static Shape<dim> Check(const ScalarExp<DType> &exp) {
Shape<dim> shape;
for (int i = 0; i < dim; ++i) {
shape[i] = 0;
}
return shape;
}
};
Typecast ShapeCheck¶
The Typecast ShapeCheck simply calls the Check() of the original expression whose type is being cast.
template<int dim, typename DstDType, typename SrcDType, typename EType, int etype>
struct ShapeCheck<dim, TypecastExp<DstDType, SrcDType, EType, etype> > {
inline static Shape<dim>
Check(const TypecastExp<DstDType, SrcDType, EType, etype> &exp) {
return ShapeCheck<dim, EType>::Check(exp.exp);
}
};
Ternary ShapeCheck¶
The Ternary ShapeCheck first fetches the checked shapes of its three operand expressions, then explicitly checks that all of them have the same shape, and finally returns one of them.
template<int dim, typename OP, typename TA, typename TB, typename TC, typename DType, int etype>
struct ShapeCheck<dim, TernaryMapExp<OP, TA, TB, TC, DType, etype> > {
inline static Shape<dim>
Check(const TernaryMapExp<OP, TA, TB, TC, DType, etype> &t) {
Shape<dim> shape1 = ShapeCheck<dim, TA>::Check(t.item1_);
Shape<dim> shape2 = ShapeCheck<dim, TB>::Check(t.item2_);
Shape<dim> shape3 = ShapeCheck<dim, TC>::Check(t.item3_);
bool same = (shape1 == shape2) && (shape2 == shape3);
CHECK(same) << "TernaryMapExp: Shapes of operands are not the same, " <<
"Shape1=" << shape1 << ", Shape2=" << shape2 << ", Shape3=" << shape3;
return shape1;
}
};
Binary ShapeCheck¶
The Binary ShapeCheck first fetches the checked shapes of its two operand expressions, then explicitly checks that they are the same (a scalar shape, whose entries are all 0, is compatible with anything), and finally returns one of them.
template<int dim, typename OP, typename TA, typename TB, typename DType, int etype>
struct ShapeCheck<dim, BinaryMapExp<OP, TA, TB, DType, etype> > {
inline static Shape<dim>
Check(const BinaryMapExp<OP, TA, TB, DType, etype> &t) {
Shape<dim> shape1 = ShapeCheck<dim, TA>::Check(t.lhs_);
Shape<dim> shape2 = ShapeCheck<dim, TB>::Check(t.rhs_);
if (shape1[0] == 0) return shape2;
if (shape2[0] == 0) return shape1;
CHECK_EQ(shape1, shape2) << "BinaryMapExp: Shapes of operands are not the same, " <<
"Shape1=" << shape1 << ", Shape2=" << shape2;
return shape1;
}
};
Unary ShapeCheck¶
The Unary ShapeCheck simply fetches the checked shape of its operand expression and returns it.
template<int dim, typename OP, typename TA, typename DType, int etype>
struct ShapeCheck<dim, UnaryMapExp<OP, TA, DType, etype> > {
inline static Shape<dim> Check(const UnaryMapExp<OP, TA, DType, etype> &t) {
Shape<dim> s = ShapeCheck<dim, TA>::Check(t.src_);
return s;
}
};
MakeTensorExp ShapeCheck¶
The MakeTensorExp ShapeCheck simply returns the member variable shape_ of MakeTensorExp.
This design strategy needs to be examined further.
template<int dim, typename SrcExp, typename T, typename DType>
struct ShapeCheck<dim, MakeTensorExp<T, SrcExp, dim, DType> > {
inline static Shape<dim>
Check(const MakeTensorExp<T, SrcExp, dim, DType> &t) {
return t.shape_;
}
};
Transpose ShapeCheck¶
The Transpose ShapeCheck fetches the checked shape of the expression being transposed and returns it with the lowest two dimensions swapped.
The transpose strategy needs further study when it comes to applications.
template<int dim, typename E, typename DType>
struct ShapeCheck<dim, TransposeExp<E, DType> > {
inline static Shape<dim> Check(const TransposeExp<E, DType> &e) {
// swap the lowest two dimensions
Shape<dim> s = ShapeCheck<dim, E>::Check(e.exp);
std::swap(s[0], s[1]);
return s;
}
};
ExpEngine¶
ExpEngine is a struct with several overloaded Eval() functions; it is always called by the assignment-related functions in RValueExp to dispatch the operations.
template<typename SV, typename RV, typename DType>
struct ExpEngine {
template<typename E>
inline static void Eval(RV *dst,
const Exp<E, DType, type::kMapper> &exp) {
MapExp<SV>(dst, exp);
}
template<typename E>
inline static void Eval(RV *dst,
const Exp<E, DType, type::kChainer> &exp) {
MapExp<SV>(dst, exp);
}
template<typename E>
inline static void Eval(RV *dst,
const Exp<E, DType, type::kRValue> &exp) {
MapExp<SV>(dst, exp);
}
template<typename E>
inline static void Eval(RV *dst,
const Exp<E, DType, type::kComplex> &exp) {
ExpComplexEngine<SV, RV, E, DType>::Eval(dst->ptrself(), exp.self());
}
};
ExpComplexEngine, which evaluates expressions of type::kComplex, mainly deals with dot() expressions and looks like:
template<typename SV, typename RV, typename E, typename DType>
struct ExpComplexEngine {
inline static void Eval(RV *dst, const E &exp);
};
template<typename SV, typename Device, int dim, int ldim,
int rdim, bool ltrans, bool rtrans, typename DType>
struct ExpComplexEngine<SV,
Tensor<Device, dim, DType>,
DotExp<Tensor<Device, ldim, DType>,
Tensor<Device, rdim, DType>,
ltrans, rtrans, DType>,
DType> {
inline static void Eval(Tensor<Device, dim, DType> *dst,
const DotExp<Tensor<Device, ldim, DType>,
Tensor<Device, rdim, DType>,
ltrans, rtrans, DType> &exp) {
DotEngine<SV, Device, dim, ldim, rdim,
ltrans, rtrans, DType>::Eval(dst, exp.lhs_, exp.rhs_, exp.scale_);
}
};
The DotEngine will be further discussed in ./dot_engine-inl.h
.
Packet-inl.h¶
Packet
is designed to further speed up computation.
- Alignment
- Packet
- Packet Operator
- Saver
- Packet Plan
- Packet Check
- Packet Align Check
- MapPacketPlan
A general Packet has two modes to choose from:
enum PacketArch {
kPlain,
kSSE2,
};
- kPlain is just a plain version using standard C code, in case SSE is not available.
- kSSE2 uses a set of instructions that perform computations on packets of 128 bits at once. Since a float is 32 bits, SSE2 instructions can handle 4 floats at once, which means that, if used correctly, they can make the computation up to 4x faster.
However, in real applications, suppose we have chosen `size=50`, so our vectors consist of `50`
floats, and `50` is not a multiple of `4`. This means that we cannot do all
of the computation with SSE2 instructions. The best we can do
is to handle the first `48` coefficients with SSE2 instructions, since `48` is the biggest
multiple of `4` below `50`, and then handle the `49th` and `50th`
coefficients separately, without SSE2.
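To make the split concrete, here is a plain C++ sketch (not mshadow code) of how such a computation would be divided; the function add_arrays and its signature are made up purely for illustration.
#include <cstddef>

// plain C++ sketch of the split strategy described above (not mshadow code)
void add_arrays(const float *a, const float *b, float *out, std::size_t n) {
  std::size_t aligned = (n / 4) * 4;  // largest multiple of 4 not exceeding n, e.g. 48 for n = 50
  for (std::size_t i = 0; i < aligned; i += 4) {
    // in mshadow this group of 4 would be processed by one SSE2 packet
    for (std::size_t j = 0; j < 4; ++j) out[i + j] = a[i + j] + b[i + j];
  }
  for (std::size_t i = aligned; i < n; ++i) {
    out[i] = a[i] + b[i];  // the leftover 49th and 50th elements, handled one by one
  }
}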
As a result, we define a struct describing the required alignment; its value is the shift amount, i.e. log2 of the alignment in bytes, so 4 means 16-byte alignment.
template<PacketArch Arch>
struct AlignBytes {
  // typedef unsigned index_t;
  // used as a shift amount: 1 << 4 = 16-byte alignment
  static const index_t value = 4;
};
Packet¶
Packet
is a struct designed to handle the evaluation of expressions at the packet level, using the SSE2 instruction set for better efficiency.
It has three specializations: <DType, kPlain>
for standard C computation,
<float, kSSE2>
for float SSE2 optimization, and <double, kSSE2>
for double
SSE2 optimization. All of them will be described in parallel.
Member Variables¶
- kSize: the number of values that can be handled as one vector
- data_: the internal data, loaded from a pointer to a built-in scalar type
Definition in <DType, kPlain>
:
// kPlain is only the default C fallback used in the absence of SSE2,
// so it naturally handles only one value at a time
static const index_t kSize = 1;
DType data_;
Definition in <float, kSSE2>
and <double, kSSE2>
:
// a float is 32 bits, so a 128-bit SSE2 register handles 4 floats at a time
static const index_t kSize = 4;
// __m128 is a special type in SSE2
__m128 data_;
// a double is 64 bits, so a 128-bit register handles only 2 doubles at a time
static const index_t kSize = 2;
__m128d data_;
The vectorization makes 4
floats or 2
doubles look like a single scalar
.
Constructor¶
Since Packet
only has 2
member variables, and one of them is a static const,
we only need to initialize data_
in the constructor.
Definition in <DType, kPlain>
:
Packet(void) {}
explicit Packet(DType data) : data_(data) {}
The definition for <float, kSSE2>
and <double, kSSE2>
is the same except for the register type (__m128 for float, __m128d for double); the double version is:
Packet(void) {}
explicit Packet(__m128d data) : data_(data) {}
Member Functions¶
The member functions in <DType, kPlain>
use plain C/C++ code to provide the same functionality as those
in <float/double, kSSE2>
.
// create a packet filled with the target value s
MSHADOW_CINLINE static Packet<DType, kPlain> Fill(DType s) {
return Packet<DType, kPlain>(s);
}
// load from address src, using *src for the value
MSHADOW_CINLINE static Packet<DType, kPlain> Load(const DType* src) {
return Packet<DType, kPlain>(*src);
}
// load from address src, using *src for the value
MSHADOW_CINLINE static Packet<DType, kPlain> LoadUnAligned(const DType* src) {
return Packet<DType, kPlain>(*src);
}
// store data into dst
MSHADOW_CINLINE void Store(DType* dst) const {
*dst = data_;
}
Compared to the plain implementation, the <float/double, kSSE2>
versions require SSE2 intrinsics
, declared in the header <emmintrin.h>
:
- _mm_set1_ps(w): set all four floats to the value of w, e.g. r0 := r1 := r2 := r3 := w
- _mm_load_ps(p): load the four floats pointed to by p, e.g. r0 := p[0], r1 := p[1], r2 := p[2], r3 := p[3]. The address must be 16-byte aligned.
- _mm_loadu_ps(p): same as _mm_load_ps, but the address does not need to be 16-byte aligned.
- _mm_store_ps(p, a): transfer the 4 float values in a to the address pointed to by p, e.g. p[0] := a0, p[1] := a1, p[2] := a2, p[3] := a3
MSHADOW_CINLINE static Packet<float, kSSE2> Fill(float s) {
return Packet<float, kSSE2>(_mm_set1_ps(s));
}
MSHADOW_CINLINE static Packet<float, kSSE2> Load(const float* src) {
return Packet<float, kSSE2>(_mm_load_ps(src));
}
MSHADOW_CINLINE static Packet<float, kSSE2> LoadUnAligned(const float* src) {
return Packet<float, kSSE2>(_mm_loadu_ps(src));
}
MSHADOW_CINLINE void Store(float* dst) const {
_mm_store_ps(dst, data_);
}
For <double, kSSE2>
, the process and intrinsics are all similar, except that the
suffix ps
is changed to pd
for dealing with double
.
MSHADOW_CINLINE static Packet<double, kSSE2> Fill(double s) {
return Packet<double, kSSE2>(_mm_set1_pd(s));
}
MSHADOW_CINLINE static Packet<double, kSSE2> Load(const double* src) {
return Packet<double, kSSE2>(_mm_load_pd(src));
}
MSHADOW_CINLINE static Packet<double, kSSE2> LoadUnAligned(const double* src) {
return Packet<double, kSSE2>(_mm_loadu_pd(src));
}
MSHADOW_CINLINE void Store(double* dst) const {
_mm_store_pd(dst, data_);
}
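As a minimal, self-contained illustration of these intrinsics, independent of mshadow, the following program adds two 16-byte-aligned arrays of four floats; the array names and alignas usage are just for this example.
#include <emmintrin.h>
#include <cstdio>

int main() {
  alignas(16) float a[4] = {1.f, 2.f, 3.f, 4.f};
  alignas(16) float b[4] = {10.f, 20.f, 30.f, 40.f};
  alignas(16) float c[4];
  __m128 va = _mm_load_ps(a);      // load 4 floats from a 16-byte aligned address
  __m128 vb = _mm_load_ps(b);
  __m128 vc = _mm_add_ps(va, vb);  // add all 4 lanes at once
  _mm_store_ps(c, vc);             // store the 4 results back
  std::printf("%.0f %.0f %.0f %.0f\n", c[0], c[1], c[2], c[3]);  // prints: 11 22 33 44
  return 0;
}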
We intentionally defer the discussion of the Sum() function, since it is rarely used in
our context and involves heavier extra SSE2 instructions (see the Missing Components section).
Overloaded Operators¶
=
operator¶
The =
operator simply changes data_
to the value on the right-hand side, and
returns itself via *this
.
// plain
MSHADOW_CINLINE Packet<DType, kPlain>& operator=(DType s) {
data_ = s;
return *this;
}
// <float, kSSE2>
MSHADOW_CINLINE Packet<float, kSSE2>& operator=(float s) {
data_ = _mm_set1_ps(s);
return *this;
}
// <double, kSSE2>
MSHADOW_CINLINE Packet<double, kSSE2>& operator=(double s) {
data_ = _mm_set1_pd(s);
return *this;
}
+
/ -
/ *
/ /
operators¶
For Plain version:
template<typename DType>
MSHADOW_CINLINE Packet<DType, kPlain> operator+(const Packet<DType, kPlain>& lhs,
const Packet<DType, kPlain>& rhs) {
return Packet<DType, kPlain>(lhs.data_ + rhs.data_);
}
template<typename DType>
MSHADOW_CINLINE Packet<DType, kPlain> operator-(const Packet<DType, kPlain>& lhs,
const Packet<DType, kPlain>& rhs) {
return Packet<DType, kPlain>(lhs.data_ - rhs.data_);
}
template<typename DType>
MSHADOW_CINLINE Packet<DType, kPlain> operator*(const Packet<DType, kPlain>& lhs,
const Packet<DType, kPlain>& rhs) {
return Packet<DType, kPlain>(lhs.data_ * rhs.data_);
}
template<typename DType>
MSHADOW_CINLINE Packet<DType, kPlain> operator/(const Packet<DType, kPlain>& lhs,
const Packet<DType, kPlain>& rhs) {
return Packet<DType, kPlain>(lhs.data_ / rhs.data_);
}
For SSE2 version:
MSHADOW_CINLINE Packet<float, kSSE2> operator+(const Packet<float, kSSE2>& lhs,
const Packet<float, kSSE2>& rhs) {
return Packet<float, kSSE2>(_mm_add_ps(lhs.data_, rhs.data_));
}
MSHADOW_CINLINE Packet<double, kSSE2> operator+(const Packet<double, kSSE2>& lhs,
const Packet<double, kSSE2>& rhs) {
return Packet<double, kSSE2>(_mm_add_pd(lhs.data_, rhs.data_));
}
MSHADOW_CINLINE Packet<float, kSSE2> operator-(const Packet<float, kSSE2>& lhs,
const Packet<float, kSSE2>& rhs) {
return Packet<float, kSSE2>(_mm_sub_ps(lhs.data_, rhs.data_));
}
MSHADOW_CINLINE Packet<double, kSSE2> operator-(const Packet<double, kSSE2>& lhs,
const Packet<double, kSSE2>& rhs) {
return Packet<double, kSSE2>(_mm_sub_pd(lhs.data_, rhs.data_));
}
MSHADOW_CINLINE Packet<float, kSSE2> operator*(const Packet<float, kSSE2>& lhs,
const Packet<float, kSSE2>& rhs) {
return Packet<float, kSSE2>(_mm_mul_ps(lhs.data_, rhs.data_));
}
MSHADOW_CINLINE Packet<double, kSSE2> operator*(const Packet<double, kSSE2>& lhs,
const Packet<double, kSSE2>& rhs) {
return Packet<double, kSSE2>(_mm_mul_pd(lhs.data_, rhs.data_));
}
MSHADOW_CINLINE Packet<float, kSSE2> operator/(const Packet<float, kSSE2>& lhs,
const Packet<float, kSSE2>& rhs) {
return Packet<float, kSSE2>(_mm_div_ps(lhs.data_, rhs.data_));
}
MSHADOW_CINLINE Packet<double, kSSE2> operator/(const Packet<double, kSSE2>& lhs,
const Packet<double, kSSE2>& rhs) {
return Packet<double, kSSE2>(_mm_div_pd(lhs.data_, rhs.data_));
}
Alignment¶
To make full use of the SSE2 instruction set, alignment is important in this context.
Aligned and Aligned Free¶
AlignedMallocPitch()
is an analog of cudaMallocPitch()
; it allocates an aligned space of num_line * lspace
cells.
- out_pitch: output parameter, the actual space allocated for each line
- lspace: number of cells required for each line
- num_line: number of lines to be allocated
First, from lspace
, out_pitch
is computed as the smallest multiple of 16
that is not smaller than lspace
. Then, the total aligned space to allocate
is out_pitch * num_line
.
inline void* AlignedMallocPitch(size_t *out_pitch,
size_t lspace,
size_t num_line) {
const index_t bits = AlignBytes<MSHADOW_DEFAULT_PACKET>::value; // = 4
const index_t mask = (1 << bits) - 1; // = 15
// e.g. ((25 + 15) >> 4) << 4 = 32;
size_t pitch = ((lspace + mask) >> bits) << bits;
*out_pitch = pitch;
#ifdef _MSC_VER
void *res = _aligned_malloc(pitch * num_line, 1 << bits);
#else
void *res;
int ret = posix_memalign(&res, 1 << bits, pitch * num_line);
CHECK_EQ(ret, 0) << "AlignedMallocPitch failed";
#endif
if (res == NULL) {
LOG(FATAL) << "AlignedMallocPitch failed";
}
return res;
}
As usual, since we allocated the space, we should free it after use by calling
inline void AlignedFree(void *ptr) {
#ifdef _MSC_VER
_aligned_free(ptr);
#else
free(ptr);
#endif
}
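A minimal usage sketch of these two helpers, assuming they live in the packet namespace like the other utilities in this file; the sizes are arbitrary.
// allocate 10 lines of 25 floats each; every line starts at a 16-byte boundary
size_t pitch;
float *buf = static_cast<float*>(
    packet::AlignedMallocPitch(&pitch, 25 * sizeof(float), 10));
// pitch is rounded up to a multiple of 16, so here pitch == 112 bytes per line
float *line3 = reinterpret_cast<float*>(reinterpret_cast<char*>(buf) + 3 * pitch);
line3[0] = 1.0f;           // first element of the 4th line
packet::AlignedFree(buf);  // release the aligned buffer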
CheckAlign¶
CheckAlign
is designed to make sure a value, or the address stored in a pointer, is divisible by 16.
// check whether the value can be divided by 16
template<PacketArch Arch>
inline bool CheckAlign(size_t pitch) {
const index_t bits = AlignBytes<Arch>::value;
// the aligned pitch is expected to hold 4 zeros in its lowest 4 bits, or just can be divided by 16.
// thus, when it is & with 15, the result should be zero
return !(pitch & ((1 << bits) - 1));
}
// check whether the stored address of a pointer can be divided by 16
template<PacketArch Arch>
inline bool CheckAlign(void *ptr) {
// reinterpret_cast forces the compiler to treat ptr as a size_t value
return CheckAlign<Arch>(reinterpret_cast<size_t>(ptr));
}
UpperAlign and LowerAlign¶
UpperAlign
and LowerAlign
return the number of elements to process for an array of the given size
, rounded to the packet boundary.
Following the example at the beginning, the common strategy is to use LowerAlign
and then
handle the remaining elements separately. If UpperAlign
is used, padding is required.
template<typename DType, PacketArch Arch>
inline index_t UpperAlign(index_t size) {
const index_t bits = AlignBytes<MSHADOW_DEFAULT_PACKET>::value; // = 4
const index_t mask = (1 << bits) - 1; // = 15
const index_t fsize = sizeof(DType); // = e.g. 4 for float or 8 for double
return (((size * fsize + mask) >> bits) << bits) / fsize;
}
template<typename DType, PacketArch Arch>
inline index_t LowerAlign(index_t size) {
const index_t bits = AlignBytes<MSHADOW_DEFAULT_PACKET>::value; // = 4
const index_t fsize = sizeof(DType); // = e.g. 4 for float or 8 for double
// e.g. (((50 * 4) >> 4) << 4) / 4 = 48 as claimed in the beginning example.
return (((size * fsize) >> bits) << bits) / fsize;
}
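A standalone re-implementation of the same arithmetic for float and 16-byte packets, just to verify the numbers from the opening example; this is illustrative code, not part of mshadow.
#include <cstdio>

// same formulas as LowerAlign/UpperAlign, specialized to float (4 bytes) and 16-byte alignment
unsigned lower_align_f(unsigned size) { return (((size * 4) >> 4) << 4) / 4; }
unsigned upper_align_f(unsigned size) { return (((size * 4 + 15) >> 4) << 4) / 4; }

int main() {
  std::printf("%u %u\n", lower_align_f(50), upper_align_f(50));  // prints "48 52"
  return 0;
}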
Packet Operator¶
PacketOp
is a struct analogous to the operators defined in the op
namespace. The specialization of
PacketOp
is selected by the given typename OP
.
Its main usage is to perform the operation between Packets
. Each of +
/ -
/ *
/ /
is
defined in the file ./packet/plain-inl.h
or ./packet/sse-inl.h
.
// specialization of operators
template<typename DType, PacketArch Arch>
struct PacketOp<op::plus, DType, Arch> {
static const bool kEnabled = true;
MSHADOW_CINLINE static Packet<DType, Arch> Map(const Packet<DType, Arch>& lhs,
const Packet<DType, Arch>& rhs) {
return lhs + rhs;
}
};
template<typename DType, PacketArch Arch>
struct PacketOp<op::minus, DType, Arch> {
static const bool kEnabled = true;
MSHADOW_CINLINE static Packet<DType, Arch> Map(const Packet<DType, Arch>& lhs,
const Packet<DType, Arch>& rhs) {
return lhs - rhs;
}
};
template<typename DType, PacketArch Arch>
struct PacketOp<op::mul, DType, Arch> {
static const bool kEnabled = true;
MSHADOW_CINLINE static Packet<DType, Arch> Map(const Packet<DType, Arch>& lhs,
const Packet<DType, Arch>& rhs) {
return lhs * rhs;
}
};
template<typename DType, PacketArch Arch>
struct PacketOp<op::div, DType, Arch> {
static const bool kEnabled = true;
MSHADOW_CINLINE static Packet<DType, Arch> Map(const Packet<DType, Arch>& lhs,
const Packet<DType, Arch>& rhs) {
return lhs / rhs;
}
};
template<typename DType, PacketArch Arch>
struct PacketOp<op::identity, DType, Arch> {
static const bool kEnabled = true;
MSHADOW_CINLINE static Packet<DType, Arch> Map(const Packet<DType, Arch>& src) {
return src;
}
};
If the operator is not one of op::plus
, op::minus
, op::mul
,
op::div
and op::identity
, the declaration stays in its default setting,
so it fails the check and evaluation falls back to the original C++ implementation.
template<typename OP, typename DType, PacketArch Arch>
struct PacketOp {
static const bool kEnabled = false;
};
Saver¶
Saver
is the struct that calls its member function Save()
to do actual evaluation and saving.
It has two formulations. One for evaluation and saving (e.g. +=
/ -=
/ *=
/ /=
),
and one only for saving (e.g. =
) with operator sv::saveto
.
For the first Save()
,
we first load the left-hand side *dst
into a Packet
, then call PacketOp::Map()
to finish the evaluation. At last, the result is stored back to *dst
.
template<typename SV, typename TFloat, PacketArch Arch>
struct Saver{
MSHADOW_CINLINE static void Save(TFloat *dst, const Packet<TFloat, Arch>& src) {
Packet<TFloat, Arch> lhs = Packet<TFloat, Arch>::Load(dst);
Packet<TFloat, Arch> ans = PacketOp<typename SV::OPType, TFloat, Arch>::Map(lhs, src);
ans.Store(dst);
}
};
For the second Save()
, we simply store the result to *dst
.
template<typename TFloat, PacketArch Arch>
struct Saver<sv::saveto, TFloat, Arch> {
MSHADOW_CINLINE static void Save(TFloat *dst, const Packet<TFloat, Arch>& src) {
src.Store(dst);
}
};
Packet Plan¶
Like the class Plan
defined in expr_engine-inl.h
, the class PacketPlan
provides the
Eval()
functions for Packet
, which perform the actual evaluation.
Declaration¶
According to the declaration, a PacketPlan
contains two member functions: EvalPacket()
, which uses SSE
optimization,
and Eval()
, which does the ordinary computation as in Plan
.
typedef packet::PacketArch PacketArch;
// same as plan, but use packet
template<typename ExpType, typename DType, PacketArch Arch>
class PacketPlan {
public:
MSHADOW_CINLINE packet::Packet<DType, Arch> EvalPacket(index_t y, index_t x) const;
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const;
};
Tensor PacketPlan¶
The usage of EvalPacket()
is to provide the data according to dptr_[y * stride_ + x]
template <typename Device, int dim, typename DType, PacketArch Arch>
class PacketPlan<Tensor<Device, dim, DType>, DType, Arch> {
public:
explicit PacketPlan(const Tensor<Device, dim, DType> &t)
:dptr_(t.dptr_), stride_(t.stride_) {}
MSHADOW_CINLINE packet::Packet<DType, Arch> EvalPacket(index_t y, index_t x) const {
return packet::Packet<DType, Arch>::Load(&dptr_[y * stride_ + x]);
}
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
return dptr_[y * stride_ + x];
}
private:
const DType *dptr_;
index_t stride_;
};
Scalar PacketPlan¶
The usage of EvalPacket()
is to provide scalar_
for every x
and y
.
template<typename DType, PacketArch Arch>
class PacketPlan<ScalarExp<DType>, DType, Arch> {
public:
explicit PacketPlan(DType scalar) : scalar_(scalar) {}
MSHADOW_CINLINE packet::Packet<DType, Arch> EvalPacket(index_t y, index_t x) const {
return packet::Packet<DType, Arch>::Fill(scalar_);
}
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
return scalar_;
}
private:
DType scalar_;
};
Binary PacketPlan¶
The usage of EvalPacket()
is to perform EvalPacket()
for lhs_
and rhs_
respectively, and
use the pre-defined OP
to do the combinational evaluation.
template<typename OP, typename TA, typename TB, int etype, typename DType, PacketArch Arch>
class PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch> {
public:
PacketPlan(const PacketPlan<TA, DType, Arch> &lhs, const PacketPlan<TB, DType, Arch> &rhs)
: lhs_(lhs), rhs_(rhs) {}
MSHADOW_CINLINE packet::Packet<DType, Arch> EvalPacket(index_t y, index_t x) const {
return packet::PacketOp<OP, DType, Arch>::Map(lhs_.EvalPacket(y, x), rhs_.EvalPacket(y, x));
}
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
return OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x));
}
private:
PacketPlan<TA, DType, Arch> lhs_;
PacketPlan<TB, DType, Arch> rhs_;
};
Unary PacketPlan¶
The usage of EvalPacket()
is to perform EvalPacket()
for src_
first, and
use the pre-defined OP
to do the compositional evaluation.
template<typename OP, typename TA, int etype, typename DType, PacketArch Arch>
class PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch> {
public:
PacketPlan(const PacketPlan<TA, DType, Arch> &src) : src_(src) {}
MSHADOW_CINLINE packet::Packet<DType> EvalPacket(index_t y, index_t x) const {
return packet::PacketOp<OP, DType, Arch>::Map(src_.EvalPacket(y, x));
}
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
return OP::Map(src_.Eval(y, x));
}
private:
PacketPlan<TA, DType, Arch> src_;
};
MapPacketPlan¶
RValueExp¶
MakePacketPlan()
for RValueExp
simply returns a PacketPlan
initialized by the
subtype of RValueExp<T, DType>
, which is always a Tensor
.
template<PacketArch Arch, typename T, typename DType>
inline PacketPlan<T, DType, Arch> MakePacketPlan(const RValueExp<T, DType> &e) {
return PacketPlan<T, DType, Arch>(e.self());
}
ScalarExp¶
MakePacketPlan()
for ScalarExp
simply returns a PacketPlan
initialized by the
member variable scalar_
:
template<PacketArch Arch, typename DType>
inline PacketPlan<ScalarExp<DType>, DType, Arch> MakePacketPlan(const ScalarExp<DType> &e) {
return PacketPlan<ScalarExp<DType>, DType, Arch>(e.scalar_);
}
BinaryMapExp¶
MakePacketPlan()
for BinaryMapExp
first recursively calls MakePacketPlan()
on its
lhs
and rhs
, and then returns the PacketPlan
initialized by the two results.
template<PacketArch Arch, typename OP, typename TA, typename TB, typename DType, int etype>
inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
MakePacketPlan(const BinaryMapExp<OP, TA, TB, DType, etype> &e);
template<PacketArch Arch, typename OP, typename TA, typename TB, typename DType, int etype>
inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
MakePacketPlan(const BinaryMapExp<OP, TA, TB, DType, etype> &e) {
return PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>,
DType, Arch>(MakePacketPlan<Arch>(e.lhs_), MakePacketPlan<Arch>(e.rhs_));
}
UnaryMapExp¶
MakePacketPlan()
for UnaryMapExp
calls MakePacketPlan()
on its input variable src
,
then constructs the PacketPlan
to return.
template<PacketArch Arch, typename OP, typename TA, typename DType, int etype>
inline PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch>
MakePacketPlan(const UnaryMapExp<OP, TA, DType, etype> &e) {
return PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch>(MakePacketPlan<Arch>(e.src_));
}
Packet Check¶
The major usage of PacketCheck
is to make sure every related DType
is float
or double
,
and every related operator
is defined in the context of Packet
. Otherwise, the program
falls back to the original C++ implementation.
template<PacketArch Arch>
struct PacketCheck<float, Arch> {
static const bool kPass = true;
};
template<PacketArch Arch>
struct PacketCheck<double, Arch> {
static const bool kPass = true;
};
template<typename DType, PacketArch Arch>
struct PacketCheck<ScalarExp<DType>, Arch> {
static const bool kPass = PacketCheck<DType, Arch>::kPass;
};
template<int dim, typename DType, PacketArch Arch>
struct PacketCheck<Tensor<cpu, dim, DType>, Arch> {
static const bool kPass = PacketCheck<DType, Arch>::kPass;
};
// check both operator implementation and data type in Tensor
template<typename OP, typename TA, typename DType, int etype, PacketArch Arch>
struct PacketCheck<UnaryMapExp<OP, TA, DType, etype>, Arch> {
static const bool kPass = PacketCheck<TA, Arch>::kPass &&
packet::PacketOp<OP, DType, Arch>::kEnabled;
};
// check both operator implementation and data type in Tensor
template<typename OP, typename TA, typename TB, typename DType, int etype, PacketArch Arch>
struct PacketCheck< BinaryMapExp<OP, TA, TB, DType, etype>, Arch> {
static const bool kPass = packet::PacketOp<OP, DType, Arch>::kEnabled &&
PacketCheck<TA, Arch>::kPass && PacketCheck<TB, Arch>::kPass;
};
For some expressions, like TransposeExp()
, and some scalar types, like the built-in int
, which are not implemented in Packet
, the PacketCheck
automatically fails. As a result,
common C++ code will be used to evaluate such expressions.
The following code makes sure the default setting is false
.
template<typename E, PacketArch Arch>
struct PacketCheck{
static const bool kPass = false;
};
Packet Align Check¶
PacketAlignCheck
checks whether the data is aligned and whether the expression
has been implemented.
Default¶
First, for expressions that are not implemented, like TransposeExp()
, the
check should automatically fail so that the evaluation of such expressions falls back to their
original implementations. The following code guarantees this automatic switch.
template<int dim, typename E, PacketArch Arch>
struct PacketAlignCheck {
inline static bool Check(const E &exp) {
return false;
}
};
Tensor¶
The Check()
function checks whether the address is 16-byte aligned, and whether the stride_
is suitable for packets (divisible by 4
for float
).
For now, I question the importance of the `stride_` check. We could always process the part
divisible by `4` with packets and handle the remainder separately, as written in the
function `MapPacketPlan()`, but the author apparently chose not to.
The reason remains to be investigated!
template<int dim, typename DType, PacketArch Arch>
struct PacketAlignCheck<dim, Tensor<cpu, dim, DType>, Arch> {
inline static bool Check(const Tensor<cpu, dim, DType> &t) {
return packet::CheckAlign<Arch>(t.dptr_) &&
packet::CheckAlign<Arch>(t.stride_ * sizeof(DType));
}
};
ScalarExp¶
The ScalarExp
is naturally aligned, so we always return true
.
template<int dim, typename DType, PacketArch Arch>
struct PacketAlignCheck<dim, ScalarExp<DType>, Arch> {
inline static bool Check(const ScalarExp<DType> &exp) {
return true;
}
};
BinaryMapExp¶
It first recursively calls the Check()
function of its lhs
and rhs
, and combines the two results
with &&
in the end.
template<int dim, typename OP, typename TA, typename TB,
typename DType, int etype, PacketArch Arch>
struct PacketAlignCheck<dim, BinaryMapExp<OP, TA, TB, DType, etype>, Arch> {
inline static bool Check(const BinaryMapExp<OP, TA, TB, DType, etype> &t) {
return PacketAlignCheck<dim, TA, Arch>::Check(t.lhs_) &&
PacketAlignCheck<dim, TB, Arch>::Check(t.rhs_);
}
};
UnaryMapExp¶
It just calls Check()
on its variable src.
template<int dim, typename OP, typename TA, typename DType, int etype, PacketArch Arch>
struct PacketAlignCheck<dim, UnaryMapExp<OP, TA, DType, etype>, Arch> {
inline static bool Check(const UnaryMapExp<OP, TA, DType, etype> &t) {
return PacketAlignCheck<dim, TA, Arch>::Check(t.src_);
}
};
MapPacketPlan¶
MapPacketPlan
finishes the evaluation and the final assignment.
I wonder why the `Tensor` here is 2-D. One possible explanation is that the `MapPlan()`
function in `./tensor_cpu-inl.h` is 2-D to support transpose. But transpose is not
implemented in the context of Packet, so either transpose support could be added or
the plan could be changed to 1-D???
template<typename SV, typename E, int dim, typename DType, PacketArch Arch>
inline void MapPacketPlan(Tensor<cpu, dim, DType> _dst,
const expr::PacketPlan<E, DType, Arch>& plan) {
Tensor<cpu, 2, DType> dst = _dst.FlatTo2D();
const index_t xlen = packet::LowerAlign<DType, Arch>(dst.size(1));
for (index_t y = 0; y < dst.size(0); ++y) {
for (index_t x = 0; x < xlen; x += packet::Packet<DType, Arch>::kSize) {
packet::Saver<SV, DType, Arch>::Save(&dst[y][x], plan.EvalPacket(y, x));
}
for (index_t x = xlen; x < dst.size(1); ++x) {
SV::Save(dst[y][x], plan.Eval(y, x));
}
}
}
The code above implements a two-step evaluation strategy, but with the current code the
second step can never happen, due to the check of stride_
. Whether that check is necessary
is questionable and remains to be investigated.
Missing Explanations¶
void* pointer¶
The type void* is a special pointer type that can hold the address of any object.
Like any other pointer, a void* pointer holds an address, but the type of the object at that address is unknown.
size_t keyword¶
size_t
is a machine-specific unsigned type that is guaranteed to be large enough to hold
the size of any object in memory. The size_t
type is defined in the cstddef
header.
Missing Components¶
Packet::Sum()¶
For Plain packet:
MSHADOW_CINLINE DType Sum() const {
return data_;
}
For SSE2 packet:
MSHADOW_CINLINE float Sum() const {
__m128 ans = _mm_add_ps(data_, _mm_movehl_ps(data_, data_));
__m128 rst = _mm_add_ss(ans, _mm_shuffle_ps(ans, ans, 1));
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64)
return rst.m128_f32[0];
#else
float rr = _mm_cvtss_f32(rst);
return rr;
#endif
}
inline double Sum(void) const {
__m128d tmp = _mm_add_sd(data_, _mm_unpackhi_pd(data_, data_));
#if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64)
return tmp.m128d_f64[0];
#else
double ans = _mm_cvtsd_f64(tmp);
return ans;
#endif
}
MapPacketPlan for MakeTensorExp¶
This function will never be called, since the MakeTensorExp
type will not pass the Check() function,
so we temporarily ignore it.
template<PacketArch Arch, typename T, int dim, typename DType>
inline PacketPlan<T, DType, Arch>
MakePacketPlan(const MakeTensorExp<T, cpu, dim, DType> &e) {
return PacketPlan<T, DType, Arch>(e.real_self());
}
Tensor_blob.h¶
./tensor_blob.h
provides a common representation for tensors of arbitrary dimension, which can be
transformed into ordinary fixed-dimension tensors.
- TShape
- TBlob
TShape¶
TShape
is a dynamic shape class (not a template class) that can hold a shape of arbitrary dimension.
The shape is stored in data_stack_
when the dimension is no larger than kStackCache
; otherwise it is stored in data_heap_
.
Member Variables¶
- kStackCache: size of the in-stack space.
- ndim_: number of dimensions of the shape.
- num_heap_allocated_: number of cells allocated in data_heap_.
- data_stack_[kStackCache]: stack space used to store the shape when the dimension is small.
- *data_heap_: heap space used to store the shape when the dimension is big.
static const index_t kStackCache = 4;
index_t ndim_;
index_t num_heap_allocated_;
index_t data_stack_[kStackCache];
index_t *data_heap_;
Constructors¶
Default Constructor¶
The default constructor is a good way to see which variables need to be initialized.
In this case, we have to initialize ndim_
(the key variable
in this context), num_heap_allocated_
(which decides whether the heap is used), and
data_heap_
.
Since kStackCache
is a static const, and data_stack_
needs no initialization here, we can leave them alone.
TShape()
: ndim_(0),
num_heap_allocated_(0),
data_heap_(NULL) {}
Explicit Constructor¶
The explicit constructor constructs an “all-one” TShape
with the given number of dimensions.
explicit TShape(index_t ndim)
: ndim_(ndim) {
if (ndim_ <= kStackCache) {
data_heap_ = NULL;
num_heap_allocated_ = 0;
std::fill_n(data_stack_, ndim_, 1);
} else {
data_heap_ = new index_t[ndim_];
num_heap_allocated_ = ndim_;
std::fill_n(data_heap_, ndim_, 1);
}
}
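A minimal usage sketch, assuming the mshadow headers are included and the mshadow namespace is in scope:
TShape s(3);             // 3 dimensions, each initialized to 1
std::cout << s << '\n';  // prints (1,1,1), using the overloaded << described below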
Constructor from TShape¶
The idea is the same as in the explicit constructor. The only difference is that it makes an identical
copy using std::copy()
.
TShape(const TShape &s)
: ndim_(s.ndim_) {
if (ndim_ <= kStackCache) {
data_heap_ = NULL;
num_heap_allocated_ = 0;
std::copy(s.data_stack_, s.data_stack_ + ndim_, data_stack_);
} else {
data_heap_ = new index_t[ndim_];
num_heap_allocated_ = ndim_;
std::copy(s.data_heap_, s.data_heap_ + ndim_, data_heap_);
}
}
Constructor from RandomAccessIterator¶
The beginning of this constructor is the same as the default constructor; the
difference lies in the call to the member function CopyFrom()
.
template<typename RandomAccessIterator>
TShape(RandomAccessIterator begin,
RandomAccessIterator end)
: ndim_(0),
num_heap_allocated_(0),
data_heap_(NULL) {
this->CopyFrom(begin, end);
}
The CopyFrom()
function first sets the dimension of the shape by calling SetDim()
.
Then, it copies the data from the range [begin,end)
to data()
, which points to data_stack_ or data_heap_ depending on
ndim_
compared with kStackCache
.
template<typename RandomAccessIterator>
inline void CopyFrom(RandomAccessIterator begin,
RandomAccessIterator end) {
this->SetDim(end - begin);
std::copy(begin, end, data());
}
SetDim()
also decides whether to allocate data_heap_
based on the value of dim
.
inline void SetDim(index_t dim) {
if (dim > kStackCache &&
dim > num_heap_allocated_) {
// data_heap_ can be NULL
delete [] data_heap_;
data_heap_ = new index_t[dim];
num_heap_allocated_ = dim;
}
ndim_ = dim;
}
inline index_t *data() {
return ndim_ <= kStackCache ? data_stack_ : data_heap_;
}
Move Constructor from TShape¶
According to the property of move constructor, we first transfer the member variables
from input arguments to the new TShape
. Then, the original pointer of s.data_heap_
is reset to NULL
.
TShape(TShape &&s)
: ndim_(s.ndim_),
num_heap_allocated_(s.num_heap_allocated_),
data_heap_(s.data_heap_) {
if (ndim_ <= kStackCache) {
std::copy(s.data_stack_, s.data_stack_ + ndim_, data_stack_);
}
// remove data heap space from s
s.data_heap_ = NULL;
}
Move Constructor from Shape¶
Similar to the constructor from RandomAccessIterator
, the beginning of this constructor
is the same as the default constructor; the difference lies in the call to the member function
CopyFrom()
.
Here CopyFrom()
takes the Shape's data range as its two iterator arguments. It first sets the dimension
by calling SetDim()
, then copies the data from the range [begin,end)
to data()
.
template<int dim>
TShape(Shape<dim> &&s)
: ndim_(0),
num_heap_allocated_(0),
data_heap_(NULL) {
this->CopyFrom(s.shape_, s.shape_ + dim);
}
Destructor¶
Since data_heap_
is the only member variable of pointer type, we explicitly
delete it in the destructor.
~TShape() {
// data_heap_ can be NULL
delete [] data_heap_;
}
Overloaded Operators¶
Overloaded =
¶
Overloaded from TShape
¶
It first sets its own dimension to shape.ndim_
, then assigns the starting address of the source data
(data_stack_
or data_heap_
) to a temporary variable src
. At last, it copies the contents
pointed to by src
into its own data()
.
inline TShape &operator=(const TShape &shape) {
this->SetDim(shape.ndim_);
const index_t *src = shape.data();
std::copy(src, src + ndim_, data());
return *this;
}
Overloaded from std::vector
¶
The CopyFrom()
function takes two iterators as its input, which matches
the two arguments shape.begin()
and shape.end()
. It first sets the dimension
by calling SetDim()
, then copies the data from the range [begin,end)
to data()
.
inline TShape &operator=(const std::vector<index_t> &shape) {
this->CopyFrom(shape.begin(), shape.end());
return *this;
}
Overloaded from Shape
¶
It first sets its own dimension to dim
. Then, it defines a pointer to
data_stack_
or data_heap_
by comparing dim
with kStackCache
. At last, it
does an element-wise assignment from shape
to itself.
template<int dim>
inline TShape &operator=(const Shape<dim> &shape) {
this->SetDim(dim);
index_t *d = dim <= kStackCache ? data_stack_ : data_heap_;
for (int i = 0; i < dim; ++i) {
d[i] = shape[i];
}
return *this;
}
Overloaded []
¶
It returns the i-th dimension by calling data()
to fetch the base
address and indexing into it with operator []
.
Is it safe to leave the bounds check out???
inline index_t &operator[](index_t i) {
return data()[i];
}
inline const index_t &operator[](index_t i) const {
return data()[i];
}
Overloaded ==
¶
Overloaded from TShape
¶
It returns whether the two shapes are equal.
inline bool operator==(const TShape &s) const {
if (ndim_ != s.ndim_) return false;
if (ndim_ <= kStackCache) {
for (index_t i = 0; i < ndim_; ++i) {
if (data_stack_[i] != s.data_stack_[i]) return false;
}
} else {
for (index_t i = 0; i < ndim_; ++i) {
if (data_heap_[i] != s.data_heap_[i]) return false;
}
}
return true;
}
Overloaded from Shape
¶
It returns whether the two shapes are equal.
template<int dim>
inline bool operator==(const Shape<dim> &s) const {
if (ndim_ != dim) return false;
const index_t *d = dim <= kStackCache ? data_stack_ : data_heap_;
for (index_t i = 0; i < dim; ++i) {
if (d[i] != s.shape_[i]) return false;
}
return true;
}
Overloaded !=
¶
Overloaded from TShape
¶
It returns whether the two shapes are not equal.
inline bool operator!=(const TShape &s) const {
return !(*this == s);
}
Overloaded from Shape
¶
It returns whether the two shapes are not equal.
template<int dim>
inline bool operator!=(const Shape<dim> &s) const {
return !(*this == s);
}
Overloaded <<
¶
It outputs a python-style tuple as in the class Shape
.
inline std::ostream &operator<<(std::ostream &os, const TShape &shape) {
os << '(';
for (index_t i = 0; i < shape.ndim(); ++i) {
if (i != 0) os << ',';
os << shape[i];
}
// python style tuple
if (shape.ndim() == 1) os << ',';
os << ')';
return os;
}
Overloaded >>
¶
It reads the input shape
from the istream and assigns it to a TShape
type.
There are two while
loops in the overloaded function for 1-D shape and multi-dimensional
shape respectively.
In the first while
loop, we first peek at the next character in
the input stream and check whether it is a digit.
If it is a digit, we have a shape with only one dimension.
Thus, we read it into a variable named idx
, set the ndim_
of
shape
to 1
, and copy the value of idx
to data()
(refer
to the definition of CopyFrom()
).
Otherwise, we use is.get()
to extract the character without
assigning it anywhere. We then check whether the character is a (
introducing a
multi-dimensional shape. If it is, we break out of the while
loop and move to the
second part. If it is a space
, we
continue the loop to look at the next character. Otherwise,
we set the failbit
to disable further reads from the istream
and return.
If we receive a (
in the first part, meaning we will receive the shape of a
multi-dimensional tensor, we move to the next part.
In the beginning, we define an index_t
variable idx
to receive the element of
each dimension, and a std::vector
to hold the complete shape.
In the second while
loop, we first read the next value with is >> idx
.
This automatically skips any space
and checks whether the read succeeds; if the read fails
(e.g. a character a
is fed to idx
), an error bit is set and the while
loop ends.
If the input is a valid index_t
, we push it into the vector and repeatedly
get the next character until it is not a space
.
If that character is an L
, the Python representation of the long
type, we fetch once more
to get the following character.
If it is a ,
, we repeatedly peek at the next character until it is not a space;
if the peeked character is )
, we break out of the whole while
loop, otherwise we return to
the start of the while
loop. If the character is a )
directly, we also break; any other character sets the failbit and returns.
At last, we copy tmp
to shape
, even if there was an invalid input, e.g. (3,4,a)
: we will
end up with a tmp
of value (3,4)
and copy it to shape
all the same.
inline std::istream &operator>>(std::istream &is, TShape &shape) {
// first part
while (true) {
char ch = is.peek();
if (isdigit(ch)) {
index_t idx;
if (is >> idx) {
shape.CopyFrom(&idx, &idx + 1);
}
return is;
}
is.get();
if (ch == '(') break;
if (!isspace(ch)) {
is.setstate(std::ios::failbit);
return is;
}
}
// second part
index_t idx;
std::vector<index_t> tmp;
while (is >> idx) {
tmp.push_back(idx);
char ch;
do {
ch = is.get();
} while (isspace(ch));
if (ch == 'L') {
ch = is.get();
}
if (ch == ',') {
while (true) {
ch = is.peek();
if (isspace(ch)) {
is.get(); continue;
}
if (ch == ')') {
is.get(); break;
}
break;
}
if (ch == ')') break;
} else if (ch == ')') {
break;
} else {
is.setstate(std::ios::failbit);
return is;
}
}
shape.CopyFrom(tmp.begin(), tmp.end());
return is;
}
Usage:
int main() {
  TShape a;
  cin >> a;           // parse a shape from standard input
  cout << a << endl;  // print it back as a python-style tuple
  return 0;
}
// correct example
// 3 -> (3,)
// (3,5) -> (3,5)
// (3 , 5) -> (3,5)
// (3, 4L, 5) -> (3,4,5)
// incorrect example
// a -> wrong!
// (3,4,a) -> (3,4)
Member Functions¶
CopyFrom¶
The CopyFrom()
function first sets the dimension of the shape by calling SetDim()
with the distance between the iterators begin
and end
.
Then, it copies the data from the range [begin,end)
to data()
, which points to data_stack_ or data_heap_ depending on
ndim_
compared with kStackCache
.
template<typename RandomAccessIterator>
inline void CopyFrom(RandomAccessIterator begin,
RandomAccessIterator end) {
this->SetDim(end - begin);
std::copy(begin, end, data());
}
data¶
It returns a pointer to the data content of the TShape
.
These two overloads can coexist because the one that is called
depends on the constness of the object: if the object is const
, then
the const
version is called, and vice versa.
More precisely, overload resolution looks at the implicit this
argument and checks whether it points to a const
object or not.
inline const index_t *data() const {
return ndim_ <= kStackCache ? data_stack_ : data_heap_;
}
inline index_t *data() {
return ndim_ <= kStackCache ? data_stack_ : data_heap_;
}
ndim¶
It simply returns the private member variable ndim_
.
inline index_t ndim(void) const {
return ndim_;
}
Size¶
It returns the product of the sizes of all dimensions.
Its return type is size_t
, a machine-dependent type that is large enough
to hold the size of any object in memory.
inline size_t Size(void) const {
size_t size = 1;
const index_t *d = this->data();
for (index_t i = 0; i < ndim_; ++i) {
size *= d[i];
}
return size;
}
FlatTo2D¶
It keeps the lowest (last) dimension of the TShape
and collapses all the other dimensions into the first one, returning a 2-D Shape
.
inline Shape<2> FlatTo2D(void) const {
Shape<2> s;
if (ndim_ == 0) return Shape2(0, 0);
const index_t *d = this->data();
s.shape_[1] = d[ndim_ - 1];
index_t ymax = 1;
for (index_t i = 1; i < ndim_; ++i) {
ymax *= d[i - 1];
}
s.shape_[0] = ymax;
return s;
}
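For example, a sketch assuming a TShape built from a Shape3 via the assignment operator described above:
TShape t;
t = Shape3(2, 3, 4);        // 24 elements in total
Shape<2> f = t.FlatTo2D();  // keeps the last dimension: f == (6,4)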
FlatTo3D¶
It flattens the shape into three parts: [0, axis_begin)
, [axis_begin, axis_end]
, and (axis_end, ndim)
.
inline Shape<3> FlatTo3D(index_t axis_begin, index_t axis_end) const {
CHECK(axis_end >= axis_begin);
Shape<3> s;
if (ndim_ == 0) return Shape3(0, 0, 0);
const index_t *d = this->data();
s.shape_[0] = 1;
s.shape_[1] = 1;
s.shape_[2] = 1;
for (index_t i = 0; i < axis_begin; ++i) {
s.shape_[0] *= d[i];
}
for (index_t i = axis_begin; i <= axis_end; ++i) {
s.shape_[1] *= d[i];
}
for (index_t i = axis_end + 1; i < ndim_; ++i) {
s.shape_[2] *= d[i];
}
return s;
}
It flattens the axes before and after the specified axis, producing a 3-D shape.
inline Shape<3> FlatTo3D(index_t axis) const {
return FlatTo3D(axis, axis);
}
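A worked example of both overloads, again as a sketch with a TShape assigned from a Shape4:
TShape t;
t = Shape4(2, 3, 4, 5);
Shape<3> a = t.FlatTo3D(1, 2);  // collapses dims {0}, {1,2}, {3} -> (2, 12, 5)
Shape<3> b = t.FlatTo3D(2);     // same as FlatTo3D(2, 2)         -> (6, 4, 5)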
ProdShape¶
It returns product of shape in [dimstart,dimend)
.
inline index_t ProdShape(int dimstart, int dimend) const {
index_t num = 1;
const index_t *d = this->data();
for (int i = dimstart; i < dimend; ++i) {
num *= d[i];
}
return num;
}
get¶
It returns a fixed-dimension Shape<dim>
, the shape type used as a component of Tensor
.
template<int dim>
inline Shape<dim> get(void) const {
CHECK_EQ(dim, ndim_) << "dimension do not match target dimension " << dim << " vs " << ndim_;
const index_t *d = this->data();
Shape<dim> s;
for (int i = 0; i < dim; ++i) {
s[i] = d[i];
}
return s;
}
SetDim (private
)¶
Not only does it set the value of ndim_
, it also decides whether data_stack_
or
data_heap_
is used.
inline void SetDim(index_t dim) {
if (dim > kStackCache &&
dim > num_heap_allocated_) {
// data_heap_ can be NULL
delete [] data_heap_;
data_heap_ = new index_t[dim];
num_heap_allocated_ = dim;
}
ndim_ = dim;
}
TBlob¶
The tensor blob class (TBlob
) can hold a tensor of any dimension,
any device and any data type.
It is only a weak type used to transfer data through interfaces.
TBlob
itself does not provide any arithmetic operations,
but it can be converted to a Tensor
of fixed dimension for further operations.
Like Tensor
, this data structure behaves like a pointer class and does not
implicitly allocate or de-allocate space.
It is helpful for holding tensors of different dimensions
while they wait for further processing.
Member Variables¶
There are five member variables belonging to TBlob
.
- dptr_: pointer to the data
- shape_: shape of the tensor, of type TShape
- stride_: stride information in the lowest dimension
- dev_mask_: device mask of the corresponding device
- type_flag_: type flag of the tensor blob
void *dptr_;
TShape shape_;
index_t stride_;
int dev_mask_;
int type_flag_;
Constructor¶
Default Constructor¶
Default TBlob
is set with dptr_
to be NULL
, dev_mask_
to be the mask
of cpu
, and type_flag_
to be the flag of default_real_t
, which is actually
float
type.
TBlob(void)
: dptr_(NULL), dev_mask_(cpu::kDevMask),
type_flag_(DataType<default_real_t>::kFlag) {}
Constructor from TShape
¶
It constructs a TBlob
over contiguous memory by setting stride_
to the size of the
lowest (last) dimension of the TShape
.
template<typename DType>
TBlob(DType *dptr,
const TShape &shape,
int dev_mask)
: dptr_(dptr), shape_(shape),
stride_(shape[shape.ndim() - 1]),
dev_mask_(dev_mask),
type_flag_(DataType<DType>::kFlag) {}
Constructor from TShape
with type¶
It constructs a TBlob
over contiguous memory by setting stride_
to the size of the
lowest (last) dimension of the TShape
, and takes a user-defined type_flag
.
TBlob(void *dptr,
const TShape &shape,
int dev_mask,
int type_flag)
: dptr_(dptr), shape_(shape),
stride_(shape[shape.ndim() - 1]),
dev_mask_(dev_mask),
type_flag_(type_flag) {}
Constructor from Tensor
¶
It uses the overloaded assignment operator =
to construct TBlob
from Tensor
. The
detail of =
can be referred to following notes.
template<typename Device, int dim, typename DType>
TBlob(const Tensor<Device, dim, DType> &src) {
*this = src;
}
Overloaded Operators¶
Only the assignment operator is overloaded in class TBlob
. The rhs
should only be
the Tensor
type.
template<typename Device, int dim, typename DType>
inline TBlob
&operator=(const Tensor<Device, dim, DType> &src) {
dptr_ = src.dptr_;
shape_ = src.shape_;
stride_ = src.stride_;
dev_mask_ = Device::kDevMask;
type_flag_ = DataType<DType>::kFlag;
return *this;
}
Member Functions¶
CheckContiguous¶
It checks whether stride_
equals the size of the lowest (last) dimension of shape_
.
inline bool CheckContiguous(void) const {
return shape_[shape_.ndim() - 1] == stride_;
}
FlatTo2D¶
It first checks the consistency of device and type between the desired return Tensor
and
TBlob
itself. Then, it calls the constructor of Tensor
.
template<typename Device, typename DType>
inline Tensor<Device, 2, DType> FlatTo2D(Stream<Device> *stream = NULL) const {
CHECK(Device::kDevMask == dev_mask_)
<< "TBlob.get: device type do not match specified type";
CHECK(DataType<DType>::kFlag == type_flag_)
<< "TBlob.get_with_shape: data type do not match specified type."
<< "Expected: " << type_flag_ << " v.s. given " << DataType<DType>::kFlag;
return Tensor<Device, 2, DType>(static_cast<DType*>(dptr_),
shape_.FlatTo2D(), stride_, stream);
}
ndim¶
It simply calls the ndim()
function, which is a member function of its member variable
shape_
with type TShape
.
inline int ndim(void) const {
return shape_.ndim();
}
size¶
It returns the size of i
-th dimension.
inline index_t size(index_t idx) const {
return shape_[idx];
}
Size¶
It returns the total number of elements in Tensor
.
inline index_t Size(void) const {
return shape_.Size();
}
get¶
It fetches the tensor with a specific dimension dim
. If dim does not
match the stored dimension, an error is issued by the get()
function of
class TShape
.
template<typename Device, int dim, typename DType>
inline Tensor<Device, dim, DType> get(Stream<Device> *stream = NULL) const {
CHECK(Device::kDevMask == dev_mask_)
<< "TBlob.get: device type do not match specified type";
CHECK(DataType<DType>::kFlag == type_flag_)
<< "TBlob.get_with_shape: data type do not match specified type."
<< "Expected: " << type_flag_ << " v.s. given " << DataType<DType>::kFlag;
return Tensor<Device, dim, DType>(static_cast<DType*>(dptr_),
shape_.get<dim>(),
stride_, stream);
}
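A minimal usage sketch, assuming the mshadow tensor and tensor_blob headers, CPU tensors, and purely illustrative variable names:
float data[6] = {0, 1, 2, 3, 4, 5};
Tensor<cpu, 2, float> src(data, Shape2(2, 3));           // a 2x3 tensor over external memory
TBlob blob(src);                                         // type-erased view of the same memory
Tensor<cpu, 2, float> back = blob.get<cpu, 2, float>();  // recover the typed tensor
// blob.get<cpu, 3, float>() would fail the check: dim 3 does not match the stored ndim 2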
get_with_shape¶
It fetches a tensor with the given shape. If the size does not match the stored size, an error will be issued.
It first checks the consistency of device and type between the desired return Tensor
and
TBlob
itself. Then, it checks whether the TBlob
is contiguous by comparing stride_
with the lowest (last) dimension of shape_
. Moreover, it also compares the total sizes of the two shapes to
make sure the reshape does not change the number of elements. At last, it calls the constructor
of Tensor
to create a new one according to the given shape
.
template<typename Device, int dim, typename DType>
inline Tensor<Device, dim, DType> get_with_shape(const Shape<dim> &shape,
Stream<Device> *stream = NULL) const {
CHECK(Device::kDevMask == dev_mask_)
<< "TBlob.get: device type do not match specified type";
CHECK(DataType<DType>::kFlag == type_flag_)
<< "TBlob.get_with_shape: data type do not match specified type."
<< "Expected: " << type_flag_ << " v.s. given " << DataType<DType>::kFlag;
CHECK_EQ(this->CheckContiguous(), true) << "TBlob.get_reshape: must be contiguous";
CHECK_EQ(this->shape_.Size(), shape.Size())
<< "TBlob.get_with_shape: new and old shape do not match total elements";
return Tensor<Device, dim, DType>(static_cast<DType*>(dptr_),
shape,
shape[dim - 1],
stream);
}
FlatTo3D¶
It flattens the tensor to 3
dimensions by calling the FlatTo3D()
member function of shape_
.
Its first version collapses the dimensions before and after the specified axis.
template<typename Device, typename DType>
inline Tensor<Device, 3, DType> FlatTo3D(int axis, Stream<Device> *stream = NULL) const {
return this->get_with_shape<Device, 3, DType>(
this->shape_.FlatTo3D(axis), stream);
}
Its second version collapses the dimensions into [0, axis_begin)
, [axis_begin, axis_end]
,
and (axis_end, ndim)
.
template<typename Device, typename DType>
inline Tensor<Device, 3, DType> FlatTo3D(int axis_begin, int axis_end,
Stream<Device> *stream = NULL) const {
return this->get_with_shape<Device, 3, DType>(
this->shape_.FlatTo3D(axis_begin, axis_end), stream);
}
Missing Explanation¶
Algorithm: std::fill_n
¶
The std::fill_n
is defined in <algorithm>
.
template< class OutputIt, class Size, class T >
OutputIt fill_n( OutputIt first, Size count, const T& value );
a possible implementation:
template<class OutputIt, class Size, class T>
OutputIt fill_n(OutputIt first, Size count, const T& value)
{
for (Size i = 0; i < count; i++) {
*first++ = value;
}
return first;
}
Assigns the given value
to the first count
elements of the range beginning at first
if count>0
. Does nothing otherwise.
Algorithm: std::copy
¶
The std::copy
is defined in <algorithm>
.
template< class InputIt, class OutputIt >
OutputIt copy( InputIt first, InputIt last, OutputIt d_first );
a possible implementation:
template<class InputIt, class OutputIt>
OutputIt copy(InputIt first, InputIt last,
OutputIt d_first)
{
while (first != last) {
*d_first++ = *first++;
}
return d_first;
}
Copies all elements in the range [first, last)
to d_first
.
Move Constructor¶
The move constructor must ensure that the moved-from object is left in a state such that destroying that object will be harmless. In particular, once its resources are moved, the original object must no longer point to those moved resources—responsibility for those resources has been assumed by the newly created object.
Unlike the copy constructor, the move constructor does not allocate any new memory; it takes over the memory in the given argument. Having taken over the memory from its argument, the constructor body sets the pointers in the given object to nullptr.
IOStream: istream::peek
¶
int peek();
It returns the next character in the input sequence (as an int), without extracting it: the character is left as the next character to be extracted from the stream.
IOStream: istream::setstate
¶
void setstate (iostate state);
It modifies the current internal error state flags by combining the current flags with those in argument state (as if performing a bitwise OR operation).
IOStream: std::ios::failbit
¶
std::ios::failbit
represents a logical error on an I/O operation.
Once an error has occurred, subsequent I/O operations on that stream will fail.
Condition State of istream
¶
The easiest way to determine the state of a stream object is to use that object as a condition.
The while
condition checks the state of the stream returned from the >>
expression.
If that input operation succeeds, the state remains valid and the condition will
succeed. For example,
while (is >> idx) { // `>>` input a value to `idx`
do_some_thing();
}
Missing Explanation¶
Load and Save¶
These two functions are related to the functions in ./io.h
, and we leave their
explanations to the related parts.
template<typename TStream>
inline void Save(TStream *strm) const {
strm->Write(&ndim_, sizeof(ndim_));
strm->Write(data(), sizeof(index_t) * ndim_);
}
template<typename TStream>
inline bool Load(TStream *strm) {
if (strm->Read(&ndim_, sizeof(ndim_)) != sizeof(ndim_)) return false;
this->SetDim(ndim_);
size_t nread = sizeof(index_t) * ndim_;
if (strm->Read(data(), nread) != nread) return false;
return true;
}
/dmlc-core/parameter.h¶
Overview¶
parameter.h
provides lightweight utilities to do parameter setup and checking.
- Predefined enum and struct
- FieldAccessEntry
- FieldEntryBase
- FieldEntryNumeric
- FieldEntry
- ParamManager
- ParamManagerSingleton
- Parameter
- MACRO
1. Predefined enum and struct¶
1.1 ParamInitOption¶
ParamInitOption
is the option in parameter initialization.
- kAllowUnknown: allow unknown parameters
- kAllMatch: need to match exact parameters
- kAllowHidde: allow unmatched hidden fields with the format __*__
enum ParamInitOption {
kAllowUnknown,
kAllMatch,
kAllowHidde
};
1.2 ParamFieldInfo¶
ParamFieldInfo
provides information about a parameter field in string representations.
- name: name of the field
- type: type of the field in string format
- type_info_str: includes the default value, enum constraint and type name
- description: detailed description of the type
struct ParamFieldInfo {
std::string name;
std::string type;
std::string type_info_str;
std::string description;
};
2. FieldAccessEntry¶
FieldAccessEntry
is an internal interface that helps manage the parameters.
Each entry can be used to access one parameter in a struct
inherited from Parameter
.
Following is an overview of its member variables and functions; most of the member functions are declared virtual:
class FieldAccessEntry {
public:
FieldAccessEntry()
: has_default_(false) {}
virtual ~FieldAccessEntry() {}
virtual void SetDefault(void *head) const = 0;
virtual void Set(void *head, const std::string &value) const = 0;
virtual void Check(void *head) const {}
virtual std::string GetStringValue(void *head) const = 0;
virtual ParamFieldInfo GetFieldInfo() const = 0;
protected:
bool has_default_;
size_t index_;
std::string key_;
std::string type_;
std::string description_;
virtual void PrintDefaultValueString(std::ostream &os) const = 0;
friend class ParamManager;
};
We add a brief introduction to the concepts used in the above code.
2.1 Pure Virtual Function¶
A pure virtual function is a special kind of virtual function, declared by appending = 0
to its declaration.
A class containing one or more pure virtual functions is called an abstract class
. We cannot create instances of such a class; it only serves as a base class
for its derived classes
.
Moreover, unless all pure virtual functions are defined in the derived class
, we still cannot create instances of the derived class
.
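A tiny illustration of these rules, in generic C++ unrelated to dmlc-core:
struct Entry {                 // abstract class: it has a pure virtual function
  virtual void SetDefault() = 0;
  virtual ~Entry() {}
};
struct IntEntry : Entry {      // overrides the pure virtual function
  void SetDefault() override {}
};
// Entry e;    // error: cannot instantiate an abstract class
IntEntry ok;   // fine: all pure virtual functions are defined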
2.2 Virtual Destructor¶
When a base class
has virtual functions, its destructor
must also be
declared virtual
. If not, deleting a child object through a pointer to the base class
only runs the destructor of the base class
, which leads
to memory leaks, since the resources owned by the child class
will not be released.
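A short example of why the virtual destructor matters, again generic C++ rather than dmlc code:
#include <vector>

struct Base {
  virtual ~Base() {}                  // virtual: deleting through Base* is safe
};
struct Child : Base {
  std::vector<int> payload{1, 2, 3};  // resource owned by the child class
};

int main() {
  Base *p = new Child();
  delete p;  // calls ~Child() then ~Base(); without the virtual ~Base(),
             // ~Child() would be skipped and payload's memory would leak
  return 0;
}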
3. FieldEntryBase¶
template<typename TEntry, typename DType>
class FieldEntryBase : public FieldAccessEntry
FieldEntryBase
is the base class of all FieldEntry
classes; it is itself derived from
FieldAccessEntry
, with all the pure virtual functions defined.
3.1 Member Variables¶
ptrdiff_t offset_;
DType default_value_;
// together with inherited ones
bool has_default_;
size_t index_;
std::string key_;
std::string type_;
std::string description_;
ptrdiff_t
, like size_t
(which is unsigned
), is a machine-dependent type (but signed
), defined in the cstddef
header file.
In the context of dmlc-core
, it is used to record the address offset between the head
of the struct and
the target member
.
3.2 Member Functions¶
3.2.1 public:
Init¶
Init()
function sets values for member variables key_
, type_
, and offset_
.
inline void Init(const std::string &key,
void *head, DType &ref) {
this->key_ = key;
if (this->type_.length() == 0) {
this->type_ = dmlc::type_name<DType>();
}
this->offset_ = ((char*)&ref) - ((char*)head);
}
The check this->type_.length() == 0
means that type_
is only set once and cannot be changed afterwards.
The function dmlc::type_name<DType>()
is defined in ./type_traits.h
.
#define DMLC_DECLARE_TYPE_NAME(Type, Name) \
template<> \
inline const char* type_name<Type>() { \
return Name; \
}
DMLC_DECLARE_TYPE_NAME(float, "float");
DMLC_DECLARE_TYPE_NAME(double, "double");
DMLC_DECLARE_TYPE_NAME(int, "int");
DMLC_DECLARE_TYPE_NAME(uint32_t, "int (non-negative)");
DMLC_DECLARE_TYPE_NAME(uint64_t, "long (non-negative)");
DMLC_DECLARE_TYPE_NAME(std::string, "string");
DMLC_DECLARE_TYPE_NAME(bool, "boolean");
3.2.2 protected:
Get¶
The Get()
function fetches the internal representation of a parameter.
For example, if this entry corresponds to the field param.learning_rate
, then Get(&param)
returns a reference to param.learning_rate
.
Its implementation relies on the offset_
between head
and the target
member.
inline DType &Get(void *head) const {
return *(DType*)((char*)(head) + offset_);
}
First, it converts head
from void*
to char*
.
Second, it adds offset_
to head
.
Third, it converts the resulting pointer
to DType*
.
Last, it dereferences that pointer and returns the referenced DType
value.
Note: protected
variables and functions can be accessed by friend
classes and child classes
.
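The offset trick can be reproduced in isolation; the struct MyParam below is purely hypothetical and only mirrors what Init() and Get() do.
#include <cstddef>
#include <cstdio>

struct MyParam { int num_hidden; float learning_rate; };

int main() {
  MyParam p = {0, 0.0f};
  // what Init() records: the byte offset of the field from the head of the struct
  std::ptrdiff_t offset = (char*)&p.learning_rate - (char*)&p;
  // what Get() does: recover a typed reference from head + offset
  float &lr = *(float*)((char*)&p + offset);
  lr = 0.01f;
  std::printf("%f\n", p.learning_rate);  // prints 0.010000
  return 0;
}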
3.2.3 public:
Set¶
The Set()
function sets the target member to value
. In its implementation,
it explicitly rejects any extra non-space characters that follow the value.
Important note: the value returned by Get()
is a DType
reference. Thanks to the overloaded operator
>>
of istringstream
, the string
content of is
can be converted to the corresponding DType
.
istream& operator>> (bool& val);
istream& operator>> (short& val);
istream& operator>> (unsigned short& val);
istream& operator>> (int& val);
istream& operator>> (unsigned int& val);
istream& operator>> (long& val);
istream& operator>> (unsigned long& val);
istream& operator>> (float& val);
istream& operator>> (double& val);
istream& operator>> (long double& val);
istream& operator>> (void*& val);
virtual void Set(void *head, const std::string &value) const {
std::istringstream is(value);
is >> this->Get(head);
// the following codes is designed for avoiding `space` in the value
if (!is.fail()) {
while (!is.eof()) {
int ch = is.get(); // `.get()` returns the next character of istringstream
if (ch == EOF) {
is.clear(); break; // `.clear()` clears all error flags in default
}
if (!isspace(ch)) { // `isspace()` is a C function to check space
is.setstate(std::ios::failbit); break; // if we run into space, we manually set the failbit
// so the program will go into the next `if()`
}
}
}
if (is.fail()) {
std::ostringstream os;
os << "Invalid Parameter format for " << key_
<< " expect " << type_ << " but value=\'" << value<< '\'';
throw dmlc::ParamError(os.str());
}
}
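To see the istringstream conversion and the trailing-character check in isolation, here is a standalone sketch (not dmlc code):
#include <iostream>
#include <sstream>

int main() {
  double value = 0;
  std::istringstream ok("3.14");
  ok >> value;                        // operator>> converts the string to double
  std::cout << value << '\n';         // prints 3.14
  std::istringstream bad("3.14abc");  // extra characters after the number
  bad >> value;                       // parses 3.14, but "abc" stays in the stream
  // Set() would detect the leftover non-space characters and throw dmlc::ParamError
  return 0;
}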
The declaration of explicit constructor of istringstream
is
explicit istringstream (const string& str, ios_base::openmode which = ios_base::in);
// str: A string object, whose content is copied.
3.2.4 public:
set_default¶
By default, a class inherited from FieldEntryBase
does not have a default value.
The set_default()
function changes this by setting has_default_
to true
and
setting its own default_value_
to the value of the input argument. At last,
it returns itself to allow chaining.
inline TEntry &set_default(const DType &default_value) {
default_value_ = default_value;
has_default_ = true;
return this->self();
}
3.2.5 public:
self¶
self()
first casts the this
pointer to the pointer type of its subclass (TEntry).
Then, it returns the object that pointer refers to.
inline TEntry &self() {
return *(static_cast<TEntry*>(this));
}
Note: a static_cast
is useful to perform a conversion that the compiler will not
generate automatically.
3.2.6 public:
SetDefault¶
Since the function set_default()
sets the flag has_default_
to true
, SetDefault() can
assign the pre-set default_value_
to the target member. If it is
called before set_default()
, it throws an error.
virtual void SetDefault(void *head) const {
if (!has_default_) {
std::ostringstream os;
os << "Required parameter " << key_
<< " of " << type_ << " is not presented";
throw dmlc::ParamError(os.str());
} else {
this->Get(head) = default_value_;
}
}
3.2.7 public:
describe¶
It simply sets the self-defined member variables description_
to the input.
inline TEntry &describe(const std::string &description) {
description_ = description;
return this->self();
}
3.2.8 protected:
PrintValue¶
PrintValue()
simply prints the value with DType
.
virtual void PrintValue(std::ostream &os, DType value) const {
os << value;
}
3.2.9 protected:
PrintDefaultValueString¶
PrintDefaultValueString()
simply prints the default value.
virtual void PrintDefaultValueString(std::ostream &os) const {
PrintValue(os, default_value_);
}
3.2.10 public:
GetStringValue¶
GetStringValue() converts the stored value into a string and returns it.
virtual std::string GetStringValue(void *head) const {
std::ostringstream os;
PrintValue(os, this->Get(head));
return os.str();
}
3.2.11 public:
GetFieldInfo¶
It simply fills the four member variables of a ParamFieldInfo with the information of this entry,
and returns it.
virtual ParamFieldInfo GetFieldInfo() const {
ParamFieldInfo info;
std::ostringstream os;
info.name = key_;
info.type = type_;
os << type_;
if (has_default_) {
os << ',' << " optional, default=";
PrintDefaultValueString(os);
} else {
os << ", required";
}
info.type_info_str = os.str();
info.description = description_;
return info;
}
4. FieldEntryNumeric¶
FieldEntryNumeric
is a base class for numeric types that have ranges. It is the
child class of FieldEntryBase
and grandchild class of FieldAccessEntry
.
template<typename TEntry, typename DType>
class FieldEntryNumeric : public FieldEntryBase<TEntry, DType> {};
4.1 Member Variables¶
bool has_begin_, has_end_;
DType begin_, end_;
// inherited from `FieldEntryBase`
ptrdiff_t offset_;
DType default_value_;
// inherited from `FieldAccessEntry`
bool has_default_;
size_t index_;
std::string key_;
std::string type_;
std::string description_;
4.2 Constructor¶
By default, a FieldEntryNumeric type does not have range constraints.
FieldEntryNumeric()
: has_begin_(false), has_end_(false) {}
4.3 Member Functions¶
4.3.1 set_range¶
It simply sets the range from begin to end, and turns on the flags has_begin_ and has_end_.
virtual TEntry &set_range(DType begin, DType end) {
begin_ = begin; end_ = end;
has_begin_ = true; has_end_ = true;
return this->self();
}
4.3.2 set_lower_bound¶
It simply sets the begin_
and turns on the flag has_begin_
.
virtual TEntry &set_lower_bound(DType begin) {
begin_ = begin; has_begin_ = true;
return this->self();
}
4.3.3 Check¶
It does a simple check of the parameter constraints.
virtual void Check(void *head) const {
FieldEntryBase<TEntry, DType>::Check(head); // the inherited base-class Check() is an empty no-op
DType v = this->Get(head);
if (has_begin_ && has_end_) {
if (v < begin_ || v > end_) {
std::ostringstream os;
os << "value " << v << "for Parameter " << this->key_
<< " exceed bound [" << begin_ << ',' << end_ <<']';
throw dmlc::ParamError(os.str());
}
} else if (has_begin_ && v < begin_) {
std::ostringstream os;
os << "value " << v << "for Parameter " << this->key_
<< " should be greater equal to " << begin_;
throw dmlc::ParamError(os.str());
} else if (has_end_ && v > end_) {
std::ostringstream os;
os << "value " << v << "for Parameter " << this->key_
<< " should be smaller equal to " << end_;
throw dmlc::ParamError(os.str());
}
}
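As a usage sketch (hedged: RangeParam is a made-up struct, and the declaration macros it uses are only introduced in sections 8 and 9 below), a range set via set_range() surfaces to the user as a ParamError thrown from Check() during Init().
#include <dmlc/parameter.h>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct RangeParam : public dmlc::Parameter<RangeParam> {  // made-up example struct
  int num_hidden;
  DMLC_DECLARE_PARAMETER(RangeParam) {
    DMLC_DECLARE_FIELD(num_hidden).set_range(0, 1000);
  }
};
DMLC_REGISTER_PARAMETER(RangeParam);

int main() {
  RangeParam p;
  std::vector<std::pair<std::string, std::string> > kwargs = {{"num_hidden", "5000"}};
  try {
    p.Init(kwargs);  // 5000 exceeds the bound [0,1000]
  } catch (const dmlc::ParamError &err) {
    std::cerr << err.what() << std::endl;  // Check() throws a ParamError
  }
  return 0;
}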
5. FieldEntry¶
FieldEntry defines the parsing and checking behavior for a DType.
This class can be specialized to implement specific behaviors for particular types.
5.1 FieldEntry<typename DType>
¶
It is the general definition of FieldEntry, which only determines whether it inherits from
the common FieldEntryBase class or from the FieldEntryNumeric class for numeric types.
template<typename DType>
class FieldEntry :
public IfThenElseType<dmlc::is_arithmetic<DType>::value,
FieldEntryNumeric<FieldEntry<DType>, DType>,
FieldEntryBase<FieldEntry<DType>, DType> >::Type {
};
Basically, it uses a trick to let FieldEntry inherit from different classes.
The following code is helpful for understanding it.
// all defined in `./type_traits.h`
template<typename T>
struct is_arithmetic {
#if DMLC_USE_CXX11
/*! \brief the value of the traits */
static const bool value = std::is_arithmetic<T>::value;
#else
/*! \brief the value of the traits */
static const bool value = (dmlc::is_integral<T>::value ||
dmlc::is_floating_point<T>::value);
#endif
};
template<typename Then, typename Else>
struct IfThenElseType<true, Then, Else> {
typedef Then Type;
};
template<typename Then, typename Else>
struct IfThenElseType<false, Then, Else> {
typedef Else Type;
};
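The following standalone sketch (with simplified, made-up base classes) replicates the trick: the base class of Entry<T> is selected at compile time depending on whether T is arithmetic.
#include <iostream>
#include <string>
#include <type_traits>

struct NumericBase { static const char *name() { return "numeric"; } };
struct GenericBase { static const char *name() { return "generic"; } };

// same select-a-base-class trick as IfThenElseType in ./type_traits.h
template<bool cond, typename Then, typename Else> struct IfThenElse;
template<typename Then, typename Else> struct IfThenElse<true, Then, Else>  { typedef Then Type; };
template<typename Then, typename Else> struct IfThenElse<false, Then, Else> { typedef Else Type; };

// Entry<T> inherits from NumericBase for arithmetic T, otherwise from GenericBase
template<typename T>
struct Entry : public IfThenElse<std::is_arithmetic<T>::value,
                                 NumericBase, GenericBase>::Type {};

int main() {
  std::cout << Entry<int>::name() << std::endl;          // numeric
  std::cout << Entry<std::string>::name() << std::endl;  // generic
  return 0;
}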
5.2 FieldEntry<int>
¶
template<>
class FieldEntry<int> : public FieldEntryNumeric<FieldEntry<int>, int> {};
It specializes the definition of FieldEntry for int
type to implement some
specific behaviors.
5.2.1 Member Variables¶
bool is_enum_;
std::map<std::string, int> enum_map_;
std::map<int, std::string> enum_back_map_;
// inherited from `FieldEntryNumeric`
bool has_begin_, has_end_;
DType begin_, end_;
// inherited from `FieldEntryBase`
ptrdiff_t offset_;
DType default_value_;
// inherited from `FieldAccessEntry`
bool has_default_;
size_t index_;
std::string key_;
std::string type_;
std::string description_;
5.2.3 Member Functions¶
5.2.3.1 Set¶
It adds handling of the enumeration case. If is_enum_ is true,
it first looks up value in enum_map_ and obtains an iterator. If the lookup fails,
it throws a ParamError; otherwise it passes the mapped integer on. Either way, the actual
set is still performed by its parent class.
virtual void Set(void *head, const std::string &value) const {
if (is_enum_) {
std::map<std::string, int>::const_iterator it = enum_map_.find(value);
std::ostringstream os;
if (it == enum_map_.end()) {
os << "Invalid Input: \'" << value;
os << "\', valid values are: ";
PrintEnums(os);
throw dmlc::ParamError(os.str());
} else {
os << it->second;
Parent::Set(head, os.str());
}
} else {
Parent::Set(head, value);
}
}
5.2.3.2 add_enum¶
It only succeeds when the key has not been registered in enum_map_ and the value has not
been registered in enum_back_map_; otherwise it throws a ParamError.
Its workflow is to record the mapping between key and value in both directions.
Then, it turns on the flag is_enum_ and returns itself for chaining.
inline FieldEntry<int> &add_enum(const std::string &key, int value) {
if ((enum_map_.size() != 0 && enum_map_.count(key) != 0) || \
enum_back_map_.count(value) != 0) {
std::ostringstream os;
os << "Enum " << "(" << key << ": " << value << " exisit!" << ")\n";
os << "Enums: ";
for (std::map<std::string, int>::const_iterator it = enum_map_.begin();
it != enum_map_.end(); ++it) {
os << "(" << it->first << ": " << it->second << "), ";
}
throw dmlc::ParamError(os.str());
}
enum_map_[key] = value;
enum_back_map_[value] = key;
is_enum_ = true;
return this->self();
}
Side note: one might ask why auto or cbegin() is not used here; presumably the code targets C++03, where neither is available, so the const_iterator type is spelled out explicitly.
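A hedged usage sketch of add_enum() (ActParam and the field name are made up; the declaration macros are covered in sections 8 and 9): the user passes the enum key as a string, and the mapped integer is stored in the field.
#include <dmlc/parameter.h>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct ActParam : public dmlc::Parameter<ActParam> {  // made-up example struct
  int activation;
  DMLC_DECLARE_PARAMETER(ActParam) {
    DMLC_DECLARE_FIELD(activation)
        .add_enum("relu", 0)
        .add_enum("tanh", 1)
        .set_default(0);
  }
};
DMLC_REGISTER_PARAMETER(ActParam);

int main() {
  ActParam p;
  std::vector<std::pair<std::string, std::string> > kwargs = {{"activation", "tanh"}};
  p.Init(kwargs);
  std::cout << p.activation << std::endl;  // 1: the string key was mapped to its int value
  return 0;
}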
5.2.3.3 PrintValue¶
PrintValue() directly prints the input value of type int if the flag is_enum_ is not on.
If the flag is on, it first checks (via CHECK_NE) that the value exists in enum_back_map_, and then
prints the string key corresponding to value in enum_back_map_.
virtual void PrintValue(std::ostream &os, int value) const {
if (is_enum_) {
CHECK_NE(enum_back_map_.count(value), 0U)
<< "Value not found in enum declared";
os << enum_back_map_.at(value);
} else {
os << value;
}
}
5.2.3.4 PrintDefaultValueString¶
It simply calls PrintValue() to print its default_value_, wrapped in single quotes.
virtual void PrintDefaultValueString(std::ostream &os) const {
os << '\'';
PrintValue(os, default_value_);
os << '\'';
}
5.2.3.5 PrintEnums¶
It simply outputs every string key stored in enum_map_, enclosed in braces.
inline void PrintEnums(std::ostream &os) const {
os << '{';
for (std::map<std::string, int>::const_iterator
it = enum_map_.begin(); it != enum_map_.end(); ++it) {
if (it != enum_map_.begin()) {
os << ", ";
}
os << "\'" << it->first << '\'';
}
os << '}';
}
5.2.3.6 GetFieldInfo¶
If is_enum_ is not on, it simply calls GetFieldInfo() of its parent class.
Otherwise, it performs its own version, whose only difference is the call to PrintEnums(os).
virtual ParamFieldInfo GetFieldInfo() const {
if (is_enum_) {
ParamFieldInfo info;
std::ostringstream os;
info.name = key_;
info.type = type_;
PrintEnums(os);
if (has_default_) {
os << ',' << "optional, default=";
PrintDefaultValueString(os);
} else {
os << ", required";
}
info.type_info_str = os.str();
info.description = description_;
return info;
} else {
return Parent::GetFieldInfo();
}
}
5.3 FieldEntry<std::string>
¶
It specializes the definition of FieldEntry
for std::string
type to implement some specific behaviors.
template<>
class FieldEntry<std::string>
: public FieldEntryBase<FieldEntry<std::string>, std::string> {};
5.3.1 Member Functions¶
5.3.1.1 Set¶
Since the Set() function in its base class FieldEntryBase rejects values containing spaces,
it is naturally unsuitable for the std::string type. So this specialization overrides
Set() to directly assign the input value to the target instance.
virtual void Set(void *head, const std::string &value) const {
this->Get(head) = value;
}
5.3.1.2 PrintDefaultValueString¶
It overrides PrintDefaultValueString() to wrap the default value in single quotes.
virtual void PrintDefaultValueString(std::ostream &os) const { // NOLINT(*)
os << '\'' << default_value_ << '\'';
}
5.4 FieldEntry<bool>
¶
It specializes the definition of FieldEntry
for bool
type to implement some specific behaviors.
template<>
class FieldEntry<bool>
: public FieldEntryBase<FieldEntry<bool>, bool> {};
5.4.1 Member Functions¶
5.4.1.1 Set¶
It first defines a string with the same size as value. Then it transforms the characters
of value to lowercase and stores them in the new string. If the lowercased value is one of
true, false, 1, and 0, it sets the target instance to true or false
accordingly. Otherwise, it raises an error.
virtual void Set(void *head, const std::string &value) const {
std::string lower_case; lower_case.resize(value.length());
std::transform(value.begin(), value.end(), lower_case.begin(), ::tolower);
bool &ref = this->Get(head); // directly set the value by reference
if (lower_case == "true") {
ref = true;
} else if (lower_case == "false") {
ref = false;
} else if (lower_case == "1") {
ref = true;
} else if (lower_case == "0") {
ref = false;
} else {
std::ostringstream os;
os << "Invalid Parameter format for " << key_
<< " expect " << type_ << " but value=\'" << value<< '\'';
throw dmlc::ParamError(os.str());
}
}
5.4.1.2 PrintValue¶
Instead of directly outputting true and false as 1 and 0, it outputs them as
the strings "True" and "False".
virtual void PrintValue(std::ostream &os, bool value) const {
if (value) {
os << "True";
} else {
os << "False";
}
}
6. ParamManager¶
class ParamManager {};
The ParamManager class handles the parameter settings for each parameter struct type.
A ParamManager will be created for each parameter struct.
6.1 Member Variables¶
- name_: name of the parameter-holding struct
- entry_: positional list of entries
- entry_map_: map from key to entry
std::string name_;
std::vector<FieldAccessEntry*> entry_;
std::map<std::string, FieldAccessEntry*> entry_map_;
6.2 Destructor¶
The destructor of ParamManager explicitly deletes every entry stored in entry_
to trigger their destructors.
~ParamManager() {
for (size_t i = 0; i < entry_.size(); ++i) {
delete entry_[i];
}
}
6.3 Member Functions¶
6.3.1 AddEntry¶
AddEntry() is an internal function to add an entry to the manager; the manager takes over
the ownership of the entry.
It first sets the member variable index_ of the FieldAccessEntry to the current entry_.size(),
so every entry can be sorted according to the order in which it was added. Second, if the key is
already in entry_map_, it raises an error. Otherwise, it pushes the FieldAccessEntry object
into entry_ and records the mapping between key and the object instance.
inline void AddEntry(const std::string &key, FieldAccessEntry *e) {
e->index_ = entry_.size();
if (entry_map_.count(key) != 0) {
LOG(FATAL) << "key " << key << " has already been registered in " << name_;
}
entry_.push_back(e);
entry_map_[key] = e;
}
6.3.2 AddAlias¶
AddAlias() sets an alias for an existing entry. It first checks that the field
name has already been registered and that alias has not. If both hold, it creates a
new map entry connecting alias to the same object instance.
inline void AddAlias(const std::string& field, const std::string& alias) {
if (entry_map_.count(field) == 0) {
LOG(FATAL) << "key " << field << " has not been registered in " << name_;
}
if (entry_map_.count(alias) != 0) {
LOG(FATAL) << "Alias " << alias << " has already been registered in " << name_;
}
entry_map_[alias] = entry_map_[field];
}
6.3.3 set_name¶
set_name() simply sets the member variable name_ to the name of the struct.
inline void set_name(const std::string &name) {
name_ = name;
}
6.3.4 GetFieldInfo¶
GetFieldInfo() simply builds a vector storing the field information of every entry and returns it.
inline std::vector<ParamFieldInfo> GetFieldInfo() const {
std::vector<ParamFieldInfo> ret(entry_.size());
for (size_t i = 0; i < entry_.size(); ++i) {
ret[i] = entry_[i]->GetFieldInfo();
}
return ret;
}
6.3.5 PrintDocString¶
PrintDocString() produces a readable docstring.
For every component in entry_, it first fetches the corresponding ParamFieldInfo, and
outputs its member variables name and type_info_str. If the entry has a description, that is
also output.
inline void PrintDocString(std::ostream &os) const {
for (size_t i = 0; i < entry_.size(); ++i) {
ParamFieldInfo info = entry_[i]->GetFieldInfo();
os << info.name << " : " << info.type_info_str << '\n';
if (info.description.length() != 0) {
os << " " << info.description << '\n';
}
}
}
6.3.6 GetDict¶
It collects the internal parameters and pushes them into a vector of pairs. The first
value of each pair is the parameter name, and the second value is the parameter content
converted to a string.
inline std::vector<std::pair<std::string, std::string> > GetDict(void * head) const {
std::vector<std::pair<std::string, std::string> > ret;
for (std::map<std::string, FieldAccessEntry*>::const_iterator
it = entry_map_.begin(); it != entry_map_.end(); ++it) {
ret.push_back(std::make_pair(it->first, it->second->GetStringValue(head)));
}
return ret;
}
6.3.7 Find¶
It finds the entry in entry_map_
by the key
value.
inline FieldAccessEntry *Find(const std::string &key) const {
std::map<std::string, FieldAccessEntry*>::const_iterator it =
entry_map_.find(key);
if (it == entry_map_.end()) return NULL;
return it->second;
}
6.3.8 RunInit¶
It initializes every stored entry from the contents of the range given by the RandomAccessIterator.
For each element, it first checks whether the key it->first has been registered in entry_map_.
If it has, it fetches the FieldEntry corresponding to the key, sets its actual value
to it->second, calls Check() to examine whether the constraints are satisfied, and
inserts the processed FieldEntry into selected_args.
Otherwise, it checks several extra conditions to determine whether to keep running or raise an error. We will leave that discussion until we meet a case that uses them.
After the initialization, it visits every component inside entry_map_; for those
not yet initialized, the function SetDefault() is called.
template<typename RandomAccessIterator>
inline void RunInit(void *head,
RandomAccessIterator begin,
RandomAccessIterator end,
std::vector<std::pair<std::string, std::string> > *unknown_args,
parameter::ParamInitOption option) const {
std::set<FieldAccessEntry*> selected_args;
for (RandomAccessIterator it = begin; it != end; ++it) {
FieldAccessEntry *e = Find(it->first);
if (e != NULL) {
e->Set(head, it->second);
e->Check(head);
selected_args.insert(e);
} else {
if (unknown_args != NULL) {
unknown_args->push_back(*it);
} else {
if (option != parameter::kAllowUnknown) {
if (option == parameter::kAllowHidden &&
it->first.length() > 4 &&
it->first.find("__") == 0 &&
it->first.rfind("__") == it->first.length()-2) {
continue;
}
std::ostringstream os;
os << "Cannot find argument \'" << it->first << "\', Possible Arguments:\n";
os << "----------------\n";
PrintDocString(os);
throw dmlc::ParamError(os.str());
}
}
}
}
for (std::map<std::string, FieldAccessEntry*>::const_iterator it = entry_map_.begin();
it != entry_map_.end(); ++it) {
if (selected_args.count(it->second) == 0) {
it->second->SetDefault(head);
}
}
}
7. ParamManagerSingleton¶
During execution, the typename PType will be replaced by the type of the user-defined struct.
In fact, its initialization is also the initialization process of the
ParamManager.
The constructor first defines a param of type PType. Then, it calls the member function __DECLARE__()
of PType, passing the this pointer into it. Inside __DECLARE__(), the fields of PType are declared
by manipulating the member variable manager (of type ParamManager) through that this pointer.
At last, it sets the name of the manager according to the name of the user-defined struct.
template<typename PType>
struct ParamManagerSingleton {
ParamManager manager;
explicit ParamManagerSingleton(const std::string ¶m_name) {
PType param;
param.__DECLARE__(this);
manager.set_name(param_name);
}
};
The detailed discussion of this class should be combined with the following contents.
8. Macro¶
In ./parameter.h, several macros are introduced to declare parameters.
8.1 DMLC_DECLARE_PARAMETER¶
DMLC_DECLARE_PARAMETER is used inside the struct as a declaration or definition of
member functions.
Expanding DMLC_DECLARE_PARAMETER first declares a static function named __MANAGER__(),
then begins the definition of a function named __DECLARE__() whose input variable has type
ParamManagerSingleton.
#define DMLC_DECLARE_PARAMETER(PType) \
static ::dmlc::parameter::ParamManager *__MANAGER__(); \
inline void __DECLARE__(::dmlc::parameter::ParamManagerSingleton<PType> *manager) \
8.2 DMLC_DECLARE_FIELD¶
DMLC_DECLARE_FIELD should follow right behind DMLC_DECLARE_PARAMETER; it calls
the DECLARE() function of class Parameter to register a parameter field.
#define DMLC_DECLARE_FIELD(FieldName) this->DECLARE(manager, #FieldName, FieldName)
8.3 DMLC_DECLARE_ALIAS¶
DMLC_DECLARE_ALIAS is also used inside the parameter struct. It always
appears after DMLC_DECLARE_PARAMETER and DMLC_DECLARE_FIELD to set an alias for an already
defined parameter by calling the member function AddAlias() of ParamManager.
#define DMLC_DECLARE_ALIAS(FieldName, AliasName) manager->manager.AddAlias(#FieldName, #AliasName)
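A hedged usage sketch (AliasParam and its field names are made up): after DMLC_DECLARE_ALIAS, the alias key can be used interchangeably with the original field name in Init().
#include <dmlc/parameter.h>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct AliasParam : public dmlc::Parameter<AliasParam> {  // made-up example struct
  float learning_rate;
  DMLC_DECLARE_PARAMETER(AliasParam) {
    DMLC_DECLARE_FIELD(learning_rate).set_default(0.01f);
    DMLC_DECLARE_ALIAS(learning_rate, lr);
  }
};
DMLC_REGISTER_PARAMETER(AliasParam);

int main() {
  AliasParam p;
  std::vector<std::pair<std::string, std::string> > kwargs = {{"lr", "0.1"}};
  p.Init(kwargs);                             // the alias `lr` resolves to learning_rate
  std::cout << p.learning_rate << std::endl;  // 0.1
  return 0;
}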
8.4 DMLC_REGISTER_PARAMETER¶
This macro needs to be put in a source file so that the __MANAGER__() function declared inside
the struct can be defined.
DMLC_REGISTER_PARAMETER defines the __MANAGER__() function declared in the struct, and
then calls the defined __MANAGER__() once through a global reference
(the purpose of this extra call is discussed just below).
#define DMLC_REGISTER_PARAMETER(PType) \
::dmlc::parameter::ParamManager *PType::__MANAGER__() { \
static ::dmlc::parameter::ParamManagerSingleton<PType> inst(#PType); \
return &inst.manager; \
} \
static DMLC_ATTRIBUTE_UNUSED ::dmlc::parameter::ParamManager& \
__make__ ## PType ## ParamManager__ = \
(*PType::__MANAGER__()) \
Note that __make__ ## PType ## ParamManager__ is never used again elsewhere.
It only exists to trigger the function __MANAGER__() once during static initialization and to
accept its return value.
9. Parameter¶
Parameter is the base type that every parameter struct should inherit from.
The following code is a complete example of setting up parameters. Note: it is
important that every parameter struct inherits from class Parameter<PType>,
where PType is the struct itself.
struct Param : public dmlc::Parameter<Param> {
float learning_rate;
int num_hidden;
std::string name;
// declare parameters in header file
DMLC_DECLARE_PARAMETER(Param) {
DMLC_DECLARE_FIELD(num_hidden).set_range(0, 1000);
DMLC_DECLARE_FIELD(learning_rate).set_default(0.01f);
DMLC_DECLARE_FIELD(name).set_default("hello");
}
};
// register it in cc file
DMLC_REGISTER_PARAMETER(Param);
The only difference from a normal struct is that we need to
declare all the fields, as well as their default values or constraints.
9.1 Member Functions¶
9.1.1 protected:
DECLARE¶
The DECLARE() function is the most important member function of Parameter.
It is called (via DMLC_DECLARE_FIELD) once for every field whenever __DECLARE__() runs.
Inside, it constructs and initializes a new instance of type FieldEntry<DType>.
The DType is determined by the type of its input variable ref. At last, it
adds the entry into the ParamManager.
template<typename DType>
inline parameter::FieldEntry<DType>& DECLARE(parameter::ParamManagerSingleton<PType> *manager,
const std::string &key, DType &ref) {
parameter::FieldEntry<DType> *e = new parameter::FieldEntry<DType>();
e->Init(key, this->head(), ref);
manager->manager.AddEntry(key, e);
return *e;
}
Here we can summarize the execution process of calling PType::__MANAGER__().
- (1) It initializes a new (static) ParamManagerSingleton. Inside its constructor, a temporary struct of type PType is built, and the __DECLARE__() function of that temporary struct is called to create a new FieldEntry<DType> for every field and add it as an entry of the ParamManager.
- (2) The ParamManager is returned for further operations on the struct, such as Init(), __DICT__() or __DOC__().
9.1.2 public:
Init¶
It initializes the parameters from keyword arguments stored in a container, which can
be a map or a vector of pair. Inside this function, the parameter struct
is initialized, every parameter is checked, and an error is thrown
if something is wrong.
template<typename Container>
inline void Init(const Container &kwargs,
parameter::ParamInitOption option = parameter::kAllowHidden) {
PType::__MANAGER__()->RunInit(static_cast<PType*>(this),
kwargs.begin(), kwargs.end(),
NULL,
option);
}
Following the example before, the usage of the Init() function is:
int main() {
Param param;
std::vector<std::pair<std::string, std::string> > param_data = {
{"num_hidden", "100"},
{"learning_rate", "0.1f"},
{"name", "MyNet"}
};
// set the parameters
param.Init(param_data);
return 0;
}
The execution process is as follows. First, it calls the __MANAGER__()
function of
the struct
.
ParamManager *PType::__MANAGER__() {
static ::dmlc::parameter::ParamManagerSingleton<PType> inst(#PType);
return &inst.manager;
}
Inside __MANAGER__(), a static ParamManagerSingleton is created, whose constructor
calls the member function __DECLARE__() of the struct. Inside __DECLARE__(), every parameter's
default value and constraints are set. Then, __MANAGER__() returns a pointer to the member
variable manager of type ParamManager.
Second, the member function RunInit() of the returned manager is called. We only consider
the case where all parameters are correctly given, so all parameters are set inside RunInit()
as described earlier in this notebook.
Let us summarize the usage of ./parameter.h. Its major difference from a normal struct is that it
introduces extra information, such as default values, constraints, and descriptions.
The parameters themselves are stored in the same way as usual; the difference is the way
they are manipulated. Whenever we manipulate them, the ParamManager held by the static
ParamManagerSingleton handles the work. Note that since the singleton is a function-local static,
the ParamManager is created only once on the first call to __MANAGER__() and lives until program
exit; it is not destroyed at the end of Init().
9.1.3 private:
head¶
head() returns the head pointer of the child structure.
inline PType *head() const {
return static_cast<PType*>(const_cast<Parameter<PType>*>(this));
}
9.1.4 public:
_DICT_¶
It gets the internal parameters and pushes them into a vector of pairs.
The first value of each pair is the parameter name, and the second value is the parameter
content converted to a string.
Then, it uses the range constructor to initialize a map.
Note on the range constructor: we can initialize an associative container from a range of
values, as long as those values can be converted to the type of the container.
inline std::map<std::string, std::string> __DICT__() const {
std::vector<std::pair<std::string, std::string> > vec
= PType::__MANAGER__()->GetDict(this->head());
return std::map<std::string, std::string>(vec.begin(), vec.end());
}
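A small usage sketch of __DICT__(), reusing the Param struct registered in the example above; the printed values are a rough expectation of what the implementation produces.
#include <dmlc/parameter.h>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

// assumes the Param struct and DMLC_REGISTER_PARAMETER(Param) from the example above are visible here

int main() {
  Param param;
  std::vector<std::pair<std::string, std::string> > kwargs = {
    {"num_hidden", "100"}, {"learning_rate", "0.1"}, {"name", "MyNet"}
  };
  param.Init(kwargs);
  std::map<std::string, std::string> dict = param.__DICT__();
  for (std::map<std::string, std::string>::const_iterator it = dict.begin();
       it != dict.end(); ++it) {
    std::cout << it->first << " = " << it->second << std::endl;
  }
  // expected output (in key order), roughly:
  //   learning_rate = 0.1
  //   name = MyNet
  //   num_hidden = 100
  return 0;
}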
9.1.5 public:
_FIELDS_¶
It simply returns the result of the GetFieldInfo() function of the ParamManager.
inline static std::vector<ParamFieldInfo> __FIELDS__() {
return PType::__MANAGER__()->GetFieldInfo();
}
9.1.6 public:
_DOC_¶
It simply outputs the result of the PrintDocString() function of the ParamManager.
inline static std::string __DOC__() {
std::ostringstream os;
PType::__MANAGER__()->PrintDocString(os);
return os.str();
}
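Similarly, a hedged sketch of __DOC__() and __FIELDS__() for the same Param struct; the exact docstring text may differ slightly, but it is built by PrintDocString() as described above.
#include <dmlc/parameter.h>
#include <iostream>
#include <vector>

// assumes the Param struct registered in the example of section 9 is visible here

int main() {
  std::cout << Param::__DOC__();
  // roughly:
  //   num_hidden : int, required
  //   learning_rate : float, optional, default=0.01
  //   name : string, optional, default='hello'

  std::vector<dmlc::ParamFieldInfo> fields = Param::__FIELDS__();
  std::cout << fields.size() << std::endl;  // 3
  return 0;
}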
Missing Explanation¶
std::string::length()¶
size_t length() const;
std::string::length()
returns the length of the string, in terms of bytes. e.g.
#include <iostream>
#include <string>
int main ()
{
std::string str ("Test string");
std::cout << "The size of str is " << str.length() << " bytes.\n";
return 0;
}
// output: The size of str is 11 bytes
map::find()¶
map::find(k)
searches the container for an element with a key
equivalent to k and returns an iterator to it if found, otherwise
it returns an iterator to map::end.
map::size()¶
map::size()
returns the number of elements in the map container.
map::count()¶
map::count(k)
searches the container for elements with a key equivalent
to k
and returns the number of matches.
Because all elements in a map container are unique, the
function can only return 1
(if the element is found) or
0
(otherwise).
map::at()¶
map::at(k)
returns a reference to the mapped value of the element
identified with key k
.
If k
does not match the key of any element in the container,
the function throws an out_of_range
exception.
std::string::resize()¶
void resize (size_t n);
void resize (size_t n, char c);
std::string::resize() resizes the string to a length of n characters.
If n is smaller than the current string length, the current value
is shortened to its first n characters, removing the characters beyond the n-th.
If n
is greater than the current string length, the current content is extended by
inserting at the end as many characters as needed to reach a size of n
. If c
is
specified, the new elements are initialized as copies of c
, otherwise, they are
value-initialized characters (null characters).
std::transform()¶
template< class InputIt, class OutputIt, class UnaryOperation >
OutputIt transform(InputIt first1, InputIt last1, OutputIt d_first, UnaryOperation unary_op);
std::transform()
applies the given function to a range and stores the result in another range, beginning at d_first
.
The unary operation unary_op
is applied to the range defined by [first1, last1)
.
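For example, the lowercasing step used in FieldEntry<bool>::Set can be reproduced on its own:
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

int main() {
  std::string value = "TrUe";
  std::string lower_case;
  lower_case.resize(value.length());
  // apply ::tolower to every character of `value`, writing into `lower_case`
  std::transform(value.begin(), value.end(), lower_case.begin(), ::tolower);
  std::cout << lower_case << std::endl;  // prints: true
  return 0;
}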
std::tolower()¶
std::tolower()
converts the given character to lowercase according to the character conversion rules.
Special Character in #define
¶
- ##: combines two operands (token pasting), e.g. #define conn(x,y) x##y makes int n = conn(123,456) result in n = 123456
- #@: adds single quotes (charizing operator, compiler-specific), e.g. #define ToChar(x) #@x makes char a = ToChar(1) result in a = '1'
- #: adds double quotes (stringizing), e.g. #define ToString(x) #x makes char* str = ToString(123) result in str = "123"
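A quick standalone check of the ## and # behaviors (the #@ charizing operator is compiler-specific and therefore omitted):
#include <cstdio>

#define conn(x, y) x##y      // token pasting
#define ToString(x) #x       // stringizing

int main() {
  int n = conn(123, 456);           // expands to: int n = 123456;
  const char *str = ToString(123);  // expands to: const char *str = "123";
  std::printf("%d %s\n", n, str);   // prints: 123456 123
  return 0;
}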
DMLC_ATTRIBUTE_UNUSED¶
Normally, the compiler warns if a variable is declared but is never referenced.
__attribute__((unused))
informs the compiler that you expect a variable to be unused and
tells it not to issue a warning if it is not used. So, in dmlc-core
, we define
#if defined(__GNUC__)
#define DMLC_ATTRIBUTE_UNUSED __attribute__((unused))
#else
#define DMLC_ATTRIBUTE_UNUSED
#endif
Missing Component¶
Parameter::InitAllowUnknown¶
We will consider it later, when we meet it.
template<typename Container>
inline std::vector<std::pair<std::string, std::string> >
InitAllowUnknown(const Container &kwargs) {
std::vector<std::pair<std::string, std::string> > unknown;
PType::__MANAGER__()->RunInit(static_cast<PType*>(this),
kwargs.begin(), kwargs.end(),
&unknown, parameter::kAllowUnknown);
return unknown;
}
Parameter::Save¶
We will leave it to the discussion of ./json.h.
inline void Save(dmlc::JSONWriter *writer) const {
writer->Write(this->__DICT__());
}
Parameter::Load¶
We will leave it to the discussion of ./json.h.
inline void Load(dmlc::JSONReader *reader) {
std::map<std::string, std::string> kwargs;
reader->Read(&kwargs);
this->Init(kwargs);
}
FieldEntry<optional<int> >¶
Currently, we have no idea of its usage.
template<>
class FieldEntry<optional<int> > : public FieldEntryBase<FieldEntry<optional<int> >, optional<int> > {
public:
// construct
FieldEntry<optional<int> >() : is_enum_(false) {}
// parent
typedef FieldEntryBase<FieldEntry<optional<int> >, optional<int> > Parent;
// override set
virtual void Set(void *head, const std::string &value) const {
if (is_enum_ && value != "None") {
std::map<std::string, int>::const_iterator it = enum_map_.find(value);
std::ostringstream os;
if (it == enum_map_.end()) {
os << "Invalid Input: \'" << value;
os << "\', valid values are: ";
PrintEnums(os);
throw dmlc::ParamError(os.str());
} else {
os << it->second;
Parent::Set(head, os.str());
}
} else {
Parent::Set(head, value);
}
}
virtual ParamFieldInfo GetFieldInfo() const {
if (is_enum_) {
ParamFieldInfo info;
std::ostringstream os;
info.name = key_;
info.type = type_;
PrintEnums(os);
if (has_default_) {
os << ',' << "optional, default=";
PrintDefaultValueString(os);
} else {
os << ", required";
}
info.type_info_str = os.str();
info.description = description_;
return info;
} else {
return Parent::GetFieldInfo();
}
}
// add enum
inline FieldEntry<optional<int> > &add_enum(const std::string &key, int value) {
CHECK_NE(key, "None") << "None is reserved for empty optional<int>";
if ((enum_map_.size() != 0 && enum_map_.count(key) != 0) || \
enum_back_map_.count(value) != 0) {
std::ostringstream os;
os << "Enum " << "(" << key << ": " << value << " exisit!" << ")\n";
os << "Enums: ";
for (std::map<std::string, int>::const_iterator it = enum_map_.begin();
it != enum_map_.end(); ++it) {
os << "(" << it->first << ": " << it->second << "), ";
}
throw dmlc::ParamError(os.str());
}
enum_map_[key] = value;
enum_back_map_[value] = key;
is_enum_ = true;
return this->self();
}
protected:
// enum flag
bool is_enum_;
// enum map
std::map<std::string, int> enum_map_;
// enum map
std::map<int, std::string> enum_back_map_;
// override print behavior
virtual void PrintDefaultValueString(std::ostream &os) const { // NOLINT(*)
os << '\'';
PrintValue(os, default_value_);
os << '\'';
}
// override print default
virtual void PrintValue(std::ostream &os, optional<int> value) const { // NOLINT(*)
if (is_enum_) {
if (!value) {
os << "None";
} else {
CHECK_NE(enum_back_map_.count(value.value()), 0)
<< "Value not found in enum declared";
os << enum_back_map_.at(value.value());
}
} else {
os << value;
}
}
private:
inline void PrintEnums(std::ostream &os) const { // NOLINT(*)
os << "{None";
for (std::map<std::string, int>::const_iterator
it = enum_map_.begin(); it != enum_map_.end(); ++it) {
os << ", ";
os << "\'" << it->first << '\'';
}
os << '}';
}
};
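Still, for reference, here is a heavily hedged sketch of how such an entry might be declared in practice (OptParam, the field name, and the default are all made up; this only mirrors how optional<int> parameters are commonly used in projects built on dmlc-core):
#include <dmlc/optional.h>
#include <dmlc/parameter.h>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct OptParam : public dmlc::Parameter<OptParam> {  // made-up example struct
  dmlc::optional<int> axis;
  DMLC_DECLARE_PARAMETER(OptParam) {
    DMLC_DECLARE_FIELD(axis)
        .set_default(dmlc::optional<int>())            // default is None
        .describe("Axis to operate on; None means all axes.");
  }
};
DMLC_REGISTER_PARAMETER(OptParam);

int main() {
  OptParam p;
  std::vector<std::pair<std::string, std::string> > kwargs = {{"axis", "1"}};
  p.Init(kwargs);
  std::cout << p.axis.value() << std::endl;  // 1
  return 0;
}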