环境

编译通过的版本

各章思维导图

mindmap
  root((Getting Started))
    Installation
      Python env
      TVM build (LLVM, CUDA)
      MXNet
    Vector Add
      NumPy impl
      TVM compute
      Schedule
      Build & Run
    Neural Network Inference
      Pretrained model (ResNet18)
      Relay IR
      Compilation (LLVM)
      Save/Load Module
    Remote Execution
      RPC server
      Cross-compilation
      Run on remote device
mindmap
  root((Expressions for Operators))
    Data Types
      float32
      int8
      cast/astype
    Shapes
      te.var
      multi-dim tensors
    Index & Shape Expressions
      Transpose
      Reshape
      Slicing
    Reduction
      te.reduce_axis
      te.sum, te.max, te.min
      comm_reducer
    Conditional
      if_then_else
    Truth Values
      all
      any
mindmap
  root((Common Operators))
    Broadcast Add
    MatMul
    Conv2D
    Depthwise Conv
    Pooling
      MaxPool
      AvgPool
    BatchNorm
mindmap
  root((CPU Operator Optimizations))
    CPU Architecture
      Cache
      SIMD
      Pipeline
    Function Call Overhead
    Vector Add
    Broadcast Add
    MatMul
      Loop blocking
      Vectorization
    Convolution
      Packed layout
    Pooling
    BatchNorm
mindmap
  root((GPU Operator Optimizations))
    GPU Architecture
      Warp
      SM
      Shared Memory
    Vector Add
    Broadcast Add
    MatMul
      Thread tiling
      Shared memory
    Conv2D
    Depthwise Conv
    Pooling
    BatchNorm

TVM编译

# Install the toolchain and libraries needed to build TVM with LLVM 15.
# Refresh the package index first so the versioned LLVM packages resolve.
sudo apt-get update
sudo apt-get install -y -q \
        git \
        cmake \
        build-essential \
        libtinfo-dev \
        libffi-dev \
        zlib1g-dev \
        llvm-15-dev \
        libclang-15-dev \
        ninja-build

# CUDA toolkit (required because USE_CUDA is enabled in the build below).
# Use apt-get with -y so this step is non-interactive like the one above.
sudo apt-get install -y -q nvidia-cuda-toolkit

# Fetch TVM together with its submodules, then build out-of-tree in tvm-src/build.
git clone --recursive https://github.com/apache/tvm tvm-src
# Enter the checkout first: ../cmake/config.cmake below is resolved relative to tvm-src/build.
cd tvm-src
mkdir -p build && cd build

# Start from the template config and append overrides; later set() calls win.
cp ../cmake/config.cmake .
echo "set(USE_LLVM \"llvm-config-15 --ignore-libllvm --link-static\")" >> config.cmake
# Append instead of sed-editing so this does not depend on the exact text of the template line.
echo "set(USE_CUDA ON)" >> config.cmake
# This is a CMake setting, so it must be written into config.cmake rather than run in the shell.
echo "set(USE_GTEST OFF)" >> config.cmake

cmake ../
cmake --build . --parallel "$(nproc)"

d2ltvm相关手册

结论