1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
|
// Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: Johannes L. Schoenberger (jsch at inf.ethz.ch)
#ifndef COLMAP_SRC_MVS_CUDA_FLIP_H_
#define COLMAP_SRC_MVS_CUDA_FLIP_H_
#include <cuda_runtime.h>
namespace colmap {
namespace mvs {
// Flip the input matrix horizontally.
template <typename T>
void CudaFlipHorizontal(const T* input, T* output, const int width,
const int height, const int pitch_input,
const int pitch_output);
////////////////////////////////////////////////////////////////////////////////
// Implementation
////////////////////////////////////////////////////////////////////////////////
#ifdef __CUDACC__
// TILE_DIM_FLIP must divide by BLOCK_ROWS. Do not change these values.
#define TILE_DIM_FLIP 32
#define BLOCK_ROWS_FLIP 8
namespace internal {
template <typename T>
__global__ void CudaFlipHorizontalKernel(T* output_data, const T* input_data,
const int width, const int height,
const int input_pitch,
const int output_pitch) {
int x_index = blockIdx.x * TILE_DIM_FLIP + threadIdx.x;
const int y_index = blockIdx.y * TILE_DIM_FLIP + threadIdx.y;
__shared__ T tile[TILE_DIM_FLIP][TILE_DIM_FLIP + 1];
const int tile_x = min(threadIdx.x, width - 1 - blockIdx.x * TILE_DIM_FLIP);
const int tile_y = min(threadIdx.y, height - 1 - blockIdx.y * TILE_DIM_FLIP);
for (int i = 0; i < TILE_DIM_FLIP; i += BLOCK_ROWS_FLIP) {
const int x = min(x_index, width - 1);
const int y = min(y_index, height - i - 1);
const int index = y * input_pitch + x + i * input_pitch;
tile[tile_y + i][tile_x] = input_data[index];
}
__syncthreads();
x_index = width - 1 - (blockIdx.x * TILE_DIM_FLIP + threadIdx.x);
if (x_index < width) {
const int index = x_index + y_index * output_pitch;
for (int i = 0; i < TILE_DIM_FLIP; i += BLOCK_ROWS_FLIP) {
if (y_index + i < height) {
output_data[index + i * output_pitch] =
tile[threadIdx.y + i][threadIdx.x];
}
}
}
}
} // namespace internal
template <typename T>
void CudaFlipHorizontal(const T* input, T* output, const int width,
const int height, const int pitch_input,
const int pitch_output) {
dim3 block_dim(TILE_DIM_FLIP, BLOCK_ROWS_FLIP, 1);
dim3 grid_dim;
grid_dim.x = (width - 1) / TILE_DIM_FLIP + 1;
grid_dim.y = (height - 1) / TILE_DIM_FLIP + 1;
internal::CudaFlipHorizontalKernel<<<grid_dim, block_dim>>>(
output, input, width, height, pitch_input / sizeof(T),
pitch_output / sizeof(T));
}
#undef TILE_DIM_FLIP
#undef BLOCK_ROWS_FLIP
#endif // __CUDACC__
} // namespace mvs
} // namespace colmap
#endif // COLMAP_SRC_MVS_CUDA_FLIP_H_
|