Point Cloud Library (PCL) 1.12.1
NCVAlg.hpp
1/*
2 * Software License Agreement (BSD License)
3 *
4 * Point Cloud Library (PCL) - www.pointclouds.org
5 * Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
6 * Third party copyrights are property of their respective owners.
7 *
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * * Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials provided
19 * with the distribution.
20 * * Neither the name of Willow Garage, Inc. nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 *
37 * $Id: $
38 * Ported to PCL by Koen Buys : Attention Work in progress!
39 */
40
41#ifndef _ncv_alg_hpp_
42#define _ncv_alg_hpp_
43
44#include "NCV.hpp"
45
46
/**
 * \brief Exchange the values held by two objects of the same type.
 *
 * Uses one temporary copy and two assignments, so T must be copy-constructible
 * and copy-assignable.
 *
 * \param[in,out] p1 first object; holds the former value of \a p2 on return
 * \param[in,out] p2 second object; holds the former value of \a p1 on return
 */
template <class T>
static void swap(T &p1, T &p2)
{
    // Park the second value, overwrite it with the first, then restore the
    // parked value into the first slot.
    T parked = p2;
    p2 = p1;
    p1 = parked;
}
54
55
/**
 * \brief Divide \a a by \a b, rounding the quotient up.
 *
 * Evaluates (a + b - 1) / b in a single expression. For non-negative integer
 * operands this is the smallest count of chunks of size \a b that covers \a a.
 *
 * \note The intermediate sum a + b - 1 can overflow when \a a is close to the
 *       maximum of the arithmetic type; \a b must be non-zero.
 *
 * \param[in] a dividend (e.g. number of elements)
 * \param[in] b divisor (e.g. chunk size)
 * \return (a + b - 1) / b
 */
template<typename T>
static T divUp(T a, T b)
{
    // Kept as one expression so integer promotion of narrow types applies to
    // the whole computation before the result is converted back to T.
    return (b + a - 1) / b;
}
61
62
/**
 * \brief Reduction functor that accumulates values by addition.
 *
 * Intended to be passed as the Tfunc argument of subReduce(): assign() copies
 * one element, reduce() folds a second element into the first with +=.
 */
template<typename T>
struct functorAddValues
{
    /**
     * \brief Copy one element.
     * \param[out] dst destination element
     * \param[in]  src source element
     */
    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
    {
        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
        *dst = *src;
    }

    /**
     * \brief Fold \a in2 into \a in1out by addition.
     * \param[in,out] in1out accumulator; receives in1out + in2
     * \param[in]     in2    value to add
     */
    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
    {
        in1out += in2;
    }
};
76
77
/**
 * \brief Reduction functor that keeps the smaller of two values.
 *
 * Intended to be passed as the Tfunc argument of subReduce(): assign() copies
 * one element, reduce() replaces the accumulator with the minimum of the pair.
 */
template<typename T>
struct functorMinValues
{
    /**
     * \brief Copy one element.
     * \param[out] dst destination element
     * \param[in]  src source element
     */
    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
    {
        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
        *dst = *src;
    }

    /**
     * \brief Fold \a in2 into \a in1out by taking the minimum.
     * \param[in,out] in1out accumulator; receives \a in2 when in1out > in2, otherwise unchanged
     * \param[in]     in2    candidate value
     */
    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
    {
        in1out = in1out > in2 ? in2 : in1out;
    }
};
91
92
/**
 * \brief Reduction functor that keeps the larger of two values.
 *
 * Intended to be passed as the Tfunc argument of subReduce(): assign() copies
 * one element, reduce() replaces the accumulator with the maximum of the pair.
 */
template<typename T>
struct functorMaxValues
{
    /**
     * \brief Copy one element.
     * \param[out] dst destination element
     * \param[in]  src source element
     */
    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
    {
        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
        *dst = *src;
    }

    /**
     * \brief Fold \a in2 into \a in1out by taking the maximum.
     * \param[in,out] in1out accumulator; unchanged when in1out > in2, otherwise receives \a in2
     * \param[in]     in2    candidate value
     */
    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
    {
        in1out = in1out > in2 ? in1out : in2;
    }
};
106
107
/**
 * \brief Block-wide reduction of one value per thread through shared memory.
 *
 * Every thread stores its element at index threadIdx.x of a shared array of
 * nThreads entries. The array is then folded in halves (offsets 128, 64, 32,
 * 16, 8, 4, 2, 1) with Tfunc::reduce until entry 0 holds the combined value,
 * which is returned to every calling thread.
 *
 * Synchronization: every folding step is followed by a block barrier, so the
 * routine does not depend on threads of a warp executing in lockstep. A final
 * barrier after the result is read makes back-to-back calls safe, because the
 * shared array is reused by the next call.
 *
 * Preconditions:
 *  - must be reached by all threads of the block (it contains __syncthreads());
 *  - indexes by threadIdx.x only; presumably blockDim.x == nThreads is expected
 *    (confirm against callers);
 *  - elements at indices >= 256 are never folded in, and no folding at all is
 *    performed when nThreads < 32 (entry 0 is returned as stored by thread 0).
 *
 * \tparam Tdata    element type
 * \tparam Tfunc    functor type providing assign(dst, src) and reduce(in1out, in2)
 * \tparam nThreads compile-time size of the shared scratch array
 * \param[in] threadElem value contributed by the calling thread
 * \return value of shared entry 0 after all folding steps
 */
template<typename Tdata, class Tfunc, Ncv32u nThreads>
static __device__ Tdata subReduce(Tdata threadElem)
{
    Tfunc functor;

    __shared__ Tdata _reduceArr[nThreads];
    volatile Tdata *reduceArr = _reduceArr;

    // Publish this thread's element, then wait until the whole array is filled.
    functor.assign(reduceArr + threadIdx.x, &threadElem);
    __syncthreads();

    if (nThreads >= 256 && threadIdx.x < 128)
    {
        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 128]);
    }
    __syncthreads();

    if (nThreads >= 128 && threadIdx.x < 64)
    {
        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 64]);
    }
    __syncthreads();

    if (nThreads >= 64 && threadIdx.x < 32)
    {
        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 32]);
    }
    __syncthreads();

    // nThreads is a compile-time constant, so this branch is uniform across
    // the block and the barriers inside it are reached by every thread.
    if (nThreads >= 32)
    {
        // In each step threads [0, offset) write entries [0, offset) and read
        // entries [offset, 2*offset): the read and write sets are disjoint, and
        // the barrier orders this step's writes before the next step's reads.
        #pragma unroll
        for (Ncv32u offset = 16; offset > 0; offset >>= 1)
        {
            if (threadIdx.x < offset)
            {
                functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + offset]);
            }
            __syncthreads();
        }
    }

    // Broadcast: every thread reads the combined value from entry 0.
    Tdata reduceRes;
    functor.assign(&reduceRes, reduceArr);

    // Keep the shared array intact until all threads have read the result;
    // without this a subsequent call could overwrite entry 0 too early.
    __syncthreads();
    return reduceRes;
}
151
152
153#endif //_ncv_alg_hpp_
// Doxygen cross-references carried over from the documentation page
// (not part of the header itself):
//   __host__ __device__ void swap(float &a, float &b)
//       Definition: eigen.h:155
//   static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
//       Definition: NCVAlg.hpp:71
//   static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
//       Definition: NCVAlg.hpp:66
//   static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
//       Definition: NCVAlg.hpp:96
//   static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
//       Definition: NCVAlg.hpp:101
//   static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
//       Definition: NCVAlg.hpp:86
//   static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
//       Definition: NCVAlg.hpp:81