Open3D (C++ API)  0.15.1
SlabHashBackend.h
Go to the documentation of this file.
1// ----------------------------------------------------------------------------
2// - Open3D: www.open3d.org -
3// ----------------------------------------------------------------------------
4// The MIT License (MIT)
5//
6// Copyright (c) 2018-2021 www.open3d.org
7//
8// Permission is hereby granted, free of charge, to any person obtaining a copy
9// of this software and associated documentation files (the "Software"), to deal
10// in the Software without restriction, including without limitation the rights
11// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12// copies of the Software, and to permit persons to whom the Software is
13// furnished to do so, subject to the following conditions:
14//
15// The above copyright notice and this permission notice shall be included in
16// all copies or substantial portions of the Software.
17//
18// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24// IN THE SOFTWARE.
25// ----------------------------------------------------------------------------
26
27#pragma once
28
29#include <memory>
30
35
36namespace open3d {
37namespace core {
38template <typename Key, typename Hash, typename Eq>
40public:
41 SlabHashBackend(int64_t init_capacity,
42 int64_t key_dsize,
43 const std::vector<int64_t>& value_dsizes,
44 const Device& device);
45
47
48 void Reserve(int64_t capacity) override;
49
50 void Insert(const void* input_keys,
51 const std::vector<const void*>& input_values_soa,
52 buf_index_t* output_buf_indices,
53 bool* output_masks,
54 int64_t count) override;
55
56 void Find(const void* input_keys,
57 buf_index_t* output_buf_indices,
58 bool* output_masks,
59 int64_t count) override;
60
61 void Erase(const void* input_keys,
62 bool* output_masks,
63 int64_t count) override;
64
65 int64_t GetActiveIndices(buf_index_t* output_indices) override;
66 void Clear() override;
67
68 int64_t Size() const override;
69 int64_t GetBucketCount() const override;
70 std::vector<int64_t> BucketSizes() const override;
71 float LoadFactor() const override;
72
74
75 void Allocate(int64_t capacity) override;
76 void Free() override;
77
78protected:
82
84 std::shared_ptr<SlabNodeManager> node_mgr_;
85
87};
88
89template <typename Key, typename Hash, typename Eq>
91 int64_t init_capacity,
92 int64_t key_dsize,
93 const std::vector<int64_t>& value_dsizes,
94 const Device& device)
95 : DeviceHashBackend(init_capacity, key_dsize, value_dsizes, device) {
96 Allocate(init_capacity);
97}
98
99template <typename Key, typename Hash, typename Eq>
101 Free();
102}
103
104template <typename Key, typename Hash, typename Eq>
106
107template <typename Key, typename Hash, typename Eq>
108void SlabHashBackend<Key, Hash, Eq>::Find(const void* input_keys,
109 buf_index_t* output_buf_indices,
110 bool* output_masks,
111 int64_t count) {
112 if (count == 0) return;
113
114 OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
116 OPEN3D_CUDA_CHECK(cudaGetLastError());
117
118 const int64_t num_blocks =
119 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
120 FindKernel<<<num_blocks, kThreadsPerBlock, 0, core::cuda::GetStream()>>>(
121 impl_, input_keys, output_buf_indices, output_masks, count);
123 OPEN3D_CUDA_CHECK(cudaGetLastError());
124}
125
126template <typename Key, typename Hash, typename Eq>
127void SlabHashBackend<Key, Hash, Eq>::Erase(const void* input_keys,
128 bool* output_masks,
129 int64_t count) {
130 if (count == 0) return;
131
132 OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
134 OPEN3D_CUDA_CHECK(cudaGetLastError());
135 auto buf_indices = static_cast<buf_index_t*>(
136 MemoryManager::Malloc(sizeof(buf_index_t) * count, this->device_));
137
138 const int64_t num_blocks =
139 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
140 EraseKernelPass0<<<num_blocks, kThreadsPerBlock, 0,
141 core::cuda::GetStream()>>>(
142 impl_, input_keys, buf_indices, output_masks, count);
143 EraseKernelPass1<<<num_blocks, kThreadsPerBlock, 0,
144 core::cuda::GetStream()>>>(impl_, buf_indices,
145 output_masks, count);
147 OPEN3D_CUDA_CHECK(cudaGetLastError());
148
149 MemoryManager::Free(buf_indices, this->device_);
150}
151
152template <typename Key, typename Hash, typename Eq>
154 buf_index_t* output_buf_indices) {
155 uint32_t* count = static_cast<uint32_t*>(
156 MemoryManager::Malloc(sizeof(uint32_t), this->device_));
157 OPEN3D_CUDA_CHECK(cudaMemset(count, 0, sizeof(uint32_t)));
158
160 OPEN3D_CUDA_CHECK(cudaGetLastError());
161
162 const int64_t num_blocks =
163 (impl_.bucket_count_ * kWarpSize + kThreadsPerBlock - 1) /
164 kThreadsPerBlock;
165 GetActiveIndicesKernel<<<num_blocks, kThreadsPerBlock, 0,
166 core::cuda::GetStream()>>>(
167 impl_, output_buf_indices, count);
169 OPEN3D_CUDA_CHECK(cudaGetLastError());
170
171 uint32_t ret;
172 MemoryManager::MemcpyToHost(&ret, count, this->device_, sizeof(uint32_t));
173 MemoryManager::Free(count, this->device_);
174
175 return static_cast<int64_t>(ret);
176}
177
178template <typename Key, typename Hash, typename Eq>
180 // Clear the heap
181 this->buffer_->ResetHeap();
182
183 // Clear the linked list heads
184 OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
185 sizeof(Slab) * this->bucket_count_));
187 OPEN3D_CUDA_CHECK(cudaGetLastError());
188
189 // Clear the linked list nodes
190 node_mgr_->Reset();
191}
192
193template <typename Key, typename Hash, typename Eq>
195 return this->buffer_->GetHeapTopIndex();
196}
197
198template <typename Key, typename Hash, typename Eq>
200 return bucket_count_;
201}
202
203template <typename Key, typename Hash, typename Eq>
205 thrust::device_vector<int64_t> elems_per_bucket(impl_.bucket_count_);
206 thrust::fill(elems_per_bucket.begin(), elems_per_bucket.end(), 0);
207
208 const int64_t num_blocks =
209 (impl_.buffer_accessor_.capacity_ + kThreadsPerBlock - 1) /
210 kThreadsPerBlock;
211 CountElemsPerBucketKernel<<<num_blocks, kThreadsPerBlock, 0,
212 core::cuda::GetStream()>>>(
213 impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
215 OPEN3D_CUDA_CHECK(cudaGetLastError());
216
217 std::vector<int64_t> result(impl_.bucket_count_);
218 thrust::copy(elems_per_bucket.begin(), elems_per_bucket.end(),
219 result.begin());
220 return result;
221}
222
223template <typename Key, typename Hash, typename Eq>
225 return float(Size()) / float(this->bucket_count_);
226}
227
228template <typename Key, typename Hash, typename Eq>
230 const void* input_keys,
231 const std::vector<const void*>& input_values_soa,
232 buf_index_t* output_buf_indices,
233 bool* output_masks,
234 int64_t count) {
235 if (count == 0) return;
236
239 int prev_heap_top = this->buffer_->GetHeapTopIndex();
240 *thrust::device_ptr<int>(impl_.buffer_accessor_.heap_top_) =
241 prev_heap_top + count;
242
243 const int64_t num_blocks =
244 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
245 InsertKernelPass0<<<num_blocks, kThreadsPerBlock, 0,
246 core::cuda::GetStream()>>>(
247 impl_, input_keys, output_buf_indices, prev_heap_top, count);
248 InsertKernelPass1<<<num_blocks, kThreadsPerBlock, 0,
249 core::cuda::GetStream()>>>(
250 impl_, input_keys, output_buf_indices, output_masks, count);
251
252 thrust::device_vector<const void*> input_values_soa_device(
253 input_values_soa.begin(), input_values_soa.end());
254
255 int64_t n_values = input_values_soa.size();
256 const void* const* ptr_input_values_soa =
257 thrust::raw_pointer_cast(input_values_soa_device.data());
258 DISPATCH_DIVISOR_SIZE_TO_BLOCK_T(
259 impl_.buffer_accessor_.common_block_size_, [&]() {
260 InsertKernelPass2<Key, Hash, Eq, block_t>
261 <<<num_blocks, kThreadsPerBlock, 0,
262 core::cuda::GetStream()>>>(
263 impl_, ptr_input_values_soa, output_buf_indices,
264 output_masks, count, n_values);
265 });
267 OPEN3D_CUDA_CHECK(cudaGetLastError());
268}
269
270template <typename Key, typename Hash, typename Eq>
272 this->bucket_count_ = capacity * 2;
273 this->capacity_ = capacity;
274
275 // Allocate buffer for key values.
276 this->buffer_ = std::make_shared<HashBackendBuffer>(
277 this->capacity_, this->key_dsize_, this->value_dsizes_,
278 this->device_);
279 buffer_accessor_.Setup(*this->buffer_);
280
281 // Allocate buffer for linked list nodes.
282 node_mgr_ = std::make_shared<SlabNodeManager>(this->device_);
283
284 // Allocate linked list heads.
285 impl_.bucket_list_head_ = static_cast<Slab*>(MemoryManager::Malloc(
286 sizeof(Slab) * this->bucket_count_, this->device_));
287 OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
288 sizeof(Slab) * this->bucket_count_));
290 OPEN3D_CUDA_CHECK(cudaGetLastError());
291
292 impl_.Setup(this->bucket_count_, node_mgr_->impl_, buffer_accessor_);
293}
294
295template <typename Key, typename Hash, typename Eq>
297 buffer_accessor_.Shutdown(this->device_);
298 MemoryManager::Free(impl_.bucket_list_head_, this->device_);
299}
300} // namespace core
301} // namespace open3d
Common CUDA utilities.
#define OPEN3D_CUDA_CHECK(err)
Definition: CUDAUtils.h:66
Definition: CUDAHashBackendBufferAccessor.h:43
Definition: DeviceHashBackend.h:39
Definition: Device.h:39
static void MemcpyToHost(void *host_ptr, const void *src_ptr, const Device &src_device, size_t num_bytes)
Same as Memcpy, but with host (CPU:0) as default dst_device.
Definition: MemoryManager.cpp:94
static void * Malloc(size_t byte_size, const Device &device)
Definition: MemoryManager.cpp:41
static void Free(void *ptr, const Device &device)
Frees previously allocated memory at address ptr on device device.
Definition: MemoryManager.cpp:47
Definition: SlabHashBackend.h:39
SlabHashBackend(int64_t init_capacity, int64_t key_dsize, const std::vector< int64_t > &value_dsizes, const Device &device)
Definition: SlabHashBackend.h:90
void Free() override
Definition: SlabHashBackend.h:296
CUDAHashBackendBufferAccessor buffer_accessor_
Definition: SlabHashBackend.h:83
void Allocate(int64_t capacity) override
Definition: SlabHashBackend.h:271
~SlabHashBackend()
Definition: SlabHashBackend.h:100
float LoadFactor() const override
Get the current load factor, defined as size / bucket count.
Definition: SlabHashBackend.h:224
std::shared_ptr< SlabNodeManager > node_mgr_
Definition: SlabHashBackend.h:84
int64_t GetActiveIndices(buf_index_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition: SlabHashBackend.h:153
SlabHashBackendImpl< Key, Hash, Eq > impl_
Definition: SlabHashBackend.h:81
void Insert(const void *input_keys, const std::vector< const void * > &input_values_soa, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition: SlabHashBackend.h:229
int64_t bucket_count_
Definition: SlabHashBackend.h:86
int64_t Size() const override
Get the size (number of valid entries) of the hash map.
Definition: SlabHashBackend.h:194
int64_t GetBucketCount() const override
Get the number of buckets of the hash map.
Definition: SlabHashBackend.h:199
void Reserve(int64_t capacity) override
Definition: SlabHashBackend.h:105
void Clear() override
Clear stored map without reallocating memory.
Definition: SlabHashBackend.h:179
std::vector< int64_t > BucketSizes() const override
Get the number of entries per bucket.
Definition: SlabHashBackend.h:204
SlabHashBackendImpl< Key, Hash, Eq > GetImpl()
Definition: SlabHashBackend.h:73
void Find(const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition: SlabHashBackend.h:108
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition: SlabHashBackend.h:127
Definition: SlabHashBackendImpl.h:64
Definition: SlabNodeManager.h:58
int count
Definition: FilePCD.cpp:61
void Synchronize()
Definition: CUDAUtils.cpp:78
__global__ void InsertKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:512
__global__ void InsertKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, int heap_counter_prev, int64_t count)
Kernels.
Definition: SlabHashBackendImpl.h:493
uint32_t buf_index_t
Definition: HashBackendBuffer.h:63
__global__ void EraseKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:655
__global__ void FindKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:581
__global__ void EraseKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:620
__global__ void GetActiveIndicesKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, uint32_t *output_count)
Definition: SlabHashBackendImpl.h:666
__global__ void CountElemsPerBucketKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, int64_t *bucket_elem_counts)
Definition: SlabHashBackendImpl.h:706
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:567
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:479
Definition: PinholeCameraIntrinsic.cpp:35