OpenVDB 10.0.1
CudaDeviceBuffer.h
// Copyright Contributors to the OpenVDB Project
// SPDX-License-Identifier: MPL-2.0

/*!
    \file CudaDeviceBuffer.h

    \author Ken Museth

    \date January 8, 2020

    \brief Implements a simple CUDA allocator!

    CudaDeviceBuffer - a class for simple cuda buffer allocation and management
*/

#ifndef NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
#define NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED

#include "HostBuffer.h" // for BufferTraits

#include <cuda_runtime_api.h> // for cudaMalloc/cudaMallocManaged/cudaFree

#if defined(DEBUG) || defined(_DEBUG)
    static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
    {
        if (code != cudaSuccess) {
            fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line);
            if (abort) exit(code);
        }
    }
    static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true)
    {
        if (ptr == nullptr) {
            fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
            if (abort) exit(1);
        }
        if (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) {
            fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line);
            if (abort) exit(1);
        }
    }
#else
    static inline void gpuAssert(cudaError_t, const char*, int, bool = true){}
    static inline void ptrAssert(void*, const char*, const char*, int, bool = true){}
#endif

// Convenience macro for checking CUDA runtime API results;
// it can be wrapped around any runtime API call. No-op in release builds.
#define cudaCheck(ans) \
    { \
        gpuAssert((ans), __FILE__, __LINE__); \
    }

#define checkPtr(ptr, msg) \
    { \
        ptrAssert((ptr), (msg), __FILE__, __LINE__); \
    }

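// Illustrative usage (not part of the original header): in debug builds the
// cudaCheck and checkPtr macros above can wrap any CUDA runtime call or pointer, e.g.
//
//     float* d_tmp = nullptr;
//     cudaCheck(cudaMalloc((void**)&d_tmp, 1024));
//     checkPtr(d_tmp, "cudaMalloc failed");
//     cudaCheck(cudaFree(d_tmp));
//
// In release builds gpuAssert/ptrAssert are empty, so the checks compile away
// while the wrapped runtime calls are still executed.
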
namespace nanovdb {

// ----------------------------> CudaDeviceBuffer <--------------------------------------

/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC.
///        Obviously this class makes explicit use of CUDA, so replace it with your own memory
///        allocator if you are not using CUDA.
/// @note  While CUDA's pinned host memory allows for asynchronous memory copies between host and device,
///        it is significantly slower than cached (un-pinned) memory on the host.
class CudaDeviceBuffer
{
    uint64_t mSize; // total number of bytes for the NanoVDB grid.
    uint8_t *mCpuData, *mGpuData; // raw buffers for the NanoVDB grid on the host and device.

public:
    CudaDeviceBuffer(uint64_t size = 0)
        : mSize(0)
        , mCpuData(nullptr)
        , mGpuData(nullptr)
    {
        this->init(size);
    }
    /// @brief Disallow copy-construction
    CudaDeviceBuffer(const CudaDeviceBuffer&) = delete;
    /// @brief Move copy-constructor
    CudaDeviceBuffer(CudaDeviceBuffer&& other) noexcept
        : mSize(other.mSize)
        , mCpuData(other.mCpuData)
        , mGpuData(other.mGpuData)
    {
        other.mSize = 0;
        other.mCpuData = nullptr;
        other.mGpuData = nullptr;
    }
    /// @brief Disallow copy assignment operation
    CudaDeviceBuffer& operator=(const CudaDeviceBuffer&) = delete;
    /// @brief Move copy assignment operation
    CudaDeviceBuffer& operator=(CudaDeviceBuffer&& other) noexcept
    {
        clear();
        mSize = other.mSize;
        mCpuData = other.mCpuData;
        mGpuData = other.mGpuData;
        other.mSize = 0;
        other.mCpuData = nullptr;
        other.mGpuData = nullptr;
        return *this;
    }
    /// @brief Destructor frees memory on both the host and device
    ~CudaDeviceBuffer() { this->clear(); };

    void init(uint64_t size);

    /// @brief Returns a pointer to the raw memory buffer managed by this allocator.
    ///
    /// @warning Note that the pointer can be NULL if the allocator was not initialized!
    uint8_t* data() const { return mCpuData; }
    uint8_t* deviceData() const { return mGpuData; }

    /// @brief Copy grid from the CPU/host to the GPU/device. If @c sync is false the memory copy is asynchronous!
    ///
    /// @note This will allocate memory on the GPU/device if it is not already allocated
    void deviceUpload(void* stream = 0, bool sync = true) const;

    /// @brief Copy grid from the GPU/device to the CPU/host. If @c sync is false the memory copy is asynchronous!
    void deviceDownload(void* stream = 0, bool sync = true) const;

    /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator.
    uint64_t size() const { return mSize; }

    /// @brief Returns true if this allocator is empty, i.e. has no allocated memory
    bool empty() const { return mSize == 0; }

    /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL
    void clear();

    static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* context = nullptr);

}; // CudaDeviceBuffer class

template<>
struct BufferTraits<CudaDeviceBuffer>
{
    static const bool hasDeviceDual = true;
};

// --------------------------> Implementations below <------------------------------------

inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer*)
{
    return CudaDeviceBuffer(size);
}

inline void CudaDeviceBuffer::init(uint64_t size)
{
    if (size == mSize)
        return;
    if (mSize > 0)
        this->clear();
    if (size == 0)
        return;
    mSize = size;
    cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
    checkPtr(mCpuData, "failed to allocate host data");
} // CudaDeviceBuffer::init

inline void CudaDeviceBuffer::deviceUpload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    if (mGpuData == nullptr)
        cudaCheck(cudaMalloc((void**)&mGpuData, mSize)); // un-managed memory on the device, always 32B aligned!
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceUpload

inline void CudaDeviceBuffer::deviceDownload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceDownload

inline void CudaDeviceBuffer::clear()
{
    if (mGpuData)
        cudaCheck(cudaFree(mGpuData));
    if (mCpuData)
        cudaCheck(cudaFreeHost(mCpuData));
    mCpuData = mGpuData = nullptr;
    mSize = 0;
} // CudaDeviceBuffer::clear

} // namespace nanovdb

#endif // end of NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
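Below is a minimal usage sketch, not part of the header above. The kernel name scaleKernel and the byte count numBytes are made up for illustration; the buffer calls (create, data, size, deviceUpload, deviceData, deviceDownload) are the ones declared in this file, and the code must be compiled as CUDA (e.g. with nvcc). In practice CudaDeviceBuffer is typically used indirectly as the buffer type of a nanovdb::GridHandle, but a standalone host/device round trip looks like this:

#include "CudaDeviceBuffer.h"

// Hypothetical kernel, used only to show that deviceData() can be passed to device code.
__global__ void scaleKernel(uint8_t* data, uint64_t n)
{
    const uint64_t i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) data[i] *= 2;
}

int main()
{
    const uint64_t numBytes = 1 << 20; // arbitrary size chosen for this sketch
    nanovdb::CudaDeviceBuffer buffer = nanovdb::CudaDeviceBuffer::create(numBytes);

    for (uint64_t i = 0; i < buffer.size(); ++i)
        buffer.data()[i] = 1; // fill the pinned host buffer

    buffer.deviceUpload(); // allocates device memory on first use and copies host -> device

    const unsigned threads = 256;
    const unsigned blocks = unsigned((numBytes + threads - 1) / threads);
    scaleKernel<<<blocks, threads>>>(buffer.deviceData(), buffer.size());

    buffer.deviceDownload(); // copies device -> host; synchronizes because sync defaults to true
    return 0;
}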