|
|
Line 1: |
Line 1: |
| <seo title="NVIDIA CUDA Memory Management | CUDA Memory Management | CUDA" titlemode="replace" keywords="GStreamer, Linux SDK, Linux BSP, Embedded Linux, Device Drivers, NVIDIA, Xilinx, TI, NXP, Freescale, Embedded Linux driver development, Linux Software development, Embedded Linux SDK, Embedded Linux Application development, GStreamer Multimedia Framework,NVIDIA CUDA, CUDA Memory,CUDA, CUDA Memory Management, NVIDIA CUDA Memory,CUDA memory management, Jetson AGX Xavier, Jetson Xavier." description="This wiki is a brief summary of the CUDA memory management programming concepts for Jetson TX2 and Xavier boards."></seo>
| | {{#seo: |
| | | |title=NVIDIA CUDA Memory Management |
| | |title_mode=replace |
| | |description={{{description|This wiki is a brief summary of the CUDA memory management programming concepts for Jetson TX2 and Xavier boards.}}} |
| | }} |
| {{NVIDIA Pref Partner logo and RR Contact}} | | {{NVIDIA Pref Partner logo and RR Contact}} |
|
| |
|
Line 16: |
Line 19: |
| === Coding Example === | | === Coding Example === |
| The following is a coding example using zero-copy memory [https://arrayfire.com/zero-copy-on-tegra-k1/] | | The following is a coding example using zero-copy memory [https://arrayfire.com/zero-copy-on-tegra-k1/] |
| <syntaxhighlight lang="c">
| | {{NVIDIA CUDA Memory Management-Template1}} |
| // Set flag to enable zero copy access
| |
| cudaSetDeviceFlags(cudaDeviceMapHost);
| |
|
| |
| // Host Arrays (CPU pointers)
| |
| float* h_in = NULL;
| |
| float* h_out = NULL;
| |
|
| |
| // h_in will hold the input data to be processed (after allocation below)
| |
|
| |
| // Allocate host memory using CUDA allocation calls
| |
| cudaHostAlloc((void **)&h_in, sizeIn, cudaHostAllocMapped);
| |
| cudaHostAlloc((void **)&h_out, sizeOut, cudaHostAllocMapped);
| |
|
| |
| // Device arrays (GPU pointers)
| |
| float *d_out, *d_in;
| |
| // Get device pointer from host memory. No allocation or memcpy
| |
| cudaHostGetDevicePointer((void **)&d_in, (void *) h_in , 0);
| |
| cudaHostGetDevicePointer((void **)&d_out, (void *) h_out, 0);
| |
|
| |
| // Launch the GPU kernel
| |
| kernel<<<blocks, threads>>>(d_out, d_in);
| |
| | |
| // No need to copy d_out back, but the kernel launch is asynchronous:
| |
| // synchronize before the host reads the results
| |
| cudaDeviceSynchronize();
| |
| // Continue processing on host using h_out
| |
| </syntaxhighlight>
| |
|
| |
|
| == Unified Memory Programming (UM)== | | == Unified Memory Programming (UM)== |