Contains utility functions for the host related to GPUs. More...

Classes
class	_Block

Functions
cudaDeviceProp *	getGPUDeviceInfo (unsigned int device)
	send device information to the gpu More...

template<typename T >
T *	copyArrayToDevice (const T *in, unsigned int numElems)

template<typename T >
T *	copyValueToDevice (const T *in)

template<typename T >
T *	alloc1DCudaArray (unsigned int sz1)

template<typename T >
T **	alloc2DCudaArray (unsigned int sz1, unsigned int sz2)
	allocates a 2D cuda array on the device. More...

template<typename T >
T **	alloc2DCudaArray (T **&hostPtr, unsigned int sz1, unsigned int sz2)
	allocates a 2D cuda array on the device. More...

template<typename T >
T **	alloc2DCudaArray (const T **in, unsigned int sz1, unsigned int sz2)
	allocates a 2D cuda array on the device and copies data. More...

template<typename T >
void	copyArrayToDeviceAsync (const T *in, T *__deviceptr, unsigned int numElems, const cudaStream_t stream)

template<typename T >
void	copy2DCudaArrayToDeviceAsync (const T **in, T **__devicePtr, unsigned int sz1, unsigned int sz2, const cudaStream_t stream)
	allocates a 2D cuda array on the device and copies data. More...

template<typename T >
void	copyArrayToHostAsync (T host_ptr, const T __deviceptr, unsigned int numElems, const cudaStream_t stream)

template<typename T >
void	dealloc2DCudaArray (T **&__array2D, unsigned int sz1)
	deallocates the 2D cuda array. More...

void	computeDendroBlockToGPUMap (const ot::Block blkList, unsigned int numBlocks, unsigned int &blockMap, dim3 &gridDim)

template<typename T >
void	copyArrayToHost (T *host_ptr, const T *__device_ptr, unsigned int numElems)

template<typename T >
void	copy2DArrayToHost (T **host_ptr, const T **__device_ptr, unsigned int sz1, unsigned int sz2)

Detailed Description

Contains utility functions for the host related to GPUs.

Author: Milinda Fernando School of Computing, University of Utah

Function Documentation

◆ alloc1DCudaArray()

template<typename T >

T * cuda::alloc1DCudaArray ( unsigned int sz1 )

allocates 1D array

Parameters

[in] sz1

Returns: the pointer to the device allocation

◆ alloc2DCudaArray() [1/3]

template<typename T >

T ** cuda::alloc2DCudaArray	(	unsigned int	sz1,
		unsigned int	sz2
	)

allocates a 2D cuda array on the device.

Parameters

[in]	sz1	dim 1 size
[in]	sz2	dim 2 size

Returns: the double pointer to the 2D array.

◆ alloc2DCudaArray() [2/3]

template<typename T >

T ** cuda::alloc2DCudaArray	(	T **&	hostPtr,
		unsigned int	sz1,
		unsigned int	sz2
	)

allocates a 2D cuda array on the device.

Parameters

[out]	hostPtr	2D pointer accessible from the host.
[in]	sz1	dim 1 size
[in]	sz2	dim 2 size

Returns: the double pointer to the 2D array (device pointer).

◆ alloc2DCudaArray() [3/3]

template<typename T >

T ** cuda::alloc2DCudaArray	(	const T **	in,
		unsigned int	sz1,
		unsigned int	sz2
	)

allocates a 2D cuda array on the device and copies data.

Parameters

[in]	sz1	dim 1 size
[in]	sz2	dim 2 size

Returns: the double pointer to the 2D array.

◆ copy2DArrayToHost()

template<typename T >

void cuda::copy2DArrayToHost	(	T **	host_ptr,
		const T **	__device_ptr,
		unsigned int	sz1,
		unsigned int	sz2
	)

copy 2D array from device to host memory

Parameters

[in]	__device_ptr	: 2D pointer to the device
[in]	sz1	: size1
[in]	sz2	: size2
[out]	host_ptr	host ptr

◆ copy2DCudaArrayToDeviceAsync()

template<typename T >

void cuda::copy2DCudaArrayToDeviceAsync	(	const T **	in,
		T **	__devicePtr,
		unsigned int	sz1,
		unsigned int	sz2,
		const cudaStream_t	stream
	)

allocates a 2D cuda array on the device and copies data.

Parameters

[in]	sz1	dim 1 size
[in]	sz2	dim 2 size

Returns: the double pointer to the 2D array.

◆ copyArrayToDevice()

template<typename T >

T * cuda::copyArrayToDevice	(	const T *	in,
		unsigned int	numElems
	)

send mesh blocks to the gpu

Parameters

[in]	in	: input array
Returns: device pointer where the data is copied to.

◆ copyArrayToDeviceAsync()

template<typename T >

void cuda::copyArrayToDeviceAsync	(	const T *	in,
		T *	__deviceptr,
		unsigned int	numElems,
		const cudaStream_t	stream
	)

send mesh blocks to the gpu (async)

Parameters

[in]	in	: input array
[out]	__deviceptr	device pointer where the data is copied to.

◆ copyArrayToHost()

template<typename T >

void cuda::copyArrayToHost	(	T *	host_ptr,
		const T *	__device_ptr,
		unsigned int	numElems
	)

copy array from device to host memory

Parameters

[in]	__device_ptr	: device pointer
[in]	numElems	: number of elements
[out]	host_ptr	host ptr

◆ copyValueToDevice()

template<typename T >

T * cuda::copyValueToDevice ( const T * in )

inline

copy value to device

Parameters

[in]	in	: input value
Returns: device pointer where the data is copied to.

◆ dealloc2DCudaArray()

template<typename T >

void cuda::dealloc2DCudaArray	(	T **&	__array2D,
		unsigned int	sz1
	)

deallocates the 2D cuda array.

Parameters

[in] sz1 dim 1 size

◆ getGPUDeviceInfo()

cudaDeviceProp * cuda::getGPUDeviceInfo ( unsigned int device )

send device information to the gpu

Parameters

[in] device : gpu device ID

Returns: cudaDeviceProp allocated on the device

Classes

Functions

Detailed Description

Function Documentation

◆ alloc1DCudaArray()

◆ alloc2DCudaArray() [1/3]

◆ alloc2DCudaArray() [2/3]

◆ alloc2DCudaArray() [3/3]

◆ copy2DArrayToHost()

◆ copy2DCudaArrayToDeviceAsync()

◆ copyArrayToDevice()

◆ copyArrayToDeviceAsync()

◆ copyArrayToHost()

◆ copyValueToDevice()

◆ dealloc2DCudaArray()

◆ getGPUDeviceInfo()