Implementation of the management component of the GPUart Implemenation layer. More...

#include "GPUart_Impl.cuh"
#include "GPUart_Impl.h"
#include "GPUart_Impl_Abstr_IF.h"
#include "GPUart_Impl_Sched_IF.h"
#include "GPUart_Sobel.cuh"
#include "GPUart_MatrMul.cuh"

Include dependency graph for GPUart_Impl.cu:

Classes
struct	device_global_memory_s
	Typedef for a struct which combines global memory pointers, their related host pointers, and the size of the memory buffer. More...

struct	device_constant_memory_s
	Typedef for a struct which combines constant memory pointers and the size of the related memory buffer. More...

Macros
#define	S_USE_ZERO_COPY_FOR_GLOBAL_APPLICATION_MEMORY
	Use zero copy memory (requires integrated GPU) More...

#define	C_PERSISTENT_KERNEL_EVENT_QUEUE_LENGTH (10)
	The length of the Event Queue, shared between GPU and CPU, used for kernel launch events. More...

#define	C_PERSISTENT_KERNEL_TERMINATE (-1)
	Event ID to indicate a termination request for the persistent kernel. More...

Typedefs
typedef cudaStream_t	command_queue_s
	Typedef for command queues (streams) to abstract GPGPU-API. More...

Functions
__global__ void	GPUart_Persistent_Kernel (uint32 __restrict__ perKer_isRunning_u32_g, uint32 __restrict__ perKer_eventQueueCntDevice_u32_g, volatile uint32 __restrict__ perKer_eventQueueCntHost_u32_g, volatile sint32 __restrict__ perKer_eventQueue_s32_g, volatile uint32 __restrict__ perKer_kernelTasksRunningStates_u32_g, sint32 __restrict__ sob1_matrix_in_s32_g, sint32 __restrict__ sob1_matrix_out_s32_g, sint32 __restrict__ sob2_matrix_in_s32_g, sint32 __restrict__ sob2_matrix_out_s32_g, float32 __restrict__ mm_matrix_A_f32_g, float32 __restrict__ mm_matrix_B_f32_g, float32 __restrict__ mm_matrix_C_f32_g, uint32 __restrict__ sync_SOB1_flags_in_u32_g, uint32 __restrict__ sync_SOB1_flags_out_u32_g, uint32 __restrict__ sync_SOB2_flags_in_u32_g, uint32 __restrict__ sync_SOB2_flags_out_u32_g, uint32 __restrict__ sync_MM_flags_in_u32_g, uint32 __restrict__ sync_MM_flags_out_u32_g, sint32 __restrict__ preempt_SOB1_flag_g, sint32 __restrict__ preempt_SOB1_flag_internal_g, sint32 __restrict__ preempt_SOB1_sm_g, sint32 __restrict__ preempt_SOB2_flag_g, sint32 __restrict__ preempt_SOB2_flag_internal_g, sint32 __restrict__ preempt_SOB2_sm_g, sint32 __restrict__ preempt_MM_flag_g, sint32 __restrict__ preempt_MM_sm_g, uint32 __restrict__ sob1_buffer_loop_counter_u32_g, uint32 __restrict__ sob2_buffer_loop_counter_u32_g, uint32 __restrict__ mm_buffer_blockY_g, uint32 __restrict__ mm_buffer_blockX_g, uint32 *__restrict__ mm_buffer_M_g)
	The persistent kernel (GPU Daemon) which is used to reduce kernel launch latencies. More...

GPUart_Retval	gpuI_memcpyHost2Device (void *variable_p, device_global_memory_id_e id_p)
	Copy data from host memory to device memory. More...

GPUart_Retval	gpuI_memcpyDevice2Host (void *variable_p, device_global_memory_id_e id_p)
	Copy data from device memory to host memory. More...

GPUart_Retval	gpuI_memcpyConstantMemory (void *variable_p, device_constant_memory_id_e id_p)
	Copy data from host memory to constant device memory. More...

GPUart_Retval	gpuI_runJob (kernel_task_id_e task_id_e)
	Request the launch of a GPGPU kernel. More...

GPUart_Retval	gpuI_preemptJob (kernel_task_id_e task_id_p)

uint32	gpuI_queryKernelIsRunning (kernel_task_id_e task_id_e)

uint32	gpuI_queryKernelTerminatedSuccessful (kernel_task_id_e task_id_e)

uint32	gpuI_queryKernelPreempted (kernel_task_id_e task_id_e)

uint32	gpuI_getJobCosts (kernel_task_id_e task_id_e)

GPUart_Retval	gpuI_SetKernelStatusReady (kernel_task_id_e task_id_e)

GPUart_Retval	gpuI_get_NrOfMultiprocessors (uint32 *nrOfMultprocessors, uint32 resourceFactor)

GPUart_Retval	gpuI_init (void)

GPUart_Retval	gpuI_start (void)

GPUart_Retval	gpuI_stop (void)

GPUart_Retval	gpuI_destroy (void)

Variables
static command_queue_s	memory_command_queue_s
	The command queue (stream) for memory operations. More...

static command_queue_s	persistent_kernel_command_queue_s
	The command queue (stream) for the persistent kernel. More...

volatile uint32 *	perKer_isRunning_u32_host
	A status flag, which represents the running status of the persistent kernel (host pointer). More...

uint32 *	perKer_isRunning_u32_g
	A status flag, which represents the running status of the persistent kernel (device pointer). More...

volatile uint32 *	perKer_eventQueueCntHost_u32_host
	The index of the tail of the event queue for kernel launches written by the host (host pointer). More...

uint32 *	perKer_eventQueueCntHost_u32_g
	The index of the tail of the event queue for kernel launches written by the host (device pointer). More...

volatile uint32 *	perKer_eventQueueCntDevice_u32_host
	The index of the head of the event queue for kernel launches written by the device (host pointer). More...

uint32 *	perKer_eventQueueCntDevice_u32_g
	The index of the head of the event queue for kernel launches written by the device (device pointer). More...

volatile sint32 *	perKer_eventQueue_s32_host
	The event queue for kernel launch requests, written by the CPU and red by the GPU (host pointer). More...

sint32 *	perKer_eventQueue_s32_g
	The event queue for kernel launch requests, written by the CPU and red by the GPU (device pointer). More...

volatile uint32 *	perKer_kernelTasksRunningStates_u32_host
	A status flag, which represents the running status of each kernel (host pointer). More...

uint32 *	perKer_kernelTasksRunningStates_u32_g
	A status flag, which represents the running status of each kernel (device pointer). More...

uint32	max_costs_per_kernel = 0
	The allowed job cost per kernel. More...

sint32 *	sob1_matrix_in_s32_g

sint32 *	sob1_matrix_in_s32_host

sint32 *	sob1_matrix_out_s32_g

sint32 *	sob1_matrix_out_s32_host

uint32 *	sync_SOB1_flags_in_u32_g

uint32 *	sync_SOB1_flags_out_u32_g

sint32 *	preempt_SOB1_flag_g

volatile sint32 *	preempt_SOB1_flag_host

sint32 *	preempt_SOB1_flag_internal_g

sint32 *	preempt_SOB1_sm_g

volatile sint32 *	preempt_SOB1_sm_host

uint32 *	sob1_buffer_loop_counter_u32_g

sint32 *	sob2_matrix_in_s32_g

sint32 *	sob2_matrix_in_s32_host

sint32 *	sob2_matrix_out_s32_g

sint32 *	sob2_matrix_out_s32_host

uint32 *	sync_SOB2_flags_in_u32_g

uint32 *	sync_SOB2_flags_out_u32_g

sint32 *	preempt_SOB2_flag_g

volatile sint32 *	preempt_SOB2_flag_host

sint32 *	preempt_SOB2_flag_internal_g

sint32 *	preempt_SOB2_sm_g

volatile sint32 *	preempt_SOB2_sm_host

uint32 *	sob2_buffer_loop_counter_u32_g

float32 *	mm_matrix_A_f32_g

float32 *	mm_matrix_A_f32_host

float32 *	mm_matrix_B_f32_g

float32 *	mm_matrix_B_f32_host

float32 *	mm_matrix_C_f32_g

float32 *	mm_matrix_C_f32_host

uint32 *	sync_MM_flags_in_u32_g

uint32 *	sync_MM_flags_out_u32_g

sint32 *	preempt_MM_flag_g

volatile sint32 *	preempt_MM_flag_host

sint32 *	preempt_MM_sm_g

volatile sint32 *	preempt_MM_sm_host

uint32 *	mm_buffer_blockY_g

uint32 *	mm_buffer_blockX_g

uint32 *	mm_buffer_M_g

static device_constant_memory_s	constant_memory_list_a [E_CM_TOTAL_NR_OF_CONST_MEM_VARIABLES]
	The constant memory table. More...

static device_global_memory_s	global_memory_list_a [E_GM_TOTAL_NR_OF_GLOB_MEM_VARIABLES]
	The global memory table. More...

static volatile sint32 **	device_preemption_flags_a [E_KTID_NUMBER_OF_KERNEL_TASKS]
	The preemption flag table. More...

static const sint32	preemption_enabled_a [E_KTID_NUMBER_OF_KERNEL_TASKS]
	The preemption enabled table. More...

static volatile sint32 **	device_kernel_task_SM_a [E_KTID_NUMBER_OF_KERNEL_TASKS]
	The kernel state machine table. More...

static uint32	nb_of_StateMachines_in_kernel_a [E_KTID_NUMBER_OF_KERNEL_TASKS]
	The number of state machines table. More...

static uint8	kernel_job_costs [E_KTID_NUMBER_OF_KERNEL_TASKS]
	The job cost table. More...

static uint8	gpuI_deviceID_u8 = 0
	The device ID of the used GPU. More...

Detailed Description

Implementation of the management component of the GPUart Implemenation layer.

This file concentrates all GPGPU related memory declarations and allocations, memory transfers operations, kernel launches, kernel initialisations, and GPU related implementation details.

Author: Christoph Hartmann

Date: Created on: 7 Apr 2017

Macro Definition Documentation

#define C_PERSISTENT_KERNEL_EVENT_QUEUE_LENGTH (10)

The length of the Event Queue, shared between GPU and CPU, used for kernel launch events.

See Also: perKer_eventQueueCntHost_u32_host; perKer_eventQueueCntDevice_u32_host; perKer_eventQueue_s32_host

#define C_PERSISTENT_KERNEL_TERMINATE (-1)

Event ID to indicate a termination request for the persistent kernel.

See Also: perKer_eventQueueCntHost_u32_host; perKer_eventQueueCntHost_u32_g

#define S_USE_ZERO_COPY_FOR_GLOBAL_APPLICATION_MEMORY

Use zero copy memory (requires integrated GPU)

This MUST be defined so far, since memory transfers over PCIe are currently not implemented completely.

See Also: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#zero-copy-memory; https://software.intel.com/en-us/articles/getting-the-most-from-opencl-12-how-to-increase-performance-by-minimizing-buffer-copies-on-intel-processor-graphics

Typedef Documentation

typedef cudaStream_t command_queue_s

Typedef for command queues (streams) to abstract GPGPU-API.

Command queues are required to improve the concurrency of memory and kernel operatation on the GPU.

See Also: https://developer.download.nvidia.com/CUDA/training/StreamsAndConcurrencyWebinar.pdf; https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clCreateCommandQueue.html

Function Documentation

__global__ void GPUart_Persistent_Kernel	(	uint32 *__restrict__	perKer_isRunning_u32_g,
		uint32 *__restrict__	perKer_eventQueueCntDevice_u32_g,
		volatile uint32 *__restrict__	perKer_eventQueueCntHost_u32_g,
		volatile sint32 *__restrict__	perKer_eventQueue_s32_g,
		volatile uint32 *__restrict__	perKer_kernelTasksRunningStates_u32_g,
		sint32 *__restrict__	sob1_matrix_in_s32_g,
		sint32 *__restrict__	sob1_matrix_out_s32_g,
		sint32 *__restrict__	sob2_matrix_in_s32_g,
		sint32 *__restrict__	sob2_matrix_out_s32_g,
		float32 *__restrict__	mm_matrix_A_f32_g,
		float32 *__restrict__	mm_matrix_B_f32_g,
		float32 *__restrict__	mm_matrix_C_f32_g,
		uint32 *__restrict__	sync_SOB1_flags_in_u32_g,
		uint32 *__restrict__	sync_SOB1_flags_out_u32_g,
		uint32 *__restrict__	sync_SOB2_flags_in_u32_g,
		uint32 *__restrict__	sync_SOB2_flags_out_u32_g,
		uint32 *__restrict__	sync_MM_flags_in_u32_g,
		uint32 *__restrict__	sync_MM_flags_out_u32_g,
		sint32 *__restrict__	preempt_SOB1_flag_g,
		sint32 *__restrict__	preempt_SOB1_flag_internal_g,
		sint32 *__restrict__	preempt_SOB1_sm_g,
		sint32 *__restrict__	preempt_SOB2_flag_g,
		sint32 *__restrict__	preempt_SOB2_flag_internal_g,
		sint32 *__restrict__	preempt_SOB2_sm_g,
		sint32 *__restrict__	preempt_MM_flag_g,
		sint32 *__restrict__	preempt_MM_sm_g,
		uint32 *__restrict__	sob1_buffer_loop_counter_u32_g,
		uint32 *__restrict__	sob2_buffer_loop_counter_u32_g,
		uint32 *__restrict__	mm_buffer_blockY_g,
		uint32 *__restrict__	mm_buffer_blockX_g,
		uint32 *__restrict__	mm_buffer_M_g
	)

The persistent kernel (GPU Daemon) which is used to reduce kernel launch latencies.

The kernel arguments must include all global memory buffers of all kernels in this system, since this kernel is used to launch GPGPU kernel on demand. The persistent kernel reduces kernel launch latencies by bypassing the GPGPU driver stack when launching kernels.

See Also: Mrozek et al. GPU Daemon: Road to zero cost submission, in Proceedings of the 4th International Workshop on OpenCL, Vienna, Austria, 2016 -> https://dl.acm.org/citation.cfm?id=2909450

GPUart_Retval gpuI_destroy ( void )

Here is the caller graph for this function:

GPUart_Retval gpuI_get_NrOfMultiprocessors	(	uint32 *	nrOfMultprocessors,
		uint32	resourceFactor
	)

Here is the caller graph for this function:

uint32 gpuI_getJobCosts ( kernel_task_id_e task_id_e )

Here is the caller graph for this function:

GPUart_Retval gpuI_init ( void )

Here is the caller graph for this function:

GPUart_Retval gpuI_memcpyConstantMemory	(	void *	variable_p,
		device_constant_memory_id_e	id_p
	)

Copy data from host memory to constant device memory.

The copy is only possible if persistent GPUart kernel GPUart_Persistent_Kernel is not running, since a constant memory data is immutable during kernel execution and its value is inherited from parent to child kernel.

Parameters

[in]	void	* variable_p -> The host variable to be copied
[in]	device_constant_memory_id_e	id_p -> The ID of the constant memory buffer

Returns: GPUART_SUCCESS if memory copy operation has been successfully.; GPUART_ERROR_INVALID_ARGUMENT if id_p is an invalid ID.

GPUart_Retval gpuI_memcpyDevice2Host	(	void *	variable_p,
		device_global_memory_id_e	id_p
	)

Copy data from device memory to host memory.

Device memory may be shared physical memory or discrete device memory. The device driver API call may depend on the type of device memory (global or texture memory).

Parameters

[out]	void	* variable_p -> The host variable to be written
[in]	device_global_memory_id_e	id_p -> The ID of the global memory variable

Returns: GPUART_SUCCESS if memory copy operation has been successfully.; GPUART_ERROR_INVALID_ARGUMENT if id_p is an invalid ID.

Here is the caller graph for this function:

GPUart_Retval gpuI_memcpyHost2Device	(	void *	variable_p,
		device_global_memory_id_e	id_p
	)

Copy data from host memory to device memory.

Device memory may be shared physical memory or discrete device memory. The device driver API call may depend on the type of device memory (global or texture memory).

Parameters

[in]	void	* variable_p -> The host variable to be copied
[in]	device_global_memory_id_e	id_p -> The ID of the global memory variable

Returns: GPUART_SUCCESS if memory copy operation has been successfully.; GPUART_ERROR_INVALID_ARGUMENT if id_p is an invalid ID.

Here is the caller graph for this function:

GPUart_Retval gpuI_preemptJob ( kernel_task_id_e task_id_p )

Here is the caller graph for this function:

uint32 gpuI_queryKernelIsRunning ( kernel_task_id_e task_id_e )

uint32 gpuI_queryKernelPreempted ( kernel_task_id_e task_id_e )

Here is the caller graph for this function:

uint32 gpuI_queryKernelTerminatedSuccessful ( kernel_task_id_e task_id_e )

Here is the caller graph for this function:

GPUart_Retval gpuI_runJob ( kernel_task_id_e task_id_e )

Request the launch of a GPGPU kernel.

Parameters

kernel_task_id_e task_id_e -> The ID of the kernel to be launched.

Returns: GPUART_SUCCESS if kernel launch has been successfully.; GPUART_ERROR_NOT_READY if launch request is already active.

Here is the caller graph for this function:

GPUart_Retval gpuI_SetKernelStatusReady ( kernel_task_id_e task_id_e )

Here is the caller graph for this function:

GPUart_Retval gpuI_start ( void )

Here is the caller graph for this function:

GPUart_Retval gpuI_stop ( void )

Here is the caller graph for this function:

Variable Documentation

device_constant_memory_s constant_memory_list_a[E_CM_TOTAL_NR_OF_CONST_MEM_VARIABLES]

static

Initial value:

=
{
}

The constant memory table.

All constant memory buffers which must be written during runtime must be defined here. The i'th element represents the i'th constant memory buffer, define by device_constant_memory_id_e in GPUart_Config.h. Each element must defined in the following style: { (void **)& CONSTANT_BUFFER_NAME, SIZE_IN_BYTES }.

See Also: device_constant_memory_id_e

volatile sint32** device_kernel_task_SM_a[E_KTID_NUMBER_OF_KERNEL_TASKS]

static

Initial value:

=
{
    &preempt_SOB1_sm_host,      
    &preempt_SOB2_sm_host,      
    &preempt_MM_sm_host         
}

The kernel state machine table.

The i'th element represents the i'th kernel, according to the enum kernel_task_id_e in GPUart_Config.h. Each element must defined in the following style: &NAME_OF_STATE_MACHINE_POINTER. Use NULL if the related kernel is non-preemptive.

See Also: kernel_task_id_e

volatile sint32** device_preemption_flags_a[E_KTID_NUMBER_OF_KERNEL_TASKS]

static

Initial value:

=
{
    (volatile sint32**) &preempt_SOB1_flag_host,    
    (volatile sint32**) &preempt_SOB2_flag_host,    
    (volatile sint32**) &preempt_MM_flag_host       
}

The preemption flag table.

All preemption flags must be included by this table. The i'th element represents the i'th kernel, according to the enum kernel_task_id_e in GPUart_Config.h. Each element must defined in the following style: (volatile sint32**)& NAME_OF_PREEMPTION_FLAG_POINTER. If a kernel does not implement a preemption flag, because it is non-preemptive, insert a NULL.

See Also: kernel_task_id_e

device_global_memory_s global_memory_list_a[E_GM_TOTAL_NR_OF_GLOB_MEM_VARIABLES]

static

Initial value:

=
{
    
    { (void **)&sob1_matrix_in_s32_g,       (void **)&sob1_matrix_in_s32_host,      C_SOB1_MATRIX_SIZE * sizeof(sint32)     }, 
    { (void **)&sob1_matrix_out_s32_g,      (void **)&sob1_matrix_out_s32_host,     C_SOB1_MATRIX_SIZE * sizeof(sint32)     }, 
    
    { (void **)&sob2_matrix_in_s32_g,       (void **)&sob2_matrix_in_s32_host,      C_SOB2_MATRIX_SIZE * sizeof(sint32)     }, 
    { (void **)&sob2_matrix_out_s32_g,      (void **)&sob2_matrix_out_s32_host,     C_SOB2_MATRIX_SIZE * sizeof(sint32)     }, 
    
    { (void **)&mm_matrix_A_f32_g,          (void **)&mm_matrix_A_f32_host,         C_MM_MATRIX_TOTAL_SIZE * sizeof(sint32) }, 
    { (void **)&mm_matrix_B_f32_g,          (void **)&mm_matrix_B_f32_host,         C_MM_MATRIX_TOTAL_SIZE * sizeof(sint32) }, 
    { (void **)&mm_matrix_C_f32_g,          (void **)&mm_matrix_C_f32_host,         C_MM_MATRIX_TOTAL_SIZE * sizeof(sint32) }  
}

The global memory table.

All global memory buffers which must be written or red during runtime must be defined here. The i'th element represents the i'th global memory buffer, define by device_global_memory_id_e in GPUart_Config.h. Each element must defined in the following style: { (void **)& GLOBAL_MEMORY_BUFFER_POINTER_DEVICE, GLOBAL_MEMORY_BUFFER_POINTER_HOST, SIZE_IN_BYTES }.

See Also: device_global_memory_id_e

uint8 gpuI_deviceID_u8 = 0

static

The device ID of the used GPU.

See Also: http://docs.nvidia.com/cuda/cuda-c-programming-guide; https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clGetDeviceIDs.html

uint8 kernel_job_costs[E_KTID_NUMBER_OF_KERNEL_TASKS]

static

Initial value:

=
{
    C_SOB1_NUMBER_OF_BLOCKS,    
    C_SOB2_NUMBER_OF_BLOCKS,    
    C_MM_NUMBER_OF_BLOCKS       
}

The job cost table.

The i'th element represents the i'th kernel, according to the enum kernel_task_id_e in GPUart_Config.h. Each element represents the job costs of the related kernel. If a thread block of a kernel requires more then 1/µ of the available registers, shared memory, thread residency slots, or thread block residency slots of an Streaming Multiprocessor, then set corresponding value to m * µ, whereby µ is the resource factor and m is the GPU's number of Streaming Multiprocessors. If a thread block of a kernel requires less then 1/µ of each resource type, then set the corresponding value to the kernels number of thread blocks.

See Also: kernel_task_id_e; C_GPUS_RESOURCE_FACTOR; gpuS_nrOfMultiprocessor_u32; max_costs_per_kernel

uint32 max_costs_per_kernel = 0

The allowed job cost per kernel.

This value is equal to m * µ, whereby m is the number of Streaming Multiprocessors of the GPU gpuS_nrOfMultiprocessor_u32 and µ is the resource factor C_GPUS_RESOURCE_FACTOR.

See Also: kernel_task_id_e; C_GPUS_RESOURCE_FACTOR; gpuS_nrOfMultiprocessor_u32; kernel_job_costs

command_queue_s memory_command_queue_s

static

The command queue (stream) for memory operations.

uint32* mm_buffer_blockX_g

uint32* mm_buffer_blockY_g

uint32* mm_buffer_M_g

float32* mm_matrix_A_f32_g

float32 * mm_matrix_A_f32_host

float32* mm_matrix_B_f32_g

float32 * mm_matrix_B_f32_host

float32* mm_matrix_C_f32_g

float32 * mm_matrix_C_f32_host

uint32 nb_of_StateMachines_in_kernel_a[E_KTID_NUMBER_OF_KERNEL_TASKS]

static

Initial value:

=
{
    1u,                         
    1u,                         
    C_MM_NUMBER_OF_BLOCKS       
}

The number of state machines table.

The i'th element represents the i'th kernel, according to the enum kernel_task_id_e in GPUart_Config.h. Each element must defined in the following style: NUMBER_OF_SM_IN_KERNEL. If a kernel preempts grid-synchronous then use the value 1u. If a kernel preempts thread-block synchronous then use the number of thread blocks of this kernel. If a kernel is non-preemptive then use 0u.

See Also: kernel_task_id_e

perKer_eventQueue_s32_g

The event queue for kernel launch requests, written by the CPU and red by the GPU (device pointer).

To request a kernel launch, write the kernel's ID (kernel_task_id_e) into the tail of the queue. Write C_PERSISTENT_KERNEL_TERMINATE to terminate the persistent kernel GPUart_Persistent_Kernel.

See Also: perKer_eventQueue_s32_host

perKer_eventQueue_s32_host

The event queue for kernel launch requests, written by the CPU and red by the GPU (host pointer).

To request a kernel launch, write the kernel's ID (kernel_task_id_e) into the tail of the queue. Write C_PERSISTENT_KERNEL_TERMINATE to terminate the persistent kernel GPUart_Persistent_Kernel.

See Also: perKer_eventQueue_s32_g

perKer_eventQueueCntDevice_u32_g

The index of the head of the event queue for kernel launches written by the device (device pointer).

See Also: perKer_eventQueueCntDevice_u32_host

perKer_eventQueueCntDevice_u32_host

The index of the head of the event queue for kernel launches written by the device (host pointer).

See Also: perKer_eventQueueCntDevice_u32_g

perKer_eventQueueCntHost_u32_g

The index of the tail of the event queue for kernel launches written by the host (device pointer).

See Also: perKer_eventQueueCntHost_u32_host

perKer_eventQueueCntHost_u32_host

The index of the tail of the event queue for kernel launches written by the host (host pointer).

See Also: perKer_eventQueueCntDevice_u32_g

perKer_isRunning_u32_g

A status flag, which represents the running status of the persistent kernel (device pointer).

See Also: perKer_isRunning_u32_host

perKer_isRunning_u32_host

A status flag, which represents the running status of the persistent kernel (host pointer).

See Also: perKer_isRunning_u32_g

perKer_kernelTasksRunningStates_u32_g

A status flag, which represents the running status of each kernel (device pointer).

See Also: perKer_kernelTasksRunningStates_u32_host

perKer_kernelTasksRunningStates_u32_host

A status flag, which represents the running status of each kernel (host pointer).

See Also: perKer_kernelTasksRunningStates_u32_g

command_queue_s persistent_kernel_command_queue_s

static

The command queue (stream) for the persistent kernel.

sint32* preempt_MM_flag_g

volatile sint32* preempt_MM_flag_host

sint32* preempt_MM_sm_g

volatile sint32* preempt_MM_sm_host

sint32* preempt_SOB1_flag_g

volatile sint32* preempt_SOB1_flag_host

sint32* preempt_SOB1_flag_internal_g

sint32* preempt_SOB1_sm_g

volatile sint32* preempt_SOB1_sm_host

sint32* preempt_SOB2_flag_g

volatile sint32* preempt_SOB2_flag_host

sint32* preempt_SOB2_flag_internal_g

sint32* preempt_SOB2_sm_g

volatile sint32* preempt_SOB2_sm_host

const sint32 preemption_enabled_a[E_KTID_NUMBER_OF_KERNEL_TASKS]

static

Initial value:

=
{
    C_TRUE,                     
    C_TRUE,                     
    C_TRUE                      
}

The preemption enabled table.

The i'th element represents the i'th kernel, according to the enum kernel_task_id_e in GPUart_Config.h. Each element must defined in the following style: C_TRUE if the related kernel is preemptive; C_FALSE if the related kernel is non-preemptive.

See Also: kernel_task_id_e

uint32* sob1_buffer_loop_counter_u32_g

sint32* sob1_matrix_in_s32_g

sint32 * sob1_matrix_in_s32_host

sint32* sob1_matrix_out_s32_g

sint32 * sob1_matrix_out_s32_host

uint32* sob2_buffer_loop_counter_u32_g

sint32* sob2_matrix_in_s32_g

sint32 * sob2_matrix_in_s32_host

sint32* sob2_matrix_out_s32_g

sint32 * sob2_matrix_out_s32_host

uint32* sync_MM_flags_in_u32_g

uint32* sync_MM_flags_out_u32_g

uint32* sync_SOB1_flags_in_u32_g

uint32* sync_SOB1_flags_out_u32_g

uint32* sync_SOB2_flags_in_u32_g

uint32* sync_SOB2_flags_out_u32_g

Classes

Macros

Typedefs

Functions

Variables

Detailed Description

Macro Definition Documentation

Typedef Documentation

Function Documentation

Variable Documentation