GPUart  1.01
Prototype of GPUart, an application-based, limited-preemptive GPU scheduler for embedded real-time systems.
Search categories: All | Classes | Files | Functions | Variables | Typedefs | Enumerations | Enumerator | Macros
Macros | Functions
GPUart_MatrMul.cuh File Reference
#include "../GPUart_Common/GPUart_Common.h"
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_profiler_api.h>
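Include dependency graph for GPUart_MatrMul.cuh: (graph image not reproduced in this text rendering; the direct includes are the #include lines listed above)
This graph shows which files directly or indirectly include this file: (graph image not reproduced in this text rendering)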

Macros

#define GPUART_MM_CUH
 
#define C_MM_GLOBAL_WORK_SIZE   3072
 
#define C_MM_LOCAL_WORK_SIZE_X   16
 
#define C_MM_LOCAL_WORK_SIZE_Y   16
 
#define C_MM_LOCAL_WORK_SIZE   (C_MM_LOCAL_WORK_SIZE_X * C_MM_LOCAL_WORK_SIZE_Y)
 
#define C_MM_NUMBER_OF_BLOCKS_X   12
 
#define C_MM_NUMBER_OF_BLOCKS_Y   1
 
#define C_MM_NUMBER_OF_BLOCKS   (C_MM_NUMBER_OF_BLOCKS_X * C_MM_NUMBER_OF_BLOCKS_Y)
 
#define C_MM_BLOCK_SIZE   16
 
#define C_MM_MATRIX_N   768
 
#define C_MM_MATRIX_TOTAL_SIZE   C_MM_MATRIX_N * C_MM_MATRIX_N
 
#define C_MM_SM_INIT   (0)
 
#define C_MM_SM_LOOP   (1)
 
#define C_MM_SM_FINISH   (2)
 

Functions

__global__ void MatrMul_Kernel (float32 *__restrict__ A, float32 *__restrict__ B, float32 *__restrict__ C, uint32 *__restrict__ block_Y_buffer, uint32 *__restrict__ block_X_buffer, uint32 *__restrict__ m_buffer, volatile sint32 *__restrict__ preemption_flag_g, volatile sint32 *__restrict__ preemption_sm_g, uint32 *__restrict__ sync_flags_in_u32_g, uint32 *__restrict__ sync_flags_out_u32_g, volatile uint32 *__restrict__ kernelRunningStatus_g)
 

Macro Definition Documentation

#define C_MM_BLOCK_SIZE   16
#define C_MM_GLOBAL_WORK_SIZE   3072
#define C_MM_LOCAL_WORK_SIZE   (C_MM_LOCAL_WORK_SIZE_X * C_MM_LOCAL_WORK_SIZE_Y)
#define C_MM_LOCAL_WORK_SIZE_X   16
#define C_MM_LOCAL_WORK_SIZE_Y   16
#define C_MM_MATRIX_N   768
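#define C_MM_MATRIX_TOTAL_SIZE   C_MM_MATRIX_N * C_MM_MATRIX_N
NOTE(review): unlike C_MM_LOCAL_WORK_SIZE and C_MM_NUMBER_OF_BLOCKS, this expansion is not parenthesized, so an expression such as `x / C_MM_MATRIX_TOTAL_SIZE` or `x % C_MM_MATRIX_TOTAL_SIZE` expands to `x / 768 * 768` / `x % 768 * 768` rather than dividing by 589824. Consider wrapping the definition in parentheses in the header.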
#define C_MM_NUMBER_OF_BLOCKS   (C_MM_NUMBER_OF_BLOCKS_X * C_MM_NUMBER_OF_BLOCKS_Y)
#define C_MM_NUMBER_OF_BLOCKS_X   12
#define C_MM_NUMBER_OF_BLOCKS_Y   1
#define C_MM_SM_FINISH   (2)
#define C_MM_SM_INIT   (0)
#define C_MM_SM_LOOP   (1)
#define GPUART_MM_CUH

Function Documentation

__global__ void MatrMul_Kernel ( float32 *__restrict__  A,
float32 *__restrict__  B,
float32 *__restrict__  C,
uint32 *__restrict__  block_Y_buffer,
uint32 *__restrict__  block_X_buffer,
uint32 *__restrict__  m_buffer,
volatile sint32 *__restrict__  preemption_flag_g,
volatile sint32 *__restrict__  preemption_sm_g,
uint32 *__restrict__  sync_flags_in_u32_g,
uint32 *__restrict__  sync_flags_out_u32_g,
volatile uint32 *__restrict__  kernelRunningStatus_g 
)

Here is the call graph for this function: