![]() |
GPUart
1.01
Prototype of GPUart, an application-based limited preemptive GPU scheduler for embedded real-time systems
|
#include "../GPUart_Common/GPUart_Common.h"
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_profiler_api.h>

Macros | |
| #define | GPUART_MM_CUH |
| #define | C_MM_GLOBAL_WORK_SIZE 3072 |
| #define | C_MM_LOCAL_WORK_SIZE_X 16 |
| #define | C_MM_LOCAL_WORK_SIZE_Y 16 |
| #define | C_MM_LOCAL_WORK_SIZE (C_MM_LOCAL_WORK_SIZE_X * C_MM_LOCAL_WORK_SIZE_Y) |
| #define | C_MM_NUMBER_OF_BLOCKS_X 12 |
| #define | C_MM_NUMBER_OF_BLOCKS_Y 1 |
| #define | C_MM_NUMBER_OF_BLOCKS (C_MM_NUMBER_OF_BLOCKS_X * C_MM_NUMBER_OF_BLOCKS_Y) |
| #define | C_MM_BLOCK_SIZE 16 |
| #define | C_MM_MATRIX_N 768 |
| #define | C_MM_MATRIX_TOTAL_SIZE C_MM_MATRIX_N * C_MM_MATRIX_N |
| #define | C_MM_SM_INIT (0) |
| #define | C_MM_SM_LOOP (1) |
| #define | C_MM_SM_FINISH (2) |
Functions | |
| __global__ void | MatrMul_Kernel (float32 *__restrict__ A, float32 *__restrict__ B, float32 *__restrict__ C, uint32 *__restrict__ block_Y_buffer, uint32 *__restrict__ block_X_buffer, uint32 *__restrict__ m_buffer, volatile sint32 *__restrict__ preemption_flag_g, volatile sint32 *__restrict__ preemption_sm_g, uint32 *__restrict__ sync_flags_in_u32_g, uint32 *__restrict__ sync_flags_out_u32_g, volatile uint32 *__restrict__ kernelRunningStatus_g) |
| #define C_MM_BLOCK_SIZE 16 |
| #define C_MM_GLOBAL_WORK_SIZE 3072 |
| #define C_MM_LOCAL_WORK_SIZE (C_MM_LOCAL_WORK_SIZE_X * C_MM_LOCAL_WORK_SIZE_Y) |
| #define C_MM_LOCAL_WORK_SIZE_X 16 |
| #define C_MM_LOCAL_WORK_SIZE_Y 16 |
| #define C_MM_MATRIX_N 768 |
| #define C_MM_MATRIX_TOTAL_SIZE C_MM_MATRIX_N * C_MM_MATRIX_N |
| #define C_MM_NUMBER_OF_BLOCKS (C_MM_NUMBER_OF_BLOCKS_X * C_MM_NUMBER_OF_BLOCKS_Y) |
| #define C_MM_NUMBER_OF_BLOCKS_X 12 |
| #define C_MM_NUMBER_OF_BLOCKS_Y 1 |
| #define C_MM_SM_FINISH (2) |
| #define C_MM_SM_INIT (0) |
| #define C_MM_SM_LOOP (1) |
| #define GPUART_MM_CUH |
__global__ void MatrMul_Kernel (
    float32 *__restrict__ A,
    float32 *__restrict__ B,
    float32 *__restrict__ C,
    uint32 *__restrict__ block_Y_buffer,
    uint32 *__restrict__ block_X_buffer,
    uint32 *__restrict__ m_buffer,
    volatile sint32 *__restrict__ preemption_flag_g,
    volatile sint32 *__restrict__ preemption_sm_g,
    uint32 *__restrict__ sync_flags_in_u32_g,
    uint32 *__restrict__ sync_flags_out_u32_g,
    volatile uint32 *__restrict__ kernelRunningStatus_g
)

1.8.6