Spaces:

FaceOnLive
/

Face-Liveness-Detection-SDK

Running

App Files Files Community

Face-Liveness-Detection-SDK / openvino /vpu_custom_kernels /reorg_chw.cl

Zhu-FaceOnLive

Initial commit.

2ded60b about 1 year ago

raw

history blame

2.11 kB

	// Copyright (C) 2018-2022 Intel Corporation
	// SPDX-License-Identifier: Apache-2.0
	//

	#pragma OPENCL EXTENSION cl_khr_fp16 : enable
	#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable

	__kernel void reorg_chw(
	__global const half *restrict src,
	__global half *restrict dst,
	int W,
	int H,
	int C,
	int stride)
	{
	__local half local_src[8 * 1024];
	__local half local_dst[8 * 1024];

	event_t e1 = async_work_group_copy_2D2D(
	local_src, // dst
	src + get_group_id(1) * W * stride
	+ get_group_id(0) * W * stride * stride, // src
	W * stride, // num_elements_per_line,
	get_local_size(0), // num_lines,
	W * stride * (stride * get_num_groups(0) - 1), // src_line_stride,
	0, // dst_line_stride,
	0);
	wait_group_events(1, &e1);

	const int c = get_local_id(0);
	const int stride_x = get_local_id(1);

	const int srcIdx = stride_x + c * W * stride;
	const int dstIdx = stride_x * W * get_local_size(0) + c * W;

	int x = 0;
	for (; x <= W - 8; x += 8) {
	half8 data = (half8){
	local_src[srcIdx + (x + 0) * stride],
	local_src[srcIdx + (x + 1) * stride],
	local_src[srcIdx + (x + 2) * stride],
	local_src[srcIdx + (x + 3) * stride],
	local_src[srcIdx + (x + 4) * stride],
	local_src[srcIdx + (x + 5) * stride],
	local_src[srcIdx + (x + 6) * stride],
	local_src[srcIdx + (x + 7) * stride]};

	((__local half8 )(&local_dst[dstIdx + x])) = data;
	}

	for (; x < W; x++) {
	local_dst[dstIdx + x] = local_src[srcIdx + x * stride];
	}

	barrier(CLK_LOCAL_MEM_FENCE);

	event_t e2 = async_work_group_copy_2D2D(
	dst + get_group_id(0) * W
	+ get_group_id(1) * W * stride * get_global_size(0), // dst
	local_dst, // src
	W, // num_elements_per_line
	get_local_size(0) * stride, // num_lines
	0, // src_line_stride
	W * (get_num_groups(0) - 1), // dst_line_stride
	0);
	wait_group_events(1, &e2);
	}