CUDA 核函数示例：使用 mykernel 将数据从源数组复制到目标数组

以下是一个使用 CUDA 核函数 mykernel 将数据从源数组复制到目标数组的示例代码（代码中用到了 std::cout，因此需要在文件开头加入 #include <iostream>，并使用 nvcc 编译）：

/**
 * Element-wise copy kernel: each thread copies at most one int from src to dst.
 *
 * Indexing is one-dimensional: only the .x components of threadIdx, blockIdx
 * and blockDim are used. A thread whose index is >= size does nothing, so the
 * launch must supply at least `size` threads along x for every element to be
 * copied.
 *
 * @param dst  destination array, dereferenced on the device
 * @param src  source array, dereferenced on the device
 * @param size number of elements to copy
 */
__global__ void mykernel(int* dst, int* src, int size) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Surplus threads have no element to copy; leave before touching memory.
    if (tid >= size) {
        return;
    }

    dst[tid] = src[tid];
}

/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * @param status value returned by a CUDA runtime call
 * @param what   short description of the call, used in the error message
 * @return true when status is cudaSuccess, false otherwise
 */
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << '\n';
    return false;
}

/**
 * Copies a 10-element host array to the device, duplicates it on the device
 * with mykernel, copies the result back and prints it.
 *
 * Every CUDA runtime call is checked. On the first failure the remaining
 * steps are skipped, device memory is released, and nothing is printed to
 * stdout.
 *
 * @return 0 on success, 1 if any CUDA call or the kernel launch failed
 */
int main() {
    const int SIZE = 10;
    const size_t bytes = SIZE * sizeof(int);
    int src[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    int dst[SIZE];
    // Start as NULL so the cudaFree calls below are safe even when an
    // allocation failed or was never attempted.
    int* dev_src = NULL;
    int* dev_dst = NULL;

    // && short-circuits: the first failing step stops the chain.
    bool ok = cudaOk(cudaMalloc(&dev_src, bytes), "cudaMalloc(dev_src)")
           && cudaOk(cudaMalloc(&dev_dst, bytes), "cudaMalloc(dev_dst)")
           && cudaOk(cudaMemcpy(dev_src, src, bytes, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host -> device)");

    if (ok) {
        mykernel<<<1, SIZE>>>(dev_dst, dev_src, SIZE);
        // A kernel launch returns nothing; launch errors must be queried
        // explicitly. The device-to-host copy that follows is the next
        // checked call after the kernel.
        ok = cudaOk(cudaGetLastError(), "mykernel launch")
          && cudaOk(cudaMemcpy(dst, dev_dst, bytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device -> host)");
    }

    // Release device memory on both the success and the failure path.
    cudaFree(dev_src);
    cudaFree(dev_dst);

    if (!ok) {
        return 1;  // dst was never filled; do not print it
    }

    for (int i = 0; i < SIZE; i++) {
        std::cout << dst[i] << ' '; // prints: 1 2 3 4 5 6 7 8 9 10
    }
    return 0;
}

在这个示例代码中，mykernel 的功能是将 dev_src 中的数据复制到 dev_dst 中。在主函数中，先将 src 数组复制到 dev_src 中，然后调用 mykernel 函数进行复制，最后将 dev_dst 中的数据复制到 dst 数组中。最终输出 dst 数组中的数据，即为 src 数组中的数据。

代码解释：

__global__ void mykernel(int* dst, int* src, int size)：定义了一个名为 mykernel 的全局核函数，它接受三个参数：目标数组指针 dst、源数组指针 src 和数组大小 size。
int tid = threadIdx.x + blockIdx.x * blockDim.x;：计算当前线程的全局索引，即"块内线程编号 + 块编号 × 每块线程数"；每个线程负责处理数组中下标为 tid 的那个元素。
if (tid < size) { dst[tid] = src[tid]; }：边界检查——只有当线程索引小于数组大小时，才将源数组中对应的元素复制到目标数组；这样即使启动的线程数多于数组元素个数，多余的线程也不会越界访问内存。
int main()：主函数。
const int SIZE = 10;：定义数组大小。
int src[SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};：定义源数组。
int dst[SIZE];：定义目标数组。
int* dev_src; int* dev_dst;：定义设备内存指针。
cudaMalloc(&dev_src, SIZE * sizeof(int)); 和 cudaMalloc(&dev_dst, SIZE * sizeof(int));：在设备（GPU）上分别为源数组和目标数组分配 SIZE 个 int 大小的内存。
cudaMemcpy(dev_src, src, SIZE * sizeof(int), cudaMemcpyHostToDevice);：将源数组 src 复制到设备内存 dev_src 中。
mykernel<<<1, SIZE>>>(dev_dst, dev_src, SIZE);：调用 mykernel 核函数，其中 <<<1, SIZE>>> 表示使用 1 个线程块（block），该线程块包含 SIZE 个线程（thread）。注意单个线程块最多只能包含 1024 个线程，当数组更大时需要使用多个线程块。
cudaMemcpy(dst, dev_dst, SIZE * sizeof(int), cudaMemcpyDeviceToHost);：将设备内存 dev_dst 中的数据复制到目标数组 dst 中。
cudaFree(dev_src); 和 cudaFree(dev_dst);：释放设备内存。
for (int i = 0; i < SIZE; i++) { std::cout << dst[i] << ' '; }：输出目标数组 dst 中的数据。

这个示例代码演示了如何使用 CUDA 核函数来进行数据复制操作，并提供了基本的 CUDA 内存管理和核函数调用的示例。