38 lines
1.4 KiB

  1. #include <iostream>
  2. #include <chrono>
  3. __global__ void cuda_kernel_kernelSwitchTest(int const * const A, int * const B) {
  4. *B = *A;
  5. }
  6. void kernelSwitchTest(size_t N) {
  7. int* deviceIntA;
  8. int* deviceIntB;
  9. if (cudaMalloc((void**)&deviceIntA, sizeof(int)) != cudaSuccess) {
  10. std::cout << "Error in cudaMalloc while allocating " << sizeof(int) << " Bytes!" << std::endl;
  11. return;
  12. }
  13. if (cudaMalloc((void**)&deviceIntB, sizeof(int)) != cudaSuccess) {
  14. std::cout << "Error in cudaMalloc while allocating " << sizeof(int) << " Bytes!" << std::endl;
  15. return;
  16. }
  17. // Allocate space on the device
  18. auto start_time = std::chrono::high_resolution_clock::now();
  19. for (int i = 0; i < N; ++i) {
  20. cuda_kernel_kernelSwitchTest<<<1,1>>>(deviceIntA, deviceIntB);
  21. }
  22. auto end_time = std::chrono::high_resolution_clock::now();
  23. std::cout << "Switching the Kernel " << N << " times took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;
  24. std::cout << "Resulting in " << (std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() / ((double)(N))) << "Microseconds per Kernel Switch" << std::endl;
  25. // Free memory on device
  26. if (cudaFree(deviceIntA) != cudaSuccess) {
  27. std::cout << "Error in cudaFree!" << std::endl;
  28. return;
  29. }
  30. if (cudaFree(deviceIntB) != cudaSuccess) {
  31. std::cout << "Error in cudaFree!" << std::endl;
  32. return;
  33. }
  34. }