You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							39 lines
						
					
					
						
							1.4 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							39 lines
						
					
					
						
							1.4 KiB
						
					
					
				
								#include <iostream>
							 | 
						|
								#include <chrono>
							 | 
						|
								
							 | 
						|
								// Minimal kernel used to measure launch overhead: copies the single int
								// that A points to into the int that B points to.
								// Every launched thread performs the same load and store, so it is meant
								// to be launched as <<<1,1>>>. It uses no shared memory.
								__global__ void cuda_kernel_kernelSwitchTest(int const * const A, int * const B) {
									int const value = A[0];
									B[0] = value;
								}
							 | 
						|
								
							 | 
						|
								// Measures the average cost of launching a trivial kernel.
								//
								// Allocates two ints on the device, launches cuda_kernel_kernelSwitchTest
								// N times with a <<<1,1>>> configuration, waits for all launches to finish,
								// and prints the total and the per-launch wall-clock time in microseconds
								// to std::cout.
								//
								// N: number of back-to-back kernel launches to time. For N == 0 only the
								//    total is printed, because a per-launch average is undefined.
								//
								// Errors from the CUDA runtime are reported on std::cout; the function
								// returns normally in every case and always releases what it allocated.
								void kernelSwitchTest(size_t N) {
									int* deviceIntA = nullptr;
									int* deviceIntB = nullptr;

									// Allocate space on the device
									if (cudaMalloc((void**)&deviceIntA, sizeof(int)) != cudaSuccess) {
										std::cout << "Error in cudaMalloc while allocating " << sizeof(int) << " Bytes!" << std::endl;
										return;
									}
									if (cudaMalloc((void**)&deviceIntB, sizeof(int)) != cudaSuccess) {
										std::cout << "Error in cudaMalloc while allocating " << sizeof(int) << " Bytes!" << std::endl;
										// Do not leak the first buffer when the second allocation fails.
										cudaFree(deviceIntA);
										return;
									}

									// cudaMalloc does not initialize memory; give the kernel a defined value
									// to read instead of an indeterminate one.
									if (cudaMemset(deviceIntA, 0, sizeof(int)) != cudaSuccess) {
										std::cout << "Error in cudaMemset!" << std::endl;
									} else {
										auto start_time = std::chrono::high_resolution_clock::now();
										// size_t counter: matches the type of N, so no signed/unsigned
										// comparison and no overflow for N > INT_MAX.
										for (size_t i = 0; i < N; ++i) {
											cuda_kernel_kernelSwitchTest<<<1,1>>>(deviceIntA, deviceIntB);
										}
										// Kernel launches are asynchronous. Pick up launch errors, then wait
										// until every queued kernel has actually run before stopping the
										// clock; otherwise only the enqueue time would be measured.
										const cudaError_t launchStatus = cudaGetLastError();
										const cudaError_t syncStatus = cudaDeviceSynchronize();
										auto end_time = std::chrono::high_resolution_clock::now();

										if (launchStatus != cudaSuccess) {
											std::cout << "Error while launching the kernel: " << cudaGetErrorString(launchStatus) << std::endl;
										} else if (syncStatus != cudaSuccess) {
											std::cout << "Error while executing the kernel: " << cudaGetErrorString(syncStatus) << std::endl;
										} else {
											const auto elapsedMicros = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count();
											std::cout << "Switching the Kernel " << N << " times took " << elapsedMicros << "micros" << std::endl;
											// Avoid dividing by zero when no kernel was launched.
											if (N > 0) {
												std::cout << "Resulting in " << (elapsedMicros / ((double)(N))) << "Microseconds per Kernel Switch" << std::endl;
											}
										}
									}

									// Free memory on device. Both buffers are always released, even if
									// freeing the first one fails.
									const cudaError_t freeStatusA = cudaFree(deviceIntA);
									const cudaError_t freeStatusB = cudaFree(deviceIntB);
									if (freeStatusA != cudaSuccess) {
										std::cout << "Error in cudaFree!" << std::endl;
									}
									if (freeStatusB != cudaSuccess) {
										std::cout << "Error in cudaFree!" << std::endl;
									}
								}
							 |