Browse Source
Removed the extra shell around the Cuda Plugin. Changed include paths.
Removed the extra shell around the Cuda Plugin. Changed include paths.
Former-commit-id: c7fec9220d
tempestpy_adaptions
PBerger
11 years ago
9 changed files with 31 additions and 213 deletions
-
3CMakeLists.txt
-
4resources/cudaForStorm/CMakeLists.txt
-
15resources/cudaForStorm/src/cudaForStorm.h
-
124resources/cudaForStorm/src/cudaTests.h
-
69resources/cudaForStorm/src/main.cpp
-
6resources/cudaForStorm/srcCuda/basicValueIteration.cu
-
6resources/cudaForStorm/srcCuda/basicValueIteration.h
-
14resources/cudaForStorm/srcCuda/cudaForStorm.h
-
3src/storm.cpp
@ -1,15 +0,0 @@ |
|||||
#ifndef STORM_CUDAFORSTORM_CUDAFORSTORM_H_ |
|
||||
#define STORM_CUDAFORSTORM_CUDAFORSTORM_H_ |
|
||||
|
|
||||
// Library exports |
|
||||
#include "cudaForStorm_Export.h" |
|
||||
|
|
||||
/* |
|
||||
* List of exported functions in this library |
|
||||
*/ |
|
||||
|
|
||||
cudaForStorm_EXPORT int cudaForStormTest(int value); |
|
||||
|
|
||||
|
|
||||
|
|
||||
#endif // STORM_CUDAFORSTORM_CUDAFORSTORM_H_ |
|
@ -1,124 +0,0 @@ |
|||||
#include <cuda.h> |
|
||||
#include "srcCuda/allCudaKernels.h" |
|
||||
|
|
||||
#include <iostream> |
|
||||
#include <chrono> |
|
||||
#include <random> |
|
||||
|
|
||||
/*
 * Placeholder for a device listing routine.
 * The body is intentionally empty; nothing is queried or printed yet.
 */
void cudaShowDevices()
{
    // Todo: not implemented.
}
|
||||
|
|
||||
void cudaSimpleAddTest(int a, int b) { |
|
||||
std::cout << "Running cudaSimpleAddTest:" << std::endl; |
|
||||
std::cout << "a = " << a << ", b = " << b << "" << std::endl; |
|
||||
|
|
||||
int c = cuda_basicAdd(a, b); |
|
||||
|
|
||||
std::cout << "Result: " << c << "" << std::endl; |
|
||||
} |
|
||||
|
|
||||
void cudaArrayFmaTest(int N) { |
|
||||
std::cout << "Running cudaArrayFmaTest:" << std::endl; |
|
||||
std::cout << "N is " << N << ", resulting in " << (5 * sizeof(int) * N) << " Bytes of Data." << std::endl; |
|
||||
|
|
||||
std::cout << "Generating random input arrays." << std::endl; |
|
||||
|
|
||||
std::default_random_engine generator; |
|
||||
std::uniform_int_distribution<int> distribution(0, INT32_MAX); |
|
||||
int dice_roll = distribution(generator); |
|
||||
|
|
||||
auto start_time = std::chrono::high_resolution_clock::now(); |
|
||||
|
|
||||
int* arrayA = new int[N]; |
|
||||
int* arrayB = new int[N]; |
|
||||
int* arrayC = new int[N]; |
|
||||
int* arrayD = new int[N]; |
|
||||
int* arrayD_CPU = new int[N]; |
|
||||
|
|
||||
for (int i = 0; i < N; ++i) { |
|
||||
//arrayA[i] = distribution(generator); |
|
||||
//arrayB[i] = distribution(generator); |
|
||||
//arrayC[i] = distribution(generator); |
|
||||
arrayA[i] = i * 1000 + 137; |
|
||||
arrayB[i] = i * 7000 + 1537; |
|
||||
arrayC[i] = i * 15000 + 97; |
|
||||
arrayD[i] = 0; |
|
||||
arrayD_CPU[i] = 0; |
|
||||
} |
|
||||
|
|
||||
auto end_time = std::chrono::high_resolution_clock::now(); |
|
||||
std::cout << "Array generation took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|
||||
|
|
||||
std::cout << "Running FMA test on CPU." << std::endl; |
|
||||
|
|
||||
start_time = std::chrono::high_resolution_clock::now(); |
|
||||
cuda_arrayFmaHelper(arrayA, arrayB, arrayC, arrayD_CPU, N); |
|
||||
end_time = std::chrono::high_resolution_clock::now(); |
|
||||
std::cout << "FMA on CPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|
||||
|
|
||||
start_time = std::chrono::high_resolution_clock::now(); |
|
||||
cuda_arrayFma(arrayA, arrayB, arrayC, arrayD, N); |
|
||||
end_time = std::chrono::high_resolution_clock::now(); |
|
||||
std::cout << "FMA on GPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|
||||
|
|
||||
int errors = 0; |
|
||||
for (int i = 0; i < N; ++i) { |
|
||||
if (arrayD[i] != arrayD_CPU[i]) { |
|
||||
std::cout << "Error in Entry " << i << ": GPU has " << arrayD[i] << " but CPU has " << arrayD_CPU[i] << "!" << std::endl; |
|
||||
++errors; |
|
||||
} |
|
||||
} |
|
||||
std::cout << "Checked Arrays for Errors: " << errors << " Errors occured." << std::endl; |
|
||||
} |
|
||||
|
|
||||
void cudaArrayFmaOptimizedTest(int N, int M) { |
|
||||
std::cout << "Running cudaArrayFmaTest:" << std::endl; |
|
||||
std::cout << "N is " << N << ", resulting in " << (4 * sizeof(int) * N) << " Bytes of Data." << std::endl; |
|
||||
|
|
||||
size_t freeCudaMemory = getFreeCudaMemory(); |
|
||||
size_t totalCudaMemory = getTotalCudaMemory(); |
|
||||
int freeProzent = static_cast<int>(((double)freeCudaMemory)/((double)totalCudaMemory) * 100); |
|
||||
|
|
||||
std::cout << "CUDA Device has " << freeCudaMemory << " Bytes of " << totalCudaMemory << " Bytes free (" << (freeProzent) << "%)." << std::endl; |
|
||||
|
|
||||
std::cout << "Generating random input arrays." << std::endl; |
|
||||
|
|
||||
std::default_random_engine generator; |
|
||||
std::uniform_int_distribution<int> distribution(0, INT32_MAX); |
|
||||
|
|
||||
auto start_time = std::chrono::high_resolution_clock::now(); |
|
||||
|
|
||||
int* arrayA = new int[4 * N]; |
|
||||
int* arrayA_CPU = new int[4 * N]; |
|
||||
|
|
||||
for (int i = 0; i < 4*N; ++i) { |
|
||||
arrayA[i] = i * 1000 + i + (357854878 % (i+1)); |
|
||||
arrayA_CPU[i] = arrayA[i]; |
|
||||
} |
|
||||
|
|
||||
auto end_time = std::chrono::high_resolution_clock::now(); |
|
||||
std::cout << "Array generation took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|
||||
|
|
||||
start_time = std::chrono::high_resolution_clock::now(); |
|
||||
cuda_arrayFmaOptimizedHelper(arrayA_CPU, N); |
|
||||
end_time = std::chrono::high_resolution_clock::now(); |
|
||||
std::cout << "FMA on CPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|
||||
|
|
||||
start_time = std::chrono::high_resolution_clock::now(); |
|
||||
cuda_arrayFmaOptimized(arrayA, N, M); |
|
||||
end_time = std::chrono::high_resolution_clock::now(); |
|
||||
std::cout << "FMA on GPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|
||||
|
|
||||
int errors = 0; |
|
||||
for (int i = 0; i < N; i+=4) { |
|
||||
if (arrayA[i+3] != arrayA_CPU[i+3]) { |
|
||||
//std::cout << "Error in Entry " << i << ": GPU has " << arrayA[i+3] << " but CPU has " << arrayA_CPU[i+3] << "!" << std::endl; |
|
||||
++errors; |
|
||||
} |
|
||||
} |
|
||||
std::cout << "Checked Arrays for Errors: " << errors << " Errors occured." << std::endl; |
|
||||
|
|
||||
delete[] arrayA; |
|
||||
delete[] arrayA_CPU; |
|
||||
} |
|
@ -1,69 +0,0 @@ |
|||||
#include "cudaForStorm.h"
|
|
||||
|
|
||||
#include <stdio.h>
|
|
||||
#include <stdlib.h>
|
|
||||
|
|
||||
#include <iostream>
|
|
||||
#include <chrono>
|
|
||||
#include <random>
|
|
||||
|
|
||||
#include "cudaTests.h"
|
|
||||
|
|
||||
/*
 * Exported smoke-test function: returns the argument increased by 42.
 */
int cudaForStormTest(int value)
{
    const int offset = 42;
    return offset + value;
}
|
||||
|
|
||||
|
|
||||
int main_Test12345(int argc, char **argv){ |
|
||||
resetCudaDevice(); |
|
||||
|
|
||||
int testNumber = 0; |
|
||||
int N = 10000; |
|
||||
int M = 402653184; |
|
||||
if (argc > 1) { |
|
||||
testNumber = atoi(argv[1]); |
|
||||
if (argc > 2) { |
|
||||
N = atoi(argv[2]); |
|
||||
if (argc > 3) { |
|
||||
M = atoi(argv[3]); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
switch (testNumber) { |
|
||||
case 1: |
|
||||
cudaSimpleAddTest(N, M); |
|
||||
break; |
|
||||
case 2: |
|
||||
cudaArrayFmaTest(N); |
|
||||
break; |
|
||||
case 3: |
|
||||
cudaArrayFmaOptimizedTest(N, M); |
|
||||
break; |
|
||||
case 4: |
|
||||
cpp_cuda_bandwidthTest(M, N); |
|
||||
break; |
|
||||
case 5: |
|
||||
kernelSwitchTest(N); |
|
||||
break; |
|
||||
break; |
|
||||
// DEFAULT AND 0
|
|
||||
case 0: |
|
||||
default: |
|
||||
std::cout << "Available functions are:" << std::endl; |
|
||||
std::cout << "0 - Show this overview" << std::endl; |
|
||||
std::cout << "1 - cuda simpleAddTest(N, M)" << std::endl; |
|
||||
std::cout << "2 - cuda arrayFmaTest(N)" << std::endl; |
|
||||
std::cout << "3 - cuda arrayFmaOptimizedTest(N, M)" << std::endl; |
|
||||
std::cout << "4 - cuda bandwidthTest(M, N)" << std::endl; |
|
||||
std::cout << "5 - cuda kernelSwitchTest(N)" << std::endl; |
|
||||
std::cout << std::endl; |
|
||||
std::cout << "Call: " << argv[0] << " Selection [N [M]]" << std::endl; |
|
||||
std::cout << "Defaults:" <<std::endl; |
|
||||
std::cout << "N: 10000" << std::endl; |
|
||||
std::cout << "M: 402653184" << std::endl; |
|
||||
break; |
|
||||
} |
|
||||
|
|
||||
return 0; |
|
||||
} |
|
@ -1,4 +1,8 @@ |
|||||
#include <cstdint> |
#include <cstdint> |
||||
#include <vector> |
#include <vector> |
||||
|
|
||||
void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices); |
|
||||
|
// Library exports |
||||
|
#include "cudaForStorm_Export.h" |
||||
|
|
||||
|
cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b); |
||||
|
cudaForStorm_EXPORT void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices); |
@ -0,0 +1,14 @@ |
|||||
|
#ifndef STORM_CUDAFORSTORM_CUDAFORSTORM_H_ |
||||
|
#define STORM_CUDAFORSTORM_CUDAFORSTORM_H_ |
||||
|
|
||||
|
/* |
||||
|
* List of exported functions in this library |
||||
|
*/ |
||||
|
|
||||
|
// TopologicalValueIteration |
||||
|
#include "srcCuda/basicValueIteration.h" |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
#endif // STORM_CUDAFORSTORM_CUDAFORSTORM_H_ |
Write
Preview
Loading…
Cancel
Save
Reference in new issue