Browse Source
Removed the extra shell around the Cuda Plugin. Changed include paths.
Removed the extra shell around the Cuda Plugin. Changed include paths.
Former-commit-id: c7fec9220d
tempestpy_adaptions
PBerger
11 years ago
9 changed files with 31 additions and 213 deletions
-
3CMakeLists.txt
-
4resources/cudaForStorm/CMakeLists.txt
-
15resources/cudaForStorm/src/cudaForStorm.h
-
124resources/cudaForStorm/src/cudaTests.h
-
69resources/cudaForStorm/src/main.cpp
-
6resources/cudaForStorm/srcCuda/basicValueIteration.cu
-
6resources/cudaForStorm/srcCuda/basicValueIteration.h
-
14resources/cudaForStorm/srcCuda/cudaForStorm.h
-
3src/storm.cpp
@ -1,15 +0,0 @@ |
|||
#ifndef STORM_CUDAFORSTORM_CUDAFORSTORM_H_
#define STORM_CUDAFORSTORM_CUDAFORSTORM_H_

// Export decoration for this library (presumably where cudaForStorm_EXPORT
// is defined -- confirm against the generated export header).
#include "cudaForStorm_Export.h"

/*
 * Functions exported by this library.
 */

// Exported test function: takes an int and returns an int.
cudaForStorm_EXPORT int cudaForStormTest(int value);

#endif // STORM_CUDAFORSTORM_CUDAFORSTORM_H_
@ -1,124 +0,0 @@ |
|||
#include <cuda.h> |
|||
#include "srcCuda/allCudaKernels.h" |
|||
|
|||
#include <iostream> |
|||
#include <chrono> |
|||
#include <random> |
|||
|
|||
/*
 * Placeholder for a device listing routine. Not implemented yet: the
 * function currently does nothing.
 */
void cudaShowDevices() {
    // TODO: implement.
    return;
}
|||
|
|||
void cudaSimpleAddTest(int a, int b) { |
|||
std::cout << "Running cudaSimpleAddTest:" << std::endl; |
|||
std::cout << "a = " << a << ", b = " << b << "" << std::endl; |
|||
|
|||
int c = cuda_basicAdd(a, b); |
|||
|
|||
std::cout << "Result: " << c << "" << std::endl; |
|||
} |
|||
|
|||
void cudaArrayFmaTest(int N) { |
|||
std::cout << "Running cudaArrayFmaTest:" << std::endl; |
|||
std::cout << "N is " << N << ", resulting in " << (5 * sizeof(int) * N) << " Bytes of Data." << std::endl; |
|||
|
|||
std::cout << "Generating random input arrays." << std::endl; |
|||
|
|||
std::default_random_engine generator; |
|||
std::uniform_int_distribution<int> distribution(0, INT32_MAX); |
|||
int dice_roll = distribution(generator); |
|||
|
|||
auto start_time = std::chrono::high_resolution_clock::now(); |
|||
|
|||
int* arrayA = new int[N]; |
|||
int* arrayB = new int[N]; |
|||
int* arrayC = new int[N]; |
|||
int* arrayD = new int[N]; |
|||
int* arrayD_CPU = new int[N]; |
|||
|
|||
for (int i = 0; i < N; ++i) { |
|||
//arrayA[i] = distribution(generator); |
|||
//arrayB[i] = distribution(generator); |
|||
//arrayC[i] = distribution(generator); |
|||
arrayA[i] = i * 1000 + 137; |
|||
arrayB[i] = i * 7000 + 1537; |
|||
arrayC[i] = i * 15000 + 97; |
|||
arrayD[i] = 0; |
|||
arrayD_CPU[i] = 0; |
|||
} |
|||
|
|||
auto end_time = std::chrono::high_resolution_clock::now(); |
|||
std::cout << "Array generation took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|||
|
|||
std::cout << "Running FMA test on CPU." << std::endl; |
|||
|
|||
start_time = std::chrono::high_resolution_clock::now(); |
|||
cuda_arrayFmaHelper(arrayA, arrayB, arrayC, arrayD_CPU, N); |
|||
end_time = std::chrono::high_resolution_clock::now(); |
|||
std::cout << "FMA on CPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|||
|
|||
start_time = std::chrono::high_resolution_clock::now(); |
|||
cuda_arrayFma(arrayA, arrayB, arrayC, arrayD, N); |
|||
end_time = std::chrono::high_resolution_clock::now(); |
|||
std::cout << "FMA on GPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|||
|
|||
int errors = 0; |
|||
for (int i = 0; i < N; ++i) { |
|||
if (arrayD[i] != arrayD_CPU[i]) { |
|||
std::cout << "Error in Entry " << i << ": GPU has " << arrayD[i] << " but CPU has " << arrayD_CPU[i] << "!" << std::endl; |
|||
++errors; |
|||
} |
|||
} |
|||
std::cout << "Checked Arrays for Errors: " << errors << " Errors occured." << std::endl; |
|||
} |
|||
|
|||
void cudaArrayFmaOptimizedTest(int N, int M) { |
|||
std::cout << "Running cudaArrayFmaTest:" << std::endl; |
|||
std::cout << "N is " << N << ", resulting in " << (4 * sizeof(int) * N) << " Bytes of Data." << std::endl; |
|||
|
|||
size_t freeCudaMemory = getFreeCudaMemory(); |
|||
size_t totalCudaMemory = getTotalCudaMemory(); |
|||
int freeProzent = static_cast<int>(((double)freeCudaMemory)/((double)totalCudaMemory) * 100); |
|||
|
|||
std::cout << "CUDA Device has " << freeCudaMemory << " Bytes of " << totalCudaMemory << " Bytes free (" << (freeProzent) << "%)." << std::endl; |
|||
|
|||
std::cout << "Generating random input arrays." << std::endl; |
|||
|
|||
std::default_random_engine generator; |
|||
std::uniform_int_distribution<int> distribution(0, INT32_MAX); |
|||
|
|||
auto start_time = std::chrono::high_resolution_clock::now(); |
|||
|
|||
int* arrayA = new int[4 * N]; |
|||
int* arrayA_CPU = new int[4 * N]; |
|||
|
|||
for (int i = 0; i < 4*N; ++i) { |
|||
arrayA[i] = i * 1000 + i + (357854878 % (i+1)); |
|||
arrayA_CPU[i] = arrayA[i]; |
|||
} |
|||
|
|||
auto end_time = std::chrono::high_resolution_clock::now(); |
|||
std::cout << "Array generation took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|||
|
|||
start_time = std::chrono::high_resolution_clock::now(); |
|||
cuda_arrayFmaOptimizedHelper(arrayA_CPU, N); |
|||
end_time = std::chrono::high_resolution_clock::now(); |
|||
std::cout << "FMA on CPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|||
|
|||
start_time = std::chrono::high_resolution_clock::now(); |
|||
cuda_arrayFmaOptimized(arrayA, N, M); |
|||
end_time = std::chrono::high_resolution_clock::now(); |
|||
std::cout << "FMA on GPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl; |
|||
|
|||
int errors = 0; |
|||
for (int i = 0; i < N; i+=4) { |
|||
if (arrayA[i+3] != arrayA_CPU[i+3]) { |
|||
//std::cout << "Error in Entry " << i << ": GPU has " << arrayA[i+3] << " but CPU has " << arrayA_CPU[i+3] << "!" << std::endl; |
|||
++errors; |
|||
} |
|||
} |
|||
std::cout << "Checked Arrays for Errors: " << errors << " Errors occured." << std::endl; |
|||
|
|||
delete[] arrayA; |
|||
delete[] arrayA_CPU; |
|||
} |
@ -1,69 +0,0 @@ |
|||
#include "cudaForStorm.h"
|
|||
|
|||
#include <stdio.h>
|
|||
#include <stdlib.h>
|
|||
|
|||
#include <iostream>
|
|||
#include <chrono>
|
|||
#include <random>
|
|||
|
|||
#include "cudaTests.h"
|
|||
|
|||
/*
 * Test function of the library: returns its argument increased by 42.
 *
 * @param value Input value.
 * @return value + 42.
 */
int cudaForStormTest(int value) {
    const int offset = 42;
    return value + offset;
}
|||
|
|||
|
|||
int main_Test12345(int argc, char **argv){ |
|||
resetCudaDevice(); |
|||
|
|||
int testNumber = 0; |
|||
int N = 10000; |
|||
int M = 402653184; |
|||
if (argc > 1) { |
|||
testNumber = atoi(argv[1]); |
|||
if (argc > 2) { |
|||
N = atoi(argv[2]); |
|||
if (argc > 3) { |
|||
M = atoi(argv[3]); |
|||
} |
|||
} |
|||
} |
|||
|
|||
switch (testNumber) { |
|||
case 1: |
|||
cudaSimpleAddTest(N, M); |
|||
break; |
|||
case 2: |
|||
cudaArrayFmaTest(N); |
|||
break; |
|||
case 3: |
|||
cudaArrayFmaOptimizedTest(N, M); |
|||
break; |
|||
case 4: |
|||
cpp_cuda_bandwidthTest(M, N); |
|||
break; |
|||
case 5: |
|||
kernelSwitchTest(N); |
|||
break; |
|||
break; |
|||
// DEFAULT AND 0
|
|||
case 0: |
|||
default: |
|||
std::cout << "Available functions are:" << std::endl; |
|||
std::cout << "0 - Show this overview" << std::endl; |
|||
std::cout << "1 - cuda simpleAddTest(N, M)" << std::endl; |
|||
std::cout << "2 - cuda arrayFmaTest(N)" << std::endl; |
|||
std::cout << "3 - cuda arrayFmaOptimizedTest(N, M)" << std::endl; |
|||
std::cout << "4 - cuda bandwidthTest(M, N)" << std::endl; |
|||
std::cout << "5 - cuda kernelSwitchTest(N)" << std::endl; |
|||
std::cout << std::endl; |
|||
std::cout << "Call: " << argv[0] << " Selection [N [M]]" << std::endl; |
|||
std::cout << "Defaults:" <<std::endl; |
|||
std::cout << "N: 10000" << std::endl; |
|||
std::cout << "M: 402653184" << std::endl; |
|||
break; |
|||
} |
|||
|
|||
return 0; |
|||
} |
@ -1,4 +1,8 @@ |
|||
#include <cstdint> |
|||
#include <vector> |
|||
|
|||
// Library exports
#include "cudaForStorm_Export.h"

// Exported test function taking two ints.
cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b);

/*
 * Exported value iteration entry point. It is declared exactly once, with the
 * export decoration, so that no undecorated redeclaration conflicts with it.
 *
 * Inputs are the iteration limit, three vectors describing a matrix (row
 * indices, column indices, values), the vector x (the only non-const
 * argument), the vector b and the nondeterministic choice indices.
 * NOTE(review): the matrix presumably is in a compressed-row layout and x
 * presumably receives the result -- confirm against the implementation.
 */
cudaForStorm_EXPORT void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);
@ -0,0 +1,14 @@ |
|||
#ifndef STORM_CUDAFORSTORM_CUDAFORSTORM_H_ |
|||
#define STORM_CUDAFORSTORM_CUDAFORSTORM_H_ |
|||
|
|||
/* |
|||
* List of exported functions in this library |
|||
*/ |
|||
|
|||
// TopologicalValueIteration |
|||
#include "srcCuda/basicValueIteration.h" |
|||
|
|||
|
|||
|
|||
|
|||
#endif // STORM_CUDAFORSTORM_CUDAFORSTORM_H_ |
Write
Preview
Loading…
Cancel
Save
Reference in new issue