Browse Source

Removed the extra shell around the Cuda Plugin. Changed include paths.

Former-commit-id: c7fec9220d
tempestpy_adaptions
PBerger 11 years ago
parent
commit
da9fe04ba4
  1. 3
      CMakeLists.txt
  2. 4
      resources/cudaForStorm/CMakeLists.txt
  3. 15
      resources/cudaForStorm/src/cudaForStorm.h
  4. 124
      resources/cudaForStorm/src/cudaTests.h
  5. 69
      resources/cudaForStorm/src/main.cpp
  6. 6
      resources/cudaForStorm/srcCuda/basicValueIteration.cu
  7. 6
      resources/cudaForStorm/srcCuda/basicValueIteration.h
  8. 14
      resources/cudaForStorm/srcCuda/cudaForStorm.h
  9. 3
      src/storm.cpp

3
CMakeLists.txt

@ -330,7 +330,8 @@ target_link_libraries(storm-performance-tests ${Boost_LIBRARIES})
############################################################# #############################################################
if (ENABLE_CUDAFORSTORM) if (ENABLE_CUDAFORSTORM)
message (STATUS "StoRM - Linking with CudaForStorm") message (STATUS "StoRM - Linking with CudaForStorm")
include_directories("${PROJECT_SOURCE_DIR}/build/cudaForStorm/include")
include_directories("${PROJECT_BINARY_DIR}/cudaForStorm/include")
include_directories("${PROJECT_SOURCE_DIR}/resources/cudaForStorm")
target_link_libraries(storm cudaForStorm) target_link_libraries(storm cudaForStorm)
target_link_libraries(storm-functional-tests cudaForStorm) target_link_libraries(storm-functional-tests cudaForStorm)
target_link_libraries(storm-performance-tests cudaForStorm) target_link_libraries(storm-performance-tests cudaForStorm)

4
resources/cudaForStorm/CMakeLists.txt

@ -183,7 +183,7 @@ include (GenerateExportHeader)
## ##
############################################################# #############################################################
#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30) #set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30)
cuda_add_library(cudaForStorm
cuda_add_library(cudaForStorm SHARED
${CUDAFORSTORM_CUDA_SOURCES} ${CUDAFORSTORM_CUDA_HEADERS} ${CUDAFORSTORM_CUDA_SOURCES} ${CUDAFORSTORM_CUDA_HEADERS}
OPTIONS -DSTUFF="" -arch=sm_30 OPTIONS -DSTUFF="" -arch=sm_30
RELEASE -DNDEBUG RELEASE -DNDEBUG
@ -223,4 +223,4 @@ endif(LINK_LIBCXXABI)
# Install Directive # Install Directive
install(TARGETS cudaForStorm DESTINATION "${STORM_LIB_INSTALL_DIR}/lib") install(TARGETS cudaForStorm DESTINATION "${STORM_LIB_INSTALL_DIR}/lib")
install(FILES "${PROJECT_SOURCE_DIR}/src/cudaForStorm.h" "${PROJECT_BINARY_DIR}/cudaForStorm_Export.h" DESTINATION "${STORM_LIB_INSTALL_DIR}/include")
install(FILES "${PROJECT_SOURCE_DIR}/srcCuda/cudaForStorm.h" "${PROJECT_BINARY_DIR}/include/cudaForStorm_Export.h" DESTINATION "${STORM_LIB_INSTALL_DIR}/include")

15
resources/cudaForStorm/src/cudaForStorm.h

@ -1,15 +0,0 @@
#ifndef STORM_CUDAFORSTORM_CUDAFORSTORM_H_
#define STORM_CUDAFORSTORM_CUDAFORSTORM_H_
// Library exports
#include "cudaForStorm_Export.h"
/*
* List of exported functions in this library
*/
cudaForStorm_EXPORT int cudaForStormTest(int value);
#endif // STORM_CUDAFORSTORM_CUDAFORSTORM_H_

124
resources/cudaForStorm/src/cudaTests.h

@ -1,124 +0,0 @@
#include <cuda.h>
#include "srcCuda/allCudaKernels.h"
#include <iostream>
#include <chrono>
#include <random>
/*!
 * Placeholder for a routine that reports the available CUDA devices.
 *
 * The body is intentionally empty: nothing has been implemented yet.
 */
void cudaShowDevices()
{
    // TODO: not implemented.
}
/*!
 * Prints both operands, hands them to cuda_basicAdd and prints the value
 * that call returns.
 *
 * @param a First operand forwarded to cuda_basicAdd.
 * @param b Second operand forwarded to cuda_basicAdd.
 */
void cudaSimpleAddTest(int a, int b) {
    std::cout << "Running cudaSimpleAddTest:" << std::endl;

    // Echo the inputs so the printed result can be checked by eye.
    std::cout << "a = " << a;
    std::cout << ", b = " << b << std::endl;

    int const result = cuda_basicAdd(a, b);
    std::cout << "Result: " << result << std::endl;
}
/*!
 * Runs cuda_arrayFmaHelper and cuda_arrayFma on the same three input arrays
 * of length N, times both calls and reports every index at which the two
 * output arrays differ.
 *
 * Based on the messages printed below, cuda_arrayFmaHelper is treated as the
 * CPU reference and cuda_arrayFma as the GPU implementation.
 *
 * @param N Number of entries in each of the five arrays.
 */
void cudaArrayFmaTest(int N) {
    std::cout << "Running cudaArrayFmaTest:" << std::endl;
    std::cout << "N is " << N << ", resulting in " << (5 * sizeof(int) * N) << " Bytes of Data." << std::endl;
    std::cout << "Generating random input arrays." << std::endl;

    auto start_time = std::chrono::high_resolution_clock::now();

    int* arrayA = new int[N];
    int* arrayB = new int[N];
    int* arrayC = new int[N];
    int* arrayD = new int[N];
    int* arrayD_CPU = new int[N];

    // Despite the message above, the inputs are deterministic functions of
    // the index, which keeps runs reproducible.
    // NOTE(review): i * 15000 exceeds the int range for large N - confirm the
    // intended upper bound for N.
    for (int i = 0; i < N; ++i) {
        arrayA[i] = i * 1000 + 137;
        arrayB[i] = i * 7000 + 1537;
        arrayC[i] = i * 15000 + 97;
        arrayD[i] = 0;
        arrayD_CPU[i] = 0;
    }

    auto end_time = std::chrono::high_resolution_clock::now();
    std::cout << "Array generation took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;

    std::cout << "Running FMA test on CPU." << std::endl;

    start_time = std::chrono::high_resolution_clock::now();
    cuda_arrayFmaHelper(arrayA, arrayB, arrayC, arrayD_CPU, N);
    end_time = std::chrono::high_resolution_clock::now();
    std::cout << "FMA on CPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;

    start_time = std::chrono::high_resolution_clock::now();
    cuda_arrayFma(arrayA, arrayB, arrayC, arrayD, N);
    end_time = std::chrono::high_resolution_clock::now();
    std::cout << "FMA on GPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;

    // Compare the two result arrays entry by entry.
    int errors = 0;
    for (int i = 0; i < N; ++i) {
        if (arrayD[i] != arrayD_CPU[i]) {
            std::cout << "Error in Entry " << i << ": GPU has " << arrayD[i] << " but CPU has " << arrayD_CPU[i] << "!" << std::endl;
            ++errors;
        }
    }
    std::cout << "Checked Arrays for Errors: " << errors << " Errors occured." << std::endl;

    // Release the host buffers; they were previously leaked on every call.
    delete[] arrayA;
    delete[] arrayB;
    delete[] arrayC;
    delete[] arrayD;
    delete[] arrayD_CPU;
}
/*!
 * Runs cuda_arrayFmaOptimizedHelper and cuda_arrayFmaOptimized on identical
 * copies of one array holding 4 * N integers, times both calls and counts the
 * groups of four whose last entry differs between the two copies.
 *
 * Based on the messages printed below, the helper is treated as the CPU
 * reference and cuda_arrayFmaOptimized as the GPU implementation.
 *
 * @param N Number of four-integer groups; the arrays hold 4 * N entries.
 * @param M Forwarded unchanged to cuda_arrayFmaOptimized.
 */
void cudaArrayFmaOptimizedTest(int N, int M) {
    std::cout << "Running cudaArrayFmaOptimizedTest:" << std::endl;
    std::cout << "N is " << N << ", resulting in " << (4 * sizeof(int) * N) << " Bytes of Data." << std::endl;

    size_t freeCudaMemory = getFreeCudaMemory();
    size_t totalCudaMemory = getTotalCudaMemory();
    int freePercent = static_cast<int>(((double)freeCudaMemory)/((double)totalCudaMemory) * 100);

    std::cout << "CUDA Device has " << freeCudaMemory << " Bytes of " << totalCudaMemory << " Bytes free (" << (freePercent) << "%)." << std::endl;
    std::cout << "Generating random input arrays." << std::endl;

    auto start_time = std::chrono::high_resolution_clock::now();

    // Both buffers hold 4 * N entries.
    int const entryCount = 4 * N;
    int* arrayA = new int[entryCount];
    int* arrayA_CPU = new int[entryCount];

    // Despite the message above, the contents are a deterministic function of
    // the index; both buffers start out identical.
    for (int i = 0; i < entryCount; ++i) {
        arrayA[i] = i * 1000 + i + (357854878 % (i+1));
        arrayA_CPU[i] = arrayA[i];
    }

    auto end_time = std::chrono::high_resolution_clock::now();
    std::cout << "Array generation took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;

    start_time = std::chrono::high_resolution_clock::now();
    cuda_arrayFmaOptimizedHelper(arrayA_CPU, N);
    end_time = std::chrono::high_resolution_clock::now();
    std::cout << "FMA on CPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;

    start_time = std::chrono::high_resolution_clock::now();
    cuda_arrayFmaOptimized(arrayA, N, M);
    end_time = std::chrono::high_resolution_clock::now();
    std::cout << "FMA on GPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;

    // Compare the last entry of every group of four across the whole array.
    // The loop used to stop at N, which covered only a quarter of the groups.
    // NOTE(review): assumes index i+3 holds each group's result - confirm
    // against cuda_arrayFmaOptimizedHelper.
    int errors = 0;
    for (int i = 0; i < entryCount; i += 4) {
        if (arrayA[i+3] != arrayA_CPU[i+3]) {
            //std::cout << "Error in Entry " << i << ": GPU has " << arrayA[i+3] << " but CPU has " << arrayA_CPU[i+3] << "!" << std::endl;
            ++errors;
        }
    }
    std::cout << "Checked Arrays for Errors: " << errors << " Errors occured." << std::endl;

    delete[] arrayA;
    delete[] arrayA_CPU;
}

69
resources/cudaForStorm/src/main.cpp

@ -1,69 +0,0 @@
#include "cudaForStorm.h"
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <chrono>
#include <random>
#include "cudaTests.h"
/*!
 * Returns the given value increased by 42.
 *
 * @param value The number to offset.
 * @return value + 42.
 */
int cudaForStormTest(int value) {
    int const offset = 42;
    return offset + value;
}
/*!
 * Command line driver for the CUDA test routines.
 *
 * Usage: <program> Selection [N [M]]
 * Selection picks the test to run; 0 or any unknown value prints the
 * overview. N and M fall back to the defaults below when omitted.
 *
 * @param argc Number of entries in argv.
 * @param argv Argument vector; argv[1..3] are parsed with atoi.
 * @return Always 0.
 */
int main_Test12345(int argc, char **argv){
    // Single source for the defaults, shared by parsing and the help text.
    int const defaultN = 10000;
    int const defaultM = 402653184;

    resetCudaDevice();

    int testNumber = 0;
    int N = defaultN;
    int M = defaultM;

    // Positional arguments are optional; each later one requires the earlier.
    if (argc > 1) {
        testNumber = atoi(argv[1]);
        if (argc > 2) {
            N = atoi(argv[2]);
            if (argc > 3) {
                M = atoi(argv[3]);
            }
        }
    }

    switch (testNumber) {
        case 1:
            cudaSimpleAddTest(N, M);
            break;
        case 2:
            cudaArrayFmaTest(N);
            break;
        case 3:
            cudaArrayFmaOptimizedTest(N, M);
            break;
        case 4:
            // Note the swapped argument order: this test takes (M, N).
            cpp_cuda_bandwidthTest(M, N);
            break;
        case 5:
            kernelSwitchTest(N);
            break;
        // DEFAULT AND 0
        case 0:
        default:
            std::cout << "Available functions are:" << std::endl;
            std::cout << "0 - Show this overview" << std::endl;
            std::cout << "1 - cuda simpleAddTest(N, M)" << std::endl;
            std::cout << "2 - cuda arrayFmaTest(N)" << std::endl;
            std::cout << "3 - cuda arrayFmaOptimizedTest(N, M)" << std::endl;
            std::cout << "4 - cuda bandwidthTest(M, N)" << std::endl;
            std::cout << "5 - cuda kernelSwitchTest(N)" << std::endl;
            std::cout << std::endl;
            std::cout << "Call: " << argv[0] << " Selection [N [M]]" << std::endl;
            std::cout << "Defaults:" <<std::endl;
            std::cout << "N: " << defaultN << std::endl;
            std::cout << "M: " << defaultM << std::endl;
            break;
    }

    return 0;
}

6
resources/cudaForStorm/srcCuda/basicValueIteration.cu

@ -11,8 +11,12 @@ __global__ void cuda_kernel_basicValueIteration_mvReduce(int const * const A, in
*B = *A; *B = *A;
} }
/*!
 * Writes the sum of both arguments to standard output.
 *
 * @param a First summand.
 * @param b Second summand.
 */
void cudaForStormTestFunction(int a, int b) {
    int const sum = a + b;
    std::cout << "Cuda for Storm: a + b = " << sum << std::endl;
}
void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices) { void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices) {
std::cout << "basicValueIteration_mvReduce is implemented for ValueType == double :)" << std::endl;
} }
/* /*

6
resources/cudaForStorm/srcCuda/basicValueIteration.h

@ -1,4 +1,8 @@
#include <cstdint> #include <cstdint>
#include <vector> #include <vector>
void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);
// Library exports
#include "cudaForStorm_Export.h"
cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b);
cudaForStorm_EXPORT void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);

14
resources/cudaForStorm/srcCuda/cudaForStorm.h

@ -0,0 +1,14 @@
#ifndef STORM_CUDAFORSTORM_CUDAFORSTORM_H_
#define STORM_CUDAFORSTORM_CUDAFORSTORM_H_
/*
* List of exported functions in this library
*/
// TopologicalValueIteration
#include "srcCuda/basicValueIteration.h"
#endif // STORM_CUDAFORSTORM_CUDAFORSTORM_H_

3
src/storm.cpp

@ -62,6 +62,8 @@
#include "src/exceptions/InvalidSettingsException.h" #include "src/exceptions/InvalidSettingsException.h"
#include "cudaForStorm.h"
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include <fstream> #include <fstream>
@ -160,6 +162,7 @@ void printHeader(const int argc, const char* argv[]) {
} }
std::cout << "Command line: " << commandStream.str() << std::endl; std::cout << "Command line: " << commandStream.str() << std::endl;
std::cout << "Current working directory: " << getCurrentWorkingDirectory() << std::endl << std::endl; std::cout << "Current working directory: " << getCurrentWorkingDirectory() << std::endl << std::endl;
cudaForStormTestFunction(21, 21);
} }
/*! /*!
Loading…
Cancel
Save