Removed the extra shell around the CUDA plugin. Changed include paths.

Former-commit-id: c7fec9220d
12 years ago · da9fe04ba4
9 changed files with 31 additions and 213 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -330,7 +330,8 @@ target_link_libraries(storm-performance-tests ${Boost_LIBRARIES})
 #############################################################
 if (ENABLE_CUDAFORSTORM)
    message (STATUS "StoRM - Linking with CudaForStorm")
 	include_directories("${PROJECT_SOURCE_DIR}/build/cudaForStorm/include")
 	include_directories("${PROJECT_BINARY_DIR}/cudaForStorm/include")
 	include_directories("${PROJECT_SOURCE_DIR}/resources/cudaForStorm")
    target_link_libraries(storm cudaForStorm)
    target_link_libraries(storm-functional-tests cudaForStorm)
    target_link_libraries(storm-performance-tests cudaForStorm)
--- a/resources/cudaForStorm/CMakeLists.txt
+++ b/resources/cudaForStorm/CMakeLists.txt
@ -183,7 +183,7 @@ include (GenerateExportHeader)
 ##
 #############################################################
 #set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30)
 cuda_add_library(cudaForStorm
 cuda_add_library(cudaForStorm SHARED
  ${CUDAFORSTORM_CUDA_SOURCES} ${CUDAFORSTORM_CUDA_HEADERS}
  OPTIONS -DSTUFF="" -arch=sm_30
  RELEASE -DNDEBUG
@ -223,4 +223,4 @@ endif(LINK_LIBCXXABI)
 # Install Directive
 install(TARGETS cudaForStorm DESTINATION "${STORM_LIB_INSTALL_DIR}/lib")
 install(FILES "${PROJECT_SOURCE_DIR}/src/cudaForStorm.h" "${PROJECT_BINARY_DIR}/cudaForStorm_Export.h" DESTINATION "${STORM_LIB_INSTALL_DIR}/include")
 # Public entry header plus the generated export header.
 install(FILES "${PROJECT_SOURCE_DIR}/srcCuda/cudaForStorm.h" "${PROJECT_BINARY_DIR}/include/cudaForStorm_Export.h" DESTINATION "${STORM_LIB_INSTALL_DIR}/include")
 # cudaForStorm.h includes "srcCuda/basicValueIteration.h", so that header must be installed under the same relative path.
 install(FILES "${PROJECT_SOURCE_DIR}/srcCuda/basicValueIteration.h" DESTINATION "${STORM_LIB_INSTALL_DIR}/include/srcCuda")
--- a/resources/cudaForStorm/src/cudaForStorm.h
+++ b/resources/cudaForStorm/src/cudaForStorm.h
@ -1,15 +0,0 @@
 #ifndef STORM_CUDAFORSTORM_CUDAFORSTORM_H_
 #define STORM_CUDAFORSTORM_CUDAFORSTORM_H_
 // Library exports
 #include "cudaForStorm_Export.h"
 /*
 * List of exported functions in this library
 */
 // Exported test function; the definition in main.cpp returns value + 42.
 cudaForStorm_EXPORT int cudaForStormTest(int value);
 #endif // STORM_CUDAFORSTORM_CUDAFORSTORM_H_
--- a/resources/cudaForStorm/src/cudaTests.h
+++ b/resources/cudaForStorm/src/cudaTests.h
@ -1,124 +0,0 @@
 #include <cuda.h>
 #include "srcCuda/allCudaKernels.h"
 #include <iostream>
 #include <chrono>
 #include <random>
 // Placeholder: currently does nothing. Presumably meant to list the available CUDA devices (not implemented yet).
 void cudaShowDevices() {
 	// Todo
 }
 // Prints both operands, passes them to cuda_basicAdd and prints the value it returns.
 void cudaSimpleAddTest(int a, int b) {
 	std::cout << "Running cudaSimpleAddTest:" << std::endl
 	          << "a = " << a << ", b = " << b << std::endl;
 	int const sum = cuda_basicAdd(a, b);
 	std::cout << "Result: " << sum << std::endl;
 }
 /*
 * Runs cuda_arrayFmaHelper (reported as the CPU run) and cuda_arrayFma (reported as the
 * GPU run) on the same N-element input arrays, prints the wall-clock time of each step
 * and counts the entries in which the two result arrays differ.
 *
 * N: number of elements per array.
 */
 void cudaArrayFmaTest(int N) {
 	std::cout << "Running cudaArrayFmaTest:" << std::endl;
 	std::cout << "N is " << N << ", resulting in " << (5 * sizeof(int) * N) << " Bytes of Data." << std::endl;
 	std::cout << "Generating random input arrays." << std::endl;
 	auto start_time = std::chrono::high_resolution_clock::now();
 	int* arrayA = new int[N];
 	int* arrayB = new int[N];
 	int* arrayC = new int[N];
 	int* arrayD = new int[N];
 	int* arrayD_CPU = new int[N];
 	// Despite the message above, the inputs follow a fixed arithmetic pattern, so every run sees identical data.
 	for (int i = 0; i < N; ++i) {
 		arrayA[i] = i * 1000 + 137;
 		arrayB[i] = i * 7000 + 1537;
 		arrayC[i] = i * 15000 + 97;
 		arrayD[i] = 0;
 		arrayD_CPU[i] = 0;
 	}
 	auto end_time = std::chrono::high_resolution_clock::now();
 	std::cout << "Array generation took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;
 	std::cout << "Running FMA test on CPU." << std::endl;
 	start_time = std::chrono::high_resolution_clock::now();
 	cuda_arrayFmaHelper(arrayA, arrayB, arrayC, arrayD_CPU, N);
 	end_time = std::chrono::high_resolution_clock::now();
 	std::cout << "FMA on CPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;
 	start_time = std::chrono::high_resolution_clock::now();
 	cuda_arrayFma(arrayA, arrayB, arrayC, arrayD, N);
 	end_time = std::chrono::high_resolution_clock::now();
 	std::cout << "FMA on GPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;
 	// Compare both result arrays entry by entry.
 	int errors = 0;
 	for (int i = 0; i < N; ++i) {
 		if (arrayD[i] != arrayD_CPU[i]) {
 			std::cout << "Error in Entry " << i << ": GPU has " << arrayD[i] << " but CPU has " << arrayD_CPU[i] << "!" << std::endl;
 			++errors;
 		}
 	}
 	std::cout << "Checked Arrays for Errors: " << errors << " Errors occured." << std::endl;
 	// Release the host buffers; they were previously leaked on every call.
 	delete[] arrayA;
 	delete[] arrayB;
 	delete[] arrayC;
 	delete[] arrayD;
 	delete[] arrayD_CPU;
 }
 /*
 * Runs cuda_arrayFmaOptimizedHelper (reported as the CPU run) and cuda_arrayFmaOptimized
 * (reported as the GPU run) on two identical copies of a 4*N-element array, prints the
 * wall-clock time of each step and counts the groups of four whose last slot differs.
 *
 * N: number of four-element groups in the array.
 * M: forwarded unchanged to cuda_arrayFmaOptimized.
 */
 void cudaArrayFmaOptimizedTest(int N, int M) {
 	std::cout << "Running cudaArrayFmaOptimizedTest:" << std::endl;
 	std::cout << "N is " << N << ", resulting in " << (4 * sizeof(int) * N) << " Bytes of Data." << std::endl;
 	size_t freeCudaMemory = getFreeCudaMemory();
 	size_t totalCudaMemory = getTotalCudaMemory();
 	// Guard the ratio so a reported total of 0 does not turn into a NaN-to-int conversion.
 	int freePercent = 0;
 	if (totalCudaMemory != 0) {
 		freePercent = static_cast<int>(((double)freeCudaMemory)/((double)totalCudaMemory) * 100);
 	}
 	std::cout << "CUDA Device has " << freeCudaMemory << " Bytes of " << totalCudaMemory << " Bytes free (" << (freePercent) << "%)." << std::endl;
 	std::cout << "Generating random input arrays." << std::endl;
 	auto start_time = std::chrono::high_resolution_clock::now();
 	int* arrayA = new int[4 * N];
 	int* arrayA_CPU = new int[4 * N];
 	// Despite the message above, the data follows a fixed pattern; both copies start out identical.
 	for (int i = 0; i < 4*N; ++i) {
 		arrayA[i] = i * 1000 + i + (357854878 % (i+1));
 		arrayA_CPU[i] = arrayA[i];
 	}
 	auto end_time = std::chrono::high_resolution_clock::now();
 	std::cout << "Array generation took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;
 	start_time = std::chrono::high_resolution_clock::now();
 	cuda_arrayFmaOptimizedHelper(arrayA_CPU, N);
 	end_time = std::chrono::high_resolution_clock::now();
 	std::cout << "FMA on CPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;
 	start_time = std::chrono::high_resolution_clock::now();
 	cuda_arrayFmaOptimized(arrayA, N, M);
 	end_time = std::chrono::high_resolution_clock::now();
 	std::cout << "FMA on GPU took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;
 	// Walk all N groups (4*N entries); the old bound of N only covered the first quarter of the array.
 	// Mismatches are counted only, not printed individually.
 	int errors = 0;
 	for (int i = 0; i < 4 * N; i += 4) {
 		if (arrayA[i+3] != arrayA_CPU[i+3]) {
 			++errors;
 		}
 	}
 	std::cout << "Checked Arrays for Errors: " << errors << " Errors occured." << std::endl;
 	delete[] arrayA;
 	delete[] arrayA_CPU;
 }
--- a/resources/cudaForStorm/src/main.cpp
+++ b/resources/cudaForStorm/src/main.cpp
@ -1,69 +0,0 @@
 #include "cudaForStorm.h"
 #include <stdio.h>  
 #include <stdlib.h>
 #include <iostream>
 #include <chrono>
 #include <random>
 #include "cudaTests.h"
 // Test export: returns the given value increased by 42.
 int cudaForStormTest(int value) {
 	int const offset = 42;
 	return offset + value;
 }
 /*
 * Command-line style driver for the CUDA test routines.
 *
 * Usage: <program> Selection [N [M]]
 *   Selection: number of the test to run (0 or anything unknown prints the overview).
 *   N, M:      integer arguments handed to the selected test (defaults 10000 and 402653184).
 *
 * Arguments are converted with atoi, so non-numeric text is read as 0.
 * Always returns 0.
 */
 int main_Test12345(int argc, char **argv){
 	resetCudaDevice();
 	int testNumber = 0;
 	int N = 10000;
 	int M = 402653184;
 	// Each positional argument is optional; later ones are only read when present.
 	if (argc > 1) {
 		testNumber = atoi(argv[1]);
 	}
 	if (argc > 2) {
 		N = atoi(argv[2]);
 	}
 	if (argc > 3) {
 		M = atoi(argv[3]);
 	}
 	switch (testNumber) {
 		case 1:
 			cudaSimpleAddTest(N, M);
 			break;
 		case 2:
 			cudaArrayFmaTest(N);
 			break;
 		case 3:
 			cudaArrayFmaOptimizedTest(N, M);
 			break;
 		case 4:
 			// Note the swapped argument order: M first, then N.
 			cpp_cuda_bandwidthTest(M, N);
 			break;
 		case 5:
 			kernelSwitchTest(N);
 			break;
 		// DEFAULT AND 0
 		case 0:
 		default:
 			std::cout << "Available functions are:" << std::endl;
 			std::cout << "0 - Show this  overview" << std::endl;
 			std::cout << "1 - cuda   simpleAddTest(N, M)" << std::endl;
 			std::cout << "2 - cuda   arrayFmaTest(N)" << std::endl;
 			std::cout << "3 - cuda   arrayFmaOptimizedTest(N, M)" << std::endl;
 			std::cout << "4 - cuda   bandwidthTest(M, N)" << std::endl;
 			std::cout << "5 - cuda   kernelSwitchTest(N)" << std::endl;
 			std::cout << std::endl;
 			std::cout << "Call: " << argv[0] << " Selection [N [M]]" << std::endl;
 			std::cout << "Defaults:" <<std::endl;
 			std::cout << "N: 10000" << std::endl;
 			std::cout << "M: 402653184" << std::endl;
 			break;
 	}
 	return 0;
 }
--- a/resources/cudaForStorm/srcCuda/basicValueIteration.cu
+++ b/resources/cudaForStorm/srcCuda/basicValueIteration.cu
@ -11,8 +11,12 @@ __global__ void cuda_kernel_basicValueIteration_mvReduce(int const * const A, in
 	*B = *A;
 }
 // Writes the sum of a and b to standard output, prefixed with a fixed label.
 void cudaForStormTestFunction(int a, int b) {
 	int const sum = a + b;
 	std::cout << "Cuda for Storm: a + b = " << sum << std::endl;
 }
 // Stub: ignores all arguments for now and only announces on standard output that the double variant exists.
 void basicValueIteration_mvReduce(
 	uint_fast64_t const maxIterationCount,
 	std::vector<uint_fast64_t> const& matrixRowIndices,
 	std::vector<uint_fast64_t> const& matrixColumnIndices,
 	std::vector<double> const& matrixValues,
 	std::vector<double>& x,
 	std::vector<double> const& b,
 	std::vector<uint_fast64_t> const& nondeterministicChoiceIndices) {
 	char const* const notice = "basicValueIteration_mvReduce is implemented for ValueType == double :)";
 	std::cout << notice << std::endl;
 }
 /*
--- a/resources/cudaForStorm/srcCuda/basicValueIteration.h
+++ b/resources/cudaForStorm/srcCuda/basicValueIteration.h
@ -1,4 +1,8 @@
 #include <cstdint>
 #include <vector>
 // Value-iteration entry point, declared here without the export macro.
 void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);
 // Library exports
 #include "cudaForStorm_Export.h"
 // Prints the sum of a and b to standard output (defined in basicValueIteration.cu).
 cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b);
 // Exported value-iteration entry point; the definition visible in basicValueIteration.cu only prints a notice and leaves x untouched.
 cudaForStorm_EXPORT void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);
--- a/resources/cudaForStorm/srcCuda/cudaForStorm.h
+++ b/resources/cudaForStorm/srcCuda/cudaForStorm.h
@ -0,0 +1,14 @@
 #ifndef STORM_CUDAFORSTORM_CUDAFORSTORM_H_
 #define STORM_CUDAFORSTORM_CUDAFORSTORM_H_
 /*
 * List of exported functions in this library
 */
 // TopologicalValueIteration
 #include "srcCuda/basicValueIteration.h"
 #endif // STORM_CUDAFORSTORM_CUDAFORSTORM_H_
--- a/src/storm.cpp
+++ b/src/storm.cpp
@ -62,6 +62,8 @@
 #include "src/exceptions/InvalidSettingsException.h"
 #include "cudaForStorm.h"
 #include <iostream>
 #include <iomanip>
 #include <fstream>
@ -160,6 +162,7 @@ void printHeader(const int argc, const char* argv[]) {
 	}
 	std::cout << "Command line: " << commandStream.str() << std::endl;
 	std::cout << "Current working directory: " << getCurrentWorkingDirectory() << std::endl << std::endl;
 	cudaForStormTestFunction(21, 21);
 }
 /*!