From 71e077f420397eff251ed24365a8bdb075f4342d Mon Sep 17 00:00:00 2001 From: PBerger Date: Sat, 8 Mar 2014 18:28:39 +0100 Subject: [PATCH] Compiles with CUSP :) Former-commit-id: 78555303bf9615a97c25127082b89c89d53748ec --- resources/cmake/FindCusp.cmake | 55 +++++++++++ resources/cmake/FindThrust.cmake | 69 +++++++------- .../cudaForStorm/CMakeAlignmentCheck.cpp | 64 +++++++++++++ resources/cudaForStorm/CMakeLists.txt | 93 ++++++++++++------- .../srcCuda/basicValueIteration.cu | 87 ++++++----------- .../srcCuda/basicValueIteration.h | 2 +- .../cudaForStorm/srcCuda/cuspExtension.h | 83 +++++++++++++++++ .../cudaForStorm/storm-cudaplugin-config.h.in | 13 +++ 8 files changed, 337 insertions(+), 129 deletions(-) create mode 100644 resources/cmake/FindCusp.cmake create mode 100644 resources/cudaForStorm/CMakeAlignmentCheck.cpp create mode 100644 resources/cudaForStorm/srcCuda/cuspExtension.h create mode 100644 resources/cudaForStorm/storm-cudaplugin-config.h.in diff --git a/resources/cmake/FindCusp.cmake b/resources/cmake/FindCusp.cmake new file mode 100644 index 000000000..9520d1426 --- /dev/null +++ b/resources/cmake/FindCusp.cmake @@ -0,0 +1,55 @@ +# +# FindCusp +# +# This module finds the CUSP header files and extracts their version. It +# sets the following variables. +# +# CUSP_INCLUDE_DIR - Include directory for cusp header files. (All header +# files will actually be in the cusp subdirectory.) +# CUSP_VERSION - Version of cusp in the form "major.minor.patch". +# +# CUSP_FOUND - Indicates whether Cusp has been found +# + +find_path(CUSP_INCLUDE_DIR + HINTS + /usr/include/cusp + /usr/local/include + /usr/local/cusp/include + ${CUSP_INCLUDE_DIRS} + ${CUSP_HINT} + NAMES cusp/version.h + DOC "Cusp headers" +) +if(CUSP_INCLUDE_DIR) + list(REMOVE_DUPLICATES CUSP_INCLUDE_DIR) +endif(CUSP_INCLUDE_DIR) + +# Find cusp version +file(STRINGS ${CUSP_INCLUDE_DIR}/cusp/version.h + version + REGEX "#define CUSP_VERSION[ \t]+([0-9x]+)" +) +string(REGEX REPLACE + "#define CUSP_VERSION[ \t]+" + "" + version + "${version}" +) + +#define CUSP_MAJOR_VERSION (CUSP_VERSION / 100000) +#define CUSP_MINOR_VERSION (CUSP_VERSION / 100 % 1000) +#define CUSP_SUBMINOR_VERSION (CUSP_VERSION % 100) + +math(EXPR CUSP_MAJOR_VERSION "${version} / 100000") +math(EXPR CUSP_MINOR_VERSION "${version} / 100 % 1000") +math(EXPR CUSP_PATCH_VERSION "${version} % 100") + +set(CUSP_VERSION "${CUSP_MAJOR_VERSION}.${CUSP_MINOR_VERSION}.${CUSP_PATCH_VERSION}") + +# Check for required components +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Cusp REQUIRED_VARS CUSP_INCLUDE_DIR VERSION_VAR CUSP_VERSION) + +set(CUSP_INCLUDE_DIRS ${CUSP_INCLUDE_DIR}) +mark_as_advanced(CUSP_INCLUDE_DIR) \ No newline at end of file diff --git a/resources/cmake/FindThrust.cmake b/resources/cmake/FindThrust.cmake index 9ad65b7e8..8f811bda3 100644 --- a/resources/cmake/FindThrust.cmake +++ b/resources/cmake/FindThrust.cmake @@ -1,53 +1,52 @@ # # FindThrust # -# This module finds the Thrust header files and extrats their version. It +# This module finds the Thrust header files and extracts their version. It # sets the following variables. # # THRUST_INCLUDE_DIR - Include directory for thrust header files. (All header # files will actually be in the thrust subdirectory.) # THRUST_VERSION - Version of thrust in the form "major.minor.patch". 
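The FindCusp module above recovers major/minor/patch from the single CUSP_VERSION integer via the quotient/remainder arithmetic in the math(EXPR) calls. A minimal C++ sketch of the same decoding, using an assumed example value of 400 (i.e. 0.4.0) purely for illustration:

// Decode a CUSP_VERSION integer the same way the math(EXPR ...) calls do.
// The value 400 is an assumed example, not taken from the patch.
#include <cstdio>

int main() {
    const int cuspVersion = 400;
    const int major = cuspVersion / 100000;
    const int minor = (cuspVersion / 100) % 1000;
    const int patch = cuspVersion % 100;
    std::printf("CUSP %d.%d.%d\n", major, minor, patch); // prints "CUSP 0.4.0"
    return 0;
}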
# +# Thrust_FOUND - Indicates whether Thrust has been found +# -find_path( THRUST_INCLUDE_DIR - HINTS - /usr/include/cuda - /usr/local/include - /usr/local/cuda/include - ${CUDA_INCLUDE_DIRS} - NAMES thrust/version.h - DOC "Thrust headers" - ) -if( THRUST_INCLUDE_DIR ) - list( REMOVE_DUPLICATES THRUST_INCLUDE_DIR ) -endif( THRUST_INCLUDE_DIR ) +find_path(THRUST_INCLUDE_DIR + HINTS + /usr/include/cuda + /usr/local/include + /usr/local/cuda/include + ${CUDA_INCLUDE_DIRS} + NAMES thrust/version.h + DOC "Thrust headers" +) +if(THRUST_INCLUDE_DIR) + list(REMOVE_DUPLICATES THRUST_INCLUDE_DIR) +endif(THRUST_INCLUDE_DIR) # Find thrust version -file( STRINGS ${THRUST_INCLUDE_DIR}/thrust/version.h - version - REGEX "#define THRUST_VERSION[ \t]+([0-9x]+)" - ) -string( REGEX REPLACE - "#define THRUST_VERSION[ \t]+" - "" - version - "${version}" - ) +file(STRINGS ${THRUST_INCLUDE_DIR}/thrust/version.h + version + REGEX "#define THRUST_VERSION[ \t]+([0-9x]+)" +) +string(REGEX REPLACE + "#define THRUST_VERSION[ \t]+" + "" + version + "${version}" +) -string( REGEX MATCH "^[0-9]" major ${version} ) -string( REGEX REPLACE "^${major}00" "" version "${version}" ) -string( REGEX MATCH "^[0-9]" minor ${version} ) -string( REGEX REPLACE "^${minor}0" "" version "${version}" ) -set( THRUST_VERSION "${major}.${minor}.${version}") -set( THRUST_MAJOR_VERSION "${major}") -set( THRUST_MINOR_VERSION "${minor}") +string(REGEX MATCH "^[0-9]" major ${version}) +string(REGEX REPLACE "^${major}00" "" version "${version}") +string(REGEX MATCH "^[0-9]" minor ${version}) +string(REGEX REPLACE "^${minor}0" "" version "${version}") +set(THRUST_VERSION "${major}.${minor}.${version}") +set(THRUST_MAJOR_VERSION "${major}") +set(THRUST_MINOR_VERSION "${minor}") # Check for required components -include( FindPackageHandleStandardArgs ) -find_package_handle_standard_args( Thrust - REQUIRED_VARS THRUST_INCLUDE_DIR - VERSION_VAR THRUST_VERSION - ) +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Thrust REQUIRED_VARS THRUST_INCLUDE_DIR VERSION_VAR THRUST_VERSION) set(THRUST_INCLUDE_DIRS ${THRUST_INCLUDE_DIR}) mark_as_advanced(THRUST_INCLUDE_DIR) \ No newline at end of file diff --git a/resources/cudaForStorm/CMakeAlignmentCheck.cpp b/resources/cudaForStorm/CMakeAlignmentCheck.cpp new file mode 100644 index 000000000..1dc9b470b --- /dev/null +++ b/resources/cudaForStorm/CMakeAlignmentCheck.cpp @@ -0,0 +1,64 @@ +/* + * This is component of StoRM - Cuda Plugin to check whether type alignment matches the assumptions done while optimizing the code. 
+ */ + #include + #include + #include + + #define CONTAINER_SIZE 100ul + + template + int checkForAlignmentOfPairTypes(size_t containerSize, IndexType const firstValue, ValueType const secondValue) { + std::vector>* myVector = new std::vector>(); + for (size_t i = 0; i < containerSize; ++i) { + myVector->push_back(std::make_pair(firstValue, secondValue)); + } + size_t myVectorSize = myVector->size(); + IndexType* firstStart = &(myVector->at(0).first); + IndexType* firstEnd = &(myVector->at(myVectorSize - 1).first); + ValueType* secondStart = &(myVector->at(0).second); + ValueType* secondEnd = &(myVector->at(myVectorSize - 1).second); + size_t startOffset = reinterpret_cast(secondStart) - reinterpret_cast(firstStart); + size_t endOffset = reinterpret_cast(secondEnd) - reinterpret_cast(firstEnd); + size_t firstOffset = reinterpret_cast(firstEnd) - reinterpret_cast(firstStart); + size_t secondOffset = reinterpret_cast(secondEnd) - reinterpret_cast(secondStart); + + delete myVector; + myVector = nullptr; + + if (myVectorSize != containerSize) { + return -2; + } + + // Check for alignment: + // Requirement is that the pairs are aligned like: first, second, first, second, first, second, ... + if (sizeof(IndexType) != sizeof(ValueType)) { + return -3; + } + if (startOffset != sizeof(IndexType)) { + return -4; + } + if (endOffset != sizeof(IndexType)) { + return -5; + } + if (firstOffset != ((sizeof(IndexType) + sizeof(ValueType)) * (myVectorSize - 1))) { + return -6; + } + if (secondOffset != ((sizeof(IndexType) + sizeof(ValueType)) * (myVectorSize - 1))) { + return -7; + } + + return 0; + } + + + int main(int argc, char* argv[]) { + int result = 0; + + result = checkForAlignmentOfPairTypes(CONTAINER_SIZE, 42, 3.14); + if (result != 0) { + return result; + } + + return 0; + } \ No newline at end of file diff --git a/resources/cudaForStorm/CMakeLists.txt b/resources/cudaForStorm/CMakeLists.txt index acedd56d1..6b0496d51 100644 --- a/resources/cudaForStorm/CMakeLists.txt +++ b/resources/cudaForStorm/CMakeLists.txt @@ -35,9 +35,12 @@ set(STORM_LIB_INSTALL_DIR "${PROJECT_SOURCE_DIR}/../../build/cudaForStorm" CACHE # Add the resources/cmake folder to Module Search Path for FindTBB.cmake set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/../cmake/") +# Set the hint for CUSP +set(CUSP_HINT "${PROJECT_SOURCE_DIR}/../3rdparty/cusplibrary") + find_package(CUDA REQUIRED) +find_package(Cusp REQUIRED) find_package(Doxygen REQUIRED) -find_package(Threads REQUIRED) find_package(Thrust REQUIRED) # If the DEBUG option was turned on, we will target a debug version and a release version otherwise @@ -115,10 +118,23 @@ endif() ## ############################################################# +# Test for type alignment +try_run(STORM_CUDA_RUN_RESULT_TYPEALIGNMENT STORM_CUDA_COMPILE_RESULT_TYPEALIGNMENT + ${PROJECT_BINARY_DIR} "${PROJECT_SOURCE_DIR}/CMakeAlignmentCheck.cpp" + COMPILE_OUTPUT_VARIABLE OUTPUT_TEST_VAR +) +if(NOT STORM_CUDA_COMPILE_RESULT_TYPEALIGNMENT) + message(FATAL_ERROR "StoRM (CudaPlugin) - Could not test type alignment, there was an Error while compiling the file ${PROJECT_SOURCE_DIR}/CMakeAlignmentCheck.cpp: ${OUTPUT_TEST_VAR}") +elseif(STORM_CUDA_RUN_RESULT_TYPEALIGNMENT EQUAL 0) + message(STATUS "StoRM (CudaPlugin) - Result of Type Alignment Check: OK.") +else() + message(FATAL_ERROR "StoRM (CudaPlugin) - Result of Type Alignment Check: FAILED (Code ${STORM_CUDA_RUN_RESULT_TYPEALIGNMENT})") +endif() + # Configure a header file to pass some of the CMake settings to the source code 
configure_file ( - "${PROJECT_SOURCE_DIR}/../../storm-config.h.in" - "${PROJECT_BINARY_DIR}/include/storm-config.h" + "${PROJECT_SOURCE_DIR}/storm-cudaplugin-config.h.in" + "${PROJECT_BINARY_DIR}/include/storm-cudaplugin-config.h" ) # Add the binary dir include directory for storm-config.h include_directories("${PROJECT_BINARY_DIR}/include") @@ -161,22 +177,6 @@ endif(ADDITIONAL_LINK_DIRS) ############################################################# -############################################################################### -## # -## Executable Creation # -## # -## All link_directories() calls MUST be made before this point # -## # -############################################################################### -include (GenerateExportHeader) - -#add_library(cudaForStorm SHARED ${CUDAFORSTORM_HEADERS} ${CUDAFORSTORM_SOURCES}) -#GENERATE_EXPORT_HEADER( cudaForStorm -# BASE_NAME cudaForStorm -# EXPORT_MACRO_NAME cudaForStorm_EXPORT -# EXPORT_FILE_NAME include/cudaForStorm_Export.h -# STATIC_DEFINE cudaForStorm_BUILT_AS_STATIC -#) ############################################################# ## @@ -184,18 +184,6 @@ include (GenerateExportHeader) ## ############################################################# #set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30) -cuda_add_library(cudaForStorm SHARED - ${CUDAFORSTORM_CUDA_SOURCES} ${CUDAFORSTORM_CUDA_HEADERS} - OPTIONS -DSTUFF="" -arch=sm_30 - RELEASE -DNDEBUG - DEBUG -g -DDEBUG -) -GENERATE_EXPORT_HEADER( cudaForStorm - BASE_NAME cudaForStorm - EXPORT_MACRO_NAME cudaForStorm_EXPORT - EXPORT_FILE_NAME include/cudaForStorm_Export.h - STATIC_DEFINE cudaForStorm_BUILT_AS_STATIC -) #target_link_libraries(cudaLibrary ${CUDA_cusparse_LIBRARY}) #ADD_DEPENDENCIES(cudaForStorm cudaLibrary) #target_link_libraries(cudaForStorm cudaLibrary) @@ -204,19 +192,52 @@ include_directories(${CUDA_INCLUDE_DIRS}) ############################################################# ## -## Threads +## CUSP ## ############################################################# -include_directories(${THREADS_INCLUDE_DIRS}) -target_link_libraries(cudaForStorm ${CMAKE_THREAD_LIBS_INIT}) +if(CUSP_FOUND) + include_directories(${CUSP_INCLUDE_DIR}) + cuda_include_directories(${CUSP_INCLUDE_DIR}) + message(STATUS "StoRM (CudaPlugin) - Found CUSP Version ${CUSP_VERSION} in location ${CUSP_INCLUDE_DIR}") +else() + message(FATAL_ERROR "StoRM (CudaPlugin) - Could not find CUSP!") +endif() ############################################################# ## ## Thrust ## ############################################################# -include_directories(${THRUST_INCLUDE_DIR}) -message(STATUS "StoRM (CudaPlugin) - Found Thrust Version ${THRUST_VERSION}") +if(THRUST_FOUND) + include_directories(${THRUST_INCLUDE_DIR}) + cuda_include_directories(${THRUST_INCLUDE_DIR}) + message(STATUS "StoRM (CudaPlugin) - Found Thrust Version ${THRUST_VERSION} in location ${THRUST_INCLUDE_DIR}") +else() + message(FATAL_ERROR "StoRM (CudaPlugin) - Could not find Thrust! 
Check your CUDA installation.") +endif() + +############################################################################### +## # +## Executable Creation # +## # +## All link_directories() calls AND include_directories() calls # +## MUST be made before this point # +## # +############################################################################### +include (GenerateExportHeader) + +cuda_add_library(cudaForStorm SHARED + ${CUDAFORSTORM_CUDA_SOURCES} ${CUDAFORSTORM_CUDA_HEADERS} + OPTIONS -DSTUFF="" -arch=sm_30 + RELEASE -DNDEBUG + DEBUG -g -DDEBUG +) +GENERATE_EXPORT_HEADER( cudaForStorm + BASE_NAME cudaForStorm + EXPORT_MACRO_NAME cudaForStorm_EXPORT + EXPORT_FILE_NAME include/cudaForStorm_Export.h + STATIC_DEFINE cudaForStorm_BUILT_AS_STATIC +) if (MSVC) # Add the DebugHelper DLL diff --git a/resources/cudaForStorm/srcCuda/basicValueIteration.cu b/resources/cudaForStorm/srcCuda/basicValueIteration.cu index 80bb87e57..f23e7ed09 100644 --- a/resources/cudaForStorm/srcCuda/basicValueIteration.cu +++ b/resources/cudaForStorm/srcCuda/basicValueIteration.cu @@ -6,61 +6,54 @@ #include #include "cusparse_v2.h" +#include "cuspExtension.h" __global__ void cuda_kernel_basicValueIteration_mvReduce(int const * const A, int * const B) { *B = *A; } -void cudaForStormTestFunction(int a, int b) { - std::cout << "Cuda for Storm: a + b = " << (a+b) << std::endl; -} - -void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector const& matrixRowIndices, std::vector> columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { - if (sizeof(double) != sizeof(uint_fast64_t)) { - std::cout << "FATAL ERROR - Internal Sizes of Double and uint_fast64_t do NOT match, CUDA acceleration not possible!" << std::endl; - return; - } - - uint_fast64_t* device_matrixRowIndices = nullptr; - uint_fast64_t* device_matrixColIndicesAndValues = nullptr; - double* device_x = nullptr; - double* device_b = nullptr; - double* device_multiplyResult = nullptr; - uint_fast64_t* device_nondeterministicChoiceIndices = nullptr; +template +void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector const& matrixRowIndices, std::vector> columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { + IndexType* device_matrixRowIndices = nullptr; + IndexType* device_matrixColIndicesAndValues = nullptr; + ValueType* device_x = nullptr; + ValueType* device_b = nullptr; + ValueType* device_multiplyResult = nullptr; + IndexType* device_nondeterministicChoiceIndices = nullptr; cudaError_t cudaMallocResult; - cudaMallocResult = cudaMalloc(&device_matrixRowIndices, matrixRowIndices.size()); + cudaMallocResult = cudaMalloc(&device_matrixRowIndices, matrixRowIndices.size()); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Matrix Row Indices, Error Code " << cudaMallocResult << "." << std::endl; goto cleanup; } - cudaMallocResult = cudaMalloc(&device_matrixColIndicesAndValues, columnIndicesAndValues.size() * 2); + cudaMallocResult = cudaMalloc(&device_matrixColIndicesAndValues, columnIndicesAndValues.size() * 2); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Matrix Column Indices and Values, Error Code " << cudaMallocResult << "." 
<< std::endl; goto cleanup; } - cudaMallocResult = cudaMalloc(&device_x, x.size()); + cudaMallocResult = cudaMalloc(&device_x, x.size()); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Vector x, Error Code " << cudaMallocResult << "." << std::endl; goto cleanup; } - cudaMallocResult = cudaMalloc(&device_b, b.size()); + cudaMallocResult = cudaMalloc(&device_b, b.size()); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Vector b, Error Code " << cudaMallocResult << "." << std::endl; goto cleanup; } - cudaMallocResult = cudaMalloc(&device_multiplyResult, b.size()); + cudaMallocResult = cudaMalloc(&device_multiplyResult, b.size()); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Vector multiplyResult, Error Code " << cudaMallocResult << "." << std::endl; goto cleanup; } - cudaMallocResult = cudaMalloc(&device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.size()); + cudaMallocResult = cudaMalloc(&device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.size()); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Nondeterministic Choice Indices, Error Code " << cudaMallocResult << "." << std::endl; goto cleanup; @@ -69,31 +62,31 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::ve // Memory allocated, copy data to device cudaError_t cudaCopyResult; - cudaCopyResult = cudaMemcpy(device_matrixRowIndices, matrixRowIndices.data(), sizeof(uint_fast64_t) * matrixRowIndices.size(), cudaMemcpyHostToDevice); + cudaCopyResult = cudaMemcpy(device_matrixRowIndices, matrixRowIndices.data(), sizeof(IndexType) * matrixRowIndices.size(), cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Matrix Row Indices, Error Code " << cudaCopyResult << std::endl; goto cleanup; } - cudaCopyResult = cudaMemcpy(device_matrixColIndicesAndValues, columnIndicesAndValues.data(), (sizeof(uint_fast64_t) * columnIndicesAndValues.size()) + (sizeof(double) * columnIndicesAndValues.size()), cudaMemcpyHostToDevice); + cudaCopyResult = cudaMemcpy(device_matrixColIndicesAndValues, columnIndicesAndValues.data(), (sizeof(IndexType) * columnIndicesAndValues.size()) + (sizeof(ValueType) * columnIndicesAndValues.size()), cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Matrix Column Indices and Values, Error Code " << cudaCopyResult << std::endl; goto cleanup; } - cudaCopyResult = cudaMemcpy(device_x, x.data(), sizeof(double) * x.size(), cudaMemcpyHostToDevice); + cudaCopyResult = cudaMemcpy(device_x, x.data(), sizeof(ValueType) * x.size(), cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Vector x, Error Code " << cudaCopyResult << std::endl; goto cleanup; } - cudaCopyResult = cudaMemcpy(device_b, b.data(), sizeof(double) * b.size(), cudaMemcpyHostToDevice); + cudaCopyResult = cudaMemcpy(device_b, b.data(), sizeof(ValueType) * b.size(), cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Vector b, Error Code " << cudaCopyResult << std::endl; goto cleanup; } - cudaCopyResult = cudaMemcpy(device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.data(), sizeof(uint_fast64_t) * nondeterministicChoiceIndices.size(), cudaMemcpyHostToDevice); + cudaCopyResult = cudaMemcpy(device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.data(), 
sizeof(IndexType) * nondeterministicChoiceIndices.size(), cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Vector b, Error Code " << cudaCopyResult << std::endl; goto cleanup; @@ -101,6 +94,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::ve // Data is on device, start Kernel + // All code related to freeing memory and clearing up the device cleanup: if (device_matrixRowIndices != nullptr) { @@ -148,35 +142,14 @@ cleanup: } /* -void kernelSwitchTest(size_t N) { - int* deviceIntA; - int* deviceIntB; + * Declare and implement all exported functions for these Kernels here + * + */ - if (cudaMalloc((void**)&deviceIntA, sizeof(int)) != cudaSuccess) { - std::cout << "Error in cudaMalloc while allocating " << sizeof(int) << " Bytes!" << std::endl; - return; - } - if (cudaMalloc((void**)&deviceIntB, sizeof(int)) != cudaSuccess) { - std::cout << "Error in cudaMalloc while allocating " << sizeof(int) << " Bytes!" << std::endl; - return; - } - - // Allocate space on the device - auto start_time = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < N; ++i) { - cuda_kernel_kernelSwitchTest<<<1,1>>>(deviceIntA, deviceIntB); - } - auto end_time = std::chrono::high_resolution_clock::now(); - std::cout << "Switching the Kernel " << N << " times took " << std::chrono::duration_cast(end_time - start_time).count() << "micros" << std::endl; - std::cout << "Resulting in " << (std::chrono::duration_cast(end_time - start_time).count() / ((double)(N))) << "Microseconds per Kernel Switch" << std::endl; +void cudaForStormTestFunction(int a, int b) { + std::cout << "Cuda for Storm: a + b = " << (a+b) << std::endl; +} - // Free memory on device - if (cudaFree(deviceIntA) != cudaSuccess) { - std::cout << "Error in cudaFree!" << std::endl; - return; - } - if (cudaFree(deviceIntB) != cudaSuccess) { - std::cout << "Error in cudaFree!" 
<< std::endl; - return; - } -}*/ \ No newline at end of file +void basicValueIteration_mvReduce_uint64_double(uint_fast64_t const maxIterationCount, std::vector const& matrixRowIndices, std::vector> columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { + basicValueIteration_mvReduce(maxIterationCount, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices); +} \ No newline at end of file diff --git a/resources/cudaForStorm/srcCuda/basicValueIteration.h b/resources/cudaForStorm/srcCuda/basicValueIteration.h index fc5a8a322..1316e5014 100644 --- a/resources/cudaForStorm/srcCuda/basicValueIteration.h +++ b/resources/cudaForStorm/srcCuda/basicValueIteration.h @@ -6,4 +6,4 @@ #include "cudaForStorm_Export.h" cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b); -cudaForStorm_EXPORT void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector const& matrixRowIndices, std::vector> columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices); \ No newline at end of file +cudaForStorm_EXPORT void basicValueIteration_mvReduce_uint64_double(uint_fast64_t const maxIterationCount, std::vector const& matrixRowIndices, std::vector> columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices); \ No newline at end of file diff --git a/resources/cudaForStorm/srcCuda/cuspExtension.h b/resources/cudaForStorm/srcCuda/cuspExtension.h new file mode 100644 index 000000000..238b3aa36 --- /dev/null +++ b/resources/cudaForStorm/srcCuda/cuspExtension.h @@ -0,0 +1,83 @@ +/* + * This is an extension of the original CUSP csr_vector.h SPMV implementation. + * It is based on the Code and incorporates changes as to cope with the details + * of the StoRM code. + * As this is mostly copy & past, the original license still applies. + */ + +/* + * Copyright 2008-2009 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#pragma once + +#include + +namespace cusp +{ +namespace detail +{ +namespace device +{ + +template +void __storm_cuda_spmv_csr_vector(const IndexType num_rows, const IndexType * matrixRowIndices, const IndexType * matrixColumnIndices, const ValueType * matrixValues, const ValueType* x, ValueType* y) +{ + const size_t THREADS_PER_BLOCK = 128; + const size_t VECTORS_PER_BLOCK = THREADS_PER_BLOCK / THREADS_PER_VECTOR; + + const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(spmv_csr_vector_kernel, THREADS_PER_BLOCK, (size_t) 0); + const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(num_rows, VECTORS_PER_BLOCK)); + + if (UseCache) + bind_x(x); + + spmv_csr_vector_kernel <<>> + (num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); + + if (UseCache) + unbind_x(x); +} + +template +void storm_cuda_spmv_csr_vector(const IndexType num_rows, const IndexType num_entries, const IndexType * matrixRowIndices, const IndexType * matrixColumnIndices, const ValueType * matrixValues, const ValueType* x, ValueType* y) +{ + const IndexType nnz_per_row = num_entries / num_rows; + + if (nnz_per_row <= 2) { __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); return; } + if (nnz_per_row <= 4) { __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); return; } + if (nnz_per_row <= 8) { __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); return; } + if (nnz_per_row <= 16) { __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); return; } + + __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); +} + +template +void storm_cuda_spmv_csr_vector_tex(const IndexType num_rows, const IndexType num_entries, const IndexType * matrixRowIndices, const IndexType * matrixColumnIndices, const ValueType * matrixValues, const ValueType* x, ValueType* y) +{ + const IndexType nnz_per_row = num_entries / num_rows; + + if (nnz_per_row <= 2) { __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); return; } + if (nnz_per_row <= 4) { __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); return; } + if (nnz_per_row <= 8) { __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); return; } + if (nnz_per_row <= 16) { __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); return; } + + __storm_cuda_spmv_csr_vector(num_rows, matrixRowIndices, matrixColumnIndices, matrixValues, x, y); +} + +} // end namespace device +} // end namespace detail +} // end namespace cusp \ No newline at end of file diff --git a/resources/cudaForStorm/storm-cudaplugin-config.h.in b/resources/cudaForStorm/storm-cudaplugin-config.h.in new file mode 100644 index 000000000..d59532a6c --- /dev/null +++ b/resources/cudaForStorm/storm-cudaplugin-config.h.in @@ -0,0 +1,13 @@ +/* + * StoRM - Build-in Options + * + * This file is parsed by CMake during makefile generation + * It contains information such as the base path to the test/example data + */ + +#ifndef STORM_CUDAPLUGIN_GENERATED_STORMCONFIG_H_ +#define STORM_CUDAPLUGIN_GENERATED_STORMCONFIG_H_ + + + +#endif // STORM_CUDAPLUGIN_GENERATED_STORMCONFIG_H_