From c5f3555932c540e3a98fd0ad2210cd5daf8ae8ad Mon Sep 17 00:00:00 2001 From: svkurowski Date: Fri, 28 Nov 2014 11:36:23 +0100 Subject: [PATCH] Move CUDA code into namespace Former-commit-id: 98d065ec2c9c07034ca7756ae5627708f163ab8d --- cuda/kernels/graph.cu | 71 ++++++++++++++++++++++--------------------- cuda/kernels/graph.h | 71 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 36 deletions(-) diff --git a/cuda/kernels/graph.cu b/cuda/kernels/graph.cu index c3d708b14..fbc7c9a7d 100644 --- a/cuda/kernels/graph.cu +++ b/cuda/kernels/graph.cu @@ -1,5 +1,3 @@ -#include "cuda/kernels/graph.h" - #include const int N = 16; @@ -21,38 +19,41 @@ void hello(char *a, int *b) a[threadIdx.x] += b[threadIdx.x]; } -void helloWorldCuda() -{ - printf("CUDA TEST START\n"); - printf("Should print \"Hello World\"\n"); +namespace stormcuda { + namespace graph { + void helloWorld() { + printf("CUDA TEST START\n"); + printf("Should print \"Hello World\"\n"); - char a[N] = "Hello \0\0\0\0\0\0"; - int b[N] = {15, 10, 6, 0, -11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - char c[N] = "YELLO \0\0\0\0\0\0"; - - char *ad; - int *bd; - const int csize = N*sizeof(char); - const int isize = N*sizeof(int); - - printf("%s", a); - - cudaMalloc( (void**)&ad, csize ); - cudaMalloc( (void**)&bd, isize ); - cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice ); - cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice ); - - dim3 dimBlock( blocksize, 1 ); - dim3 dimGrid( 1, 1 ); - hello<<>>(ad, bd); - - gpuErrchk( cudaPeekAtLastError() ); - gpuErrchk( cudaDeviceSynchronize() ); - - cudaMemcpy( c, ad, csize, cudaMemcpyDeviceToHost ); - cudaFree( ad ); - cudaFree( bd ); - - printf("%s\n", c); - printf("CUDA TEST END\n"); + char a[N] = "Hello \0\0\0\0\0\0"; + int b[N] = {15, 10, 6, 0, -11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + char c[N] = "YELLO \0\0\0\0\0\0"; + + char *ad; + int *bd; + const int csize = N * sizeof(char); + const int isize = N * sizeof(int); + + printf("%s", a); + + cudaMalloc((void **) &ad, csize); + cudaMalloc((void **) &bd, isize); + cudaMemcpy(ad, a, csize, cudaMemcpyHostToDevice); + cudaMemcpy(bd, b, isize, cudaMemcpyHostToDevice); + + dim3 dimBlock(blocksize, 1); + dim3 dimGrid(1, 1); + hello << < dimGrid, dimBlock >> > (ad, bd); + + gpuErrchk(cudaPeekAtLastError()); + gpuErrchk(cudaDeviceSynchronize()); + + cudaMemcpy(c, ad, csize, cudaMemcpyDeviceToHost); + cudaFree(ad); + cudaFree(bd); + + printf("%s\n", c); + printf("CUDA TEST END\n"); + } + } } diff --git a/cuda/kernels/graph.h b/cuda/kernels/graph.h index 8f9cf8e8b..89b77d519 100644 --- a/cuda/kernels/graph.h +++ b/cuda/kernels/graph.h @@ -1,6 +1,75 @@ #ifndef CUDA_KERNELS_GRAPH_H #define CUDA_KERNELS_GRAPH_H -void helloWorldCuda(); +#include +#include + +#include "src/storage/sparse/StateType.h" +#include "src/models/AbstractDeterministicModel.h" +#include "src/models/AbstractNondeterministicModel.h" +#include "src/utility/constants.h" +#include "src/exceptions/InvalidArgumentException.h" + +namespace stormcuda { + namespace graph { + void helloWorld(); + template + storm::storage::BitVector performProbGreater0(storm::storage::SparseMatrix const& backwardTransitions, storm::storage::BitVector const& phiStates, storm::storage::BitVector const& psiStates, bool useStepBound = false, uint_fast64_t maximalSteps = 0) { + helloWorld(); + // Prepare the resulting bit vector. + uint_fast64_t numberOfStates = phiStates.size(); + storm::storage::BitVector statesWithProbabilityGreater0(numberOfStates); + + // Add all psi states as the already satisfy the condition. + statesWithProbabilityGreater0 |= psiStates; + + // Initialize the stack used for the DFS with the states. + std::vector stack(psiStates.begin(), psiStates.end()); + + // Initialize the stack for the step bound, if the number of steps is bounded. + std::vector stepStack; + std::vector remainingSteps; + if (useStepBound) { + stepStack.reserve(numberOfStates); + stepStack.insert(stepStack.begin(), psiStates.getNumberOfSetBits(), maximalSteps); + remainingSteps.resize(numberOfStates); + for (auto state : psiStates) { + remainingSteps[state] = maximalSteps; + } + } + + // Perform the actual DFS. + uint_fast64_t currentState, currentStepBound; + while (!stack.empty()) { + currentState = stack.back(); + stack.pop_back(); + + if (useStepBound) { + currentStepBound = stepStack.back(); + stepStack.pop_back(); + } + + for (typename storm::storage::SparseMatrix::const_iterator entryIt = backwardTransitions.begin(currentState), entryIte = backwardTransitions.end(currentState); entryIt != entryIte; ++entryIt) { + if (phiStates[entryIt->getColumn()] && (!statesWithProbabilityGreater0.get(entryIt->getColumn()) || (useStepBound && remainingSteps[entryIt->getColumn()] < currentStepBound - 1))) { + // If we don't have a bound on the number of steps to take, just add the state to the stack. + if (!useStepBound) { + statesWithProbabilityGreater0.set(entryIt->getColumn(), true); + stack.push_back(entryIt->getColumn()); + } else if (currentStepBound > 0) { + // If there is at least one more step to go, we need to push the state and the new number of steps. + remainingSteps[entryIt->getColumn()] = currentStepBound - 1; + statesWithProbabilityGreater0.set(entryIt->getColumn(), true); + stack.push_back(entryIt->getColumn()); + stepStack.push_back(currentStepBound - 1); + } + } + } + } + + // Return result. + return statesWithProbabilityGreater0; + } + } +} #endif /* CUDA_KERNELS_GRAPH_H */ \ No newline at end of file