Browse Source

Added string utility functions to find similar strings.

tempestpy_adaptions
Tim Quatmann 6 years ago
parent
commit
27c2a8ba95
  1. 73
      src/storm/utility/string.cpp
  2. 53
      src/storm/utility/string.h

73
src/storm/utility/string.cpp

@ -0,0 +1,73 @@
#include "storm/utility/string.h"

#include <algorithm>
#include <cctype>
#include <utility>
#include <vector>

#include <boost/algorithm/string/join.hpp>
namespace storm {
namespace utility {
namespace string {
/*!
 * Creates an (initially empty) collection of strings that are similar to the given reference string.
 * @param reference the string every added string is compared against (taken by value and moved into the member)
 * @param similarityFactor value used by add() to derive the maximal accepted distance
 * @param caseSensitive forwarded to levenshteinDistance whenever a string is added
 */
SimilarStrings::SimilarStrings(std::string reference, double similarityFactor, bool caseSensitive)
    : reference(std::move(reference)),
      similarityFactor(similarityFactor),
      caseSensitive(caseSensitive),
      // Comparing with '>' on the distance makes the priority queue yield the smallest distance first.
      cmp([](std::pair<uint64_t, std::string> const& lhs, std::pair<uint64_t, std::string> const& rhs) { return lhs.first > rhs.first; }),
      distances(cmp) {
    // Intentionally left empty.
}
/*!
 * Adds the given string to the gathered strings if it is similar enough to the reference string.
 * A string counts as similar if its Levenshtein distance to the reference is at most
 * max(|reference|, |string|) * (1 - similarityFactor).
 * @param string the candidate string
 * @return true iff the string was considered similar (and therefore stored)
 */
bool SimilarStrings::add(std::string const& string) {
    // Compute the (quadratic-time) distance exactly once and reuse it for both the check and the stored entry.
    uint64_t const distance = levenshteinDistance(reference, string, caseSensitive);
    double const maxAllowedDistance = static_cast<double>(std::max(reference.size(), string.size())) * (1.0 - similarityFactor);
    if (static_cast<double>(distance) <= maxAllowedDistance) {
        distances.emplace(distance, string);
        return true;
    }
    return false;
}
std::vector<std::string> SimilarStrings::toList() const {
auto distancesCopy = distances;
std::vector<std::string> result;
while (!distancesCopy.empty()) {
result.push_back(distancesCopy.top().second);
distancesCopy.pop();
}
return result;
}
/*!
 * Builds a user-facing hint from the gathered strings.
 * @return an empty string if nothing was gathered, "Did you mean x?" for a single string and
 *         "Did you mean any of [x, y, ...]?" otherwise.
 */
std::string SimilarStrings::toDidYouMeanString() const {
    auto const numberOfStrings = distances.size();
    if (numberOfStrings == 0) {
        return "";
    }

    std::string const joined = boost::algorithm::join(toList(), ", ");
    if (numberOfStrings == 1) {
        return "Did you mean " + joined + "?";
    }
    return "Did you mean any of [" + joined + "]?";
}
/*!
 * Computes the Levenshtein (edit) distance between the two given strings, i.e. the minimal number of
 * single-character insertions, deletions and substitutions that transform lhs into rhs.
 * @param lhs the first string
 * @param rhs the second string
 * @param caseSensitive if true, characters have to match exactly; if false, lower/upper case is ignored
 * @return the edit distance (|rhs| if lhs is empty and vice versa)
 */
uint64_t levenshteinDistance(std::string const& lhs, std::string const& rhs, bool caseSensitive) {
    // Exact comparison in case-sensitive mode, comparison of the lower-cased characters otherwise.
    // The cast to unsigned char is needed since std::tolower is undefined for negative arguments other than EOF.
    auto const sameCharacter = [caseSensitive](char a, char b) -> bool {
        if (caseSensitive) {
            return a == b;
        }
        return std::tolower(static_cast<unsigned char>(a)) == std::tolower(static_cast<unsigned char>(b));
    };

    // Each row of the dynamic programming table only depends on its predecessor, so two rows suffice.
    // previousRow[col] holds the distance between the first (row - 1) characters of lhs and the first col characters of rhs.
    std::vector<uint64_t> previousRow(rhs.size() + 1);
    std::vector<uint64_t> currentRow(rhs.size() + 1);
    for (uint64_t col = 0; col < previousRow.size(); ++col) {
        previousRow[col] = col;  // Turning the empty prefix into col characters takes col insertions.
    }

    for (uint64_t row = 1; row <= lhs.size(); ++row) {
        currentRow[0] = row;  // Turning row characters into the empty prefix takes row deletions.
        for (uint64_t col = 1; col <= rhs.size(); ++col) {
            uint64_t const cost = sameCharacter(lhs[row - 1], rhs[col - 1]) ? 0 : 1;
            currentRow[col] = std::min({previousRow[col] + 1, currentRow[col - 1] + 1, previousRow[col - 1] + cost});
        }
        std::swap(previousRow, currentRow);
    }
    // After the final swap, previousRow is the row belonging to the complete lhs.
    return previousRow.back();
}
}
}
}

53
src/storm/utility/string.h

@ -0,0 +1,53 @@
#pragma once
#include <string>
#include <functional>
#include <queue>
namespace storm {
namespace utility {
namespace string {
/*!
* Collects strings that are similar to a fixed reference string, e.g. to generate "Did you mean ...?" hints.
* Similarity is judged by the Levenshtein distance between a candidate and the reference string.
*
* NOTE(review): this header uses std::vector, std::pair and uint64_t but only includes <string>, <functional> and <queue>;
* consider including <vector>, <utility> and <cstdint> explicitly.
*/
class SimilarStrings {
public:
/*!
* Gathers strings that are similar to the given reference string
* @param reference the string all added strings are compared against
* @param similarityFactor controls how similar the strings need to be (0 means any string is similar, 1 means only the reference string is similar)
* @param caseSensitive if false, lower/upper case is ignored
*/
SimilarStrings(std::string reference, double similarityFactor = 0.6, bool caseSensitive = true);
/*!
* Adds the given string to the set of similar strings (if it is similar).
* A string is similar if its distance to the reference string is at most
* max(length of reference, length of string) * (1 - similarityFactor).
* @return true, if the given string is considered similar.
*/
bool add(std::string const& string);
/*!
* Gets a list of all added strings that are similar to the reference string,
* ordered by increasing distance to the reference string.
* The gathered strings are not modified (the list is built from a copy).
*/
std::vector<std::string> toList() const;
/*!
* Returns a "Did you mean abc?" string
* @return an empty string if no similar string was gathered, "Did you mean abc?" for exactly one
*         gathered string, and "Did you mean any of [abc, abd]?" for multiple gathered strings.
*/
std::string toDidYouMeanString() const;
private:
// The string all candidates are compared against.
std::string reference;
// Determines the maximal distance accepted by add().
double similarityFactor;
// Passed on to levenshteinDistance for every added string.
bool caseSensitive;
// Orders (distance, string) pairs such that the pair with the smallest distance is on top of the queue.
std::function<bool (std::pair<uint64_t, std::string> const&, std::pair<uint64_t, std::string> const&)> cmp;
// All strings accepted by add(), together with their distance to the reference string.
std::priority_queue<std::pair<uint64_t, std::string>, std::vector<std::pair<uint64_t, std::string>>, decltype(cmp)> distances;
};
/*!
* Levenshtein distance to find similar strings: the minimal number of single-character
* insertions, deletions and substitutions needed to transform lhs into rhs.
* @param lhs the first string
* @param rhs the second string
* @param caseSensitive if false, lower/upper case is meant to be ignored
*        (NOTE(review): confirm that the definition in string.cpp treats the flag in this direction)
* @return the distance between the two strings (0 if they are considered equal)
*/
uint64_t levenshteinDistance(std::string const& lhs, std::string const& rhs, bool caseSensitive = true);
}
}
}
|||||||
100:0
Loading…
Cancel
Save