2 changed files with 126 additions and 0 deletions
			
			
		| @ -0,0 +1,73 @@ | |||
#include "storm/utility/string.h"

#include <algorithm>
#include <cctype>
#include <utility>
#include <vector>

#include <boost/algorithm/string/join.hpp>
 | |||
| 
 | |||
| namespace storm { | |||
|     namespace utility { | |||
|         namespace string { | |||
| 
 | |||
|             SimilarStrings::SimilarStrings(std::string reference, double similarityFactor, bool caseSensitive) : reference(reference), similarityFactor(similarityFactor), caseSensitive(caseSensitive), cmp([](std::pair<uint64_t, std::string> const& lhs, std::pair<uint64_t, std::string> const& rhs) { return lhs.first > rhs.first; }), distances(cmp) { | |||
|                 // intentionally left empty.
 | |||
|             } | |||
|              | |||
|             bool SimilarStrings::add(std::string const& string) { | |||
|                 double distance = levenshteinDistance(reference, string, caseSensitive); | |||
|                 if (distance <= static_cast<double>(std::max(reference.size(), string.size())) * (1.0 - similarityFactor)) { | |||
|                     distances.emplace(storm::utility::string::levenshteinDistance(reference, string, caseSensitive), string); | |||
|                     return true; | |||
|                 } | |||
|                 return false; | |||
|             } | |||
|              | |||
|             std::vector<std::string> SimilarStrings::toList() const { | |||
|                 auto distancesCopy = distances; | |||
|                 std::vector<std::string> result; | |||
|                 while (!distancesCopy.empty()) { | |||
|                     result.push_back(distancesCopy.top().second); | |||
|                     distancesCopy.pop(); | |||
|                 } | |||
|                 return result; | |||
|             } | |||
|              | |||
|             std::string SimilarStrings::toDidYouMeanString() const { | |||
|                 uint64_t size = distances.size(); | |||
|                 std::string result = boost::algorithm::join(toList(), ", "); | |||
|                 if (size == 0) { | |||
|                     return ""; | |||
|                 } else if (size == 1) { | |||
|                     return "Did you mean " + result + "?"; | |||
|                 } else { | |||
|                     return "Did you mean any of [" + result + "]?"; | |||
|                 } | |||
|             } | |||
|              | |||
|              | |||
|             uint64_t levenshteinDistance(std::string const& lhs, std::string const& rhs, bool caseSensitive) { | |||
|                 std::vector<std::vector<uint64_t>> d(lhs.size() + 1, std::vector<uint64_t>(rhs.size() + 1, 0ull)); | |||
|                 for (uint64_t row = 1; row < d.size(); ++row) { | |||
|                     d[row].front() = row; | |||
|                 } | |||
|                 for (uint64_t col = 1; col < d.front().size(); ++col) { | |||
|                     d.front()[col] = col; | |||
|                 } | |||
|                  | |||
|                 for (uint64_t row = 1; row < d.size(); ++row) { | |||
|                     for (uint64_t col = 1; col < d[row].size(); ++col) { | |||
|                         uint64_t cost = 1; | |||
|                         if (caseSensitive) { | |||
|                             if (tolower(lhs[row-1]) == tolower(rhs[col-1])) { | |||
|                                 cost = 0; | |||
|                             } | |||
|                         } else { | |||
|                             if (lhs[row-1] == rhs[col-1]) { | |||
|                                 cost = 0; | |||
|                             } | |||
|                         } | |||
|                         d[row][col] = std::min( { d[row-1][col] + 1, d[row][col - 1] + 1, d[row-1][col-1] + cost } ); | |||
|                     } | |||
|                 } | |||
|                 return d.back().back(); | |||
|             } | |||
|         } | |||
|     } | |||
| } | |||
| @ -0,0 +1,53 @@ | |||
| #pragma once | |||
| 
 | |||
| #include <string> | |||
| #include <functional> | |||
| #include <queue> | |||
| 
 | |||
| namespace storm { | |||
|     namespace utility { | |||
|         namespace string { | |||
|              | |||
|             class SimilarStrings { | |||
|             public: | |||
|                 /*! | |||
|                  * Gathers strings that are similar to the given reference string | |||
|                  * @param reference | |||
|                  * @param similarityFactor controls how similar the strings need to be (0 means any string is similar, 1 means only the reference string is similar) | |||
|                  * @param caseSensitive if false, lower/upper case is ignored | |||
|                  */ | |||
|                 SimilarStrings(std::string reference, double similarityFactor = 0.6, bool caseSensitive = true); | |||
|                  | |||
|                 /*! | |||
|                  * Adds the given string to the set of similar strings (if it is similar) | |||
|                  * @return true, if the given string is considered similar. | |||
|                  */ | |||
|                 bool add(std::string const& string); | |||
|                  | |||
|                 /*! | |||
|                  * Gets a list of all added strings that are similar to the reference string. | |||
|                  * Erases all strings gathered so far. | |||
|                  */ | |||
|                 std::vector<std::string> toList() const; | |||
|                  | |||
|                 /*! | |||
|                  * Returns a "Did you mean abc?" string | |||
|                  * @return | |||
|                  */ | |||
|                 std::string toDidYouMeanString() const; | |||
| 
 | |||
|             private: | |||
|                 std::string reference; | |||
|                 double similarityFactor; | |||
|                 bool caseSensitive; | |||
|                 std::function<bool (std::pair<uint64_t, std::string> const&, std::pair<uint64_t, std::string> const&)> cmp; | |||
|                 std::priority_queue<std::pair<uint64_t, std::string>, std::vector<std::pair<uint64_t, std::string>>, decltype(cmp)> distances; | |||
|             }; | |||
|              | |||
|             /*! | |||
|              * Computes the Levenshtein (edit) distance between two strings; used to find similar strings. | |||
|              * @param lhs the first string | |||
|              * @param rhs the second string | |||
|              * @param caseSensitive selects whether characters are compared exactly or with case folded (defaults to true) | |||
|              * @return the number of single-character insertions, deletions and substitutions separating lhs and rhs | |||
|              */ | |||
|             uint64_t levenshteinDistance(std::string const& lhs, std::string const& rhs, bool caseSensitive = true); | |||
|         } | |||
|     } | |||
| } | |||
						Write
						Preview
					
					
					Loading…
					
					Cancel
						Save
					
		Reference in new issue