2 changed files with 126 additions and 0 deletions
			
			
		@ -0,0 +1,73 @@ | 
				
			|||
#include "storm/utility/string.h"
 | 
				
			|||
#include <vector>
 | 
				
			|||
#include <boost/algorithm/string/join.hpp>
 | 
				
			|||
 | 
				
			|||
namespace storm { | 
				
			|||
    namespace utility { | 
				
			|||
        namespace string { | 
				
			|||
 | 
				
			|||
            /*!
             * Creates an empty collection of strings that are similar to the given reference.
             *
             * @param reference the string every candidate is compared against
             * @param similarityFactor threshold in [0, 1] used by add() to decide whether a candidate is similar
             * @param caseSensitive forwarded to levenshteinDistance when candidates are added
             */
            SimilarStrings::SimilarStrings(std::string reference, double similarityFactor, bool caseSensitive)
                : reference(reference),
                  similarityFactor(similarityFactor),
                  caseSensitive(caseSensitive),
                  cmp([](std::pair<uint64_t, std::string> const& a, std::pair<uint64_t, std::string> const& b) {
                      // Ordering by "greater distance" makes the candidate with the smallest distance the top of the queue.
                      return b.first < a.first;
                  }),
                  distances(cmp) {
                // Nothing left to do: all members are set up in the initializer list.
            }
				
			|||
             | 
				
			|||
            bool SimilarStrings::add(std::string const& string) { | 
				
			|||
                double distance = levenshteinDistance(reference, string, caseSensitive); | 
				
			|||
                if (distance <= static_cast<double>(std::max(reference.size(), string.size())) * (1.0 - similarityFactor)) { | 
				
			|||
                    distances.emplace(storm::utility::string::levenshteinDistance(reference, string, caseSensitive), string); | 
				
			|||
                    return true; | 
				
			|||
                } | 
				
			|||
                return false; | 
				
			|||
            } | 
				
			|||
             | 
				
			|||
            std::vector<std::string> SimilarStrings::toList() const { | 
				
			|||
                auto distancesCopy = distances; | 
				
			|||
                std::vector<std::string> result; | 
				
			|||
                while (!distancesCopy.empty()) { | 
				
			|||
                    result.push_back(distancesCopy.top().second); | 
				
			|||
                    distancesCopy.pop(); | 
				
			|||
                } | 
				
			|||
                return result; | 
				
			|||
            } | 
				
			|||
             | 
				
			|||
            std::string SimilarStrings::toDidYouMeanString() const { | 
				
			|||
                uint64_t size = distances.size(); | 
				
			|||
                std::string result = boost::algorithm::join(toList(), ", "); | 
				
			|||
                if (size == 0) { | 
				
			|||
                    return ""; | 
				
			|||
                } else if (size == 1) { | 
				
			|||
                    return "Did you mean " + result + "?"; | 
				
			|||
                } else { | 
				
			|||
                    return "Did you mean any of [" + result + "]?"; | 
				
			|||
                } | 
				
			|||
            } | 
				
			|||
             | 
				
			|||
             | 
				
			|||
            /*!
             * Computes the Levenshtein (edit) distance between two strings, i.e. the minimal number of
             * single-character insertions, deletions and substitutions that turn lhs into rhs.
             *
             * @param lhs the first string
             * @param rhs the second string
             * @param caseSensitive if true, characters must match exactly; if false, characters that only
             *        differ in upper/lower case are treated as equal
             * @return the edit distance between lhs and rhs
             */
            uint64_t levenshteinDistance(std::string const& lhs, std::string const& rhs, bool caseSensitive) {
                // Decides whether two characters count as equal under the requested case handling.
                // tolower requires a value representable as unsigned char, hence the casts.
                auto const sameCharacter = [caseSensitive](char a, char b) {
                    if (caseSensitive) {
                        return a == b;
                    }
                    return tolower(static_cast<unsigned char>(a)) == tolower(static_cast<unsigned char>(b));
                };

                // d[row][col] is the distance between the first `row` characters of lhs and the first `col` characters of rhs.
                std::vector<std::vector<uint64_t>> d(lhs.size() + 1, std::vector<uint64_t>(rhs.size() + 1));

                // Turning a prefix into the empty string (or vice versa) takes one edit per character.
                for (uint64_t row = 1; row < d.size(); ++row) {
                    d[row].front() = row;
                }
                for (uint64_t col = 1; col < d.front().size(); ++col) {
                    d.front()[col] = col;
                }

                for (uint64_t row = 1; row < d.size(); ++row) {
                    for (uint64_t col = 1; col < d[row].size(); ++col) {
                        uint64_t const cost = sameCharacter(lhs[row - 1], rhs[col - 1]) ? 0 : 1;
                        // Cheapest of: deletion, insertion, substitution (free if the characters match).
                        d[row][col] = std::min({ d[row - 1][col] + 1, d[row][col - 1] + 1, d[row - 1][col - 1] + cost });
                    }
                }
                return d.back().back();
            }
				
			|||
        } | 
				
			|||
    } | 
				
			|||
} | 
				
			|||
@ -0,0 +1,53 @@ | 
				
			|||
#pragma once | 
				
			|||
 | 
				
			|||
#include <string> | 
				
			|||
#include <functional> | 
				
			|||
#include <queue> | 
				
			|||
 | 
				
			|||
namespace storm { | 
				
			|||
    namespace utility { | 
				
			|||
        namespace string { | 
				
			|||
             | 
				
			|||
            /*!
             * Collects strings that are similar to a fixed reference string, e.g. to build
             * "Did you mean ...?" suggestions.
             */
            class SimilarStrings {
            public:
                /*!
                 * Sets up an empty collection for the given reference string.
                 * @param reference the string that candidates are compared against
                 * @param similarityFactor controls how similar the strings need to be (0 means any string is similar, 1 means only the reference string is similar)
                 * @param caseSensitive if false, lower/upper case is ignored
                 */
                SimilarStrings(std::string reference, double similarityFactor = 0.6, bool caseSensitive = true);

                /*!
                 * Considers the given string and stores it if it is similar to the reference string.
                 * @return true, if the given string is considered similar.
                 */
                bool add(std::string const& string);

                /*!
                 * Gets a list of all added strings that are similar to the reference string,
                 * ordered by increasing distance to the reference.
                 * The strings gathered so far are kept.
                 */
                std::vector<std::string> toList() const;

                /*!
                 * Returns a "Did you mean abc?" string for the gathered strings.
                 * @return the suggestion text, or an empty string if no similar string was gathered.
                 */
                std::string toDidYouMeanString() const;

            private:
                // A gathered string together with its distance to the reference.
                using Candidate = std::pair<uint64_t, std::string>;
                // Type of the ordering used by the priority queue.
                using CandidateOrder = std::function<bool (Candidate const&, Candidate const&)>;

                std::string reference;
                double similarityFactor;
                bool caseSensitive;
                // Declared before `distances` because the queue is constructed from it.
                CandidateOrder cmp;
                std::priority_queue<Candidate, std::vector<Candidate>, CandidateOrder> distances;
            };
				
			|||
             | 
				
			|||
            /*! | 
				
			|||
             * Computes the Levenshtein distance (edit distance) between two strings; used to find similar strings.
				
			|||
             */ | 
				
			|||
            uint64_t levenshteinDistance(std::string const& lhs, std::string const& rhs, bool caseSensitive = true); | 
				
			|||
        } | 
				
			|||
    } | 
				
			|||
} | 
				
			|||
						Write
						Preview
					
					
					Loading…
					
					Cancel
						Save
					
		Reference in new issue