有路径的地图如何将tham与给定路径进行比较?
我们将boost路径映射到字符串对,如name:location(绝对位置路径a la usr/myfolder/
)。我们给了一些位置la usr/myfolder/mysubfolder/myfile
。如何找到最适合给定网址的地图位置?有路径的地图如何将tham与给定路径进行比较?
例子,我们有一个地图就像它,如果我们需要,我们可以诉诸:
service1:myfolder/
service2:myfolder/mysubfolder/
service3:myfolder/myothersubfolder/
service4:myfolder/mysubfolder/myfile
我们给定值myfolder/mysubfolder/myfile/blablabla/
(路径)。 我们想知道它与我们地图中哪个项目最相关。 搜索结果为service4
为最相关内容的地图项。
那么如何通过给定的字符串值来查找它最相关的地图元素呢?
所以original question是关于一般字符串的情况下,但我有一些重新配置,所以没有,我只是在升压路径上工作。
您可以使用Levenshtein distance
编辑
因为我最终需要类似的东西我自己,这个问题仍然开放。这是我玩过的一些代码。既提高了字符串距离,也将Levenshtein算法应用于路径标记。
C++代码
/*
----- String based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
2 : this/is/a/strong/path
2 : that/is/a/strange/path
4 : is/this/a/strange/path
5 : this/is/a/strange
13 : this/is/a
15 : this/is
16 : that/was
18 : this
24 : completely/different/folder
----- Path based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
1 : this/is/a/strange
2 : this/is/a/strong/path
2 : that/is/a/strange/path
2 : this/is/a
2 : is/this/a/strange/path
3 : this/is
4 : this
7 : that/was
8 : completely/different/folder
*/
#include <string>
#include <vector>
#include <algorithm>
#include <stdio.h>
/// returns the smaller of two parameters
template< typename T >
T levmin(T v1, T v2)
{ return (v1 < v2) ? v1 : v2; }
/// Returns the Levenshtein distance between the specified strings
template < typename T, typename T_STR >
typename T_STR::size_type levstr(const T_STR &s1, const T_STR &s2)
{
typename T_STR::size_type l1 = s1.length(), l2 = s2.length();
std::vector< typename T_STR::size_type > d((l1 + 1) * (l2 + 1));
typename T_STR::size_type i, j;
for (i = 0; i <= l1; i++)
d[ i * l2 ] = i;
for (i = 0; i <= l2; i++)
d[ i ] = i;
for (i = 1; i <= l1; i++)
for (j = 1; j <= l2; j++)
d[ i * l2 + j ] = levmin(levmin(d[ (i - 1) * l2 + j ] + 1, d[ i * l2 + (j - 1) ] + 1),
d[ (i - 1) * l2 + (j - 1) ] + (s1[ i - 1 ] == s2[ j - 1 ] ? 0 : 1)
);
return d[ (l1 * l2) + l2 ];
}
/// Returns the Levenshtein distance between the specified split paths
template < typename T, typename T_STR, typename T_LST >
typename T_STR::size_type levpath(const T_LST &lst1, const T_LST &lst2)
{
typename T_STR::size_type l1 = lst1.size(), l2 = lst2.size();
std::vector< typename T_STR::size_type > d((l1 + 1) * (l2 + 1));
typename T_STR::size_type i, j;
for (i = 0; i <= l1; i++)
d[ i * l2 ] = i;
for (i = 0; i <= l2; i++)
d[ i ] = i;
for (i = 1; i <= l1; i++)
for (j = 1; j <= l2; j++)
d[ i * l2 + j ] = levmin(levmin(d[ (i - 1) * l2 + j ] + 1, d[ i * l2 + (j - 1) ] + 1),
d[ (i - 1) * l2 + (j - 1) ] + levstr< T, T_STR>(lst1[ i - 1 ], lst2[ j - 1 ])
);
return d[ (l1 * l2) + l2 ];
}
/// Generic split path function
/*
Returns a vector of path tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_LST splitpath(const T_STR &s, const T sep)
{ T_LST lst;
typename T_STR::size_type i = 0, l = 0;
while(T_STR::npos != (i = s.find_first_of(sep, i)))
{ if (l < i)
lst.push_back(T_STR(s, l, i - l));
l = ++i;
} // end while
if (l < s.length())
lst.push_back(T_STR(s, l, s.length() - l));
return lst;
}
/// Generic join path function
/*
Returns a string of joined vector tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_STR joinpath(const T_LST &p, const T sep)
{ T_STR s;
for (typename T_LST::const_iterator it = p.begin(); it != p.end(); it++)
{ if (s.length()) s += sep; s += *it; }
return s;
}
// String types
typedef char t_levchar;
typedef std::basic_string<t_levchar> t_levstr;
typedef std::vector<t_levstr> t_levpath;
typedef std::vector<t_levpath> t_levpathlist;
// Sort compare for split paths
template< typename T, typename T_STR, typename T_LST > struct levcmp
{ levcmp(const T_LST &p) { m_p = p; }
bool operator() (const T_LST &i, const T_LST &j)
{ return levpath< T, T_STR, T_LST >(i, m_p) < levpath< T, T_STR, T_LST >(j, m_p); }
T_LST m_p;
};
// Sort compare for strings
template< typename T, typename T_STR > struct levcmp_str
{ levcmp_str(const T_STR &s) { m_s = s; }
bool operator() (const T_STR &i, const T_STR &j)
{ return levstr< T, T_STR >(i, m_s) < levstr< T, T_STR >(j, m_s); }
T_STR m_s;
};
int main(int argc, char* argv[])
{
// Path to compare with
const t_levchar* compare_path = "this/is/a/strange/path";
// Paths to sort
const t_levchar* path_list[] =
{
"this/is/a/strong/path",
"that/is/a/strange/path",
"this/is/a/strange",
"this/is",
"this/is/a",
"this",
"this/is/a/strange/path",
"is/this/a/strange/path",
"that/was",
"completely/different/folder",
0
};
printf("\n----- String based Levenshtein ----\n"
"Matching : %s\n\n", compare_path);
// Create vector from paths
std::vector<t_levstr> paths;
for(int i = 0; path_list[ i ]; i++)
paths.push_back(path_list[ i ]);
// Sort the paths
std::sort(paths.begin(), paths.end(), levcmp_str< t_levchar, t_levstr >(compare_path));
// Show the result
for (std::vector<t_levstr>::iterator it = paths.begin(); it != paths.end(); it++)
printf("%d : %s\n",
(int)levstr< t_levchar, t_levstr >(*it, compare_path),
(const char*)it->c_str());
printf("\n----- Path based Levenshtein ----\n"
"Matching : %s\n\n", compare_path);
// Create vector from split paths
t_levpath splitcompare = splitpath< t_levchar, t_levstr, t_levpath >(compare_path, '/');
t_levpathlist splitpaths;
for (int i = 0; path_list[ i ]; i++)
splitpaths.push_back(splitpath< t_levchar, t_levstr, t_levpath >(path_list[ i ], '/'));
// Sort the paths
std::sort(splitpaths.begin(), splitpaths.end(),
levcmp< t_levchar, t_levstr, t_levpath >(splitcompare));
// Show the results
for (t_levpathlist::iterator it = splitpaths.begin(); it != splitpaths.end(); it++)
printf("%d : %s\n",
(int)levpath< t_levchar, t_levstr, t_levpath >(*it, splitcompare),
(const char*)joinpath< t_levchar, t_levstr, t_levpath >(*it, '/').c_str());
return 0;
}
Levenshtein似乎有点过分。或者,也许这个问题已经改变。 – 2012-02-14 22:22:15
你可能是对的,这个问题在我看来似乎暗示路径中可能没有完全匹配。 – bob2 2012-02-14 22:57:57
我没有考虑过。这是一个确定的可能性。或者也许他需要处理路径中的链接。我的回答并没有处理。 – 2012-02-14 23:04:25
我真的没有一个现成的C++的答案,但我最近做的在C#中类似的东西,并用以下想出了:通过
循环整个矢量,检查有趣的路径,看它是否以元素开始。 最长的这样的比赛是胜利者。这将是O(n)操作,具体取决于比较集中路径的数量。
我上面的改进版本变得有点不同了,因为我将要检查一些我之前已经检查过的条目。因此,我按照路径的长度降序对向量进行排序,这样我遇到的第一个匹配也是最好的(给我平均的O(n/2)操作,我认为),并且将结果存储到数组中一本字典,所以我不需要再强制搜索。
希望这会有所帮助!
如果你想更新你的答案,问题就改变了。 – 2012-02-14 22:23:00
//I don't know what path is, I'll pretend it's a std::string
//algorithm is the same, just functions change
const std::vector<boost::path>& services;
const std::string& findme = ???
std::vector<boost::path>::iterator result = services.end();
for(auto i = services.begin(); i != services.end(); ++i) {
//if this path is shorter than the best so far, skip it
if (result == services.end() || i->second.size()>result->second.size()) {
const size_t colonpos = i->second.find(':', 8); //find colon
//if this path is longer than findme, skip it
if (findme.size() >= i->second.size()-colonpos) {
//make sure they match, in _reverse_ order (faster in this case)
int o=i->second.size()-1;
for(; o>=colonpos; --o) {
if (i->second[o] != findme[o-colonpos])
break;
}
//if it was a match, this is the new best result
if (o < colonpos)
result = i;
}
}
}
//either result is services.end() meaning none matched
//or result is the _longest_ path that matched
显然,boost::path
不是std::string
,但可能有一个得到std::string
或类似物体,所以你只需要该成员添加到i->second
和result->second
当你说这是在地图中,你是指“std :: map”还是一个向量?他们排序? – 2012-02-14 22:24:07