`
jgsj
  • 浏览: 963738 次
文章分类
社区版块
存档分类
最新评论

Cracking the coding interview: 查找文中两个单词的距离

 
阅读更多

题目:You have a large text file containing words. Given any two words, find the shortest distance (in terms of number of words) between them in the file. If the operation will be repeated many times for the same file(but different pairs of words), can you optimize your solution?


解法一:只需要一次性查询的,我们可以利用两个下标,一个记录最后一次遇到第一个单词的下标,另外一个记录遇到另外一个单词的下标,然后每次比较两个单词的距离,记录当前最小的两个单词的距离,最后就得到最小的距离了。时间效率O(n);

程序如下:

int shortestWordsDistance(vector<string> &words, string &w1, string &w2)
{
	int lastWord1 = -1, lastWord2 = -1;
	int minDistance = INT_MAX;

	for (int i = 0; i < words.size(); i++)
	{
		if (words[i] == w1)
		{
			lastWord1 = i;
			if (lastWord2 != -1) minDistance = 
				min(minDistance, lastWord1 - lastWord2);
		}
		if (words[i] == w2)//不加else是因为当w1==w2的时候,可以计算出为零
		{
			lastWord2 = i;
			if (lastWord1 != -1) minDistance = 
				min(minDistance, lastWord2 - lastWord1);
		}
	}
	return minDistance;
}

解法二:这是个可以优化重复查找效率 的算法,利用额外空间存储文中所有单词出现的次数,并记录其出现的位置,形成一个数列,如果需要查找两个单词距离的时候,就把两个单词的出现位置数列合并起来,遍历一次这个数列就可以得到两个单词的距离了。

那么需要建立一个表,所用时间效率是:O(n);查找的时候所用时间内效率是O(m), m是两个单词出现在文中的次数。因为表不需要重复计算,所以效率可以优化很多。

带测试实例的程序如下:

struct MarkInt
{
	int a;
	string str;
	MarkInt(int a1, string s1):a(a1), str(s1){}
};

bool operator<(const MarkInt &m1, const MarkInt &m2)
{
	return m1.a < m2.a;
}

class WordsDistacne
{
	map<string, vector<MarkInt> > lookupTable;
	vector<string> words;
public:
	int shortestWordsDistanceRepeated(string &w1, string &w2)
	{
		if (!lookupTable.count(w1) || !lookupTable.count(w2)) return -1;
		vector<MarkInt> vm;
		lookupTable[w1].push_back(MarkInt(INT_MAX,w1));
		lookupTable[w2].push_back(MarkInt(INT_MAX,w2));
		int n1 = lookupTable[w1].size();
		int n2 = lookupTable[w2].size();
		
		for (int i = 0, j = 0; i < n1 && j < n2; )
		{
			if (lookupTable[w1][i] < lookupTable[w2][j])
			{
				vm.push_back(lookupTable[w1][i++]);
			}
			else vm.push_back(lookupTable[w2][j++]);
		}
		vm.pop_back();
		lookupTable[w1].pop_back();
		lookupTable[w2].pop_back();

		int lastword1 = -1, lastword2 = -1;
		int minDistance = INT_MAX;
		for (int i = 0; i < vm.size(); i++)
		{
			if (vm[i].str == w1) 
			{
				lastword1 = vm[i].a;
				if (lastword2 >= 0)
				{
					minDistance = min(minDistance, lastword1 - lastword2);
				}
			}
			if(vm[i].str == w2)
			{
				lastword2 = vm[i].a;
				if (lastword1 >= 0)
				{
					minDistance = min(minDistance, lastword2 - lastword1);
				}
			}
		}
		return minDistance;
	}

	void setupTable()
	{
		for (int i = 0; i < words.size(); i++)
		{
			lookupTable[words[i]].push_back(MarkInt(i, words[i]));
		}
	}
	WordsDistacne(vector<string> s):words(s)
	{
		setupTable();
	}
};

int main()
{
	string s[] = {"my", "you", "hi", "you", "hi","my", "go", "not", "yes", "you"};
	int n = sizeof(s)/sizeof(string);
	vector<string> vs(s, s+n);
	string s1 = "my";
	string s2 = "not";
	cout<<shortestWordsDistance(vs, s1, s2)<<endl;

	WordsDistacne wd(vs);

	cout<<wd.shortestWordsDistanceRepeated(s1,s2)<<endl;

	system("pause");
	return 0;
}






分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics