Content
页替换算法 —— 最优算法 OPT/MIN 的 C with stl 实现
MIN的最优证明参考:http://algo2.iti.kit.edu/vanstee/courses/caching.pdf
文档里叫做LFD(longest forward distance)
输入
- K, cache size
- N, number of pages
- N integers, representing the ids of the queried pages, in order
构想
把某次query的page id 命名为 queryId。在miss发生时,MIN要在cache中的所有page id里,找出下一次在query队列中出现得最晚(或者之后不再出现)的那个page id(把这个变量命名为furthest),并用queryId替换掉cache中的furthest。
最开始的想法:
cache用hashset,每次miss时在query队列中找furthest
这样,查询某个 page id是否在cache中的时间复杂度是O(1),找到furthest的复杂度是O(N),替换的复杂度是O(1)。总共会有N次查询,因此总时间复杂度是O(N^2)
改进:
新建一个node
/* One page reference: the page id plus the position of its next use. */
struct MinNode
{
    int pageId;     /* id of the referenced page */
    int nextAppear; /* position of the next reference to pageId, -1 if none is known */

    /* Node for a page whose next reference is not known (yet). */
    MinNode(int id) : pageId(id), nextAppear(-1) {}

    /* Node for a page whose next reference position is already known. */
    MinNode(int id, int next) : pageId(id), nextAppear(next) {}
};
再定义两个数据结构:
unordered_map<int, list<MinNode>::iterator> queryMap;
list<MinNode> queryList;
读入query队列是用for循环,for(i = 0; i < N; i++)
,先检查queryMap
中有没有要查询的page id,如果没有,用第一种构造方法MinNode(int id)
把这个id封装成MinNode
(相当于初始化nextAppear
为-1)并加到queryList
尾部,在queryMap
中加入<key = page id, value = --queryList.end()>
如果queryMap
中已经有要查询的page id了,那么更新这个MinNode
——把它的nextAppear
设置成i
,同样还是用第一种构造方法MinNode(int id)
把这个id封装成MinNode
并加到queryList
尾部,在queryMap
中把<key = page id>对应的value更新为--queryList.end(),即指向刚加入的新结点
这一步完成之后,queryList
里就保存了输入的query队列,并且每个结点除了page id之外,还记录了同一个page id下一次出现在队列中的位置(nextAppear
),如果没有下一个,那么nextAppear
为-1。
接下来就是cache,也用list<MinNode>
。当miss发生时,只需要遍历cache就可以找到要被替换的node。因此找到furthest
的复杂度降为O(K)。虽然cache用list实现,但是查询某个page id是否在cache中的复杂度也可以降为O(1),这只需要通过map+list的方式即可实现,具体可以看代码。替换的复杂度还是O(1)。因此总时间复杂度是O(KN)。
代码
void algoMin(int K, int N)
{
char *line = NULL;
size_t len = 0;
ssize_t nread;
char *token;
// char *querySequence;
int hitCount = 0;
int maxSize = K;
unordered_map<int, list<MinNode>::iterator> queryMap;
list<MinNode> queryList;
list<MinNode> cacheList;
unordered_map<int, list<MinNode>::iterator>::iterator mapPtr;
list<MinNode>::iterator liPtr;
int queryId;
list<MinNode>::iterator deletePtr;
int furthest = -1;
int i;
int appear = 0;
list<MinNode>::iterator subLiPtr;
if((nread = getline(&line, &len, stdin)) != -1)
{
/* parse input */
for(i = 0; i < N; i++)
{
if(i > 0)
{
token = strtok(NULL, " \n");
}
else
{
token = strtok(line, " \n");
}
sscanf(token, "%d", &queryId);
if((mapPtr = queryMap.find(queryId)) != queryMap.end())
{
/* Have appeared before, update old one's next
in the list, add the new one to the end of the list
, and replace the old one with the new one in the map */
mapPtr->second->nextAppear = i;
}
/* Not have appeared, add to the end of the list,
add into the map */
queryList.push_back( MinNode(queryId) );
queryMap[queryId] = --queryList.end();
}
/* Reuse query map */
queryMap.clear();
/* Query */
for(liPtr = queryList.begin(); liPtr != queryList.end(); liPtr++)
{
furthest = -1; /* Should be initialized */
if((mapPtr = queryMap.find(liPtr->pageId)) != queryMap.end())
{
/* If page in cache */
mapPtr->second->nextAppear = liPtr->nextAppear;
hitCount++;
}
else
{
/* Page not in cache */
if(cacheList.size() < maxSize)
{
/* Cache not full */
cacheList.push_back(MinNode(liPtr->pageId, liPtr->nextAppear));
queryMap[liPtr->pageId] = --cacheList.end();
}
else
{
/* Cache full. Look up pages in cache and pick the furthest one in the future to delete */
for(subLiPtr = cacheList.begin(); subLiPtr != cacheList.end(); subLiPtr++)
{
/* If one not appear in the future, then pick that out. */
if(subLiPtr->nextAppear == -1)
{
deletePtr = subLiPtr;
break;
}
else
{
/* Find the furthest in the future */
if(furthest < subLiPtr->nextAppear)
{
furthest = subLiPtr->nextAppear;
deletePtr = subLiPtr;
}
}
}
queryMap.erase(deletePtr->pageId);
cacheList.erase(deletePtr);
cacheList.push_back(MinNode(liPtr->pageId, liPtr->nextAppear));
queryMap[liPtr->pageId] = --cacheList.end();
}
}
}
}
displayHitRate(hitCount, N);
free(line);
}