cache lab
Part A
实验目标
- 完成一个cache simulator(c语言)
- 有任意的size, associativity
- 当需要eviction时,使用LRU(least-recently used)策略
- 使用argc, argv传参数
- getopt, fscanf辅助实现
数据结构
cache代表cache的整体
cache是每一个组的指针形成的数组的指针
cache_line实现一个组中的一行
// One line of the simulated cache. Only bookkeeping is stored; no block data is kept.
typedef struct
{
int valid; // validity flag: 0 while the line is empty, 1 once it holds a block
int tag; // tag bits of the address currently cached in this line
int time_tamp; // LRU age: reset to 0 when the line is used, incremented by update_time()
}cache_line;
// The whole cache: cache[i][j] is line j of set i.
cache_line ** cache = NULL;
cache[i][j] 代表 cache中第i组的第j行
函数实现
设定参数
- 用getopt来读取每一个字母参数以及其后跟着的附加信息
- switch语句来执行每一个参数
- -h 单独设计了一个print_help()函数来打出所需信息
- -v 用一个全局变量verbose来标记是否需要打出扩展信息
- -s 全局变量s
- -E 全局变量E
- -b 全局变量b
- -t 全局变量t
// Parse the command-line options.
int opt; // getopt return value
while ((opt = getopt(argc, argv, "hvs:E:b:t:")) != -1) {
    switch (opt) {
    case 'h':
        print_help();
        exit(0);
    case 'v':
        verbose = 1;
        break;
    case 's':
        s = atoi(optarg);
        break;
    case 'E':
        E = atoi(optarg);
        break;
    case 'b':
        b = atoi(optarg);
        break;
    case 't': {
        // Bounded copy: an over-long path must not overflow the fixed-size filename buffer.
        int written = snprintf(filename, sizeof filename, "%s", optarg);
        if (written < 0 || (size_t)written >= sizeof filename) {
            fprintf(stderr, "Trace file name is too long!\n");
            exit(1);
        }
        break;
    }
    case '?':
        // Unknown option or missing argument: stop instead of running with bad settings.
        printf("Unknown !\n");
        print_help();
        exit(1);
    default:
        break;
    }
}
动态分配数组
cache中包含S个cache_line的指针
Cache_line的指针指向具有E个cache_line的数组
分配完空间以后,对tag, valid, time_tamp分别进行初始化
//动态分配数组
S = (1<<s) ; //S = 2^s
cache = (cache_line**)malloc(sizeof(cache_line*) * S);
for(int i = 0;i < S;i++)
{
cache[i] = (cache_line*)malloc(sizeof(cache_line) * E);
}
for(int i = 0;i < S;i++)
{
for(int j = 0;j < E;j++)
{
cache[i][j].tag = cache[i][j].time_tamp = cache[i][j].valid = 0;
}
}
读取文本并执行操作
读入部分
- 打开文件
- 逐行的将文件内容读到buffer中
- sscanf将操作名称、地址、元素大小分别保存到type, address, size_elem中
- type:I 不用考虑,由于只关心data cache的表现
- address:scanf中用16进制读取,由于数据中地址全部都是16进制
- size_elem:
实际上用不到。由于题目相较于实际情况放松了一些条件,不会出现组中一行存不下一个数据的情况,故不会用到size_elem
- sscanf将操作名称、地址、元素大小分别保存到type, address, size_elem中
- switch判断指令类别
- “L”, "S"均只考虑一次数据加载
- "M"考虑一次数据加载和一次数据放回,故execute() 2次
- 由于要维护LRU policy, 故每一次执行完指令以后对所有的time_tamp加1
// Open the trace file and replay it line by line.
FILE *trace = fopen(filename, "r");
if (trace == NULL) {
    fprintf(stderr, "The File is wrong!\n");
    exit(-1);
}
int size_elem = 0;
char type = 0;
unsigned int address = 0; // %x stores through an unsigned int pointer
char buffer[1000];
while (fgets(buffer, sizeof buffer, trace)) {
    // Addresses in the trace are hexadecimal, hence %x.
    // Skip lines that do not parse completely; otherwise the values left over
    // from the previous line would be replayed as a phantom access.
    if (sscanf(buffer, " %c %x,%d", &type, &address, &size_elem) != 3) {
        continue;
    }
    if (type != 'I') { // instruction fetches are not simulated
        if (verbose) {
            // Echo the address in hex so the output matches the trace line.
            printf("%c %x,%d", type, address, size_elem);
        }
        switch (type) {
        case 'M': // modify = one load followed by one store
            execute(type, (int)address);
            execute(type, (int)address);
            break;
        case 'L':
        case 'S':
            execute(type, (int)address);
            break;
        default:
            break;
        }
        if (verbose) {
            printf("\n");
        }
    }
    // Age every valid line once per trace line to maintain the LRU order.
    update_time();
}
fclose(trace);
执行部分
- 用所学的bit的知识从address中取出S的编号(S_index)和tag的值(tag_)
- 检查是否是一次hit
- 遍历组S_index中所有的cache_line
- 如果valid为1且tag相同,确定是一次hit
- 重置时间戳
- number_hit++
- 检查是否是一次miss
- 遍历组S_index中所有的cache_line
- 如果有valid为0的cache_line,则是miss
- 复制tag
- 重置time_tamp和valid
- miss++
- 寻找time_tamp最大的cache_line来eviction
- 遍历S_index中的所有cache_line
- evict time_tamp最大的cache_line
/*
 * Simulate a single cache access to address `addr`.
 *
 * The set index is the s bits above the b block-offset bits; the tag is
 * everything above the set index. The access is resolved as, in order:
 *   1. hit      - a valid line in the set already carries the tag;
 *   2. miss     - no match, but the set has an empty line, which is filled;
 *   3. eviction - no match and the set is full: the line with the largest
 *                 time_tamp (least recently used) is replaced.
 * The matching global counter is updated and, when `verbose` is set, the
 * outcome is printed.
 *
 * operation: access type from the trace; it does not affect the simulation.
 * addr:      address being accessed.
 *
 * NOTE(review): assumes E >= 1 and that s and b are small enough for the
 * shifts below to be defined - this function does not check either.
 */
void execute(char operation,int addr)
{
    (void)operation;

    int S_index = ((0x1 << s) - 1) & (addr >> b); // set index
    int tag_ = addr >> (b + s);                    // tag

    // 1. Hit: a valid line with the same tag.
    for (int i = 0; i < E; i++) {
        if (cache[S_index][i].valid == 1 && cache[S_index][i].tag == tag_) {
            cache[S_index][i].time_tamp = 0; // just used: youngest line in the set
            number_hit++;
            if (verbose) {
                printf(" hit ");
            }
            return;
        }
    }

    // 2. Miss with room left: fill the first empty line.
    for (int i = 0; i < E; i++) {
        if (cache[S_index][i].valid == 0) {
            cache[S_index][i].valid = 1;
            cache[S_index][i].tag = tag_;
            cache[S_index][i].time_tamp = 0;
            number_miss++;
            if (verbose) {
                printf(" miss ");
            }
            return;
        }
    }

    // 3. Miss with a full set: evict the line that has gone unused the longest.
    int oldest_age = 0;
    int victim = 0;
    number_miss++;
    number_eviction++;
    for (int i = 0; i < E; i++) {
        if (cache[S_index][i].time_tamp > oldest_age) {
            oldest_age = cache[S_index][i].time_tamp;
            victim = i;
        }
    }
    cache[S_index][victim].time_tamp = 0;
    cache[S_index][victim].tag = tag_;
    // Only report in verbose mode, like the hit and miss branches.
    if (verbose) {
        printf(" miss eviction ");
    }
}
- 每一次执行了指令以后,所有的cache_line的time_tamp都++
/*
 * Advance the LRU clock by one tick: the time_tamp of every valid line in
 * every set is incremented; empty lines are left untouched.
 */
void update_time()
{
    int set = 0;
    while (set < S) {
        for (int way = 0; way < E; way++) {
            if (cache[set][way].valid != 1) {
                continue;
            }
            cache[set][way].time_tamp += 1;
        }
        set++;
    }
}
释放内存并输出结果
先释放* 再释放**
//free!
for(int i = 0;i < S;i++)
free(cache[i]);
free(cache);
printSummary(number_hit,number_miss,number_eviction);
整体代码
#include "cachelab.h"
#include<stdio.h>
#include<stdlib.h>
#include<getopt.h>
#include<string.h>
#include<unistd.h>
#include<assert.h>
// Simulator state: s, E and b are set from the -s/-E/-b options and S is computed as 2^s;
// number_hit / number_miss / number_eviction are the result counters; verbose is set by -v.
int s, E, b, S, number_hit, number_miss, number_eviction, verbose;
// Path of the trace file, copied from the -t option.
char filename[1000];
//name: SiZiHua
//student ID: 2018202181
// One line of the simulated cache. Only bookkeeping is stored; no block data is kept.
typedef struct
{
int valid; // validity flag: 0 while the line is empty, 1 once it holds a block
int tag; // tag bits of the address currently cached in this line
int time_tamp; // LRU age: reset to 0 when the line is used, incremented by update_time()
}cache_line;
// The whole cache: cache[i][j] is line j of set i.
cache_line ** cache = NULL;
/* Print the usage text to stdout, one line per supported command-line option. */
void print_help()
{
    static const char *const usage_lines[] = {
        "-h get help info\n",
        "-v Optional verbose flag that displays trace info 可选的详细标志,可以显示trace信息\n",
        "-s <s> Number of set index bits 设置索引位的数量,即设定了组数\n",
        "-E <E> Associativity (number of lines per set) 设置每组的行数\n",
        "-b <b> Number of block bits 设定了块大小\n",
        "-t <tracefile>: Name of the valgrind trace to replay 设定trace文件\n",
    };
    const size_t line_count = sizeof usage_lines / sizeof usage_lines[0];

    for (size_t k = 0; k < line_count; k++) {
        fputs(usage_lines[k], stdout);
    }
}
/*
 * Simulate a single cache access to address `addr`.
 *
 * The set index is the s bits above the b block-offset bits; the tag is
 * everything above the set index. The access is resolved as, in order:
 *   1. hit      - a valid line in the set already carries the tag;
 *   2. miss     - no match, but the set has an empty line, which is filled;
 *   3. eviction - no match and the set is full: the line with the largest
 *                 time_tamp (least recently used) is replaced.
 * The matching global counter is updated and, when `verbose` is set, the
 * outcome is printed.
 *
 * operation: access type from the trace; it does not affect the simulation.
 * addr:      address being accessed.
 *
 * NOTE(review): assumes E >= 1 and that s and b are small enough for the
 * shifts below to be defined - this function does not check either.
 */
void execute(char operation,int addr)
{
    (void)operation;

    int S_index = ((0x1 << s) - 1) & (addr >> b); // set index
    int tag_ = addr >> (b + s);                    // tag

    // 1. Hit: a valid line with the same tag.
    for (int i = 0; i < E; i++) {
        if (cache[S_index][i].valid == 1 && cache[S_index][i].tag == tag_) {
            cache[S_index][i].time_tamp = 0; // just used: youngest line in the set
            number_hit++;
            if (verbose) {
                printf(" hit ");
            }
            return;
        }
    }

    // 2. Miss with room left: fill the first empty line.
    for (int i = 0; i < E; i++) {
        if (cache[S_index][i].valid == 0) {
            cache[S_index][i].valid = 1;
            cache[S_index][i].tag = tag_;
            cache[S_index][i].time_tamp = 0;
            number_miss++;
            if (verbose) {
                printf(" miss ");
            }
            return;
        }
    }

    // 3. Miss with a full set: evict the line that has gone unused the longest.
    int oldest_age = 0;
    int victim = 0;
    number_miss++;
    number_eviction++;
    for (int i = 0; i < E; i++) {
        if (cache[S_index][i].time_tamp > oldest_age) {
            oldest_age = cache[S_index][i].time_tamp;
            victim = i;
        }
    }
    cache[S_index][victim].time_tamp = 0;
    cache[S_index][victim].tag = tag_;
    // Only report in verbose mode, like the hit and miss branches.
    if (verbose) {
        printf(" miss eviction ");
    }
}
/*
 * Advance the LRU clock by one tick: the time_tamp of every valid line in
 * every set is incremented; empty lines are left untouched.
 */
void update_time()
{
    int set = 0;
    while (set < S) {
        for (int way = 0; way < E; way++) {
            if (cache[set][way].valid != 1) {
                continue;
            }
            cache[set][way].time_tamp += 1;
        }
        set++;
    }
}
/*
 * Cache simulator entry point.
 *
 * Parses -h, -v, -s <s>, -E <E>, -b <b> and -t <tracefile>, builds a cache of
 * 2^s sets with E lines each, replays every data access of the trace through
 * execute(), and finally reports hits, misses and evictions via printSummary().
 *
 * Returns 0 on success. Exits with a non-zero status on an unknown option,
 * invalid cache parameters, an over-long or unopenable trace file, or when
 * memory cannot be allocated.
 */
int main(int argc,char* argv[])
{
    verbose = number_eviction = number_hit = number_miss = 0;//init

    // Parse the command-line options.
    int opt; //getopt return value
    while ((opt = getopt(argc, argv, "hvs:E:b:t:")) != -1) {
        switch (opt) {
        case 'h':
            print_help();
            exit(0);
        case 'v':
            verbose = 1;
            break;
        case 's':
            s = atoi(optarg);
            break;
        case 'E':
            E = atoi(optarg);
            break;
        case 'b':
            b = atoi(optarg);
            break;
        case 't': {
            // Bounded copy: an over-long path must not overflow the fixed-size filename buffer.
            int written = snprintf(filename, sizeof filename, "%s", optarg);
            if (written < 0 || (size_t)written >= sizeof filename) {
                fprintf(stderr, "Trace file name is too long!\n");
                exit(1);
            }
            break;
        }
        case '?':
            // Unknown option or missing argument: stop instead of running with bad settings.
            printf("Unknown !\n");
            print_help();
            exit(1);
        default:
            break;
        }
    }

    // Reject geometries the simulator cannot represent: a set needs at least
    // one line, and the shifts by s and by b + s must stay inside an int.
    const int int_bits = (int)(sizeof(int) * 8);
    if (s < 0 || b < 0 || E <= 0 ||
        s >= int_bits - 1 || b >= int_bits || s + b >= int_bits) {
        fprintf(stderr, "Invalid cache parameters!\n");
        print_help();
        exit(1);
    }

    // Allocate the cache: S set pointers, each pointing at an array of E lines.
    S = (1 << s); // S = 2^s
    cache = calloc((size_t)S, sizeof *cache);
    if (cache == NULL) {
        fprintf(stderr, "Out of memory!\n");
        exit(1);
    }
    for (int i = 0; i < S; i++) {
        // calloc zero-fills, so valid, tag and time_tamp all start at 0.
        cache[i] = calloc((size_t)E, sizeof *cache[i]);
        if (cache[i] == NULL) {
            fprintf(stderr, "Out of memory!\n");
            exit(1);
        }
    }

    // Open the trace file and replay it line by line.
    FILE *trace = fopen(filename, "r");
    if (trace == NULL) {
        fprintf(stderr, "The File is wrong!\n");
        exit(-1);
    }
    char buffer[1000];
    while (fgets(buffer, sizeof buffer, trace)) {
        char type;
        unsigned int address; // %x stores through an unsigned int pointer
        int size_elem;

        // Addresses in the trace are hexadecimal, hence %x.
        // Skip lines that do not parse completely; otherwise the values left
        // over from the previous line would be replayed as a phantom access.
        if (sscanf(buffer, " %c %x,%d", &type, &address, &size_elem) != 3) {
            continue;
        }
        if (type != 'I') { // instruction fetches are not simulated
            if (verbose) {
                // Echo the address in hex so the output matches the trace line.
                printf("%c %x,%d", type, address, size_elem);
            }
            switch (type) {
            case 'M': // modify = one load followed by one store
                execute(type, (int)address);
                execute(type, (int)address);
                break;
            case 'L':
            case 'S':
                execute(type, (int)address);
                break;
            default:
                break;
            }
            if (verbose) {
                printf("\n");
            }
        }
        // Age every valid line once per trace line to maintain the LRU order.
        update_time();
    }
    fclose(trace);

    // Release the per-set line arrays first, then the array of set pointers.
    for (int i = 0; i < S; i++) {
        free(cache[i]);
    }
    free(cache);
    cache = NULL;

    printSummary(number_hit, number_miss, number_eviction);
    return 0;
}
Part B
分析cache
- a block size of 32 bytes
- Cache , size of 1k
- 计算得cache有32个组
- 每一条cache_line(32 bytes)可以存8个int
- 整个cache可以存下256个int
32 * 32
- 一行32个int,占用4个block,所以cache可以存8行
- 只要2个int之间相差8行的整数倍,读取这两个int时就会发生替换,再读后面的连续元素会发生抖动
- 由于cache只能存前8行,所以用分块法时分8*8的块比较合理
- 对于对角线上的块
- A中每读一行,会有一次miss,也就是miss操作数是操作数的1/8
- 对于B数组来说,第一次读取这行会产生一次miss,之后读到第i行时,只有A中读到第i行时,会被移除cache,存的时候会产生一次miss,可以近似为miss概率为1/4
- 对于不在对角线上的块
- A还是1/8的miss概率
- B的每行在cache中和A的每行不冲突,也是1/8的概率
- 故最终计算得 4 * 64 * (1/8 + 1/4)+ 12 * 64 * 2 * 1/8 = 288
- 实际结果为287,基本符合计算结果,满足<300的条件
// Transpose A into B one 8x8 tile at a time: i is the tile's first row in A, j its first column.
// NOTE(review): the loops do no clamping, so this assumes N and M are multiples of 8 - confirm against the caller.
for(int i = 0;i < N;i = i+8)
{
for(int j = 0;j < M;j = j+8)
{
for(int k = i;k < i+8;k++)
{
// Read all 8 elements of row k of the tile into locals before any write to B.
int temp0 = A[k][j];
int temp1 = A[k][j+1];
int temp2 = A[k][j+2];
int temp3 = A[k][j+3];
int temp4 = A[k][j+4];
int temp5 = A[k][j+5];
int temp6 = A[k][j+6];
int temp7 = A[k][j+7];
// Store them down column k of the matching tile of B.
B[j][k] = temp0;
B[j+1][k] = temp1;
B[j+2][k] = temp2;
B[j+3][k] = temp3;
B[j+4][k] = temp4;
B[j+5][k] = temp5;
B[j+6][k] = temp6;
B[j+7][k] = temp7;
}
}
}
64 * 64
数组一行有64个int,即8个block
所以每4行就会填满一个cache,故两个元素如果差了4行的整数倍就会发生冲突
如果我们使用4*4的方式类似于上面32*32的方法分块进行计算,不会出错,但是每次都有1/2的cache没有被利用到,得分不够,经过实践,用4*4的方式得到的结果是1843,没有达到小于1300的要求
-
最终方法:
还是使用8*8的块来做,但是在B中不用一次将数据移动到正确位置
- 先将A的上半部分存入B中,将A中左上4*4存在B中左上4*4,将A中右上4*4存在B中右上4*4,都是按照列存(对于对角线上的元素,A中miss率是1/8,B中左上角的部分miss率是1/2;对于不在对角线上的元素,A的miss率还是1/8,B的miss率是1/4)
- 把A左下角一列4个int读出,B右上角中按照行读出4个int,用临时变量存储,交换位置(对于在对角线上的块,从A左下角读取miss率是1,B的右上角miss率是1/4,B的左下角miss率是1/4;对于不在对角线上的块,A的miss率是1/4,B的miss率是0)
- 最后把A的右下角填入B的右下角(对于在对角线上的块,A的miss率为1/4,B的miss率为1/2;不在对角线上的块,A的miss率为0,B的miss率为0)
// Transpose A into B one 8x8 tile at a time, handling each tile as four 4x4 quadrants in three passes.
// NOTE(review): the loops do no clamping, so this assumes N and M are multiples of 8 - confirm against the caller.
for(int i = 0;i < N;i = i+8)
{
for(int j = 0;j < M;j = j+8)
{
for(int k = 0;k < 4;k++)
{
// Pass 1: take the top 4 rows (all 8 columns) of A's tile and store them column-wise into the
// top 4 rows of B's tile. The right half lands in a temporary position and is moved in pass 2.
int temp0 = A[i+k][j];
int temp1 = A[i+k][j+1];
int temp2 = A[i+k][j+2];
int temp3 = A[i+k][j+3];
int temp4 = A[i+k][j+4];
int temp5 = A[i+k][j+5];
int temp6 = A[i+k][j+6];
int temp7 = A[i+k][j+7];
B[j+0][i+k] = temp0;
B[j+1][i+k] = temp1;
B[j+2][i+k] = temp2;
B[j+3][i+k] = temp3;
B[j+0][i+k+4] = temp4;
B[j+1][i+k+4] = temp5;
B[j+2][i+k+4] = temp6;
B[j+3][i+k+4] = temp7;
}
for(int k = 0;k < 4;k++)
{
// Pass 2: move the 4x4 block parked in B's top-right quadrant to B's bottom-left quadrant,
// and put A's bottom-left 4x4 block (transposed) into B's top-right quadrant.
// Row k of B's top-right quadrant is saved first, because it is overwritten below.
int temp0 = B[j+k][i+4];
int temp1 = B[j+k][i+5];
int temp2 = B[j+k][i+6];
int temp3 = B[j+k][i+7];
// Column k of A's bottom-left quadrant.
int temp4 = A[i+4][j+k];
int temp5 = A[i+5][j+k];
int temp6 = A[i+6][j+k];
int temp7 = A[i+7][j+k];
B[j+k][i+4] = temp4;
B[j+k][i+5] = temp5;
B[j+k][i+6] = temp6;
B[j+k][i+7] = temp7;
B[j+4+k][i+0] = temp0;
B[j+4+k][i+1] = temp1;
B[j+4+k][i+2] = temp2;
B[j+4+k][i+3] = temp3;
}
// Pass 3: transpose the bottom-right 4x4 quadrant element by element.
for(int ii = 4 ;ii < 8;ii++)
{
for(int jj = 4;jj < 8;jj++)
{
B[j+jj][i+ii] = A[i+ii][j+jj];
}
}
}
}
61 * 67
由于61*67是不规则的,无法得到隔几行一定会冲突的规律,所以采用直接转置的分块方法。具体分块大小无法通过计算确定,经过多次实验尝试,使用16*16的块时,miss的数量小于要求的值
- 处理16*16的块来转置
- 处理mod 16 的部分
// Transpose A into B in 16x16 tiles, then sweep up whatever the tiled loops left over.
// i and j are declared outside the loops because the clean-up passes below start from the
// values at which the tiled loops stop.
// NOTE(review): j is only assigned inside the outer loop, so the last clean-up pass assumes
// the outer loop body runs at least once (N > 16) - confirm for the sizes this is called with.
int i,j;
// The strict '<' means a tile is processed only if it ends before the matrix edge;
// anything from the last partial (or exactly fitting) tile onward is left to the clean-up passes.
for(i = 0;i + 16< N;i = i + 16)
{
for( j = 0;j + 16< M;j = j + 16)
{
for(int k = i;k < i+16;k++)
{
// First half of row k of the tile: read 8 elements, then store them down column k of B.
int temp0 = A[k][j+0];
int temp1 = A[k][j+1];
int temp2 = A[k][j+2];
int temp3 = A[k][j+3];
int temp4 = A[k][j+4];
int temp5 = A[k][j+5];
int temp6 = A[k][j+6];
int temp7 = A[k][j+7];
B[j+0][k] = temp0;
B[j+1][k] = temp1;
B[j+2][k] = temp2;
B[j+3][k] = temp3;
B[j+4][k] = temp4;
B[j+5][k] = temp5;
B[j+6][k] = temp6;
B[j+7][k] = temp7;
// Second half of row k of the tile, reusing the same 8 locals.
temp0 = A[k][j+8];
temp1 = A[k][j+9];
temp2 = A[k][j+10];
temp3 = A[k][j+11];
temp4 = A[k][j+12];
temp5 = A[k][j+13];
temp6 = A[k][j+14];
temp7 = A[k][j+15];
B[j+8][k] = temp0;
B[j+9][k] = temp1;
B[j+10][k] = temp2;
B[j+11][k] = temp3;
B[j+12][k] = temp4;
B[j+13][k] = temp5;
B[j+14][k] = temp6;
B[j+15][k] = temp7;
}
}
}
// Clean-up 1: the remaining rows of A (i .. N-1), across every column.
for(int ii = i;ii < N;ii++)
{
for(int jj = 0 ;jj < M;jj++)
{
B[jj][ii] = A[ii][jj];
}
}
// Clean-up 2: the remaining columns of A (j .. M-1) for the rows the tiles already covered (0 .. i-1).
for(int ii = 0;ii < i;ii++)
{
for(int jj = j;jj < M;jj++)
{
B[jj][ii] = A[ii][jj];
}
}
完整代码:
#include "cachelab.h"
#include<stdio.h>
#include<stdlib.h>
#include<getopt.h>
#include<string.h>
#include<unistd.h>
#include<assert.h>
// Simulator state: s, E and b are set from the -s/-E/-b options and S is computed as 2^s;
// number_hit / number_miss / number_eviction are the result counters; verbose is set by -v.
int s, E, b, S, number_hit, number_miss, number_eviction, verbose;
// Path of the trace file, copied from the -t option.
char filename[1000];
//name: SiZiHua
//student ID: 2018202181
// One line of the simulated cache. Only bookkeeping is stored; no block data is kept.
typedef struct
{
int valid; // validity flag: 0 while the line is empty, 1 once it holds a block
int tag; // tag bits of the address currently cached in this line
int time_tamp; // LRU age: reset to 0 when the line is used, incremented by update_time()
}cache_line;
// The whole cache: cache[i][j] is line j of set i.
cache_line ** cache = NULL;
/* Print the usage text to stdout, one line per supported command-line option. */
void print_help()
{
    static const char *const usage_lines[] = {
        "-h get help info\n",
        "-v Optional verbose flag that displays trace info 可选的详细标志,可以显示trace信息\n",
        "-s <s> Number of set index bits 设置索引位的数量,即设定了组数\n",
        "-E <E> Associativity (number of lines per set) 设置每组的行数\n",
        "-b <b> Number of block bits 设定了块大小\n",
        "-t <tracefile>: Name of the valgrind trace to replay 设定trace文件\n",
    };
    const size_t line_count = sizeof usage_lines / sizeof usage_lines[0];

    for (size_t k = 0; k < line_count; k++) {
        fputs(usage_lines[k], stdout);
    }
}
/*
 * Simulate a single cache access to address `addr`.
 *
 * The set index is the s bits above the b block-offset bits; the tag is
 * everything above the set index. The access is resolved as, in order:
 *   1. hit      - a valid line in the set already carries the tag;
 *   2. miss     - no match, but the set has an empty line, which is filled;
 *   3. eviction - no match and the set is full: the line with the largest
 *                 time_tamp (least recently used) is replaced.
 * The matching global counter is updated and, when `verbose` is set, the
 * outcome is printed.
 *
 * operation: access type from the trace; it does not affect the simulation.
 * addr:      address being accessed.
 *
 * NOTE(review): assumes E >= 1 and that s and b are small enough for the
 * shifts below to be defined - this function does not check either.
 */
void execute(char operation,int addr)
{
    (void)operation;

    int S_index = ((0x1 << s) - 1) & (addr >> b); // set index
    int tag_ = addr >> (b + s);                    // tag

    // 1. Hit: a valid line with the same tag.
    for (int i = 0; i < E; i++) {
        if (cache[S_index][i].valid == 1 && cache[S_index][i].tag == tag_) {
            cache[S_index][i].time_tamp = 0; // just used: youngest line in the set
            number_hit++;
            if (verbose) {
                printf(" hit ");
            }
            return;
        }
    }

    // 2. Miss with room left: fill the first empty line.
    for (int i = 0; i < E; i++) {
        if (cache[S_index][i].valid == 0) {
            cache[S_index][i].valid = 1;
            cache[S_index][i].tag = tag_;
            cache[S_index][i].time_tamp = 0;
            number_miss++;
            if (verbose) {
                printf(" miss ");
            }
            return;
        }
    }

    // 3. Miss with a full set: evict the line that has gone unused the longest.
    int oldest_age = 0;
    int victim = 0;
    number_miss++;
    number_eviction++;
    for (int i = 0; i < E; i++) {
        if (cache[S_index][i].time_tamp > oldest_age) {
            oldest_age = cache[S_index][i].time_tamp;
            victim = i;
        }
    }
    cache[S_index][victim].time_tamp = 0;
    cache[S_index][victim].tag = tag_;
    // Only report in verbose mode, like the hit and miss branches.
    if (verbose) {
        printf(" miss eviction ");
    }
}
/*
 * Advance the LRU clock by one tick: the time_tamp of every valid line in
 * every set is incremented; empty lines are left untouched.
 */
void update_time()
{
    int set = 0;
    while (set < S) {
        for (int way = 0; way < E; way++) {
            if (cache[set][way].valid != 1) {
                continue;
            }
            cache[set][way].time_tamp += 1;
        }
        set++;
    }
}
/*
 * Cache simulator entry point.
 *
 * Parses -h, -v, -s <s>, -E <E>, -b <b> and -t <tracefile>, builds a cache of
 * 2^s sets with E lines each, replays every data access of the trace through
 * execute(), and finally reports hits, misses and evictions via printSummary().
 *
 * Returns 0 on success. Exits with a non-zero status on an unknown option,
 * invalid cache parameters, an over-long or unopenable trace file, or when
 * memory cannot be allocated.
 */
int main(int argc,char* argv[])
{
    verbose = number_eviction = number_hit = number_miss = 0;//init

    // Parse the command-line options.
    int opt; //getopt return value
    while ((opt = getopt(argc, argv, "hvs:E:b:t:")) != -1) {
        switch (opt) {
        case 'h':
            print_help();
            exit(0);
        case 'v':
            verbose = 1;
            break;
        case 's':
            s = atoi(optarg);
            break;
        case 'E':
            E = atoi(optarg);
            break;
        case 'b':
            b = atoi(optarg);
            break;
        case 't': {
            // Bounded copy: an over-long path must not overflow the fixed-size filename buffer.
            int written = snprintf(filename, sizeof filename, "%s", optarg);
            if (written < 0 || (size_t)written >= sizeof filename) {
                fprintf(stderr, "Trace file name is too long!\n");
                exit(1);
            }
            break;
        }
        case '?':
            // Unknown option or missing argument: stop instead of running with bad settings.
            printf("Unknown !\n");
            print_help();
            exit(1);
        default:
            break;
        }
    }

    // Reject geometries the simulator cannot represent: a set needs at least
    // one line, and the shifts by s and by b + s must stay inside an int.
    const int int_bits = (int)(sizeof(int) * 8);
    if (s < 0 || b < 0 || E <= 0 ||
        s >= int_bits - 1 || b >= int_bits || s + b >= int_bits) {
        fprintf(stderr, "Invalid cache parameters!\n");
        print_help();
        exit(1);
    }

    // Allocate the cache: S set pointers, each pointing at an array of E lines.
    S = (1 << s); // S = 2^s
    cache = calloc((size_t)S, sizeof *cache);
    if (cache == NULL) {
        fprintf(stderr, "Out of memory!\n");
        exit(1);
    }
    for (int i = 0; i < S; i++) {
        // calloc zero-fills, so valid, tag and time_tamp all start at 0.
        cache[i] = calloc((size_t)E, sizeof *cache[i]);
        if (cache[i] == NULL) {
            fprintf(stderr, "Out of memory!\n");
            exit(1);
        }
    }

    // Open the trace file and replay it line by line.
    FILE *trace = fopen(filename, "r");
    if (trace == NULL) {
        fprintf(stderr, "The File is wrong!\n");
        exit(-1);
    }
    char buffer[1000];
    while (fgets(buffer, sizeof buffer, trace)) {
        char type;
        unsigned int address; // %x stores through an unsigned int pointer
        int size_elem;

        // Addresses in the trace are hexadecimal, hence %x.
        // Skip lines that do not parse completely; otherwise the values left
        // over from the previous line would be replayed as a phantom access.
        if (sscanf(buffer, " %c %x,%d", &type, &address, &size_elem) != 3) {
            continue;
        }
        if (type != 'I') { // instruction fetches are not simulated
            if (verbose) {
                // Echo the address in hex so the output matches the trace line.
                printf("%c %x,%d", type, address, size_elem);
            }
            switch (type) {
            case 'M': // modify = one load followed by one store
                execute(type, (int)address);
                execute(type, (int)address);
                break;
            case 'L':
            case 'S':
                execute(type, (int)address);
                break;
            default:
                break;
            }
            if (verbose) {
                printf("\n");
            }
        }
        // Age every valid line once per trace line to maintain the LRU order.
        update_time();
    }
    fclose(trace);

    // Release the per-set line arrays first, then the array of set pointers.
    for (int i = 0; i < S; i++) {
        free(cache[i]);
    }
    free(cache);
    cache = NULL;

    printSummary(number_hit, number_miss, number_eviction);
    return 0;
}