深入理解计算机系统arch lab

这里写图片描述

Arch lab

一:实验梗概

在这个lab中,分为PartA,PartB,PartC。在PartA中,你要编写Y86-64程序.在PartB中,你要给SEQ处理器添加iaddq指令。在PartC,你要优化你的程序。

file contains
misc YAS和YIS
seq SEQ处理器
pipe PIPE处理器
y86_code 书上的y86代码
ptest 测试脚本
Makefile Makefile
README README

工具

YAS
Y86-64汇编器,输入.ys结尾的y86 code,输出.yo结尾的object code。

./yas ./asum.ys       //会输出asum.yo文件

YIS
Y86-64指令模拟器,输入.yo结尾的object code,输出执行结果

./yis ./asum.yo     //会输出asum.yo程序情况

SEQ
SEQ+
PIPE
SEQ处理器对应于ssim(这个可执行文件),PIPE处理器对应于psim

-h help
-g gui模式
-t 输出处理器执行和ISA执行的差异(用来检测处理器是否遵守ISA)
./ssim -t <../a.yo     //ssim执行a.yo




PART A

在sim/misc文件夹里,有examples.c文件,里面有三个函数:sum_list、rsum_list、copy_block。分别编写这三个函数的Y86-64代码(sum.ys、rsum.ys、copy.ys)。


sum.ys

# sum.ys - iteratively sum the elements of a linked list, written by peanwang
# Execution begins at address 0
    .pos 0
    irmovq stack,%rsp       # set up the stack pointer
    call main
    halt

# Sample linked list: each element is { value, pointer to next element }
    .align 8
ele1:
    .quad 0x00a
    .quad ele2
ele2:
    .quad 0x0b0
    .quad ele3
ele3:
    .quad 0xc00
    .quad 0                 # next == 0 terminates the list

# main: call sum_list(ele1); the result is left in %rax
main:
    irmovq ele1,%rdi
    call sum_list
    ret

# long sum_list(list_ptr ls)
# In:       %rdi = ls (0 for an empty list)
# Out:      %rax = sum of all element values
# Clobbers: %rdi, %r10, condition codes (caller-saved registers only)
sum_list:
    xorq %rax,%rax          # sum = 0
    andq %rdi,%rdi          # ls == 0 ?
    je sum_done             # empty list: return 0
sum_loop:
    mrmovq (%rdi),%r10      # r10 = ls->val
    mrmovq 8(%rdi),%rdi     # ls = ls->next
    addq %r10,%rax          # sum += val
    andq %rdi,%rdi          # more elements?
    jne sum_loop
sum_done:
    ret

# Stack starts here and grows to lower addresses
    .pos 0x200
stack:

这里写图片描述


rsum.ys

# rsum.ys - recursively sum the elements of a linked list, written by peanwang
# Execution begins at address 0
    .pos 0
    irmovq stack,%rsp       # set up the stack pointer
    call main
    halt

# Sample linked list: each element is { value, pointer to next element }
    .align 8
ele1:
    .quad 0x00a
    .quad ele2
ele2:
    .quad 0x0b0
    .quad ele3
ele3:
    .quad 0xc00
    .quad 0                 # next == 0 terminates the list

# main: call rsum_list(ele1); the result is left in %rax
main:
    irmovq ele1,%rdi
    call rsum_list
    ret

# long rsum_list(list_ptr ls)
# In:       %rdi = ls (0 for an empty list)
# Out:      %rax = sum of all element values
# Clobbers: %rdi, condition codes; %rbx is saved and restored
rsum_list:
    andq %rdi,%rdi          # ls == 0 ?
    je rsum_base            # base case: empty list
    pushq %rbx              # %rbx must survive the recursive call
    mrmovq (%rdi),%rbx      # rbx = ls->val
    mrmovq 8(%rdi),%rdi     # ls = ls->next
    call rsum_list          # rax = rsum_list(ls->next)
    addq %rbx,%rax          # rax += val
    popq %rbx
    ret
rsum_base:
    xorq %rax,%rax          # return 0
    ret

# Stack starts here and grows to lower addresses
    .pos 0x200
stack:

这里写图片描述


copy.ys

# copy.ys - copy a block of words and return the XOR of the copied words,
# written by peanwang
# Execution begins at address 0
    .pos 0
    irmovq stack,%rsp       # set up the stack pointer
    call main
    halt

# Two three-word blocks
    .align 8
# Source block
src:
    .quad 0x00a
    .quad 0x0b0
    .quad 0xc00
# Destination block
dest:
    .quad 0x111
    .quad 0x222
    .quad 0x333

# main: call copy_block(src, dest, 3); the result is left in %rax
main:
    irmovq src,%rdi
    irmovq dest,%rsi
    irmovq $3,%rdx
    call copy_block
    ret

# long copy_block(long *src, long *dest, long len)
# In:       %rdi = src, %rsi = dest, %rdx = len (nothing is copied if len <= 0)
# Out:      %rax = XOR of all words copied
# Clobbers: %rdi, %rsi, %rdx, %r8, %r9, %r10, condition codes
#           (caller-saved registers only)
copy_block:
    irmovq $8,%r8           # word size, used to step both pointers
    irmovq $1,%r9           # loop decrement
    xorq %rax,%rax          # result = 0
    andq %rdx,%rdx          # len <= 0 ?
    jle copy_done
copy_loop:
    mrmovq (%rdi),%r10      # val = *src
    addq %r8,%rdi           # src++
    rmmovq %r10,(%rsi)      # *dest = val
    addq %r8,%rsi           # dest++
    xorq %r10,%rax          # result ^= val
    subq %r9,%rdx           # len--
    jg copy_loop            # continue while len > 0
copy_done:
    ret

# Stack starts here and grows to lower addresses
    .pos 0x200
stack:

这里写图片描述



PART B

在sim/seq文件夹里,修改seq-full.hcl文件,添加iaddq指令
首先:写出iaddq指令描述

state do
fetch icode:ifun<-M1[PC]
rA:rB<-M1[PC+1]
valC<-M8[PC+2]
valP<-PC+10
decode valB<-R[rB]
execute valE<-valB+valC
Set CC
memory
writeback R[rB]<-valE
PC<-valP



seq-full.hcl

#/* $begin seq-all-hcl */
####################################################################
#  HCL Description of Control for Single Cycle Y86-64 Processor SEQ   #
#  Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2010       #
####################################################################

## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work

####################################################################
#    C Include's.  Don't alter these                               #
####################################################################

quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'word_t gen_pc(){return 0;}'
quote 'int main(int argc, char *argv[])'
quote '  {plusmode=0;return sim_main(argc,argv);}'

####################################################################
#    Declarations.  Do not change/remove/delete any of these       # 
####################################################################

##### Symbolic representation of Y86-64 Instruction Codes #############
# Each wordsig line pairs an HCL name (left) with a quoted simulator-side
# symbol (right).  These names are what the control logic below tests icode
# against.
wordsig INOP    'I_NOP'
wordsig IHALT   'I_HALT'
wordsig IRRMOVQ 'I_RRMOVQ'
wordsig IIRMOVQ 'I_IRMOVQ'
wordsig IRMMOVQ 'I_RMMOVQ'
wordsig IMRMOVQ 'I_MRMOVQ'
wordsig IOPQ    'I_ALU'
wordsig IJXX    'I_JMP'
wordsig ICALL   'I_CALL'
wordsig IRET    'I_RET'
wordsig IPUSHQ  'I_PUSHQ'
wordsig IPOPQ   'I_POPQ'
# Instruction code for the iaddq instruction (the instruction added in Part B)
wordsig IIADDQ  'I_IADDQ'

##### Symbolic representations of Y86-64 function codes                 #####
wordsig FNONE    'F_NONE'        # Default function code

##### Symbolic representation of Y86-64 Registers referenced explicitly #####
wordsig RRSP     'REG_RSP'      # Stack Pointer
wordsig RNONE    'REG_NONE'     # Special value indicating "no register"

##### ALU Functions referenced explicitly                            #####
wordsig ALUADD  'A_ADD'     # ALU should add its arguments

##### Possible instruction status values                             #####
wordsig SAOK    'STAT_AOK'  # Normal execution
wordsig SADR    'STAT_ADR'  # Invalid memory address
wordsig SINS    'STAT_INS'  # Invalid instruction
wordsig SHLT    'STAT_HLT'  # Halt instruction encountered

##### Signals that can be referenced by control logic ####################

##### Fetch stage inputs        #####
wordsig pc 'pc'             # Program counter
##### Fetch stage computations      #####
wordsig imem_icode 'imem_icode'     # icode field from instruction memory
wordsig imem_ifun  'imem_ifun'      # ifun field from instruction memory
wordsig icode     'icode'       # Instruction control code
wordsig ifun      'ifun'        # Instruction function
wordsig rA    'ra'          # rA field from instruction
wordsig rB    'rb'          # rB field from instruction
wordsig valC      'valc'        # Constant from instruction
wordsig valP      'valp'        # Address of following instruction
boolsig imem_error 'imem_error'     # Error signal from instruction memory
boolsig instr_valid 'instr_valid'   # Is fetched instruction valid?

##### Decode stage computations     #####
wordsig valA    'vala'          # Value from register A port
wordsig valB    'valb'          # Value from register B port

##### Execute stage computations    #####
wordsig valE    'vale'          # Value computed by ALU
boolsig Cnd 'cond'          # Branch test

##### Memory stage computations     #####
wordsig valM    'valm'          # Value read from memory
boolsig dmem_error 'dmem_error'     # Error signal from data memory


####################################################################
#    Control Signal Definitions.                                   #
####################################################################

################ Fetch Stage     ###################################
# iaddq changes in this stage: IIADDQ is added to instr_valid, need_regids
# and need_valC.

# Determine instruction code
# An instruction-memory error turns the fetched instruction into a nop.
word icode = [
    imem_error: INOP;
    1: imem_icode;      # Default: get from instruction memory
];

# Determine instruction function
# An instruction-memory error forces the default function code.
word ifun = [
    imem_error: FNONE;
    1: imem_ifun;       # Default: get from instruction memory
];

# Is the fetched icode one of the supported instructions?
# IIADDQ is listed so that iaddq is not reported as an invalid instruction
# (Stat yields SINS when instr_valid is false).
bool instr_valid = icode in 
    { INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
           IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ ,IIADDQ };

# Does fetched instruction require a regid byte?
# iaddq names its destination register in rB, so it needs the register byte.
bool need_regids =
    icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
             IIRMOVQ, IRMMOVQ, IMRMOVQ,IIADDQ };

# Does fetched instruction require a constant word?
# iaddq carries its immediate operand in valC.
bool need_valC =
    icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL,IIADDQ };

################ Decode Stage    ###################################
# iaddq changes in this stage: rB is both read (srcB) and written (dstE).
# iaddq is absent from srcA and dstM, so it takes their RNONE defaults.

## What register should be used as the A source?
word srcA = [
    icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : rA;
    icode in { IPOPQ, IRET } : RRSP;
    1 : RNONE; # Don't need register
];

## What register should be used as the B source?
## iaddq reads its register operand through rB (valB <- R[rB]).
word srcB = [
    icode in { IOPQ, IRMMOVQ, IMRMOVQ ,IIADDQ } : rB;
    icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't need register
];

## What register should be used as the E destination?
## The IRRMOVQ case only writes rB when Cnd holds.
## iaddq writes the ALU result back to rB (R[rB] <- valE).
word dstE = [
    icode in { IRRMOVQ } && Cnd : rB;
    icode in { IIRMOVQ, IOPQ ,IIADDQ } : rB;
    icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't write any register
];
## What register should be used as the M destination?
word dstM = [
    icode in { IMRMOVQ, IPOPQ } : rA;
    1 : RNONE;  # Don't write any register
];

################ Execute Stage   ###################################
# iaddq changes in this stage: the ALU computes valB + valC and the
# condition codes are updated.

## Select input A to ALU
## iaddq feeds its immediate (valC) into ALU input A.
word aluA = [
    icode in { IRRMOVQ, IOPQ } : valA;
    icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ ,IIADDQ } : valC;
    icode in { ICALL, IPUSHQ } : -8;
    icode in { IRET, IPOPQ } : 8;
    # Other instructions don't need ALU
];

## Select input B to ALU
## iaddq feeds the current value of rB (valB) into ALU input B.
word aluB = [
    icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
              IPUSHQ, IRET, IPOPQ ,IIADDQ } : valB;
    icode in { IRRMOVQ, IIRMOVQ } : 0;
    # Other instructions don't need ALU
];

## Set the ALU function
## Only IOPQ selects the operation from ifun; every other instruction,
## iaddq included, takes the default ALUADD.
word alufun = [
    icode == IOPQ : ifun;
    1 : ALUADD;
];

## Should the condition codes be updated?
## iaddq sets the condition codes, as IOPQ does.
bool set_cc = icode in { IOPQ ,IIADDQ};

################ Memory Stage    ###################################
# No iaddq changes in this stage: IIADDQ appears in neither mem_read nor
# mem_write, so iaddq performs no data-memory access.

## Set read control signal
bool mem_read = icode in { IMRMOVQ, IPOPQ, IRET };

## Set write control signal
bool mem_write = icode in { IRMMOVQ, IPUSHQ, ICALL };

## Select memory address
## Stores, pushes, calls and loads use the ALU result; pop and ret use valA.
word mem_addr = [
    icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : valE;
    icode in { IPOPQ, IRET } : valA;
    # Other instructions don't need address
];

## Select memory input data
word mem_data = [
    # Value from register
    icode in { IRMMOVQ, IPUSHQ } : valA;
    # Return PC
    icode == ICALL : valP;
    # Default: Don't write anything
];

## Determine instruction status
## Cases are checked top to bottom: a memory error is reported first,
## then an invalid instruction, then halt.
word Stat = [
    imem_error || dmem_error : SADR;
    !instr_valid: SINS;
    icode == IHALT : SHLT;
    1 : SAOK;
];

################ Program Counter Update ############################
# No iaddq changes here: iaddq matches none of the special cases below and
# therefore continues at valP, the address of the following instruction.

## What address should instruction be fetched at

word new_pc = [
    # Call.  Use instruction constant
    icode == ICALL : valC;
    # Taken branch.  Use instruction constant
    icode == IJXX && Cnd : valC;
    # Completion of RET instruction.  Use value from stack
    icode == IRET : valM;
    # Default: Use incremented PC
    1 : valP;
];
#/* $end seq-all-hcl */



编译和测试

make VERSION=full

做测试
这里写图片描述
这里写图片描述
regression_test1 output:

./optest.pl -s ../seq/ssim 
Simulating with ../seq/ssim
  All 49 ISA Checks Succeed
./jtest.pl -s ../seq/ssim 
Simulating with ../seq/ssim
  All 64 ISA Checks Succeed
./ctest.pl -s ../seq/ssim 
Simulating with ../seq/ssim
  All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim 
Simulating with ../seq/ssim
  All 600 ISA Checks Succeed

regression_test2 output:

./optest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 58 ISA Checks Succeed
./jtest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 96 ISA Checks Succeed
./ctest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 756 ISA Checks Succeed

PartC

任务:修改ncopy.ys和pipe-full.hcl.尽所能提高ncopy.ys性能
逻辑控制那里我没看明白(看了5遍了(っ °Д °;)っ),所以PartC我写得不好。

pipe-full.hcl修改
①:添加iaddq指令
②:修改预测分支器,修改成BTFNT(家庭作业)。(没做出来)

ncopy.ys修改
①:循环展开
②:避免加载使用冒险



我修改pipe-full(只添加了iaddq)。和PartB差不多。


ncopy.ys 避免了加载使用冒险 ,使用了iaddq指令

# You can modify this portion
    # Copy len words from src to dst and count the positive words copied.
    # Register use: %rdi = src, %rsi = dst, %rdx = words remaining,
    #               %rax = count, %r10 = current word
    # Requires the iaddq instruction to be implemented in the processor.
    # Loop header
    xorq %rax,%rax          # count = 0;
    andq %rdx,%rdx          # len <= 0?
    jle Done                # if so, goto Done:

Loop:
    mrmovq (%rdi), %r10     # read val from src...
    iaddq $8, %rdi          # src++ (placed between the load and the store
                            # so the store does not use %r10 right after
                            # it is loaded)
    rmmovq %r10, (%rsi)     # ...and store it to dst
    iaddq $8, %rsi          # dst++
    andq %r10, %r10         # val <= 0?
    jle Npos                # if so, goto Npos:
    iaddq $1, %rax          # count++
Npos:
    iaddq $-1, %rdx         # len--; iaddq also sets the condition codes
    jg Loop                 # if len > 0, goto Loop:



还是各种测试
这里写图片描述
这里写图片描述

这里写图片描述
这里写图片描述
正如你们所见,这样做,只能得零分(っ °Д °;)っ。


ncopy.ys 避免了加载使用冒险 ,使用了iaddq指令,二层循环展开

# You can modify this portion
    # Copy len words from src to dst and count the positive words copied,
    # processing two words per loop iteration.
    # Register use: %rdi = src, %rsi = dst, %rax = count,
    #               %rdx = (words remaining) - 2 while in the loop,
    #               %r8 / %r9 = the two words being copied
    # Requires the iaddq instruction to be implemented in the processor.
    # Nothing is copied when len <= 0 (including negative odd len).
    # Loop header
    xorq %rax,%rax          # count = 0
    iaddq $-2,%rdx          # rdx = len - 2
    jl Tail                 # fewer than two words: skip the unrolled loop

Loop:
    mrmovq (%rdi),%r8       # val1 = src[0]
    mrmovq 8(%rdi),%r9      # val2 = src[1]
    rmmovq %r8,(%rsi)       # dst[0] = val1
    rmmovq %r9,8(%rsi)      # dst[1] = val2
    andq %r8,%r8            # val1 <= 0?
    jle Npos1
    iaddq $1,%rax           # count++
Npos1:
    andq %r9,%r9            # val2 <= 0?
    jle Npos2
    iaddq $1,%rax           # count++
Npos2:
    iaddq $16,%rdi          # src += 2
    iaddq $16,%rsi          # dst += 2
    iaddq $-2,%rdx          # two fewer words remain
    jge Loop                # at least two more words: go around again

Tail:
    # Here rdx = (words remaining) - 2.  Adding 1 gives zero exactly when
    # one word is left; any other value means there is nothing more to copy.
    iaddq $1,%rdx
    jne Done
    mrmovq (%rdi),%r8       # val = *src
    rmmovq %r8,(%rsi)       # *dst = val (store directly follows the load;
                            # this happens at most once per call)
    andq %r8,%r8            # val <= 0?
    jle Done
    iaddq $1,%rax           # count++

这里写图片描述
这里写图片描述
这里写图片描述
正如你们所见,还是只能得零分(っ °Д °;)っ。
分享两位大佬的:
46分
58.6.
真的很难QAQ

猜你喜欢

转载自blog.csdn.net/weixin_41256413/article/details/80967372
今日推荐