第四章 深入理解计算机系统[第三版] 家庭作业参考答案 CSAPP homework solutions

版权声明:本文为博主原创,未经博主允许不得转载。 https://blog.csdn.net/one_of_a_kind/article/details/81836111

代码均经过测试,请放心食用
这一章可能是最难的一章了,虽然难,但是还是挺有趣的
本章所需的网络旁注和模拟器请自行下载 http://csapp.cs.cmu.edu/3e/students.html
下面给出测试简要指南,详情请自行阅读网络旁注和 README 文件

测试简要指南

从 HCL 的实现角度来看,HCL 实际上是用来生成一种非常格式化的 C 代码的语言。HCL 文件中的所有块定义都被 HCL2C 程序转换成C函数。然后这些函数和实现其他模拟器函数的源代码一起编译,来生成一个可执行的模拟程序,比如 ssim 和 psim。
我并没有使用 GUI 模式的模拟器,因为配置太过麻烦,需要额外下载 Tcl/Tk 等依赖,况且在终端进行测试就足够了;如果使用终端模式的话,请将 Makefile 文件中关于 GUIMODE, TKLIBS, TKINC 的参数注释掉;
先下载压缩包,然后运行下列指令,如果出错可能是因为一些参数没有注释掉:

USER@NAME:~# tar xf sim.tar
USER@NAME:~# cd sim
USER@NAME:~/sim# make clean
USER@NAME:~/sim# make

可以直接使用 YIS 运行程序,它是指令集模拟器,不模拟任何具体处理器的实现:

USER@NAME:~/sim/y86-code# ../misc/yis asum.yo

ssim 和 psim 分别是 SEQ 和 PIPE 的模拟器;如果要生成模拟器(xxx代表不同版本):

USER@NAME:~/sim/seq# make clean; make ssim VERSION=xxx

也可以使用模拟器来运行某一个程序;可以在 /sim/y86-code 下复制一个 ys 文件比如 asum.ys,重命名为 bubble.ys,将里面的 sum 函数删掉,改成所要测试的函数,然后在 main 里调用;
然后 make bubble.yo 生成一个目标文件 bubble.yo,这个 yo 文件就可以用模拟器运行了:

USER@NAME:~/sim/y86-code# make bubble.yo
USER@NAME:~/sim/y86-code# ../seq/ssim -t < ./bubble.yo

如果需要测试修改后的 hcl 文件,需要先生成模拟器,在上面说过了;然后在 /sim/ptest 中测试(参数 SIM 指示需要测试的是 psim 还是 ssim,参数 TFLAGS=-i 表示测试时包含 iaddq 指令);如果测试失败,会在当前文件夹中留下导致错误的 ys 文件:

USER@NAME:~/sim/ptest# make SIM=../pipe/psim TFLAGS=-i

4.54 题中有完整的生成模拟器并测试的步骤

4.45

A.
错误,它压入了减去 8 的 %rsp 的值

B.

movq REG, -8(%rsp)
subq $8, %rsp

4.46

A.
错误,它将栈指针设置为了正确的值(从栈中读出的值)再加上 8

B.

扫描二维码关注公众号,回复: 3079666 查看本文章
addq $8, %rsp
movq -8(%rsp), REG

4.47

使用数组索引

/*参考代码*/
/*
 * Bubble sort: sort the first `count` elements of `data` into ascending
 * order, in place, using array subscripts.
 */
void bubble_a(long *data, long count) {
    long end = count - 1;               /* index of the last unsorted slot */
    while (end > 0) {
        long k = 0;
        while (k < end) {
            long left = data[k];
            long right = data[k + 1];
            if (right < left) {
                /* Adjacent pair is out of order: exchange it */
                data[k] = right;
                data[k + 1] = left;
            }
            k++;
        }
        end--;
    }
}

A.
使用指针引用数组元素

/*
 * Bubble sort: same algorithm as the subscript version, but every element
 * is reached through pointer arithmetic instead of array indexing.
 */
void bubble_b(long *data, long count) {
    long last;
    for (last = count - 1; last > 0; last--) {
        long *stop = data + last;       /* left element of a pair stays below this */
        long *p;
        for (p = data; p < stop; p++) {
            long *q = p + 1;            /* right-hand neighbour of *p */
            if (*q < *p) {
                /* Adjacent pair is out of order: exchange it */
                long t = *q;
                *q = *p;
                *p = t;
            }
        }
    }
}

B.
手写汇编真麻烦我都要吐了怪不得发明高级语言
这里给出完整的 ys 文件,以后的题目只给出函数定义

# Execution begins at address 0
    .pos 0
    irmovq stack, %rsp      # Set up stack pointer
    call main               # Execute main program
    halt                    # Terminate program 

# Test data: an array of four 8-byte elements, stored in descending order
    .align 8
array:  
    .quad 0x000000000abc
    .quad 0x0000000000bc
    .quad 0x00000000000c
    .quad 0x000000000001

# Main function: passes the array address in %rdi and its length in %rsi
main:   
    irmovq array,%rdi
    irmovq $4,%rsi
    call bubble_b           # bubble_b(array, 4)
    ret

# void bubble_b(long *data, long count)
# Bubble sort: sorts data[0 .. count-1] into ascending (signed) order in
# place. Returns immediately when count <= 1.
# In:        %rdi = data, %rsi = count
# Registers: %r8 = constant 1, %r9 = constant 8, %rax = last, %rcx = i,
#            %rdx = element address, %r10 = data[i], %r11 = data[i + 1],
#            %rbx and %r12 = scratch
# Clobbers:  %rax, %rcx, %rdx, %rbx, %r8-%r12 and the condition codes;
#            none of these registers is saved or restored here.
bubble_b:
    irmovq $1, %r8          # constant 1
    irmovq $8, %r9          # constant 8 (size of one element)
    rrmovq %rsi, %rax
    subq %r8, %rax          # last = %rax = count - 1
    jle Done                # last <= 0 (count <= 1) -> nothing to sort
Loop1:
    xorq %rcx, %rcx         # i = %rcx = 0
Loop2:
    rrmovq %rcx, %rdx       # %rdx = i
    addq %rdx, %rdx         # %rdx = 2 * i
    addq %rdx, %rdx         # %rdx = 4 * i
    addq %rdx, %rdx         # %rdx = 8 * i
    addq %rdi, %rdx         # %rdx = &data[i]
    mrmovq (%rdx), %r10     # %r10 = data[i]
    addq %r9, %rdx          # %rdx = &data[i + 1]
    mrmovq (%rdx), %rbx     # %rbx = data[i + 1]
    rrmovq %rbx, %r11       # %r11 = data[i + 1] (copy that survives the subq)
    subq %r10, %rbx         # set CC from data[i + 1] - data[i]
    jge Test1               # data[i + 1] >= data[i] -> already in order
    rmmovq %r10, (%rdx)     # data[i + 1] = old data[i]
    subq %r9, %rdx          # %rdx = &data[i]
    rmmovq %r11, (%rdx)     # data[i] = old data[i + 1]
Test1:
    addq %r8, %rcx          # i++
    rrmovq %rcx, %r12       # %r12 = i
    subq %rax, %r12         # set CC from i - last
    jl Loop2                # i < last -> next pair
    subq %r8, %rax          # last--
    jg Loop1                # last > 0 -> next pass
Done:
    ret                     # Return

# The stack starts at 0x200 and grows toward lower addresses
    .pos 0x200
stack:

可以看到输出,左边是运行前,右边是运行后:

Changed Memory State:
0x0018: 0x0000000000000abc  0x0000000000000001
0x0020: 0x00000000000000bc  0x000000000000000c
0x0028: 0x000000000000000c  0x00000000000000bc
0x0030: 0x0000000000000001  0x0000000000000abc

4.48

#号包围区域与 4.47 题做对比:

# void bubble_c(long *data, long count)
# Bubble sort with a jump-free compare-and-swap: each adjacent pair is
# ordered with two conditional moves and is always written back.
# Returns immediately when count <= 1.
# In:        %rdi = data, %rsi = count
# Registers: %r8 = constant 1, %r9 = constant 8, %rax = last, %rcx = i,
#            %rdx = element address, %r10 = data[i], %r11 = data[i + 1],
#            %r12 / %r13 = values to store, %rbx = scratch
# Clobbers:  %rax, %rcx, %rdx, %rbx, %r8-%r13 and the condition codes;
#            none of these registers is saved or restored here.
bubble_c:
    irmovq $1, %r8          # constant 1
    irmovq $8, %r9          # constant 8 (size of one element)
    rrmovq %rsi, %rax
    subq %r8, %rax          # last = %rax = count - 1
    jle Done                # last <= 0 (count <= 1) -> nothing to sort
Loop1:
    xorq %rcx, %rcx         # i = %rcx = 0
Loop2:
    rrmovq %rcx, %rdx       # %rdx = i
    addq %rdx, %rdx         # %rdx = 2 * i
    addq %rdx, %rdx         # %rdx = 4 * i
    addq %rdx, %rdx         # %rdx = 8 * i
    addq %rdi, %rdx         # %rdx = &data[i]
    mrmovq (%rdx), %r10     # %r10 = data[i]
    addq %r9, %rdx          # %rdx = &data[i + 1]
    mrmovq (%rdx), %rbx     # %rbx = data[i + 1]
    rrmovq %rbx, %r11       # %r11 = data[i + 1] (copy that survives the subq)
    subq %r10, %rbx         # set CC from data[i + 1] - data[i]
    ################ removed relative to 4.47 (conditional jump) ################
    ##  jge Test1             # data[i + 1] >= data[i] -> already in order
    ##  rmmovq %r10, (%rdx)   # data[i + 1] = old data[i]
    ##  subq %r9, %rdx        # %rdx = &data[i]
    ##  rmmovq %r11, (%rdx)   # data[i] = old data[i + 1]
    #############################################################################

    ################ added: swap via two conditional moves ######################
    # rrmovq/cmovl do not modify the condition codes, so both cmovl
    # instructions still test the result of the subq above.
    rrmovq %r11, %r12       # %r12 = data[i + 1]
    rrmovq %r10, %r13       # %r13 = data[i]
    cmovl %r10, %r12        # data[i + 1] < data[i] -> %r12 = data[i]
    cmovl %r11, %r13        # data[i + 1] < data[i] -> %r13 = data[i + 1]
    rmmovq %r12, (%rdx)     # data[i + 1] = %r12 (the larger value)
    subq %r9, %rdx          # %rdx = &data[i]
    rmmovq %r13, (%rdx)     # data[i] = %r13 (the smaller value)
    #############################################################################
Test1:
    addq %r8, %rcx          # i++
    rrmovq %rcx, %r12       # %r12 = i
    subq %rax, %r12         # set CC from i - last
    jl Loop2                # i < last -> next pair
    subq %r8, %rax          # last--
    jg Loop1                # last > 0 -> next pass
Done:
    ret                     # Return

4.49

一个条件传送,比较巧妙,#号包围区域与 4.47 题做对比:

# void bubble_d(long *data, long count)
# Bubble sort with a jump-free compare-and-swap that needs only ONE
# conditional move: d = data[i + 1] - data[i] is forced to 0 when the pair
# is already in order, then data[i + 1] -= d and data[i] += d.
# Returns immediately when count <= 1.
# In:        %rdi = data, %rsi = count
# Registers: %r8 = constant 1, %r9 = constant 8, %r12 = constant 0 (must
#            stay 0 for the whole function), %rax = last, %rcx = i,
#            %rdx = element address, %r10 = data[i], %r11 = data[i + 1],
#            %rbx = d, reused as loop-test scratch once d is dead
# Clobbers:  %rax, %rcx, %rdx, %rbx, %r8-%r12 and the condition codes;
#            none of these registers is saved or restored here.
bubble_d:
    irmovq $1, %r8          # constant 1
    irmovq $8, %r9          # constant 8 (size of one element)
    irmovq $0, %r12         # constant 0, source of the cmovge below
    rrmovq %rsi, %rax
    subq %r8, %rax          # last = %rax = count - 1
    jle Done                # last <= 0 (count <= 1) -> nothing to sort
Loop1:
    xorq %rcx, %rcx         # i = %rcx = 0
Loop2:
    rrmovq %rcx, %rdx       # %rdx = i
    addq %rdx, %rdx         # %rdx = 2 * i
    addq %rdx, %rdx         # %rdx = 4 * i
    addq %rdx, %rdx         # %rdx = 8 * i
    addq %rdi, %rdx         # %rdx = &data[i]
    mrmovq (%rdx), %r10     # %r10 = data[i]
    addq %r9, %rdx          # %rdx = &data[i + 1]
    mrmovq (%rdx), %rbx     # %rbx = data[i + 1]
    rrmovq %rbx, %r11       # %r11 = data[i + 1] (copy that survives the subq)
    subq %r10, %rbx         # %rbx = d = data[i + 1] - data[i], sets CC
    ################ removed relative to 4.47 (conditional jump) ################
    ##  jge Test1             # data[i + 1] >= data[i] -> already in order
    ##  rmmovq %r10, (%rdx)   # data[i + 1] = old data[i]
    ##  subq %r9, %rdx        # %rdx = &data[i]
    ##  rmmovq %r11, (%rdx)   # data[i] = old data[i + 1]
    #############################################################################

    ################ added: swap via one conditional move #######################
    cmovge %r12, %rbx       # pair in order -> d = 0; otherwise d is kept
    subq %rbx, %r11         # %r11 = data[i+1] < data[i] ? data[i] : data[i+1]
    rmmovq %r11, (%rdx)     # data[i + 1] = %r11
    subq %r9, %rdx          # %rdx = &data[i]
    addq %rbx, %r10         # %r10 = data[i+1] < data[i] ? data[i+1] : data[i]
    rmmovq %r10, (%rdx)     # data[i] = %r10
    #############################################################################
Test1:
    addq %r8, %rcx          # i++
    rrmovq %rcx, %rbx       # %rbx = i (d is dead here; %r12 must keep holding 0)
    subq %rax, %rbx         # set CC from i - last
    jl Loop2                # i < last -> next pair
    subq %r8, %rax          # last--
    jg Loop1                # last > 0 -> next pass
Done:
    ret                     # Return

4.50

完整的 ys 文件:

# Execution begins at address 0
    .pos 0
    irmovq stack, %rsp      # Set up stack pointer
    call main               # Execute main program
    halt                    # Terminate program

# Jump table: entry k holds the address of the code that handles idx == k.
# The entries are labels rather than absolute addresses, so the table stays
# correct when the code of switchv changes size or moves.
    .align 8
table:
    .quad case_0            # idx == 0
    .quad default           # idx == 1
    .quad case_2_5          # idx == 2
    .quad case_3            # idx == 3
    .quad default           # idx == 4
    .quad case_2_5          # idx == 5

# Main function
main:
    irmovq $3, %rdi
    call switchv            # switchv(3)
    ret

    .pos 0x100
# long switchv(long idx)
# Returns 0xaaa for idx == 0, 0xbbb for idx == 2 or 5, 0xccc for idx == 3
# and 0xddd for every other value, including negative ones.
# In:       %rdi = idx
# Out:      %rax = result
# Clobbers: %rdi, %rdx, %r8 and the condition codes
switchv:
    andq %rdi, %rdi         # set CC from idx
    jl default              # idx < 0 -> default (never index before the table)
    irmovq $5, %r8          # %r8 = 5, the largest table index
    rrmovq %rdi, %rdx
    subq %r8, %rdx          # set CC from idx - 5
    jg default              # idx > 5 -> default
    addq %rdi, %rdi         # idx = 2 * idx
    addq %rdi, %rdi         # idx = 4 * idx
    addq %rdi, %rdi         # idx = 8 * idx
    irmovq table, %r8       # %r8 = table
    addq %rdi, %r8          # %r8 = table + 8 * idx
    mrmovq (%r8), %r8       # %r8 = address of the selected case
    pushq %r8               # no indirect jump is used: push the target ...
    ret                     # ... and "return" to it
case_0:
    irmovq $0xaaa, %rax
    jmp Done
case_2_5:
    irmovq $0xbbb, %rax
    jmp Done
case_3:
    irmovq $0xccc, %rax
    jmp Done
default:
    irmovq $0xddd, %rax
Done:
    ret

# The stack starts at 0x200 and grows toward lower addresses
    .pos 0x200
stack:

输出:

Changed Register State:
%rax:   0x0000000000000000  0x0000000000000ccc

4.51

参照图 4-18:

阶段 iaddq V, rB
取指 icode:ifun <- M1[PC]
rA:rB <- M1[PC+1]
valC <- M8[PC+2]
valP <- PC + 10
译码 valB <- R[rB]
执行 valE <- valB + valC
Set CC
访存
写回 R[rB] <- valE
更新PC PC <- valP

4.52

根据上题修改 /sim/seq-full.hcl,先复制一个备份文件 seq-full-backup.hcl

USER@NAME:~/sim/seq# diff -u seq-full-backup.hcl seq-full.hcl
--- seq-full-backup.hcl 2014-06-23 22:01:01.000000000 +0800
+++ seq-full.hcl    2018-08-22 23:04:46.906999999 +0800
@@ -106,16 +106,16 @@

 bool instr_valid = icode in 
    { INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
-          IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };
+          IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };

 # Does fetched instruction require a regid byte?
 bool need_regids =
    icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
-            IIRMOVQ, IRMMOVQ, IMRMOVQ };
+            IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ };

 # Does fetched instruction require a constant word?
 bool need_valC =
-   icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL };
+   icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL, IIADDQ };

 ################ Decode Stage    ###################################

@@ -128,7 +128,7 @@

 ## What register should be used as the B source?
 word srcB = [
-   icode in { IOPQ, IRMMOVQ, IMRMOVQ  } : rB;
+   icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ } : rB;
    icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't need register
 ];
@@ -136,7 +136,7 @@
 ## What register should be used as the E destination?
 word dstE = [
    icode in { IRRMOVQ } && Cnd : rB;
-   icode in { IIRMOVQ, IOPQ} : rB;
+   icode in { IIRMOVQ, IOPQ, IIADDQ} : rB;
    icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't write any register
 ];
@@ -152,7 +152,7 @@
 ## Select input A to ALU
 word aluA = [
    icode in { IRRMOVQ, IOPQ } : valA;
-   icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : valC;
+   icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : valC;
    icode in { ICALL, IPUSHQ } : -8;
    icode in { IRET, IPOPQ } : 8;
    # Other instructions don't need ALU
@@ -161,7 +161,7 @@
 ## Select input B to ALU
 word aluB = [
    icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
-             IPUSHQ, IRET, IPOPQ } : valB;
+             IPUSHQ, IRET, IPOPQ, IIADDQ } : valB;
    icode in { IRRMOVQ, IIRMOVQ } : 0;
    # Other instructions don't need ALU
 ];
@@ -173,7 +173,7 @@
 ];

 ## Should the condition codes be updated?
-bool set_cc = icode in { IOPQ };
+bool set_cc = icode in { IOPQ, IIADDQ };

 ################ Memory Stage    ###################################

4.53

在我下载的 sim 文件中,此题的文件为 pipe-nobypass.hcl,先创建一个备份文件 pipe-nobypass-backup.hcl
1. 数据冒险
d_srcA in { e_dstE, M_dstM, M_dstE, W_dstM, W_dstE } ||
d_srcB in { e_dstE, M_dstM, M_dstE, W_dstM, W_dstE }
发生数据冒险,需要在 E 插入气泡并暂停 F 和 D;这对于加载/使用数据冒险同样适用(因此在集合中再加入 E_dstM),另外源寄存器为 RNONE 时不构成冒险,由此得到数据冒险的条件:

bool s_data_hazard =
  (
    (
      d_srcA != RNONE  &&
      d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }
    ) ||
    (
      d_srcB != RNONE  &&
      d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }
    )
  )

2. 处理ret
ret 时的条件与 pipe-full.hcl 中的一致,也与书上的触发条件一致:

bool s_ret = IRET in { D_icode, E_icode, M_icode }

3. 预测错误的分支
也与书上的触发条件、 pipe-full.hcl 中的一致,其条件为:

bool s_mispredicted = (E_icode == IJXX && !e_Cnd)

不考虑组合,得出流水线控制逻辑的动作:

条件 F D E M W
(Ⅰ)数据冒险 暂停 暂停 气泡 正常 正常
(Ⅱ)处理 ret 暂停 气泡 正常 正常 正常
(Ⅲ)预测错误的分支 正常 气泡 气泡 正常 正常

4. 上述条件的组合
让我们参照书上 图 4-67 来做:
数据冒险有三种基本情况(执行/使用、访存/使用、写回/使用),JXX 只有一种,ret 有三种

寄存器 ①执行/使用 ②访存/使用 ③写回/使用 ④JXX ⑤ret ⑥ret ⑦ret
W —— —— 写回 —— —— —— ——
M —— 访存 —— —— —— —— ret
E 执行 —— —— JXX —— ret 气泡
D 使用 使用 使用 —— ret 气泡 气泡

可行的组合:
①② 或 ①③:属于数据冒险基本情况的组合,同(Ⅰ)
①⑤:与书上组合B类似,组合结果也与其相同,即同(Ⅰ)
②③:同(Ⅰ)
②④:因为不选择分支,使用被取消,所以没有数据冒险,因此同(Ⅲ)
②⑤:与书上组合B类似,同(Ⅰ)
③④:同(Ⅲ)
③⑤:与书上组合B类似,同(Ⅰ)
④⑤:同书上组合A
① (或② 或③) ④⑤:与 ④⑤ 类似,同书上组合A

根据上述分析得出:

组合 数据冒险 处理ret 预测错误的分支 F D E M W
1 0 0 0 正常 正常 正常 正常 正常
2 0 0 1 正常 气泡 气泡 正常 正常
3 0 1 0 暂停 气泡 正常 正常 正常
4 1 0 0 暂停 暂停 气泡 正常 正常
5 0 1 1 暂停 气泡 气泡 正常 正常
6 1 0 1 正常 气泡 气泡 正常 正常
7 1 1 0 暂停 暂停 气泡 正常 正常
8 1 1 1 暂停 气泡 气泡 正常 正常

因此:
F:
bubble = 0
stall = (s_data_hazard || s_ret) && (!s_data_hazard || s_ret || !s_mispredicted)
D:
bubble = s_mispredicted || (s_ret && !s_data_hazard)
stall = s_data_hazard && !s_mispredicted
E:
bubble = s_data_hazard || s_mispredicted
stall = 0
M:
不变
W:
不变

修改 pipe-nobypass.hcl:

USER@NAME:~/sim/pipe# diff -u pipe-nobypass-backup.hcl pipe-nobypass.hcl
--- pipe-nobypass-backup.hcl    2014-12-29 23:08:40.000000000 +0800
+++ pipe-nobypass.hcl   2018-08-25 01:57:59.011000000 +0800
@@ -303,39 +303,43 @@
 ];

 ################ Pipeline Register Control #########################
 # Should I stall or inject a bubble into Pipeline Register F?
 # At most one of these can be true.
+#bool F_bubble = 0;
+#bool F_stall = (s_data_hazard || s_ret) && (!s_data_hazard || s_ret || !s_mispredicted);
 bool F_bubble = 0;
-bool F_stall =
-   # Modify the following to stall the update of pipeline register F
-   0 ||
-   # Stalling at fetch while ret passes through pipeline
-   IRET in { D_icode, E_icode, M_icode };
+bool F_stall = 
+   (((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+   (d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })) || 
+   IRET in { D_icode, E_icode, M_icode }) && 
+   (!((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+   (d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })) || 
+   IRET in { D_icode, E_icode, M_icode } ||
+   !(E_icode == IJXX && !e_Cnd));

 # Should I stall or inject a bubble into Pipeline Register D?
 # At most one of these can be true.
+#bool D_stall = s_data_hazard && !s_mispredicted;
+#bool D_bubble = s_mispredicted || (s_ret && !s_data_hazard);
+bool D_bubble = 
+   (E_icode == IJXX && !e_Cnd) || 
+   (IRET in { D_icode, E_icode, M_icode } && 
+   !((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+   (d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })));
 bool D_stall = 
-   # Modify the following to stall the instruction in decode
-   0;
-
-bool D_bubble =
-   # Mispredicted branch
-   (E_icode == IJXX && !e_Cnd) ||
-   # Stalling at fetch while ret passes through pipeline
-   !(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
-   # but not condition for a generate/use hazard
-   !0 &&
-     IRET in { D_icode, E_icode, M_icode };
+   ((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+   (d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })) &&
+   !(E_icode == IJXX && !e_Cnd);

 # Should I stall or inject a bubble into Pipeline Register E?
 # At most one of these can be true.
+#bool E_stall = 0;
+#bool E_bubble = s_data_hazard || s_mispredicted;
 bool E_stall = 0;
-bool E_bubble =
-   # Mispredicted branch
-   (E_icode == IJXX && !e_Cnd) ||
-   # Modify the following to inject bubble into the execute stage
-   0;
+bool E_bubble = 
+   ((d_srcA != RNONE && d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE }) || 
+   (d_srcB != RNONE  &&d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE })) ||
+   (E_icode == IJXX && !e_Cnd);

 # Should I stall or inject a bubble into Pipeline Register M?
 # At most one of these can be true.

参照本文开始处的指南,所有测试均通过

4.54

4.51 题的图:

阶段 iaddq V, rB
取指 icode:ifun <- M1[PC]
rA:rB <- M1[PC+1]
valC <- M8[PC+2]
valP <- PC + 10
译码 valB <- R[rB]
执行 valE <- valB + valC
Set CC
访存
写回 R[rB] <- valE
更新PC PC <- valP

修改 pipe-full.hcl 文件并测试:

USER@NAME:~/sim/pipe# make clean; make psim VERSION=full
rm -f psim pipe-*.c *.o *.exe *~ 
# Building the pipe-full.hcl version of PIPE
../misc/hcl2c -n pipe-full.hcl < pipe-full.hcl > pipe-full.c
gcc -Wall -O2  -I../misc  -o psim psim.c pipe-full.c \
    ../misc/isa.c  -lm
USER@NAME:~/sim/pipe# cd ../ptest/
USER@NAME:~/sim/ptest# make SIM=../pipe/psim TFLAGS=-i
./optest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 58 ISA Checks Succeed
./jtest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 96 ISA Checks Succeed
./ctest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 22 ISA Checks Succeed
./htest.pl -s ../pipe/psim -i
Simulating with ../pipe/psim
  All 756 ISA Checks Succeed
USER@NAME:~/sim/ptest# cd ../pipe
USER@NAME:~/sim/pipe# diff -u pipe-full-backup.hcl pipe-full.hcl 
--- pipe-full-backup.hcl    2014-12-29 23:08:40.000000000 +0800
+++ pipe-full.hcl   2018-08-26 02:45:01.228184132 +0800
@@ -158,7 +158,7 @@
 # Is instruction valid?
 bool instr_valid = f_icode in 
    { INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
-     IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };
+     IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };

 # Determine status code for fetched instruction
 word f_stat = [
@@ -171,11 +171,11 @@
 # Does fetched instruction require a regid byte?
 bool need_regids =
    f_icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
-            IIRMOVQ, IRMMOVQ, IMRMOVQ };
+            IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ };

 # Does fetched instruction require a constant word?
 bool need_valC =
-   f_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL };
+   f_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL, IIADDQ };

 # Predict next value of PC
 word f_predPC = [
@@ -195,14 +195,14 @@

 ## What register should be used as the B source?
 word d_srcB = [
-   D_icode in { IOPQ, IRMMOVQ, IMRMOVQ  } : D_rB;
+   D_icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ  } : D_rB;
    D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't need register
 ];

 ## What register should be used as the E destination?
 word d_dstE = [
-   D_icode in { IRRMOVQ, IIRMOVQ, IOPQ} : D_rB;
+   D_icode in { IRRMOVQ, IIRMOVQ, IOPQ, IIADDQ} : D_rB;
    D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't write any register
 ];
@@ -239,7 +239,7 @@
 ## Select input A to ALU
 word aluA = [
    E_icode in { IRRMOVQ, IOPQ } : E_valA;
-   E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : E_valC;
+   E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC;
    E_icode in { ICALL, IPUSHQ } : -8;
    E_icode in { IRET, IPOPQ } : 8;
    # Other instructions don't need ALU
@@ -248,7 +248,7 @@
 ## Select input B to ALU
 word aluB = [
    E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
-            IPUSHQ, IRET, IPOPQ } : E_valB;
+            IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
    E_icode in { IRRMOVQ, IIRMOVQ } : 0;
    # Other instructions don't need ALU
 ];
@@ -260,7 +260,7 @@
 ];

 ## Should the condition codes be updated?
-bool set_cc = E_icode == IOPQ &&
+bool set_cc = E_icode in { IOPQ, IIADDQ }  &&
    # State changes only during normal operation
    !m_stat in { SADR, SINS, SHLT } && !W_stat in { SADR, SINS, SHLT };

4.55

做这个题时可以边看 图4-52 流水线化的最终实现
需注意 Cnd 表示的是选择分支(take branch),可以从 hcl 文件中的定义看出:boolsig M_Cnd 'ex_mem_curr->takebranch' # Condition flag
主要的思路就是让 “Sel+Fwd A” 在无条件转移时选择 D_valP,在条件转移时选择 D_valC;这样的话 valC 就会传递到 M_valA,从而使得“Select PC”可以(通过M_valA)接收到 ValC,以便当预测失败时使用 valC 更新 PC;

USER@NAME:~/sim/pipe# diff -u pipe-nt-backup.hcl pipe-nt.hcl
--- pipe-nt-backup.hcl  2014-12-29 23:08:40.000000000 +0800
+++ pipe-nt.hcl 2018-08-26 01:35:40.452673831 +0800
@@ -80,9 +80,11 @@

 ##### Pipeline Register D ##########################################
 wordsig D_icode 'if_id_curr->icode'   # Instruction code
+wordsig D_ifun 'if_id_curr->ifun'  # ifun in the decode stage
 wordsig D_rA 'if_id_curr->ra'       # rA field from instruction
 wordsig D_rB 'if_id_curr->rb'       # rB field from instruction
 wordsig D_valP 'if_id_curr->valp'     # Incremented PC
+wordsig D_valC 'if_id_curr->valc'  # valC in the decode stage

 ##### Intermediate Values in Decode Stage  #########################

@@ -139,7 +141,7 @@
 ## What address should instruction be fetched at
 word f_pc = [
    # Mispredicted branch.  Fetch at incremented PC
-   M_icode == IJXX && !M_Cnd : M_valA;
+   M_icode == IJXX && M_ifun != UNCOND && M_Cnd : M_valA;
    # Completion of RET instruction
    W_icode == IRET : W_valM;
    # Default: Use predicted value of PC
@@ -183,7 +185,8 @@
 # Predict next value of PC
 word f_predPC = [
    # BNT: This is where you'll change the branch prediction rule
-   f_icode in { IJXX, ICALL } : f_valC;
+   # Unconditional transfer
+   f_icode in { IJXX, ICALL } && f_ifun == UNCOND : f_valC;
    1 : f_valP;
 ];

@@ -220,7 +223,11 @@
 ## What should be the A value?
 ## Forward into decode stage for valA
 word d_valA = [
-   D_icode in { ICALL, IJXX } : D_valP; # Use incremented PC
+   # Unconditional transfer
+   D_icode in { IJXX, ICALL } && D_ifun == UNCOND : D_valP;
+   # Conditional jump
+   D_icode == IJXX : D_valC;
+   #D_icode in { ICALL, IJXX } : D_valP; # Use incremented PC
    d_srcA == e_dstE : e_valE;    # Forward valE from execute
    d_srcA == M_dstM : m_valM;    # Forward valM from memory
    d_srcA == M_dstE : M_valE;    # Forward valE from memory
@@ -343,7 +350,7 @@

 bool D_bubble =
    # Mispredicted branch
-   (E_icode == IJXX && !e_Cnd) ||
+   (E_icode == IJXX && E_ifun != UNCOND && e_Cnd) ||
    # Stalling at fetch while ret passes through pipeline
    # but not condition for a load/use hazard
    !(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
@@ -354,7 +361,7 @@
 bool E_stall = 0;
 bool E_bubble =
    # Mispredicted branch
-   (E_icode == IJXX && !e_Cnd) ||
+   (E_icode == IJXX && E_ifun != UNCOND && e_Cnd) ||
    # Conditions for a load/use hazard
    E_icode in { IMRMOVQ, IPOPQ } &&
     E_dstM in { d_srcA, d_srcB};

4.56

思路和上题有异曲同工之处;valP 是送往“Sel+Fwd A”,之后会到达 M_valA;而 valC 送往 ALUA ,之后会到达 M_valE;只需比较 M_valA 和 M_valE 就知道是前向分支还是后向;然后根据 Cnd 的值修改 pc;实际上是增加了一条转发路径;

USER@NAME:~/sim/pipe# diff -u pipe-btfnt-backup.hcl pipe-btfnt.hcl
--- pipe-btfnt-backup.hcl   2014-12-29 23:08:40.000000000 +0800
+++ pipe-btfnt.hcl  2018-08-26 22:31:56.452823164 +0800
@@ -83,6 +83,8 @@
 wordsig D_rA 'if_id_curr->ra'       # rA field from instruction
 wordsig D_rB 'if_id_curr->rb'       # rB field from instruction
 wordsig D_valP 'if_id_curr->valp'     # Incremented PC
+wordsig D_valC 'if_id_curr->valc'   # valC in the decode stage
+wordsig D_ifun 'if_id_curr->ifun'  # ifun in the decode stage

 ##### Intermediate Values in Decode Stage  #########################

@@ -138,8 +140,11 @@

 ## What address should instruction be fetched at
 word f_pc = [
-   # Mispredicted branch.  Fetch at incremented PC
-   M_icode == IJXX && !M_Cnd : M_valA;
+   # Mispredicted backward branch
+   M_icode == IJXX && M_ifun != UNCOND && M_valE < M_valA && !M_Cnd : M_valA;
+   # Mispredicted forward branch
+   M_icode == IJXX && M_ifun != UNCOND && M_valE > M_valA && M_Cnd : M_valE;
    # Completion of RET instruction
    W_icode == IRET : W_valM;
    # Default: Use predicted value of PC
@@ -183,13 +188,14 @@
 # Predict next value of PC
 word f_predPC = [
    # BBTFNT: This is where you'll change the branch prediction rule
+   f_icode == IJXX && f_ifun != UNCOND && f_valC < f_valP : f_valC;
+   f_icode == IJXX && f_ifun != UNCOND && f_valC > f_valP : f_valP;
    f_icode in { IJXX, ICALL } : f_valC;
    1 : f_valP;
 ];

 ################ Decode Stage ######################################

 ## What register should be used as the A source?
 word d_srcA = [
    D_icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : D_rA;
@@ -247,7 +253,8 @@
 ## Select input A to ALU
 word aluA = [
    E_icode in { IRRMOVQ, IOPQ } : E_valA;
-   E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : E_valC;
+   E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } || 
+       E_icode == IJXX : E_valC;
    E_icode in { ICALL, IPUSHQ } : -8;
    E_icode in { IRET, IPOPQ } : 8;
    # Other instructions don't need ALU
@@ -257,7 +264,8 @@
 word aluB = [
    E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
             IPUSHQ, IRET, IPOPQ } : E_valB;
-   E_icode in { IRRMOVQ, IIRMOVQ } : 0;
+   E_icode in { IRRMOVQ, IIRMOVQ } || 
+       E_icode == IJXX  : 0;
    # Other instructions don't need ALU
 ];

@@ -343,7 +351,8 @@

 bool D_bubble =
    # Mispredicted branch
-   (E_icode == IJXX && !e_Cnd) ||
+   ((E_icode == IJXX && E_ifun != UNCOND && E_valC < E_valA && !e_Cnd) ||
+   (E_icode == IJXX && E_ifun != UNCOND && E_valC > E_valA && e_Cnd)) ||
    # BBTFNT: This condition will change
    # Stalling at fetch while ret passes through pipeline
    # but not condition for a load/use hazard
@@ -355,7 +364,8 @@
 bool E_stall = 0;
 bool E_bubble =
    # Mispredicted branch
-   (E_icode == IJXX && !e_Cnd) ||
+   ((E_icode == IJXX && E_ifun != UNCOND && E_valC < E_valA && !e_Cnd) ||
+   (E_icode == IJXX && E_ifun != UNCOND && E_valC > E_valA && e_Cnd)) ||
    # BBTFNT: This condition will change
    # Conditions for a load/use hazard
    E_icode in { IMRMOVQ, IPOPQ } &&

4.57

A.
书上加载使用冒险的条件:

E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB };
情况 1 2 3 4
E_dstM == d_srcA 1 1 0 0
E_dstM == d_srcB 1 0 1 0

情况 1 2 3 会发生加载使用冒险,因为 rB 会在执行阶段访问,所以情况 1 3 是不能通过加载转发解决的;
只需考虑情况 2 —— E_dstM == d_srcA,对于所有指令,在访存阶段才使用 valA 的只有 rmmovq 和 pushq,因此得出下列条件:

E_icode in { IMRMOVQ, IPOPQ } &&
(
  E_dstM == d_srcB ||
  (
    E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
  )
);

B.
修改一下 e_valA 的值,再修改暂停和气泡的条件就行了:

USER@NAME:~/sim/pipe# diff -u pipe-lf-backup.hcl pipe-lf.hcl
--- pipe-lf-backup.hcl  2014-12-29 23:08:40.000000000 +0800
+++ pipe-lf.hcl 2018-08-30 02:05:59.609000000 +0800
@@ -271,6 +271,7 @@
 ##   from memory stage when appropriate
 ## Here it is set to the default used in the normal pipeline
 word e_valA = [
+   E_icode in { IRMMOVQ, IPUSHQ } && E_srcA == M_dstM : m_valM;
    1 : E_valA;  # Use valA from stage pipe register
 ];

@@ -329,7 +330,13 @@
 bool F_stall =
    # Conditions for a load/use hazard
    ## Set this to the new load/use condition
-   0 ||
+   E_icode in { IMRMOVQ, IPOPQ } &&
+   (
+       E_dstM == d_srcB ||
+       (
+       E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
+       )
+   ) ||
    # Stalling at fetch while ret passes through pipeline
    IRET in { D_icode, E_icode, M_icode };

@@ -338,14 +345,26 @@
 bool D_stall = 
    # Conditions for a load/use hazard
    ## Set this to the new load/use condition
-   0; 
+   E_icode in { IMRMOVQ, IPOPQ } &&
+   (
+       E_dstM == d_srcB ||
+       (
+       E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
+       )
+   ); 

 bool D_bubble =
    # Mispredicted branch
    (E_icode == IJXX && !e_Cnd) ||
    # Stalling at fetch while ret passes through pipeline
    # but not condition for a load/use hazard
-   !(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
+   !(E_icode in { IMRMOVQ, IPOPQ } &&
+   (
+       E_dstM == d_srcB ||
+       (
+       E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
+       )
+   )) &&
      IRET in { D_icode, E_icode, M_icode };

 # Should I stall or inject a bubble into Pipeline Register E?
@@ -356,7 +375,13 @@
    (E_icode == IJXX && !e_Cnd) ||
    # Conditions for a load/use hazard
    ## Set this to the new load/use condition
-   0;
+   E_icode in { IMRMOVQ, IPOPQ } &&
+   (
+       E_dstM == d_srcB ||
+       (
+       E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
+       )
+   );

 # Should I stall or inject a bubble into Pipeline Register M?
 # At most one of these can be true.

4.58

很简单

USER@NAME:~/sim/pipe# diff -u pipe-1w-backup.hcl pipe-1w.hcl 
--- pipe-1w-backup.hcl  2014-12-29 23:08:40.000000000 +0800
+++ pipe-1w.hcl 2018-08-30 04:02:30.477000000 +0800
@@ -157,6 +157,7 @@
 ## so that it will be IPOP2 when fetched for second time.
 word f_icode = [
    imem_error : INOP;
+   D_icode == IPOPQ : IPOP2;
    1: imem_icode;
 ];

@@ -169,7 +170,7 @@
 # Is instruction valid?
 bool instr_valid = f_icode in 
    { INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
-     IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };
+     IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IPOP2 };

 # Determine status code for fetched instruction
 word f_stat = [
@@ -182,7 +183,7 @@
 # Does fetched instruction require a regid byte?
 bool need_regids =
    f_icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
-            IIRMOVQ, IRMMOVQ, IMRMOVQ };
+            IIRMOVQ, IRMMOVQ, IMRMOVQ, IPOP2 };

 # Does fetched instruction require a constant word?
 bool need_valC =
@@ -192,6 +193,7 @@
 word f_predPC = [
    f_icode in { IJXX, ICALL } : f_valC;
    ## 1W: Want to refetch popq one time
+   f_icode == IPOPQ : f_pc;
    1 : f_valP;
 ];

@@ -204,14 +206,14 @@
 ## What register should be used as the A source?
 word d_srcA = [
    D_icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : D_rA;
-   D_icode in { IPOPQ, IRET } : RRSP;
+   D_icode in { IRET } : RRSP;
    1 : RNONE; # Don't need register
 ];

 ## What register should be used as the B source?
 word d_srcB = [
    D_icode in { IOPQ, IRMMOVQ, IMRMOVQ  } : D_rB;
-   D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
+   D_icode in { IPUSHQ, IPOPQ, ICALL, IRET, IPOP2 } : RRSP;
    1 : RNONE;  # Don't need register
 ];

@@ -224,7 +226,7 @@

 ## What register should be used as the M destination?
 word d_dstM = [
-   D_icode in { IMRMOVQ, IPOPQ } : D_rA;
+   D_icode in { IMRMOVQ, IPOP2 } : D_rA;
    1 : RNONE;  # Don't write any register
 ];

@@ -255,7 +257,7 @@
 word aluA = [
    E_icode in { IRRMOVQ, IOPQ } : E_valA;
    E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : E_valC;
-   E_icode in { ICALL, IPUSHQ } : -8;
+   E_icode in { ICALL, IPUSHQ, IPOP2 } : -8;
    E_icode in { IRET, IPOPQ } : 8;
    # Other instructions don't need ALU
 ];
@@ -263,7 +265,7 @@
 ## Select input B to ALU
 word aluB = [
    E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
-            IPUSHQ, IRET, IPOPQ } : E_valB;
+            IPUSHQ, IRET, IPOPQ, IPOP2 } : E_valB;
    E_icode in { IRRMOVQ, IIRMOVQ } : 0;
    # Other instructions don't need ALU
 ];
@@ -292,13 +294,13 @@

 ## Select memory address
 word mem_addr = [
-   M_icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : M_valE;
-   M_icode in { IPOPQ, IRET } : M_valA;
+   M_icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ, IPOP2 } : M_valE;
+   M_icode in { IRET } : M_valA;
    # Other instructions don't need address
 ];

 ## Set read control signal
-bool mem_read = M_icode in { IMRMOVQ, IPOPQ, IRET };
+bool mem_read = M_icode in { IMRMOVQ, IPOP2, IRET };

 ## Set write control signal
 bool mem_write = M_icode in { IRMMOVQ, IPUSHQ, ICALL };
@@ -350,7 +352,7 @@
 bool F_bubble = 0;
 bool F_stall =
    # Conditions for a load/use hazard
-   E_icode in { IMRMOVQ, IPOPQ } &&
+   E_icode in { IMRMOVQ, IPOP2 } &&
     E_dstM in { d_srcA, d_srcB } ||
    # Stalling at fetch while ret passes through pipeline
    IRET in { D_icode, E_icode, M_icode };
@@ -359,7 +361,7 @@
 # At most one of these can be true.
 bool D_stall = 
    # Conditions for a load/use hazard
-   E_icode in { IMRMOVQ, IPOPQ } &&
+   E_icode in { IMRMOVQ, IPOP2 } &&
     E_dstM in { d_srcA, d_srcB };

 bool D_bubble =
@@ -367,7 +369,7 @@
    (E_icode == IJXX && !e_Cnd) ||
    # Stalling at fetch while ret passes through pipeline
    # but not condition for a load/use hazard
-   !(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
+   !(E_icode in { IMRMOVQ, IPOP2 } && E_dstM in { d_srcA, d_srcB }) &&
    # 1W: This condition will change
      IRET in { D_icode, E_icode, M_icode };

@@ -378,7 +380,7 @@
    # Mispredicted branch
    (E_icode == IJXX && !e_Cnd) ||
    # Conditions for a load/use hazard
-   E_icode in { IMRMOVQ, IPOPQ } &&
+   E_icode in { IMRMOVQ, IPOP2 } &&
     E_dstM in { d_srcA, d_srcB};

 # Should I stall or inject a bubble into Pipeline Register M?

4.59

# 4.47: compare-and-swap with a conditional jump
    jge Test1               # data[i + 1] >= data[i] -> already in order
    rmmovq %r10, (%rdx)   # data[i + 1] = old data[i]
    subq %r9, %rdx      # %rdx = %rdx - 8
    rmmovq %r11, (%rdx) # data[i] = old data[i + 1]
#######################################################################
# 4.48: compare-and-swap with two conditional moves
    rrmovq %r11, %r12       # %r12 = data[i + 1]
    rrmovq %r10, %r13       # %r13 = data[i]
    cmovl %r10, %r12        # data[i + 1] < data[i] -> %r12 = data[i]
    cmovl %r11, %r13        # data[i + 1] < data[i] -> %r13 = data[i + 1]
    rmmovq %r12, (%rdx)     # data[i + 1] = %r12
    subq %r9, %rdx          # %rdx = %rdx - 8
    rmmovq %r13, (%rdx)     # data[i] = %r13
####################################################################### 
# 4.49: compare-and-swap with one conditional move
    cmovge %r12, %rbx       # data[i + 1] >= data[i] -> %rbx = %r12; otherwise %rbx stays data[i + 1] - data[i]
    subq %rbx, %r11         # %r11 = data[i+1] < data[i] ? data[i] : data[i+1]  (when %r12 == 0)
    rmmovq %r11, (%rdx)     # data[i+1] = %r11
    subq %r9, %rdx          # %rdx = %rdx - 8
    addq %rbx, %r10         # %r10 = data[i+1] < data[i] ? data[i+1] : data[i]  (when %r12 == 0)
    rmmovq %r10, (%rdx)     # data[i] = %r10

设 data[i + 1] > data[i] 的几率为 50%
平均:
三者分别执行的指令数 2.5, 7, 6
三者分别插入的气泡 1, 0, 0(4.47 中 jge 被预测为跳转,需要交换时预测错误,插入 2 个气泡;概率 50%,平均 1 个)
三者分别需要的时钟周期 3.5, 7, 6

显然 4.47 性能更好

猜你喜欢

转载自blog.csdn.net/one_of_a_kind/article/details/81836111