.686                                    ; create 32 bit code
.mmx
.xmm
.model flat, stdcall                    ; 32 bit memory model
option casemap :none                    ; case sensitive

.code

; Build note from the original author:
;   ml -c /omf ad.asm
;   Microsoft (R) Macro Assembler Version 10.00.30319.01
;   Copyright (C) Microsoft Corporation. All rights reserved.

;-----------------------------------------------------------------------
; void _align16_zero_memory(unsigned _byte_num, void *_source)
;
; Zero _byte_num bytes starting at _source.
;
; ABI:    32-bit cdecl (arguments on the stack, caller cleans up).
; In:     [esp+4] = _byte_num   number of bytes to clear
;         [esp+8] = _source     destination; MUST be 16-byte aligned
;                               because movdqa is used for the stores
; Out:    eax = 0
; Clobb:  eax, xmm0, flags (DF is cleared); ecx and edi are preserved
;
; Layout of the work:
;   1. whole 128-byte blocks            (main_loop, 8 x movdqa)
;   2. 0..7 remaining 16-byte chunks    (computed jump into a store ladder)
;   3. 0..15 remaining bytes            (rep stosb)
;-----------------------------------------------------------------------
_align16_zero_memory proc C _byte_num, _source
option prologue:none, epilogue:none

        mov     eax, [esp+4]            ; eax = _byte_num
        push    edi
        mov     edi, [esp+12]           ; edi = _source (+4 for the push above)
        push    ecx
        mov     ecx, eax                ; ecx keeps the full byte count
        pxor    xmm0, xmm0              ; xmm0 = 0, used by every path below
        shr     eax, 7                  ; eax = number of 128-byte blocks
        jz      _remain_byte_deal       ; fewer than 128 bytes: skip the loop

        align 16
main_loop:
        movdqa  [edi],      xmm0
        movdqa  [edi+010h], xmm0
        movdqa  [edi+020h], xmm0
        movdqa  [edi+030h], xmm0
        movdqa  [edi+040h], xmm0
        movdqa  [edi+050h], xmm0
        movdqa  [edi+060h], xmm0
        movdqa  [edi+070h], xmm0
        add     edi, 128
        dec     eax
        jnz     main_loop

_remain_byte_deal:
        ; Both the loop exit and the short-count branch arrive here with
        ; ecx = full byte count, so the remainder is derived in one place.
        mov     eax, ecx                ; eax = byte count (low 4 bits = tail)
        and     ecx, 112                ; keep bits 4..6: leftover 16-byte chunks * 16
        shr     ecx, 3                  ; ecx = 2 * chunks  (0, 2, ..., 14)

        ; Bias edi so that the ladder entry for "k chunks" stores exactly
        ; at old_edi + 0 .. old_edi + 16*(k-1):
        ;   edi = old_edi - 112 + 16*k
        lea     edi, [edi-070h+ecx*8]
        jmp     dword ptr [RemainDQwordTable+ecx*4]

        align 16
SB0E:
SB0F:
        movdqa  [edi],      xmm0        ; entry for 7 chunks
SB0C:
SB0D:
        movdqa  [edi+010h], xmm0        ; entry for 6 chunks
SB0A:
SB0B:
        movdqa  [edi+020h], xmm0        ; entry for 5 chunks
SB08:
SB09:
        movdqa  [edi+030h], xmm0        ; entry for 4 chunks
SB06:
SB07:
        movdqa  [edi+040h], xmm0        ; entry for 3 chunks
SB04:
SB05:
        movdqa  [edi+050h], xmm0        ; entry for 2 chunks
SB02:
SB03:
        movdqa  [edi+060h], xmm0        ; entry for 1 chunk
SB00:
SB01:
        ; The last ladder store always ends at biased_edi + 070h, so that
        ; is where the byte tail begins (old_edi + 16*k).
        add     edi, 070h
        and     eax, 15                 ; tail length = byte count mod 16
        mov     ecx, eax
        xor     eax, eax                ; al = 0 is the fill value for stosb
        cld                             ; make sure stosb walks upward
        rep stosb
        pop     ecx
        pop     edi
        ret

; Jump table indexed by 2 * chunks; odd slots alias the even ones.
RemainDQwordTable dd SB00, SB01, SB02, SB03, SB04, SB05, SB06, SB07
                  dd SB08, SB09, SB0A, SB0B, SB0C, SB0D, SB0E, SB0F

_align16_zero_memory endp

;-----------------------------------------------------------------------
; void SetUpZeroMem(unsigned _byte_num, void *_source)
;
; Zero _byte_num bytes starting at _source, for any alignment of _source.
;
; ABI:    32-bit cdecl (arguments on the stack, caller cleans up).
; In:     [esp+4] = _byte_num   number of bytes to clear
;         [esp+8] = _source     destination, any alignment
; Out:    eax = 0
; Clobb:  eax, xmm0, flags (DF is cleared); ecx and edi are preserved
;
; Strategy:
;   * fewer than 16 bytes  -> plain rep stosb
;   * otherwise            -> clear the 0..15 bytes needed to reach a
;                             16-byte boundary with scalar stores
;                             (dispatch on _source & 15), then hand the
;                             aligned remainder to _align16_zero_memory.
;-----------------------------------------------------------------------
SetUpZeroMem proc C _byte_num, _source
option prologue:none, epilogue:none

        mov     eax, [esp+8]            ; eax = _source
        push    edi
        mov     edi, eax                ; edi = write pointer
        push    ecx
        mov     ecx, [esp+12]           ; ecx = _byte_num (+8 for the two pushes)
        cmp     ecx, 16
        jb      __miniature_copy        ; too small to be worth aligning
        and     eax, 15                 ; eax = misalignment of _source
        jmp     dword ptr [_ByteTable+eax*4]

        align 16
OP00:   ; edi is 16-byte aligned, ecx = bytes still to clear
        push    edi                     ; arg 2: _source
        push    ecx                     ; arg 1: _byte_num
        call    _align16_zero_memory
        add     esp, 8                  ; cdecl: caller removes the arguments
        pop     ecx
        pop     edi
        ret

OP01:   ; misaligned by 1 -> clear 15 head bytes
        mov     byte ptr  [edi],    0
        mov     word ptr  [edi+1],  0
        mov     dword ptr [edi+3],  0
        mov     dword ptr [edi+7],  0
        mov     dword ptr [edi+11], 0
        add     edi, 15
        sub     ecx, 15
        jmp     OP00

OP02:   ; misaligned by 2 -> clear 14 head bytes
        mov     word ptr  [edi],    0
        mov     dword ptr [edi+2],  0
        mov     dword ptr [edi+6],  0
        mov     dword ptr [edi+10], 0
        add     edi, 14
        sub     ecx, 14
        jmp     OP00

OP03:   ; misaligned by 3 -> clear 13 head bytes
        mov     byte ptr  [edi],    0
        mov     dword ptr [edi+1],  0
        mov     dword ptr [edi+5],  0
        mov     dword ptr [edi+9],  0
        add     edi, 13
        sub     ecx, 13
        jmp     OP00

OP04:   ; misaligned by 4 -> clear 12 head bytes
        mov     dword ptr [edi],    0
        mov     dword ptr [edi+4],  0
        mov     dword ptr [edi+8],  0
        add     edi, 12
        sub     ecx, 12
        jmp     OP00

OP05:   ; misaligned by 5 -> clear 11 head bytes
        mov     byte ptr  [edi],    0
        mov     word ptr  [edi+1],  0
        mov     dword ptr [edi+3],  0
        mov     dword ptr [edi+7],  0
        add     edi, 11
        sub     ecx, 11
        jmp     OP00

OP06:   ; misaligned by 6 -> clear 10 head bytes
        mov     word ptr  [edi],    0
        mov     dword ptr [edi+2],  0
        mov     dword ptr [edi+6],  0
        add     edi, 10
        sub     ecx, 10
        jmp     OP00

OP07:   ; misaligned by 7 -> clear 9 head bytes
        mov     byte ptr  [edi],    0
        mov     dword ptr [edi+1],  0
        mov     dword ptr [edi+5],  0
        add     edi, 9
        sub     ecx, 9
        jmp     OP00

OP08:   ; misaligned by 8 -> clear 8 head bytes
        mov     dword ptr [edi],    0
        mov     dword ptr [edi+4],  0   ; second half of the 8 bytes (was +8)
        add     edi, 8
        sub     ecx, 8
        jmp     OP00

OP09:   ; misaligned by 9 -> clear 7 head bytes
        mov     byte ptr  [edi],    0
        mov     word ptr  [edi+1],  0
        mov     dword ptr [edi+3],  0
        add     edi, 7
        sub     ecx, 7
        jmp     OP00

OP0A:   ; misaligned by 10 -> clear 6 head bytes
        mov     word ptr  [edi],    0
        mov     dword ptr [edi+2],  0
        add     edi, 6
        sub     ecx, 6
        jmp     OP00

OP0B:   ; misaligned by 11 -> clear 5 head bytes
        mov     byte ptr  [edi],    0
        mov     dword ptr [edi+1],  0
        add     edi, 5
        sub     ecx, 5
        jmp     OP00

OP0C:   ; misaligned by 12 -> clear 4 head bytes
        mov     dword ptr [edi],    0
        add     edi, 4
        sub     ecx, 4
        jmp     OP00

OP0D:   ; misaligned by 13 -> clear 3 head bytes
        mov     byte ptr  [edi],    0
        mov     word ptr  [edi+1],  0   ; bytes 1..2 (was +2, which skipped byte 1)
        add     edi, 3
        sub     ecx, 3
        jmp     OP00

OP0E:   ; misaligned by 14 -> clear 2 head bytes
        mov     word ptr  [edi],    0
        add     edi, 2
        sub     ecx, 2
        jmp     OP00

OP0F:   ; misaligned by 15 -> clear 1 head byte
        mov     byte ptr  [edi],    0
        inc     edi
        dec     ecx
        jmp     OP00

__miniature_copy:
        ; Fewer than 16 bytes: edi = _source, ecx = _byte_num.
        cld                             ; make sure stosb walks upward
        xor     eax, eax                ; al = 0 is the fill value
        rep stosb
        pop     ecx
        pop     edi
        ret

; Dispatch table indexed by (_source & 15).
_ByteTable dd OP00, OP01, OP02, OP03, OP04, OP05, OP06, OP07
           dd OP08, OP09, OP0A, OP0B, OP0C, OP0D, OP0E, OP0F

SetUpZeroMem endp

end
ZeroMemory SSE 实现
猜你喜欢
转载自xuling1993728.iteye.com/blog/2207891
今日推荐
周排行