;-----------------------------------------------------------------------
; Speed-test candidates.
;
; This file holds the two code sequences being compared: macro c1 and
; macro c2.  Put one candidate in each macro body.
; When reading results, smaller numbers are faster.
;
; NOTE(review): the code that expands and times c1/c2 is not in this
; file; the unit of the recorded numbers is presumably whatever that
; harness reports -- confirm against the harness.
;
; Syntax: MASM/TASM style (macro/endm, COMMENT), 32-bit registers.
;-----------------------------------------------------------------------

c1      macro
        ;place code here
        inc     esi                     ; candidate 1: two single increments
        inc     esi
        endm

c2      macro
        ;place alternate code here
        add     esi,2                   ; candidate 2: one add (unlike inc, also writes CF)
        endm

COMMENT !

The following are examples I have tested.  Everything from here down to
the closing delimiter is ignored by the assembler; it is an archive of
earlier experiments and their measured results.  Do not use the comment
delimiter character inside this block -- it would end the comment early.

;---- set cx to -1: xor/dec vs. mov immediate
c1      macro                           ;7 cycles
        xor     cx,cx
        dec     cx
        endm

c2      macro
        mov     cx,-1                   ;5 cycles ;faster
        endm

;---- zero-extend a byte: xor/mov vs. movzx
;This is the 1st code to be compared
c1      macro
        xor     ebx,ebx                 ;535 (faster)
        mov     bl,al
        endm

;This is the 2nd code to be compared
c2      macro
        movzx   ebx,al                  ;753
        endm

;---- zero-extend a word: xor/mov vs. movzx
;This is the 1st code to be compared
c1      macro
        xor     ebx,ebx                 ;640 (faster)
        mov     bx,ax
        endm

;This is the 2nd code to be compared
c2      macro
        movzx   ebx,ax                  ;747
        endm

;---- save/restore all registers: individual push/pop vs. pushad/popad
;     (no timings were recorded for this pair)
c1      macro
        push    eax
        push    ebx
        push    ecx
        push    edx
        push    esi
        push    edi
        push    ebp
        push    esp
        xor     eax,eax
        pop     esp
        pop     ebp
        pop     edi
        pop     esi
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        endm

c2      macro
        pushad
        xor     eax,eax
        popad
        endm

;Finding #1     loop vs. dec/jnz
; NOTE(review): t1/t2 are plain labels, so expanding either macro more
; than once in the same module would redefine them; a LOCAL declaration
; would be needed in that case -- confirm how the harness expands them.
c1      macro                           ;5647 (faster)
        mov     ecx,10
t1:
        dec     ecx
        jnz     t1
        endm

c2      macro                           ;9193
        mov     ecx,10
t2:
        loop    t2
        endm

;Finding #2     mov vs. xor (clearing)
c1      macro                           ;same
        mov     ecx,0
        endm

c2      macro                           ;same ? - (only because of caching) - once I disable cache
        xor     ecx,ecx                 ;the proper answer should arise
        endm

;Finding #3     xor vs. movzx (word)
c1      macro                           ;640 (faster)
        xor     eax,eax
        xor     ebx,ebx
        mov     bx,ax
        endm

c2      macro                           ;870
        xor     eax,eax
        movzx   ebx,ax
        endm

;Finding #4     xor vs. movzx (byte)
c1      macro                           ;533 (faster)
        xor     eax,eax
        xor     ebx,ebx
        mov     bl,al
        endm

c2      macro                           ;833
        xor     eax,eax
        movzx   ebx,al
        endm

;Finding #5     imul vs. lea
c1      macro
        mov     eax,2                   ;1386
        imul    eax,3
        endm

c2      macro
        mov     eax,2                   ;640 (faster)
        lea     eax,[eax+eax*2]         ; eax = eax*3 without a multiply
        endm

;Finding #6     Pipeline cache
c1      macro                           ; These are both identical but I was testing out
        cld                             ; my new Pipeline cache
        mov     esi,offset junk         ; w/o Pipeline = 20170
        mov     edi,offset junk         ; w/  Pipeline = 15486 (23% faster)
        mov     ecx,0ffffh              ; copies 0FFFFh bytes of junk onto itself
        rep     movsb
        endm

c2      macro
        cld
        mov     esi,offset junk
        mov     edi,offset junk
        mov     ecx,0ffffh
        rep     movsb
        endm

.data
junk    db      10000h dup (12h)        ; 64 KB buffer used by Finding #6
.code

!