# fldcw instruction-count test.
#
# Target:  x86-64 Linux, GNU as, AT&T syntax (op src, dst).
# Entry:   _start (freestanding; raw syscalls, no libc, never returns).
# Linking: the code loads the address of `cw` with 32-bit absolute
#          immediates and dereferences it through 32-bit registers, so it
#          assumes a non-PIE link with .data below 4GB.

	.equ	SYS_WRITE, 1		# Linux x86-64 write(2) syscall number
	.equ	SYS_EXIT, 60		# Linux x86-64 exit(2) syscall number
	.equ	STDOUT, 1		# file descriptor for standard output
	.equ	LOOP_COUNT, 1024	# iterations of the conversion loop
	.equ	RC_TRUNCATE_HI, 12	# high byte of cw with RC (bits 10-11) = 11b

	.globl _start

_start:
	# This code tests for the fldcw "load floating point command word"
	# instruction.  On most x86 processors the retired_instruction
	# performance counter counts this as one instruction.  However,
	# on Pentium 4 systems it counts as two.  Therefore this can
	# affect BBV results on such a system.
	# fldcw is most often used to set the rounding mode when doing
	# floating point to integer conversions.

	# It is encoded as "d9 /5" which means
	#   1101 1001 xx10 1yyy
	# where xx is the "mod" field (00, 01 or 10, selecting no / 8-bit /
	# 32-bit displacement) and yyy is the register (r/m) field.

	# These are instructions with similar encodings to fldcw
	# that can cause false positives if the test isn't explicit enough.
similar:
	fld1				# d9 e8
	fldl2t				# d9 e9
	fldl2e				# d9 ea
	fldpi				# d9 eb
	fldlg2				# d9 ec
	fldln2				# d9 ed
	fldz				# d9 ee

	# Check some varied ways of calling fldcw.

	# Offset on stack.
stack:
	sub	$8,%rsp			# allocate space on stack
	fnstcw	2(%rsp)			# store current cw into the scratch slot
	fldcw	2(%rsp)			# ...and load the same value back
	add	$8,%rsp			# restore stack

	# 64-bit register.
sixtyfour_reg:
	fnstcw	cw			# cw = current control word, reloaded below
	mov	$cw,%rax
	fldcw	0(%rax)			# rax
	mov	$cw,%rbx
	fldcw	0(%rbx)			# rbx
	mov	$cw,%rcx
	fldcw	0(%rcx)			# rcx
	mov	$cw,%rdx
	fldcw	0(%rdx)			# rdx

	# 32-bit register.

	# Note!  The assembler that comes with SuSE 9.1
	#        cannot assemble 32-bit fldcw on 64-bit systems.
	#        Hence the need to hand-code them.
	# Each encoding is: 0x67 (address-size prefix), 0xd9, ModRM.

thirtytwo_reg:
	fnstcw	cw
	mov	$cw,%eax

	# fldcw	0(%eax)			# eax
	.byte	0x67,0xd9,0x28

	mov	$cw,%ebx

	# fldcw	0(%ebx)			# ebx
	.byte	0x67,0xd9,0x2b

	mov	$cw,%ecx

	# fldcw	0(%ecx)			# ecx
	.byte	0x67,0xd9,0x29

	mov	$cw,%edx

	# fldcw	0(%edx)			# edx
	.byte	0x67,0xd9,0x2a

	# Register + 8-bit offset.
eight_bit:
	mov	$cw,%eax
	sub	$32,%eax		# bias the base so base+32 == &cw

	# fldcw	32(%eax)		# eax + 8 bit offset
	.byte	0x67,0xd9,0x68,0x20

	mov	%eax,%ebx
	# fldcw	32(%ebx)		# ebx + 8 bit offset
	.byte	0x67,0xd9,0x6b,0x20

	mov	%eax,%ecx

	# fldcw	32(%ecx)		# ecx + 8 bit offset
	.byte	0x67,0xd9,0x69,0x20

	mov	%eax,%edx

	# fldcw	32(%edx)		# edx + 8 bit offset
	.byte	0x67,0xd9,0x6a,0x20


	# Register + 32-bit offset.
thirtytwo_bit:
	mov	$cw,%eax
	sub	$30000,%eax		# bias the base so base+30000 == &cw

	# fldcw	30000(%eax)		# eax + 32 bit offset
	.byte	0x67,0xd9,0xa8,0x30,0x75,0x00,0x00

	mov	%eax,%ebx

	# fldcw	30000(%ebx)		# ebx + 32 bit offset
	.byte	0x67,0xd9,0xab,0x30,0x75,0x00,0x00

	mov	%eax,%ecx

	# fldcw	30000(%ecx)		# ecx + 32 bit offset
	.byte	0x67,0xd9,0xa9,0x30,0x75,0x00,0x00

	mov	%eax,%edx

	# fldcw	30000(%edx)		# edx + 32 bit offset
	.byte	0x67,0xd9,0xaa,0x30,0x75,0x00,0x00

	# Check an fp/integer conversion
	# in a loop to give a bigger count.
	# The instruction sequence (including the legacy `loop` and the
	# write to %ah) is kept exactly as is: the retired-instruction
	# count of this program is what the test measures.

	mov	$LOOP_COUNT,%rcx	# rcx = iterations remaining
big_loop:

	fldl	three			# load value onto fp stack
	fnstcw	saved_cw		# store control word to mem
	movzwl	saved_cw, %eax		# load cw from mem, zero extending
	movb	$RC_TRUNCATE_HI, %ah	# set cw for "round to zero"; this also
					# clears the other bits of the high byte
	movw	%ax, cw			# store the 16-bit cw back to memory
	fldcw	cw			# load new rounding mode
	fistpl	result			# save stack value as integer to mem
	fldcw	saved_cw		# restore old cw

	loop	big_loop		# loop to make the count more obvious

	movl	result, %ebx		# sanity check to see if the
	cmp	$3,%rbx			# result is the expected one
	je	exit

print_error:
	mov	$SYS_WRITE,%rax		# write syscall
	mov	$STDOUT,%rdi		# stdout
	mov	$error,%rsi		# string
	mov	$error_len,%rdx		# length of string, without the NUL
	syscall

	# NOTE(review): the error path falls through and still exits with
	# status 0; the failure is reported only via the message on stdout.
exit:
	xor	%rdi, %rdi		# return 0
	mov	$SYS_EXIT, %rax		# SYSCALL_EXIT
	syscall



.data
saved_cw:	.long 0			# control word as found at loop entry
cw:		.long 0			# scratch control word passed to fldcw
result:		.long 0			# integer produced by fistpl
three:		.long 0			# a floating point 3.0 (double, low dword)
		.long 1074266112	# high dword: 0x40080000
error:		.asciz "Error! Wrong result!\n"
	.equ	error_len, . - error - 1	# exclude the .asciz NUL terminator