Home | History | Annotate | Download | only in ia32
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #include "v8.h"
     29 
     30 #if defined(V8_TARGET_ARCH_IA32)
     31 
     32 #include "codegen.h"
     33 
     34 namespace v8 {
     35 namespace internal {
     36 
     37 
     38 // -------------------------------------------------------------------------
     39 // Platform-specific RuntimeCallHelper functions.
     40 
// Called by code stubs before calling into the runtime: opens an
// internal frame on the masm.  The matching teardown is AfterCall below.
void StubRuntimeCallHelper::BeforeCall(MacroAssembler* masm) const {
  masm->EnterInternalFrame();
}
     44 
     45 
// Called by code stubs after the runtime call returns: tears down the
// internal frame entered in BeforeCall.
void StubRuntimeCallHelper::AfterCall(MacroAssembler* masm) const {
  masm->LeaveInternalFrame();
}
     49 
     50 
     51 #define __ masm.
     52 
     53 static void MemCopyWrapper(void* dest, const void* src, size_t size) {
     54   memcpy(dest, src, size);
     55 }
     56 
     57 
// Generates a specialized memcpy-style routine into freshly allocated
// executable memory and returns it as an OS::MemCopyFunction.  Uses an
// SSE2 16-byte block copier when available, otherwise rep_movs.  Falls
// back to the plain C MemCopyWrapper if executable memory cannot be
// allocated.  Callers must pass a size of at least OS::kMinComplexMemCopy
// (checked with int3 in debug code below).
OS::MemCopyFunction CreateMemCopyFunction() {
  size_t actual_size;
  // Allocate buffer in executable space.
  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB,
                                                 &actual_size,
                                                 true));
  // No executable memory available: use the C library fallback.
  if (buffer == NULL) return &MemCopyWrapper;
  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));

  // Generated code is put into a fixed, unmovable, buffer, and not into
  // the V8 heap. We can't, and don't, refer to any relocatable addresses
  // (e.g. the JavaScript nan-object).

  // 32-bit C declaration function calls pass arguments on stack.

  // Stack layout:
  // esp[12]: Third argument, size.
  // esp[8]: Second argument, source pointer.
  // esp[4]: First argument, destination pointer.
  // esp[0]: return address

  const int kDestinationOffset = 1 * kPointerSize;
  const int kSourceOffset = 2 * kPointerSize;
  const int kSizeOffset = 3 * kPointerSize;

  int stack_offset = 0;  // Update if we change the stack height.

  if (FLAG_debug_code) {
    // Trap (int3) if invoked with a size below OS::kMinComplexMemCopy:
    // the block-copy code below assumes it may always copy whole
    // 16-byte (or 4-byte) chunks.
    __ cmp(Operand(esp, kSizeOffset + stack_offset),
           Immediate(OS::kMinComplexMemCopy));
    Label ok;
    __ j(greater_equal, &ok);
    __ int3();
    __ bind(&ok);
  }
  if (CpuFeatures::IsSupported(SSE2)) {
    CpuFeatures::Scope enable(SSE2);
    // edi/esi are callee-saved in the ia32 C calling convention.
    __ push(edi);
    __ push(esi);
    stack_offset += 2 * kPointerSize;
    Register dst = edi;
    Register src = esi;
    Register count = ecx;
    __ mov(dst, Operand(esp, stack_offset + kDestinationOffset));
    __ mov(src, Operand(esp, stack_offset + kSourceOffset));
    __ mov(count, Operand(esp, stack_offset + kSizeOffset));


    // Copy the first 16 bytes unaligned, then advance both pointers by
    // edx = 16 - (dst & 15) so dst becomes 16-byte aligned.  Any bytes
    // between the new dst and the old one were already written by the
    // unaligned store, so nothing is skipped.
    __ movdqu(xmm0, Operand(src, 0));
    __ movdqu(Operand(dst, 0), xmm0);
    __ mov(edx, dst);
    __ and_(edx, 0xF);
    __ neg(edx);
    __ add(Operand(edx), Immediate(16));
    __ add(dst, Operand(edx));
    __ add(src, Operand(edx));
    __ sub(Operand(count), edx);

    // edi is now aligned. Check if esi is also aligned.
    Label unaligned_source;
    __ test(Operand(src), Immediate(0x0F));
    __ j(not_zero, &unaligned_source);
    {
      // Copy loop for aligned source and destination.
      __ mov(edx, count);
      // From here on ecx counts 32-byte iterations while edx keeps the
      // remaining byte count (the new locals shadow the outer ones).
      Register loop_count = ecx;
      Register count = edx;
      __ shr(loop_count, 5);
      {
        // Main copy loop.
        Label loop;
        __ bind(&loop);
        __ prefetch(Operand(src, 0x20), 1);
        __ movdqa(xmm0, Operand(src, 0x00));
        __ movdqa(xmm1, Operand(src, 0x10));
        __ add(Operand(src), Immediate(0x20));

        __ movdqa(Operand(dst, 0x00), xmm0);
        __ movdqa(Operand(dst, 0x10), xmm1);
        __ add(Operand(dst), Immediate(0x20));

        __ dec(loop_count);
        __ j(not_zero, &loop);
      }

      // At most 31 bytes to copy.
      Label move_less_16;
      __ test(Operand(count), Immediate(0x10));
      __ j(zero, &move_less_16);
      __ movdqa(xmm0, Operand(src, 0));
      __ add(Operand(src), Immediate(0x10));
      __ movdqa(Operand(dst, 0), xmm0);
      __ add(Operand(dst), Immediate(0x10));
      __ bind(&move_less_16);

      // At most 15 bytes to copy. Copy 16 bytes at end of string.
      // (An overlapping unaligned 16-byte copy ending exactly at the last
      // byte; safe because the overlapped bytes were already written.)
      __ and_(count, 0xF);
      __ movdqu(xmm0, Operand(src, count, times_1, -0x10));
      __ movdqu(Operand(dst, count, times_1, -0x10), xmm0);

      // Return the original destination pointer in eax, matching memcpy.
      __ mov(eax, Operand(esp, stack_offset + kDestinationOffset));
      __ pop(esi);
      __ pop(edi);
      __ ret(0);
    }
    __ Align(16);
    {
      // Copy loop for unaligned source and aligned destination.
      // If source is not aligned, we can't read it as efficiently.
      __ bind(&unaligned_source);
      __ mov(edx, ecx);
      Register loop_count = ecx;
      Register count = edx;
      __ shr(loop_count, 5);
      {
        // Main copy loop: unaligned loads (movdqu), aligned stores (movdqa).
        Label loop;
        __ bind(&loop);
        __ prefetch(Operand(src, 0x20), 1);
        __ movdqu(xmm0, Operand(src, 0x00));
        __ movdqu(xmm1, Operand(src, 0x10));
        __ add(Operand(src), Immediate(0x20));

        __ movdqa(Operand(dst, 0x00), xmm0);
        __ movdqa(Operand(dst, 0x10), xmm1);
        __ add(Operand(dst), Immediate(0x20));

        __ dec(loop_count);
        __ j(not_zero, &loop);
      }

      // At most 31 bytes to copy.
      Label move_less_16;
      __ test(Operand(count), Immediate(0x10));
      __ j(zero, &move_less_16);
      __ movdqu(xmm0, Operand(src, 0));
      __ add(Operand(src), Immediate(0x10));
      __ movdqa(Operand(dst, 0), xmm0);
      __ add(Operand(dst), Immediate(0x10));
      __ bind(&move_less_16);

      // At most 15 bytes to copy. Copy 16 bytes at end of string.
      // (Same overlapping-tail trick as in the aligned-source path.)
      __ and_(count, 0x0F);
      __ movdqu(xmm0, Operand(src, count, times_1, -0x10));
      __ movdqu(Operand(dst, count, times_1, -0x10), xmm0);

      // Return the original destination pointer in eax, matching memcpy.
      __ mov(eax, Operand(esp, stack_offset + kDestinationOffset));
      __ pop(esi);
      __ pop(edi);
      __ ret(0);
    }

  } else {
    // SSE2 not supported. Unlikely to happen in practice.
    __ push(edi);
    __ push(esi);
    stack_offset += 2 * kPointerSize;
    __ cld();
    Register dst = edi;
    Register src = esi;
    Register count = ecx;
    __ mov(dst, Operand(esp, stack_offset + kDestinationOffset));
    __ mov(src, Operand(esp, stack_offset + kSourceOffset));
    __ mov(count, Operand(esp, stack_offset + kSizeOffset));

    // Copy the first word.
    __ mov(eax, Operand(src, 0));
    __ mov(Operand(dst, 0), eax);

    // Increment src and dst by edx = 4 - (dst & 3) so that dst becomes
    // word aligned; the skipped bytes were covered by the word copy above.
    __ mov(edx, dst);
    __ and_(edx, 0x03);
    __ neg(edx);
    __ add(Operand(edx), Immediate(4));  // edx = 4 - (dst & 3)
    __ add(dst, Operand(edx));
    __ add(src, Operand(edx));
    __ sub(Operand(count), edx);
    // edi is now aligned, ecx holds number of remaining bytes to copy.

    // Keep the byte count in edx; ecx becomes the word count consumed
    // by rep_movs below.
    __ mov(edx, count);
    count = edx;
    __ shr(ecx, 2);  // Make word count instead of byte count.
    __ rep_movs();

    // At most 3 bytes left to copy. Copy 4 bytes at end of string.
    // (Overlapping word copy ending at the last byte, like the SSE2 tail.)
    __ and_(count, 3);
    __ mov(eax, Operand(src, count, times_1, -4));
    __ mov(Operand(dst, count, times_1, -4), eax);

    // Return the original destination pointer in eax, matching memcpy.
    __ mov(eax, Operand(esp, stack_offset + kDestinationOffset));
    __ pop(esi);
    __ pop(edi);
    __ ret(0);
  }

  CodeDesc desc;
  masm.GetCode(&desc);
  // The buffer is never relocated, so the code must contain no
  // relocation information.
  ASSERT(desc.reloc_size == 0);

  CPU::FlushICache(buffer, actual_size);
  return FUNCTION_CAST<OS::MemCopyFunction>(buffer);
}
    260 
    261 #undef __
    262 
    263 } }  // namespace v8::internal
    264 
    265 #endif  // V8_TARGET_ARCH_IA32
    266