Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

fermiqcd_sse.h File Reference

Go to the source code of this file.

Defines

#define ALIGN16   __attribute__ ((aligned (16)))
#define ALIGN64   __attribute__ ((aligned (64)))
#define _ASM   __asm__ __volatile__
#define _sse_float_prefetch_spinor(addr)
#define _sse_float_prefetch_su3(addr)
#define _sse_float_pair_load(sl, sh)
#define _sse_float_pair_load_up(sl, sh)
#define _sse_float_pair_store(rl, rh)
#define _sse_float_pair_store_up(rl, rh)
#define _sse_float_vector_load(s)
#define _sse_float_vector_load_up(s)
#define _sse_float_vector_store(r)
#define _sse_float_vector_mul(c)
#define _sse_float_vector_add()
#define _sse_float_vector_sub()
#define _sse_float_vector_addsub()
#define _sse_float_su3_multiply(u)
#define _sse_float_su3_inverse_multiply(u)
#define _sse_float_vector_subadd()
#define _sse_float_vector_i_add()
#define _sse_float_vector_i_sub()
#define _sse_float_vector_xch_i_add()
#define _sse_float_vector_xch_i_sub()
#define _sse_float_vector_i_addsub()
#define _sse_float_vector_i_subadd()
#define _sse_float_vector_xch()
#define _sse_double_prefetch_16(addr)
#define _sse_double_prefetch_spinor(addr)
#define _sse_double_prefetch_nta_spinor(addr)
#define _sse_double_prefetch_su3(addr)
#define _sse_double_load(s)
#define _sse_double_load_123(c1, c2, c3)
#define _sse_double_load_up(s)
#define _sse_double_load_up_123(c1, c2, c3)
#define _sse_double_store(r)
#define _sse_double_store_123(c1, c2, c3)
#define _sse_double_store_up(r)
#define _sse_double_store_up_123(c1, c2, c3)
#define _sse_double_vector_mul(c)
#define _sse_double_vector_mul_complex(x, y)
#define _sse_double_vector_add()
#define _sse_double_vector_sub()
#define _sse_double_su3_multiply(u)
#define _sse_double_su3_inverse_multiply(u)
#define _sse_double_vector_i_mul()
#define _sse_double_vector_minus_i_mul()
#define _sse_double_add_norm_square_16(r, c)
#define _sse_double_add_real_scalar_product_16(r, s, c)
#define _sse_double_add_imag_scalar_product_16(r, s, c)
#define _sse_double_hermitian_su3(r, s)
#define _sse_double_copy_16(r, s)
#define _sse_double_add_16(r, s)
#define _sse_double_sub_16(r, s)
#define _sse_double_add_multiply_16(r, c, s)
#define _sse_double_multiply_16(r, c, s)


Detailed Description

Version:
3-1-2005
Author:
Martin Luescher and Massimo Di Pierro <mdipierro@cs.depaul.edu>
Basic actions for Wilson fermions, optimized in assembler.

Define Documentation

#define _sse_double_load(s)
 

Value:

_ASM ("movapd %0, %%xmm0 \n\t" \
      "movapd %1, %%xmm1 \n\t" \
      "movapd %2, %%xmm2" \
      : \
      : \
      "m" ((s).c1), \
      "m" ((s).c2), \
      "m" ((s).c3))

#define _sse_double_load_123(c1, c2, c3)
 

Value:

_ASM ("movapd %0, %%xmm0 \n\t" \
      "movapd %1, %%xmm1 \n\t" \
      "movapd %2, %%xmm2" \
      : \
      : \
      "m" (c1), \
      "m" (c2), \
      "m" (c3))

#define _sse_double_load_up(s)
 

Value:

_ASM ("movapd %0, %%xmm3 \n\t" \
      "movapd %1, %%xmm4 \n\t" \
      "movapd %2, %%xmm5" \
      : \
      : \
      "m" ((s).c1), \
      "m" ((s).c2), \
      "m" ((s).c3))

#define _sse_double_load_up_123(c1, c2, c3)
 

Value:

_ASM ("movapd %0, %%xmm3 \n\t" \
      "movapd %1, %%xmm4 \n\t" \
      "movapd %2, %%xmm5" \
      : \
      : \
      "m" (c1), \
      "m" (c2), \
      "m" (c3))

#define _sse_double_prefetch_16(addr)
 

Value:

_ASM ("prefetcht0 %0" \
      : \
      : "m" (*(addr)))

#define _sse_double_prefetch_nta_spinor(addr)
 

Value:

_ASM ("prefetchnta %0 \n\t" \
      "prefetchnta %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))

#define _sse_double_prefetch_spinor(addr)
 

Value:

_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))

#define _sse_double_prefetch_su3(addr)
 

Value:

_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))

#define _sse_double_store(r)
 

Value:

_ASM ("movapd %%xmm0, %0 \n\t" \
      "movapd %%xmm1, %1 \n\t" \
      "movapd %%xmm2, %2" \
      : \
      "=m" ((r).c1), \
      "=m" ((r).c2), \
      "=m" ((r).c3))

#define _sse_double_store_123(c1, c2, c3)
 

Value:

_ASM ("movapd %%xmm0, %0 \n\t" \
      "movapd %%xmm1, %1 \n\t" \
      "movapd %%xmm2, %2" \
      : \
      "=m" (c1), \
      "=m" (c2), \
      "=m" (c3))

#define _sse_double_store_up(r)
 

Value:

_ASM ("movapd %%xmm3, %0 \n\t" \
      "movapd %%xmm4, %1 \n\t" \
      "movapd %%xmm5, %2" \
      : \
      "=m" ((r).c1), \
      "=m" ((r).c2), \
      "=m" ((r).c3))

#define _sse_double_store_up_123(c1, c2, c3)
 

Value:

_ASM ("movapd %%xmm3, %0 \n\t" \
      "movapd %%xmm4, %1 \n\t" \
      "movapd %%xmm5, %2" \
      : \
      "=m" (c1), \
      "=m" (c2), \
      "=m" (c3))

 
#define _sse_double_vector_add()
 

Value:

_ASM ("addpd %%xmm3, %%xmm0 \n\t" \
      "addpd %%xmm4, %%xmm1 \n\t" \
      "addpd %%xmm5, %%xmm2" \
      : \
      :)

 
#define _sse_double_vector_i_mul()
 

Value:

_ASM ("shufpd $0x1, %%xmm3, %%xmm3 \n\t" \
      "shufpd $0x1, %%xmm4, %%xmm4 \n\t" \
      "shufpd $0x1, %%xmm5, %%xmm5 \n\t" \
      "xorpd %0, %%xmm3 \n\t" \
      "xorpd %0, %%xmm4 \n\t" \
      "xorpd %0, %%xmm5" \
      : \
      : \
      "m" (_sse_double_sgn))

 
#define _sse_double_vector_minus_i_mul()
 

Value:

_ASM ("xorpd %0, %%xmm3 \n\t" \
      "xorpd %0, %%xmm4 \n\t" \
      "xorpd %0, %%xmm5 \n\t" \
      "shufpd $0x1, %%xmm3, %%xmm3 \n\t" \
      "shufpd $0x1, %%xmm4, %%xmm4 \n\t" \
      "shufpd $0x1, %%xmm5, %%xmm5" \
      : \
      : \
      "m" (_sse_double_sgn))

#define _sse_double_vector_mul(c)
 

Value:

_ASM ("mulpd %0, %%xmm0 \n\t" \
      "mulpd %0, %%xmm1 \n\t" \
      "mulpd %0, %%xmm2" \
      : \
      : \
      "m" (c))

#define _sse_double_vector_mul_complex(x, y)
 

Value:

_ASM ("movapd %%xmm0, %%xmm3 \n\t" \
      "movapd %%xmm1, %%xmm4 \n\t" \
      "movapd %%xmm2, %%xmm5 \n\t" \
      "mulpd %1, %%xmm3 \n\t" \
      "mulpd %1, %%xmm4 \n\t" \
      "mulpd %1, %%xmm5 \n\t" \
      "shufpd $0x1, %%xmm3, %%xmm3 \n\t" \
      "shufpd $0x1, %%xmm4, %%xmm4 \n\t" \
      "shufpd $0x1, %%xmm5, %%xmm5 \n\t" \
      "xorpd %2, %%xmm3 \n\t" \
      "xorpd %2, %%xmm4 \n\t" \
      "xorpd %2, %%xmm5 \n\t" \
      "mulpd %0, %%xmm0 \n\t" \
      "mulpd %0, %%xmm1 \n\t" \
      "mulpd %0, %%xmm2 \n\t" \
      "addpd %%xmm0, %%xmm3 \n\t" \
      "addpd %%xmm1, %%xmm4 \n\t" \
      "addpd %%xmm2, %%xmm5" \
      : \
      : \
      "m" (x), \
      "m" (y), \
      "m" (_sse_double_sgn))

 
#define _sse_double_vector_sub()
 

Value:

_ASM ("subpd %%xmm3, %%xmm0 \n\t" \
      "subpd %%xmm4, %%xmm1 \n\t" \
      "subpd %%xmm5, %%xmm2" \
      : \
      :)

#define _sse_float_pair_load(sl, sh)
 

Value:

_ASM ("movlps %0, %%xmm0 \n\t" \
      "movlps %1, %%xmm1 \n\t" \
      "movlps %2, %%xmm2 \n\t" \
      "movhps %3, %%xmm0 \n\t" \
      "movhps %4, %%xmm1 \n\t" \
      "movhps %5, %%xmm2" \
       : \
       : \
       "m" ((sl).c1), \
       "m" ((sl).c2), \
       "m" ((sl).c3), \
       "m" ((sh).c1), \
       "m" ((sh).c2), \
       "m" ((sh).c3))

#define _sse_float_pair_load_up(sl, sh)
 

Value:

_ASM ("movlps %0, %%xmm3 \n\t" \
      "movlps %1, %%xmm4 \n\t" \
      "movlps %2, %%xmm5 \n\t" \
      "movhps %3, %%xmm3 \n\t" \
      "movhps %4, %%xmm4 \n\t" \
      "movhps %5, %%xmm5" \
      : \
      : \
      "m" ((sl).c1), \
      "m" ((sl).c2), \
      "m" ((sl).c3), \
      "m" ((sh).c1), \
      "m" ((sh).c2), \
      "m" ((sh).c3))

#define _sse_float_pair_store(rl, rh)
 

Value:

_ASM ("movlps %%xmm0, %0 \n\t" \
      "movlps %%xmm1, %1 \n\t" \
      "movlps %%xmm2, %2 \n\t" \
      "movhps %%xmm0, %3 \n\t" \
      "movhps %%xmm1, %4 \n\t" \
      "movhps %%xmm2, %5" \
      : \
      "=m" ((rl).c1), \
      "=m" ((rl).c2), \
      "=m" ((rl).c3), \
      "=m" ((rh).c1), \
      "=m" ((rh).c2), \
      "=m" ((rh).c3))

#define _sse_float_pair_store_up(rl, rh)
 

Value:

_ASM ("movlps %%xmm3, %0 \n\t" \
      "movlps %%xmm4, %1 \n\t" \
      "movlps %%xmm5, %2 \n\t" \
      "movhps %%xmm3, %3 \n\t" \
      "movhps %%xmm4, %4 \n\t" \
      "movhps %%xmm5, %5" \
      : \
      "=m" ((rl).c1), \
      "=m" ((rl).c2), \
      "=m" ((rl).c3), \
      "=m" ((rh).c1), \
      "=m" ((rh).c2), \
      "=m" ((rh).c3))

#define _sse_float_prefetch_spinor(addr)
 

Value:

_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))

#define _sse_float_prefetch_su3(addr)
 

Value:

_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f)))), \
      "m" (*(((char*)(((unsigned int)(addr))&~0x7f))+128)))

 
#define _sse_float_vector_add()
 

Value:

_ASM ("addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2 \n\t" \
      : \
      : )

 
#define _sse_float_vector_addsub()
 

Value:

_ASM ("mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn34))

 
#define _sse_float_vector_i_add()
 

Value:

_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn13))

 
#define _sse_float_vector_i_addsub()
 

Value:

_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn14))

 
#define _sse_float_vector_i_sub()
 

Value:

_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn24))

 
#define _sse_float_vector_i_subadd()
 

Value:

_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn23))

#define _sse_float_vector_load(s)
 

Value:

_ASM ("movaps %0, %%xmm0 \n\t" \
      "movaps %1, %%xmm1 \n\t" \
      "movaps %2, %%xmm2" \
      : \
      : \
      "m" ((s).c1), \
      "m" ((s).c2), \
      "m" ((s).c3))

#define _sse_float_vector_load_up(s)
 

Value:

_ASM ("movaps %0, %%xmm3 \n\t" \
      "movaps %1, %%xmm4 \n\t" \
      "movaps %2, %%xmm5" \
      : \
      : \
      "m" ((s).c1), \
      "m" ((s).c2), \
      "m" ((s).c3))

#define _sse_float_vector_mul(c)
 

Value:

_ASM ("mulps %0, %%xmm0 \n\t" \
      "mulps %0, %%xmm1 \n\t" \
      "mulps %0, %%xmm2" \
      : \
      : \
      "m" (c))

#define _sse_float_vector_store(r)
 

Value:

_ASM ("movaps %%xmm0, %0 \n\t" \
      "movaps %%xmm1, %1 \n\t" \
      "movaps %%xmm2, %2" \
      : \
      "=m" ((r).c1), \
      "=m" ((r).c2), \
      "=m" ((r).c3))

 
#define _sse_float_vector_sub()
 

Value:

_ASM ("subps %%xmm3, %%xmm0 \n\t" \
      "subps %%xmm4, %%xmm1 \n\t" \
      "subps %%xmm5, %%xmm2" \
      : \
      :)

 
#define _sse_float_vector_subadd()
 

Value:

_ASM ("mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn12))

 
#define _sse_float_vector_xch()
 

Value:

_ASM ("shufps $0x4e, %%xmm3, %%xmm3 \n\t" \
      "shufps $0x4e, %%xmm4, %%xmm4 \n\t" \
      "shufps $0x4e, %%xmm5, %%xmm5" \
      : \
      :)

 
#define _sse_float_vector_xch_i_add()
 

Value:

_ASM ("shufps $0x1b, %%xmm3, %%xmm3 \n\t" \
      "shufps $0x1b, %%xmm4, %%xmm4 \n\t" \
      "shufps $0x1b, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn13))

 
#define _sse_float_vector_xch_i_sub()
 

Value:

_ASM ("shufps $0x1b, %%xmm3, %%xmm3 \n\t" \
      "shufps $0x1b, %%xmm4, %%xmm4 \n\t" \
      "shufps $0x1b, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn24))


Generated on Sun Feb 27 15:12:22 2005 by  doxygen 1.4.1