Go to the source code of this file.
Basic actions for Wilson Fermions optimized in assembler
/*
 * Shorthand for a GCC extended-asm statement that the compiler must not
 * delete or move: expands to `__asm__ __volatile__`.
 */
#define _ASM __asm__ __volatile__
| #define _sse_double_add_16 | ( | r, | |||
| s | ) |
| #define _sse_double_add_imag_scalar_product_16 | ( | r, | |||
| s, | |||||
| c | ) |
| #define _sse_double_add_multiply_16 | ( | r, | |||
| c, | |||||
| s | ) |
| #define _sse_double_add_norm_square_16 | ( | r, | |||
| c | ) |
| #define _sse_double_add_real_scalar_product_16 | ( | r, | |||
| s, | |||||
| c | ) |
| #define _sse_double_copy_16 | ( | r, | |||
| s | ) |
| #define _sse_double_hermitian_su3 | ( | r, | |||
| s | ) |
| #define _sse_double_load | ( | s | ) |
/*
 * Loads the three memory operands c1, c2, c3 into xmm0, xmm1, xmm2.
 *
 * c1, c2, c3 must be lvalues; movapd requires each of them to be 16-byte
 * aligned. The loaded values are left in the registers for a following asm
 * macro to consume. xmm0-xmm2 are declared as clobbered so the compiler does
 * not keep its own values in them across this statement.
 */
#define _sse_double_load_123(c1, c2, c3) \
_ASM ("movapd %0, %%xmm0 \n\t" \
      "movapd %1, %%xmm1 \n\t" \
      "movapd %2, %%xmm2" \
      : \
      : \
      "m" (c1), \
      "m" (c2), \
      "m" (c3) \
      : \
      "xmm0", "xmm1", "xmm2")
| #define _sse_double_load_up | ( | s | ) |
/*
 * Loads the three memory operands c1, c2, c3 into xmm3, xmm4, xmm5
 * (the "upper" register triple).
 *
 * c1, c2, c3 must be lvalues; movapd requires each of them to be 16-byte
 * aligned. xmm3-xmm5 are declared as clobbered so the compiler does not keep
 * its own values in them across this statement.
 */
#define _sse_double_load_up_123(c1, c2, c3) \
_ASM ("movapd %0, %%xmm3 \n\t" \
      "movapd %1, %%xmm4 \n\t" \
      "movapd %2, %%xmm5" \
      : \
      : \
      "m" (c1), \
      "m" (c2), \
      "m" (c3) \
      : \
      "xmm3", "xmm4", "xmm5")
| #define _sse_double_multiply_16 | ( | r, | |||
| c, | |||||
| s | ) |
/*
 * Issues a prefetcht0 hint for the cache line containing *addr.
 *
 * addr is a pointer expression; it is only used to form the memory operand,
 * nothing is read or written through it by this macro.
 */
#define _sse_double_prefetch_16(addr) \
_ASM ("prefetcht0 %0" \
      : \
      : \
      "m" (*(addr)))
/*
 * Issues two prefetchnta hints: one for addr rounded down to a 128-byte
 * boundary, one for the address 128 bytes after that.
 *
 * The rounding is done by subtracting the low 7 address bits from the
 * pointer itself. The previous form cast the pointer to `unsigned int`,
 * which drops the upper address bits on 64-bit targets and sends the hint
 * to an unrelated address.
 *
 * addr is evaluated more than once; pass an expression without side effects.
 */
#define _sse_double_prefetch_nta_spinor(addr) \
_ASM ("prefetchnta %0 \n\t" \
      "prefetchnta %1" \
      : \
      : \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL))), \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL) + 128)))
/*
 * Issues two prefetcht0 hints: one for addr rounded down to a 128-byte
 * boundary, one for the address 128 bytes after that.
 *
 * The rounding is done by subtracting the low 7 address bits from the
 * pointer itself. The previous form cast the pointer to `unsigned int`,
 * which drops the upper address bits on 64-bit targets and sends the hint
 * to an unrelated address.
 *
 * addr is evaluated more than once; pass an expression without side effects.
 */
#define _sse_double_prefetch_spinor(addr) \
_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL))), \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL) + 128)))
/*
 * Issues two prefetcht0 hints: one for addr rounded down to a 128-byte
 * boundary, one for the address 128 bytes after that.
 *
 * The rounding is done by subtracting the low 7 address bits from the
 * pointer itself. The previous form cast the pointer to `unsigned int`,
 * which drops the upper address bits on 64-bit targets and sends the hint
 * to an unrelated address.
 *
 * addr is evaluated more than once; pass an expression without side effects.
 */
#define _sse_double_prefetch_su3(addr) \
_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL))), \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL) + 128)))
/*
 * Stores xmm0, xmm1, xmm2 to (r).c1, (r).c2, (r).c3.
 *
 * r must be an lvalue with members c1, c2, c3; movapd requires each member
 * to be 16-byte aligned. The register contents are whatever a preceding asm
 * macro left there; this macro does not modify them.
 */
#define _sse_double_store(r) \
_ASM ("movapd %%xmm0, %0 \n\t" \
      "movapd %%xmm1, %1 \n\t" \
      "movapd %%xmm2, %2" \
      : \
      "=m" ((r).c1), \
      "=m" ((r).c2), \
      "=m" ((r).c3))
/*
 * Stores xmm0, xmm1, xmm2 to the memory operands c1, c2, c3.
 *
 * c1, c2, c3 must be lvalues; movapd requires each of them to be 16-byte
 * aligned. The registers are read only.
 */
#define _sse_double_store_123(c1, c2, c3) \
_ASM ("movapd %%xmm0, %0 \n\t" \
      "movapd %%xmm1, %1 \n\t" \
      "movapd %%xmm2, %2" \
      : \
      "=m" (c1), \
      "=m" (c2), \
      "=m" (c3))
/*
 * Stores xmm3, xmm4, xmm5 to (r).c1, (r).c2, (r).c3.
 *
 * r must be an lvalue with members c1, c2, c3; movapd requires each member
 * to be 16-byte aligned. The registers are read only.
 */
#define _sse_double_store_up(r) \
_ASM ("movapd %%xmm3, %0 \n\t" \
      "movapd %%xmm4, %1 \n\t" \
      "movapd %%xmm5, %2" \
      : \
      "=m" ((r).c1), \
      "=m" ((r).c2), \
      "=m" ((r).c3))
/*
 * Stores xmm3, xmm4, xmm5 to the memory operands c1, c2, c3.
 *
 * c1, c2, c3 must be lvalues; movapd requires each of them to be 16-byte
 * aligned. The registers are read only.
 */
#define _sse_double_store_up_123(c1, c2, c3) \
_ASM ("movapd %%xmm3, %0 \n\t" \
      "movapd %%xmm4, %1 \n\t" \
      "movapd %%xmm5, %2" \
      : \
      "=m" (c1), \
      "=m" (c2), \
      "=m" (c3))
| #define _sse_double_su3_inverse_multiply | ( | u | ) |
| #define _sse_double_su3_multiply | ( | u | ) |
| #define _sse_double_sub_16 | ( | r, | |||
| s | ) |
/*
 * Packed-double add of the upper register triple into the lower one:
 * xmm0 += xmm3, xmm1 += xmm4, xmm2 += xmm5.
 *
 * Takes no operands; it works purely on register contents left by earlier
 * asm macros. xmm0-xmm2 are declared as clobbered because they are
 * overwritten.
 */
#define _sse_double_vector_add() \
_ASM ("addpd %%xmm3, %%xmm0 \n\t" \
      "addpd %%xmm4, %%xmm1 \n\t" \
      "addpd %%xmm5, %%xmm2" \
      : \
      : \
      : \
      "xmm0", "xmm1", "xmm2")
/*
 * In each of xmm3, xmm4, xmm5: swaps the two doubles (shufpd $0x1), then
 * XORs the register with _sse_double_sgn.
 *
 * _sse_double_sgn is defined outside this block; presumably a sign-bit mask
 * chosen so that swap-then-XOR multiplies a (re, im) pair by i -- confirm
 * against its definition. xmm3-xmm5 are declared as clobbered because they
 * are overwritten.
 */
#define _sse_double_vector_i_mul() \
_ASM ("shufpd $0x1, %%xmm3, %%xmm3 \n\t" \
      "shufpd $0x1, %%xmm4, %%xmm4 \n\t" \
      "shufpd $0x1, %%xmm5, %%xmm5 \n\t" \
      "xorpd %0, %%xmm3 \n\t" \
      "xorpd %0, %%xmm4 \n\t" \
      "xorpd %0, %%xmm5" \
      : \
      : \
      "m" (_sse_double_sgn) \
      : \
      "xmm3", "xmm4", "xmm5")
/*
 * In each of xmm3, xmm4, xmm5: XORs the register with _sse_double_sgn, then
 * swaps the two doubles (shufpd $0x1). This is the same pair of steps as
 * _sse_double_vector_i_mul in the opposite order.
 *
 * _sse_double_sgn is defined outside this block; presumably a sign-bit mask
 * chosen so that XOR-then-swap multiplies a (re, im) pair by -i -- confirm
 * against its definition. xmm3-xmm5 are declared as clobbered because they
 * are overwritten.
 */
#define _sse_double_vector_minus_i_mul() \
_ASM ("xorpd %0, %%xmm3 \n\t" \
      "xorpd %0, %%xmm4 \n\t" \
      "xorpd %0, %%xmm5 \n\t" \
      "shufpd $0x1, %%xmm3, %%xmm3 \n\t" \
      "shufpd $0x1, %%xmm4, %%xmm4 \n\t" \
      "shufpd $0x1, %%xmm5, %%xmm5" \
      : \
      : \
      "m" (_sse_double_sgn) \
      : \
      "xmm3", "xmm4", "xmm5")
| #define _sse_double_vector_mul | ( | c | ) |
/*
 * Combines xmm0-xmm2 with the two 16-byte memory operands x and y and leaves
 * the result in xmm3-xmm5. For each register pair (xmmK, xmmK+3), K = 0..2:
 *
 *   1. xmmK+3 = xmmK * y            (packed multiply)
 *   2. swap the two doubles of xmmK+3
 *   3. xmmK+3 ^= _sse_double_sgn
 *   4. xmmK   = xmmK * x            (packed multiply; xmmK is overwritten)
 *   5. xmmK+3 += xmmK
 *
 * Presumably x holds the real part and y the imaginary part of a complex
 * factor, each duplicated in both lanes, so that the result is a complex
 * product -- confirm against the callers. x and y must be 16-byte aligned
 * lvalues (memory operands of mulpd). _sse_double_sgn is defined outside
 * this block.
 *
 * All six registers are modified and therefore declared as clobbered.
 */
#define _sse_double_vector_mul_complex(x, y) \
_ASM ("movapd %%xmm0, %%xmm3 \n\t" \
      "movapd %%xmm1, %%xmm4 \n\t" \
      "movapd %%xmm2, %%xmm5 \n\t" \
      "mulpd %1, %%xmm3 \n\t" \
      "mulpd %1, %%xmm4 \n\t" \
      "mulpd %1, %%xmm5 \n\t" \
      "shufpd $0x1, %%xmm3, %%xmm3 \n\t" \
      "shufpd $0x1, %%xmm4, %%xmm4 \n\t" \
      "shufpd $0x1, %%xmm5, %%xmm5 \n\t" \
      "xorpd %2, %%xmm3 \n\t" \
      "xorpd %2, %%xmm4 \n\t" \
      "xorpd %2, %%xmm5 \n\t" \
      "mulpd %0, %%xmm0 \n\t" \
      "mulpd %0, %%xmm1 \n\t" \
      "mulpd %0, %%xmm2 \n\t" \
      "addpd %%xmm0, %%xmm3 \n\t" \
      "addpd %%xmm1, %%xmm4 \n\t" \
      "addpd %%xmm2, %%xmm5" \
      : \
      : \
      "m" (x), \
      "m" (y), \
      "m" (_sse_double_sgn) \
      : \
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5")
/*
 * Packed-double subtract of the upper register triple from the lower one:
 * xmm0 -= xmm3, xmm1 -= xmm4, xmm2 -= xmm5.
 *
 * Takes no operands; it works purely on register contents left by earlier
 * asm macros. xmm0-xmm2 are declared as clobbered because they are
 * overwritten.
 */
#define _sse_double_vector_sub() \
_ASM ("subpd %%xmm3, %%xmm0 \n\t" \
      "subpd %%xmm4, %%xmm1 \n\t" \
      "subpd %%xmm5, %%xmm2" \
      : \
      : \
      : \
      "xmm0", "xmm1", "xmm2")
/*
 * Packs two objects into xmm0-xmm2: the 8 bytes at (sl).cN go into the low
 * half and the 8 bytes at (sh).cN into the high half of the N-th register
 * (c1 -> xmm0, c2 -> xmm1, c3 -> xmm2).
 *
 * sl and sh must be lvalues with members c1, c2, c3. Presumably each member
 * is one single-precision complex number -- confirm against the type
 * definition, which is outside this block. xmm0-xmm2 are declared as
 * clobbered because they are overwritten.
 */
#define _sse_float_pair_load(sl, sh) \
_ASM ("movlps %0, %%xmm0 \n\t" \
      "movlps %1, %%xmm1 \n\t" \
      "movlps %2, %%xmm2 \n\t" \
      "movhps %3, %%xmm0 \n\t" \
      "movhps %4, %%xmm1 \n\t" \
      "movhps %5, %%xmm2" \
      : \
      : \
      "m" ((sl).c1), \
      "m" ((sl).c2), \
      "m" ((sl).c3), \
      "m" ((sh).c1), \
      "m" ((sh).c2), \
      "m" ((sh).c3) \
      : \
      "xmm0", "xmm1", "xmm2")
/*
 * Packs two objects into xmm3-xmm5: the 8 bytes at (sl).cN go into the low
 * half and the 8 bytes at (sh).cN into the high half of the N-th register
 * (c1 -> xmm3, c2 -> xmm4, c3 -> xmm5).
 *
 * sl and sh must be lvalues with members c1, c2, c3. Presumably each member
 * is one single-precision complex number -- confirm against the type
 * definition, which is outside this block. xmm3-xmm5 are declared as
 * clobbered because they are overwritten.
 */
#define _sse_float_pair_load_up(sl, sh) \
_ASM ("movlps %0, %%xmm3 \n\t" \
      "movlps %1, %%xmm4 \n\t" \
      "movlps %2, %%xmm5 \n\t" \
      "movhps %3, %%xmm3 \n\t" \
      "movhps %4, %%xmm4 \n\t" \
      "movhps %5, %%xmm5" \
      : \
      : \
      "m" ((sl).c1), \
      "m" ((sl).c2), \
      "m" ((sl).c3), \
      "m" ((sh).c1), \
      "m" ((sh).c2), \
      "m" ((sh).c3) \
      : \
      "xmm3", "xmm4", "xmm5")
/*
 * Unpacks xmm0-xmm2 into two objects: the low half of the N-th register is
 * written to (rl).cN and the high half to (rh).cN
 * (xmm0 -> c1, xmm1 -> c2, xmm2 -> c3), 8 bytes each.
 *
 * rl and rh must be lvalues with members c1, c2, c3. The registers are read
 * only.
 */
#define _sse_float_pair_store(rl, rh) \
_ASM ("movlps %%xmm0, %0 \n\t" \
      "movlps %%xmm1, %1 \n\t" \
      "movlps %%xmm2, %2 \n\t" \
      "movhps %%xmm0, %3 \n\t" \
      "movhps %%xmm1, %4 \n\t" \
      "movhps %%xmm2, %5" \
      : \
      "=m" ((rl).c1), \
      "=m" ((rl).c2), \
      "=m" ((rl).c3), \
      "=m" ((rh).c1), \
      "=m" ((rh).c2), \
      "=m" ((rh).c3))
/*
 * Unpacks xmm3-xmm5 into two objects: the low half of the N-th register is
 * written to (rl).cN and the high half to (rh).cN
 * (xmm3 -> c1, xmm4 -> c2, xmm5 -> c3), 8 bytes each.
 *
 * rl and rh must be lvalues with members c1, c2, c3. The registers are read
 * only.
 */
#define _sse_float_pair_store_up(rl, rh) \
_ASM ("movlps %%xmm3, %0 \n\t" \
      "movlps %%xmm4, %1 \n\t" \
      "movlps %%xmm5, %2 \n\t" \
      "movhps %%xmm3, %3 \n\t" \
      "movhps %%xmm4, %4 \n\t" \
      "movhps %%xmm5, %5" \
      : \
      "=m" ((rl).c1), \
      "=m" ((rl).c2), \
      "=m" ((rl).c3), \
      "=m" ((rh).c1), \
      "=m" ((rh).c2), \
      "=m" ((rh).c3))
/*
 * Issues two prefetcht0 hints: one for addr rounded down to a 128-byte
 * boundary, one for the address 128 bytes after that.
 *
 * The rounding is done by subtracting the low 7 address bits from the
 * pointer itself. The previous form cast the pointer to `unsigned int`,
 * which drops the upper address bits on 64-bit targets and sends the hint
 * to an unrelated address.
 *
 * addr is evaluated more than once; pass an expression without side effects.
 */
#define _sse_float_prefetch_spinor(addr) \
_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL))), \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL) + 128)))
/*
 * Issues two prefetcht0 hints: one for addr rounded down to a 128-byte
 * boundary, one for the address 128 bytes after that.
 *
 * The rounding is done by subtracting the low 7 address bits from the
 * pointer itself. The previous form cast the pointer to `unsigned int`,
 * which drops the upper address bits on 64-bit targets and sends the hint
 * to an unrelated address.
 *
 * addr is evaluated more than once; pass an expression without side effects.
 */
#define _sse_float_prefetch_su3(addr) \
_ASM ("prefetcht0 %0 \n\t" \
      "prefetcht0 %1" \
      : \
      : \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL))), \
      "m" (*((const char *)(addr) - ((unsigned long)(addr) & 0x7fUL) + 128)))
| #define _sse_float_su3_inverse_multiply | ( | u | ) |
| #define _sse_float_su3_multiply | ( | u | ) |
/*
 * Packed-single add of the upper register triple into the lower one:
 * xmm0 += xmm3, xmm1 += xmm4, xmm2 += xmm5.
 *
 * Takes no operands; it works purely on register contents left by earlier
 * asm macros. xmm0-xmm2 are declared as clobbered because they are
 * overwritten.
 */
#define _sse_float_vector_add() \
_ASM ("addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      : \
      "xmm0", "xmm1", "xmm2")
/*
 * Multiplies xmm3-xmm5 lane-wise by _sse_float_sgn34, then adds them to
 * xmm0-xmm2.
 *
 * _sse_float_sgn34 is defined outside this block; presumably a vector of
 * +1/-1 factors that turns the add into a subtract in some lanes -- confirm
 * against its definition. It is used as a mulps memory operand and so must
 * be 16-byte aligned.
 *
 * xmm3-xmm5 are left holding the sign-multiplied values, so all six
 * registers are declared as clobbered.
 */
#define _sse_float_vector_addsub() \
_ASM ("mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn34) \
      : \
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5")
/*
 * In xmm3-xmm5, swaps the floats within each adjacent pair
 * (shufps $0xb1: lanes 0<->1 and 2<->3), multiplies lane-wise by
 * _sse_float_sgn13, then adds the result to xmm0-xmm2.
 *
 * _sse_float_sgn13 is defined outside this block; presumably a vector of
 * +1/-1 factors -- confirm against its definition. It is used as a mulps
 * memory operand and so must be 16-byte aligned.
 *
 * xmm3-xmm5 are left holding the shuffled, sign-multiplied values, so all
 * six registers are declared as clobbered.
 */
#define _sse_float_vector_i_add() \
_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn13) \
      : \
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5")
/*
 * In xmm3-xmm5, swaps the floats within each adjacent pair
 * (shufps $0xb1: lanes 0<->1 and 2<->3), multiplies lane-wise by
 * _sse_float_sgn14, then adds the result to xmm0-xmm2.
 *
 * _sse_float_sgn14 is defined outside this block; presumably a vector of
 * +1/-1 factors -- confirm against its definition. It is used as a mulps
 * memory operand and so must be 16-byte aligned.
 *
 * xmm3-xmm5 are left holding the shuffled, sign-multiplied values, so all
 * six registers are declared as clobbered.
 */
#define _sse_float_vector_i_addsub() \
_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn14) \
      : \
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5")
/*
 * In xmm3-xmm5, swaps the floats within each adjacent pair
 * (shufps $0xb1: lanes 0<->1 and 2<->3), multiplies lane-wise by
 * _sse_float_sgn24, then adds the result to xmm0-xmm2.
 *
 * _sse_float_sgn24 is defined outside this block; presumably a vector of
 * +1/-1 factors -- confirm against its definition. It is used as a mulps
 * memory operand and so must be 16-byte aligned.
 *
 * xmm3-xmm5 are left holding the shuffled, sign-multiplied values, so all
 * six registers are declared as clobbered.
 */
#define _sse_float_vector_i_sub() \
_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn24) \
      : \
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5")
/*
 * In xmm3-xmm5, swaps the floats within each adjacent pair
 * (shufps $0xb1: lanes 0<->1 and 2<->3), multiplies lane-wise by
 * _sse_float_sgn23, then adds the result to xmm0-xmm2.
 *
 * _sse_float_sgn23 is defined outside this block; presumably a vector of
 * +1/-1 factors -- confirm against its definition. It is used as a mulps
 * memory operand and so must be 16-byte aligned.
 *
 * xmm3-xmm5 are left holding the shuffled, sign-multiplied values, so all
 * six registers are declared as clobbered.
 */
#define _sse_float_vector_i_subadd() \
_ASM ("shufps $0xb1, %%xmm3, %%xmm3 \n\t" \
      "shufps $0xb1, %%xmm4, %%xmm4 \n\t" \
      "shufps $0xb1, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn23) \
      : \
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5")
| #define _sse_float_vector_load | ( | s | ) |
| #define _sse_float_vector_load_up | ( | s | ) |
| #define _sse_float_vector_mul | ( | c | ) |
/*
 * Stores xmm0, xmm1, xmm2 to (r).c1, (r).c2, (r).c3 as full 16-byte values.
 *
 * r must be an lvalue with members c1, c2, c3; movaps requires each member
 * to be 16-byte aligned. The registers are read only.
 */
#define _sse_float_vector_store(r) \
_ASM ("movaps %%xmm0, %0 \n\t" \
      "movaps %%xmm1, %1 \n\t" \
      "movaps %%xmm2, %2" \
      : \
      "=m" ((r).c1), \
      "=m" ((r).c2), \
      "=m" ((r).c3))
/*
 * Packed-single subtract of the upper register triple from the lower one:
 * xmm0 -= xmm3, xmm1 -= xmm4, xmm2 -= xmm5.
 *
 * Takes no operands; it works purely on register contents left by earlier
 * asm macros. xmm0-xmm2 are declared as clobbered because they are
 * overwritten.
 */
#define _sse_float_vector_sub() \
_ASM ("subps %%xmm3, %%xmm0 \n\t" \
      "subps %%xmm4, %%xmm1 \n\t" \
      "subps %%xmm5, %%xmm2" \
      : \
      : \
      : \
      "xmm0", "xmm1", "xmm2")
/*
 * Multiplies xmm3-xmm5 lane-wise by _sse_float_sgn12, then adds them to
 * xmm0-xmm2.
 *
 * _sse_float_sgn12 is defined outside this block; presumably a vector of
 * +1/-1 factors that turns the add into a subtract in some lanes -- confirm
 * against its definition. It is used as a mulps memory operand and so must
 * be 16-byte aligned.
 *
 * xmm3-xmm5 are left holding the sign-multiplied values, so all six
 * registers are declared as clobbered.
 */
#define _sse_float_vector_subadd() \
_ASM ("mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn12) \
      : \
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5")
/*
 * Exchanges the low and high 64-bit halves of each of xmm3, xmm4, xmm5
 * (shufps $0x4e: lanes 0,1,2,3 become 2,3,0,1).
 *
 * Takes no operands. xmm3-xmm5 are declared as clobbered because they are
 * overwritten.
 */
#define _sse_float_vector_xch() \
_ASM ("shufps $0x4e, %%xmm3, %%xmm3 \n\t" \
      "shufps $0x4e, %%xmm4, %%xmm4 \n\t" \
      "shufps $0x4e, %%xmm5, %%xmm5" \
      : \
      : \
      : \
      "xmm3", "xmm4", "xmm5")
/*
 * In xmm3-xmm5, reverses the order of the four floats
 * (shufps $0x1b: lanes 0,1,2,3 become 3,2,1,0), multiplies lane-wise by
 * _sse_float_sgn13, then adds the result to xmm0-xmm2.
 *
 * _sse_float_sgn13 is defined outside this block; presumably a vector of
 * +1/-1 factors -- confirm against its definition. It is used as a mulps
 * memory operand and so must be 16-byte aligned.
 *
 * xmm3-xmm5 are left holding the shuffled, sign-multiplied values, so all
 * six registers are declared as clobbered.
 */
#define _sse_float_vector_xch_i_add() \
_ASM ("shufps $0x1b, %%xmm3, %%xmm3 \n\t" \
      "shufps $0x1b, %%xmm4, %%xmm4 \n\t" \
      "shufps $0x1b, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn13) \
      : \
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5")
/*
 * In xmm3-xmm5, reverses the order of the four floats
 * (shufps $0x1b: lanes 0,1,2,3 become 3,2,1,0), multiplies lane-wise by
 * _sse_float_sgn24, then adds the result to xmm0-xmm2.
 *
 * _sse_float_sgn24 is defined outside this block; presumably a vector of
 * +1/-1 factors -- confirm against its definition. It is used as a mulps
 * memory operand and so must be 16-byte aligned.
 *
 * xmm3-xmm5 are left holding the shuffled, sign-multiplied values, so all
 * six registers are declared as clobbered.
 */
#define _sse_float_vector_xch_i_sub() \
_ASM ("shufps $0x1b, %%xmm3, %%xmm3 \n\t" \
      "shufps $0x1b, %%xmm4, %%xmm4 \n\t" \
      "shufps $0x1b, %%xmm5, %%xmm5 \n\t" \
      "mulps %0, %%xmm3 \n\t" \
      "mulps %0, %%xmm4 \n\t" \
      "mulps %0, %%xmm5 \n\t" \
      "addps %%xmm3, %%xmm0 \n\t" \
      "addps %%xmm4, %%xmm1 \n\t" \
      "addps %%xmm5, %%xmm2" \
      : \
      : \
      "m" (_sse_float_sgn24) \
      : \
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5")
/* Declaration suffix requesting 16-byte alignment (GCC attribute syntax). */
#define ALIGN16 __attribute__ ((aligned (16)))
/* Declaration suffix requesting 64-byte alignment (GCC attribute syntax). */
#define ALIGN64 __attribute__ ((aligned (64)))
1.6.1