head	1.2;
access;
symbols
	perseant-exfatfs-base-20250801:1.2
	perseant-exfatfs-base-20240630:1.2
	perseant-exfatfs:1.2.0.8
	perseant-exfatfs-base:1.2
	netbsd-8-3-RELEASE:1.1.1.1
	cjep_sun2x:1.2.0.6
	cjep_sun2x-base:1.2
	cjep_staticlib_x-base1:1.2
	cjep_staticlib_x:1.2.0.4
	cjep_staticlib_x-base:1.2
	phil-wifi-20200421:1.2
	phil-wifi-20200411:1.2
	phil-wifi-20200406:1.2
	netbsd-8-2-RELEASE:1.1.1.1
	netbsd-8-1-RELEASE:1.1.1.1
	netbsd-8-1-RC1:1.1.1.1
	pgoyette-compat-merge-20190127:1.2
	pgoyette-compat-20190127:1.2
	pgoyette-compat-20190118:1.2
	pgoyette-compat-1226:1.2
	pgoyette-compat-1126:1.2
	pgoyette-compat-1020:1.2
	pgoyette-compat-0930:1.2
	pgoyette-compat-0906:1.2
	netbsd-7-2-RELEASE:1.1.1.1
	pgoyette-compat-0728:1.2
	netbsd-8-0-RELEASE:1.1.1.1
	pgoyette-compat-0625:1.2
	netbsd-8-0-RC2:1.1.1.1
	pgoyette-compat-0521:1.2
	pgoyette-compat-0502:1.2
	pgoyette-compat-0422:1.2
	netbsd-8-0-RC1:1.1.1.1
	pgoyette-compat-0415:1.2
	pgoyette-compat-0407:1.2
	pgoyette-compat-0330:1.2
	pgoyette-compat-0322:1.2
	pgoyette-compat-0315:1.2
	netbsd-7-1-2-RELEASE:1.1.1.1
	pgoyette-compat:1.2.0.2
	pgoyette-compat-base:1.2
	netbsd-7-1-1-RELEASE:1.1.1.1
	matt-nb8-mediatek:1.1.1.1.0.26
	matt-nb8-mediatek-base:1.1.1.1
	perseant-stdc-iso10646:1.1.1.1.0.24
	perseant-stdc-iso10646-base:1.1.1.1
	netbsd-8:1.1.1.1.0.22
	netbsd-8-base:1.1.1.1
	prg-localcount2-base3:1.1.1.1
	prg-localcount2-base2:1.1.1.1
	prg-localcount2-base1:1.1.1.1
	prg-localcount2:1.1.1.1.0.20
	prg-localcount2-base:1.1.1.1
	pgoyette-localcount-20170426:1.1.1.1
	bouyer-socketcan-base1:1.1.1.1
	pgoyette-localcount-20170320:1.1.1.1
	netbsd-7-1:1.1.1.1.0.18
	netbsd-7-1-RELEASE:1.1.1.1
	netbsd-7-1-RC2:1.1.1.1
	netbsd-7-nhusb-base-20170116:1.1.1.1
	bouyer-socketcan:1.1.1.1.0.16
	bouyer-socketcan-base:1.1.1.1
	pgoyette-localcount-20170107:1.1.1.1
	netbsd-7-1-RC1:1.1.1.1
	pgoyette-localcount-20161104:1.1.1.1
	netbsd-7-0-2-RELEASE:1.1.1.1
	localcount-20160914:1.1.1.1
	netbsd-7-nhusb:1.1.1.1.0.14
	netbsd-7-nhusb-base:1.1.1.1
	pgoyette-localcount-20160806:1.1.1.1
	pgoyette-localcount-20160726:1.1.1.1
	pgoyette-localcount:1.1.1.1.0.12
	pgoyette-localcount-base:1.1.1.1
	netbsd-7-0-1-RELEASE:1.1.1.1
	netbsd-7-0:1.1.1.1.0.10
	netbsd-7-0-RELEASE:1.1.1.1
	netbsd-7-0-RC3:1.1.1.1
	netbsd-7-0-RC2:1.1.1.1
	netbsd-7-0-RC1:1.1.1.1
	tls-maxphys-base:1.1.1.1
	tls-maxphys:1.1.1.1.0.8
	netbsd-7:1.1.1.1.0.6
	netbsd-7-base:1.1.1.1
	yamt-pagecache:1.1.1.1.0.4
	yamt-pagecache-base9:1.1.1.1
	tls-earlyentropy:1.1.1.1.0.2
	tls-earlyentropy-base:1.1.1.1
	riastradh-xf86-video-intel-2-7-1-pre-2-21-15:1.1.1.1
	riastradh-drm2-base3:1.1.1.1
	gmp-5-1-3:1.1.1.1
	gmp:1.1.1;
locks; strict;
comment	@;; @;


1.2
date	2017.08.22.09.55.46;	author mrg;	state dead;
branches;
next	1.1;
commitid	DVNRZh45aSIRZb4A;

1.1
date	2013.11.29.07.49.48;	author mrg;	state Exp;
branches
	1.1.1.1;
next	;
commitid	L2Av4PuGmdoL39fx;

1.1.1.1
date	2013.11.29.07.49.48;	author mrg;	state Exp;
branches
	1.1.1.1.4.1
	1.1.1.1.8.1;
next	;
commitid	L2Av4PuGmdoL39fx;

1.1.1.1.4.1
date	2013.11.29.07.49.48;	author yamt;	state dead;
branches;
next	1.1.1.1.4.2;
commitid	nx2BSsHy0NPeAxBx;

1.1.1.1.4.2
date	2014.05.22.14.09.06;	author yamt;	state Exp;
branches;
next	;
commitid	nx2BSsHy0NPeAxBx;

1.1.1.1.8.1
date	2013.11.29.07.49.48;	author tls;	state dead;
branches;
next	1.1.1.1.8.2;
commitid	jTnpym9Qu0o4R1Nx;

1.1.1.1.8.2
date	2014.08.19.23.59.55;	author tls;	state Exp;
branches;
next	;
commitid	jTnpym9Qu0o4R1Nx;


desc
@@


1.2
log
@merge GMP 6.1.2.
@
text
@dnl  AMD64 mpn_tabselect.

dnl  Copyright 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.5
C AMD K10	 2.5
C AMD bobcat	 3.5
C Intel P4	 4
C Intel core2	 2.33
C Intel NHM	 2.5
C Intel SBR	 2.2
C Intel atom	 5
C VIA nano	 3.5

C NOTES
C  * This has not been tuned for any specific processor.  Its speed should not
C    be too bad, though.
C  * Using SSE2/AVX2 could result in many-fold speedup.

C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which) -- copy n-limb table entry number which (0-based) from tp to rp
define(`rp',     `%rdi')
define(`tp',     `%rsi')
define(`n',      `%rdx')
define(`nents',  `%rcx')
define(`which',  `%r8')

define(`i',      `%rbp')
define(`maskp',  `%r11')
define(`maskn',  `%r12')

C rax rbx  rcx  rdx rdi rsi rbp (rsp)  r8   r9 r10 r11 r12 r13 r14 r15
C         nents  n  rp  tab           which

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_tabselect)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8d	')
	push	%rbx			C save rbx, rbp (i) and r12 (maskn), all written below
	push	%rbp
	push	%r12

	lea	(rp,n,8), rp		C rp = one past the last destination limb
	lea	(tp,n,8), tp		C tp = one past the last limb of table entry 0
	sub	nents, which		C bias: which + nents is 0 only in the selected pass
L(outer):
	lea	(which,nents), %rax	C rax = 0 iff this pass handles the selected entry
	neg	%rax			C set CF iff 'which' != k
	sbb	maskn, maskn		C maskn = all ones if CF set, else 0
	mov	maskn, maskp
	not	maskp			C maskp = complement of maskn

	mov	n, i
	neg	i			C i = -n, negative index counting up to 0
	test	$1, R32(n)
	je	L(top)			C even n: go straight to the two-limb loop
	mov	(tp,i,8), %rax		C odd n: merge one limb before the loop
	and	maskp, %rax		C table limb survives only in the selected pass
	mov	(rp,i,8), %r9
	and	maskn, %r9		C current rp limb survives in all other passes
	or	%r9, %rax
	mov	%rax, (rp,i,8)
	add	$1, i
	jns	L(end)			C i is 0 already (n was 1): no pairs to process

	ALIGN(16)
L(top):	mov	(tp,i,8), %rax		C same masked merge, two limbs per iteration
	mov	8(tp,i,8), %rbx
	and	maskp, %rax
	and	maskp, %rbx
	mov	(rp,i,8), %r9
	mov	8(rp,i,8), %r10
	and	maskn, %r9
	and	maskn, %r10
	or	%r9, %rax
	or	%r10, %rbx
	mov	%rax, (rp,i,8)		C rp is rewritten in every pass, selected or not
	mov	%rbx, 8(rp,i,8)
	add	$2, i
	js	L(top)			C loop while i is still negative

L(end):	lea	(tp,n,8), tp		C advance tp by n limbs to the next table entry
	dec	nents			C one entry done
	jne	L(outer)		C repeat until nents reaches 0

L(outer_end):				C no jump in this function targets this label
	pop	%r12			C restore in reverse order of the pushes
	pop	%rbp
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()
@


1.1
log
@Initial revision
@
text
@@


1.1.1.1
log
@initial import GMP 5.1.3 sources.  changes include:

fixes for:
- mpn_sbpi1_div_qr_sec and mpn_sbpi1_div_r_sec
- mpz_powm_ui
- AMD family 11h
- mpz_powm_sec and mpn_powm_sec
- ASSERT() fixes
- gcd, gcdext, and invert function fixes
- some PPC division operations
@
text
@@


1.1.1.1.8.1
log
@file tabselect.asm was added on branch tls-maxphys on 2014-08-19 23:59:55 +0000
@
text
@d1 115
@


1.1.1.1.8.2
log
@Rebase to HEAD as of a few days ago.
@
text
@a0 115
dnl  AMD64 mpn_tabselect.

dnl  Copyright 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.5
C AMD K10	 2.5
C AMD bobcat	 3.5
C Intel P4	 4
C Intel core2	 2.33
C Intel NHM	 2.5
C Intel SBR	 2.2
C Intel atom	 5
C VIA nano	 3.5

C NOTES
C  * This has not been tuned for any specific processor.  Its speed should not
C    be too bad, though.
C  * Using SSE2/AVX2 could result in many-fold speedup.

C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which) -- copy n-limb table entry number which (0-based) from tp to rp
define(`rp',     `%rdi')
define(`tp',     `%rsi')
define(`n',      `%rdx')
define(`nents',  `%rcx')
define(`which',  `%r8')

define(`i',      `%rbp')
define(`maskp',  `%r11')
define(`maskn',  `%r12')

C rax rbx  rcx  rdx rdi rsi rbp (rsp)  r8   r9 r10 r11 r12 r13 r14 r15
C         nents  n  rp  tab           which

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_tabselect)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8d	')
	push	%rbx			C save rbx, rbp (i) and r12 (maskn), all written below
	push	%rbp
	push	%r12

	lea	(rp,n,8), rp		C rp = one past the last destination limb
	lea	(tp,n,8), tp		C tp = one past the last limb of table entry 0
	sub	nents, which		C bias: which + nents is 0 only in the selected pass
L(outer):
	lea	(which,nents), %rax	C rax = 0 iff this pass handles the selected entry
	neg	%rax			C set CF iff 'which' != k
	sbb	maskn, maskn		C maskn = all ones if CF set, else 0
	mov	maskn, maskp
	not	maskp			C maskp = complement of maskn

	mov	n, i
	neg	i			C i = -n, negative index counting up to 0
	test	$1, R32(n)
	je	L(top)			C even n: go straight to the two-limb loop
	mov	(tp,i,8), %rax		C odd n: merge one limb before the loop
	and	maskp, %rax		C table limb survives only in the selected pass
	mov	(rp,i,8), %r9
	and	maskn, %r9		C current rp limb survives in all other passes
	or	%r9, %rax
	mov	%rax, (rp,i,8)
	add	$1, i
	jns	L(end)			C i is 0 already (n was 1): no pairs to process

	ALIGN(16)
L(top):	mov	(tp,i,8), %rax		C same masked merge, two limbs per iteration
	mov	8(tp,i,8), %rbx
	and	maskp, %rax
	and	maskp, %rbx
	mov	(rp,i,8), %r9
	mov	8(rp,i,8), %r10
	and	maskn, %r9
	and	maskn, %r10
	or	%r9, %rax
	or	%r10, %rbx
	mov	%rax, (rp,i,8)		C rp is rewritten in every pass, selected or not
	mov	%rbx, 8(rp,i,8)
	add	$2, i
	js	L(top)			C loop while i is still negative

L(end):	lea	(tp,n,8), tp		C advance tp by n limbs to the next table entry
	dec	nents			C one entry done
	jne	L(outer)		C repeat until nents reaches 0

L(outer_end):				C no jump in this function targets this label
	pop	%r12			C restore in reverse order of the pushes
	pop	%rbp
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()
@


1.1.1.1.4.1
log
@file tabselect.asm was added on branch yamt-pagecache on 2014-05-22 14:09:06 +0000
@
text
@d1 115
@


1.1.1.1.4.2
log
@sync with head.

for a reference, the tree before this commit was tagged
as yamt-pagecache-tag8.

this commit was splitted into small chunks to avoid
a limitation of cvs.  ("Protocol error: too many arguments")
@
text
@a0 115
dnl  AMD64 mpn_tabselect.

dnl  Copyright 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.5
C AMD K10	 2.5
C AMD bobcat	 3.5
C Intel P4	 4
C Intel core2	 2.33
C Intel NHM	 2.5
C Intel SBR	 2.2
C Intel atom	 5
C VIA nano	 3.5

C NOTES
C  * This has not been tuned for any specific processor.  Its speed should not
C    be too bad, though.
C  * Using SSE2/AVX2 could result in many-fold speedup.

C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which) -- copy n-limb table entry number which (0-based) from tp to rp
define(`rp',     `%rdi')
define(`tp',     `%rsi')
define(`n',      `%rdx')
define(`nents',  `%rcx')
define(`which',  `%r8')

define(`i',      `%rbp')
define(`maskp',  `%r11')
define(`maskn',  `%r12')

C rax rbx  rcx  rdx rdi rsi rbp (rsp)  r8   r9 r10 r11 r12 r13 r14 r15
C         nents  n  rp  tab           which

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_tabselect)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8d	')
	push	%rbx			C save rbx, rbp (i) and r12 (maskn), all written below
	push	%rbp
	push	%r12

	lea	(rp,n,8), rp		C rp = one past the last destination limb
	lea	(tp,n,8), tp		C tp = one past the last limb of table entry 0
	sub	nents, which		C bias: which + nents is 0 only in the selected pass
L(outer):
	lea	(which,nents), %rax	C rax = 0 iff this pass handles the selected entry
	neg	%rax			C set CF iff 'which' != k
	sbb	maskn, maskn		C maskn = all ones if CF set, else 0
	mov	maskn, maskp
	not	maskp			C maskp = complement of maskn

	mov	n, i
	neg	i			C i = -n, negative index counting up to 0
	test	$1, R32(n)
	je	L(top)			C even n: go straight to the two-limb loop
	mov	(tp,i,8), %rax		C odd n: merge one limb before the loop
	and	maskp, %rax		C table limb survives only in the selected pass
	mov	(rp,i,8), %r9
	and	maskn, %r9		C current rp limb survives in all other passes
	or	%r9, %rax
	mov	%rax, (rp,i,8)
	add	$1, i
	jns	L(end)			C i is 0 already (n was 1): no pairs to process

	ALIGN(16)
L(top):	mov	(tp,i,8), %rax		C same masked merge, two limbs per iteration
	mov	8(tp,i,8), %rbx
	and	maskp, %rax
	and	maskp, %rbx
	mov	(rp,i,8), %r9
	mov	8(rp,i,8), %r10
	and	maskn, %r9
	and	maskn, %r10
	or	%r9, %rax
	or	%r10, %rbx
	mov	%rax, (rp,i,8)		C rp is rewritten in every pass, selected or not
	mov	%rbx, 8(rp,i,8)
	add	$2, i
	js	L(top)			C loop while i is still negative

L(end):	lea	(tp,n,8), tp		C advance tp by n limbs to the next table entry
	dec	nents			C one entry done
	jne	L(outer)		C repeat until nents reaches 0

L(outer_end):				C no jump in this function targets this label
	pop	%r12			C restore in reverse order of the pushes
	pop	%rbp
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()
@


