head	1.2;
access;
symbols
	perseant-exfatfs-base-20250801:1.2
	perseant-exfatfs-base-20240630:1.2
	perseant-exfatfs:1.2.0.8
	perseant-exfatfs-base:1.2
	netbsd-8-3-RELEASE:1.1.1.1
	cjep_sun2x:1.2.0.6
	cjep_sun2x-base:1.2
	cjep_staticlib_x-base1:1.2
	cjep_staticlib_x:1.2.0.4
	cjep_staticlib_x-base:1.2
	phil-wifi-20200421:1.2
	phil-wifi-20200411:1.2
	phil-wifi-20200406:1.2
	netbsd-8-2-RELEASE:1.1.1.1
	netbsd-8-1-RELEASE:1.1.1.1
	netbsd-8-1-RC1:1.1.1.1
	pgoyette-compat-merge-20190127:1.2
	pgoyette-compat-20190127:1.2
	pgoyette-compat-20190118:1.2
	pgoyette-compat-1226:1.2
	pgoyette-compat-1126:1.2
	pgoyette-compat-1020:1.2
	pgoyette-compat-0930:1.2
	pgoyette-compat-0906:1.2
	netbsd-7-2-RELEASE:1.1.1.1
	pgoyette-compat-0728:1.2
	netbsd-8-0-RELEASE:1.1.1.1
	pgoyette-compat-0625:1.2
	netbsd-8-0-RC2:1.1.1.1
	pgoyette-compat-0521:1.2
	pgoyette-compat-0502:1.2
	pgoyette-compat-0422:1.2
	netbsd-8-0-RC1:1.1.1.1
	pgoyette-compat-0415:1.2
	pgoyette-compat-0407:1.2
	pgoyette-compat-0330:1.2
	pgoyette-compat-0322:1.2
	pgoyette-compat-0315:1.2
	netbsd-7-1-2-RELEASE:1.1.1.1
	pgoyette-compat:1.2.0.2
	pgoyette-compat-base:1.2
	netbsd-7-1-1-RELEASE:1.1.1.1
	matt-nb8-mediatek:1.1.1.1.0.26
	matt-nb8-mediatek-base:1.1.1.1
	perseant-stdc-iso10646:1.1.1.1.0.24
	perseant-stdc-iso10646-base:1.1.1.1
	netbsd-8:1.1.1.1.0.22
	netbsd-8-base:1.1.1.1
	prg-localcount2-base3:1.1.1.1
	prg-localcount2-base2:1.1.1.1
	prg-localcount2-base1:1.1.1.1
	prg-localcount2:1.1.1.1.0.20
	prg-localcount2-base:1.1.1.1
	pgoyette-localcount-20170426:1.1.1.1
	bouyer-socketcan-base1:1.1.1.1
	pgoyette-localcount-20170320:1.1.1.1
	netbsd-7-1:1.1.1.1.0.18
	netbsd-7-1-RELEASE:1.1.1.1
	netbsd-7-1-RC2:1.1.1.1
	netbsd-7-nhusb-base-20170116:1.1.1.1
	bouyer-socketcan:1.1.1.1.0.16
	bouyer-socketcan-base:1.1.1.1
	pgoyette-localcount-20170107:1.1.1.1
	netbsd-7-1-RC1:1.1.1.1
	pgoyette-localcount-20161104:1.1.1.1
	netbsd-7-0-2-RELEASE:1.1.1.1
	localcount-20160914:1.1.1.1
	netbsd-7-nhusb:1.1.1.1.0.14
	netbsd-7-nhusb-base:1.1.1.1
	pgoyette-localcount-20160806:1.1.1.1
	pgoyette-localcount-20160726:1.1.1.1
	pgoyette-localcount:1.1.1.1.0.12
	pgoyette-localcount-base:1.1.1.1
	netbsd-7-0-1-RELEASE:1.1.1.1
	netbsd-7-0:1.1.1.1.0.10
	netbsd-7-0-RELEASE:1.1.1.1
	netbsd-7-0-RC3:1.1.1.1
	netbsd-7-0-RC2:1.1.1.1
	netbsd-7-0-RC1:1.1.1.1
	tls-maxphys-base:1.1.1.1
	tls-maxphys:1.1.1.1.0.8
	netbsd-7:1.1.1.1.0.6
	netbsd-7-base:1.1.1.1
	yamt-pagecache:1.1.1.1.0.4
	yamt-pagecache-base9:1.1.1.1
	tls-earlyentropy:1.1.1.1.0.2
	tls-earlyentropy-base:1.1.1.1
	riastradh-xf86-video-intel-2-7-1-pre-2-21-15:1.1.1.1
	riastradh-drm2-base3:1.1.1.1
	gmp-5-1-3:1.1.1.1
	gmp:1.1.1;
locks; strict;
comment	@;; @;


1.2
date	2017.08.22.09.55.46;	author mrg;	state dead;
branches;
next	1.1;
commitid	DVNRZh45aSIRZb4A;

1.1
date	2013.11.29.07.49.48;	author mrg;	state Exp;
branches
	1.1.1.1;
next	;
commitid	L2Av4PuGmdoL39fx;

1.1.1.1
date	2013.11.29.07.49.48;	author mrg;	state Exp;
branches
	1.1.1.1.4.1
	1.1.1.1.8.1;
next	;
commitid	L2Av4PuGmdoL39fx;

1.1.1.1.4.1
date	2013.11.29.07.49.48;	author yamt;	state dead;
branches;
next	1.1.1.1.4.2;
commitid	nx2BSsHy0NPeAxBx;

1.1.1.1.4.2
date	2014.05.22.14.09.02;	author yamt;	state Exp;
branches;
next	;
commitid	nx2BSsHy0NPeAxBx;

1.1.1.1.8.1
date	2013.11.29.07.49.48;	author tls;	state dead;
branches;
next	1.1.1.1.8.2;
commitid	jTnpym9Qu0o4R1Nx;

1.1.1.1.8.2
date	2014.08.19.23.59.51;	author tls;	state Exp;
branches;
next	;
commitid	jTnpym9Qu0o4R1Nx;


desc
@@


1.2
log
@merge GMP 6.1.2.
@
text
@dnl  PowerPC-64 mpn_addlshC_n and mpn_sublshC_n, where C is a small constant.

dnl  Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

C                  cycles/limb
C POWER3/PPC630          1.83   (1.5 c/l should be possible)
C POWER4/PPC970          3      (2.0 c/l should be possible)
C POWER5                 3
C POWER6              3.5-47
C POWER7                 3

C STATUS
C  * Try combining upx+up, and vpx+vp.
C  * The worst case 47 c/l for POWER6 happens if the 3rd operand for ldx is
C    greater than the 2nd operand.  Yes, this addition is non-commutative wrt
C    performance.

C INPUT PARAMETERS
define(`rp', `r3')	C pointer through which the result limbs are stored
define(`up', `r4')	C pointer to the u limbs
define(`vp', `r5')	C pointer to the v limbs (the operand that is shifted by LSH)
define(`n',  `r6')	C limb count; the loop counter in ctr is derived from it
C Registers used only on the L(big) path; rpx reuses r6 once the count is in ctr
define(`rpx', `r6')	C second store pointer, advanced by stdu
define(`upx', `r7')	C offset added to rpx by ldx to reach u limbs
define(`vpx', `r12')	C offset added to rpx by ldx to reach v limbs
C Working registers
define(`s0', `r0')  define(`s1', `r9')	C shifted and merged v limbs, then result limbs
define(`u0', `r8')	C most recently loaded u limb
define(`v0', `r10') define(`v1', `r11')	C most recently loaded v limbs
C func combines each u limb with the v operand shifted left by LSH bits, using ADDSUBC/ADDSUBE.
C LSH, RSH, ADDSUBC, ADDSUBE, INITCY, RETVAL and func are not defined in this file.
ASM_START()
PROLOGUE(func)
	cmpldi	cr0, n, 13	C unsigned compare of n with 13
	bgt	L(big)		C n > 13: take the two-pointer loop at L(big)
C Small operands (n <= 13): single store pointer, two limbs per full loop pass
	mtctr	n		C copy n in ctr
	INITCY(	r0)		C clear cy

	ld	v0, 0(vp)	C load v limb
	ld	u0, 0(up)	C load u limb
	addi	up, up, -8	C bias up down one limb; the loop reads 16(up) and 8(up)
	addi	rp, rp, -8	C bias rp down one limb; stores use 8(rp) and 16(rp)
	sldi	s1, v0, LSH	C s1 = first v limb shifted left by LSH
	bdz	L(ex1)		C decrement ctr; if it reached zero (n was 1) skip loop

	ALIGN(16)
L(lo0):	ld	v1, 8(vp)	C load v limb
	ADDSUBE	s1, s1, u0	C add limbs with cy, set cy
	ldu	u0, 16(up)	C load u limb and update up
	srdi	s0, v0, RSH	C shift down previous v limb
	std	s1, 8(rp)	C store result limb
	rldimi	s0, v1, LSH, 0	C left shift v limb and merge with prev v limb
	bdz	L(ex0)		C decrement ctr and exit if done
	ldu	v0, 16(vp)	C load v limb and update vp
	ADDSUBE	s0, s0, u0	C add limbs with cy, set cy
	ld	u0, 8(up)	C load u limb
	srdi	s1, v1, RSH	C shift down previous v limb
	stdu	s0, 16(rp)	C store result limb and update rp
	rldimi	s1, v0, LSH, 0	C left shift v limb and merge with prev v limb
	bdnz	L(lo0)		C decrement ctr and loop back

L(ex1):	ADDSUBE	r7, s1, u0	C last limb when the loop was skipped or left at its bdnz
	std	r7, 8(rp)	C store last result limb
	srdi	r0, v0, RSH	C r0 = last v limb shifted right by RSH
	RETVAL(	r0)	C RETVAL is defined outside this file; presumably builds the return value from r0
	blr
L(ex0):	ADDSUBE	r7, s0, u0	C last limb when the loop was left at its mid-loop bdz
	std	r7, 16(rp)	C store last result limb
	srdi	r0, v1, RSH	C r0 = last v limb shifted right by RSH
	RETVAL(	r0)	C RETVAL is defined outside this file; presumably builds the return value from r0
	blr
C Large operands (n > 13): rp and rpx alternate as store pointers, each stepped two limbs
C by stdu; u and v limbs are fetched by ldx at rp+up, rp+vp, rpx+upx and rpx+vpx.
L(big):	rldicl.	r0, n, 0,63	C r0 = n & 1, set cr0
	addi	r6, n, -1	C ...for ctr
	srdi	r6, r6, 1	C ...for ctr
	mtctr	r6		C copy count into ctr
	beq	cr0, L(b0)	C n even: go to L(b0); n odd: fall through to L(b1)

L(b1):	ld	v1, 0(vp)	C load first v limb
	ld	u0, 0(up)	C load first u limb
	sldi	s1, v1, LSH	C s1 = first v limb shifted left by LSH
	srdi	s0, v1, RSH	C s0 = first v limb shifted right by RSH, merged into the next limb at L(mid)
	ld	v0, 8(vp)	C load second v limb
	ADDSUBC	s1, s1, u0	C add limbs without cy, set cy
	addi	rpx, rp, -16	C rpx starts one limb below rp (r6 is free after mtctr)
	addi	rp, rp, -8	C bias rp; each stdu stores at 16 past the pointer
	sub	upx, up, rp	C from here up, vp, upx and vpx hold offsets, not pointers
	sub	vpx, vp, rp
	sub	up, up, rpx
	sub	vp, vp, rpx
	addi	up, up, 8	C adjust the offsets so the ldx in the loop reach the next unread limbs
	addi	upx, upx, 16
	addi	vp, vp, 16
	addi	vpx, vpx, 24
	b	L(mid)		C odd n: join the loop at its second half

L(b0):	ld	v0, 0(vp)	C load first v limb
	ld	u0, 0(up)	C load first u limb
	sldi	s0, v0, LSH	C s0 = first v limb shifted left by LSH
	srdi	s1, v0, RSH	C s1 = first v limb shifted right by RSH, merged into the next limb at L(top)
	ld	v1, 8(vp)	C load second v limb
	ADDSUBC	s0, s0, u0	C add limbs without cy, set cy
	addi	rpx, rp, -8	C rpx starts one limb above rp (r6 is free after mtctr)
	addi	rp, rp, -16	C bias rp; each stdu stores at 16 past the pointer
	sub	upx, up, rpx	C from here up, vp, upx and vpx hold offsets, not pointers
	sub	vpx, vp, rpx
	sub	up, up, rp
	sub	vp, vp, rp
	addi	up, up, 8	C adjust the offsets so the ldx in the loop reach the next unread limbs
	addi	upx, upx, 16
	addi	vp, vp, 16
	addi	vpx, vpx, 24

	ALIGN(32)
L(top):	ldx	u0, rp, up	C load next u limb from rp + up
	ldx	v0, rp, vp	C load next v limb from rp + vp
	rldimi	s1, v1, LSH, 0	C left shift v limb and merge with prev v limb
	stdu	s0, 16(rp)	C store result limb and step rp by two limbs
	srdi	s0, v1, RSH	C shift down v limb for the next merge
	ADDSUBE	s1, s1, u0	C add limbs with cy, set cy
L(mid):	ldx	u0, rpx, upx	C load next u limb from rpx + upx
	ldx	v1, rpx, vpx	C load next v limb from rpx + vpx
	rldimi	s0, v0, LSH, 0	C left shift v limb and merge with prev v limb
	stdu	s1, 16(rpx)	C store result limb and step rpx by two limbs
	srdi	s1, v0, RSH	C shift down v limb for the next merge
	ADDSUBE	s0, s0, u0	C add limbs with cy, set cy
	bdnz	L(top)		C decrement CTR and loop back

	ldx	u0, rp, up	C load last u limb
	rldimi	s1, v1, LSH, 0	C left shift last v limb and merge with prev v limb
	std	s0, 16(rp)	C store next-to-last result limb
	srdi	s0, v1, RSH	C s0 is r0: last v limb shifted right by RSH, handed to RETVAL
	ADDSUBE	s1, s1, u0	C add limbs with cy, set cy
	std	s1, 24(rp)	C store last result limb

	RETVAL(	r0)	C RETVAL is defined outside this file; presumably builds the return value from r0
	blr
EPILOGUE()
@


1.1
log
@Initial revision
@
text
@@


1.1.1.1
log
@initial import GMP 5.1.3 sources.  changes include:

fixes for:
- mpn_sbpi1_div_qr_sec and mpn_sbpi1_div_r_sec
- mpz_powm_ui
- AMD family 11h
- mpz_powm_sec and mpn_powm_sec
- ASSERT() fixes
- gcd, gcdext, and invert function fixes
- some PPC division operations
@
text
@@


1.1.1.1.8.1
log
@file aorslshC_n.asm was added on branch tls-maxphys on 2014-08-19 23:59:51 +0000
@
text
@d1 156
@


1.1.1.1.8.2
log
@Rebase to HEAD as of a few days ago.
@
text
@a0 156
dnl  PowerPC-64 mpn_addlshC_n and mpn_sublshC_n, where C is a small constant.

dnl  Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

C                  cycles/limb
C POWER3/PPC630          1.83   (1.5 c/l should be possible)
C POWER4/PPC970          3      (2.0 c/l should be possible)
C POWER5                 3
C POWER6              3.5-47
C POWER7                 3

C STATUS
C  * Try combining upx+up, and vpx+vp.
C  * The worst case 47 c/l for POWER6 happens if the 3rd operand for ldx is
C    greater than the 2nd operand.  Yes, this addition is non-commutative wrt
C    performance.

C INPUT PARAMETERS
define(`rp', `r3')	C pointer through which the result limbs are stored
define(`up', `r4')	C pointer to the u limbs
define(`vp', `r5')	C pointer to the v limbs (the operand that is shifted by LSH)
define(`n',  `r6')	C limb count; the loop counter in ctr is derived from it
C Registers used only on the L(big) path; rpx reuses r6 once the count is in ctr
define(`rpx', `r6')	C second store pointer, advanced by stdu
define(`upx', `r7')	C offset added to rpx by ldx to reach u limbs
define(`vpx', `r12')	C offset added to rpx by ldx to reach v limbs
C Working registers
define(`s0', `r0')  define(`s1', `r9')	C shifted and merged v limbs, then result limbs
define(`u0', `r8')	C most recently loaded u limb
define(`v0', `r10') define(`v1', `r11')	C most recently loaded v limbs
C func combines each u limb with the v operand shifted left by LSH bits, using ADDSUBC/ADDSUBE.
C LSH, RSH, ADDSUBC, ADDSUBE, INITCY, RETVAL and func are not defined in this file.
ASM_START()
PROLOGUE(func)
	cmpldi	cr0, n, 13	C unsigned compare of n with 13
	bgt	L(big)		C n > 13: take the two-pointer loop at L(big)
C Small operands (n <= 13): single store pointer, two limbs per full loop pass
	mtctr	n		C copy n in ctr
	INITCY(	r0)		C clear cy

	ld	v0, 0(vp)	C load v limb
	ld	u0, 0(up)	C load u limb
	addi	up, up, -8	C bias up down one limb; the loop reads 16(up) and 8(up)
	addi	rp, rp, -8	C bias rp down one limb; stores use 8(rp) and 16(rp)
	sldi	s1, v0, LSH	C s1 = first v limb shifted left by LSH
	bdz	L(ex1)		C decrement ctr; if it reached zero (n was 1) skip loop

	ALIGN(16)
L(lo0):	ld	v1, 8(vp)	C load v limb
	ADDSUBE	s1, s1, u0	C add limbs with cy, set cy
	ldu	u0, 16(up)	C load u limb and update up
	srdi	s0, v0, RSH	C shift down previous v limb
	std	s1, 8(rp)	C store result limb
	rldimi	s0, v1, LSH, 0	C left shift v limb and merge with prev v limb
	bdz	L(ex0)		C decrement ctr and exit if done
	ldu	v0, 16(vp)	C load v limb and update vp
	ADDSUBE	s0, s0, u0	C add limbs with cy, set cy
	ld	u0, 8(up)	C load u limb
	srdi	s1, v1, RSH	C shift down previous v limb
	stdu	s0, 16(rp)	C store result limb and update rp
	rldimi	s1, v0, LSH, 0	C left shift v limb and merge with prev v limb
	bdnz	L(lo0)		C decrement ctr and loop back

L(ex1):	ADDSUBE	r7, s1, u0	C last limb when the loop was skipped or left at its bdnz
	std	r7, 8(rp)	C store last result limb
	srdi	r0, v0, RSH	C r0 = last v limb shifted right by RSH
	RETVAL(	r0)	C RETVAL is defined outside this file; presumably builds the return value from r0
	blr
L(ex0):	ADDSUBE	r7, s0, u0	C last limb when the loop was left at its mid-loop bdz
	std	r7, 16(rp)	C store last result limb
	srdi	r0, v1, RSH	C r0 = last v limb shifted right by RSH
	RETVAL(	r0)	C RETVAL is defined outside this file; presumably builds the return value from r0
	blr
C Large operands (n > 13): rp and rpx alternate as store pointers, each stepped two limbs
C by stdu; u and v limbs are fetched by ldx at rp+up, rp+vp, rpx+upx and rpx+vpx.
L(big):	rldicl.	r0, n, 0,63	C r0 = n & 1, set cr0
	addi	r6, n, -1	C ...for ctr
	srdi	r6, r6, 1	C ...for ctr
	mtctr	r6		C copy count into ctr
	beq	cr0, L(b0)	C n even: go to L(b0); n odd: fall through to L(b1)

L(b1):	ld	v1, 0(vp)	C load first v limb
	ld	u0, 0(up)	C load first u limb
	sldi	s1, v1, LSH	C s1 = first v limb shifted left by LSH
	srdi	s0, v1, RSH	C s0 = first v limb shifted right by RSH, merged into the next limb at L(mid)
	ld	v0, 8(vp)	C load second v limb
	ADDSUBC	s1, s1, u0	C add limbs without cy, set cy
	addi	rpx, rp, -16	C rpx starts one limb below rp (r6 is free after mtctr)
	addi	rp, rp, -8	C bias rp; each stdu stores at 16 past the pointer
	sub	upx, up, rp	C from here up, vp, upx and vpx hold offsets, not pointers
	sub	vpx, vp, rp
	sub	up, up, rpx
	sub	vp, vp, rpx
	addi	up, up, 8	C adjust the offsets so the ldx in the loop reach the next unread limbs
	addi	upx, upx, 16
	addi	vp, vp, 16
	addi	vpx, vpx, 24
	b	L(mid)		C odd n: join the loop at its second half

L(b0):	ld	v0, 0(vp)	C load first v limb
	ld	u0, 0(up)	C load first u limb
	sldi	s0, v0, LSH	C s0 = first v limb shifted left by LSH
	srdi	s1, v0, RSH	C s1 = first v limb shifted right by RSH, merged into the next limb at L(top)
	ld	v1, 8(vp)	C load second v limb
	ADDSUBC	s0, s0, u0	C add limbs without cy, set cy
	addi	rpx, rp, -8	C rpx starts one limb above rp (r6 is free after mtctr)
	addi	rp, rp, -16	C bias rp; each stdu stores at 16 past the pointer
	sub	upx, up, rpx	C from here up, vp, upx and vpx hold offsets, not pointers
	sub	vpx, vp, rpx
	sub	up, up, rp
	sub	vp, vp, rp
	addi	up, up, 8	C adjust the offsets so the ldx in the loop reach the next unread limbs
	addi	upx, upx, 16
	addi	vp, vp, 16
	addi	vpx, vpx, 24

	ALIGN(32)
L(top):	ldx	u0, rp, up	C load next u limb from rp + up
	ldx	v0, rp, vp	C load next v limb from rp + vp
	rldimi	s1, v1, LSH, 0	C left shift v limb and merge with prev v limb
	stdu	s0, 16(rp)	C store result limb and step rp by two limbs
	srdi	s0, v1, RSH	C shift down v limb for the next merge
	ADDSUBE	s1, s1, u0	C add limbs with cy, set cy
L(mid):	ldx	u0, rpx, upx	C load next u limb from rpx + upx
	ldx	v1, rpx, vpx	C load next v limb from rpx + vpx
	rldimi	s0, v0, LSH, 0	C left shift v limb and merge with prev v limb
	stdu	s1, 16(rpx)	C store result limb and step rpx by two limbs
	srdi	s1, v0, RSH	C shift down v limb for the next merge
	ADDSUBE	s0, s0, u0	C add limbs with cy, set cy
	bdnz	L(top)		C decrement CTR and loop back

	ldx	u0, rp, up	C load last u limb
	rldimi	s1, v1, LSH, 0	C left shift last v limb and merge with prev v limb
	std	s0, 16(rp)	C store next-to-last result limb
	srdi	s0, v1, RSH	C s0 is r0: last v limb shifted right by RSH, handed to RETVAL
	ADDSUBE	s1, s1, u0	C add limbs with cy, set cy
	std	s1, 24(rp)	C store last result limb

	RETVAL(	r0)	C RETVAL is defined outside this file; presumably builds the return value from r0
	blr
EPILOGUE()
@


1.1.1.1.4.1
log
@file aorslshC_n.asm was added on branch yamt-pagecache on 2014-05-22 14:09:02 +0000
@
text
@d1 156
@


1.1.1.1.4.2
log
@sync with head.

for a reference, the tree before this commit was tagged
as yamt-pagecache-tag8.

this commit was split into small chunks to avoid
a limitation of cvs.  ("Protocol error: too many arguments")
@
text
@a0 156
dnl  PowerPC-64 mpn_addlshC_n and mpn_sublshC_n, where C is a small constant.

dnl  Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

C                  cycles/limb
C POWER3/PPC630          1.83   (1.5 c/l should be possible)
C POWER4/PPC970          3      (2.0 c/l should be possible)
C POWER5                 3
C POWER6              3.5-47
C POWER7                 3

C STATUS
C  * Try combining upx+up, and vpx+vp.
C  * The worst case 47 c/l for POWER6 happens if the 3rd operand for ldx is
C    greater than the 2nd operand.  Yes, this addition is non-commutative wrt
C    performance.

C INPUT PARAMETERS
define(`rp', `r3')	C pointer through which the result limbs are stored
define(`up', `r4')	C pointer to the u limbs
define(`vp', `r5')	C pointer to the v limbs (the operand that is shifted by LSH)
define(`n',  `r6')	C limb count; the loop counter in ctr is derived from it
C Registers used only on the L(big) path; rpx reuses r6 once the count is in ctr
define(`rpx', `r6')	C second store pointer, advanced by stdu
define(`upx', `r7')	C offset added to rpx by ldx to reach u limbs
define(`vpx', `r12')	C offset added to rpx by ldx to reach v limbs
C Working registers
define(`s0', `r0')  define(`s1', `r9')	C shifted and merged v limbs, then result limbs
define(`u0', `r8')	C most recently loaded u limb
define(`v0', `r10') define(`v1', `r11')	C most recently loaded v limbs
C func combines each u limb with the v operand shifted left by LSH bits, using ADDSUBC/ADDSUBE.
C LSH, RSH, ADDSUBC, ADDSUBE, INITCY, RETVAL and func are not defined in this file.
ASM_START()
PROLOGUE(func)
	cmpldi	cr0, n, 13	C unsigned compare of n with 13
	bgt	L(big)		C n > 13: take the two-pointer loop at L(big)
C Small operands (n <= 13): single store pointer, two limbs per full loop pass
	mtctr	n		C copy n in ctr
	INITCY(	r0)		C clear cy

	ld	v0, 0(vp)	C load v limb
	ld	u0, 0(up)	C load u limb
	addi	up, up, -8	C bias up down one limb; the loop reads 16(up) and 8(up)
	addi	rp, rp, -8	C bias rp down one limb; stores use 8(rp) and 16(rp)
	sldi	s1, v0, LSH	C s1 = first v limb shifted left by LSH
	bdz	L(ex1)		C decrement ctr; if it reached zero (n was 1) skip loop

	ALIGN(16)
L(lo0):	ld	v1, 8(vp)	C load v limb
	ADDSUBE	s1, s1, u0	C add limbs with cy, set cy
	ldu	u0, 16(up)	C load u limb and update up
	srdi	s0, v0, RSH	C shift down previous v limb
	std	s1, 8(rp)	C store result limb
	rldimi	s0, v1, LSH, 0	C left shift v limb and merge with prev v limb
	bdz	L(ex0)		C decrement ctr and exit if done
	ldu	v0, 16(vp)	C load v limb and update vp
	ADDSUBE	s0, s0, u0	C add limbs with cy, set cy
	ld	u0, 8(up)	C load u limb
	srdi	s1, v1, RSH	C shift down previous v limb
	stdu	s0, 16(rp)	C store result limb and update rp
	rldimi	s1, v0, LSH, 0	C left shift v limb and merge with prev v limb
	bdnz	L(lo0)		C decrement ctr and loop back

L(ex1):	ADDSUBE	r7, s1, u0	C last limb when the loop was skipped or left at its bdnz
	std	r7, 8(rp)	C store last result limb
	srdi	r0, v0, RSH	C r0 = last v limb shifted right by RSH
	RETVAL(	r0)	C RETVAL is defined outside this file; presumably builds the return value from r0
	blr
L(ex0):	ADDSUBE	r7, s0, u0	C last limb when the loop was left at its mid-loop bdz
	std	r7, 16(rp)	C store last result limb
	srdi	r0, v1, RSH	C r0 = last v limb shifted right by RSH
	RETVAL(	r0)	C RETVAL is defined outside this file; presumably builds the return value from r0
	blr
C Large operands (n > 13): rp and rpx alternate as store pointers, each stepped two limbs
C by stdu; u and v limbs are fetched by ldx at rp+up, rp+vp, rpx+upx and rpx+vpx.
L(big):	rldicl.	r0, n, 0,63	C r0 = n & 1, set cr0
	addi	r6, n, -1	C ...for ctr
	srdi	r6, r6, 1	C ...for ctr
	mtctr	r6		C copy count into ctr
	beq	cr0, L(b0)	C n even: go to L(b0); n odd: fall through to L(b1)

L(b1):	ld	v1, 0(vp)	C load first v limb
	ld	u0, 0(up)	C load first u limb
	sldi	s1, v1, LSH	C s1 = first v limb shifted left by LSH
	srdi	s0, v1, RSH	C s0 = first v limb shifted right by RSH, merged into the next limb at L(mid)
	ld	v0, 8(vp)	C load second v limb
	ADDSUBC	s1, s1, u0	C add limbs without cy, set cy
	addi	rpx, rp, -16	C rpx starts one limb below rp (r6 is free after mtctr)
	addi	rp, rp, -8	C bias rp; each stdu stores at 16 past the pointer
	sub	upx, up, rp	C from here up, vp, upx and vpx hold offsets, not pointers
	sub	vpx, vp, rp
	sub	up, up, rpx
	sub	vp, vp, rpx
	addi	up, up, 8	C adjust the offsets so the ldx in the loop reach the next unread limbs
	addi	upx, upx, 16
	addi	vp, vp, 16
	addi	vpx, vpx, 24
	b	L(mid)		C odd n: join the loop at its second half

L(b0):	ld	v0, 0(vp)	C load first v limb
	ld	u0, 0(up)	C load first u limb
	sldi	s0, v0, LSH	C s0 = first v limb shifted left by LSH
	srdi	s1, v0, RSH	C s1 = first v limb shifted right by RSH, merged into the next limb at L(top)
	ld	v1, 8(vp)	C load second v limb
	ADDSUBC	s0, s0, u0	C add limbs without cy, set cy
	addi	rpx, rp, -8	C rpx starts one limb above rp (r6 is free after mtctr)
	addi	rp, rp, -16	C bias rp; each stdu stores at 16 past the pointer
	sub	upx, up, rpx	C from here up, vp, upx and vpx hold offsets, not pointers
	sub	vpx, vp, rpx
	sub	up, up, rp
	sub	vp, vp, rp
	addi	up, up, 8	C adjust the offsets so the ldx in the loop reach the next unread limbs
	addi	upx, upx, 16
	addi	vp, vp, 16
	addi	vpx, vpx, 24

	ALIGN(32)
L(top):	ldx	u0, rp, up	C load next u limb from rp + up
	ldx	v0, rp, vp	C load next v limb from rp + vp
	rldimi	s1, v1, LSH, 0	C left shift v limb and merge with prev v limb
	stdu	s0, 16(rp)	C store result limb and step rp by two limbs
	srdi	s0, v1, RSH	C shift down v limb for the next merge
	ADDSUBE	s1, s1, u0	C add limbs with cy, set cy
L(mid):	ldx	u0, rpx, upx	C load next u limb from rpx + upx
	ldx	v1, rpx, vpx	C load next v limb from rpx + vpx
	rldimi	s0, v0, LSH, 0	C left shift v limb and merge with prev v limb
	stdu	s1, 16(rpx)	C store result limb and step rpx by two limbs
	srdi	s1, v0, RSH	C shift down v limb for the next merge
	ADDSUBE	s0, s0, u0	C add limbs with cy, set cy
	bdnz	L(top)		C decrement CTR and loop back

	ldx	u0, rp, up	C load last u limb
	rldimi	s1, v1, LSH, 0	C left shift last v limb and merge with prev v limb
	std	s0, 16(rp)	C store next-to-last result limb
	srdi	s0, v1, RSH	C s0 is r0: last v limb shifted right by RSH, handed to RETVAL
	ADDSUBE	s1, s1, u0	C add limbs with cy, set cy
	std	s1, 24(rp)	C store last result limb

	RETVAL(	r0)	C RETVAL is defined outside this file; presumably builds the return value from r0
	blr
EPILOGUE()
@