[thirdparty/openssl.git] / crypto / bn / asm / x86_64-gcc.c

/*
 * x86_64 BIGNUM accelerator version 0.1, December 2002.
 *
 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
 * project.
 *
 * Rights for redistribution and usage in source and binary forms are
 * granted according to the OpenSSL license. Warranty of any kind is
 * disclaimed.
 *
 * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
 *    versions, like 1.0...
 * A. Well, that's because this code is basically a quick-n-dirty
 *    proof-of-concept hack. As you can see it's implemented with
 *    inline assembler, which means that you're bound to GCC and that
 *    there must be room for fine-tuning.
 *
 * Q. Why inline assembler?
 * A. x86_64 features its own ABI, which I'm not familiar with. That is
 *    why I decided to let the compiler take care of subroutine
 *    prologue/epilogue as well as register allocation.
 *
 * Q. How much faster does it get?
 * A. Unfortunately, people sitting on x86_64 hardware are prohibited
 *    from disclosing the performance numbers, so they (SuSE Labs, to be
 *    specific) wouldn't tell me. However! A very similar coding technique
 *    (reaching out for the 128-bit result of a 64x64-bit multiplication)
 *    results in a >3x performance improvement on MIPS, and I see no
 *    reason why the gain on x86_64 would be much different :-)
 */

/*
 * BN_ULONG is the machine-word type for this file. The technique described
 * in the header comment (taking the 128-bit result of a 64x64-bit
 * multiplication) requires it to be 64 bits wide.
 *
 * "unsigned long" is 64-bit only under the LP64 data model. On LLP64
 * targets such as Win64 (e.g. GCC for mingw-w64) it is 32-bit, so pick
 * "unsigned long long" there. LP64 builds get the same definition as
 * before.
 */
#if defined(_WIN64) || !defined(__LP64__)
# define BN_ULONG unsigned long long
#else
# define BN_ULONG unsigned long
#endif

/*
Commit	Line	Data
2f98abbc AP	1	/*
	2	* x86_64 BIGNUM accelerator version 0.1, December 2002.
	3	*
	4	* Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
	5	* project.
	6	*
	7	* Rights for redistribution and usage in source and binary forms are
	8	* granted according to the OpenSSL license. Warranty of any kind is
	9	* disclaimed.
	10	*
	11	* Q. Version 0.1? It doesn't sound like Andy, he used to assign real
	12	* versions, like 1.0...
	13	* A. Well, that's because this code is basically a quick-n-dirty
	14	* proof-of-concept hack. As you can see it's implemented with
	15	* inline assembler, which means that you're bound to GCC and that
	16	* there must be a room for fine-tuning.
	17	*
	18	* Q. Why inline assembler?
	19	* A. x86_64 features own ABI I'm not familiar with. Which is why
	20	* I decided to let the compiler take care of subroutine
	21	* prologue/epilogue as well as register allocation.
	22	*
	23	* Q. How much faster does it get?
	24	* A. Unfortunately people sitting on x86_64 hardware are prohibited
	25	* to disclose the performance numbers, so they (SuSE labs to be
	26	* specific) wouldn't tell me. However! Very similar coding technique
	27	* (reaching out for 128-bit result from 64x64-bit multiplication)
	28	* results in >3 times performance improvement on MIPS and I see no
	29	* reason why gain on x86_64 would be so much different:-)
	30	*/
	31
	32	#define BN_ULONG unsigned long
	33
	34	/*
	35