mirror of
				https://github.com/cuberite/polarssl.git
				synced 2025-11-03 20:22:59 -05:00 
			
		
		
		
	Rework ecp_mod_p192()
On x86_64, this makes it 5x faster, and ecp_mul() 17% faster for this curve. The code is shorter too.
This commit is contained in:
		
							parent
							
								
									6888167e73
								
							
						
					
					
						commit
						d1e7a45fdd
					
				
							
								
								
									
										101
									
								
								library/ecp.c
									
									
									
									
									
								
							
							
						
						
									
										101
									
								
								library/ecp.c
									
									
									
									
									
								
							@ -475,25 +475,36 @@ cleanup:
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if defined(POLARSSL_ECP_DP_SECP192R1_ENABLED)
 | 
			
		||||
/*
 | 
			
		||||
 * 192 bits in terms of t_uint
 | 
			
		||||
 */
 | 
			
		||||
#define P192_SIZE_INT   ( 192 / CHAR_BIT / sizeof( t_uint ) )
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Table to get S1, S2, S3 of FIPS 186-3 D.2.1:
 | 
			
		||||
 * -1 means let this chunk be 0
 | 
			
		||||
 * a positive value i means A_i.
 | 
			
		||||
 */
 | 
			
		||||
#define P192_CHUNKS         3
 | 
			
		||||
#define P192_CHUNK_CHAR     ( 64 / CHAR_BIT )
 | 
			
		||||
#define P192_CHUNK_INT      ( P192_CHUNK_CHAR / sizeof( t_uint ) )
 | 
			
		||||
/* Add 64-bit chunks (dst += src) and update carry */
 | 
			
		||||
static inline void add_64( t_uint *dst, t_uint *src, t_uint *carry )
 | 
			
		||||
{
 | 
			
		||||
    unsigned char i;
 | 
			
		||||
    t_uint c = 0;
 | 
			
		||||
    for( i = 0; i < 8 / sizeof( t_uint ); i++, dst++, src++ )
 | 
			
		||||
    {
 | 
			
		||||
        *dst += c;      c  = ( *dst < c );
 | 
			
		||||
        *dst += *src;   c += ( *dst < *src );
 | 
			
		||||
    }
 | 
			
		||||
    *carry += c;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const signed char p192_tbl[][P192_CHUNKS] = {
 | 
			
		||||
    { -1,   3,  3   }, /* S1 */
 | 
			
		||||
    { 4,    4,  -1  }, /* S2 */
 | 
			
		||||
    { 5,    5,  5   }, /* S3 */
 | 
			
		||||
};
 | 
			
		||||
/* Add carry to a 64-bit chunk and update carry */
 | 
			
		||||
static inline void carry64( t_uint *dst, t_uint *carry )
 | 
			
		||||
{
 | 
			
		||||
    unsigned char i;
 | 
			
		||||
    for( i = 0; i < 8 / sizeof( t_uint ); i++, dst++ )
 | 
			
		||||
    {
 | 
			
		||||
        *dst += *carry;
 | 
			
		||||
        *carry  = ( *dst < *carry );
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define OFFSET      ( 8 / sizeof( t_uint ) )
 | 
			
		||||
#define A( i )      ( N->p + ( i ) * OFFSET )
 | 
			
		||||
#define ADD( i )    add_64( p, A( i ), &c )
 | 
			
		||||
#define NEXT        p += OFFSET; carry64( p, &c )
 | 
			
		||||
#define LAST        p += OFFSET; *p = c; while( ++p < end ) *p = 0
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Fast quasi-reduction modulo p192 (FIPS 186-3 D.2.1)
 | 
			
		||||
@ -501,53 +512,27 @@ const signed char p192_tbl[][P192_CHUNKS] = {
 | 
			
		||||
static int ecp_mod_p192( mpi *N )
 | 
			
		||||
{
 | 
			
		||||
    int ret;
 | 
			
		||||
    unsigned char i, j, offset;
 | 
			
		||||
    signed char chunk;
 | 
			
		||||
    mpi tmp, acc;
 | 
			
		||||
    t_uint tmp_p[P192_SIZE_INT], acc_p[P192_SIZE_INT + 1];
 | 
			
		||||
    t_uint c = 0;
 | 
			
		||||
    t_uint *p, *end;
 | 
			
		||||
 | 
			
		||||
    tmp.s = 1;
 | 
			
		||||
    tmp.n = sizeof( tmp_p ) / sizeof( tmp_p[0] );
 | 
			
		||||
    tmp.p = tmp_p;
 | 
			
		||||
    /* Make sure we have the correct number of blocks */
 | 
			
		||||
    MPI_CHK( mpi_grow( N, 6 * OFFSET ) );
 | 
			
		||||
    p = N->p;
 | 
			
		||||
    end = p + N->n;
 | 
			
		||||
 | 
			
		||||
    acc.s = 1;
 | 
			
		||||
    acc.n = sizeof( acc_p ) / sizeof( acc_p[0] );
 | 
			
		||||
    acc.p = acc_p;
 | 
			
		||||
 | 
			
		||||
    MPI_CHK( mpi_grow( N, P192_SIZE_INT * 2 ) );
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * acc = T
 | 
			
		||||
     */
 | 
			
		||||
    memset( acc_p, 0, sizeof( acc_p ) );
 | 
			
		||||
    memcpy( acc_p, N->p, P192_CHUNK_CHAR * P192_CHUNKS );
 | 
			
		||||
 | 
			
		||||
    for( i = 0; i < sizeof( p192_tbl ) / sizeof( p192_tbl[0] ); i++)
 | 
			
		||||
    {
 | 
			
		||||
        /*
 | 
			
		||||
         * tmp = S_i
 | 
			
		||||
         */
 | 
			
		||||
        memset( tmp_p, 0, sizeof( tmp_p ) );
 | 
			
		||||
        for( j = 0, offset = P192_CHUNKS - 1; j < P192_CHUNKS; j++, offset-- )
 | 
			
		||||
        {
 | 
			
		||||
            chunk = p192_tbl[i][j];
 | 
			
		||||
            if( chunk >= 0 )
 | 
			
		||||
                memcpy( tmp_p + offset * P192_CHUNK_INT,
 | 
			
		||||
                        N->p + chunk * P192_CHUNK_INT,
 | 
			
		||||
                        P192_CHUNK_CHAR );
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        /*
 | 
			
		||||
         * acc += tmp
 | 
			
		||||
         */
 | 
			
		||||
        MPI_CHK( mpi_add_abs( &acc, &acc, &tmp ) );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    MPI_CHK( mpi_copy( N, &acc ) );
 | 
			
		||||
    ADD( 3 ); ADD( 5 );             NEXT; // A0 += A3 + A5
 | 
			
		||||
    ADD( 3 ); ADD( 4 ); ADD( 5 );   NEXT; // A1 += A3 + A4 + A5
 | 
			
		||||
    ADD( 4 ); ADD( 5 );             LAST; // A2 += A4 + A5
 | 
			
		||||
 | 
			
		||||
cleanup:
 | 
			
		||||
    return( ret );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#undef OFFSET
 | 
			
		||||
#undef A
 | 
			
		||||
#undef ADD
 | 
			
		||||
#undef NEXT
 | 
			
		||||
#undef LAST
 | 
			
		||||
#endif /* POLARSSL_ECP_DP_SECP192R1_ENABLED */
 | 
			
		||||
 | 
			
		||||
#if defined(POLARSSL_ECP_DP_SECP521R1_ENABLED)
 | 
			
		||||
 | 
			
		||||
@ -253,14 +253,26 @@ ECP gen keypair
 | 
			
		||||
depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED
 | 
			
		||||
ecp_gen_keypair:POLARSSL_ECP_DP_SECP192R1
 | 
			
		||||
 | 
			
		||||
ECP mod p192 small (more than 192 bits, less limbs than 2 * 192 bits)
 | 
			
		||||
depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED
 | 
			
		||||
ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"0100000000000103010000000000010201000000000001010100000000000100"
 | 
			
		||||
 | 
			
		||||
ECP mod p192 readable
 | 
			
		||||
depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED
 | 
			
		||||
ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"000000000000010500000000000001040000000000000103000000000000010200000000000001010000000000000100"
 | 
			
		||||
ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"010000000000010501000000000001040100000000000103010000000000010201000000000001010100000000000100"
 | 
			
		||||
 | 
			
		||||
ECP mod p192 readable with carry
 | 
			
		||||
depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED
 | 
			
		||||
ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"FF00000000010500FF00000000010400FF00000000010300FF00000000010200FF00000000010100FF00000000010000"
 | 
			
		||||
 | 
			
		||||
ECP mod p192 random
 | 
			
		||||
depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED
 | 
			
		||||
ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"36CF96B45D706A0954D89E52CE5F38517A2270E0175849B6F3740151D238CCABEF921437E475881D83BB69E4AA258EBD"
 | 
			
		||||
 | 
			
		||||
ECP mod p192 (from a past failure case)
 | 
			
		||||
depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED
 | 
			
		||||
ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"1AC2D6F96A2A425E9DD1776DD8368D4BBC86BF4964E79FEA713583BF948BBEFF0939F96FB19EC48C585BDA6A2D35C750"
 | 
			
		||||
 | 
			
		||||
ECP test vectors secp192r1 rfc 5114
 | 
			
		||||
depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED
 | 
			
		||||
ecp_test_vect:POLARSSL_ECP_DP_SECP192R1:"323FA3169D8E9C6593F59476BC142000AB5BE0E249C43426":"CD46489ECFD6C105E7B3D32566E2B122E249ABAADD870612":"68887B4877DF51DD4DC3D6FD11F0A26F8FD3844317916E9A":"631F95BB4A67632C9C476EEE9AB695AB240A0499307FCF62":"519A121680E0045466BA21DF2EEE47F5973B500577EF13D5":"FF613AB4D64CEE3A20875BDB10F953F6B30CA072C60AA57F":"AD420182633F8526BFE954ACDA376F05E5FF4F837F54FEBE":"4371545ED772A59741D0EDA32C671112B7FDDD51461FCF32"
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user