/** * \brief Multi-precision integer library, ESP32 hardware accelerated parts * * based on mbedTLS implementation * * Copyright (C) 2006-2015, ARM Limited, All Rights Reserved * Additions Copyright (C) 2016, Espressif Systems (Shanghai) PTE Ltd * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ #include #include #include #include "mbedtls/bignum.h" #include "mbedtls/bn_mul.h" #include "rom/bigint.h" #if defined(MBEDTLS_MPI_MUL_MPI_ALT) || defined(MBEDTLS_MPI_EXP_MOD_ALT) /* Constants from mbedTLS bignum.c */ #define ciL (sizeof(mbedtls_mpi_uint)) /* chars in limb */ #define biL (ciL << 3) /* bits in limb */ static _lock_t mpi_lock; /* At the moment these hardware locking functions aren't exposed publically for MPI. If you want to use the ROM bigint functions and co-exist with mbedTLS, please raise a feature request. */ static void esp_mpi_acquire_hardware( void ) { /* newlib locks lazy initialize on ESP-IDF */ _lock_acquire(&mpi_lock); ets_bigint_enable(); } static void esp_mpi_release_hardware( void ) { ets_bigint_disable(); _lock_release(&mpi_lock); } /* * Helper for mbedtls_mpi multiplication * copied/trimmed from mbedtls bignum.c */ static void mpi_mul_hlp( size_t i, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d, mbedtls_mpi_uint b ) { mbedtls_mpi_uint c = 0, t = 0; for( ; i >= 16; i -= 16 ) { MULADDC_INIT MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_STOP } for( ; i >= 8; i -= 8 ) { MULADDC_INIT MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_CORE MULADDC_STOP } for( ; i > 0; i-- ) { MULADDC_INIT MULADDC_CORE MULADDC_STOP } t++; do { *d += c; c = ( *d < c ); d++; } while( c != 0 ); } /* * Helper for mbedtls_mpi subtraction * Copied/adapter from mbedTLS bignum.c */ static void mpi_sub_hlp( size_t n, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d ) { size_t i; mbedtls_mpi_uint c, z; for( i = c = 0; i < n; i++, s++, d++ ) { z = ( *d < c ); *d -= c; c = ( *d < *s ) + z; *d -= *s; } while( c != 0 ) { z = ( *d < c ); *d -= c; c = z; i++; d++; } } /* The following 3 Montgomery arithmetic function are copied from mbedTLS bigint.c verbatim as they are static. TODO: find a way to support making the versions in mbedtls non-static. */ /* * Fast Montgomery initialization (thanks to Tom St Denis) */ static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N ) { mbedtls_mpi_uint x, m0 = N->p[0]; unsigned int i; x = m0; x += ( ( m0 + 2 ) & 4 ) << 1; for( i = biL; i >= 8; i /= 2 ) x *= ( 2 - ( m0 * x ) ); *mm = ~x + 1; } /* * Montgomery multiplication: A = A * B * R^-1 mod N (HAC 14.36) */ static int mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi *N, mbedtls_mpi_uint mm, const mbedtls_mpi *T ) { size_t i, n, m; mbedtls_mpi_uint u0, u1, *d; if( T->n < N->n + 1 || T->p == NULL ) return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); memset( T->p, 0, T->n * ciL ); d = T->p; n = N->n; m = ( B->n < n ) ? B->n : n; for( i = 0; i < n; i++ ) { /* * T = (T + u0*B + u1*N) / 2^biL */ u0 = A->p[i]; u1 = ( d[0] + u0 * B->p[0] ) * mm; mpi_mul_hlp( m, B->p, d, u0 ); mpi_mul_hlp( n, N->p, d, u1 ); *d++ = u0; d[n + 1] = 0; } memcpy( A->p, d, ( n + 1 ) * ciL ); if( mbedtls_mpi_cmp_abs( A, N ) >= 0 ) mpi_sub_hlp( n, N->p, A->p ); else /* prevent timing attacks */ mpi_sub_hlp( n, A->p, T->p ); return( 0 ); } /* * Montgomery reduction: A = A * R^-1 mod N */ static int mpi_montred( mbedtls_mpi *A, const mbedtls_mpi *N, mbedtls_mpi_uint mm, const mbedtls_mpi *T ) { mbedtls_mpi_uint z = 1; mbedtls_mpi U; U.n = U.s = (int) z; U.p = &z; return( mpi_montmul( A, &U, N, mm, T ) ); } /* Allocate parameters used by hardware MPI multiply, and copy mbedtls_mpi structures into them */ static int mul_pram_alloc(const mbedtls_mpi *A, const mbedtls_mpi *B, char **pA, char **pB, char **pX, size_t *bites) { char *sa, *sb, *sx; // int algn; int words, bytes; int abytes, bbytes; if (A->n > B->n) words = A->n; else words = B->n; bytes = (words / 16 + ((words % 16) ? 1 : 0 )) * 16 * 4 * 2; abytes = A->n * 4; bbytes = B->n * 4; sa = malloc(bytes); if (!sa) { return -1; } sb = malloc(bytes); if (!sb) { free(sa); return -1; } sx = malloc(bytes); if (!sx) { free(sa); free(sb); return -1; } memcpy(sa, A->p, abytes); memset(sa + abytes, 0, bytes - abytes); memcpy(sb, B->p, bbytes); memset(sb + bbytes, 0, bytes - bbytes); *pA = sa; *pB = sb; *pX = sx; *bites = bytes * 4; return 0; } #if defined(MBEDTLS_MPI_MUL_MPI_ALT) int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B ) { int ret = -1; size_t i, j; char *s1 = NULL, *s2 = NULL, *dest = NULL; size_t bites; mbedtls_mpi TA, TB; mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TB ); if( X == A ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) ); A = &TA; } if( X == B ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) ); B = &TB; } for( i = A->n; i > 0; i-- ) if( A->p[i - 1] != 0 ) break; for( j = B->n; j > 0; j-- ) if( B->p[j - 1] != 0 ) break; MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + j ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) ); if (mul_pram_alloc(A, B, &s1, &s2, &dest, &bites)) { goto cleanup; } esp_mpi_acquire_hardware(); if (ets_bigint_mult_prepare((uint32_t *)s1, (uint32_t *)s2, bites)){ ets_bigint_wait_finish(); if (ets_bigint_mult_getz((uint32_t *)dest, bites) == true) { memcpy(X->p, dest, (i + j) * 4); ret = 0; } else { printf("ets_bigint_mult_getz failed\n"); } } else{ printf("Baseline multiplication failed\n"); } esp_mpi_release_hardware(); X->s = A->s * B->s; free(s1); free(s2); free(dest); cleanup: mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TA ); return( ret ); } #endif /* MBEDTLS_MPI_MUL_MPI_ALT */ #if defined(MBEDTLS_MPI_EXP_MOD_ALT) /* * Sliding-window exponentiation: X = A^E mod N (HAC 14.85) */ int mbedtls_mpi_exp_mod( mbedtls_mpi* X, const mbedtls_mpi* A, const mbedtls_mpi* E, const mbedtls_mpi* N, mbedtls_mpi* _RR ) { int ret; size_t wbits, wsize, one = 1; size_t i, j, nblimbs; size_t bufsize, nbits; mbedtls_mpi_uint ei, mm, state; mbedtls_mpi RR, T, W[ 2 << MBEDTLS_MPI_WINDOW_SIZE ], Apos; int neg; if( mbedtls_mpi_cmp_int( N, 0 ) < 0 || ( N->p[0] & 1 ) == 0 ) return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); if( mbedtls_mpi_cmp_int( E, 0 ) < 0 ) return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); /* * Init temps and window size */ mpi_montg_init( &mm, N ); mbedtls_mpi_init( &RR ); mbedtls_mpi_init( &T ); mbedtls_mpi_init( &Apos ); memset( W, 0, sizeof( W ) ); i = mbedtls_mpi_bitlen( E ); wsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 : ( i > 79 ) ? 4 : ( i > 23 ) ? 3 : 1; if( wsize > MBEDTLS_MPI_WINDOW_SIZE ) wsize = MBEDTLS_MPI_WINDOW_SIZE; j = N->n + 1; MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1], j ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) ); /* * Compensate for negative A (and correct at the end) */ neg = ( A->s == -1 ); if( neg ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Apos, A ) ); Apos.s = 1; A = &Apos; } /* * If 1st call, pre-compute R^2 mod N */ if( _RR == NULL || _RR->p == NULL ) { MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &RR, 1 ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &RR, N->n * 2 * biL ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &RR, &RR, N ) ); if( _RR != NULL ) memcpy( _RR, &RR, sizeof( mbedtls_mpi) ); } else memcpy( &RR, _RR, sizeof( mbedtls_mpi) ); /* * W[1] = A * R^2 * R^-1 mod N = A * R mod N */ if( mbedtls_mpi_cmp_mpi( A, N ) >= 0 ) MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &W[1], A, N ) ); else MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[1], A ) ); mpi_montmul( &W[1], &RR, N, mm, &T ); /* * X = R^2 * R^-1 mod N = R mod N */ MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &RR ) ); mpi_montred( X, N, mm, &T ); if( wsize > 1 ) { /* * W[1 << (wsize - 1)] = W[1] ^ (wsize - 1) */ j = one << ( wsize - 1 ); MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1] ) ); for( i = 0; i < wsize - 1; i++ ) mpi_montmul( &W[j], &W[j], N, mm, &T ); /* * W[i] = W[i - 1] * W[1] */ for( i = j + 1; i < ( one << wsize ); i++ ) { MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) ); mpi_montmul( &W[i], &W[1], N, mm, &T ); } } nblimbs = E->n; bufsize = 0; nbits = 0; wbits = 0; state = 0; while( 1 ) { if( bufsize == 0 ) { if( nblimbs == 0 ) break; nblimbs--; bufsize = sizeof( mbedtls_mpi_uint ) << 3; } bufsize--; ei = (E->p[nblimbs] >> bufsize) & 1; /* * skip leading 0s */ if( ei == 0 && state == 0 ) continue; if( ei == 0 && state == 1 ) { /* * out of window, square X */ mpi_montmul( X, X, N, mm, &T ); continue; } /* * add ei to current window */ state = 2; nbits++; wbits |= ( ei << ( wsize - nbits ) ); if( nbits == wsize ) { /* * X = X^wsize R^-1 mod N */ for( i = 0; i < wsize; i++ ) mpi_montmul( X, X, N, mm, &T ); /* * X = X * W[wbits] R^-1 mod N */ mpi_montmul( X, &W[wbits], N, mm, &T ); state--; nbits = 0; wbits = 0; } } /* * process the remaining bits */ for( i = 0; i < nbits; i++ ) { mpi_montmul( X, X, N, mm, &T ); wbits <<= 1; if( ( wbits & ( one << wsize ) ) != 0 ) mpi_montmul( X, &W[1], N, mm, &T ); } /* * X = A^E * R * R^-1 mod N = A^E mod N */ mpi_montred( X, N, mm, &T ); if( neg ) { X->s = -1; MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( X, N, X ) ); } cleanup: for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ ) mbedtls_mpi_free( &W[i] ); mbedtls_mpi_free( &W[1] ); mbedtls_mpi_free( &T ); mbedtls_mpi_free( &Apos ); if( _RR == NULL || _RR->p == NULL ) mbedtls_mpi_free( &RR ); return( ret ); } #endif /* MBEDTLS_MPI_EXP_MOD_ALT */ #endif /* MBEDTLS_MPI_MUL_MPI_ALT || MBEDTLS_MPI_EXP_MOD_ALT */