// PureNoise CryptoLib (c) 1997-2004, PureNoise Ltd Vaduz // Ruptor's Elliptic Curve Cryptography implementation (strongly recommended over legacy DH, RSA or El-Gamal) in pure C // The fastest Elliptic Curve implementation in the world! // Inspired by the famous Miracl Library (http://indigo.ie/~mscott) by Shamus Software Ltd // Uses and recommends Shamus Standard Curves (we believe that the US ECC standards are trapdoored by the NSA) #include #include #include #include #include "ecc.h" #include "../aes/aes.h" #undef CRYPTO_INLINE_ASM #ifndef int_abs #define int_abs(x) (((x) < 0) ? ((unsigned)(0-(signed)x)) : (x)) #endif #if (ECC_BITS == 512) unsigned long ECC_512_P [18] = { 16, 0x6D51C5EFU, 0x4FE1356DU, 0xF25F1437U, 0x302B0A6DU, 0xCD3A431BU, 0xEF9519B3U, 0x8E3404DDU, 0x514A0879U, 0x3B139B22U, 0x020BBEA6U, 0x8A67CC74U, 0x29024E08U, 0x80DC1CD1U, 0xC4C6628BU, 0x2168C234U, 0xC90FDAA2U, 0 }; // prime P unsigned long ECC_512_Q [18] = { 16, 0xF57A5F71U, 0x30D80897U, 0xDC6269B8U, 0x9188C4B7U, 0xB37E4259U, 0x987F1C9AU, 0xAFBC9EB7U, 0x5A2359C4U, 0x3B139B23U, 0x020BBEA6U, 0x8A67CC74U, 0x29024E08U, 0x80DC1CD1U, 0xC4C6628BU, 0x2168C234U, 0xC90FDAA2U, 0 }; // curve order Q unsigned long ECC_512_A [18] = { 16, 0xB54717BCU, 0x3F84D5B5U, 0xC97C50DDU, 0xC0AC29B7U, 0x34E90C6CU, 0xBE5466CFU, 0x38D01377U, 0x452821E6U, 0xEC4E6C89U, 0x082EFA98U, 0x299F31D0U, 0xA4093822U, 0x03707344U, 0x13198A2EU, 0x85A308D3U, 0x243F6A88U, 0 }; // nres (-3) unsigned long ECC_512_B [18] = { 16, 0xFC34D3F4U, 0x2912C0A4U, 0xC847F58DU, 0x1F92F60FU, 0x76AC393AU, 0xE2BAF870U, 0x1EBB511AU, 0xBBA4E880U, 0x64CC8606U, 0x6383B288U, 0xFF7F1470U, 0x4F02422CU, 0x74590763U, 0x9A6B1E0FU, 0x68EEF0E9U, 0x549966A9U, 0 }; // nres (B) unsigned long ECC_512_ONE [18] = { 16, 0x92AE3A11U, 0xB01ECA92U, 0x0DA0EBC8U, 0xCFD4F592U, 0x32C5BCE4U, 0x106AE64CU, 0x71CBFB22U, 0xAEB5F786U, 0xC4EC64DDU, 0xFDF44159U, 0x7598338BU, 0xD6FDB1F7U, 0x7F23E32EU, 0x3B399D74U, 0xDE973DCBU, 0x36F0255DU, 0 }; // nres (1) unsigned long ECC_512_N = 0xF43FF6F1U; // ECC_512_N * ECC_512_P[1] == -1; const ecc_point ECC_512_XYZ = { { 16, 0x70797243U, 0x4C2E6F74U, 0x28206269U, 0x31202963U, 0x2D373939U, 0x34303032U, 0x72755020U, 0x696F4E65U, 0x4C206573U, 0x56206474U, 0x7A756461U, 0x77772820U, 0x72632E77U, 0x6F747079U, 0x2E62696CU, 0x296D6F63U, 0, 0 }, // nres (X) <- common point, fixed for all apps, does not affect security as long as it's not 0 { 16, 0xFC9FE8DFU, 0xFEF7F18AU, 0xCFB3D483U, 0x8A7A85DDU, 0x28C3B047U, 0x70B77469U, 0x6D9DD567U, 0xF01B141EU, 0xA9AF25C8U, 0xFF014041U, 0x1C6CE0BFU, 0x1E4BC8DCU, 0x0F8096B8U, 0x0B64EBC0U, 0xB44FD846U, 0x2788A4D2U, 0, 0 }, // nres (Y) <- common point, fixed for all apps, does not affect security as long as it's not 0 { 16, 0x92AE3A11U, 0xB01ECA92U, 0x0DA0EBC8U, 0xCFD4F592U, 0x32C5BCE4U, 0x106AE64CU, 0x71CBFB22U, 0xAEB5F786U, 0xC4EC64DDU, 0xFDF44159U, 0x7598338BU, 0xD6FDB1F7U, 0x7F23E32EU, 0x3B399D74U, 0xDE973DCBU, 0x36F0255DU, 0, 0 }, // nres (1) 1 }; #elif (ECC_BITS == 256) unsigned long ECC_256_P [10] = { 8, 0x3B139C0BU, 0x020BBEA6U, 0x8A67CC74U, 0x29024E08U, 0x80DC1CD1U, 0xC4C6628BU, 0x2168C234U, 0xC90FDAA2U, 0 }; // prime P unsigned long ECC_256_Q [10] = { 8, 0x98A64437U, 0xD9CAECA1U, 0x40B37A88U, 0xFBEC42E9U, 0x80DC1CD0U, 0xC4C6628BU, 0x2168C234U, 0xC90FDAA2U, 0 }; // curve order Q unsigned long ECC_256_A [10] = { 8, 0xEC4E702CU, 0x082EFA98U, 0x299F31D0U, 0xA4093822U, 0x03707344U, 0x13198A2EU, 0x85A308D3U, 0x243F6A88U, 0 }; // nres (-3) unsigned long ECC_256_B [10] = { 8, 0x9CC3E93CU, 0x42398461U, 0xB4E43BD3U, 0x7CAF7AE4U, 0x93ECFC7DU, 0xBC835F52U, 0x1E0B6189U, 0x750534BFU, 0 }; // nres (B) unsigned long ECC_256_ONE [10] = { 8, 0xC4EC63F5U, 0xFDF44159U, 0x7598338BU, 0xD6FDB1F7U, 0x7F23E32EU, 0x3B399D74U, 0xDE973DCBU, 0x36F0255DU, 0 }; // nres (1) unsigned long ECC_256_N = 0xFE82F05DU; // ECC_512_N * ECC_512_P[1] == -1; const ecc_point ECC_256_XYZ = { { 8, 0x70797243U, 0x4C2E6F74U, 0x28206269U, 0x77202963U, 0x632E7777U, 0x74707972U, 0x62696C6FU, 0x6D6F632EU, 0, 0 }, // nres (X) <- common point, fixed for all apps, does not affect security as long as it's not 0 { 8, 0x53F78AC0U, 0xB72D544AU, 0x317CBF02U, 0x6BB138CDU, 0x43902EEBU, 0xA8A1445BU, 0xD6444496U, 0x776F1E82U, 0, 0 }, // nres (Y) <- common point, fixed for all apps, does not affect security as long as it's not 0 { 8, 0xC4EC63F5U, 0xFDF44159U, 0x7598338BU, 0xD6FDB1F7U, 0x7F23E32EU, 0x3B399D74U, 0xDE973DCBU, 0x36F0255DU, 0, 0 }, // nres (1) 1 }; #endif const unsigned long ECC_1[3] = { 1, 1, 0 }; const unsigned long ECC_2[3] = { 1, 2, 0 }; #define big_size(x) (((x)[0]>1)?((unsigned)(0-(signed)1)):(((x)[0]==1)?(x)[1]:0)) #define big_modsub(x,y,w) { if (big_compare(x,y)>=0) big_psub(x,y,w); else { big_psub(y,x,w); big_psub(ECC_PRIME,w,w); } } // w = x - y mod modulus static void big_shift (const unsigned long *x, int n, unsigned long *w) { // set w = x * (big_base^n) by shifting int i, bl; big_copy (x, w); if ((w[0]==0) || (n==0)) return; bl = (int) w[0] + n; if (bl <= 0) { w[0] = 0; w[1] = 0; return; } if (n > 0) { for (i = bl; i > n; i--) w[i] = w[i - n]; for (i = 1; i <= n; i++) w[i] = 0; } else { n = -n; for (i = 1; i <= bl; i++) w[i] = w[i + n]; for (i = 1; i <= n; i++) w[bl + i] = 0; } w[(w[0] = bl) + 1] = 0; } void big_padd (const unsigned long *x, const unsigned long *y, unsigned long *z) { unsigned long lx = x[0], ly = y[0], i = 1, carry = 0, psum, la, lz, f = 0; const unsigned long *r; if (ly > lx) { la = lx; lz = ly; r = y; if (x != z) big_copy (y, z); else f++; } else { la = ly; lz = lx; r = x; if (y != z) big_copy (x, z); else f++; } z[z[0] = lz + 1] = 0; for (; i <= la; i++) { // add by columns psum = x[i] + y[i] + carry; if (psum > x[i]) carry = 0; else if (psum < x[i]) carry = 1; z[i] = psum; } for (; (i <= lz) && (f || carry); i++) { // copy/carry the rest psum = r[i] + carry; if (psum > r[i]) carry = 0; else if (psum < r[i]) carry = 1; z[i] = psum; } if ((i > lz) && carry) { z[i] = carry; z[i+1] = 0; } else z[0]--; } void big_psub (const unsigned long *x,const unsigned long *y,unsigned long *z) { unsigned long lx = x[0], ly = y[0], i = 1, borrow = 0, pdiff; if (y != z) big_copy (x, z); z[0] = lx; for (; i <= ly; i++) { // subtract by columns pdiff = x[i] - y[i] - borrow; if (pdiff < x[i]) borrow = 0; else if (pdiff > x[i]) borrow = 1; z[i] = pdiff; } for (; (i <= lx) && ((y == z) || borrow); i++) { // copy/borrow the rest pdiff = x[i] - borrow; if (pdiff < x[i]) borrow = 0; else if (pdiff > x[i]) borrow = 1; z[i] = pdiff; } big_lzero (z); z[z[0] + 1] = 0; } #ifdef _MSC_VER #pragma warning (push) #pragma warning (disable:4731) #endif void big_pmul (const unsigned long *x, const unsigned long sn, unsigned long *z) { unsigned long m,xl; #if (CRYPTO_INLINE_ASM != 3) && (CRYPTO_INLINE_ASM != 4) OCTET p = {{0}}; #define carry p.D[ord2(1)] #else unsigned long carry = 0; #endif if (x != z) { z[0] = 0; z[1] = 0; if (sn == 0) return; } else if (sn == 0) { z[0] = 0; z[1] = 0; return; } else if (sn == 1) { big_copy (x, z); return; } m = 0; xl = x[0]; // inline 80x86 assembly - substitutes for loop below #if (CRYPTO_INLINE_ASM == 3) ASM mov ecx,xl ASM or ecx,ecx ASM je _arth1_out1 ASM mov ebx,sn ASM mov edi,z ASM mov esi,x ASM add esi,4 ASM add edi,4 ASM push ebp ASM xor ebp,ebp _arth1_tcl1: ASM mov eax,[esi] ASM add esi,4 ASM mul ebx ASM add eax,ebp ASM adc edx,0 ASM mov [edi],eax ASM add edi,4 ASM mov ebp,edx ASM dec ecx ASM jnz _arth1_tcl1 ASM mov eax,ebp ASM pop ebp ASM mov carry,eax _arth1_out1: #elif (CRYPTO_INLINE_ASM == 4) ASM ("\n" "movl %4,%%ecx\n" "orl %%ecx,%%ecx\n" "je 1f\n" "movl %3,%%ebx\n" "movl %1,%%edi\n" "movl %2,%%esi\n" "addl $4,%%esi\n" "addl $4,%%edi\n" "pushl %%ebp\n" "xorl %%ebp,%%ebp\n" "0:\n" "movl (%%esi),%%eax\n" "addl $4,%%esi\n" "mull %%ebx\n" "addl %%ebp,%%eax\n" "adcl $0,%%edx\n" "movl %%eax,(%%edi)\n" "addl $4,%%edi\n" "movl %%edx,%%ebp\n" "decl %%ecx\n" "jnz 0b\n" "movl %%ebp,%%eax\n" "popl %%ebp\n" "movl %%eax,%0\n" "1:" :"=g"(carry) :"g"(z),"g"(x),"g"(sn),"g"(xl) :"eax","edi","esi","ebx","ecx","edx","memory" ); #else for (m = 1; m <= xl; m++) { p.Q[0] = (unsigned __int64) x[m] * sn + p.D[ord2(1)]; z[m] = p.D[ord2(0)]; } #endif if (carry) { z[++xl] = carry; } z[0] = xl; z[xl + 1] = 0; #ifdef carry #undef carry #endif } unsigned long big_sdiv (const unsigned long *x, const unsigned long sn, unsigned long *z) { unsigned long xl = x[0]; #if (CRYPTO_INLINE_ASM != 3) && (CRYPTO_INLINE_ASM != 4) unsigned long i; OCTET p = {{0}}; #define sr p.D[ord2(1)] #else unsigned long sr; #endif if (sn==1) // special case { big_copy (x, z); return 0; } if (x != z) { z[0] = 0; z[1] = 0; } // inline - substitutes for loop below #if CRYPTO_INLINE_ASM == 3 ASM mov ecx,xl ASM or ecx,ecx ASM je _arth1_out2 ASM mov ebx,ecx ASM shl ebx,2 ASM mov esi, x ASM add esi,ebx ASM mov edi, z ASM add edi,ebx ASM mov ebx,sn ASM push ebp ASM xor ebp,ebp _arth1_tcl2: ASM mov edx,ebp ASM mov eax,[esi] ASM sub esi,4 ASM div ebx ASM mov ebp,edx ASM mov [edi],eax ASM sub edi,4 ASM dec ecx ASM jnz _arth1_tcl2 ASM mov eax,ebp ASM pop ebp ASM mov sr,eax _arth1_out2:; #elif CRYPTO_INLINE_ASM == 4 ASM ("\n" "movl %4,%%ecx\n" "orl %%ecx,%%ecx\n" "je 3f\n" "movl %%ecx,%%ebx\n" "shll $2,%%ebx\n" "movl %2,%%esi\n" "addl %%ebx,%%esi\n" "movl %1,%%edi\n" "addl %%ebx,%%edi\n" "movl %3,%%ebx\n" "pushl %%ebp\n" "xorl %%ebp,%%ebp\n" "2:\n" "movl %%ebp,%%edx\n" "movl (%%esi),%%eax\n" "subl $4,%%esi\n" "divl %%ebx\n" "movl %%edx,%%ebp\n" "movl %%eax,(%%edi)\n" "subl $4,%%edi\n" "decl %%ecx\n" "jnz 2b\n" "movl %%ebp,%%eax\n" "popl %%ebp\n" "movl %%eax,%0\n" "3:\n" :"=g"(sr) :"g"(z),"g"(x),"g"(sn),"g"(xl) :"eax","edi","esi","ebx","ecx","edx","memory" ); #else for (i = xl; i > 0; i--) { p.D[ord2(0)] = x[i]; z[i] = (unsigned long)(p.Q[0] / sn); p.D[ord2(1)] = (unsigned long)(p.Q[0] % sn); } #endif z[0] = x[0]; big_lzero (z); z[z[0] + 1] = 0; return sr; #ifdef sr #undef sr #endif } #ifdef _MSC_VER #pragma warning (pop) #endif static __forceinline unsigned long big_normalise (const unsigned long *x, unsigned long *y) { // normalise divisor register unsigned long len = x[0]; register unsigned long r = x[len] + 1; register unsigned long norm = (r == 0) ? 1 : (unsigned long) (((unsigned __int64) 1UL << (sizeof (unsigned long)*8)) / r); if (norm != 1) big_pmul (x, norm, y); else if (x != y) big_copy (x, y); return norm; } #ifdef _MSC_VER #pragma warning (push) #pragma warning (disable:4731) #endif void big_multiply (const unsigned long *x, const unsigned long *y, unsigned long *z) { int i, xl, yl; unsigned long w0[ECC_WORDS * 2 + 2]; #if (CRYPTO_INLINE_ASM != 3) && (CRYPTO_INLINE_ASM != 4) int ti, j; OCTET p; #endif if (y[0]==0 || x[0]==0) { z[0] = 0; z[1] = 0; return; } xl = (int) x[0]; yl = (int) y[0]; bzero (w0, sizeof(w0)); if (x==y) { // fast squaring for (i=1;i= 0) { // unsigned long quotient - so do up to four subtracts instead big_psub (w0, y, w0); d++; } } if (big_compare (w0, y) < 0) { // x less than y - so x becomes remainder if (x != z) // testing parameters { big_copy (w0, x); } if (y != z) { z[0] = (d != 0); z[1] = d; } return; } if (y0==1) { // y is int - so use subdiv instead p.D[ord2(0)] = big_sdiv (w0, y[1], w0); if (y != z) { big_copy (w0, z); } if (x != z) { x[0] = (p.D[ord2(0)] != 0); x[1] = p.D[ord2(0)]; } return; } if (y!=z) { z[0] = 0; z[1] = 0; } ldy = y[y0]; sdy = y[y0-1]; for (k = w00; k >= y0; k--) { // long division #if CRYPTO_INLINE_ASM == 3 ASM push esi ASM push edi ASM lea ebx,w0 ASM mov esi,k ASM shl esi,2 ASM add ebx,esi ASM mov edx,[ebx+4] ASM mov eax,[ebx] ASM cmp edx,ldy ASM jne _arth2_tcl8 ASM mov edi,0xffffffff ASM mov esi,eax ASM add esi,ldy ASM jc _arth2_tcl12 ASM jmp _arth2_tcl10 _arth2_tcl8: ASM div DWORD PTR ldy ASM mov edi,eax ASM mov esi,edx _arth2_tcl10: ASM mov eax,sdy ASM mul edi ASM cmp edx,esi ASM jb _arth2_tcl12 ASM jne _arth2_tcl11 ASM cmp eax,[ebx-4] ASM jbe _arth2_tcl12 _arth2_tcl11: ASM dec edi ASM add esi,ldy ASM jnc _arth2_tcl10 _arth2_tcl12: ASM mov trying,edi ASM pop edi ASM pop esi #elif CRYPTO_INLINE_ASM == 4 ASM ("\n" "leal %1,%%ebx\n" "movl %2,%%esi\n" "shll $2,%%esi\n" "addl %%esi,%%ebx\n" "movl 4(%%ebx),%%edx\n" "movl (%%ebx),%%eax\n" "cmpl %3,%%edx\n" "jne 8f\n" "movl $0xffffffffUL,%%edi\n" "movl %%eax,%%esi\n" "addl %3,%%esi\n" "jc 12f\n" "jmp 10f\n" "8:\n" "divl %3\n" "movl %%eax,%%edi\n" "movl %%edx,%%esi\n" "10:\n" "movl %4,%%eax\n" "mull %%edi\n" "cmpl %%esi,%%edx\n" "jb 12f\n" "jne 11f\n" "cmpl -4(%%ebx),%%eax\n" "jbe 12f\n" "11:\n" "decl %%edi\n" "addl %3,%%esi\n" "jnc 10b\n" "12:\n" "movl %%edi,%0\n" "\n" :"=g"(trying) :"g"(w0),"g"(k),"g"(ldy),"g"(sdy) :"eax","edi","esi","ebx","ecx","edx","memory" ); #else carry = 0; if (w0[k+1] == ldy) // guess next quotient digit { trying = (unsigned long) (-1); ra = ldy + w0[k]; if (ra < ldy) carry = 1; } else { p.D[ord2(0)] = w0[k]; p.D[ord2(1)] = w0[k+1]; trying = (unsigned long) (p.Q[0] / ldy); ra = (unsigned long) (p.Q[0] % ldy); } while (carry == 0) { p.Q[0] = (unsigned __int64) sdy * trying; if ((p.D[ord2(1)] < ra) || ((p.D[ord2(1)] == ra) && (p.D[ord2(0)] <= w0[k-1]))) { break; } trying--; // refine guess ra += ldy; if (ra < ldy) carry = 1; } #endif m = k - y0; if (trying > 0) { // do partial subtraction borrow = 0; // inline - substitutes for loop below #if CRYPTO_INLINE_ASM == 3 ASM push esi ASM push edi ASM mov ecx,y0 ASM mov esi,m ASM shl esi,2 ASM mov edi,trying ASM lea ebx,w0 ASM add ebx,esi ASM mov esi,y ASM add esi,4 ASM sub ebx,esi ASM push ebp ASM xor ebp,ebp _arth2_tcl3: ASM mov eax,[esi] ASM add esi,4 ASM mul edi ASM add eax,ebp ASM mov ebp,[esi+ebx] ASM adc edx,0 ASM sub ebp,eax ASM adc edx,0 ASM mov [esi+ebx],ebp ASM dec ecx ASM mov ebp,edx ASM jnz _arth2_tcl3 ASM mov eax,ebp ASM pop ebp ASM mov borrow,eax ASM pop edi ASM pop esi #elif CRYPTO_INLINE_ASM == 4 ASM ("\n" "movl %1,%%ecx\n" "movl %2,%%esi\n" "shll $2,%%esi\n" "movl %3,%%edi\n" "leal %4,%%ebx\n" "addl %%esi,%%ebx\n" "movl %5,%%esi\n" "addl $4,%%esi\n" "subl %%esi,%%ebx\n" "pushl %%ebp\n" "xorl %%ebp,%%ebp\n" "3:\n" "movl (%%esi),%%eax\n" "addl $4,%%esi\n" "mull %%edi\n" "addl %%ebp,%%eax\n" "movl (%%esi,%%ebx),%%ebp\n" "adcl $0,%%edx\n" "subl %%eax,%%ebp\n" "adcl $0,%%edx\n" "movl %%ebp,(%%esi,%%ebx)\n" "decl %%ecx\n" "movl %%edx,%%ebp\n" "jnz 3b\n" "movl %%ebp,%%eax\n" "popl %%ebp\n" "movl %%eax,%0\n" "\n" :"=g"(borrow) :"g"(y0),"g"(m),"g"(trying),"g"(w0),"g"(y) :"eax","edi","esi","ebx","ecx","edx","memory" ); #else for (i = 1; i <= y0; i++) { p.Q[0] = (unsigned __int64) trying * y[i] + p.D[ord2(1)]; if (w0[m+i] < p.D[ord2(0)]) p.D[ord2(1)]++; w0[m+i] -= p.D[ord2(0)]; } #endif if (w0[k+1] < borrow) { // whoops! - over did it w0[k+1] = 0; carry = 0; for (i = 1; i <= y0; i++) { // compensate for error ... psum = w0[m+i] + y[i] + carry; if (psum > y[i]) carry = 0; else if (psum < y[i]) carry = 1; w0[m+i] = psum; } trying--; // ... and adjust guess } else w0[k+1] -= borrow; } if ((k == w00) && (trying == 0)) w00--; else if (y != z) z[m+1] = trying; } #ifdef borrow #undef borrow #endif if (y != z) { z[0] = w00 - y0 + 1; // set length of result big_lzero (z); z[z[0] + 1] = 0; } w0[0] = y0; if (x != z) { big_lzero (w0); big_copy (w0, x); x[x[0] + 1] = 0; } } #ifdef _MSC_VER #pragma warning (pop) #endif void big_mad (const unsigned long *x, const unsigned long *y, const unsigned long *z, const unsigned long *w, unsigned long *q, unsigned long *r) { unsigned long w0[ECC_WORDS * 2 + 2]; w0[0] = ECC_WORDS * 2 + 1; big_multiply (x, y, w0); if ((x != z) && (y != z)) big_padd (w0, z, w0); // exactly why ECC_WORDS * 2 + 2 and not ECC_WORDS * 2 + 1 big_divide (w0, w, q); if (q != r) big_copy (w0, r); } void big_nmad (const unsigned long *x, const unsigned long *y, const unsigned long *z, const unsigned long *w, unsigned long *q, unsigned long *r) { unsigned long w0[ECC_WORDS * 2 + 2]; w0[0] = ECC_WORDS * 2 + 1; big_multiply (x, y, w0); if ((x != z) && (y != z)) big_psub (z, w0, w0); big_divide (w0, w, q); if (q != r) big_copy (w0, r); } static __forceinline void big_sftbit(const unsigned long *x,const int n,unsigned long *z) { // shift x by n bits int m; big_copy (x, z); if (n==0) return; m = int_abs (n); if (n > 0) { // shift left big_shift (z ,n / (sizeof (unsigned long)*8),z); big_pmul (z, (unsigned long) 1 << (m % (sizeof (unsigned long)*8)),z); } else { // shift right big_shift (z, n / (sizeof (unsigned long)*8),z); big_sdiv (z, (unsigned long) 1 << (m % (sizeof (unsigned long)*8)),z); } } void big_power (const unsigned long *x, unsigned long n, unsigned long *y) { // raise big number to int power y = x ^ n mod modulus unsigned long w0[ECC_WORDS * 2 + 2]; w0[0] = ECC_WORDS * 2 + 1; big_copy (x, w0); y[0] = 0; y[1] = 0; if (w0[0] == 0) return; big_convert (1, y); for (; n; n >>= 1, big_multiply (w0, w0, w0)) { if (n & 1) { big_multiply (y, w0, y); // "Russian peasant" exponentiation } } } void big_nres (const unsigned long *x, unsigned long *y, unsigned long *z) { // convert x to n-residue format mod z unsigned long w0[ECC_WORDS * 2 + 2]; w0[0] = ECC_WORDS * 2 + 1; big_copy (x, w0); big_divide (w0, z, z); big_shift (x, (int) z[0], w0); big_divide (w0, z, z); big_copy (w0, y); } #ifdef _MSC_VER #pragma warning (push) #pragma warning (disable:4731) #endif void big_redc (const unsigned long *x, unsigned long *y, const unsigned long *z, const unsigned long ndash) { // Montgomery's REDC function: y = redc(x) mod z; // also used to convert n-residues back to normal form // ndash = inv32 (-z[1]); unsigned long delay_carry; int i, rn, rn2p1; unsigned long w0[ECC_WORDS * 2 + 3]; #if (CRYPTO_INLINE_ASM != 3) && (CRYPTO_INLINE_ASM != 4) OCTET p, m; int j; #endif big_copy (x, w0); bzero (w0 + x[0] + 2, (ECC_WORDS * 2 - x[0]) * sizeof (unsigned long)); delay_carry = 0; rn = (int) z[0]; rn2p1 = rn+rn+1; for (i = 1; i <= rn; i++) { // inline - substitutes for loop below #if CRYPTO_INLINE_ASM == 3 ASM mov ecx,rn ASM mov esi,i ASM shl esi,2 ASM lea ebx,w0 ASM add ebx,esi ASM mov edi,[ebx] ASM imul edi,ndash ASM mov esi,z ASM add esi,4 ASM sub ebx,esi ASM sub ebx,4 ASM push ebp ASM xor ebp,ebp _monty_m1: ASM mov eax,[esi] ASM add esi,4 ASM mul edi ASM add eax,ebp ASM mov ebp,[esi+ebx] ASM adc edx,0 ASM add ebp,eax ASM adc edx,0 ASM mov [esi+ebx],ebp ASM dec ecx ASM mov ebp,edx ASM jnz _monty_m1 ASM pop ebp ASM mov eax,delay_carry ASM add [esi+ebx+4],eax ASM mov eax,0 ASM adc eax,0 ASM add [esi+ebx+4],edx ASM adc eax,0 ASM mov delay_carry,eax #elif CRYPTO_INLINE_ASM == 4 ASM __volatile__ ("\n" "movl %0,%%ecx\n" "movl %1,%%esi\n" "shll $2,%%esi\n" "leal %2,%%ebx\n" "addl %%esi,%%ebx\n" "movl (%%ebx),%%edi\n" "imull %3,%%edi\n" "leal %4,%%esi\n" "addl $4,%%esi\n" "subl %%esi,%%ebx\n" "subl $4,%%ebx\n" "pushl %%ebp\n" "xorl %%ebp,%%ebp\n" "1:\n" "movl (%%esi),%%eax\n" "addl $4,%%esi\n" "mull %%edi\n" "addl %%ebp,%%eax\n" "movl (%%esi,%%ebx),%%ebp\n" "adcl $0,%%edx\n" "addl %%eax,%%ebp\n" "adcl $0,%%edx\n" "movl %%ebp,(%%esi,%%ebx)\n" "decl %%ecx\n" "movl %%edx,%%ebp\n" "jnz 1b\n" "popl %%ebp\n" "movl %5,%%eax\n" "addl %%eax,4(%%esi,%%ebx)\n" "movl $0,%%eax\n" "adcl $0,%%eax\n" "addl %%edx,4(%%esi,%%ebx)\n" "adcl $0,%%eax\n" "movl %%eax,%5\n" "\n" : :"g"(rn),"g"(i),"g"(w0),"g"(ndash),"g"(z),"g"(delay_carry) :"eax","edi","esi","ebx","ecx","edx","memory" ); #else m.Q[0] = ndash * w0[i]; // Note that after this time around the loop, w0[i]=0 p.D[ord2(1)] = 0; for (j=1;j<=rn;j++) { p.Q[0] = m.Q[0] * z[j] + p.D[ord2(1)] + w0[i+j-1]; w0[i+j-1] = p.D[ord2(0)]; } w0[rn+i] += delay_carry; delay_carry = (w0[rn+i] < delay_carry); w0[rn+i] += p.D[ord2(1)]; if (w0[rn+i] < p.D[ord2(1)]) delay_carry = 1; #endif // CRYPTO_INLINE_ASM } w0[rn2p1] = delay_carry; w0[0] = rn2p1; big_lzero (w0); big_shift (w0, (-rn), w0); if (big_compare (w0, z) >= 0) big_psub (w0, z, y); else big_copy (w0, y); } #ifdef _MSC_VER #pragma warning (pop) #endif static int big_window (const unsigned long *x, const int i, int *nbs, int *nzs) { // returns sliding window value, max. of 5 bits, // starting at i-th bit of big x. nbs is number of bits // processed, nzs is the number of additional trailing // zeros detected. Returns valid bit pattern 1x..x1 with // no two adjacent 0's. So 10101 will return 21 with // nbs=5, nzs=0. 11001 will return 3, with nbs=2, nzs=2, // having stopped after the first 11.. int j,r,w; w=5; // check for leading 0 bit *nbs=1; *nzs=0; if (!big_testbit(x,i)) return 0; // adjust window size if not enough bits left if (i-w+1<0) w=i+1; r=1; for (j=i-1;j>i-w;j--) { // accumulate bits. Abort if two 0's in a row (*nbs)++; r*=2; if (big_testbit(x,j)) r+=1; if (r%4==0) { // oops - too many zeros - shorten window r/=4; *nbs-=2; *nzs=2; break; } } if (r%2==0) { // remove trailing 0 r/=2; *nzs=1; (*nbs)--; } return r; } static int big_window2 (const unsigned long *x, const unsigned long *y, const int i, int *nbs, int *nzs) { // two bit window for double exponentiation int r,w = 2,a,b,c,d; *nbs=1; *nzs=0; // check for two leading 0's a=big_testbit(x,i); b=big_testbit(y,i); if (!a && !b) return 0; if (i<1) w=1; if (a) { if (b) r=3; else r=2; } else r=1; if (w==1) return r; c=big_testbit(x,i-1); d=big_testbit(y,i-1); if (!c && !d) { *nzs=1; return r; } *nbs=2; r*=4; if (c) { if (d) r+=3; else r+=2; } else r+=1; return r; } void nres_powltr (const unsigned long x, const unsigned long *y, unsigned long *w, unsigned long *z, unsigned long ndash) { // calculates w=x^y mod z using Left to Right Method // uses only n^2 modular squarings, because x is small // Note: x is NOT an nresidue unsigned long i, nb = y[0]; unsigned long w1[ECC_WORDS + 2]; if ((x == 0) && (nb != 0)) { w[0] = 0; w[1] = 0; return; } if (nb == 0) { big_copy (ECC_ONE, z); return; } for (i = y[nb], nb *= (sizeof (unsigned long)*8); (signed) i > 0; nb--) i <<= 1; big_copy (y, w1); big_convert (x, w); big_nres (w, w, z); if (nb > 1) for (i = nb - 2; (signed) i >= 0; i--) { // Left to Right binary method nres_modmult (w, w, w, z, ndash); if (big_testbit (w1, i)) { // this is quick big_pmul (w, x, w); // another reason why we need ECC_WORDS + 2 and not ECC_WORDS + 1 big_divide (w, z, z); } } } void nres_powmod (const unsigned long *x, const unsigned long *y, unsigned long *w, unsigned long *z, const unsigned long ndash) { // calculates w=x^y mod z, using n-residues // See "Analysis of Sliding Window Techniques for // Exponentiation, C.K. Koc, Computers. Math. & // Applic. Vol. 30 pp17-24 1995. Uses work-space // variables for pre-computed table. int i,j,k,t,nb,nbw,nzs,n; unsigned long *table[16]; unsigned long w1 [ECC_WORDS + 3]; unsigned long w2 [ECC_WORDS + 3]; unsigned long w3 [ECC_WORDS + 3]; unsigned long w4 [ECC_WORDS + 3]; unsigned long w5 [ECC_WORDS + 3]; unsigned long w5d[ECC_WORDS + 3]; unsigned long w6 [ECC_WORDS + 3]; unsigned long w6d[ECC_WORDS + 3]; unsigned long w8 [ECC_WORDS + 3]; unsigned long w9 [ECC_WORDS + 3]; unsigned long w10[ECC_WORDS + 3]; unsigned long w11[ECC_WORDS + 3]; unsigned long w12[ECC_WORDS + 3]; unsigned long w13[ECC_WORDS + 3]; if ((x[0] == 0) && (y[0] != 0)) { w[0] = 0; w[1] = 0; return; } if (y[0] == 0) { big_convert (1, w); big_nres (w, w, z); return; } big_copy (y, w1); big_copy (x, w3); // build a table. Up to 5-bit sliding windows. Windows with // two adjacent 0 bits simply won't happen table[ 0]=w3; table[ 1]=w4; table[ 2]=w5; table[ 3]=w5d; table[ 4]=NULL; table[ 5]=w6; table[ 6]=w6d; table[ 7]=w8; table[ 8]=NULL; table[ 9]=NULL; table[10]=w9; table[11]=w10; table[12]=NULL; table[13]=w11; table[14]=w12; table[15]=w13; nres_modmult (w3, w3, w2, z, ndash); // x^2 n = 15; j = 0; do { // pre-computations t = 1; k = j + 1; while (table[k] == NULL) {k++; t++;} big_copy (table[j], table[k]); for (i = 0; i < t; i++) nres_modmult (table[k], w2, table[k], z, ndash); j = k; } while (j < n); nb = big_logb2 (w1); big_copy (w3, w); if (nb > 1) for (i = nb - 2; i >=0; ) { // Left to Right method n = big_window (w1, i, &nbw, &nzs); for (j = 0; j < nbw; j++) { nres_modmult (w, w, w, z, ndash); } if (n > 0) { nres_modmult (w, table[n/2], w, z, ndash); } i -= nbw; if (nzs) { for (j = 0; j < nzs; j++) { nres_modmult (w, w, w, z, ndash); } i -= nzs; } } } void nres_powmod2 (const unsigned long *x, const unsigned long *y, const unsigned long *a, const unsigned long *b, unsigned long *w) { // finds w = x^y.a^b mod z. Fast for some cryptosystems int i,j,nb,nb2,nbw,nzs,n; unsigned long *table[16]; unsigned long w1 [ECC_WORDS + 3]; unsigned long w2 [ECC_WORDS + 3]; unsigned long w3 [ECC_WORDS + 3]; unsigned long w4 [ECC_WORDS + 3]; unsigned long w5 [ECC_WORDS + 3]; unsigned long w5d[ECC_WORDS + 3]; unsigned long w6 [ECC_WORDS + 3]; unsigned long w6d[ECC_WORDS + 3]; unsigned long w8 [ECC_WORDS + 3]; unsigned long w9 [ECC_WORDS + 3]; unsigned long w10[ECC_WORDS + 3]; unsigned long w11[ECC_WORDS + 3]; unsigned long w12[ECC_WORDS + 3]; unsigned long w13[ECC_WORDS + 3]; //unsigned long w7 [ECC_WORDS + 3]; //unsigned long w14[ECC_WORDS + 3]; //unsigned long w15[ECC_WORDS + 3]; big_copy (y, w1); big_copy (x, w2); big_copy (b, w3); big_copy (a, w4); w[0] = 0; w[1] = 0; if ((w2[0] == 0) || (w4[0] == 0)) return; big_convert (1, w); big_nres (w, w, (unsigned long *) ECC_PRIME); if ((w1[0] == 0) && (w3[0] == 0)) { return; } // uses 2-bit sliding window. This is 25% faster! nres_ecc_modmult (w2, w4, w5); nres_ecc_modmult (w2, w2, w12); nres_ecc_modmult (w4, w4, w13); nres_ecc_modmult (w4, w13, w5d); nres_ecc_modmult (w2, w13, w6); nres_ecc_modmult (w6, w4, w6d); nres_ecc_modmult (w4, w12, w8); nres_ecc_modmult (w2, w12, w9); nres_ecc_modmult (w4, w9, w10); nres_ecc_modmult (w5d, w12, w11); nres_ecc_modmult (w9, w13, w12); nres_ecc_modmult (w10, w13, w13); table[ 0]=NULL; table[ 1]=w4; table[ 2]=w2; table[ 3]=w5; table[ 4]=NULL; table[ 5]=w5d; table[ 6]=w6; table[ 7]=w6d; table[ 8]=NULL; table[ 9]=w8; table[10]=w9; table[11]=w10; table[12]=NULL; table[13]=w11; table[14]=w12; table[15]=w13; nb=big_logb2(w1); if ((nb2=big_logb2(w3))>nb) nb=nb2; for (i=nb-1;i>=0;) { n=big_window2(w1,w3,i,&nbw,&nzs); for (j=0;j0) nres_ecc_modmult (w, table[n], w); i-=nbw; if (nzs) { nres_ecc_modmult (w, w, w); i--; } } } static unsigned long ECC_FIVE[3] = { 1, 5, 0 }; static unsigned long nres_sqroot (const unsigned long *x, unsigned long *w) { // w = sqrt (x) mod p. This depends on p being prime! //unsigned long cat; unsigned long w14[ECC_WORDS + 3], w1[ECC_WORDS + 3], w4[ECC_WORDS + 3], w15[ECC_WORDS + 3], w2[ECC_WORDS + 3], e, n, r, i; if (x[0] == 0) goto _bye; if (big_compare (x, ECC_ONE) == 0) { big_copy (ECC_ONE, w); return 1; } switch (ECC_NDASH & 7) { case 5: case 1: // the easiest cases big_padd (ECC_PRIME, ECC_1, w14); // that's why w14 must be ECC_WORDS + 2 and not ECC_WORDS + 1 big_sdiv (w14, 4, w14); // big_shr (w14, 2); nres_powmod (x, w14, w, ECC_PRIME, ECC_NDASH); // w = (p+1) / 4 * x nres_ecc_modmult (w, w, w14); // w = ((p+1) / 4 * x) ^ 2 break; case 3: // fairly easy case big_modadd (x, x, w15); // x * 2 big_psub (ECC_PRIME, ECC_FIVE, w14); // w14 = p - 5 big_sdiv (w14, 8, w14); // big_shr (w14, 3); nres_powmod (w15, w14, w, ECC_PRIME, ECC_NDASH); // w = x * 2 / ((p - 5) / 8) nres_ecc_modmult (w, w, w14); // w14 = w ^ 2 nres_ecc_modmult (w15, w14, w14); // w14 *= x * 2 big_modsub (w14, ECC_ONE, w14); // w14-- nres_ecc_modmult (w14, w, w); // w14 *= w if (w15[1] & 1) big_padd (w15, ECC_PRIME, w15); big_sdiv (w15, 2, w15); // big_shr (w15, 1); nres_ecc_modmult (w, w15, w); nres_ecc_modmult (w, w, w14); if (big_compare (w14, w15) == 0) return 1; goto _bye; case 7: // difficult case. Shank's method big_psub (ECC_PRIME, ECC_1, w14); e = 0; while ((w14[1] & 1) == 0) { big_sdiv (w14, 2, w14); // big_shr (w14, 1); e++; } for (r = 2; ; r++) { nres_powltr (r, w14, w, (unsigned long *) ECC_PRIME, ECC_NDASH); if (big_compare (w, ECC_ONE) == 0) continue; big_copy (w, w4); big_psub (ECC_PRIME, ECC_ONE, w1); // w1 = -1 mod big_modulus n = 0; for (i = 1; i < e; i++) { // check for composite big_modulus if (big_compare (w4, w1) == 0) n = 1; nres_ecc_modmult (w4, w4, w4); if (!n && (big_compare (w4, ECC_ONE) == 0)) goto _bye; } if (big_compare (w4, w1) == 0) break; if (!n) goto _bye; } // w = y nres_powmod (x, w14, w15, ECC_PRIME, ECC_NDASH); // w15 = w big_padd (w14, ECC_1, w14); // w14++ big_sdiv (w14, 2, w14); // big_shr (w14, 1); nres_powmod (x, w14, w14, ECC_PRIME, ECC_NDASH); // w14 = v for (;;) { if (big_compare (w, ECC_ONE) == 0) break; big_copy (w15, w2); for (n = 0; big_compare (w2, ECC_ONE) != 0; n++) nres_ecc_modmult (w2, w2, w2); if (n >= e) goto _bye; r = e - n - 1; for (i = 0; i < r; i++) nres_ecc_modmult (w, w, w); nres_ecc_modmult (w14, w, w14); nres_ecc_modmult (w, w, w); nres_ecc_modmult (w15, w, w15); e = n; } big_copy (w14, w); nres_ecc_modmult (w, w, w14); break; default: goto _bye; // 0, 2, 4, 6: even modulus can't be } if (big_compare (w14, x) == 0) return 1; _bye: w[0] = 0; w[1] = 0; return 0; } unsigned long big_inverse (const unsigned long *x, unsigned long *y) { // Euclids method extended to calculate multiplicative inverse // x * xd = 1 mod ECC_PRIME int q; unsigned long m, a = 0, b = 0, c = 0, d = 0, r, s1 = 1, s2 = 1, s3 = 1, s4 = 1, dplus = 1; unsigned long w[6][ECC_WORDS + 3]; unsigned long *w0 = w[0], *w1 = w[1], *w2 = w[2], *w3 = w[3], *w4 = w[4], *w5 = w[5]; OCTET u,v,lq,lr,p; int last = 0; unsigned long *t; bzero (w, sizeof (w)); big_copy (x, w[1]); big_copy (ECC_PRIME, w[2]); big_convert (1, w[3]); // w[4][0] = 0; w[4][1] = 0; while (w2[0] != 0) { if (b == 0) { // update w1 and w2 big_divide (w1, w2, w5); t = w1; w1 = w2; w2 = t; // swap (x, y) r = s1; s1 = s2; s2 = r; // swap (x, y) signs big_multiply (w4, w5, w0); if (s1 ^ s2 ^ s3 ^ s4) // w3 and (w1*w2*w4) have opposite signs { big_padd (w3, w0, w3); } // sign of s3 doesn't change else if (big_compare (w3, w0) > 0) { big_psub (w3, w0, w3); } // sign of s3 doesn't change as it was greater else { big_psub (w0, w3, w3); s3 ^= 1; } t = w3; w3 = w4; w4 = t; // swap (xd, yd) r = s3; s3 = s4; s4 = r; // swap (xd, yd) signs } else { big_pmul (w1, c, w5); // w5 = w1*c big_pmul (w1, a, w1); // w1 = w1*a big_pmul (w2, b, w0); // w0 = w2*b big_pmul (w2, d, w2); // w2 = w2*d m = s1 ^ s2; if (m) // w1*a and w2*b have the same sign? w1*c and w2*d have the same sign? { big_padd (w1, w0, w1); s1 ^= !dplus; big_padd (w2, w5, w2); s2 ^= !dplus; } else { if (big_compare (w1, w0) > 0) { big_psub (w1, w0, w1); s1 ^= !dplus; } else { big_psub (w0, w1, w1); s1 ^= dplus; } if (big_compare (w2, w5) > 0) { big_psub (w2, w5, w2); s2 ^= !dplus; } else { big_psub (w5, w2, w2); s2 ^= dplus; } } big_pmul (w3, c, w5); // w5 = w3*c big_pmul (w3, a, w3); // w3 = w3*a big_pmul (w4, b, w0); // w0 = w4*b big_pmul (w4, d, w4); // w4 = w4*d m = s3 ^ s4; if (m) // w3*a and w4*b have the same sign? w3*c and w4*d have the same sign? { big_padd (w3, w0, w3); s3 ^= !dplus; big_padd (w4, w5, w4); s4 ^= !dplus; } else { if (big_compare (w3, w0) > 0) { big_psub (w3, w0, w3); s3 ^= !dplus; } else { big_psub (w0, w3, w3); s3 ^= dplus; } if (big_compare (w4, w5) > 0) { big_psub (w4, w5, w4); s4 ^= !dplus; } else { big_psub (w5, w4, w4); s4 ^= dplus; } } } if (w2[0] == 0) break; r = (int) w1[0]; if (r == 1) { last = 1; u.Q[0] = w1[1]; v.Q[0] = w2[1]; } else { m = w1[r]+1; if ((r > 2) && (m != 0)) { // squeeze out as much significance as possible p.D[ord2(1)] = w1[r]; p.D[ord2(0)] = w1[r-1]; u.D[ord2(1)] = (unsigned long) (p.Q[0] / m); p.D[ord2(1)] = (unsigned long) (p.Q[0] % m); p.D[ord2(0)] = w1[r-2]; u.D[ord2(0)] = (unsigned long) (p.Q[0] / m); p.D[ord2(1)] = w2[r]; p.D[ord2(0)] = w2[r-1]; v.D[ord2(1)] = (unsigned long) (p.Q[0] / m); p.D[ord2(1)] = (unsigned long) (p.Q[0] % m); p.D[ord2(0)] = w2[r-2]; v.D[ord2(0)] = (unsigned long) (p.Q[0] / m); } else { u.D[ord2(1)] = w1[r]; u.D[ord2(0)] = w1[r-1]; v.D[ord2(1)] = w2[r]; v.D[ord2(0)] = w2[r-1]; if (r == 2) last = 1; } } a = 1; b = 0; c = 0; d = 1; dplus = 1; for (;;) { // work only with most significant piece if (last) { if (v.Q[0] == 0) break; lq.Q[0] = u.Q[0] / v.Q[0]; } else { if (dplus) { if (((v.Q[0] - c) == 0) || ((v.Q[0] + d) == 0)) break; lq.Q[0] = (u.Q[0] + a) / (v.Q[0] - c); if (lq.Q[0] != (u.Q[0] - b) / (v.Q[0] + d)) break; } else { if (((v.Q[0] + c) == 0) || ((v.Q[0] - d) == 0)) break; lq.Q[0] = (u.Q[0] - a) / (v.Q[0] + c); if (lq.Q[0] != (u.Q[0] + b) / (v.Q[0] - d)) break; } } if (lq.Q[0] >= ((((unsigned)(0-(signed)1))>>1)+1-b) / int_abs(d)) { break; } q=lq.D[ord2(0)]; r=a+q*c; a=c; c=r; r=b+q*d; b=d; d=r; lr.Q[0] = u.Q[0] - q * v.Q[0]; u.Q[0] = v.Q[0]; v.Q[0] = lr.Q[0]; dplus = !dplus; } } if ((w3[0] == 0) || (s3 == 0)) big_psub (ECC_PRIME, w3, y); else big_copy (w3, y); return big_size(w1); } static __forceinline unsigned long nres_moddiv (const unsigned long *x, const unsigned long *y, unsigned long *w) { register unsigned long gcd; unsigned long w0[ECC_WORDS + 3]; big_redc (y, w0, ECC_PRIME, ECC_NDASH); gcd = big_inverse (w0, w0); big_mad (x, w0, x, ECC_PRIME, (unsigned long *) ECC_PRIME, w); return gcd; } // Modular division using n-residues w = x / y mod modulus ////\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//// //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\// ////\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//// //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\// ////\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//// //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\/ ELLIPTIC CURVE FUNCTIONS \//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\// ////\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//// //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\// ////\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//// //\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\// ////\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//\\//// int ecc_point_set (const unsigned long *x, ecc_point *p) { // initialise a point on the curve // if x == NULL, set to point at infinity // the y co-ordinate is calculated if valid. // Returns 1 for valid point, otherwise 0. unsigned long w0[ECC_WORDS + 3]; unsigned long y_sign; if (x == NULL) { big_copy (ECC_ONE, p->X); big_copy (ECC_ONE, p->Y); big_copy (ECC_ONE, p->Z); p->marker = ECC_POINT_INFINITY; return 1; } big_copy (ECC_ONE, p->Z); p->marker = ECC_POINT_NORMALIZED; // find y^2 = (x^2-3)*x+B y_sign = x[x[0] + 1]; // the sign is kept as the last word after the X coordinate. it's not copied but reset to 0 by big_copy! big_copy (x, p->X); // x nres_ecc_modmult (p->X, p->X, w0); // x^2 big_modadd (w0, ECC_A, w0); // x^2 + (-3) nres_ecc_modmult (w0, p->X, w0); // (x^2 + (-3)) * x big_modadd (w0, ECC_B, w0); // (x^2 + (-3)) * x + B if (!nres_sqroot (w0, p->Y)) // w0 = y^2 ? return 0; // it isn't :( big_redc (p->Y, w0, ECC_PRIME, ECC_NDASH); // check LSB if ((w0[1] ^ y_sign) & 1) // is it the right root? { big_psub (ECC_PRIME, p->Y, w0); // it is now big_copy (w0, p->Y); } return 1; } static void ecc_point_double (ecc_point *p) { // double ecc_point on elliptic curve unsigned long w[4][ECC_WORDS + 3]; if (p->marker == ECC_POINT_INFINITY) return; // 2 times infinity == infinity ! if (p->Y[0] == 0) { // set to point at infinity ecc_point_set (NULL, p); return; } if (p->marker == ECC_POINT_NORMALIZED) { big_copy (ECC_ONE, w[0]); } else { nres_ecc_modmult (p->Z, p->Z, w[0]); } big_modsub (p->X, w[0], w[1]); big_modadd (p->X, w[0], w[2]); nres_ecc_modmult(w[1], w[2], w[1]); big_modadd (w[1], w[1], w[2]); big_modadd (w[2], w[1], w[2]); nres_ecc_modmult(p->Y, p->Z, p->Z); big_modadd (p->Z, p->Z, p->Z); nres_ecc_modmult(p->Y, p->Y, w[0]); nres_ecc_modmult(p->X, w[0], w[1]); big_modadd (w[1], w[1], w[1]); big_modadd (w[1], w[1], w[1]); nres_ecc_modmult(w[2], w[2], w[3]); big_modadd (w[1], w[1], p->X); big_modsub (w[3], p->X, p->X); nres_ecc_modmult(w[0], w[0], w[0]); big_modadd (w[0], w[0], w[0]); big_modadd (w[0], w[0], w[0]); big_modadd (w[0], w[0], w[0]); big_modsub (w[1], p->X, w[1]); nres_ecc_modmult(w[1], w[2], w[1]); big_modsub (w[1], w[0], p->Y); p->marker = ECC_POINT_GENERAL; } static int ecc_point_padd (const ecc_point *p, ecc_point *pa) { // primitive add two points on elliptic curve - pa+=p; // note that if p is normalized, its Z coordinate isn't used unsigned long w[6][ECC_WORDS + 3]; if (p->marker != ECC_POINT_NORMALIZED) { nres_ecc_modmult (p->Z, p->Z, w[0]); nres_ecc_modmult (pa->X, w[0], w[1]); nres_ecc_modmult (w[0], p->Z, w[0]); nres_ecc_modmult (pa->Y, w[0], w[2]); } else { big_copy (pa->X, w[1]); big_copy (pa->Y, w[2]); } nres_ecc_modmult(pa->Z, pa->Z, w[0]); nres_ecc_modmult(p->X, w[0], w[4]); nres_ecc_modmult(w[0], pa->Z, w[0]); nres_ecc_modmult(p->Y, w[0], w[5]); big_modsub (w[1], w[4], w[1]); big_modsub (w[2], w[5], w[2]); if (w[1][0] == 0) { if (w[2][0] == 0) { // should have doubled ! return 0; } else { // point at infinity ecc_point_set (NULL, pa); return 1; } } big_modadd (w[4], w[4], w[0]); big_modadd (w[1], w[0], w[4]); big_modadd (w[5], w[5], w[0]); big_modadd (w[2], w[0], w[5]); if (p->marker != ECC_POINT_NORMALIZED) { nres_ecc_modmult (pa->Z, p->Z, w[3]); nres_ecc_modmult (w[3], w[1], pa->Z); } else { nres_ecc_modmult (pa->Z, w[1], pa->Z); } nres_ecc_modmult(w[1], w[1], w[0]); nres_ecc_modmult(w[1], w[0], w[1]); nres_ecc_modmult(w[0], w[4], w[0]); nres_ecc_modmult(w[2], w[2], w[4]); big_modsub (w[4], w[0], pa->X); big_modsub (w[0], pa->X, w[0]); big_modsub (w[0], pa->X, w[0]); nres_ecc_modmult(w[2], w[0], w[2]); nres_ecc_modmult(w[1], w[5], w[1]); big_modsub (w[2], w[1], w[5]); // divide by 2 if (big_sdiv (w[5], 2, pa->Y) != 0) { big_padd (ECC_PRIME, w[5], w[5]); // result might be ECC_WORDS + 2 long, not usual ECC_WORDS big_sdiv (w[5], 2, pa->Y); } pa->marker = ECC_POINT_GENERAL; return 1; } void ecc_point_add (const ecc_point *pa, ecc_point *pb) { // pb += pa; if (pa == pb) { ecc_point_double(pb); return; } if (pb->marker == ECC_POINT_INFINITY) { ecc_point_copy (pa, pb); return; } if (pa->marker == ECC_POINT_INFINITY) { return; } if (!ecc_point_padd (pa, pb)) ecc_point_double (pb); } static __forceinline void ecc_point_negate (ecc_point *p) { // negate a point if (p->marker == ECC_POINT_INFINITY) return; big_psub (ECC_PRIME, p->Y, p->Y); } static __forceinline void ecc_point_sub (ecc_point *p, ecc_point *pa) { if (p == pa) { ecc_point_set (NULL, pa); return; } if (p->marker == ECC_POINT_INFINITY) { return; } ecc_point_negate (p); ecc_point_add (p, pa); ecc_point_negate (p); } static int naf_window (const unsigned long *x, const unsigned long *x3, const int i, int *nbs, int *nzs) { // returns sliding window value, max of 5 bits // starting at i-th bit of x. nbs is number of bits // processed. nzs is number of additional trailing // zeros detected. x and x3 (which is 3*x) are // combined to produce the NAF (non-adjacent form) // So if x=11011(27) and x3 is 1010001, the LSB is // ignored and the value 100T0T (32-4-1=27) processed, // where T is -1. Note x.P = (3x-x)/2.P. This value will // return +7, with nbs=4 and nzs=1, having stopped after // the first 4 bits. Note in an NAF non-zero elements // are never side by side, so 10T10T won't happen int w = 5; int j = i - 1; int nb = big_testbit(x3,i) - big_testbit(x,i); // get first bit int r = (nb > 0) ? 1 : -1; int last; *nbs=1; *nzs=0; if (nb==0) return 0; if (i <= w) { w = i; last = 1; } else { last = 0; } for (; j > i - w; j--) // scan the bits { (*nbs)++; r <<= 1; nb = big_testbit (x3, j) - big_testbit (x, j); if (nb==0) continue; if (nb>0) r+=1; if (nb<0) r-=1; } if (!last && (r & 1)) (*nzs)++; while ((r & 1) == 0) // remove trailing zeros { r >>= 1; (*nzs)++; (*nbs)--; } return r; } int ecc_point_norm (ecc_point *p) { // normalise a point unsigned long w[2][ECC_WORDS + 3]; if (p->marker != ECC_POINT_GENERAL) return 1; if (nres_moddiv (ECC_ONE, p->Z, w[0]) > 1) // 1 / Z { return 0; } nres_ecc_modmult (w[0], w[0], w[1]); // 1 / ZZ nres_ecc_modmult (p->X, w[1], p->X); // X / ZZ nres_ecc_modmult (w[1], w[0], w[1]); // 1 / ZZZ nres_ecc_modmult (p->Y, w[1], p->Y); // Y / ZZZ big_copy (ECC_ONE, p->Z); p->marker = ECC_POINT_NORMALIZED; return 1; } void ecc_point_mult (const unsigned long *e, const ecc_point *p_in, ecc_point *p_out) { int i,j,n,nb,nbs,nzs; ecc_point p, table[11]; unsigned long w[2][ECC_WORDS + 3]; if (e[0] == 0) { // multiplied by 0 ecc_point_set (NULL, p_out); return; } big_copy (e, w[0]); // will add trailing zero to w[0] ecc_point_copy (p_in, p_out); // will add trailing zeroes to p_out's coordinates ecc_point_norm (p_out); if ((w[0][0] == 1) && (w[0][1] != 0)) { return; } big_pmul (w[0], 3, w[1]); // h = 3 * e (might happen to be 1 word longer) ecc_point_copy (p_out, &p); ecc_point_copy (&p, &table[0]); ecc_point_double (&p); for (i = 1; i <= 10; i++) { // precomputation ecc_point_copy (&table[i-1], &table[i]); ecc_point_add (&p, &table[i]); } // note that normalising this table doesn't really help nb = big_logb2 (w[1]); for (i = nb - 2; i >=1; ) { // add/subtract n = naf_window (w[0], w[1], i, &nbs, &nzs); for (j = 0; j < nbs; j++) ecc_point_double (p_out); if (n > 0) ecc_point_add (&table[n/2], p_out); else if (n < 0) ecc_point_sub (&table[(-n)/2], p_out); i -= nbs; if (nzs) { for (j = 0; j < nzs; j++) ecc_point_double (p_out); i -= nzs; } } } /* static __forceinline unsigned long big_shr (unsigned long *x, unsigned long n) { // shifts big number x right by n bits (n < (sizeof (unsigned long)*8)) #ifdef CRYPTO_NOASM #error NO C CODE FOR big_shr YET! #elif CRYPTO_INLINE_ASM == 3 ASM { mov esi,x mov ecx,n mov ebx,[esi] lea edi,[esi + 4] and ecx,0x1F mov edx,[edi] //jz __chc1 dec ebx //jz __chc2 __chc3: mov eax,edx mov edx,[edi + 4] shrd eax,edx,cl dec ebx mov [edi],eax lea edi,[edi + 4] jnz __chc3 //__chc2: shr edx,cl mov [edi],edx jnz __chc1 dec dword ptr [esi] __chc1: } #else #error NO ASSEMBLY VERSION OF big_shr FOR THIS PROCESSOR YET! #endif } */