/*\
 *
 * Ruptor's Super-Collider Version 1.2
 * Copyright (c) 2007 Marcos el Ruptor
 * Public domain for educational purposes.
 * What else is it good for anyway?
 *
 * It works on all caterpillar structures beginning with block 0,
 * with a round function of a form x[i] = f(x[i-1],x[i],x[i+1]).
 *
 * It requires a total of about 2^(AROUND*BITS/2) in resources to find a collision,
 * but certainly a lot less for simple round functions,
 * where single-bit words and reduced sets of words can be used.
 *
 * cl /Oxpb2 supercollider.c
 *
\*/

#include <stdio.h>
#include <string.h>
#include <windows.h>

#define WORDS			4096	// You can set it to 32 or even a smaller value to speed up testing although XXTEA would actually use more rounds on smaller blocks
#define BITS			8
#define USE_EXTRA_RAM

#if BITS==32
	#define AROUND		3
	#define ut			u32
	#define WS			"%08X"
	#define CHECKS		4	// how many words can we trust not to make us reencrypt the whole block too often?
#elif BITS==16
	#define AROUND		4
	#define ut			u16
	#define WS			"%04X"
	#define CHECKS		4	// how many words can we trust not to make us reencrypt the whole block too often?
#elif BITS==8
	#define AROUND		2
	#define ut			u8
	#define WS			"%02X"
	#define CHECKS		8	// how many words can we trust not to make us reencrypt the whole block too often?
#endif

#ifdef USE_EXTRA_RAM
#define PRESENT_BYTES	(1 << 29)	// not necessary at all, but speeds up the search a few extra % (just because i can)
#endif

#define u8				unsigned char
#define u16				unsigned short
#define u32				unsigned long
#define u64				unsigned __int64
#define BYTES			(WORDS*sizeof(ut))
#define UT				((ut)(1<<BITS)-1)
#define bit32(x,n)		((((x)[(n)>>5])>>((n)))&1)
#define bit(x,n)		(((x)>>((n)&31))&1)
#define set32(x,i,n)	((x)[(i)>>5]|=((u32)(n))<<((i)&31))
#define set(x,n)		((x)|=((u32)(n))<<((i)&31))
#define inv32(x,i,n)	((x)[(i)>>5]^=((u32)(n))<<((i)&31))
#define inv(x,n)		((x)^=((u32)(n))<<((i)&31))
#define rotr(x,n)		((((x)>>(n))&UT)+(((x)<<((0-(n))&(BITS-1)))&UT))
#define rf(a,b)			((rotr(a,9)*9^(b))*9)	// a sample strong combiner
static __forceinline u64 clock_counter() { __asm rdtsc }
#define rand32()		((u32) (_rnd64b += (((_rnd64a << 53) ^ (_rnd64a >> 11)) * 0x9C8F075E1A6B243DUL + 0x5AC734E821DF60B9UL) ^ clock_counter (), _rnd64a += ((_rnd64b << 59) ^ (_rnd64b >> 5)) * 0x2C0DE96B357481AFUL ^ 0xD8725E3901A4F6CBUL))
static u64				_rnd64a = 0xE4FDC25B98A63017UL;
static u64				_rnd64b = 0x5EA93C21F7604D8BUL;

// XXTEA and RUPTEST here are our lab rats.
// The actual round function is irrelevant.

#define MX ((z>>5)^(y<<2))+((y>>3)^(z<<4))^(s^y)+(k[(p^(s>>2))&3]^z);

static u32				key[4] = {0x11111111,0x22222222,0x33333333,0x44444444};	// Whatever key. Can it be recovered from partial collisions?

void xxtea (u32 *v, u32 *k, u32 n)
{
	u32	p, q, y, z, s = 0;
	
	for (q = AROUND; q; q--)
	{
		s += 0x9E3779B9;
		for (p = 0; p < n-1; p++) y = v[p+1], z = v[p] += MX;
		y = v[0], z = v[n-1] += MX;
	}
}

static void ruptest (ut * const x)	// anything of a form x[i] = f(x[i-1],x[i],x[i+1]); this one works on words of any length
{
	u32			i, j, r;
	ut			a = x[WORDS-1];
	
	for (j = 0, r = 0; j < AROUND; j++)
	{
		for (i = 0; i < WORDS-1; i++)
		x[i] = a = (ut) (x[i      ]^rf(rf(a,x[i+1]+x[(i+WORDS/8+1)%WORDS]+x[(i+WORDS-WORDS/8-1)%WORDS]+r),0x55555555))&UT, r++;
		x[i] = a = (ut) (x[WORDS-1]^rf(rf(a,x[0  ]+x[   WORDS/8         ]+x[   WORDS-WORDS/8-2       ]+r),0x55555555))&UT, r++;
	}
}

#if 0	// 1 - XXTEA, 0 - RUPTEST (good for testing it on 8-bit, 16-bit and 32-bit words)
#define encrypt(x,y)	memcpy(y,x,BYTES),xxtea(y,key,WORDS)
#else
#define encrypt(x,y)	memcpy(y,x,BYTES),ruptest(y)
#endif

static u32 equality (const ut * const x, const ut * const y, const u32 words) {u32 i,n;for(i=0,n=0;i<words;i++)n+=(x[i]==y[i]);return n;}
static void printx (const ut * const x) {u32 t; for (t = 0; t <= 7; t++) printf (" "WS, (x)[WORDS/2+t]); printf ("\n");}

// note: these sets do not depend on the size of the block or the key
#define SBS			(1<<16)			// now this is the main "memory requirement" parameter, currently set at 2^24. more requires more precomputation, less results in longer searches.
#define DIVIDER		(1<<(BITS/2))	// optimal value. less will cause more long checks, more will cause more collisions and subsequently larger MARGIN
#define MARGIN		8	// 2 for 0x80000 seems to cause no overflows over 64 elements, 8 for 0x100000...
static ut			z[SBS][AROUND];	// original random "plaintext" set
static ut			y[SBS][CHECKS];	// the set of ciphertexts to compare with
static u32			w[DIVIDER][SBS*MARGIN/DIVIDER], ws[DIVIDER];	// grouped pointers at the set of ciphertexts

#ifdef USE_EXTRA_RAM
static u32			*present;	// a bit-mask array, not absolutely necessary but speeds up the search just a little a bit more. remove it to save half a gig
#endif

// the precomputation stage, choosing a bunch of random sets of words with an all-0 block in front of them

static void setup (ut *x)
{
	u32				i, j, hw;
	ut				t[WORDS];
	
	for (i = 0; i < SBS; i++)
	{
		// The position of words depends on the round function.
		// These ones are as far as possible from the center where we check for collisions,
		// and as far as possible from the first round,
		// but every second word and other patterns can be used just as well:
		for (j = 0; j < AROUND; j++) z[i][j] = x[WORDS-4-AROUND+j] = (ut) rand32();
		encrypt (x, t);
		// hashing the center (adding is enough since it's supposed to be encrypted already)
		for (j = 0, hw = 0; j < CHECKS; j++) hw += t[WORDS/2-CHECKS/2+j]; hw &= DIVIDER-1;
		if (ws[hw] >= SBS*2/DIVIDER) continue;	// bad luck, no difference
		// each of the divided arrays will contain its set of indexes that all have the same hash
		w[hw][ws[hw]++] = i;
		// saving the center of the ciphertext too
		memcpy (y[i], t+WORDS/2-CHECKS/2, CHECKS*sizeof (ut));
		#ifdef USE_EXTRA_RAM
			// another pointer for the bit mask
			hw = * (u32 *) (t + WORDS/2);
			set32(present,hw,1);
		#endif
	}
	// this is just a debugging print-out to see how close we got to blowing the margin
	for (i = 0, j = 0; i < DIVIDER; i++) if (ws[i] > j) j = ws[i];
	printf (" %u/%u\n", j, SBS*2/DIVIDER);
}

int main (void)
{
	ut			x[WORDS], e[WORDS], t[WORDS], u[WORDS];
	u32			i, j, k, c, hw;
	
	// let's not lock the machine completely over all this useless number crunching
	SetPriorityClass (GetCurrentProcess (), IDLE_PRIORITY_CLASS);
	SetThreadPriority (GetCurrentThread (), THREAD_PRIORITY_IDLE);
	#ifdef USE_EXTRA_RAM
		present = malloc (PRESENT_BYTES);
		if (!present) { printf ("Bummer! Out of memory.\n"); return -1; }
		memset (present, 0, PRESENT_BYTES);
	#endif
	memset (z, 0, sizeof (z));
	memset (y, 0, sizeof (y));
	memset (w, 0, sizeof (w));
	memset (ws, 0, sizeof (ws));
	memset (x, 0, BYTES);	// careful: i remember that x and t have 0s in all the right words all the time
	memset (t, 0, BYTES);	// careful: i remember that x and t have 0s in all the right words all the time
	// precompute the array
	setup (x);
	// and search forever
	for (k=0;;k++)
	{
		if (!(k&0xFFFF)) printf ("\r"WS, k);
		// every set of words we try must have at least one bit in front of it to avoid accidental 100% collisions
		x[WORDS-5-AROUND] = (ut) (1 << (rand32()&(BITS-1)));
		// random works, but chosen works even better...
		// single bit changes work perfectly for causing 2-3-round collisions with simple round functions, super-fast too.
		for (j = 0; j < AROUND; j++) x[WORDS-4-AROUND+j] = (ut) rand32();
		encrypt (x, e);
		#ifdef USE_EXTRA_RAM
			// let's see first what the bit mask says, but this word is in y anyway.
			hw = * (u32 *) (e + WORDS/2);
			if (!bit32 (present, hw)) continue;
		#endif
		// the same hash of the center now
		for (j = 0, hw = 0; j < CHECKS; j++) hw += e[WORDS/2-CHECKS/2+j]; hw &= DIVIDER-1;
		// and let's see if it can be found in the right set of encrypted blocks
		for (j = 0; j < ws[hw]; j++)
		{
			i = w[hw][j];
			for (c = 0; c < CHECKS; c++) if (e[WORDS/2-CHECKS/2+c]!=y[i][c]) break;
			if (c < CHECKS) continue;
			// 4 words match, good enough. now let's re-encrypt the entire block from the set and compare them.
			for (j = 0; j < AROUND; j++) t[WORDS-4-AROUND+j] = z[i][j];
			encrypt (t, u);
			hw = equality (e, u, WORDS);
			printf ("\r"); for (j = 0; j < AROUND; j++) printf (WS" ", x[WORDS-4-AROUND+j]);
			printf ("\n"); for (j = 0; j < AROUND; j++) printf (WS" ", z[i][j]);
			printf ("\nCOLLISION in %3u words\n", hw);
			break;
		}
	}
	return 0;
}
