diff -urpN john-1.7.9-jumbo-5/doc/README-CUDA john-1.7.9-jumbo-5-cuda-2/doc/README-CUDA
--- john-1.7.9-jumbo-5/doc/README-CUDA	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/doc/README-CUDA	2012-02-17 02:28:07.205475338 +0000
@@ -0,0 +1,14 @@
+README CUDA:
+If you have problems with JtR CUDA support:
+  Please check that paths to yours CUDA library are proper in Makefile:
+    CUDAPATH = /usr/local/cuda/lib
+    CUDA64PATH = /usr/local/cuda/lib64
+  Please check that yours PATH contains cuda lib,bin and include paths.
+  Please check that yours LD_LIBRARY_PATH contains cuda lib path
+
+Performance issues:
+  If you have got Fermi or newer card change "-arch sm_10" to "-arch sm_20" in the NVCC_FLAGS (Makefile).
+  Default THREADS and BLOCKS settings might not be optimal.
+  To get better performance you can experiment with THREADS and BLOCKS macros defined for each format in cuda*.h file.
+
+You can contact me at lukas[dot]odzioba[at]gmail[dot]com or john-dev mailing list
diff -urpN john-1.7.9-jumbo-5/src/Makefile john-1.7.9-jumbo-5-cuda-2/src/Makefile
--- john-1.7.9-jumbo-5/src/Makefile	2011-12-16 19:12:33.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/Makefile	2012-02-17 15:43:10.197474790 +0000
@@ -23,6 +23,11 @@ CPPFLAGS = -E
 #CC = mpicc -DHAVE_MPI -DJOHN_MPI_BARRIER -DJOHN_MPI_ABORT
 #MPIOBJ = john-mpi.o
 
+CUDAPATH = /usr/local/cuda/lib
+CUDA64PATH = /usr/local/cuda/lib64
+NVCC_FLAGS = -c -Xptxas -v -arch sm_10
+
+
 OMPFLAGS =
 # gcc with OpenMP
 #OMPFLAGS = -fopenmp
@@ -91,6 +96,17 @@ JOHN_OBJS = \
 	undrop.o \
 	unique.o
 
+CUDA_OBJS = \
+	cuda_common.o \
+	cuda_cryptmd5_fmt.o cuda_cryptmd5.o \
+	cuda_phpass_fmt.o cuda_phpass.o \
+	cuda_cryptsha256_fmt.o cuda_cryptsha256.o \
+	cuda_cryptsha512_fmt.o cuda_cryptsha512.o \
+	cuda_mscash2_fmt.o cuda_mscash2.o \
+	cuda_rawsha256_fmt.o cuda_rawsha256.o \
+	cuda_rawsha224_fmt.o cuda_rawsha224.o \
+	cuda_mscash_fmt.o cuda_mscash.o
+
 BENCH_DES_OBJS_ORIG = \
 	DES_fmt.o DES_std.o
 
@@ -142,6 +158,7 @@ default:
 	@echo "	make clean SYSTEM"
 	@echo "where SYSTEM can be one of the following:"
 	@echo "([i] is an optional letter for pre-built intrinsics, eg. -sse2i vs -sse2):"
+	@echo "linux-x86-64-cuda        Linux, x86-64 with SSE2 and CUDA (experimental)"
 	@echo "linux-x86-64-avx         Linux, x86-64 with AVX (2011+ Intel CPUs)"
 	@echo "linux-x86-64-xop         Linux, x86-64 with AVX and XOP (2011+ AMD CPUs)"
 	@echo "linux-x86-64[i]          Linux, x86-64 with SSE2 (most common)"
@@ -150,6 +167,7 @@ default:
 #	@echo "linux-x86-64-32-sse2[i]  Linux, x86-64, 32-bit with SSE2 (for regression tests)"
 #	@echo "linux-x86-64-32-mmx      Linux, x86-64, 32-bit with MMX (for regression tests)"
 #	@echo "linux-x86-64-32-any      Linux, x86-64, 32-bit (for regression tests)"
+	@echo "linux-x86-cuda           Linux, x86 32-bit with SSE2 and CUDA (experimental)"
 	@echo "linux-x86-sse2[i]        Linux, x86 32-bit with SSE2 (most common, 32-bit)"
 	@echo "linux-x86-mmx            Linux, x86 32-bit with MMX (for old computers)"
 	@echo "linux-x86-any            Linux, x86 32-bit (for truly ancient computers)"
@@ -240,6 +258,15 @@ linux-x86-64-xop:
 		ASFLAGS="$(ASFLAGS) -mxop" \
 		LDFLAGS="$(LDFLAGS) -lcrypt -ldl"
 
+
+linux-x86-64-cuda:
+	$(LN) x86-64.h arch.h
+	@echo "#define JOHN_BLD" '"'$@'"' > john_build_rule.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) $(CUDA_OBJS) c3_fmt.o x86-64.o sse-intrinsics.o" \
+		CFLAGS="$(CFLAGS) -DHAVE_CRYPT -DHAVE_DL -DHAVE_CUDA" \
+		LDFLAGS="$(LDFLAGS) -L$(CUDA64PATH) -lcrypt -ldl -lcudart"
+
 linux-x86-64:
 	$(LN) x86-64.h arch.h
 	@echo "#define JOHN_BLD" '"'$@'"' > john_build_rule.h
@@ -330,6 +357,14 @@ linux-x86-xop:
 		ASFLAGS="$(ASFLAGS) -m32 -mxop" \
 		LDFLAGS="$(LDFLAGS) -m32 -lcrypt"
 
+linux-x86-cuda:
+	$(LN) x86-sse.h arch.h
+	@echo "#define JOHN_BLD" '"'$@'"' > john_build_rule.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) $(CUDA_OBJS) c3_fmt.o x86.o x86-sse.o sha1-mmx.o md4-mmx.o md5-mmx.o" \
+		CFLAGS="$(CFLAGS) -DHAVE_CRYPT -DHAVE_DL -DHAVE_CUDA" \
+		LDFLAGS="$(LDFLAGS) -L$(CUDAPATH) -lcrypt -ldl -lcudart"
+
 linux-x86-sse2:
 	$(LN) x86-sse.h arch.h
 	@echo "#define JOHN_BLD" '"'$@'"' > john_build_rule.h
@@ -1148,6 +1183,67 @@ generic.h:
 bench: $(BENCH_OBJS)
 	$(LD) $(BENCH_OBJS) $(LDFLAGS) -o bench
 
+
+cuda_common.o:	cuda/cuda_common.cuh cuda/cuda_common.cu
+	cd cuda; nvcc $(NVCC_FLAGS) cuda_common.cu
+	mv cuda/cuda_common.o cuda_common.o
+
+cuda_cryptmd5.o:  cuda_cryptmd5.h cuda/cryptmd5.cu
+	cd cuda; nvcc $(NVCC_FLAGS) cryptmd5.cu
+	mv cuda/cryptmd5.o cuda_cryptmd5.o
+
+cuda_cryptmd5_fmt.o: cuda_cryptmd5.o cuda_cryptmd5_fmt.c cuda_cryptmd5.h
+	$(CC)  $(CFLAGS) cuda_cryptmd5_fmt.c
+
+cuda_phpass.o:  cuda_phpass.h cuda/phpass.cu
+	cd cuda; nvcc $(NVCC_FLAGS) phpass.cu
+	mv cuda/phpass.o cuda_phpass.o
+
+cuda_phpass_fmt.o: cuda_phpass.o cuda_phpass_fmt.c cuda_phpass.h
+	$(CC)  $(CFLAGS) cuda_phpass_fmt.c
+
+cuda_cryptsha256.o:  cuda_cryptsha256.h cuda/cryptsha256.cu
+	cd cuda; nvcc $(NVCC_FLAGS) cryptsha256.cu
+	mv cuda/cryptsha256.o cuda_cryptsha256.o
+
+cuda_cryptsha256_fmt.o: cuda_cryptsha256.o cuda_cryptsha256_fmt.c cuda_cryptsha256.h
+	$(CC)  $(CFLAGS) cuda_cryptsha256_fmt.c
+
+cuda_cryptsha512.o:  cuda_cryptsha512.h cuda/cryptsha512.cu
+	cd cuda; nvcc $(NVCC_FLAGS) cryptsha512.cu
+	mv cuda/cryptsha512.o cuda_cryptsha512.o
+
+cuda_cryptsha512_fmt.o: cuda_cryptsha512.o cuda_cryptsha512_fmt.c cuda_cryptsha512.h
+	$(CC)  $(CFLAGS) cuda_cryptsha512_fmt.c
+
+cuda_mscash2.o:  cuda_mscash2.h cuda/mscash2.cu cuda_common.o
+	cd cuda; nvcc $(NVCC_FLAGS) mscash2.cu
+	mv cuda/mscash2.o cuda_mscash2.o
+
+cuda_mscash2_fmt.o: cuda_mscash2.o cuda_mscash2_fmt.c cuda_mscash2.h
+	$(CC)  $(CFLAGS) cuda_mscash2_fmt.c
+
+cuda_mscash.o:  cuda_mscash.h cuda/mscash.cu cuda_common.o
+	cd cuda; nvcc $(NVCC_FLAGS) mscash.cu
+	mv cuda/mscash.o cuda_mscash.o
+
+cuda_mscash_fmt.o: cuda_mscash.o cuda_mscash_fmt.c cuda_mscash.h
+	$(CC)  $(CFLAGS) cuda_mscash_fmt.c
+
+cuda_rawsha256.o:  cuda_rawsha256.h cuda/rawsha256.cu cuda_common.o
+	cd cuda; nvcc $(NVCC_FLAGS) -DSHA256 rawsha256.cu
+	mv cuda/rawsha256.o cuda_rawsha256.o
+
+cuda_rawsha256_fmt.o: cuda_rawsha256.o cuda_rawsha256_fmt.c cuda_rawsha256.h
+	$(CC)  $(CFLAGS) -DSHA256 cuda_rawsha256_fmt.c
+
+cuda_rawsha224.o:  cuda_rawsha256.h cuda/rawsha256.cu cuda_common.o
+	cd cuda; nvcc $(NVCC_FLAGS) -DSHA224 rawsha256.cu
+	mv cuda/rawsha256.o cuda_rawsha224.o
+
+cuda_rawsha224_fmt.o: cuda_rawsha224.o cuda_rawsha256_fmt.c cuda_rawsha256.h
+	$(CC)  $(CFLAGS) -DSHA224 cuda_rawsha256_fmt.c -o cuda_rawsha224_fmt.o
+
 ../run/john: $(JOHN_OBJS)
 	$(LD) $(JOHN_OBJS) $(LDFLAGS) -o ../run/john
 
@@ -1350,6 +1446,7 @@ test_utf8:
 clean:
 	$(RM) $(PROJ) $(PROJ_DOS) $(PROJ_WIN32) $(PROJ_WIN32_MINGW)
 	$(RM) ../run/john.exe john-macosx-* *.o *.bak core
+	$(RM) cuda/*.o cuda/*~ *~
 	$(RM) detect bench generic.h arch.h tmp.s
 	$(RM) fmt_registers.h fmt_externs.h john_build_rule.h
 	$(CP) $(NULL) Makefile.dep
diff -urpN john-1.7.9-jumbo-5/src/cuda/cryptmd5.cu john-1.7.9-jumbo-5-cuda-2/src/cuda/cryptmd5.cu
--- john-1.7.9-jumbo-5/src/cuda/cryptmd5.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda/cryptmd5.cu	2012-02-15 01:35:47.305632458 +0000
@@ -0,0 +1,246 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include "../cuda_cryptmd5.h"
+#include "cuda_common.cuh"
+
+extern "C" void md5_crypt_gpu(crypt_md5_password *, uint32_t *,
+    crypt_md5_salt *);
+
+__device__ __constant__ char md5_salt_prefix_cu[] = "$1$";
+__device__ __constant__ char apr1_salt_prefix_cu[] = "$apr1$";
+__device__ __constant__ crypt_md5_salt cuda_salt[1];
+
+__device__ void md5_process_block_cu(const void *, size_t, md5_ctx *);
+__device__ void md5_process_bytes_cu(const void *, size_t, md5_ctx *);
+
+__device__ void ctx_init(md5_ctx * ctx, uint8_t * ctx_buflen)
+{
+	uint32_t *buf = (uint32_t *) ctx->buffer;
+	int i = 14;
+	while (i--)
+		*buf++ = 0;
+	*ctx_buflen = 0;
+}
+
+__device__ void ctx_update(md5_ctx * ctx, const char *string, uint8_t len,
+    uint8_t * ctx_buflen)
+{
+	uint8_t *dest = &ctx->buffer[*ctx_buflen];
+	uint8_t *src = (uint8_t *) string;
+	*ctx_buflen += len;
+	memcpy(dest, src, len);
+}
+
+__device__ void md5_digest(md5_ctx * ctx, uint32_t * result,
+    uint8_t * ctx_buflen)
+{
+	uint32_t len = *ctx_buflen;
+	uint32_t *x = (uint32_t *) ctx->buffer;
+	x[len / 4] |= (((uint32_t) 0x80) << ((len & 0x3) << 3));
+	len <<= 3;
+
+	uint32_t b = 0xefcdab89;
+	uint32_t c = 0x98badcfe;
+	uint32_t d = 0x10325476;
+	uint32_t a = ROTATE_LEFT(AC1 + x[0], S11);
+	a += b;			/* 1 */
+	d = ROTATE_LEFT((c ^ (a & MASK1)) + x[1] + AC2pCd, S12);
+	d += a;			/* 2 */
+	c = ROTATE_LEFT(F(d, a, b) + x[2] + AC3pCc, S13);
+	c += d;			/* 3 */
+	b = ROTATE_LEFT(F(c, d, a) + x[3] + AC4pCb, S14);
+	b += c;			/* 4 */
+	FF(a, b, c, d, x[4], S11, 0xf57c0faf);	/* 5 */
+	FF(d, a, b, c, x[5], S12, 0x4787c62a);	/* 6 */
+	FF(c, d, a, b, x[6], S13, 0xa8304613);	/* 7 */
+	FF(b, c, d, a, x[7], S14, 0xfd469501);	/* 8 */
+	FF(a, b, c, d, x[8], S11, 0x698098d8);	/* 9 */
+	FF(d, a, b, c, x[9], S12, 0x8b44f7af);	/* 10 */
+	FF(c, d, a, b, x[10], S13, 0xffff5bb1);	/* 11 */
+	FF(b, c, d, a, x[11], S14, 0x895cd7be);	/* 12 */
+	FF(a, b, c, d, x[12], S11, 0x6b901122);	/* 13 */
+	FF(d, a, b, c, x[13], S12, 0xfd987193);	/* 14 */
+	FF(c, d, a, b, len, S13, 0xa679438e);	/* 15 */
+	FF2(b, c, d, a, S14, 0x49b40821);	/* 16 */
+
+
+	GG(a, b, c, d, x[1], S21, 0xf61e2562);	/* 17 */
+	GG(d, a, b, c, x[6], S22, 0xc040b340);	/* 18 */
+	GG(c, d, a, b, x[11], S23, 0x265e5a51);	/* 19 */
+	GG(b, c, d, a, x[0], S24, 0xe9b6c7aa);	/* 20 */
+	GG(a, b, c, d, x[5], S21, 0xd62f105d);	/* 21 */
+	GG(d, a, b, c, x[10], S22, 0x2441453);	/* 22 */
+	GG2(c, d, a, b, S23, 0xd8a1e681);	/* 23 */
+	GG(b, c, d, a, x[4], S24, 0xe7d3fbc8);	/* 24 */
+	GG(a, b, c, d, x[9], S21, 0x21e1cde6);	/* 25 */
+	GG(d, a, b, c, len, S22, 0xc33707d6);	/* 26 */
+	GG(c, d, a, b, x[3], S23, 0xf4d50d87);	/* 27 */
+	GG(b, c, d, a, x[8], S24, 0x455a14ed);	/* 28 */
+	GG(a, b, c, d, x[13], S21, 0xa9e3e905);	/* 29 */
+	GG(d, a, b, c, x[2], S22, 0xfcefa3f8);	/* 30 */
+	GG(c, d, a, b, x[7], S23, 0x676f02d9);	/* 31 */
+	GG(b, c, d, a, x[12], S24, 0x8d2a4c8a);	/* 32 */
+
+
+	HH(a, b, c, d, x[5], S31, 0xfffa3942);	/* 33 */
+	HH(d, a, b, c, x[8], S32, 0x8771f681);	/* 34 */
+	HH(c, d, a, b, x[11], S33, 0x6d9d6122);	/* 35 */
+	HH(b, c, d, a, len, S34, 0xfde5380c);	/* 36 */
+	HH(a, b, c, d, x[1], S31, 0xa4beea44);	/* 37 */
+	HH(d, a, b, c, x[4], S32, 0x4bdecfa9);	/* 38 */
+	HH(c, d, a, b, x[7], S33, 0xf6bb4b60);	/* 39 */
+	HH(b, c, d, a, x[10], S34, 0xbebfbc70);	/* 40 */
+	HH(a, b, c, d, x[13], S31, 0x289b7ec6);	/* 41 */
+	HH(d, a, b, c, x[0], S32, 0xeaa127fa);	/* 42 */
+	HH(c, d, a, b, x[3], S33, 0xd4ef3085);	/* 43 */
+	HH(b, c, d, a, x[6], S34, 0x4881d05);	/* 44 */
+	HH(a, b, c, d, x[9], S31, 0xd9d4d039);	/* 45 */
+	HH(d, a, b, c, x[12], S32, 0xe6db99e5);	/* 46 */
+	HH2(c, d, a, b, S33, 0x1fa27cf8);	/* 47 */
+	HH(b, c, d, a, x[2], S34, 0xc4ac5665);	/* 48 */
+
+
+	II(a, b, c, d, x[0], S41, 0xf4292244);	/* 49 */
+	II(d, a, b, c, x[7], S42, 0x432aff97);	/* 50 */
+	II(c, d, a, b, len, S43, 0xab9423a7);	/* 51 */
+	II(b, c, d, a, x[5], S44, 0xfc93a039);	/* 52 */
+	II(a, b, c, d, x[12], S41, 0x655b59c3);	/* 53 */
+	II(d, a, b, c, x[3], S42, 0x8f0ccc92);	/* 54 */
+	II(c, d, a, b, x[10], S43, 0xffeff47d);	/* 55 */
+	II(b, c, d, a, x[1], S44, 0x85845dd1);	/* 56 */
+	II(a, b, c, d, x[8], S41, 0x6fa87e4f);	/* 57 */
+	II2(d, a, b, c, S42, 0xfe2ce6e0);	/* 58 */
+	II(c, d, a, b, x[6], S43, 0xa3014314);	/* 59 */
+	II(b, c, d, a, x[13], S44, 0x4e0811a1);	/* 60 */
+	II(a, b, c, d, x[4], S41, 0xf7537e82);	/* 61 */
+	II(d, a, b, c, x[11], S42, 0xbd3af235);	/* 62 */
+	II(c, d, a, b, x[2], S43, 0x2ad7d2bb);	/* 63 */
+	II(b, c, d, a, x[9], S44, 0xeb86d391);	/* 64 */
+
+	result[0] = a + 0x67452301;
+	result[1] = b + 0xefcdab89;
+	result[2] = c + 0x98badcfe;
+	result[3] = d + 0x10325476;
+}
+
+
+__device__ void md5crypt(const char *gpass, size_t keysize, uint32_t * result,
+    uint32_t idx)
+{
+
+	uint32_t i;
+	__shared__ uint32_t alt_result[THREADS][4 + 1];
+	__shared__ char spass[THREADS][16 + 4];
+
+	uint8_t ctx_buflen;
+	char *pass = spass[threadIdx.x];
+	memcpy(pass, gpass, 16);
+	uint8_t pass_len = keysize;
+	uint8_t salt_len = cuda_salt[0].saltlen;
+	char *salt = cuda_salt[0].salt;
+	md5_ctx ctx;
+	ctx_init(&ctx, &ctx_buflen);
+	
+	ctx_update(&ctx, pass, pass_len, &ctx_buflen);
+	ctx_update(&ctx, salt, salt_len, &ctx_buflen);
+	ctx_update(&ctx, pass, pass_len, &ctx_buflen);
+	md5_digest(&ctx, alt_result[threadIdx.x], &ctx_buflen);
+		
+	ctx_init(&ctx, &ctx_buflen);
+
+		
+	ctx_update(&ctx, pass, pass_len, &ctx_buflen);
+	if (cuda_salt[0].prefix == '1') {
+		ctx_update(&ctx, md5_salt_prefix_cu, 3, &ctx_buflen);
+	} else
+		ctx_update(&ctx, apr1_salt_prefix_cu, 6, &ctx_buflen);
+
+	ctx_update(&ctx, salt, salt_len, &ctx_buflen);
+	
+
+	ctx_update(&ctx, (const char *) alt_result[threadIdx.x], pass_len,
+	    &ctx_buflen);
+
+	*alt_result[threadIdx.x] = 0;
+
+	for (i = pass_len; i > 0; i >>= 1)
+		if ((i & 1) != 0)
+			ctx.buffer[ctx_buflen++] =
+			    ((const char *) alt_result[threadIdx.x])[0];
+		else
+			ctx.buffer[ctx_buflen++] = pass[0];
+
+	md5_digest(&ctx, alt_result[threadIdx.x], &ctx_buflen);
+
+	for (i = 0; i < 1000; i++) {
+		ctx_init(&ctx, &ctx_buflen);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, pass, pass_len, &ctx_buflen);
+		else
+			ctx_update(&ctx,
+			    (const char *) alt_result[threadIdx.x], 16,
+			    &ctx_buflen);
+
+		if (i % 3 != 0)
+			ctx_update(&ctx, salt, salt_len, &ctx_buflen);
+
+		if (i % 7 != 0)
+			ctx_update(&ctx, pass, pass_len, &ctx_buflen);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx,
+			    (const char *) alt_result[threadIdx.x], 16,
+			    &ctx_buflen);
+		else
+			ctx_update(&ctx, pass, pass_len, &ctx_buflen);
+		md5_digest(&ctx, alt_result[threadIdx.x], &ctx_buflen);
+	}
+
+	result[address(0, idx)] = alt_result[threadIdx.x][0];
+	result[address(1, idx)] = alt_result[threadIdx.x][1];
+	result[address(2, idx)] = alt_result[threadIdx.x][2];
+	result[address(3, idx)] = alt_result[threadIdx.x][3];
+}
+
+
+__global__ void kernel_crypt_r(crypt_md5_password * inbuffer,
+    uint32_t * outbuffer)
+{
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+	md5crypt((char *) inbuffer[idx].v, inbuffer[idx].length,
+	    outbuffer, idx);
+}
+
+__host__ void md5_crypt_gpu(crypt_md5_password * inbuffer,
+    uint32_t * outbuffer, crypt_md5_salt * host_salt)
+{
+	HANDLE_ERROR(cudaMemcpyToSymbol(cuda_salt, host_salt,
+		sizeof(crypt_md5_salt)));
+	crypt_md5_password *cuda_inbuffer;
+	uint32_t *cuda_outbuffer;
+
+	size_t insize = sizeof(crypt_md5_password) * KEYS_PER_CRYPT;
+	size_t outsize = sizeof(crypt_md5_hash) * KEYS_PER_CRYPT;
+
+	HANDLE_ERROR(cudaMalloc(&cuda_inbuffer, insize));
+	HANDLE_ERROR(cudaMalloc(&cuda_outbuffer, outsize));
+	HANDLE_ERROR(cudaMemcpy(cuda_inbuffer, inbuffer, insize,
+		cudaMemcpyHostToDevice));
+
+	kernel_crypt_r <<< BLOCKS, THREADS >>> (cuda_inbuffer,
+	    cuda_outbuffer);
+
+	HANDLE_ERROR(cudaMemcpy(outbuffer, cuda_outbuffer, outsize,
+		cudaMemcpyDeviceToHost));
+
+	HANDLE_ERROR(cudaFree(cuda_inbuffer));
+	HANDLE_ERROR(cudaFree(cuda_outbuffer));
+}
diff -urpN john-1.7.9-jumbo-5/src/cuda/cryptsha256.cu john-1.7.9-jumbo-5-cuda-2/src/cuda/cryptsha256.cu
--- john-1.7.9-jumbo-5/src/cuda/cryptsha256.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda/cryptsha256.cu	2012-02-15 02:00:26.972382376 +0000
@@ -0,0 +1,325 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include "../cuda_cryptsha256.h"
+#include "cuda_common.cuh"
+
+extern "C" void sha256_crypt_gpu(crypt_sha256_password * inbuffer,
+    crypt_sha256_hash * outbuffer, crypt_sha256_salt * host_salt);
+
+__constant__ crypt_sha256_salt cuda_salt[1];
+__constant__ uint32_t k[] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+	0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe,
+	0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa,
+	0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+	0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb,
+	0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624,
+	0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+	0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb,
+	0xbef9a3f7, 0xc67178f2
+};
+
+
+
+__device__ void init_ctx(sha256_ctx * ctx)
+{
+	ctx->H[0] = 0x6a09e667;
+	ctx->H[1] = 0xbb67ae85;
+	ctx->H[2] = 0x3c6ef372;
+	ctx->H[3] = 0xa54ff53a;
+	ctx->H[4] = 0x510e527f;
+	ctx->H[5] = 0x9b05688c;
+	ctx->H[6] = 0x1f83d9ab;
+	ctx->H[7] = 0x5be0cd19;
+	ctx->total = 0;
+	ctx->buflen = 0;
+}
+
+__device__ void insert_to_buffer(sha256_ctx * ctx, const uint8_t * string,
+    uint8_t len)
+{
+	int i = len;
+	uint8_t *d = &ctx->buffer[ctx->buflen];
+	while (i--)
+		*d++ = *string++;
+	ctx->buflen += len;
+}
+
+__device__ void sha256_block(sha256_ctx * ctx)
+{
+	int i;
+	uint32_t a = ctx->H[0];
+	uint32_t b = ctx->H[1];
+	uint32_t c = ctx->H[2];
+	uint32_t d = ctx->H[3];
+	uint32_t e = ctx->H[4];
+	uint32_t f = ctx->H[5];
+	uint32_t g = ctx->H[6];
+	uint32_t h = ctx->H[7];
+	uint32_t w[16];
+	uint32_t *data = (uint32_t *) ctx->buffer;
+
+    #pragma unroll 16
+	  for (i = 0; i < 16; i++)
+		w[i] = SWAP(data[i]);
+	
+	uint32_t t1, t2;
+	for (i = 0; i < 16; i++) {
+		t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
+		t2 = Maj(a, b, c) + Sigma0(a);
+
+		h = g;
+		g = f;
+		f = e;
+		e = d + t1;
+		d = c;
+		c = b;
+		b = a;
+		a = t1 + t2;
+	}
+
+	for (i = 16; i < 64; i++) {
+
+		w[i & 15] =
+		    sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i -
+			16) & 15] + w[(i - 7) & 15];
+		t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
+		t2 = Maj(a, b, c) + Sigma0(a);
+
+		h = g;
+		g = f;
+		f = e;
+		e = d + t1;
+		d = c;
+		c = b;
+		b = a;
+		a = t1 + t2;
+	}
+
+	ctx->H[0] += a;
+	ctx->H[1] += b;
+	ctx->H[2] += c;
+	ctx->H[3] += d;
+	ctx->H[4] += e;
+	ctx->H[5] += f;
+	ctx->H[6] += g;
+	ctx->H[7] += h;
+
+}
+
+__device__ void ctx_update(sha256_ctx * ctx, const char *string, uint8_t len)
+{
+	ctx->total += len;
+	uint8_t startpos = ctx->buflen;
+	uint8_t partsize;
+	if (startpos + len <= 64) {
+		partsize = len;
+	} else
+		partsize = 64 - startpos;
+
+	insert_to_buffer(ctx, (const uint8_t *) string, partsize);
+	if (ctx->buflen == 64) {
+		uint8_t offset = 64 - startpos;
+		sha256_block(ctx);
+		ctx->buflen = 0;
+		insert_to_buffer(ctx, (const uint8_t *) (string + offset),
+		    len - offset);
+	}
+}
+
+/**
+  Add 0x80 byte to ctx->buffer and clean the rest of it
+**/
+__device__ void ctx_append_1(sha256_ctx * ctx)
+{
+	int i = 63 - ctx->buflen;
+	uint8_t *d = &ctx->buffer[ctx->buflen];
+	*d++ = 0x80;	
+	while (i--)
+	{
+	  *d++ = 0;
+	}
+
+}
+
+/**
+  Add ctx->bufflen at the end of ctx->buffer
+**/
+__device__ void ctx_add_length(sha256_ctx * ctx)
+{
+	uint32_t *blocks = (uint32_t *) ctx->buffer;
+	blocks[15] = SWAP(ctx->total * 8);
+}
+
+__device__ void finish_ctx(sha256_ctx * ctx)
+{
+	ctx_append_1(ctx);
+	ctx_add_length(ctx);
+	ctx->buflen = 0;
+}
+
+__device__ void clear_ctx_buffer(sha256_ctx * ctx)
+{
+	uint32_t *w = (uint32_t *) ctx->buffer;
+#pragma unroll 16
+	for (int i = 0; i < 16; i++)
+		w[i] = 0;
+	ctx->buflen = 0;
+
+}
+
+__device__ void sha256_digest(sha256_ctx * ctx, uint32_t * result)
+{
+	uint8_t i;
+	if (ctx->buflen <= 55) {	//data+0x80+datasize fits in one 512bit block
+		finish_ctx(ctx);
+		sha256_block(ctx);
+	} else {
+		uint8_t moved = 1;
+		if (ctx->buflen < 64) {	//data and 0x80 fits in one block
+			ctx_append_1(ctx);
+			moved = 0;
+		}
+		sha256_block(ctx);
+		clear_ctx_buffer(ctx);
+		if (moved)
+			ctx->buffer[0] = 0x80;	//append 1,the rest is already clean
+		ctx_add_length(ctx);
+		sha256_block(ctx);
+	}
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		result[i] = SWAP(ctx->H[i]);
+}
+
+__device__ void sha256crypt(const char *pass, uint8_t passlength,
+    uint32_t * tresult, uint32_t idx, uint32_t rounds)
+{
+
+	uint32_t i, alt_result[8], temp_result[8];
+
+	sha256_ctx ctx, alt_ctx;
+	init_ctx(&ctx);
+	init_ctx(&alt_ctx);
+
+	ctx_update(&ctx, pass, passlength);
+	ctx_update(&ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+
+	ctx_update(&alt_ctx, pass, passlength);
+	ctx_update(&alt_ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+	ctx_update(&alt_ctx, pass, passlength);
+
+	sha256_digest(&alt_ctx, alt_result);
+
+	ctx_update(&ctx, (const char *) alt_result, passlength);
+
+
+	for (i = passlength; i > 0; i >>= 1) {
+		if ((i & 1) != 0)
+			ctx_update(&ctx, (const char *) alt_result, 32);
+		else
+			ctx_update(&ctx, pass, passlength);
+	}
+	sha256_digest(&ctx, alt_result);
+
+	init_ctx(&alt_ctx);
+	for (i = 0; i < passlength; i++)
+		ctx_update(&alt_ctx, pass, passlength);
+
+	sha256_digest(&alt_ctx, temp_result);
+
+	__shared__ char sp_sequence[THREADS][16+4];
+	char *p_sequence=sp_sequence[threadIdx.x];
+	memcpy(p_sequence, temp_result, passlength);
+
+	init_ctx(&alt_ctx);
+	for (i = 0; i < 16 + ((unsigned char *) alt_result)[0]; i++)
+		ctx_update(&alt_ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+
+	sha256_digest(&alt_ctx, temp_result);
+
+	uint8_t saltlength = cuda_salt[0].saltlen;
+
+	__shared__ char ss_sequence[THREADS][16+4];
+	char *s_sequence=ss_sequence[threadIdx.x];
+	memcpy(s_sequence, temp_result, saltlength);
+
+	for (i = 0; i < rounds; i++) {
+		init_ctx(&ctx);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, p_sequence, passlength);
+		else
+			ctx_update(&ctx, (const char *) alt_result, 32);
+
+		if ((i % 3) != 0)
+			ctx_update(&ctx, s_sequence, saltlength);
+
+		if ((i % 7) != 0)
+			ctx_update(&ctx, p_sequence, passlength);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, (const char *) alt_result, 32);
+		else
+			ctx_update(&ctx, p_sequence, passlength);
+
+
+		sha256_digest(&ctx, alt_result);
+	}
+	__syncthreads();
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		tresult[hash_addr(i, idx)] = alt_result[i];
+}
+
+__global__ void kernel_crypt_r(crypt_sha256_password * inbuffer,
+    uint32_t * outbuffer)
+{
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+	sha256crypt((const char *) inbuffer[idx].v, inbuffer[idx].length,
+	    outbuffer, idx, cuda_salt[0].rounds);
+
+}
+
+
+void sha256_crypt_gpu(crypt_sha256_password * inbuffer,
+    crypt_sha256_hash * outbuffer, crypt_sha256_salt * host_salt)
+{
+
+	HANDLE_ERROR(cudaMemcpyToSymbol(cuda_salt, host_salt,
+		sizeof(crypt_sha256_salt)));
+
+	crypt_sha256_password *cuda_inbuffer;
+	uint32_t *cuda_outbuffer;
+	size_t insize = sizeof(crypt_sha256_password) * KEYS_PER_CRYPT;
+	size_t outsize = sizeof(crypt_sha256_hash) * KEYS_PER_CRYPT;
+	HANDLE_ERROR(cudaMalloc(&cuda_inbuffer, insize));
+	HANDLE_ERROR(cudaMalloc(&cuda_outbuffer, outsize));
+
+	HANDLE_ERROR(cudaMemcpy(cuda_inbuffer, inbuffer, insize,
+		cudaMemcpyHostToDevice));
+	dim3 dimGrid(BLOCKS);
+	dim3 dimBlock(THREADS);
+	kernel_crypt_r <<< dimGrid, dimBlock >>> (cuda_inbuffer,
+	    cuda_outbuffer);
+	cudaThreadSynchronize();
+	HANDLE_ERROR(cudaMemcpy(outbuffer, cuda_outbuffer, outsize,
+		cudaMemcpyDeviceToHost));
+
+	HANDLE_ERROR(cudaFree(cuda_inbuffer));
+	HANDLE_ERROR(cudaFree(cuda_outbuffer));
+}
diff -urpN john-1.7.9-jumbo-5/src/cuda/cryptsha512.cu john-1.7.9-jumbo-5-cuda-2/src/cuda/cryptsha512.cu
--- john-1.7.9-jumbo-5/src/cuda/cryptsha512.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda/cryptsha512.cu	2012-02-15 02:00:32.523382980 +0000
@@ -0,0 +1,355 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <stdio.h>
+#include "../cuda_cryptsha512.h"
+#include "cuda_common.cuh"
+
+__constant__ uint64_t k[] = {
+	0x428a2f98d728ae22LL, 0x7137449123ef65cdLL, 0xb5c0fbcfec4d3b2fLL,
+	    0xe9b5dba58189dbbcLL,
+	0x3956c25bf348b538LL, 0x59f111f1b605d019LL, 0x923f82a4af194f9bLL,
+	    0xab1c5ed5da6d8118LL,
+	0xd807aa98a3030242LL, 0x12835b0145706fbeLL, 0x243185be4ee4b28cLL,
+	    0x550c7dc3d5ffb4e2LL,
+	0x72be5d74f27b896fLL, 0x80deb1fe3b1696b1LL, 0x9bdc06a725c71235LL,
+	    0xc19bf174cf692694LL,
+	0xe49b69c19ef14ad2LL, 0xefbe4786384f25e3LL, 0x0fc19dc68b8cd5b5LL,
+	    0x240ca1cc77ac9c65LL,
+	0x2de92c6f592b0275LL, 0x4a7484aa6ea6e483LL, 0x5cb0a9dcbd41fbd4LL,
+	    0x76f988da831153b5LL,
+	0x983e5152ee66dfabLL, 0xa831c66d2db43210LL, 0xb00327c898fb213fLL,
+	    0xbf597fc7beef0ee4LL,
+	0xc6e00bf33da88fc2LL, 0xd5a79147930aa725LL, 0x06ca6351e003826fLL,
+	    0x142929670a0e6e70LL,
+	0x27b70a8546d22ffcLL, 0x2e1b21385c26c926LL, 0x4d2c6dfc5ac42aedLL,
+	    0x53380d139d95b3dfLL,
+	0x650a73548baf63deLL, 0x766a0abb3c77b2a8LL, 0x81c2c92e47edaee6LL,
+	    0x92722c851482353bLL,
+	0xa2bfe8a14cf10364LL, 0xa81a664bbc423001LL, 0xc24b8b70d0f89791LL,
+	    0xc76c51a30654be30LL,
+	0xd192e819d6ef5218LL, 0xd69906245565a910LL, 0xf40e35855771202aLL,
+	    0x106aa07032bbd1b8LL,
+	0x19a4c116b8d2d0c8LL, 0x1e376c085141ab53LL, 0x2748774cdf8eeb99LL,
+	    0x34b0bcb5e19b48a8LL,
+	0x391c0cb3c5c95a63LL, 0x4ed8aa4ae3418acbLL, 0x5b9cca4f7763e373LL,
+	    0x682e6ff3d6b2b8a3LL,
+	0x748f82ee5defb2fcLL, 0x78a5636f43172f60LL, 0x84c87814a1f0ab72LL,
+	    0x8cc702081a6439ecLL,
+	0x90befffa23631e28LL, 0xa4506cebde82bde9LL, 0xbef9a3f7b2c67915LL,
+	    0xc67178f2e372532bLL,
+	0xca273eceea26619cLL, 0xd186b8c721c0c207LL, 0xeada7dd6cde0eb1eLL,
+	    0xf57d4f7fee6ed178LL,
+	0x06f067aa72176fbaLL, 0x0a637dc5a2c898a6LL, 0x113f9804bef90daeLL,
+	    0x1b710b35131c471bLL,
+	0x28db77f523047d84LL, 0x32caab7b40c72493LL, 0x3c9ebe0a15c9bebcLL,
+	    0x431d67c49c100d4cLL,
+	0x4cc5d4becb3e42b6LL, 0x597f299cfc657e2aLL, 0x5fcb6fab3ad6faecLL,
+	    0x6c44198c4a475817LL,
+};
+
+__constant__ crypt_sha512_salt cuda_salt[1];
+
+extern "C" void sha512_crypt_gpu(crypt_sha512_password * inbuffer,
+    crypt_sha512_hash * outbuffer, crypt_sha512_salt * salt);
+
+
+__device__ void init_ctx(sha512_ctx * ctx)
+{
+	ctx->H[0] = 0x6a09e667f3bcc908LL;
+	ctx->H[1] = 0xbb67ae8584caa73bLL;
+	ctx->H[2] = 0x3c6ef372fe94f82bLL;
+	ctx->H[3] = 0xa54ff53a5f1d36f1LL;
+	ctx->H[4] = 0x510e527fade682d1LL;
+	ctx->H[5] = 0x9b05688c2b3e6c1fLL;
+	ctx->H[6] = 0x1f83d9abfb41bd6bLL;
+	ctx->H[7] = 0x5be0cd19137e2179LL;
+	ctx->total = 0;
+	ctx->buflen = 0;
+}
+
+
+__device__ void insert_to_buffer(sha512_ctx * ctx, const uint8_t * string,
+    uint8_t len)
+{
+	uint8_t *d = &ctx->buffer[ctx->buflen];
+	memcpy(d,string,len);
+	ctx->buflen += len;
+}
+
+
+__device__ void sha512_block(sha512_ctx * ctx)
+{
+	int i;
+	uint64_t a = ctx->H[0];
+	uint64_t b = ctx->H[1];
+	uint64_t c = ctx->H[2];
+	uint64_t d = ctx->H[3];
+	uint64_t e = ctx->H[4];
+	uint64_t f = ctx->H[5];
+	uint64_t g = ctx->H[6];
+	uint64_t h = ctx->H[7];
+
+
+	uint64_t w[16];
+
+	uint64_t *data = (uint64_t *) ctx->buffer;
+//#pragma unroll 16
+	for (i = 0; i < 16; i++)
+		w[i] = SWAP64(data[i]);
+
+	uint64_t t1, t2;
+//#pragma unroll 16
+	for (i = 0; i < 16; i++) {
+
+
+		t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
+		t2 = Maj(a, b, c) + Sigma0(a);
+
+		h = g;
+		g = f;
+		f = e;
+		e = d + t1;
+		d = c;
+		c = b;
+		b = a;
+		a = t1 + t2;
+
+	}
+
+
+	for (i = 16; i < 80; i++) {
+
+
+		w[i & 15] =sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i -16) & 15] + w[(i - 7) & 15];
+		t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
+		t2 = Maj(a, b, c) + Sigma0(a);
+
+		h = g;
+		g = f;
+		f = e;
+		e = d + t1;
+		d = c;
+		c = b;
+		b = a;
+		a = t1 + t2;
+
+	}
+
+	ctx->H[0] += a;
+	ctx->H[1] += b;
+	ctx->H[2] += c;
+	ctx->H[3] += d;
+	ctx->H[4] += e;
+	ctx->H[5] += f;
+	ctx->H[6] += g;
+	ctx->H[7] += h;
+}
+
+
+__device__ void ctx_append_1(sha512_ctx * ctx)
+{
+	uint32_t length=ctx->buflen;
+	int i = 127 - length;
+	uint32_t *x = (uint32_t *) ctx->buffer;
+	uint8_t *d = &ctx->buffer[length];
+	*d++ = 0x80;
+	while(++length%4!=0)
+	{  *d++=0;
+	i--;
+	}
+	x=(uint32_t*)d;
+	while(i>0)
+	{  i-=4;
+	    *x++=0;
+	}
+}
+
+__device__ void ctx_add_length(sha512_ctx * ctx)
+{
+	uint64_t *blocks = (uint64_t *) ctx->buffer;
+	blocks[15] = SWAP64((uint64_t) ctx->total * 8);
+}
+
+__device__ void finish_ctx(sha512_ctx * ctx)
+{
+	ctx_append_1(ctx);
+	ctx_add_length(ctx);
+	ctx->buflen = 0;
+}
+
+
+__device__ void ctx_update(sha512_ctx * ctx, const char *string, uint8_t len)
+{
+	ctx->total += len;
+	uint8_t startpos = ctx->buflen;
+	uint8_t partsize;
+	if (startpos + len <= 128) {
+		partsize = len;
+	} else
+		partsize = 128 - startpos;
+
+	insert_to_buffer(ctx, (const uint8_t *) string, partsize);
+	if (ctx->buflen == 128) {
+		uint8_t offset = 128 - startpos;
+		sha512_block(ctx);
+		ctx->buflen = 0;
+		insert_to_buffer(ctx, (const uint8_t *) (string + offset),
+		    len - offset);
+	}
+}
+
+__device__ void clear_ctx_buffer(sha512_ctx * ctx)
+{
+	uint32_t *w = (uint32_t *) ctx->buffer;
+//#pragma unroll 30
+	for (int i = 0; i < 30; i++)
+		w[i] = 0;
+	  
+	  ctx->buflen = 0;
+}
+
+__device__ void sha512_digest(sha512_ctx * ctx, uint64_t * result)
+{
+	uint8_t i;
+	if (ctx->buflen <= 111) {	//data+0x80+datasize fits in one 1024bit block
+		finish_ctx(ctx);
+		sha512_block(ctx);
+	} else {
+		uint8_t moved = 1;
+		if (ctx->buflen < 128) {	//data and 0x80 fits in one block
+			ctx_append_1(ctx);
+			moved = 0;
+		}
+		sha512_block(ctx);
+		clear_ctx_buffer(ctx);
+		if (moved)
+			ctx->buffer[0] = 0x80;	//append 1,the rest is already clean
+		ctx_add_length(ctx);
+		sha512_block(ctx);
+	}
+//#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		result[i] = SWAP64(ctx->H[i]);
+}
+
+
+__device__ void sha512crypt(const char *pass, uint8_t passlength,
+    uint64_t * tresult, uint32_t idx, uint32_t rounds)
+{
+
+	uint64_t  alt_result[8], temp_result[8];
+	int i;
+	sha512_ctx ctx;
+	init_ctx(&ctx);
+
+	ctx_update(&ctx, pass, passlength);
+	ctx_update(&ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+	ctx_update(&ctx, pass, passlength);
+
+	sha512_digest(&ctx, alt_result);
+	init_ctx(&ctx);
+	
+	ctx_update(&ctx, pass, passlength);
+	ctx_update(&ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+	ctx_update(&ctx, (const char *) alt_result, passlength);
+
+
+	for (i = passlength; i > 0; i >>= 1) {
+		if ((i & 1) != 0)
+			ctx_update(&ctx, (const char *) alt_result, 64);
+		else
+			ctx_update(&ctx, pass, passlength);
+	}
+	sha512_digest(&ctx, alt_result);
+
+
+	init_ctx(&ctx);
+	for (i = 0; i < passlength; i++)
+		ctx_update(&ctx, pass, passlength);
+
+	sha512_digest(&ctx, temp_result);
+
+	__shared__ char sp_sequence[THREADS][16+4];
+	char *p_sequence=sp_sequence[threadIdx.x];
+	memcpy(p_sequence, temp_result, passlength);
+
+	init_ctx(&ctx);
+	for (i = 0; i < 16 + ((unsigned char *) alt_result)[0]; i++)
+		ctx_update(&ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+
+	sha512_digest(&ctx, temp_result);
+
+	uint8_t saltlength = cuda_salt[0].saltlen;
+
+	__shared__ char ss_sequence[THREADS][16+4];
+	char *s_sequence=ss_sequence[threadIdx.x];
+	memcpy(s_sequence, temp_result, saltlength);
+
+	for (i = 0; i < rounds; i++) {
+		init_ctx(&ctx);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, p_sequence, passlength);
+		else
+			ctx_update(&ctx, (const char *) alt_result, 64);
+
+		if ((i % 3) != 0)
+			ctx_update(&ctx, s_sequence, saltlength);
+
+		if ((i % 7) != 0)
+			ctx_update(&ctx, p_sequence, passlength);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, (const char *) alt_result, 64);
+		else
+			ctx_update(&ctx, p_sequence, passlength);
+
+
+		sha512_digest(&ctx, alt_result);
+	}
+//#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		tresult[i] = alt_result[i];
+}
+
+
+
+__global__ void kernel_crypt_r(crypt_sha512_password * inbuffer,
+    crypt_sha512_hash * outbuffer)
+{
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+	sha512crypt((const char *) inbuffer[idx].v, inbuffer[idx].length,
+	    outbuffer[idx].v, idx, cuda_salt[0].rounds);
+}
+
+void sha512_crypt_gpu(crypt_sha512_password * inbuffer,
+    crypt_sha512_hash * outbuffer, crypt_sha512_salt * host_salt)
+{
+
+	crypt_sha512_password *cuda_inbuffer;
+	crypt_sha512_hash *cuda_outbuffer;
+	size_t insize = sizeof(crypt_sha512_password) * KEYS_PER_CRYPT;
+	size_t outsize = sizeof(crypt_sha512_hash) * KEYS_PER_CRYPT;
+
+	HANDLE_ERROR(cudaMalloc(&cuda_inbuffer, insize));
+	HANDLE_ERROR(cudaMalloc(&cuda_outbuffer, outsize));
+
+	HANDLE_ERROR(cudaMemcpy(cuda_inbuffer, inbuffer, insize,
+		cudaMemcpyHostToDevice));
+	HANDLE_ERROR(cudaMemcpyToSymbol(cuda_salt, host_salt,
+		sizeof(crypt_sha512_salt)));
+
+	dim3 dimGrid(BLOCKS);
+	dim3 dimBlock(THREADS);
+	kernel_crypt_r <<< dimGrid, dimBlock >>> (cuda_inbuffer,
+	    cuda_outbuffer);
+	cudaThreadSynchronize();
+	HANDLE_ERROR(cudaMemcpy(outbuffer, cuda_outbuffer, outsize,
+		cudaMemcpyDeviceToHost));
+
+	HANDLE_ERROR(cudaFree(cuda_inbuffer));
+	HANDLE_ERROR(cudaFree(cuda_outbuffer));
+}
diff -urpN john-1.7.9-jumbo-5/src/cuda/cuda_common.cu john-1.7.9-jumbo-5-cuda-2/src/cuda/cuda_common.cu
--- john-1.7.9-jumbo-5/src/cuda/cuda_common.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda/cuda_common.cu	2011-08-26 12:27:32.000000000 +0000
@@ -0,0 +1,38 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _CUDA_COMMON_CU
+#define _CUDA_COMMON_CU
+
+#include <stdio.h>
+#include "cuda_common.cuh"
+
+extern "C" 
+void HandleError(cudaError_t err, const char *file, int line)
+{
+	if (err != cudaSuccess) {
+		printf("%s in %s at line %d\n", cudaGetErrorString(err), file,
+		    line);
+		exit(EXIT_FAILURE);
+	}
+}
+
+#define HANDLE_ERROR(err) (HandleError(err,__FILE__,__LINE__))
+
+extern "C" 
+void cuda_init(unsigned int gpu_id)
+{
+	int devices;
+	HANDLE_ERROR(cudaGetDeviceCount(&devices));
+	if (gpu_id < devices && devices > 0 )
+		cudaSetDevice(gpu_id);
+	else {
+		printf("Invalid CUDA device id = %u\n", gpu_id);
+		//fprintf(stderr,
+		exit(1);
+	}
+}
+
+#endif
\ No newline at end of file
diff -urpN john-1.7.9-jumbo-5/src/cuda/cuda_common.cuh john-1.7.9-jumbo-5-cuda-2/src/cuda/cuda_common.cuh
--- john-1.7.9-jumbo-5/src/cuda/cuda_common.cuh	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda/cuda_common.cuh	2011-08-26 12:27:18.000000000 +0000
@@ -0,0 +1,17 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _CUDA_COMMON_CUH
+#define _CUDA_COMMON_CUH
+
+extern "C" 
+void HandleError(cudaError_t err, const char *file, int line);
+
+#define HANDLE_ERROR(err) (HandleError(err,__FILE__,__LINE__))
+
+extern "C" 
+void cuda_init(unsigned int gpu_id);
+
+#endif
\ No newline at end of file
diff -urpN john-1.7.9-jumbo-5/src/cuda/mscash.cu john-1.7.9-jumbo-5-cuda-2/src/cuda/mscash.cu
--- john-1.7.9-jumbo-5/src/cuda/mscash.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda/mscash.cu	2012-02-17 15:46:24.815520116 +0000
@@ -0,0 +1,250 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+* Based on Alain Espinosa implementation http://openwall.info/wiki/john/MSCash
+*/
+
+#include <stdio.h>
+#include "../cuda_mscash.h"
+#include "cuda_common.cuh"
+
+/*
+static void HandleError(cudaError_t err, const char *file, int line)
+{
+	if (err != cudaSuccess) {
+		printf("%s in %s at line %d\n", cudaGetErrorString(err), file,
+		    line);
+		exit(EXIT_FAILURE);
+	}
+}
+
+#define HANDLE_ERROR(err) (HandleError(err,__FILE__,__LINE__))
+*/
+//extern "C" void mscash_init(int gpuid);
+extern "C" void cuda_mscash(mscash_password *, mscash_hash *, mscash_salt *);
+
+__constant__ mscash_salt cuda_salt[1];
+/*__host__ void mscash_init(int gpuid)
+{
+	int count;
+	HANDLE_ERROR(cudaGetDeviceCount(&count));
+	if (gpuid < count)
+		cudaSetDevice(gpuid);
+	else {
+		printf("Invalid CUDA device id = %d\n", gpuid);
+		exit(1);
+	}
+}*/
+
+
+__device__ static void md4_crypt(uint32_t * output, uint32_t * nt_buffer)
+{
+	unsigned int a = INIT_A;
+	unsigned int b = INIT_B;
+	unsigned int c = INIT_C;
+	unsigned int d = INIT_D;
+
+	/* Round 1 */
+	a += (d ^ (b & (c ^ d))) + nt_buffer[0];
+	a = (a << 3) | (a >> 29);
+	d += (c ^ (a & (b ^ c))) + nt_buffer[1];
+	d = (d << 7) | (d >> 25);
+	c += (b ^ (d & (a ^ b))) + nt_buffer[2];
+	c = (c << 11) | (c >> 21);
+	b += (a ^ (c & (d ^ a))) + nt_buffer[3];
+	b = (b << 19) | (b >> 13);
+
+	a += (d ^ (b & (c ^ d))) + nt_buffer[4];
+	a = (a << 3) | (a >> 29);
+	d += (c ^ (a & (b ^ c))) + nt_buffer[5];
+	d = (d << 7) | (d >> 25);
+	c += (b ^ (d & (a ^ b))) + nt_buffer[6];
+	c = (c << 11) | (c >> 21);
+	b += (a ^ (c & (d ^ a))) + nt_buffer[7];
+	b = (b << 19) | (b >> 13);
+
+	a += (d ^ (b & (c ^ d))) + nt_buffer[8];
+	a = (a << 3) | (a >> 29);
+	d += (c ^ (a & (b ^ c))) + nt_buffer[9];
+	d = (d << 7) | (d >> 25);
+	c += (b ^ (d & (a ^ b))) + nt_buffer[10];
+	c = (c << 11) | (c >> 21);
+	b += (a ^ (c & (d ^ a))) + nt_buffer[11];
+	b = (b << 19) | (b >> 13);
+
+	a += (d ^ (b & (c ^ d))) + nt_buffer[12];
+	a = (a << 3) | (a >> 29);
+	d += (c ^ (a & (b ^ c))) + nt_buffer[13];
+	d = (d << 7) | (d >> 25);
+	c += (b ^ (d & (a ^ b))) + nt_buffer[14];
+	c = (c << 11) | (c >> 21);
+	b += (a ^ (c & (d ^ a))) + nt_buffer[15];
+	b = (b << 19) | (b >> 13);
+
+	/* Round 2 */
+	a += ((b & (c | d)) | (c & d)) + nt_buffer[0] + SQRT_2;
+	a = (a << 3) | (a >> 29);
+	d += ((a & (b | c)) | (b & c)) + nt_buffer[4] + SQRT_2;
+	d = (d << 5) | (d >> 27);
+	c += ((d & (a | b)) | (a & b)) + nt_buffer[8] + SQRT_2;
+	c = (c << 9) | (c >> 23);
+	b += ((c & (d | a)) | (d & a)) + nt_buffer[12] + SQRT_2;
+	b = (b << 13) | (b >> 19);
+
+	a += ((b & (c | d)) | (c & d)) + nt_buffer[1] + SQRT_2;
+	a = (a << 3) | (a >> 29);
+	d += ((a & (b | c)) | (b & c)) + nt_buffer[5] + SQRT_2;
+	d = (d << 5) | (d >> 27);
+	c += ((d & (a | b)) | (a & b)) + nt_buffer[9] + SQRT_2;
+	c = (c << 9) | (c >> 23);
+	b += ((c & (d | a)) | (d & a)) + nt_buffer[13] + SQRT_2;
+	b = (b << 13) | (b >> 19);
+
+	a += ((b & (c | d)) | (c & d)) + nt_buffer[2] + SQRT_2;
+	a = (a << 3) | (a >> 29);
+	d += ((a & (b | c)) | (b & c)) + nt_buffer[6] + SQRT_2;
+	d = (d << 5) | (d >> 27);
+	c += ((d & (a | b)) | (a & b)) + nt_buffer[10] + SQRT_2;
+	c = (c << 9) | (c >> 23);
+	b += ((c & (d | a)) | (d & a)) + nt_buffer[14] + SQRT_2;
+	b = (b << 13) | (b >> 19);
+
+	a += ((b & (c | d)) | (c & d)) + nt_buffer[3] + SQRT_2;
+	a = (a << 3) | (a >> 29);
+	d += ((a & (b | c)) | (b & c)) + nt_buffer[7] + SQRT_2;
+	d = (d << 5) | (d >> 27);
+	c += ((d & (a | b)) | (a & b)) + nt_buffer[11] + SQRT_2;
+	c = (c << 9) | (c >> 23);
+	b += ((c & (d | a)) | (d & a)) + nt_buffer[15] + SQRT_2;
+	b = (b << 13) | (b >> 19);
+
+	/* Round 3 */
+	a += (d ^ c ^ b) + nt_buffer[0] + SQRT_3;
+	a = (a << 3) | (a >> 29);
+	d += (c ^ b ^ a) + nt_buffer[8] + SQRT_3;
+	d = (d << 9) | (d >> 23);
+	c += (b ^ a ^ d) + nt_buffer[4] + SQRT_3;
+	c = (c << 11) | (c >> 21);
+	b += (a ^ d ^ c) + nt_buffer[12] + SQRT_3;
+	b = (b << 15) | (b >> 17);
+
+	a += (d ^ c ^ b) + nt_buffer[2] + SQRT_3;
+	a = (a << 3) | (a >> 29);
+	d += (c ^ b ^ a) + nt_buffer[10] + SQRT_3;
+	d = (d << 9) | (d >> 23);
+	c += (b ^ a ^ d) + nt_buffer[6] + SQRT_3;
+	c = (c << 11) | (c >> 21);
+	b += (a ^ d ^ c) + nt_buffer[14] + SQRT_3;
+	b = (b << 15) | (b >> 17);
+
+	a += (d ^ c ^ b) + nt_buffer[1] + SQRT_3;
+	a = (a << 3) | (a >> 29);
+	d += (c ^ b ^ a) + nt_buffer[9] + SQRT_3;
+	d = (d << 9) | (d >> 23);
+	c += (b ^ a ^ d) + nt_buffer[5] + SQRT_3;
+	c = (c << 11) | (c >> 21);
+	b += (a ^ d ^ c) + nt_buffer[13] + SQRT_3;
+	b = (b << 15) | (b >> 17);
+
+	a += (d ^ c ^ b) + nt_buffer[3] + SQRT_3;
+	a = (a << 3) | (a >> 29);
+	d += (c ^ b ^ a) + nt_buffer[11] + SQRT_3;
+	d = (d << 9) | (d >> 23);
+	c += (b ^ a ^ d) + nt_buffer[7] + SQRT_3;
+	c = (c << 11) | (c >> 21);
+	b += (a ^ d ^ c) + nt_buffer[15] + SQRT_3;
+	b = (b << 15) | (b >> 17);
+
+	output[0] = a + INIT_A;
+	output[1] = b + INIT_B;
+	output[2] = c + INIT_C;
+	output[3] = d + INIT_D;
+}
+
+__device__ void prepare_key(uint8_t * key, int length, uint32_t * nt_buffer)
+{
+	int i = 0;
+	for (i = 0; i < 16; i++)
+		nt_buffer[i] = 0;
+	for (i = 0; i < length / 2; i++)
+		nt_buffer[i] = key[2 * i] | (key[2 * i + 1] << 16);
+	if (length % 2 == 1)
+		nt_buffer[i] = key[length - 1] | 0x800000;
+	else
+		nt_buffer[i] = 0x80;
+	nt_buffer[14] = length << 4;
+}
+
+__device__ void prepare_login(uint8_t * login, int length,
+    uint32_t * login_buffer)
+{
+	int i = 0;
+	for (i = 0; i < 12; i++)
+		login_buffer[i] = 0;
+	for (i = 0; i < length / 2; i++)
+		login_buffer[i] = login[2 * i] | (login[2 * i + 1] << 16);
+	if (length % 2 == 1)
+		login_buffer[i] = login[length - 1] | 0x800000;
+	else
+		login_buffer[i] = 0x80;
+	login_buffer[10] = (length << 4) + 128;
+}
+
+
+__global__ void mscash_kernel(mscash_password * inbuffer,
+    mscash_hash * outbuffer)
+{
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+	uint8_t *login = cuda_salt[0].salt;
+	uint8_t loginlength = cuda_salt[0].length;
+
+	uint8_t *password = inbuffer[idx].v;
+	uint8_t passwordlength = inbuffer[idx].length;
+
+	int i;
+	uint32_t nt_buffer[16];
+	uint32_t login_buffer[12];
+	uint32_t output[4];
+
+	prepare_key(password, passwordlength, nt_buffer);
+	md4_crypt(output, nt_buffer);
+	memcpy(nt_buffer, output, 4 * 4);
+
+	prepare_login(login, loginlength, login_buffer);
+	memcpy(nt_buffer + 4, login_buffer, 12 * 4);
+	md4_crypt(output, nt_buffer);
+
+	for (i = 0; i < 4; i++)
+		outbuffer[idx].v[i] = output[i];
+
+}
+
+__host__ void cuda_mscash(mscash_password * inbuffer, mscash_hash * outbuffer,
+    mscash_salt * host_salt)
+{
+
+	HANDLE_ERROR(cudaMemcpyToSymbol(cuda_salt, host_salt,
+		sizeof(mscash_salt)));
+	mscash_password *cuda_inbuffer;
+	mscash_hash *cuda_outbuffer;
+
+	size_t insize = sizeof(mscash_password) * KEYS_PER_CRYPT;
+	size_t outsize = sizeof(mscash_hash) * KEYS_PER_CRYPT;
+
+	HANDLE_ERROR(cudaMalloc(&cuda_inbuffer, insize));
+	HANDLE_ERROR(cudaMalloc(&cuda_outbuffer, outsize));
+
+	HANDLE_ERROR(cudaMemcpy(cuda_inbuffer, inbuffer, insize,
+		cudaMemcpyHostToDevice));
+
+	mscash_kernel <<< BLOCKS, THREADS >>> (cuda_inbuffer, cuda_outbuffer);
+
+	HANDLE_ERROR(cudaMemcpy(outbuffer, cuda_outbuffer, outsize,
+		cudaMemcpyDeviceToHost));
+
+	HANDLE_ERROR(cudaFree(cuda_inbuffer));
+	HANDLE_ERROR(cudaFree(cuda_outbuffer));
+
+}
diff -urpN john-1.7.9-jumbo-5/src/cuda/mscash2.cu john-1.7.9-jumbo-5-cuda-2/src/cuda/mscash2.cu
--- john-1.7.9-jumbo-5/src/cuda/mscash2.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda/mscash2.cu	2012-02-16 02:33:04.316474856 +0000
@@ -0,0 +1,386 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+* Based on S3nf implementation http://openwall.info/wiki/john/MSCash2
+*/
+
+#include <stdio.h>
+#include "../cuda_mscash2.h"
+#include "cuda_common.cuh"
+extern "C" void mscash2_gpu(mscash2_password *, mscash2_hash *, mscash2_salt *);
+
+__constant__ mscash2_salt cuda_salt[1];
+
+__host__ void md4_crypt(uint32_t * buffer, uint32_t * hash)
+{
+	uint32_t a;
+	uint32_t b;
+	uint32_t c;
+	uint32_t d;
+
+	a = 0xFFFFFFFF + buffer[0];
+	a = (a << 3) | (a >> 29);
+	d = INIT_D + (INIT_C ^ (a & 0x77777777)) + buffer[1];
+	d = (d << 7) | (d >> 25);
+	c = INIT_C + (INIT_B ^ (d & (a ^ INIT_B))) + buffer[2];
+	c = (c << 11) | (c >> 21);
+	b = INIT_B + (a ^ (c & (d ^ a))) + buffer[3];
+	b = (b << 19) | (b >> 13);
+
+	a += (d ^ (b & (c ^ d))) + buffer[4];
+	a = (a << 3) | (a >> 29);
+	d += (c ^ (a & (b ^ c))) + buffer[5];
+	d = (d << 7) | (d >> 25);
+	c += (b ^ (d & (a ^ b))) + buffer[6];
+	c = (c << 11) | (c >> 21);
+	b += (a ^ (c & (d ^ a))) + buffer[7];
+	b = (b << 19) | (b >> 13);
+
+	a += (d ^ (b & (c ^ d))) + buffer[8];
+	a = (a << 3) | (a >> 29);
+	d += (c ^ (a & (b ^ c))) + buffer[9];
+	d = (d << 7) | (d >> 25);
+	c += (b ^ (d & (a ^ b))) + buffer[10];
+	c = (c << 11) | (c >> 21);
+	b += (a ^ (c & (d ^ a))) + buffer[11];
+	b = (b << 19) | (b >> 13);
+
+	a += (d ^ (b & (c ^ d))) + buffer[12];
+	a = (a << 3) | (a >> 29);
+	d += (c ^ (a & (b ^ c))) + buffer[13];
+	d = (d << 7) | (d >> 25);
+	c += (b ^ (d & (a ^ b))) + buffer[14];
+	c = (c << 11) | (c >> 21);
+	b += (a ^ (c & (d ^ a))) + buffer[15];
+	b = (b << 19) | (b >> 13);
+
+	a += ((b & (c | d)) | (c & d)) + buffer[0] + SQRT_2;
+	a = (a << 3) | (a >> 29);
+	d += ((a & (b | c)) | (b & c)) + buffer[4] + SQRT_2;
+	d = (d << 5) | (d >> 27);
+	c += ((d & (a | b)) | (a & b)) + buffer[8] + SQRT_2;
+	c = (c << 9) | (c >> 23);
+	b += ((c & (d | a)) | (d & a)) + buffer[12] + SQRT_2;
+	b = (b << 13) | (b >> 19);
+
+	a += ((b & (c | d)) | (c & d)) + buffer[1] + SQRT_2;
+	a = (a << 3) | (a >> 29);
+	d += ((a & (b | c)) | (b & c)) + buffer[5] + SQRT_2;
+	d = (d << 5) | (d >> 27);
+	c += ((d & (a | b)) | (a & b)) + buffer[9] + SQRT_2;
+	c = (c << 9) | (c >> 23);
+	b += ((c & (d | a)) | (d & a)) + buffer[13] + SQRT_2;
+	b = (b << 13) | (b >> 19);
+
+	a += ((b & (c | d)) | (c & d)) + buffer[2] + SQRT_2;
+	a = (a << 3) | (a >> 29);
+	d += ((a & (b | c)) | (b & c)) + buffer[6] + SQRT_2;
+	d = (d << 5) | (d >> 27);
+	c += ((d & (a | b)) | (a & b)) + buffer[10] + SQRT_2;
+	c = (c << 9) | (c >> 23);
+	b += ((c & (d | a)) | (d & a)) + buffer[14] + SQRT_2;
+	b = (b << 13) | (b >> 19);
+
+	a += ((b & (c | d)) | (c & d)) + buffer[3] + SQRT_2;
+	a = (a << 3) | (a >> 29);
+	d += ((a & (b | c)) | (b & c)) + buffer[7] + SQRT_2;
+	d = (d << 5) | (d >> 27);
+	c += ((d & (a | b)) | (a & b)) + buffer[11] + SQRT_2;
+	c = (c << 9) | (c >> 23);
+	b += ((c & (d | a)) | (d & a)) + buffer[15] + SQRT_2;
+	b = (b << 13) | (b >> 19);
+
+	a += (d ^ c ^ b) + buffer[0] + SQRT_3;
+	a = (a << 3) | (a >> 29);
+	d += (c ^ b ^ a) + buffer[8] + SQRT_3;
+	d = (d << 9) | (d >> 23);
+	c += (b ^ a ^ d) + buffer[4] + SQRT_3;
+	c = (c << 11) | (c >> 21);
+	b += (a ^ d ^ c) + buffer[12] + SQRT_3;
+	b = (b << 15) | (b >> 17);
+
+	a += (d ^ c ^ b) + buffer[2] + SQRT_3;
+	a = (a << 3) | (a >> 29);
+	d += (c ^ b ^ a) + buffer[10] + SQRT_3;
+	d = (d << 9) | (d >> 23);
+	c += (b ^ a ^ d) + buffer[6] + SQRT_3;
+	c = (c << 11) | (c >> 21);
+	b += (a ^ d ^ c) + buffer[14] + SQRT_3;
+	b = (b << 15) | (b >> 17);
+
+	a += (d ^ c ^ b) + buffer[1] + SQRT_3;
+	a = (a << 3) | (a >> 29);
+	d += (c ^ b ^ a) + buffer[9] + SQRT_3;
+	d = (d << 9) | (d >> 23);
+	c += (b ^ a ^ d) + buffer[5] + SQRT_3;
+	c = (c << 11) | (c >> 21);
+	b += (a ^ d ^ c) + buffer[13] + SQRT_3;
+	b = (b << 15) | (b >> 17);
+
+	a += (d ^ c ^ b) + buffer[3] + SQRT_3;
+	a = (a << 3) | (a >> 29);
+
+	d += (c ^ b ^ a) + buffer[11] + SQRT_3;
+	d = (d << 9) | (d >> 23);
+	c += (b ^ a ^ d) + buffer[7] + SQRT_3;
+	c = (c << 11) | (c >> 21);
+	b += (a ^ d ^ c) + buffer[15] + SQRT_3;
+	b = (b << 15) | (b >> 17);
+
+	hash[0] = a + INIT_A;
+	hash[1] = b + INIT_B;
+	hash[2] = c + INIT_C;
+	hash[3] = d + INIT_D;
+}
+
+__device__ __host__ void preproc(const uint8_t * key, uint32_t keylen,
+    uint32_t * state, uint8_t var)
+{
+	int i;
+	uint32_t W[16], temp;
+	uint8_t ipad[64];
+
+	for (i = 0; i < 64; i++)
+		ipad[i] = var;
+
+	for (i = 0; i < keylen; i++)
+		ipad[i] = ipad[i] ^ key[i];
+
+#pragma unroll 16
+	for (i = 0; i < 16; i++)
+		GET_WORD_32_BE(W[i], ipad, i * 4);
+	
+	uint32_t A = INIT_A;
+	uint32_t B = INIT_B;
+	uint32_t C = INIT_C;
+	uint32_t D = INIT_D;
+	uint32_t E = INIT_E;
+
+	SHA1(A, B, C, D, E, W);
+
+	state[0] = A + INIT_A;
+	state[1] = B + INIT_B;
+	state[2] = C + INIT_C;
+	state[3] = D + INIT_D;
+	state[4] = E + INIT_E;
+
+}
+
+__device__ void hmac_sha1(const uint8_t * key, uint32_t keylen,
+    const uint8_t * input, uint32_t inputlen, uint8_t * output,
+    uint32_t * ipad_state, uint32_t * opad_state)
+{
+	int i;
+	uint32_t temp, W[16];
+	uint32_t A, B, C, D, E;
+	uint32_t state_A,state_B,state_C,state_D,state_E;
+	uint8_t buf[64];
+	uint32_t *src=(uint32_t*)buf;
+	i=64/4;
+	while(i--)
+	  *src++=0;
+
+	memcpy(buf, input, inputlen);
+	buf[inputlen] = 0x80;
+	PUT_WORD_32_BE((64 + inputlen) << 3, buf, 60);
+
+	A = ipad_state[0];
+	B = ipad_state[1];
+	C = ipad_state[2];
+	D = ipad_state[3];
+	E = ipad_state[4];
+	
+	state_A=A;
+	state_B=B;
+	state_C=C;
+	state_D=D;
+	state_E=E;
+
+	for (i = 0; i < 16; i++)
+		GET_WORD_32_BE(W[i], buf, i * 4);
+
+	SHA1(A, B, C, D, E, W);
+
+	A += state_A;
+	B += state_B;
+	C += state_C;
+	D += state_D;
+	E += state_E;
+
+	PUT_WORD_32_BE(A, buf, 0);
+	PUT_WORD_32_BE(B, buf, 4);
+	PUT_WORD_32_BE(C, buf, 8);
+	PUT_WORD_32_BE(D, buf, 12);
+	PUT_WORD_32_BE(E, buf, 16);
+
+	buf[20] = 0x80;
+	PUT_WORD_32_BE(0x2A0, buf, 60);
+
+	A = opad_state[0];
+	B = opad_state[1];
+	C = opad_state[2];
+	D = opad_state[3];
+	E = opad_state[4];
+	
+	state_A=A;
+	state_B=B;
+	state_C=C;
+	state_D=D;
+	state_E=E;
+
+	for (i = 0; i < 16; i++)
+		GET_WORD_32_BE(W[i], buf, i * 4);
+
+	SHA1(A, B, C, D, E, W);
+
+	A += state_A;
+	B += state_B;
+	C += state_C;
+	D += state_D;
+	E += state_E;
+
+	PUT_WORD_32_BE(A, output, 0);
+	PUT_WORD_32_BE(B, output, 4);
+	PUT_WORD_32_BE(C, output, 8);
+	PUT_WORD_32_BE(D, output, 12);
+	PUT_WORD_32_BE(E, output, 16);
+}
+
+__device__ void pbkdf2(const uint8_t * pass, const uint8_t * salt,
+    int saltlen, uint8_t * out)
+{
+	uint8_t temp[SHA1_DIGEST_LENGTH];
+	__shared__ uint8_t sbuf[THREADS][48];
+	uint8_t* buf=sbuf[threadIdx.x];
+	uint32_t ipad_state[5];
+	uint32_t opad_state[5];
+	int i, j;
+	uint8_t tmp_out[16];
+
+	i=48/4;
+	uint32_t *src=(uint32_t*)buf;
+	while(i--)
+	  *src++=0;
+
+	memcpy(buf, salt, saltlen);
+	buf[saltlen + 3] = 0x01;
+
+	preproc(pass, 16, ipad_state, 0x36);
+	preproc(pass, 16, opad_state, 0x5c);
+
+	hmac_sha1(pass, 16, buf, saltlen + 4, temp, ipad_state, opad_state);
+
+	memcpy(tmp_out, temp, 20);
+
+	for (i = 1; i < ITERATIONS; i++) {
+		hmac_sha1(pass, 16, temp, SHA1_DIGEST_LENGTH, temp, ipad_state,
+		    opad_state);
+
+#pragma unroll 16
+		for (j = 0; j < 16; j++)
+			tmp_out[j] ^= temp[j];
+	}
+	memcpy(out, tmp_out, 20);
+}
+
+
+__global__ void pbkdf2_kernel(mscash2_password * inbuffer,
+    mscash2_hash * outbuffer)
+{
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+	uint32_t username_len = (uint32_t) cuda_salt[0].length;
+
+	pbkdf2((uint8_t *) inbuffer[idx].dcc_hash,
+	    cuda_salt[0].unicode_salt, username_len << 1,
+	    (uint8_t *) outbuffer[idx].v);
+
+}
+
+__host__ void mscash_cpu(mscash2_password * inbuffer, mscash2_hash * outbuffer,
+    mscash2_salt * host_salt)
+    {
+      
+      int i,idx = 0;
+	uint32_t buffer[16];
+	uint32_t nt_hash[16];
+	uint8_t salt[64];
+	memset(salt,0,64);
+	uint8_t *username = host_salt->salt;
+	uint32_t username_len = (uint32_t) host_salt->length;
+	
+
+	for (i = 0; i < (username_len >> 1) + 1; i++)
+		((uint32_t *) salt)[i] =
+		    username[2 * i] | (username[2 * i + 1] << 16);
+	memcpy(host_salt->unicode_salt, salt, 64);
+
+
+
+	for (idx = 0; idx < KEYS_PER_CRYPT; idx++) {
+
+		uint8_t *password = inbuffer[idx].v;
+		uint32_t password_len = inbuffer[idx].length;
+		memset(nt_hash, 0, 64);
+		memset(buffer, 0, 64);
+
+		for (i = 0; i < password_len >> 1; i++)
+			buffer[i] =
+			    password[2 * i] | (password[2 * i + 1] << 16);
+
+		if (password_len % 2 == 1)
+			buffer[i] = password[password_len - 1] | 0x800000;
+		else
+			buffer[i] = 0x80;
+
+		buffer[14] = password_len << 4;
+
+		md4_crypt(buffer, nt_hash);
+
+		memcpy((uint8_t *) nt_hash + 16, salt, username_len << 1);
+
+		i = username_len + 8;
+
+		if (username_len % 2 == 1)
+			nt_hash[i >> 1] =
+			    username[username_len - 1] | 0x800000;
+		else
+			nt_hash[i >> 1] = 0x80;
+
+		nt_hash[14] = i << 4;
+
+		md4_crypt(nt_hash, inbuffer[idx].dcc_hash);
+
+	}
+
+      
+    }
+__host__ void mscash2_gpu(mscash2_password * inbuffer, mscash2_hash * outbuffer,
+    mscash2_salt * host_salt)
+{
+	
+	mscash_cpu(inbuffer,outbuffer,host_salt);
+	mscash2_password *cuda_inbuffer;
+	mscash2_hash *cuda_outbuffer;
+	size_t insize = sizeof(mscash2_password) * KEYS_PER_CRYPT;
+	size_t outsize = sizeof(mscash2_hash) * KEYS_PER_CRYPT;
+	
+	HANDLE_ERROR(cudaMemcpyToSymbol(cuda_salt, host_salt,
+		sizeof(mscash2_salt)));
+	
+	HANDLE_ERROR(cudaMalloc(&cuda_inbuffer, insize));
+	HANDLE_ERROR(cudaMalloc(&cuda_outbuffer, outsize));
+
+	HANDLE_ERROR(cudaMemcpy(cuda_inbuffer, inbuffer, insize,
+		cudaMemcpyHostToDevice));
+
+	pbkdf2_kernel <<< BLOCKS, THREADS >>> (cuda_inbuffer, cuda_outbuffer);
+
+	HANDLE_ERROR(cudaMemcpy(outbuffer, cuda_outbuffer, outsize,
+		cudaMemcpyDeviceToHost));
+
+	HANDLE_ERROR(cudaFree(cuda_inbuffer));
+	HANDLE_ERROR(cudaFree(cuda_outbuffer));
+
+}
diff -urpN john-1.7.9-jumbo-5/src/cuda/phpass.cu john-1.7.9-jumbo-5-cuda-2/src/cuda/phpass.cu
--- john-1.7.9-jumbo-5/src/cuda/phpass.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda/phpass.cu	2012-02-15 01:35:52.361382846 +0000
@@ -0,0 +1,298 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include "../cuda_phpass.h"
+#include "cuda_common.cuh"
+
+const uint SETTING_SIZE = 12;
+const uint DATA_IN_SIZE = KEYS_PER_CRYPT * sizeof(phpass_password);
+const uint DATA_OUT_SIZE = KEYS_PER_CRYPT * sizeof(phpass_hash);
+
+__device__ unsigned char *cuda_data = NULL;
+__device__ uint32_t *cuda_data_out = NULL;
+__device__ char *cuda_setting = NULL;
+
+unsigned char *host_data = NULL;
+uint32_t *host_data_out = NULL;
+
+extern "C" void mem_init(unsigned char *, uint32_t *, char *, char *, int);
+extern "C" void mem_clear(void);
+extern "C" void gpu_phpass(void);
+
+__global__ void kernel_phpass(unsigned char *data, uint32_t * data_out,
+    char *, int);
+
+int cuda_count_log2;
+__host__ void gpu_phpass(void)
+{
+	dim3 dimGrid(BLOCKS);
+	dim3 dimBlock(THREADS);
+	kernel_phpass <<< dimGrid, dimBlock >>> (cuda_data, cuda_data_out,
+	    cuda_setting, cuda_count_log2);
+	HANDLE_ERROR(cudaThreadSynchronize());
+}
+
+__host__ void mem_init(unsigned char *p, uint32_t * h, char *setting,
+    char *itoa, int count_log2)
+{
+	cuda_count_log2 = count_log2;
+	host_data = p;
+	host_data_out = h;
+	HANDLE_ERROR(cudaMalloc(&cuda_setting, SETTING_SIZE));
+	HANDLE_ERROR(cudaMalloc(&cuda_data, DATA_IN_SIZE));
+	HANDLE_ERROR(cudaMalloc(&cuda_data_out, DATA_OUT_SIZE));
+	HANDLE_ERROR(cudaMemcpy(cuda_data, host_data, DATA_IN_SIZE,
+		cudaMemcpyHostToDevice));
+	HANDLE_ERROR(cudaMemcpy(cuda_setting, setting, SETTING_SIZE,
+		cudaMemcpyHostToDevice));
+}
+
+__host__ void mem_clear()
+{
+	HANDLE_ERROR(cudaMemcpy(host_data_out, cuda_data_out, DATA_OUT_SIZE,
+		cudaMemcpyDeviceToHost));
+	HANDLE_ERROR(cudaFree(cuda_data));
+	HANDLE_ERROR(cudaFree(cuda_data_out));
+	HANDLE_ERROR(cudaFree(cuda_setting));
+}
+
+__device__ void cuda_md5(char len, uint32_t * internal_ret, uint32_t * x)
+{
+	x[len / 4] |= (((uint32_t) 0x80) << ((len & 0x3) << 3));
+	uint32_t x14 = len << 3;
+
+	uint32_t a = 0x67452301;
+	uint32_t b = 0xefcdab89;
+	uint32_t c = 0x98badcfe;
+	uint32_t d = 0x10325476;
+
+	a = AC1 + x[0];
+	a = ROTATE_LEFT(a, S11);
+	a += b;			/* 1 */
+	d = (c ^ (a & MASK1)) + x[1] + AC2pCd;
+	d = ROTATE_LEFT(d, S12);
+	d += a;			/* 2 */
+	c = F(d, a, b) + x[2] + AC3pCc;
+	c = ROTATE_LEFT(c, S13);
+	c += d;			/* 3 */
+	b = F(c, d, a) + x[3] + AC4pCb;
+	b = ROTATE_LEFT(b, S14);
+	b += c;
+	FF(a, b, c, d, x[4], S11, 0xf57c0faf);
+	FF(d, a, b, c, x[5], S12, 0x4787c62a);
+	FF(c, d, a, b, x[6], S13, 0xa8304613);
+	FF(b, c, d, a, x[7], S14, 0xfd469501);
+	FF(a, b, c, d, 0, S11, 0x698098d8);
+	FF(d, a, b, c, 0, S12, 0x8b44f7af);
+	FF(c, d, a, b, 0, S13, 0xffff5bb1);
+	FF(b, c, d, a, 0, S14, 0x895cd7be);
+	FF(a, b, c, d, 0, S11, 0x6b901122);
+	FF(d, a, b, c, 0, S12, 0xfd987193);
+	FF(c, d, a, b, x14, S13, 0xa679438e);
+	FF(b, c, d, a, 0, S14, 0x49b40821);
+
+	GG(a, b, c, d, x[1], S21, 0xf61e2562);
+	GG(d, a, b, c, x[6], S22, 0xc040b340);
+	GG(c, d, a, b, 0, S23, 0x265e5a51);
+	GG(b, c, d, a, x[0], S24, 0xe9b6c7aa);
+	GG(a, b, c, d, x[5], S21, 0xd62f105d);
+	GG(d, a, b, c, 0, S22, 0x2441453);
+	GG(c, d, a, b, 0, S23, 0xd8a1e681);
+	GG(b, c, d, a, x[4], S24, 0xe7d3fbc8);
+	GG(a, b, c, d, 0, S21, 0x21e1cde6);
+	GG(d, a, b, c, x14, S22, 0xc33707d6);
+	GG(c, d, a, b, x[3], S23, 0xf4d50d87);
+	GG(b, c, d, a, 0, S24, 0x455a14ed);
+	GG(a, b, c, d, 0, S21, 0xa9e3e905);
+	GG(d, a, b, c, x[2], S22, 0xfcefa3f8);
+	GG(c, d, a, b, x[7], S23, 0x676f02d9);
+	GG(b, c, d, a, 0, S24, 0x8d2a4c8a);
+
+	HH(a, b, c, d, x[5], S31, 0xfffa3942);
+	HH(d, a, b, c, 0, S32, 0x8771f681);
+	HH(c, d, a, b, 0, S33, 0x6d9d6122);
+	HH(b, c, d, a, x14, S34, 0xfde5380c);
+	HH(a, b, c, d, x[1], S31, 0xa4beea44);
+	HH(d, a, b, c, x[4], S32, 0x4bdecfa9);
+	HH(c, d, a, b, x[7], S33, 0xf6bb4b60);
+	HH(b, c, d, a, 0, S34, 0xbebfbc70);
+	HH(a, b, c, d, 0, S31, 0x289b7ec6);
+	HH(d, a, b, c, x[0], S32, 0xeaa127fa);
+	HH(c, d, a, b, x[3], S33, 0xd4ef3085);
+	HH(b, c, d, a, x[6], S34, 0x4881d05);
+	HH(a, b, c, d, 0, S31, 0xd9d4d039);
+	HH(d, a, b, c, 0, S32, 0xe6db99e5);
+	HH(c, d, a, b, 0, S33, 0x1fa27cf8);
+	HH(b, c, d, a, x[2], S34, 0xc4ac5665);
+
+	II(a, b, c, d, x[0], S41, 0xf4292244);
+	II(d, a, b, c, x[7], S42, 0x432aff97);
+	II(c, d, a, b, x14, S43, 0xab9423a7);
+	II(b, c, d, a, x[5], S44, 0xfc93a039);
+	II(a, b, c, d, 0, S41, 0x655b59c3);
+	II(d, a, b, c, x[3], S42, 0x8f0ccc92);
+	II(c, d, a, b, 0, S43, 0xffeff47d);
+	II(b, c, d, a, x[1], S44, 0x85845dd1);
+	II(a, b, c, d, 0, S41, 0x6fa87e4f);
+	II(d, a, b, c, 0, S42, 0xfe2ce6e0);
+	II(c, d, a, b, x[6], S43, 0xa3014314);
+	II(b, c, d, a, 0, S44, 0x4e0811a1);
+	II(a, b, c, d, x[4], S41, 0xf7537e82);
+	II(d, a, b, c, 0, S42, 0xbd3af235);
+	II(c, d, a, b, x[2], S43, 0x2ad7d2bb);
+	II(b, c, d, a, 0, S44, 0xeb86d391);
+
+	internal_ret[0] = a + 0x67452301;
+	internal_ret[1] = b + 0xefcdab89;
+	internal_ret[2] = c + 0x98badcfe;
+	internal_ret[3] = d + 0x10325476;
+}
+
+__device__ void clear_ctx(uint32_t * x)
+{
+	int i;
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		*x++ = 0;
+}
+
+__global__ void kernel_phpass(unsigned char *password, uint32_t * data_out,
+    char *setting, int count_log2)
+{
+	uint32_t x[8];
+	clear_ctx(x);
+
+	int length, count, i;
+	unsigned char *buff = (unsigned char *) x;
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+	length = password[address(15, idx)];
+
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		buff[i] = setting[i + 4];
+
+	for (i = 8; i < 8 + length; i++) {
+		buff[i] = password[address(i - 8, idx)];
+	}
+
+	cuda_md5(8 + length, x, x);
+	count = 1 << count_log2;
+	for (i = 16; i < 16 + length; i++)
+		buff[i] = password[address(i - 16, idx)];
+
+
+	uint32_t a, b, c, d, x0, x1, x2, x3, x4, x5, x6, x7;
+	uint32_t len = 16 + length;
+	uint32_t x14 = len << 3;
+
+	x[len / 4] |= ((0x80) << ((len & 0x3) << 3));
+	x0 = x[0];
+	x1 = x[1];
+	x2 = x[2];
+	x3 = x[3];
+	x4 = x[4];
+	x5 = x[5];
+	x6 = x[6];
+	x7 = x[7];
+	do {
+
+		b = 0xefcdab89;
+		c = 0x98badcfe;
+		d = 0x10325476;
+
+		a = AC1 + x0;
+		a = ROTATE_LEFT(a, S11);
+		a += b;		/* 1 */
+		d = (c ^ (a & MASK1)) + x1 + AC2pCd;
+		d = ROTATE_LEFT(d, S12);
+		d += a;		/* 2 */
+		c = F(d, a, b) + x2 + AC3pCc;
+		c = ROTATE_LEFT(c, S13);
+		c += d;		/* 3 */
+		b = F(c, d, a) + x3 + AC4pCb;
+		b = ROTATE_LEFT(b, S14);
+		b += c;
+		FF(a, b, c, d, x4, S11, 0xf57c0faf);
+		FF(d, a, b, c, x5, S12, 0x4787c62a);
+		FF(c, d, a, b, x6, S13, 0xa8304613);
+		FF(b, c, d, a, x7, S14, 0xfd469501);
+		FF(a, b, c, d, 0, S11, 0x698098d8);
+		FF(d, a, b, c, 0, S12, 0x8b44f7af);
+		FF(c, d, a, b, 0, S13, 0xffff5bb1);
+		FF(b, c, d, a, 0, S14, 0x895cd7be);
+		FF(a, b, c, d, 0, S11, 0x6b901122);
+		FF(d, a, b, c, 0, S12, 0xfd987193);
+		FF(c, d, a, b, x14, S13, 0xa679438e);
+		FF(b, c, d, a, 0, S14, 0x49b40821);
+
+		GG(a, b, c, d, x1, S21, 0xf61e2562);
+		GG(d, a, b, c, x6, S22, 0xc040b340);
+		GG(c, d, a, b, 0, S23, 0x265e5a51);
+		GG(b, c, d, a, x0, S24, 0xe9b6c7aa);
+		GG(a, b, c, d, x5, S21, 0xd62f105d);
+		GG(d, a, b, c, 0, S22, 0x2441453);
+		GG(c, d, a, b, 0, S23, 0xd8a1e681);
+		GG(b, c, d, a, x4, S24, 0xe7d3fbc8);
+		GG(a, b, c, d, 0, S21, 0x21e1cde6);
+		GG(d, a, b, c, x14, S22, 0xc33707d6);
+		GG(c, d, a, b, x3, S23, 0xf4d50d87);
+		GG(b, c, d, a, 0, S24, 0x455a14ed);
+		GG(a, b, c, d, 0, S21, 0xa9e3e905);
+		GG(d, a, b, c, x2, S22, 0xfcefa3f8);
+		GG(c, d, a, b, x7, S23, 0x676f02d9);
+		GG(b, c, d, a, 0, S24, 0x8d2a4c8a);
+
+		HH(a, b, c, d, x5, S31, 0xfffa3942);
+		HH(d, a, b, c, 0, S32, 0x8771f681);
+		HH(c, d, a, b, 0, S33, 0x6d9d6122);
+		HH(b, c, d, a, x14, S34, 0xfde5380c);
+		HH(a, b, c, d, x1, S31, 0xa4beea44);
+		HH(d, a, b, c, x4, S32, 0x4bdecfa9);
+		HH(c, d, a, b, x7, S33, 0xf6bb4b60);
+		HH(b, c, d, a, 0, S34, 0xbebfbc70);
+		HH(a, b, c, d, 0, S31, 0x289b7ec6);
+		HH(d, a, b, c, x0, S32, 0xeaa127fa);
+		HH(c, d, a, b, x3, S33, 0xd4ef3085);
+		HH(b, c, d, a, x6, S34, 0x4881d05);
+		HH(a, b, c, d, 0, S31, 0xd9d4d039);
+		HH(d, a, b, c, 0, S32, 0xe6db99e5);
+		HH(c, d, a, b, 0, S33, 0x1fa27cf8);
+		HH(b, c, d, a, x2, S34, 0xc4ac5665);
+
+		II(a, b, c, d, x0, S41, 0xf4292244);
+		II(d, a, b, c, x7, S42, 0x432aff97);
+		II(c, d, a, b, x14, S43, 0xab9423a7);
+		II(b, c, d, a, x5, S44, 0xfc93a039);
+		II(a, b, c, d, 0, S41, 0x655b59c3);
+		II(d, a, b, c, x3, S42, 0x8f0ccc92);
+		II(c, d, a, b, 0, S43, 0xffeff47d);
+		II(b, c, d, a, x1, S44, 0x85845dd1);
+		II(a, b, c, d, 0, S41, 0x6fa87e4f);
+		II(d, a, b, c, 0, S42, 0xfe2ce6e0);
+		II(c, d, a, b, x6, S43, 0xa3014314);
+		II(b, c, d, a, 0, S44, 0x4e0811a1);
+		II(a, b, c, d, x4, S41, 0xf7537e82);
+		II(d, a, b, c, 0, S42, 0xbd3af235);
+		II(c, d, a, b, x2, S43, 0x2ad7d2bb);
+		II(b, c, d, a, 0, S44, 0xeb86d391);
+
+		x0 = a + 0x67452301;
+		x1 = b + 0xefcdab89;
+		x2 = c + 0x98badcfe;
+		x3 = d + 0x10325476;
+
+	} while (--count);
+
+	data_out[address(0, idx)] = x0;
+	data_out[address(1, idx)] = x1;
+	data_out[address(2, idx)] = x2;
+	data_out[address(3, idx)] = x3;
+
+}
diff -urpN john-1.7.9-jumbo-5/src/cuda/rawsha256.cu john-1.7.9-jumbo-5-cuda-2/src/cuda/rawsha256.cu
--- john-1.7.9-jumbo-5/src/cuda/rawsha256.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda/rawsha256.cu	2012-02-17 06:15:19.315725754 +0000
@@ -0,0 +1,85 @@
+/**
+This file is shared by cuda-rawsha224 and cuda-rawsha256 formats, 
+SHA256 definition is used to distinguish between them. 
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include "../cuda_rawsha256.h"
+
+static void cuda_rawsha256(sha256_password *,void *);
+
+#ifdef SHA256
+  #define SHA_HASH sha256_hash
+  __constant__ const uint32_t H[]={
+   0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
+  extern "C" void gpu_rawsha256(sha256_password *i,SHA_HASH*o){cuda_rawsha256(i,o);}
+#endif
+#ifdef SHA224
+  #define SHA_HASH sha224_hash
+   __constant__ const uint32_t H[]={
+   0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31,  0x68581511, 0x64f98fa7, 0xbefa4fa4};
+   extern "C" void gpu_rawsha224(sha256_password *i,SHA_HASH*o){cuda_rawsha256(i,o);}
+#endif
+
+const uint32_t DATA_IN_SIZE=KEYS_PER_CRYPT*sizeof(sha256_password);
+const uint32_t DATA_OUT_SIZE=KEYS_PER_CRYPT*sizeof(SHA_HASH);
+
+static sha256_password *cuda_data=NULL;		///candidates
+static SHA_HASH *cuda_data_out=NULL;		///sha256(candidate) or sha224(candidate)
+
+__global__ void kernel_sha256(sha256_password *data,SHA_HASH *data_out);
+static void cuda_rawsha256(sha256_password *host_in,void *out)
+{
+  SHA_HASH* host_out = (SHA_HASH *)out;
+ ///Aloc memory and copy data to gpu
+  cudaMalloc(&cuda_data,DATA_IN_SIZE);
+  cudaMalloc(&cuda_data_out,DATA_OUT_SIZE);
+  cudaMemcpy(cuda_data,host_in,DATA_IN_SIZE,cudaMemcpyHostToDevice);
+ ///Run kernel and wait for execution end
+  kernel_sha256<<<BLOCKS,THREADS>>>(cuda_data,cuda_data_out);
+  cudaThreadSynchronize(); 
+ ///Free memory and copy results back
+  cudaMemcpy(host_out,cuda_data_out,DATA_OUT_SIZE,cudaMemcpyDeviceToHost);
+  cudaFree(cuda_data);
+  cudaFree(cuda_data_out);  
+}
+
+   
+__global__ void kernel_sha256(sha256_password *data,SHA_HASH* data_out){ /// todo - use shared memory
+  uint32_t idx = blockIdx.x*blockDim.x + threadIdx.x;
+ const uint32_t k[]={
+   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+   0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+   0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+   0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+   0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+   0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+   0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2};
+  uint32_t w[64]={0};
+  SHA_HASH* out=&data_out[idx];
+
+  #pragma unroll 64
+  for(uint32_t j=0;j<64;j++){
+    if(j<16) w[j]=data[idx].v[j];
+  else w[j]=sigma1(w[j-2])+w[j-7]+sigma0(w[j-15])+w[j-16];
+  }
+  
+  uint32_t a=H[0];uint32_t b=H[1];uint32_t c=H[2];uint32_t d=H[3];
+  uint32_t e=H[4];uint32_t f=H[5];uint32_t g=H[6];uint32_t h=H[7];
+  #pragma unroll 64
+  for(uint32_t j=0;j<64;j++){
+   uint32_t t1=h+Sigma1(e)+Ch(e,f,g)+k[j]+w[j];
+   uint32_t t2=Sigma0(a)+Maj(a,b,c);
+   h=g;g=f;f=e;e=d+t1;d=c;c=b;b=a;a=t1+t2;
+  }
+  out->v[0]=a+H[0];out->v[1]=b+H[1];
+  out->v[2]=c+H[2];out->v[3]=d+H[3];
+  out->v[4]=e+H[4];out->v[5]=f+H[5];
+  out->v[6]=g+H[6];
+  #ifdef SHA256
+  out->v[7]=h+H[7];
+  #endif
+}
\ No newline at end of file
diff -urpN john-1.7.9-jumbo-5/src/cuda_common.h john-1.7.9-jumbo-5-cuda-2/src/cuda_common.h
--- john-1.7.9-jumbo-5/src/cuda_common.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_common.h	2012-02-23 06:17:10.111451951 +0000
@@ -0,0 +1,26 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _CUDA_COMMON_H
+#define _CUDA_COMMON_H
+
+/*
+* CUDA device id specified by -gpu parameter
+*/
+unsigned int gpu_id;
+
+extern void cuda_init(unsigned int gpu_id);
+
+#define check_mem_allocation(inbuffer,outbuffer)\
+    if(inbuffer==NULL){\
+      fprintf(stderr,"Cannot alocate memory for passwords file:%s line:%d\n",__FILE__,__LINE__);\
+      exit(1);\
+    }\
+    if(inbuffer==NULL){\
+      fprintf(stderr,"Cannot alocate memory for hashes file:%s line:%d\n",__FILE__,__LINE__);\
+      exit(1);\
+    }
+    
+#endif
\ No newline at end of file
diff -urpN john-1.7.9-jumbo-5/src/cuda_cryptmd5.h john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptmd5.h
--- john-1.7.9-jumbo-5/src/cuda_cryptmd5.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptmd5.h	2011-08-30 22:05:51.000000000 +0000
@@ -0,0 +1,105 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _CRYPTMD5_H
+#define _CRYPTMD5_H
+#include <assert.h>
+#include "common.h"
+#include <stdint.h>
+#include <stdbool.h>
+
+#define uint32_t unsigned int
+#define uint8_t unsigned char
+
+#define MIN(a,b) ((a)<(b)?(a):(b))
+#define MAX(a,b) ((a)>(b)?(a):(b))
+
+#define BLOCKS (28)
+#define THREADS 256 
+#define KEYS_PER_CRYPT BLOCKS*THREADS
+#define PLAINTEXT_LENGTH	15
+
+typedef struct {
+	unsigned char saltlen;
+	char salt[8];
+	char prefix;		// 'a' when $apr1$ or '1' when $1$
+} crypt_md5_salt;
+
+typedef struct {
+	unsigned char length;
+	unsigned char v[PLAINTEXT_LENGTH];
+} crypt_md5_password;
+
+typedef struct {
+	uint32_t v[4];		//128 bits
+} crypt_md5_hash;
+
+typedef struct __attribute__((__aligned__(4))){
+	uint8_t buffer[64];
+} md5_ctx ;
+
+static const char md5_salt_prefix[] = "$1$";
+static const char apr1_salt_prefix[] = "$apr1$";
+
+#define address(j,idx) 			(((j)*KEYS_PER_CRYPT)+(idx))
+
+#define ROTATE_LEFT(x, s) ((x << s) | (x >> (32 - s)))
+
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
+
+#define H(x, y, z) (x^y^z)
+#define I(x, y, z) (y^(x|~z))
+
+#define FF(v, w, x, y, z, s, ac) { \
+ v = ROTATE_LEFT(v + z + ac + F(w, x, y), s) + w; \
+ }
+#define GG(v, w, x, y, z, s, ac) { \
+ v = ROTATE_LEFT(v + z + ac + G(w, x, y), s) + w; \
+ }
+#define HH(v, w, x, y, z, s, ac) { \
+ v = ROTATE_LEFT(v + z + ac + H(w, x, y), s) + w; \
+ }
+#define II(v, w, x, y, z, s, ac) { \
+ v = ROTATE_LEFT(v + z + ac + I(w, x, y), s) + w; \
+ }
+#define FF2(v, w, x, y, s, ac) { \
+ v = ROTATE_LEFT(v + ac + F(w, x, y), s) + w; \
+ }
+#define GG2(v, w, x, y, s, ac) { \
+ v = ROTATE_LEFT(v + ac + G(w, x, y), s) + w; \
+ }
+#define HH2(v, w, x, y, s, ac) { \
+ v = ROTATE_LEFT(v + ac + H(w, x, y), s) + w; \
+ }
+#define II2(v, w, x, y, s, ac) { \
+ v = ROTATE_LEFT(v + ac + I(w, x, y), s) + w; \
+ }
+
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+#define AC1				0xd76aa477
+#define AC2pCd				0xf8fa0bcc
+#define AC3pCc				0xbcdb4dd9
+#define AC4pCb				0xb18b7a77
+#define MASK1				0x77777777
+
+
+#endif
diff -urpN john-1.7.9-jumbo-5/src/cuda_cryptmd5_fmt.c john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptmd5_fmt.c
--- john-1.7.9-jumbo-5/src/cuda_cryptmd5_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptmd5_fmt.c	2012-02-24 17:13:41.797314002 +0000
@@ -0,0 +1,370 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <string.h>
+#include <unistd.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cuda_common.h"
+#include "cuda_cryptmd5.h"
+
+#define FORMAT_LABEL		"cryptmd5-cuda"
+#define FORMAT_NAME		FORMAT_LABEL
+
+#define CRYPT_TYPE		"MD5-based CRYPT"
+
+#define BENCHMARK_COMMENT	""
+#define BENCHMARK_LENGTH	-1
+
+#define BINARY_SIZE		16
+#define SALT_SIZE		(8+1)
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+void md5_crypt_gpu(crypt_md5_password *,uint32_t *, crypt_md5_salt *);
+
+static crypt_md5_password *inbuffer;//[MAX_KEYS_PER_CRYPT];			/** plaintext ciphertexts **/
+static uint32_t *outbuffer;//[MAX_KEYS_PER_CRYPT*4];			/** calculated hashes **/
+static crypt_md5_salt host_salt;					/** salt **/
+
+
+//tests are unified for 8+8 length
+static struct fmt_tests tests[] = {
+/*	   {"$1$Btiy90iG$bGn4vzF3g1rIVGZ5odGIp/","qwerty"},
+	   {"$1$salt$c813W/s478KCzR0NnHx7j0","qwerty"},
+	   {"$1$salt$8LO.EVfsTf.HATV1Bd0ZP/","john"},
+	   {"$1$salt$TelRRxWBCxlpXmgAeB82R/","openwall"},
+	   {"$1$salt$l9PzDiECW83MOIMFTRL4Y1","summerofcode"},
+	   {"$1$salt$wZ2yVsplRoPoD7IfTvRsa0","IamMD5"},
+	   {"$1$saltstri$9S4.PyBpUZBRZw6ZsmFQE/","john"},
+	   {"$1$saltstring$YmP55hH3qcHg2cCffyxrq/","ala"},
+*/
+//         {"$1$salt1234$mdji1uBBCWZ5m2mIWKvLW.","a"},
+//         {"$1$salt1234$/JUvhIWHD.csWSCPvr7po0","ab"},
+//         {"$1$salt1234$GrxHg1bgkN2HB5CRCdrmF.","abc"},
+//         {"$1$salt1234$iZuyvTkrucWx8kVn5BN4M/","abcd"},
+//         {"$1$salt1234$wn0RbuDtbJlD1Q.X7.9wG/","abcde"},
+
+//         {"$1$salt1234$lzB83HS4FjzbcD4yMcjl01","abcdef"},
+//          {"$1$salt1234$bklJHN73KS04Kh6j6qPnr.","abcdefg"}, 
+	{"$1$salt1234$u4RMKGXG2b/Ud2rFmhqi70", "abcdefgh"},	//saltlen=8,passlen=8
+//         {"$1$salt1234$QjP48HUerU7aUYc/aJnre1","abcdefghi"},
+//         {"$1$salt1234$9jmu9ldi9vNw.XDO3TahR.","abcdefghij"},
+
+//         {"$1$salt1234$d3.LnlDWfkTIej5Ef1sCU/","abcdefghijk"},
+//         {"$1$salt1234$pDV0xEgZR14EpQMmhZ6Hg0","abcdefghijkl"},
+//         {"$1$salt1234$WumpbolX2y45Dlv0.A1Mj1","abcdefghijklm"},
+//         {"$1$salt1234$FXBreA27b7N7diemBGn5I1","abcdefghijklmn"},
+//         {"$1$salt1234$8d5IPIbTd7J/WNEG4b4cl.","abcdefghijklmno"},
+
+	//tests from korelogic2010 contest
+/*	   {"$1$bn6UVs3/$S6CQRLhmenR8OmVp3Jm5p0","sparky"},
+	   {"$1$qRiPuG5Z$pLLczmBnwEOD75Vb7YZLg1","walter"},
+	   {"$1$E.qsK.Hy$.eX0H6arTHaGOIFkf6o.a.","heaven"},
+	   {"$1$Hul2mrWs$.NGCgz3fBGDyG7RMGJAdM0","bananas"},
+	   {"$1$1l88Y.UV$swt2d0SPMrBPkdAD8RwSj0","horses"},
+	   {"$1$DiHrL6V7$fCVDD1GEAKB.BjAgJL1ZX0","maddie"},
+	   {"$1$7fpfV7kr$7LgF64DGPtHPktVKdLM490","bitch1"},
+	   {"$1$VKjk2PJc$5wbrtc9oa8kdEO/ocyi06/","crystal"},
+	   {"$1$S66DxkFm$kG.QfeHNLifEDTDmf4pzJ/","claudia"},
+	   {"$1$T2JMeEYj$Y.wDzFvyb9nlH1EiSCI3M/","august"}, 
+	 
+	   //tests from MD5_fmt.c
+	   {"$1$12345678$aIccj83HRDBo6ux1bVx7D1", "0123456789ABCDE"},
+	   {"$apr1$Q6ZYh...$RV6ft2bZ8j.NGrxLYaJt9.", "test"},
+	   {"$1$12345678$f8QoJuo0DpBRfQSD0vglc1", "12345678"},
+	   {"$1$$qRPK7m23GJusamGpoGLby/", ""},
+	   {"$apr1$a2Jqm...$grFrwEgiQleDr0zR4Jx1b.", "15 chars is max"},
+	   {"$1$$AuJCr07mI7DSew03TmBIv/", "no salt"},
+	   {"$1$`!@#%^&*$E6hD76/pKTS8qToBCkux30", "invalid salt"},
+	   {"$1$12345678$xek.CpjQUVgdf/P2N9KQf/", ""},
+	   {"$1$1234$BdIMOAWFOV2AQlLsrN/Sw.", "1234"},
+	   {"$apr1$rBXqc...$NlXxN9myBOk95T0AyLAsJ0", "john"},
+	   {"$apr1$Grpld/..$qp5GyjwM2dnA5Cdej9b411", "the"},
+	   {"$apr1$GBx.D/..$yfVeeYFCIiEXInfRhBRpy/", "ripper"},
+*/
+	{NULL}
+};
+
+static void cleanup()
+{
+ free(inbuffer);
+ free(outbuffer);
+}
+
+static void init(struct fmt_main *pFmt)
+{  
+  //Alocate memory for hashes and passwords
+  inbuffer=(crypt_md5_password*)malloc(sizeof(crypt_md5_password)*MAX_KEYS_PER_CRYPT);
+  outbuffer=(uint32_t*)malloc(sizeof(uint32_t)*MAX_KEYS_PER_CRYPT*4);
+  check_mem_allocation(inbuffer,outbuffer);
+  atexit(cleanup);
+  //Initialize CUDA
+  cuda_init(gpu_id);
+}
+
+static int valid(char *ciphertext,struct fmt_main *pFmt)
+{
+	uint8_t i, len = strlen(ciphertext), prefix = 0;
+
+	if (strncmp(ciphertext, md5_salt_prefix, strlen(md5_salt_prefix)) == 0)
+		prefix |= 1;
+	if (strncmp(ciphertext, apr1_salt_prefix,
+		strlen(apr1_salt_prefix)) == 0)
+		prefix |= 2;
+	if (prefix == 0)
+		return 0;
+
+	char *p = strrchr(ciphertext, '$');
+	if (p == NULL)
+		return 0;
+	for (i = p - ciphertext + 1; i < len; i++) {
+		uint8_t z = ARCH_INDEX(ciphertext[i]);
+		if (ARCH_INDEX(atoi64[z]) == 0x7f)
+			return 0;
+	}
+	if (len - (p - ciphertext + 1) != 22)
+		return 0;
+	return 1;
+};
+
+static int findb64(char c)
+{
+	int ret = ARCH_INDEX(atoi64[(uint8_t) c]);
+	return ret != 0x7f ? ret : 0;
+}
+
+static void to_binary(char *crypt, char *alt)
+{
+
+#define _24bit_from_b64(I,B2,B1,B0) \
+  {\
+      unsigned char c1=findb64(crypt[I+0]);\
+      unsigned char c2=findb64(crypt[I+1]);\
+      unsigned char c3=findb64(crypt[I+2]);\
+      unsigned char c4=findb64(crypt[I+3]);\
+      unsigned int w=c4<<18|c3<<12|c2<<6|c1;\
+      unsigned char b2=w&0xff;w>>=8;\
+      unsigned char b1=w&0xff;w>>=8;\
+      unsigned char b0=w&0xff;w>>=8;\
+      alt[B2]=b0;\
+      alt[B1]=b1;\
+      alt[B0]=b2;\
+  }
+
+	_24bit_from_b64(0, 0, 6, 12);
+	_24bit_from_b64(4, 1, 7, 13);
+	_24bit_from_b64(8, 2, 8, 14);
+	_24bit_from_b64(12, 3, 9, 15);
+	_24bit_from_b64(16, 4, 10, 5);
+	uint32_t w = findb64(crypt[21]) << 6 | findb64(crypt[20]) << 0;
+	alt[11] = (w & 0xff);
+}
+
+static void *binary(char *ciphertext)
+{
+	static char b[BINARY_SIZE];
+	memset(b, 0, BINARY_SIZE);
+	char *p = strrchr(ciphertext, '$') + 1;
+	to_binary(p, b);
+	return (void *) b;
+}
+
+
+static void *salt(char *ciphertext)
+{
+	static uint8_t ret[SALT_SIZE];
+	memset(ret, 0, SALT_SIZE);
+	uint8_t i, *pos = (uint8_t *) ciphertext, *dest = ret, *end;
+
+	if (strncmp(ciphertext, md5_salt_prefix, strlen(md5_salt_prefix)) == 0) {
+		pos += strlen(md5_salt_prefix);
+		ret[8] = '1';
+	}
+	if (strncmp(ciphertext, apr1_salt_prefix,
+		strlen(apr1_salt_prefix)) == 0) {
+		pos += strlen(apr1_salt_prefix);
+		ret[8] = 'a';
+	}
+	end = pos;
+	for (i = 0; i < 8 && *end != '$'; i++, end++);
+	while (pos != end)
+		*dest++ = *pos++;
+	return (void *) ret;
+}
+
+static int binary_hash_0(void *binary)
+{
+	return (((ARCH_WORD_32 *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfffff;
+}
+
+static int binary_hash_5(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffffff;
+}
+
+static int binary_hash_6(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0x7ffffff;
+}
+static void set_salt(void *salt)
+{
+	uint8_t *s = salt;
+	uint8_t len;
+	for (len = 0; len < 8 && s[len]; len++);
+	host_salt.saltlen = len;
+	memcpy(host_salt.salt, s, host_salt.saltlen);
+	host_salt.prefix = s[8];
+}
+
+static void set_key(char *key, int index)
+{
+	uint32_t len = strlen(key);
+	inbuffer[index].length = len;
+	memcpy((char *) inbuffer[index].v, key, len);
+}
+
+static char *get_key(int index)
+{
+	static char ret[PLAINTEXT_LENGTH + 1];
+	memcpy(ret, inbuffer[index].v, PLAINTEXT_LENGTH);
+	ret[inbuffer[index].length] = '\0';
+	return ret;
+}
+
+static void crypt_all(int count)
+{
+	md5_crypt_gpu(inbuffer, outbuffer, &host_salt);
+}
+
+static int get_hash_0(int index)
+{
+	return outbuffer[address(0, index)] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	return outbuffer[address(0, index)] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	return outbuffer[address(0, index)] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	return outbuffer[address(0, index)] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	return outbuffer[address(0, index)] & 0xfffff;
+}
+
+static int get_hash_5(int index)
+{
+	return outbuffer[address(0, index)] & 0xffffff;
+}
+
+static int get_hash_6(int index)
+{
+	return outbuffer[address(0, index)] & 0x7ffffff;
+}
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i, b = ((uint32_t *) binary)[0];
+	for (i = 0; i < count; i++)
+		if(b==outbuffer[address(0,i)])
+		return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	uint32_t i, *t = (uint32_t *) binary;
+	for (i = 0; i < 4; i++)
+		if(t[i]!=outbuffer[address(i, index)])
+		  return 0;
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_cuda_cryptmd5 = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_NAME,
+		    CRYPT_TYPE,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    fmt_default_prepare,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+				binary_hash_4,
+				binary_hash_5,
+				binary_hash_6
+		    },
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4,
+			get_hash_5,
+			get_hash_6
+		    },
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
diff -urpN john-1.7.9-jumbo-5/src/cuda_cryptsha256.h john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptsha256.h
--- john-1.7.9-jumbo-5/src/cuda_cryptsha256.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptsha256.h	2011-08-30 14:37:03.000000000 +0000
@@ -0,0 +1,66 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _CRYPTSHA256_H
+#define _CRYPTSHA256_H
+
+#include <assert.h>
+#include "common.h"
+#include <stdint.h>
+#include <stdbool.h>
+
+#define BLOCKS 14
+#define THREADS 192//set 320 for fermi
+
+#define KEYS_PER_CRYPT BLOCKS*THREADS
+
+#define uint32_t unsigned int
+#define uint8_t unsigned char
+
+#define rol(x,n) ((x << n) | (x >> (32-n)))
+#define ror(x,n) ((x >> n) | (x << (32-n)))
+#define Ch(x,y,z) ((x & y) ^ ( (~x) & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) ((ror(x,2))  ^ (ror(x,13)) ^ (ror(x,22)))
+#define Sigma1(x) ((ror(x,6))  ^ (ror(x,11)) ^ (ror(x,25)))
+#define sigma0(x) ((ror(x,7))  ^ (ror(x,18)) ^(x>>3))
+#define sigma1(x) ((ror(x,17)) ^ (ror(x,19)) ^(x>>10))
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define SHOW(x) printf("%s = %08x\n",#x,(x))
+# define SWAP(n) \
+    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
+
+#define SALT_LEN_MAX 16
+#define ROUNDS_DEFAULT 5000
+#define ROUNDS_MIN 1000
+#define ROUNDS_MAX 999999999
+
+typedef struct {
+	uint32_t H[8];
+	uint32_t total;
+	uint32_t buflen;
+	uint8_t buffer[64];
+} sha256_ctx;
+
+typedef struct {
+	unsigned char saltlen;
+	uint32_t rounds;
+	char salt[16];
+} crypt_sha256_salt;
+
+typedef struct {
+	unsigned char length;
+	unsigned char v[15];
+} crypt_sha256_password;
+
+
+#define hash_addr(j,idx) (((j)*(KEYS_PER_CRYPT))+(idx))
+typedef struct {
+	uint32_t v[8];		//256 bits
+} crypt_sha256_hash;
+
+#endif
diff -urpN john-1.7.9-jumbo-5/src/cuda_cryptsha256_fmt.c john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptsha256_fmt.c
--- john-1.7.9-jumbo-5/src/cuda_cryptsha256_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptsha256_fmt.c	2012-02-29 19:29:49.603447100 +0000
@@ -0,0 +1,398 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com>
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <string.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cuda_cryptsha256.h"
+#include "cuda_common.h"
+#include <unistd.h>
+
+#define FORMAT_LABEL		"cryptsha256-cuda"
+#define FORMAT_NAME		FORMAT_LABEL
+
+#define CRYPT_TYPE		"SHA256-based CRYPT"
+
+#define BENCHMARK_COMMENT	""
+#define BENCHMARK_LENGTH	-1
+
+#define PLAINTEXT_LENGTH	15
+#define BINARY_SIZE		32
+#define MD5_DIGEST_LENGTH 	16
+
+#define SALT_SIZE		(3+7+9+16)
+
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+extern void sha256_crypt_gpu(crypt_sha256_password * inbuffer,
+    uint32_t * outbuffer, crypt_sha256_salt * host_salt);
+
+static crypt_sha256_password *inbuffer;//[MAX_KEYS_PER_CRYPT];			/** plaintext ciphertexts **/
+static uint32_t *outbuffer;//[MAX_KEYS_PER_CRYPT * 8];				/** calculated hashes **/
+
+static char currentsalt[64];
+static crypt_sha256_salt host_salt;
+
+void sha256_crypt_cpu(crypt_sha256_password * passwords,
+    crypt_sha256_hash * output, crypt_sha256_salt * salt);
+
+
+static struct fmt_tests tests[] = {
+	{"$5$saltstring$5B8vYYiY.CVt1RlTTf8KbXBH3hsxY/GNooZaBBGWEc5",
+	    "Hello world!"},
+	/*{"$5$UOUBPEMKQRHHRFML$zicoLpMLhBsNGtEplY/ehM0NtiAqxijiBCrolt7WBW0","jjti"},
+
+	   {"$5$XSLWLBSQUCNOWXOB$i7Ho5wUAIjsH2e2zA.WarqYLWir5nmZbUEcjK//Or7.","hgnirgayjnhvi"},
+	   {"$5$VDCTRFOIDQXRQVHR$uolqT0wEwU.pvI9jq5xU457JQpiwTTKX3PB/9RS4/h4","o"},
+	   {"$5$WTYWNCYHNPMXPG$UwZyrq0irhWs4OcLKcqSbFdktZaNAD2by1CiNNw7oID","tcepf"},
+	   {"$5$DQUHKJNMVOEBGBG$91u2d/jMN5QuW3/kBEPG0xC2G8y1TuDU7SGAUYTX.y0","wbfhoc"},
+
+
+	   {"$5$saltstring$0Az3qME7zTXm78kfHrR2OtT8WOu2gd8bcVn/9Y.3l/7", "john"},
+
+	   {"$5$saltstring$7cz4bTeQ7MnNssphNhFVrITtuJYY/1tdvLL2uzLvOk8","a"},
+	   {"$5$saltstring$4Wjlxdm/Hbpo8ZQzKFazuvfUZPVVUQn6v1oPTX3nwX/","ab"},
+	   {"$5$saltstring$tDHA0KPsYQ8V.LDB1/fgW7cvROod5ZajSrx1tZU2JG9","abc"},
+	   {"$5$saltstring$LfhGTHVGfbAkxy/xKLgvSfXyeE7hZheoMRKhjfvNF6.","abcd"},
+	   {"$5$saltstring$Qg0Xm9f2VY.ePLAwNXnOPU/s8btLptK/tEU/gFnn8BD","abcde"},
+	   {"$5$saltstring$2Snf.yaHnLnLI3Qhsk2S119X4vKbwQyiTMOHp3Oy7F5","abcdef"},
+	   {"$5$saltstring$4Y5UR.6zwplRx6y93NJVyNkxqdlyT64EV68F2mCrZ16","abcdefg"},
+	   {"$5$saltstring$bEM3iuUR.CTgy8Wygh4zu.CAgmlwx3uxm3dGA34.Ij4","abcdefgh"},
+	   {"$5$saltstring$1/OrKXZSFlaEE2DKMhKKE8qCld5X0Ez0vtz5TvO3U3D","abcdefghi"},
+	   {"$5$saltstring$1IbZU70/Wo9m1b40ha6Ao8d.v6Ja0.bAFg5/QFVzoX/","abcdefghij"},
+
+	   {"$5$saltstring$S4gCgloAzqAXE5sRz9DShPvaXrwt4vjDJ4fYgIMbLo1","abcdefghijk"},
+	   {"$5$saltstring$AFNSzsWaoMDvt7lk2bx0rPapzCz2zGahXDdFeoXrNE9","abcdefghijkl"},
+	   {"$5$saltstring$QfHc8JBd2DfyloVL0YLDa23Dc67N9mbdYqyRJQlFqZ5","abcdefghijklm"},
+	   {"$5$saltstring$XKHiS.SSJ545PvJJr2t.HyUpmPZDAIT8fVvzr/HGhd0","abcdefghijklmn"},
+	   {"$5$saltstring$VxW44bFDcvixlQoTE4E.k5c8v1w0fGMyZ4tn8nGcWn0","abcdefghijklmno"},
+
+	   {"$5$QSTVVEKDIDYRNK$4j8TST.29P07GHASD.BUHd0UTaFz7h.Mz//zcHokoZ5","cgyihfkqk"},
+
+	 */
+
+	//{"$5$rounds=5000$abcdefghijklmnop$BAYQep7SsuSczAeXlks3F54SpxMUUludHi1C4JVOqpD","abcdefghijklmno"},
+	{NULL}
+};
+
+static void cleanup()
+{
+ free(inbuffer);
+ free(outbuffer);
+}
+
+static void init(struct fmt_main *pFmt)
+{
+  //Alocate memory for hashes and passwords
+  inbuffer=(crypt_sha256_password*)malloc(sizeof(crypt_sha256_password)*MAX_KEYS_PER_CRYPT);
+  outbuffer=(uint32_t*)malloc(sizeof(uint32_t)*MAX_KEYS_PER_CRYPT*8);
+  check_mem_allocation(inbuffer,outbuffer);
+  atexit(cleanup);
+  //Initialize CUDA
+  cuda_init(gpu_id);
+}
+
+static int valid(char *ciphertext,struct fmt_main *pFmt)
+{
+	uint32_t i, j;
+	int len = strlen(ciphertext);
+
+	if (strncmp(ciphertext, "$5$", 3) != 0)
+		return 0;
+	char *p = strrchr(ciphertext, '$');
+	if (p == NULL)
+		return 0;
+	for (i = p - ciphertext + 1; i < len; i++) {
+		int found = 0;
+		for (j = 0; j < 64; j++)
+			if (itoa64[j] == ARCH_INDEX(ciphertext[i])) {
+				found = 1;
+				break;
+			}
+		if (found == 0)
+			return 0;
+	}
+	if (len - (p - ciphertext + 1) != 43)
+		return 0;
+	return 1;
+};
+
+static int findb64(char c)
+{
+	int ret = ARCH_INDEX(atoi64[(uint8_t) c]);
+	return ret != 0x7f ? ret : 0;
+}
+
+static void magic(char *crypt, char *alt)
+{
+
+#define _24bit_from_b64(I,B2,B1,B0) \
+  {\
+      unsigned char c1=findb64(crypt[I+0]);\
+      unsigned char c2=findb64(crypt[I+1]);\
+      unsigned char c3=findb64(crypt[I+2]);\
+      unsigned char c4=findb64(crypt[I+3]);\
+      unsigned int w=c4<<18|c3<<12|c2<<6|c1;\
+      unsigned char b2=w&0xff;w>>=8;\
+      unsigned char b1=w&0xff;w>>=8;\
+      unsigned char b0=w&0xff;w>>=8;\
+      alt[B2]=b0;\
+      alt[B1]=b1;\
+      alt[B0]=b2;\
+  }
+
+	_24bit_from_b64(0, 0, 10, 20);
+	_24bit_from_b64(4, 21, 1, 11);
+	_24bit_from_b64(8, 12, 22, 2);
+	_24bit_from_b64(12, 3, 13, 23);
+	_24bit_from_b64(16, 24, 4, 14);
+	_24bit_from_b64(20, 15, 25, 5);
+	_24bit_from_b64(24, 6, 16, 26);
+	_24bit_from_b64(28, 27, 7, 17);
+	_24bit_from_b64(32, 18, 28, 8);
+	_24bit_from_b64(36, 9, 19, 29);
+	unsigned int w =
+	    findb64(crypt[42]) << 12 | findb64(crypt[41]) << 6 |
+	    findb64(crypt[40]);
+	alt[30] = w & 0xff;
+	w >>= 8;
+	alt[31] = w & 0xff;
+	w >>= 8;
+}
+
+
+static void *binary(char *ciphertext)
+{
+	static char b[BINARY_SIZE];
+	memset(b, 0, BINARY_SIZE);
+	char *p = strrchr(ciphertext, '$');
+	if(p!=NULL)
+	magic(p+1, b);
+	return (void *) b;
+}
+
+
+static void *salt(char *ciphertext)
+{
+	int end = 0, i, len = strlen(ciphertext);
+	for (i = len - 1; i >= 0; i--)
+		if (ciphertext[i] == '$') {
+			end = i;
+			break;
+
+		}
+
+	static unsigned char ret[64];
+	for (i = 0; i < end; i++)
+		ret[i] = ciphertext[i];
+	ret[end] = 0;
+	return (void *) ret;
+}
+
+static int binary_hash_0(void *binary)
+{
+	return (((ARCH_WORD_32 *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfffff;
+}
+
+static int binary_hash_5(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffffff;
+}
+
+static int binary_hash_6(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0x7ffffff;
+}
+
+
+static void set_salt(void *salt)
+{
+	unsigned char *s = salt;
+	int len = strlen(salt);
+	memcpy(currentsalt,s,len+1);
+	unsigned char offset = 0;
+	host_salt.rounds = ROUNDS_DEFAULT;
+
+	if (strncmp((char *) "$5$", (char *) currentsalt, 3) == 0)
+		offset += 3;
+
+	if (strncmp((char *) currentsalt + offset, (char *) "rounds=", 7) == 0) {
+		const char *num = currentsalt + offset + 7;
+		char *endp;
+		unsigned long int srounds = strtoul(num, &endp, 10);
+
+		if (*endp == '$') {
+			endp += 1;
+			host_salt.rounds =
+			    MAX(ROUNDS_MIN, MIN(srounds, ROUNDS_MAX));
+		}
+		offset = endp - currentsalt;
+	}
+	memcpy(host_salt.salt, currentsalt + offset, 16);
+	host_salt.saltlen = strlen(host_salt.salt);
+}
+
+static void set_key(char *key, int index)
+{
+	int len = strlen(key);
+	inbuffer[index].length = len;
+	memcpy(inbuffer[index].v, key, len);
+}
+
+static char *get_key(int index)
+{
+	static char ret[PLAINTEXT_LENGTH + 1];
+	memcpy(ret, inbuffer[index].v, PLAINTEXT_LENGTH);
+	ret[inbuffer[index].length] = '\0';
+	return ret;
+}
+
+static void crypt_all(int count)
+{
+	sha256_crypt_gpu(inbuffer, outbuffer, &host_salt);
+}
+
+static int get_hash_0(int index)
+{
+
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xfffff;
+}
+
+static int get_hash_5(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xffffff;
+}
+
+static int get_hash_6(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0x7ffffff;
+}
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i;
+	uint32_t b = ((uint32_t *) binary)[0];
+	uint32_t *out = outbuffer;
+	for (i = 0; i < count; i++)
+		if (b == out[hash_addr(0, i)])
+			return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	int i;
+	uint32_t *t = (uint32_t *) binary;
+	uint32_t *out = outbuffer;
+
+	for (i = 0; i < 8; i++)
+		if (t[i] != out[hash_addr(i, index)])
+			return 0;
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_cuda_cryptsha256 = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_NAME,
+		    CRYPT_TYPE,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    fmt_default_prepare,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+			binary_hash_4,
+		    binary_hash_5,
+		    binary_hash_6},
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4,
+		    get_hash_5,
+		    get_hash_6},
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
diff -urpN john-1.7.9-jumbo-5/src/cuda_cryptsha512.h john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptsha512.h
--- john-1.7.9-jumbo-5/src/cuda_cryptsha512.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptsha512.h	2011-08-30 14:36:37.000000000 +0000
@@ -0,0 +1,80 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _CRYPTSHA512_H
+#define _CRYPTSHA512_H
+#include <assert.h>
+#include "common.h"
+#include <stdint.h>
+#include <stdbool.h>
+
+#define uint8_t  unsigned char
+#define uint32_t unsigned int
+#define uint64_t unsigned long long int
+
+#define BLOCKS 14
+#define THREADS 128//set 256 on fermi
+
+#define KEYS_PER_CRYPT BLOCKS*THREADS
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define SALT_LEN_MAX 16
+#define ROUNDS_DEFAULT 5000
+#define ROUNDS_MIN 1000
+#define ROUNDS_MAX 999999999
+
+static const char sha512_salt_prefix[] = "$6$";
+static const char sha256_rounds_prefix[] = "rounds=";
+
+
+#define rol(x,n) ((x << n) | (x >> (64-n)))
+#define ror(x,n) ((x >> n) | (x << (64-n)))
+#define Ch(x,y,z) ((x & y) ^ ( (~x) & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) ((ror(x,28))  ^ (ror(x,34)) ^ (ror(x,39)))
+#define Sigma1(x) ((ror(x,14))  ^ (ror(x,18)) ^ (ror(x,41)))
+#define sigma0(x) ((ror(x,1))  ^ (ror(x,8)) ^(x>>7))
+#define sigma1(x) ((ror(x,19)) ^ (ror(x,61)) ^(x>>6))
+
+# define SWAP32(n) \
+    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
+
+# define SWAP64(n) \
+  (((n) << 56)					\
+   | (((n) & 0xff00) << 40)			\
+   | (((n) & 0xff0000) << 24)			\
+   | (((n) & 0xff000000) << 8)			\
+   | (((n) >> 8) & 0xff000000)			\
+   | (((n) >> 24) & 0xff0000)			\
+   | (((n) >> 40) & 0xff00)			\
+   | ((n) >> 56))
+
+#define hash_addr(j,idx) (((j)*(KEYS_PER_CRYPT))+(idx))
+
+
+typedef struct {
+	uint64_t H[8];
+	uint32_t total;
+	uint32_t buflen;
+	uint8_t buffer[128];	//1024bits
+} sha512_ctx;
+
+typedef struct {
+	uint32_t rounds;
+	uint8_t saltlen;
+	char salt[63];
+} crypt_sha512_salt;
+
+typedef struct {
+	uint8_t length;
+	uint8_t v[16];
+} crypt_sha512_password;
+
+typedef struct {
+	uint64_t v[8];		//512 bits
+} crypt_sha512_hash;
+
+#endif
diff -urpN john-1.7.9-jumbo-5/src/cuda_cryptsha512_fmt.c john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptsha512_fmt.c
--- john-1.7.9-jumbo-5/src/cuda_cryptsha512_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_cryptsha512_fmt.c	2012-02-29 19:33:05.595196829 +0000
@@ -0,0 +1,379 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <string.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cuda_cryptsha512.h"
+#include "cuda_common.h"
+
+#define FORMAT_LABEL		"cryptsha512-cuda"
+#define FORMAT_NAME		FORMAT_LABEL
+
+#define PHPASS_TYPE		"SHA512-based CRYPT"
+
+#define BENCHMARK_COMMENT	""
+#define BENCHMARK_LENGTH	-1
+
+#define PLAINTEXT_LENGTH	15
+#define CIPHERTEXT_LENGTH	34
+#define BINARY_SIZE		(3+16+86)
+#define MD5_DIGEST_LENGTH 	16
+
+
+#define SALT_SIZE		(3+7+9+16)
+
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+static crypt_sha512_password *inbuffer;		/** plaintext ciphertexts **/
+static crypt_sha512_hash *outbuffer;		/** calculated hashes **/
+
+void sha512_crypt_gpu(crypt_sha512_password * inbuffer,
+    crypt_sha512_hash * outbuffer, crypt_sha512_salt * host_salt);
+
+static char currentsalt[64];
+static crypt_sha512_salt _salt;
+
+static struct fmt_tests tests[] = {
+
+	{"$6$saltstring$svn8UoSVapNtMuq1ukKS4tPQd8iKwSMHWjl/O817G3uBnIFNjnQJuesI68u4OTLiBFdcbYEdFCoEOfaS35inz1", "Hello world!"},
+	//{"$6$saltstring$fgNTR89zXnDUV97U5dkWayBBRaB0WIBnu6s4T7T8Tz1SbUyewwiHjho25yWVkph2p18CmUkqXh4aIyjPnxdgl0","john"},
+	//{"$6$saltstring$MO53nAXQUKXVLlsbiXyPgMsR6q10N7eF7sPvanwdXnEeCj5kE3eYaRvFv0wVW1UZ4SnNTzc1v4OCOq1ASDQZY0","a"},
+	//{"$6$saltstring$q.eQ9PCFPe/tOHJPT7lQwnVQ9znjTT89hsg1NWHCRCAMsbtpBLbg1FLq7xo1BaCM0y/z46pXv4CGESVWQlOk30","ab"},
+	//{"$6$saltstring$pClZZISU0lxEwKr1z81EuJdiMLwWncjShXap25hiDGVMnCvlF5zS3ysvBdVRZqPDCdSTj06rwjrLX3bOS1Cak/","abc"},
+	//{"$6$saltstring$FJJAXr3hydAPJXM311wrzFhzheQ6LJHrufrYl2kBMnRD2pUi6jdS.fSBJ2J1Qfhcz9tPnlJOzeL7aIYi/dytg.","abcd"},
+	//{"$6$saltstring$XDecvJ/rq8tgbE1Pfuu1cTiZlhnbF5OA/vyP6HRPpDengVqhB38vbZTK/BDfPP6XBgvMzE.q9rj6Ck5blj/FK.","abcde"},
+	//{"$6$saltstring$hYPEYaHik6xSMGV1lDWhF0EerSUyCsC150POu9ksaftUWKWwV8TuqSeSLZUkUhjGy7cn.max5qd5IPSICeklL1","abcdef"},
+	//{"$6$saltstring$YBQ5J5EMRuC6k7B2GTsNaXx8u/957XMB.slQmY/lOjKd1zTIQF.ulLmy8O0VnJJ3cV.1pjP.KCgEjjMpz4pnS1","abcdefg"},
+	//{"$6$saltstring$AQapizZGhnIjtXF8OCvbSxQJBuOKvpzf1solf9b76wXFX0VRkqids5AC4YSibbhMSX0z4463sq1uAd9LvKNuO/","abcdefgh"},
+	//{"$6$saltstring$xc66FVXO.Zvv5pS02B4bCmJh5FCBAZpqTK3NoFxTU9U5b6BokbHwmeqQfMqrrkB3j9CXhCzgvC/pvoGPM1xgM1","abcdefghi"},
+	//{"$6$saltstring$Xet3A8EEzzSuL9hZZ31SfDVPT87qz3a.xxcH7eU50aqARlmXywdlfJ.6Cp/TFG1RcguqwrfUbZBbFn1BQ93Kv.","abcdefghij"},
+
+	//{"$6$saltstring$MeML1shJ8psyh5R9YJUZNYNqKzYeBvIsITqc/VqJfUDs8xO5YoUhCn4Db7CXuarMDVkBzIUfYq1d8Tj/T1WBU0","abcdefghijk"},
+	//{"$6$saltstring$i/3NHph8ZV2klLuOc5yX5kOnJWj9zuWbKiaa/NNEkYpNyamdQS1c7n2XQS3.B2Cs/eVyKwHf62PnOayqLLTOZ.","abcdefghijkl"},
+	//{"$6$saltstring$l2IxCS4o2S/vud70F1S5Z7H1WE67QFIXCYqskySdLFjjorEJdAnAp1ZqdgfNuZj2orjmeVDTsTXHpZ1IoxSKd.","abcdefghijklm"},
+	//{"$6$saltstring$PFzjspQs/CDXWALauDTav3u5bHB3n21xWrfwjnjpFO5eM5vuP0qKwDCXmlyZ5svEgsIH1oiZiGlRqkcBP5PiB.","abcdefghijklmn"},
+	//{"$6$saltstring$rdREv5Pd9C9YGtg.zXEQMb6m0sPeq4b6zFW9oWY9w4ZltmjH3yzMLgl9iBuez9DFFUvF5nJH3Y2xidiq1dH9M.", "abcdefghijklmno"},
+
+	{NULL}
+};
+static void cleanup()
+{
+ free(inbuffer);
+ free(outbuffer);
+}
+static void init(struct fmt_main *pFmt)
+{
+  //Alocate memory for hashes and passwords
+  inbuffer=(crypt_sha512_password*)malloc(sizeof(crypt_sha512_password)*MAX_KEYS_PER_CRYPT);
+  outbuffer=(crypt_sha512_hash*)malloc(sizeof(crypt_sha512_hash)*MAX_KEYS_PER_CRYPT);
+  check_mem_allocation(inbuffer,outbuffer);
+  atexit(cleanup);
+  //Initialize CUDA
+  cuda_init(gpu_id);
+}
+
+static int valid(char *ciphertext,struct fmt_main *pFmt)
+{
+	uint32_t i, j;
+	int len = strlen(ciphertext);
+
+	if (strncmp(ciphertext, "$6$", 3) != 0)
+		return 0;
+	char *p = strrchr(ciphertext, '$');
+	if (p == NULL)
+		return 0;
+	for (i = p - ciphertext + 1; i < len; i++) {
+		int found = 0;
+		for (j = 0; j < 64; j++)
+			if (itoa64[j] == ARCH_INDEX(ciphertext[i]))
+				found = 1;
+		if (found == 0) {
+			puts("not found");
+			return 0;
+		}
+	}
+	if (len - (p - ciphertext + 1) != 86)
+		return 0;
+	return 1;
+};
+
+static int findb64(char c)
+{
+	int ret = ARCH_INDEX(atoi64[(uint8_t) c]);
+	return ret != 0x7f ? ret : 0;
+}
+
+static void magic(char *crypt, unsigned char *alt)
+{
+
+#define _24bit_from_b64(I,B2,B1,B0) \
+  {\
+      unsigned char c1=findb64(crypt[I+0]);\
+      unsigned char c2=findb64(crypt[I+1]);\
+      unsigned char c3=findb64(crypt[I+2]);\
+      unsigned char c4=findb64(crypt[I+3]);\
+      unsigned int w=c4<<18|c3<<12|c2<<6|c1;\
+      unsigned char b2=w&0xff;w>>=8;\
+      unsigned char b1=w&0xff;w>>=8;\
+      unsigned char b0=w&0xff;w>>=8;\
+      alt[B2]=b0;\
+      alt[B1]=b1;\
+      alt[B0]=b2;\
+  }
+	_24bit_from_b64(0, 0, 21, 42);
+	_24bit_from_b64(4, 22, 43, 1);
+	_24bit_from_b64(8, 44, 2, 23);
+	_24bit_from_b64(12, 3, 24, 45);
+	_24bit_from_b64(16, 25, 46, 4);
+	_24bit_from_b64(20, 47, 5, 26);
+	_24bit_from_b64(24, 6, 27, 48);
+	_24bit_from_b64(28, 28, 49, 7);
+	_24bit_from_b64(32, 50, 8, 29);
+	_24bit_from_b64(36, 9, 30, 51);
+	_24bit_from_b64(40, 31, 52, 10);
+	_24bit_from_b64(44, 53, 11, 32);
+	_24bit_from_b64(48, 12, 33, 54);
+	_24bit_from_b64(52, 34, 55, 13);
+	_24bit_from_b64(56, 56, 14, 35);
+	_24bit_from_b64(60, 15, 36, 57);
+	_24bit_from_b64(64, 37, 58, 16);
+	_24bit_from_b64(68, 59, 17, 38);
+	_24bit_from_b64(72, 18, 39, 60);
+	_24bit_from_b64(76, 40, 61, 19);
+	_24bit_from_b64(80, 62, 20, 41);
+
+
+	uint32_t w = findb64(crypt[85]) << 6 | findb64(crypt[84]) << 0;
+	alt[63] = (w & 0xff);
+}
+
+
+static void *binary(char *ciphertext)
+{
+	static unsigned char b[BINARY_SIZE];
+	memset(b, 0, BINARY_SIZE);
+	char *p = strrchr(ciphertext, '$');
+	if(p!=NULL)
+	  magic(p+1, b);
+	return (void *) b;
+}
+
+
+static void *salt(char *ciphertext)
+{
+	int end = 0, i, len = strlen(ciphertext);
+	for (i = len - 1; i >= 0; i--)
+		if (ciphertext[i] == '$') {
+			end = i;
+			break;
+		}
+
+	static unsigned char ret[50];
+	for (i = 0; i < end; i++)
+		ret[i] = ciphertext[i];
+	ret[end] = 0;
+	return (void *) ret;
+}
+
+static int binary_hash_0(void *binary)
+{
+	return (((uint64_t *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfffff;
+}
+
+static int binary_hash_5(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffffff;
+}
+
+static int binary_hash_6(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0x7ffffff;
+}
+
+static void set_salt(void *salt)
+{
+	unsigned char *s = salt;
+	int len = strlen(salt);
+	memcpy(currentsalt,s,len+1);
+	unsigned char offset = 0;
+	_salt.rounds = ROUNDS_DEFAULT;
+
+	if (strncmp((char *) "$6$", (char *) currentsalt, 3) == 0)
+		offset += 3;
+
+	if (strncmp((char *) currentsalt + offset, (char *) "rounds=", 7) == 0) {
+		const char *num = currentsalt + offset + 7;
+		char *endp;
+		unsigned long int srounds = strtoul(num, &endp, 10);
+
+		if (*endp == '$') {
+			endp += 1;
+			_salt.rounds =
+			    MAX(ROUNDS_MIN, MIN(srounds, ROUNDS_MAX));
+		}
+		offset = endp - currentsalt;
+	}
+	memcpy(_salt.salt, currentsalt + offset, 16);
+	_salt.saltlen = strlen(_salt.salt);
+}
+
+static void set_key(char *key, int index)
+{
+	int len = strlen(key);
+	inbuffer[index].length = len;
+	memcpy(inbuffer[index].v, key, len);
+}
+
+static char *get_key(int index)
+{
+	static char ret[PLAINTEXT_LENGTH + 1];
+	memcpy(ret, inbuffer[index].v, PLAINTEXT_LENGTH);
+	ret[inbuffer[index].length] = '\0';
+	return ret;
+}
+
+
+static void gpu_crypt_all(int count)
+{
+	sha512_crypt_gpu(inbuffer, outbuffer, &_salt);
+}
+
+static void crypt_all(int count)
+{
+	gpu_crypt_all(count);
+}
+
+static int get_hash_0(int index)
+{
+	return outbuffer[index].v[0] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	return outbuffer[index].v[0] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	return outbuffer[index].v[0] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	return outbuffer[index].v[0] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	return outbuffer[index].v[0] & 0xfffff;
+}
+
+static int get_hash_5(int index)
+{
+	return outbuffer[index].v[0] & 0xffffff;
+}
+static int get_hash_6(int index)
+{
+	return outbuffer[index].v[0] & 0x7ffffff;
+}
+
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i;
+	uint64_t b = ((uint64_t *) binary)[0];
+	for (i = 0; i < count; i++)
+		if (b == outbuffer[i].v[0])
+			return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	int i;
+	uint64_t *t = (uint64_t *) binary;
+	for (i = 0; i < 8; i++) {
+		if (t[i] != outbuffer[index].v[i])
+			return 0;
+	}
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_cuda_cryptsha512 = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_NAME,
+		    PHPASS_TYPE,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    fmt_default_prepare,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+			binary_hash_4,
+		    binary_hash_5,
+		    binary_hash_6},
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4,
+		    get_hash_5,
+		    get_hash_6},
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
diff -urpN john-1.7.9-jumbo-5/src/cuda_mscash.h john-1.7.9-jumbo-5-cuda-2/src/cuda_mscash.h
--- john-1.7.9-jumbo-5/src/cuda_mscash.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_mscash.h	2012-02-17 15:39:45.503475567 +0000
@@ -0,0 +1,56 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+* Based on Alain Espinosa implementation http://openwall.info/wiki/john/MSCash
+*/
+#ifndef _MSCASH_H
+#define _MSCASH_H
+
+#define uint8_t			unsigned char
+#define uint16_t		unsigned short
+#define uint32_t		unsigned int
+
+#define THREADS			512
+#define BLOCKS			21
+#define	KEYS_PER_CRYPT		(THREADS)*(BLOCKS)
+
+#define BINARY_SIZE		16
+#define PLAINTEXT_LENGTH	15
+#define SALT_SIZE		sizeof(mscash_salt)
+
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+#define MAX(x,y)		((x) > (y) ? (x) : (y))
+#define MIN(x,y)		((x) < (y) ? (x) : (y))
+#define SHOW(x)			(printf("%s = %08x\n",#x,(x)))
+# define SWAP(n) \
+    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
+
+#define INIT_A			0x67452301
+#define INIT_B			0xefcdab89
+#define INIT_C			0x98badcfe
+#define INIT_D			0x10325476
+
+#define SQRT_2			0x5a827999
+#define SQRT_3			0x6ed9eba1
+
+static const char mscash_prefix[] = "M$";
+
+typedef struct {
+	uint8_t length;
+	uint8_t v[15];
+} mscash_password;
+
+typedef struct {
+	uint32_t v[8];
+} mscash_hash;
+
+typedef struct {
+	uint8_t length;
+	uint8_t salt[15];
+} mscash_salt;
+
+#endif
diff -urpN john-1.7.9-jumbo-5/src/cuda_mscash2.h john-1.7.9-jumbo-5-cuda-2/src/cuda_mscash2.h
--- john-1.7.9-jumbo-5/src/cuda_mscash2.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_mscash2.h	2011-09-03 23:57:55.000000000 +0000
@@ -0,0 +1,202 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+* Based on S3nf implementation http://openwall.info/wiki/john/MSCash2
+*/
+#ifndef _MSCASH2_H
+#define _MSCASH2_H
+
+#define uint8_t			unsigned char
+#define uint16_t		unsigned short
+#define uint32_t		unsigned int
+
+#define THREADS			128//set 256 on fermi
+#define BLOCKS			14
+#define	KEYS_PER_CRYPT		(THREADS)*(BLOCKS)
+
+#define BINARY_SIZE		16
+#define PLAINTEXT_LENGTH	15
+#define SALT_SIZE		sizeof(mscash2_salt)
+
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+#define MAX(x,y)		((x) > (y) ? (x) : (y))
+#define MIN(x,y)		((x) < (y) ? (x) : (y))
+# define SWAP(n) \
+    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
+
+#define ITERATIONS		10240
+
+#define INIT_A			0x67452301
+#define INIT_B			0xefcdab89
+#define INIT_C			0x98badcfe
+#define INIT_D			0x10325476
+#define INIT_E			0xc3d2e1f0
+
+#define SQRT_2			0x5a827999
+#define SQRT_3			0x6ed9eba1
+
+#define SHA1_DIGEST_LENGTH	20
+
+#define K1			0x5a827999
+#define K2			0x6ed9eba1
+#define K3			0x8f1bbcdc
+#define K4			0xca62c1d6
+
+#define F1(x,y,z)		(z ^ (x & (y ^ z)))
+#define F2(x,y,z)		(x ^ y ^ z)
+#define F3(x,y,z)		((x & y) | (z & (x | y)))
+#define F4(x,y,z)		(x ^ y ^ z)
+
+#ifndef GET_WORD_32_BE
+#define GET_WORD_32_BE(n,b,i)                           \
+{                                                       \
+    (n) = ( (unsigned long) (b)[(i)    ] << 24 )        \
+        | ( (unsigned long) (b)[(i) + 1] << 16 )        \
+        | ( (unsigned long) (b)[(i) + 2] <<  8 )        \
+        | ( (unsigned long) (b)[(i) + 3]       );       \
+}
+#endif
+
+#ifndef PUT_WORD_32_BE
+#define PUT_WORD_32_BE(n,b,i)                           \
+{                                                       \
+    (b)[(i)    ] = (unsigned char) ( (n) >> 24 );       \
+    (b)[(i) + 1] = (unsigned char) ( (n) >> 16 );       \
+    (b)[(i) + 2] = (unsigned char) ( (n) >>  8 );       \
+    (b)[(i) + 3] = (unsigned char) ( (n)       );       \
+}
+#endif
+
+#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))
+
+#define R(t)                                            \
+(                                                       \
+    temp = W[(t -  3) & 0x0F] ^ W[(t - 8) & 0x0F] ^     \
+           W[(t - 14) & 0x0F] ^ W[ t      & 0x0F],      \
+    ( W[t & 0x0F] = S(temp,1) )                         \
+)
+
+#define P1(a,b,c,d,e,x)                                  \
+{                                                       \
+    e += S(a,5) + F1(b,c,d) + K1 + x; b = S(b,30);        \
+}
+
+#define P2(a,b,c,d,e,x)                                  \
+{                                                       \
+    e += S(a,5) + F2(b,c,d) + K2 + x; b = S(b,30);        \
+}
+
+#define P3(a,b,c,d,e,x)                                  \
+{                                                       \
+    e += S(a,5) + F3(b,c,d) + K3 + x; b = S(b,30);        \
+}
+
+#define P4(a,b,c,d,e,x)                                  \
+{                                                       \
+    e += S(a,5) + F4(b,c,d) + K4 + x; b = S(b,30);        \
+}
+
+#define SHA1(A,B,C,D,E,W) \
+    P1(A, B, C, D, E, W[0] );\
+    P1(E, A, B, C, D, W[1] );\
+    P1(D, E, A, B, C, W[2] );\
+    P1(C, D, E, A, B, W[3] );\
+    P1(B, C, D, E, A, W[4] );\
+    P1(A, B, C, D, E, W[5] );\
+    P1(E, A, B, C, D, W[6] );\
+    P1(D, E, A, B, C, W[7] );\
+    P1(C, D, E, A, B, W[8] );\
+    P1(B, C, D, E, A, W[9] );\
+    P1(A, B, C, D, E, W[10]);\
+    P1(E, A, B, C, D, W[11]);\
+    P1(D, E, A, B, C, W[12]);\
+    P1(C, D, E, A, B, W[13]);\
+    P1(B, C, D, E, A, W[14]);\
+    P1(A, B, C, D, E, W[15]);\
+    P1(E, A, B, C, D, R(16));\
+    P1(D, E, A, B, C, R(17));\
+    P1(C, D, E, A, B, R(18));\
+    P1(B, C, D, E, A, R(19));\
+    P2(A, B, C, D, E, R(20));\
+    P2(E, A, B, C, D, R(21));\
+    P2(D, E, A, B, C, R(22));\
+    P2(C, D, E, A, B, R(23));\
+    P2(B, C, D, E, A, R(24));\
+    P2(A, B, C, D, E, R(25));\
+    P2(E, A, B, C, D, R(26));\
+    P2(D, E, A, B, C, R(27));\
+    P2(C, D, E, A, B, R(28));\
+    P2(B, C, D, E, A, R(29));\
+    P2(A, B, C, D, E, R(30));\
+    P2(E, A, B, C, D, R(31));\
+    P2(D, E, A, B, C, R(32));\
+    P2(C, D, E, A, B, R(33));\
+    P2(B, C, D, E, A, R(34));\
+    P2(A, B, C, D, E, R(35));\
+    P2(E, A, B, C, D, R(36));\
+    P2(D, E, A, B, C, R(37));\
+    P2(C, D, E, A, B, R(38));\
+    P2(B, C, D, E, A, R(39));\
+    P3(A, B, C, D, E, R(40));\
+    P3(E, A, B, C, D, R(41));\
+    P3(D, E, A, B, C, R(42));\
+    P3(C, D, E, A, B, R(43));\
+    P3(B, C, D, E, A, R(44));\
+    P3(A, B, C, D, E, R(45));\
+    P3(E, A, B, C, D, R(46));\
+    P3(D, E, A, B, C, R(47));\
+    P3(C, D, E, A, B, R(48));\
+    P3(B, C, D, E, A, R(49));\
+    P3(A, B, C, D, E, R(50));\
+    P3(E, A, B, C, D, R(51));\
+    P3(D, E, A, B, C, R(52));\
+    P3(C, D, E, A, B, R(53));\
+    P3(B, C, D, E, A, R(54));\
+    P3(A, B, C, D, E, R(55));\
+    P3(E, A, B, C, D, R(56));\
+    P3(D, E, A, B, C, R(57));\
+    P3(C, D, E, A, B, R(58));\
+    P3(B, C, D, E, A, R(59));\
+    P4(A, B, C, D, E, R(60));\
+    P4(E, A, B, C, D, R(61));\
+    P4(D, E, A, B, C, R(62));\
+    P4(C, D, E, A, B, R(63));\
+    P4(B, C, D, E, A, R(64));\
+    P4(A, B, C, D, E, R(65));\
+    P4(E, A, B, C, D, R(66));\
+    P4(D, E, A, B, C, R(67));\
+    P4(C, D, E, A, B, R(68));\
+    P4(B, C, D, E, A, R(69));\
+    P4(A, B, C, D, E, R(70));\
+    P4(E, A, B, C, D, R(71));\
+    P4(D, E, A, B, C, R(72));\
+    P4(C, D, E, A, B, R(73));\
+    P4(B, C, D, E, A, R(74));\
+    P4(A, B, C, D, E, R(75));\
+    P4(E, A, B, C, D, R(76));\
+    P4(D, E, A, B, C, R(77));\
+    P4(C, D, E, A, B, R(78));\
+    P4(B, C, D, E, A, R(79));
+
+static const char mscash2_prefix[] = "$DCC2$";
+
+typedef struct {
+	uint8_t length;
+	uint8_t v[15];
+	uint32_t dcc_hash[4];
+} mscash2_password;
+
+typedef struct {
+	uint32_t v[8];
+} mscash2_hash;
+
+typedef struct {
+	uint8_t length;
+	uint8_t salt[15];
+	uint8_t unicode_salt[64];
+} mscash2_salt;
+
+#endif
diff -urpN john-1.7.9-jumbo-5/src/cuda_mscash2_fmt.c john-1.7.9-jumbo-5-cuda-2/src/cuda_mscash2_fmt.c
--- john-1.7.9-jumbo-5/src/cuda_mscash2_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_mscash2_fmt.c	2012-02-24 17:14:07.021188948 +0000
@@ -0,0 +1,280 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+* Based on S3nf implementation http://openwall.info/wiki/john/MSCash2
+*/
+#include <string.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cuda_mscash2.h"
+#include "cuda_common.h"
+
+#define FORMAT_LABEL		"mscash2-cuda"
+#define FORMAT_NAME		FORMAT_LABEL
+#define ALGORITHM_NAME		"GPU"
+
+#define BENCHMARK_COMMENT	""
+#define BENCHMARK_LENGTH	-1
+
+//#define _MSCASH2_DEBUG
+
+static mscash2_password *inbuffer;
+static mscash2_hash *outbuffer;
+static mscash2_salt currentsalt;
+
+static struct fmt_tests tests[] = {
+	//{"$DCC2$test#a86012faf7d88d1fc037a69764a92cac", "password"},
+	{"$DCC2$administrator#a150f71752b5d605ef0b2a1e98945611","a"},
+	//{"$DCC2$administrator#c14eb8279e4233ec14e9d393637b65e2","ab"},
+	//{"$DCC2$administrator#8ce9c0279b4e6f226f52d559f9c2c5f3","abc"},
+	//{"$DCC2$administrator#2fc788d09fad7e26a92d12356fa44bdf","abcd"},
+	//{"$DCC2$administrator#6aa19842ffea11f0f0c89f8ca8d245bd","abcde"},
+	{NULL}
+};
+
+extern void mscash2_gpu(mscash2_password *, mscash2_hash *, mscash2_salt *);
+
+static void cleanup()
+{
+ free(inbuffer);
+ free(outbuffer);
+}
+
+static void init(struct fmt_main *pFmt)
+{
+  //Alocate memory for hashes and passwords
+  inbuffer=(mscash2_password*)malloc(sizeof(mscash2_password)*MAX_KEYS_PER_CRYPT);
+  outbuffer=(mscash2_hash*)malloc(sizeof(mscash2_hash)*MAX_KEYS_PER_CRYPT);
+  check_mem_allocation(inbuffer,outbuffer);
+  atexit(cleanup);
+  //Initialize CUDA
+  cuda_init(gpu_id);
+}
+
+static int valid(char *ciphertext,struct fmt_main *pFmt)
+{
+	if (strncmp(ciphertext, mscash2_prefix, strlen(mscash2_prefix)) != 0)
+		return 0;
+	char *hash = strrchr(ciphertext, '#') + 1;
+	int hashlength = 0;
+	if (hash == NULL)
+		return 0;
+	while (hash < ciphertext + strlen(ciphertext)) {
+		if (atoi16[ARCH_INDEX(*hash++)] == 0x7f)
+			return 0;
+		hashlength++;
+	}
+	if (hashlength != 32)
+		return 0;
+	return 1;
+}
+
+static void *binary(char *ciphertext)
+{
+	static uint32_t binary[4];
+	char *hash = strrchr(ciphertext, '#') + 1;
+	if (hash == NULL)
+		return binary;
+	int i;
+	for (i = 0; i < 4; i++) {
+		sscanf(hash + (8 * i), "%08x", &binary[i]);
+		binary[i] = SWAP(binary[i]);
+	}
+	return binary;
+
+}
+
+static void *salt(char *ciphertext)
+{
+	static mscash2_salt salt;
+	char *pos = ciphertext + strlen(mscash2_prefix);
+	int length = 0;
+	while (*pos != '#')
+		salt.salt[length++] = *pos++;
+	salt.length = length;
+	return &salt;
+}
+
+static void set_salt(void *salt)
+{
+	memcpy(&currentsalt, salt, sizeof(mscash2_salt));
+}
+
+static void set_key(char *key, int index)
+{
+	uint8_t length = strlen(key);
+	inbuffer[index].length = length;
+	memcpy(inbuffer[index].v, key, length);
+}
+
+static char *get_key(int index)
+{
+	static char ret[PLAINTEXT_LENGTH + 1];
+	uint8_t length = inbuffer[index].length;
+	memcpy(ret, inbuffer[index].v, length);
+	ret[length] = '\0';
+	return ret;
+}
+
+static void crypt_all(int count)
+{
+	mscash2_gpu(inbuffer, outbuffer, &currentsalt);
+}
+
+static int binary_hash_0(void *binary)
+{
+#ifdef _MSCASH2_DEBUG
+	puts("binary");
+	uint32_t i, *b = binary;
+	for (i = 0; i < 4; i++)
+		printf("%08x ", b[i]);
+	puts("");
+#endif
+	return (((uint32_t *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xfffff;
+}
+
+static int binary_hash_5(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xffffff;
+}
+
+static int binary_hash_6(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0x7ffffff;
+}
+
+static int get_hash_0(int index)
+{
+#ifdef _MSCASH2_DEBUG
+	int i;
+	puts("get_hash");
+	for (i = 0; i < 4; i++)
+		printf("%08x ", outbuffer[index].v[i]);
+	puts("");
+#endif
+	return outbuffer[index].v[0] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	return outbuffer[index].v[0] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	return outbuffer[index].v[0] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	return outbuffer[index].v[0] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	return outbuffer[index].v[0] & 0xfffff;
+}
+
+static int get_hash_5(int index)
+{
+	return outbuffer[index].v[0] & 0xffffff;
+}
+
+static int get_hash_6(int index)
+{
+	return outbuffer[index].v[0] & 0x7ffffff;
+}
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i, b = ((uint32_t *) binary)[0];
+	for (i = 0; i < count; i++)
+		if (b == outbuffer[i].v[0])
+			return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	uint32_t i, *b = (uint32_t *) binary;
+	for (i = 0; i < 4; i++)
+		if (b[i] != outbuffer[index].v[i])
+			return 0;
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_cuda_mscash2 = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_NAME,
+		    ALGORITHM_NAME,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    fmt_default_prepare,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+			binary_hash_4,
+		    binary_hash_5,
+		    binary_hash_6},
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4,
+		    get_hash_5,
+		    get_hash_6},
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
diff -urpN john-1.7.9-jumbo-5/src/cuda_mscash_fmt.c john-1.7.9-jumbo-5-cuda-2/src/cuda_mscash_fmt.c
--- john-1.7.9-jumbo-5/src/cuda_mscash_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_mscash_fmt.c	2012-02-24 17:14:45.382065132 +0000
@@ -0,0 +1,252 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+* Based on Alain Espinosa implementation http://openwall.info/wiki/john/MSCash
+*/
+#include <string.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cuda_mscash.h"
+#include "cuda_common.h"
+
+#define FORMAT_LABEL		"mscash-cuda"
+#define ALGORITHM_NAME		""
+
+#define BENCHMARK_COMMENT	" len(pass)=8, len(salt)=13"
+#define BENCHMARK_LENGTH	-1
+
+static mscash_password *inbuffer;
+static mscash_hash *outbuffer;
+static mscash_salt currentsalt;
+
+static struct fmt_tests tests[] = {
+	{"M$administrator#25fd08fa89795ed54207e6e8442a6ca0", "password"},
+	{NULL}
+};
+
+extern void cuda_mscash(mscash_password *, mscash_hash *, mscash_salt *);
+
+static void cleanup()
+{
+ free(inbuffer);
+ free(outbuffer);
+}
+
+static void init(struct fmt_main *pFmt)
+{
+  //Alocate memory for hashes and passwords
+  inbuffer=(mscash_password*)malloc(sizeof(mscash_password)*MAX_KEYS_PER_CRYPT);
+  outbuffer=(mscash_hash*)malloc(sizeof(mscash_hash)*MAX_KEYS_PER_CRYPT);
+  check_mem_allocation(inbuffer,outbuffer);
+  atexit(cleanup);
+  //Initialize CUDA
+  cuda_init(gpu_id);
+}
+
+static int valid(char *ciphertext, struct fmt_main *pFmt)
+{
+	if (strncmp(ciphertext, mscash_prefix, strlen(mscash_prefix)) != 0)
+		return 0;
+	char *hash = strrchr(ciphertext, '#') + 1;
+	while (hash < ciphertext + strlen(ciphertext))
+		if (atoi16[(int)*hash++] == 0x7f)
+			return 0;
+	return 1;
+}
+
+static void *binary(char *ciphertext)
+{
+	static uint32_t binary[4];
+	char *hash = strrchr(ciphertext, '#') + 1;
+	int i;
+	for (i = 0; i < 4; i++) {
+		sscanf(hash + (8 * i), "%08x", &binary[i]);
+		binary[i] = SWAP(binary[i]);
+	}
+	return binary;
+
+}
+
+static void *salt(char *ciphertext)
+{
+	static mscash_salt salt;
+	char *pos = ciphertext + strlen(mscash_prefix);
+	int length = 0;
+	while (*pos != '#')
+		salt.salt[length++] = *pos++;
+	salt.length = length;
+	return &salt;
+}
+
+static void set_salt(void *salt)
+{
+	memcpy(&currentsalt, salt, sizeof(mscash_salt));
+}
+
+static void set_key(char *key, int index)
+{
+	uint8_t length = strlen(key);
+	inbuffer[index].length = length;
+	memcpy(inbuffer[index].v, key, length);
+}
+
+static char *get_key(int index)
+{
+	static char ret[PLAINTEXT_LENGTH + 1];
+	uint8_t length = inbuffer[index].length;
+	memcpy(ret, inbuffer[index].v, length);
+	ret[length] = '\0';
+	return ret;
+}
+
+static void crypt_all(int count)
+{
+	cuda_mscash(inbuffer, outbuffer, &currentsalt);
+}
+
+static int binary_hash_0(void *binary)
+{
+	return (((uint32_t *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xfffff;
+}
+
+static int binary_hash_5(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0xffffff;
+}
+static int binary_hash_6(void *binary)
+{
+	return ((uint32_t *) binary)[0] & 0x7ffffff;
+}
+
+static int get_hash_0(int index)
+{
+	return outbuffer[index].v[0] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	return outbuffer[index].v[0] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	return outbuffer[index].v[0] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	return outbuffer[index].v[0] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	return outbuffer[index].v[0] & 0xfffff;
+}
+
+static int get_hash_5(int index)
+{
+	return outbuffer[index].v[0] & 0xffffff;
+}
+
+static int get_hash_6(int index)
+{
+	return outbuffer[index].v[0] & 0x7ffffff;
+}
+
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i, b = ((uint32_t *) binary)[0];
+	for (i = 0; i < count; i++)
+		if (b == outbuffer[i].v[0])
+			return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	uint32_t i, *b = (uint32_t *) binary;
+	for (i = 0; i < 4; i++)
+		if (b[i] != outbuffer[index].v[i])
+			return 0;
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_cuda_mscash = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_LABEL,
+		    ALGORITHM_NAME,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    fmt_default_prepare,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+			binary_hash_4,
+			binary_hash_5,
+			binary_hash_6
+		    },
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4,
+			get_hash_5,
+			get_hash_6
+		    },
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
diff -urpN john-1.7.9-jumbo-5/src/cuda_phpass.h john-1.7.9-jumbo-5-cuda-2/src/cuda_phpass.h
--- john-1.7.9-jumbo-5/src/cuda_phpass.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_phpass.h	2011-08-22 22:15:26.000000000 +0000
@@ -0,0 +1,81 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _PHPASS_H
+#define _PHPASS_H
+#include "common.h"
+#define uint8_t 			unsigned char
+#define uint32_t 			unsigned int
+
+#define ROTATE_LEFT(x, s) 		((x << s) | (x >> (32 - s)))
+#define BLOCKS 				126*3	//it must be always something*3
+#define THREADS 			256
+#define KEYS_PER_CRYPT 			BLOCKS*THREADS
+
+#define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z)			((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z)			((x) ^ (y) ^ (z))
+#define I(x, y, z)			((y) ^ ((x) | ~(z)))
+
+#define address(j,idx) 			(((j)*KEYS_PER_CRYPT)+(idx))
+
+#define FF(a, b, c, d, x, s, ac) \
+  {(a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \
+   (a) = ROTATE_LEFT ((a), (s)); \
+   (a) += (b); \
+  }
+#define GG(a, b, c, d, x, s, ac) \
+  {(a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \
+   (a) = ROTATE_LEFT ((a), (s)); \
+   (a) += (b); \
+  }
+#define HH(a, b, c, d, x, s, ac) \
+  {(a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \
+   (a) = ROTATE_LEFT ((a), (s)); \
+   (a) += (b); \
+  }
+#define II(a, b, c, d, x, s, ac) \
+  {(a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \
+   (a) = ROTATE_LEFT ((a), (s)); \
+   (a) += (b); \
+  }
+
+#define S11				7
+#define S12				12
+#define S13				17
+#define S14				22
+#define S21				5
+#define S22				9
+#define S23				14
+#define S24				20
+#define S31				4
+#define S32				11
+#define S33				16
+#define S34				23
+#define S41				6
+#define S42				10
+#define S43				15
+#define S44				21
+
+#define AC1				0xd76aa477
+#define AC2pCd				0xf8fa0bcc
+#define AC3pCc				0xbcdb4dd9
+#define AC4pCb				0xb18b7a77
+#define MASK1				0x77777777
+
+
+static char phpass_prefix[] = "$P$";
+
+typedef struct {
+	unsigned char v[15];
+	unsigned char length;
+} phpass_password;
+
+typedef struct {
+	uint32_t v[4];		///128bits for hash
+} phpass_hash;
+
+#endif
diff -urpN john-1.7.9-jumbo-5/src/cuda_phpass_fmt.c john-1.7.9-jumbo-5-cuda-2/src/cuda_phpass_fmt.c
--- john-1.7.9-jumbo-5/src/cuda_phpass_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_phpass_fmt.c	2012-02-29 19:05:11.423299767 +0000
@@ -0,0 +1,331 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <string.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cuda_phpass.h"
+#include "cuda_common.h"
+
+#define FORMAT_LABEL		"phpass-cuda"
+#define FORMAT_NAME		FORMAT_LABEL
+
+#define PHPASS_TYPE		"PORTABLE-MD5"
+
+#define BENCHMARK_COMMENT	""
+#define BENCHMARK_LENGTH	-1
+
+#define PLAINTEXT_LENGTH	15
+#define CIPHERTEXT_LENGTH	34	/// header = 3 | loopcnt = 1 | salt = 8 | ciphertext = 22
+#define BINARY_SIZE		16
+#define MD5_DIGEST_LENGTH 	16
+
+
+#define SALT_SIZE		8
+
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+static unsigned char *inbuffer;//[MAX_KEYS_PER_CRYPT * sizeof(phpass_password)];			/** plaintext ciphertexts **/
+static uint32_t *outbuffer;//[MAX_KEYS_PER_CRYPT * 4];						/** calculated hashes **/
+
+static char currentsalt[SALT_SIZE];
+static char loopChar = '*';
+
+extern void mem_init(unsigned char *, uint32_t *, char *, char *, int);
+extern void mem_clear(void);
+extern void gpu_phpass(void);
+
+
+static struct fmt_tests tests[] = {
+/*	{"$P$900000000jPBDh/JWJIyrF0.DmP7kT.", "ala"},
+	{"$P$900000000a94rg7R/nUK0icmALICKj1", "john"},
+	{"$P$900000001ahWiA6cMRZxkgUxj4x/In0", "john"},
+	{"$P$900000000m6YEJzWtTmNBBL4jypbHv1", "openwall"},
+	{"$P$900000000zgzuX4Dc2091D8kak8RdR0", "h3ll00"},
+	{"$P$900000000qZTL5A0XQUX9hq0t8SoKE0", "1234567890"},
+	{"$P$900112200B9LMtPy2FSq910c1a6BrH0", "1234567890"},
+	{"$P$900000000a94rg7R/nUK0icmALICKj1", "john"},
+	{"$P$9sadli2.wzQIuzsR2nYVhUSlHNKgG/0", "john"},
+	{"$P$90000000000tbNYOc9TwXvLEI62rPt1", ""},
+*/
+      {"$P$9saltstriAcRMGl.91RgbAD6WSq64z.","a"},
+      {"$P$9saltstriMljTzvdluiefEfDeGGQEl/","ab"},
+      {"$P$9saltstrikCftjZCE7EY2Kg/pjbl8S.","abc"},
+      {"$P$9saltstriV/GXRIRi9UVeMLMph9BxF0","abcd"},
+      {"$P$9saltstri3JPgLni16rBZtI03oeqT.0","abcde"},
+      {"$P$9saltstri0D3A6JyITCuY72ZoXdejV.","abcdef"},
+      {"$P$9saltstriXeNc.xV8N.K9cTs/XEn13.","abcdefg"},
+      {"$P$9saltstrinwvfzVRP3u1gxG2gTLWqv.","abcdefgh"},
+      {"$P$9saltstriSUQTD.yC2WigjF8RU0Q.Z.","abcdefghi"},
+      {"$P$9saltstriWPpGLG.jwJkwGRwdKNEsg.","abcdefghij"},
+
+      {"$P$9saltstrizjDEWUMXTlQHQ3/jhpR4C.","abcdefghijk"},
+      {"$P$9saltstriGLUwnE6bl91BPJP6sxyka.","abcdefghijkl"},
+      {"$P$9saltstriq7s97e2m7dXnTEx2mtPzx.","abcdefghijklm"},
+      {"$P$9saltstriTWMzWKsEeiE7CKOVVU.rS0","abcdefghijklmn"},
+      {"$P$9saltstriXt7EDPKtkyRVOqcqEW5UU.", "abcdefghijklmno"},
+	{NULL}
+};
+
+static void cleanup()
+{
+  free(inbuffer);
+  free(outbuffer);
+}
+
+static void init(struct fmt_main *pFmt)
+{
+    //Alocate memory for hashes and passwords
+    inbuffer=(unsigned char*)malloc(MAX_KEYS_PER_CRYPT * sizeof(phpass_password)*sizeof(char));
+    outbuffer=(uint32_t *)malloc(MAX_KEYS_PER_CRYPT*4*sizeof(uint32_t));
+    check_mem_allocation(inbuffer,outbuffer);
+    atexit(cleanup);
+    //Initialize CUDA
+    cuda_init(gpu_id);
+}
+
+static int valid(char *ciphertext,struct fmt_main *pFmt)
+{
+	uint32_t i, count_log2;
+
+	if (strlen(ciphertext) != CIPHERTEXT_LENGTH)
+		return 0;
+	if (strncmp(ciphertext, phpass_prefix, 3) != 0)
+		return 0;
+
+	for (i = 3; i < CIPHERTEXT_LENGTH; i++)
+		if (atoi64[ARCH_INDEX(ciphertext[i])] == 0x7F)
+			return 0;
+
+	count_log2 = atoi64[ARCH_INDEX(ciphertext[3])];
+	if (count_log2 < 7 || count_log2 > 31)
+		return 0;
+
+	return 1;
+};
+
+//code from historical JtR phpass patch
+static void *binary(char *ciphertext)
+{
+	static unsigned char b[BINARY_SIZE];
+	memset(b, 0, BINARY_SIZE);
+	int i, bidx = 0;
+	unsigned sixbits;
+	char *pos = &ciphertext[3 + 1 + 8];
+
+	for (i = 0; i < 5; i++) {
+		sixbits = atoi64[ARCH_INDEX(*pos++)];
+		b[bidx] = sixbits;
+		sixbits = atoi64[ARCH_INDEX(*pos++)];
+		b[bidx++] |= (sixbits << 6);
+		sixbits >>= 2;
+		b[bidx] = sixbits;
+		sixbits = atoi64[ARCH_INDEX(*pos++)];
+		b[bidx++] |= (sixbits << 4);
+		sixbits >>= 4;
+		b[bidx] = sixbits;
+		sixbits = atoi64[ARCH_INDEX(*pos++)];
+		b[bidx++] |= (sixbits << 2);
+	}
+	sixbits = atoi64[ARCH_INDEX(*pos++)];
+	b[bidx] = sixbits;
+	sixbits = atoi64[ARCH_INDEX(*pos++)];
+	b[bidx] |= (sixbits << 6);
+	return (void *) b;
+}
+
+static void *salt(char *ciphertext)
+{
+	static unsigned char salt[SALT_SIZE + 2];
+	memcpy(salt, &ciphertext[4], 8);
+	salt[8] = ciphertext[3];
+	salt[9] = 0;
+	return salt;
+}
+
+static int binary_hash_0(void *binary)
+{
+	return (((ARCH_WORD_32 *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfffff;
+}
+
+static int binary_hash_5(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffffff;
+}
+
+static int binary_hash_6(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0x7ffffff;
+}
+
+static void set_salt(void *salt)
+{
+	unsigned char *csalt = salt;
+	memcpy(currentsalt,csalt,8);
+	loopChar = csalt[8];
+}
+
+static void set_key(char *key, int index)
+{
+	int i, len = strlen(key);
+	inbuffer[address(15, index)] = len;
+	for (i = 0; i < len; i++)
+		inbuffer[address(i, index)] = key[i];
+}
+
+static char *get_key(int index)
+{
+	static char r[PLAINTEXT_LENGTH + 1];
+	int i;
+	for (i = 0; i < PLAINTEXT_LENGTH; i++)
+		r[i] = inbuffer[address(i, index)];
+	r[inbuffer[address(15,index)]] = '\0';
+	return r;
+}
+
+static void crypt_all(int count)
+{
+	char setting[40];
+	strcpy(setting, phpass_prefix);
+	setting[3] = loopChar;
+	int count_log2 = 0;
+	count_log2 = atoi64[ARCH_INDEX(setting[3])];
+	strcpy(setting + 4, currentsalt);
+	mem_init(inbuffer, outbuffer, setting, itoa64, count_log2);
+	gpu_phpass();
+	mem_clear();
+}
+
+static int get_hash_0(int index)
+{
+	return outbuffer[address(0, index)] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	return outbuffer[address(0, index)] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	return outbuffer[address(0, index)] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	return outbuffer[address(0, index)] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	return outbuffer[address(0, index)] & 0xfffff;
+}
+
+static int get_hash_5(int index)
+{
+	return outbuffer[address(0, index)] & 0xffffff;
+}
+
+static int get_hash_6(int index)
+{
+	return outbuffer[address(0, index)] & 0x7ffffff;
+}
+
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i;
+	uint32_t b = ((uint32_t *) binary)[0];
+	for (i = 0; i < count; i++)
+		if (b == outbuffer[address(0, i)])
+			return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	int i;
+	uint32_t *t = (uint32_t *) binary;
+	for (i = 0; i < 4; i++)
+		if (t[i] != outbuffer[address(i, index)])
+			return 0;
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_cuda_phpass = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_NAME,
+		    PHPASS_TYPE,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE + 1,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    fmt_default_prepare,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+			binary_hash_4,
+		    binary_hash_5,
+		    binary_hash_6},
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4,
+		    get_hash_5,
+		    get_hash_6},
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
diff -urpN john-1.7.9-jumbo-5/src/cuda_rawsha256.h john-1.7.9-jumbo-5-cuda-2/src/cuda_rawsha256.h
--- john-1.7.9-jumbo-5/src/cuda_rawsha256.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_rawsha256.h	2012-02-17 03:09:50.521475923 +0000
@@ -0,0 +1,34 @@
+/**
+This file is shared by cuda-rawsha224 and cuda-rawsha256 formats 
+*/
+#ifndef _SHA256_H
+#define _SHA256_H
+
+#ifndef uint32_t
+  #define uint32_t unsigned int  
+#endif
+
+#define rol(x,n) ((x << n) | (x >> (32-n)))
+#define ror(x,n) ((x >> n) | (x << (32-n)))
+#define Ch(x,y,z) ((x & y) ^ ( (~x) & z)) 
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) ((ror(x,2))  ^ (ror(x,13)) ^ (ror(x,22)))
+#define Sigma1(x) ((ror(x,6))  ^ (ror(x,11)) ^ (ror(x,25)))
+#define sigma0(x) ((ror(x,7))  ^ (ror(x,18)) ^(x>>3))
+#define sigma1(x) ((ror(x,17)) ^ (ror(x,19)) ^(x>>10))
+
+#define THREADS 128
+#define BLOCKS 256
+#define KEYS_PER_CRYPT THREADS*BLOCKS
+typedef struct{
+  uint32_t v[16];  				///512bits
+}sha256_password;
+
+typedef struct{
+  uint32_t v[8]; 				///256bits
+}sha256_hash;
+
+typedef struct{
+  uint32_t v[7]; 				///224bits
+}sha224_hash;
+#endif
\ No newline at end of file
diff -urpN john-1.7.9-jumbo-5/src/cuda_rawsha256_fmt.c john-1.7.9-jumbo-5-cuda-2/src/cuda_rawsha256_fmt.c
--- john-1.7.9-jumbo-5/src/cuda_rawsha256_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/cuda_rawsha256_fmt.c	2012-02-24 17:36:03.967064029 +0000
@@ -0,0 +1,258 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+* This file is shared by cuda-rawsha224 and cuda-rawsha256 formats, 
+* SHA256 definition is used to distinguish between them. 
+*/
+#include <string.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cuda_common.h"
+#include "cuda_rawsha256.h"
+
+#define BENCHMARK_COMMENT	""
+#define BENCHMARK_LENGTH	-1 /// Raw benchmark
+#define PLAINTEXT_LENGTH	54
+#define SALT_SIZE		0
+
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT	
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+#ifdef SHA256
+  #define FORMAT_NAME		"raw-sha256-cuda"
+  #define SHA_TYPE		"SHA256" 
+  #define CIPHERTEXT_LENGTH	64 ///256bit
+  #define BINARY_SIZE		32
+  #define SHA_HASH		sha256_hash
+  #define TESTS			sha256_tests
+  #define FMT_MAIN		fmt_cuda_rawsha256
+  static struct fmt_tests sha256_tests[]={
+  {"a49c2c9d0c006c8cb55a9a7a38822b83e0cd442614cb416af952fa50156761dc","openwall"},
+  {NULL}  
+  };
+#endif
+#ifdef SHA224
+  #define FORMAT_NAME		"raw-sha224-cuda"
+  #define SHA_TYPE		"SHA224" 
+  #define CIPHERTEXT_LENGTH	56 ///224bit
+  #define BINARY_SIZE		32 
+  #define SHA_HASH 		sha224_hash
+  #define TESTS			sha224_tests
+  #define FMT_MAIN		fmt_cuda_rawsha224
+  static struct fmt_tests sha224_tests[]={
+  {"d6d8ff02342ea04cf65f8ab446b22c4064984c29fe86f858360d0319","openwall"},
+  {NULL}  
+  };
+#endif
+extern void gpu_rawsha256(sha256_password *,SHA_HASH*);
+extern void gpu_rawsha224(sha256_password *,SHA_HASH*);
+static char saved_keys[MAX_KEYS_PER_CRYPT][PLAINTEXT_LENGTH+1];		/** plaintext ciphertexts **/
+static sha256_password 	*inbuffer;			/** binary ciphertexts **/
+static SHA_HASH	*outbuffer;				/** calculated hashes **/
+
+static void preproc(char *key, int index){ /// todo - move to gpu
+  uint32_t dl=strlen(key),j;
+  uint32_t *blocks = inbuffer[index].v;
+  memset(inbuffer[index].v,0,sizeof(sha256_password));
+  for(j=0;j<dl;j++){
+      uint32_t tmp=0;
+      tmp |= (((uint32_t) key[j]) << ((3-(j & 0x3)) << 3));
+      blocks[j/4]|=tmp;
+    }
+    blocks[j / 4] |= (((uint32_t) 0x80) << ((3-(j & 0x3)) << 3));
+    blocks[15]=0x00000000|(dl*8);
+}
+
+static void cleanup()
+{
+ free(inbuffer);
+ free(outbuffer);
+}
+
+static void init(struct fmt_main *pFmt){
+   //Alocate memory for hashes and passwords
+  inbuffer=(sha256_password*)malloc(sizeof(sha256_password)*MAX_KEYS_PER_CRYPT);
+  outbuffer=(SHA_HASH*)malloc(sizeof(SHA_HASH)*MAX_KEYS_PER_CRYPT);
+  check_mem_allocation(inbuffer,outbuffer);
+  atexit(cleanup);
+  //Initialize CUDA
+  cuda_init(gpu_id);
+}
+
+static int valid(char * ciphertext,struct fmt_main *pFmt){
+  int i;
+  if(strlen(ciphertext)!=CIPHERTEXT_LENGTH) return 0;
+  for(i=0;i<CIPHERTEXT_LENGTH;i++){
+    if(!(
+      (ciphertext[i]>='0' && ciphertext[i]<='9')||
+      (ciphertext[i]>='a' && ciphertext[i]<='f')||
+      (ciphertext[i]>='A' && ciphertext[i]<='Z')))
+	return 0;
+  }
+  return 1;
+};
+
+
+static void *binary(char *ciphertext){
+  static char realcipher[BINARY_SIZE];
+  memset(realcipher,0,BINARY_SIZE);
+  int i;
+  for(i=0;i<BINARY_SIZE;i+=4){
+      realcipher[i]=atoi16[ARCH_INDEX(ciphertext[(i+3)*2])]*16+atoi16[ARCH_INDEX(ciphertext[(i+3)*2+1])];
+      realcipher[i+1]=atoi16[ARCH_INDEX(ciphertext[(i+2)*2])]*16+atoi16[ARCH_INDEX(ciphertext[(i+2)*2+1])];
+      realcipher[i+2]=atoi16[ARCH_INDEX(ciphertext[(i+1)*2])]*16+atoi16[ARCH_INDEX(ciphertext[(i+1)*2+1])];
+      realcipher[i+3]=atoi16[ARCH_INDEX(ciphertext[(i)*2])]*16+atoi16[ARCH_INDEX(ciphertext[(i)*2+1])];
+  }
+  return (void*)realcipher;
+}
+
+static int binary_hash_0(void *binary){
+   return (((ARCH_WORD_32*)binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary){
+  return ((ARCH_WORD_32*)binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary){
+  return ((ARCH_WORD_32*)binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary){
+  return ((ARCH_WORD_32*)binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary){
+  return ((ARCH_WORD_32*)binary)[0] & 0xfffff;
+}
+
+static int binary_hash_5(void *binary){
+  return ((ARCH_WORD_32*)binary)[0] & 0xffffff;
+}
+
+static int binary_hash_6(void *binary){
+  return ((ARCH_WORD_32*)binary)[0] & 0x7ffffff;
+}
+
+static void set_salt(void *salt){}
+static void set_key(char *key, int index){
+    memset(saved_keys[index],0,PLAINTEXT_LENGTH+1);	
+    strnzcpy(saved_keys[index],key,PLAINTEXT_LENGTH);
+    preproc(key,index);
+}
+static char *get_key(int index){
+  return saved_keys[index];
+}
+
+static void crypt_all(int count){
+  #ifdef SHA256
+  gpu_rawsha256(inbuffer,outbuffer);
+  #else
+  gpu_rawsha224(inbuffer,outbuffer);
+  #endif
+}
+
+static int get_hash_0(int index){
+  return ((ARCH_WORD_32*)outbuffer[index].v)[0] & 0xf;
+}
+
+static int get_hash_1(int index){
+  return ((ARCH_WORD_32*)outbuffer[index].v)[0] & 0xff;
+}
+
+static int get_hash_2(int index){
+  return ((ARCH_WORD_32*)outbuffer[index].v)[0] & 0xfff;
+}
+
+static int get_hash_3(int index){
+  return ((ARCH_WORD_32*)outbuffer[index].v)[0] & 0xffff;
+}
+
+static int get_hash_4(int index){
+  return ((ARCH_WORD_32*)outbuffer[index].v)[0] & 0xfffff;
+}
+
+static int get_hash_5(int index){
+  return ((ARCH_WORD_32*)outbuffer[index].v)[0] & 0xffffff;
+}
+
+static int get_hash_6(int index){
+  return ((ARCH_WORD_32*)outbuffer[index].v)[0] & 0x7ffffff;
+}
+
+static int cmp_all(void *binary,int count){
+  uint32_t i;
+  uint32_t b=((uint32_t *)binary)[0];
+  for(i=0;i<count;i++)
+    if(b==outbuffer[i].v[0])
+      return 1;
+  return 0;
+}
+
+static int cmp_one(void *binary,int index){
+  int i;
+  uint32_t *t=(uint32_t *)binary;
+  for(i=0;i<CIPHERTEXT_LENGTH/8;i++)
+    if(t[i]!=outbuffer[index].v[i])
+      return 0;
+  return 1;
+}
+static int cmp_exact(char *source,int count){
+  return 1;
+}
+
+struct fmt_main FMT_MAIN={
+  {
+    FORMAT_NAME,
+    FORMAT_NAME,
+    SHA_TYPE,
+    BENCHMARK_COMMENT,
+    BENCHMARK_LENGTH,
+    PLAINTEXT_LENGTH,
+    BINARY_SIZE,
+    SALT_SIZE,
+    MIN_KEYS_PER_CRYPT,
+    MAX_KEYS_PER_CRYPT,
+    FMT_CASE | FMT_8_BIT ,
+    TESTS
+  },
+  {
+    init,
+    fmt_default_prepare,
+    valid,
+    fmt_default_split,
+    binary,
+    fmt_default_salt,
+    {
+      binary_hash_0,	
+      binary_hash_1,	
+      binary_hash_2,	
+      binary_hash_3,
+      binary_hash_4,
+      binary_hash_5,
+      binary_hash_6
+    },
+    fmt_default_salt_hash,
+    set_salt,
+    set_key,
+    get_key,	
+    fmt_default_clear_keys,
+    crypt_all,
+    {
+      get_hash_0,	
+      get_hash_1,	
+      get_hash_2,
+      get_hash_3,
+      get_hash_4,
+      get_hash_5,
+      get_hash_6
+    },
+    cmp_all,	
+    cmp_one,	
+    cmp_exact	
+  }
+};
\ No newline at end of file
diff -urpN john-1.7.9-jumbo-5/src/john.c john-1.7.9-jumbo-5-cuda-2/src/john.c
--- john-1.7.9-jumbo-5/src/john.c	2011-12-15 22:23:32.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/john.c	2012-02-17 15:44:09.189725314 +0000
@@ -87,6 +87,17 @@ extern struct fmt_main fmt_SybaseASE;
 extern struct fmt_main fmt_SKEY;
 #endif
 
+#ifdef HAVE_CUDA
+extern struct fmt_main fmt_cuda_cryptmd5;
+extern struct fmt_main fmt_cuda_phpass;
+extern struct fmt_main fmt_cuda_cryptsha256;
+extern struct fmt_main fmt_cuda_cryptsha512;
+extern struct fmt_main fmt_cuda_mscash;
+extern struct fmt_main fmt_cuda_mscash2;
+extern struct fmt_main fmt_cuda_rawsha256;
+extern struct fmt_main fmt_cuda_rawsha224;
+#endif
+
 extern struct fmt_main fmt_ssh;
 extern struct fmt_main fmt_pdf;
 extern struct fmt_main rar_fmt;
@@ -168,6 +179,17 @@ static void john_register_all(void)
 	john_register_one(&zip_fmt);
 	john_register_one(&fmt_dummy);
 
+#ifdef HAVE_CUDA
+	john_register_one(&fmt_cuda_cryptmd5);
+	john_register_one(&fmt_cuda_phpass);
+	john_register_one(&fmt_cuda_cryptsha256);
+	john_register_one(&fmt_cuda_cryptsha512);
+	john_register_one(&fmt_cuda_mscash);
+	john_register_one(&fmt_cuda_mscash2);
+	john_register_one(&fmt_cuda_rawsha256);
+	john_register_one(&fmt_cuda_rawsha224);
+#endif
+	
 #ifdef HAVE_DL
 	if (options.fmt_dlls)
 	register_dlls ( options.fmt_dlls,
diff -urpN john-1.7.9-jumbo-5/src/options.c john-1.7.9-jumbo-5-cuda-2/src/options.c
--- john-1.7.9-jumbo-5/src/options.c	2011-12-15 22:23:47.000000000 +0000
+++ john-1.7.9-jumbo-5-cuda-2/src/options.c	2012-02-29 19:09:28.950446673 +0000
@@ -39,6 +39,9 @@
 #endif
 #define _PER_NODE ""
 #endif
+#ifdef HAVE_CUDA
+extern unsigned int gpu_id;
+#endif
 
 struct options_main options;
 static char *field_sep_char_string;
@@ -125,6 +128,10 @@ static struct opt_entry opt_list[] = {
 	{"crack-status", FLG_CRKSTAT, FLG_CRKSTAT},
 	{"mkpc", FLG_NONE, FLG_NONE, 0, OPT_REQ_PARAM,
 		"%u", &options.mkpc},
+#ifdef HAVE_CUDA
+	{"gpu", FLG_NONE, FLG_NONE, 0, OPT_REQ_PARAM,
+		"%u", &gpu_id},
+#endif
 	{NULL}
 };
 
@@ -184,6 +191,10 @@ static struct opt_entry opt_list[] = {
 
 #define JOHN_USAGE_PLUGIN \
 "--plugin=NAME[,..]        load this (these) dynamic plugin(s)\n"
+#ifdef HAVE_CUDA
+#define JOHN_USAGE_GPU \
+"--gpu=GPUID               set active GPU for execution (CUDA)\n"
+#endif
 
 static void print_usage(char *name)
 {
@@ -216,6 +227,9 @@ static void print_usage(char *name)
 #ifdef HAVE_DL
 	printf("%s", JOHN_USAGE_PLUGIN);
 #endif
+#ifdef HAVE_CUDA
+	printf("%s", JOHN_USAGE_GPU);
+#endif
 
 	exit(0);
 }
