diff --git a/CUDA_version/sha256_header.h b/CUDA_version/sha256_header.h
index e0a2730..8a29be2 100755
--- a/CUDA_version/sha256_header.h
+++ b/CUDA_version/sha256_header.h
@@ -31,22 +31,18 @@
 
 
 #if (__CUDA_ARCH__ >= 500)
-		//	IADD3 R23, R25, R14, R16;
 		static __device__ __forceinline__ uint32_t IADD3(uint32_t a, uint32_t b, uint32_t c) {
 			uint32_t d;
 			asm("iadd3 %0, %1, %2, %3;" : "=r"(d) : "r"(a), "r"(b), "r"(c));
 			return d;
 		}
 
-		//LOP3.LUT R14, R22, R16, R19, 0x96;
 		static __device__ __forceinline__ uint32_t LOP3LUT_XOR(uint32_t a, uint32_t b, uint32_t c) {
 			uint32_t d;
 			asm("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(d) : "r"(a), "r"(b), "r"(c));
 			return d;
 		}
 
-		//a ^ ((c ^ a) & b)
-		//g ^ (e & (f ^ g))
 		static __device__ __forceinline__ uint32_t LOP3LUT_XORAND(uint32_t a, uint32_t b, uint32_t c) {
 			uint32_t d;
 			asm("lop3.b32 %0, %1, %2, %3, 0xb8;" : "=r"(d) : "r"(a), "r"(b), "r"(c));
@@ -77,8 +73,6 @@
         }
 #endif
 
-
-
 #define SCHEDULE0()  \
 		schedule0 = schedule16 + schedule25 \
 			+ LOP3LUT_XOR(ROR7(schedule17) , ROR18(schedule17) , (schedule17 >> 3)) \
@@ -313,8 +307,6 @@
 #define ROR13(x) (((x) << 19) | ((x) >> 13))
 #define ROR22(x) (((x) << 10) | ((x) >> 22))
 
-//		h += LOP3LUT_XOR(ROR6(e), ROR11(e), ROR25(e)) + (g ^ (e & (f ^ g))) + UINT32_C(k) + W; \
-
 #define ROUND(a, b, c, d, e, f, g, h, W, k) \
 		h += LOP3LUT_XOR(ROR6(e), ROR11(e), ROR25(e)) + LOP3LUT_XORAND(g,e,f) + UINT32_C(k) + W; \
 		d += h;  \
@@ -325,7 +317,6 @@
 		d += h;  \
 		h += LOP3LUT_XOR(ROR2(a), ROR13(a), ROR22(a)) + LOP3LUT_ANDOR(a,b,c);
 
-//W-block evaluate
 #define LOADSCHEDULE_WPRE(i, j)  \
 		w_blocks_d[j] =                           \
 			  (uint32_t)block[i * 4 + 0] << 24  \
@@ -337,39 +328,3 @@
 		w_blocks_d[i] = w_blocks_d[i - 16] + w_blocks_d[i - 7]  \
 			+ (ROR(w_blocks_d[i - 15], 7) ^ ROR(w_blocks_d[i - 15], 18) ^ (w_blocks_d[i - 15] >> 3))  \
 			+ (ROR(w_blocks_d[i - 2], 17) ^ ROR(w_blocks_d[i - 2], 19) ^ (w_blocks_d[i - 2] >> 10));
-
-
-		
-
-
-/*
-static __device__ __forceinline__ uint2 ROR2(const uint2 a, const int offset)
-{
-uint2 result;
-#if __CUDA_ARCH__ > 300
-	if (offset < 32) {
-		asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset));
-		asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset));
-	} else // if (offset < 64)
-	{
-		// offset SHOULD BE < 64 !
-		asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset));
-		asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
-	}
-#else
-	if (!offset)
-		result = a;
-	else if (offset < 32) {
-		result.y = ((a.y >> offset) | (a.x << (32 - offset)));
-		result.x = ((a.x >> offset) | (a.y << (32 - offset)));
-	} else if (offset == 32) {
-		result.y = a.x;
-		result.x = a.y;
-	} else {
-		result.y = ((a.x >> (offset - 32)) | (a.y << (64 - offset)));
-		result.x = ((a.y >> (offset - 32)) | (a.x << (64 - offset)));
-	}
-#endif
-	return result;
-}
-*/
\ No newline at end of file