Project Euler task 59 (CUDA)

Each character on a computer is assigned a unique code and the preferred standard is ASCII (American Standard Code for Information Interchange). For example, uppercase A = 65, asterisk (*) = 42, and lowercase k = 107.

A modern encryption method is to take a text file, convert the bytes to ASCII, then XOR each byte with a given value taken from a secret key. The advantage of the XOR function is that applying the same encryption key to the cipher text restores the plain text; for example, 65 XOR 42 = 107, then 107 XOR 42 = 65.

For unbreakable encryption, the key is the same length as the plain text message, and the key is made up of random bytes. The user would keep the encrypted message and the encryption key in different locations, and without both "halves", it is impossible to decrypt the message.

Unfortunately, this method is impractical for most users, so the modified method is to use a password as a key. If the password is shorter than the message, which is likely, the key is repeated cyclically throughout the message. The balance for this method is using a sufficiently long password key for security, but short enough to be memorable.

Your task has been made easy, as the encryption key consists of three lower case characters. Using cipher1.txt (right click and 'Save Link/Target As...'), a file containing the encrypted ASCII codes, and the knowledge that the plain text must contain common English words, decrypt the message and find the sum of the ASCII values in the original text.

-------------------------------------------------------------------------------------
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <vector>

using namespace std;


// Given that the last `nMatched` decoded characters equal the first `nMatched`
// characters of `pWord` (nMatched < word length), returns how many leading
// characters of `pWord` are matched once `ch` has been appended.
//
// The fallback is derived from `pWord` alone: the recently decoded text is, by
// definition, pWord[0..nMatched-1] followed by `ch`, so the longest suffix of
// that string which is also a prefix of `pWord` is the new match length.
static __host__ __device__ int nextMatchLen(
    const unsigned char * pWord,
    const int nMatched,
    const unsigned char ch)
{
    for (int nCand = nMatched + 1; nCand > 0; nCand--) {
        if (pWord[nCand - 1] != ch)
            continue;

        // the first nCand-1 characters must line up with the tail of what was matched
        const int nShift = nMatched - (nCand - 1);
        bool bAligned = true;
        for (int j = 0; j < nCand - 1; j++) {
            if (pWord[j] != pWord[nShift + j]) {
                bAligned = false;
                break;
            }
        }

        if (bAligned)
            return nCand;
    }

    return 0;
}

// XOR-decodes `_pInput` with the cyclically repeated key and looks for `_pWord`
// in the decoded stream.
//
//   _pInput    - encoded bytes; overwritten with the decoded bytes when bRewrite is true
//   _nInputLen - number of bytes in _pInput
//   _pWord     - byte sequence to look for in the decoded text
//   _nWordSize - number of bytes in _pWord
//   _pKey      - key bytes, applied as _pKey[i % _pKeySize]
//   _pKeySize  - number of bytes in _pKey
//   bRewrite   - when true, every decoded byte is stored back into _pInput
//
// Returns the sum of all decoded byte values if the word occurs at least once,
// otherwise 0. A non-positive key or word size returns 0 without touching the
// input (the original divided by zero / read past the word in those cases).
//
// Callable from both host and device so the same routine can serve as a CPU
// reference for the kernel.
__host__ __device__ unsigned int doesContainSeq(
    unsigned char * _pInput,
    const int _nInputLen,
    const unsigned char * _pWord,
    const int _nWordSize,
    const unsigned char * _pKey,
    const int _pKeySize,
    bool bRewrite = false)
{
    if (_pKeySize <= 0 || _nWordSize <= 0)
        return 0;

    int iPass = 0;          // how many leading characters of the word are currently matched
    bool bFound = false;
    unsigned int iSum = 0;

    for (int i = 0; i < _nInputLen; i++) {
        const unsigned char ch = _pInput[i] ^ _pKey[i % _pKeySize];

        iSum += (unsigned int) ch;

        // Stop matching after the first hit: only existence matters, and this
        // keeps iPass strictly below _nWordSize so _pWord is never read out of bounds.
        if (!bFound) {
            iPass = nextMatchLen(_pWord, iPass, ch);
            if (iPass == _nWordSize)
                bFound = true;
        }

        if (bRewrite)
            _pInput[i] = ch;
    }

    return bFound ? iSum : 0;
}

// Counts the blocks of the current `decipher` launch that have finished
// scanning. Reset to 0 by the last block so the kernel can be launched again.
__device__ unsigned int g_nDecipherBlocksDone = 0;

// Expands a flat candidate index into a three-letter lower-case key:
// idx = k0 * iDiff^2 + k1 * iDiff + k2, each k offset from 'a'.
static __device__ void keyFromIndex(const int idx, const int iDiff, unsigned char * pKey) {
    pKey[0] = (unsigned char) ('a' + idx / (iDiff * iDiff));
    pKey[1] = (unsigned char) ('a' + (idx / iDiff) % iDiff);
    pKey[2] = (unsigned char) ('a' + idx % iDiff);
}

// Brute-forces every three-letter lower-case XOR key against the cipher text.
//
//   _pInput     - device buffer with the cipher text; on return it holds the text
//                 decoded with the first key (lowest index) whose output contains " the "
//   _nTextLen   - number of bytes in _pInput
//   _pSyncArray - device array of 26*26*26 entries; entry i receives the sum of the
//                 decoded bytes for key i if that key produced " the ", otherwise 0
//
// Launch layout: 1-D grid of 1-D blocks, one thread per candidate key. Uses one
// statically allocated __shared__ flag per block.
//
// The scan and the in-place rewrite are separated with a "last block done"
// counter: a block barrier cannot order threads of different blocks, so
// rewriting _pInput while other blocks are still scanning it would be a data
// race. Only after every block has published its results does a single thread
// of the last block decode the buffer, exactly once.
__global__ void decipher(unsigned char * _pInput, const int _nTextLen, unsigned int * _pSyncArray) {

    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // number of letters per key position and total number of candidate keys
    const int iDiff = (int) 'z' - (int) 'a' + 1;
    const int iVariants = iDiff * iDiff * iDiff;

    const int nKeySize = 3;
    const unsigned char pWord[] = {' ', 't', 'h', 'e', ' '};

    // ---- phase 1: every thread scores its own key; the input is only read ----
    if (idx < iVariants) {
        unsigned char pKey[nKeySize];
        keyFromIndex(idx, iDiff, pKey);

        _pSyncArray[idx] = doesContainSeq(_pInput, _nTextLen, pWord, sizeof(pWord), pKey, nKeySize);
    }

    // ---- phase 2: detect the last block to finish ----
    // The barriers are outside the idx guard so that all threads of the block reach them.
    __shared__ bool bIsLastBlock;

    __syncthreads();
    if (threadIdx.x == 0) {
        // publish this block's results device-wide before announcing completion
        __threadfence();
        const unsigned int nTicket = atomicInc(&g_nDecipherBlocksDone, gridDim.x);
        bIsLastBlock = (nTicket == gridDim.x - 1);
    }
    __syncthreads();

    // ---- phase 3: one thread decodes the text with the first matching key ----
    if (bIsLastBlock && threadIdx.x == 0) {
        __threadfence();

        // volatile: the results were written by other blocks and must be re-read from memory
        const volatile unsigned int * pResults = _pSyncArray;

        // only entries that some thread of this launch has written are inspected
        const unsigned int nLaunched = gridDim.x * blockDim.x;
        const int nScored = (nLaunched < (unsigned int) iVariants) ? (int) nLaunched : iVariants;

        for (int i = 0; i < nScored; i++) {
            if (pResults[i] != 0) {
                unsigned char pKey[nKeySize];
                keyFromIndex(i, iDiff, pKey);
                doesContainSeq(_pInput, _nTextLen, pWord, sizeof(pWord), pKey, nKeySize, true);
                break;
            }
        }

        // leave the counter ready for the next launch
        g_nDecipherBlocksDone = 0;
    }
}

// Size of the buffer used to read one comma-separated token from the cipher file.
const int iMaxStr = 255;

// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
static void checkCuda(const cudaError_t status, const char * pWhat) {
    if (status != cudaSuccess) {
        cerr << "CUDA error (" << pWhat << "): " << cudaGetErrorString(status) << endl;
        exit(EXIT_FAILURE);
    }
}

// Reads the comma-separated byte codes from cipher1.txt, lets the GPU try every
// three-letter key, then prints the sum of the decoded bytes followed by the
// decoded text. Returns non-zero if the file cannot be read or no key matches.
int main() {

    ifstream ifs("cipher1.txt", ifstream::in);
    if (!ifs) {
        cerr << "cannot open cipher1.txt" << endl;
        return 1;
    }

    // Single pass over the file. Testing the result of getline itself (instead of
    // the stream state before the read) keeps the final failed read from being
    // stored as a phantom trailing byte, which would corrupt the checksum.
    vector<unsigned char> text;
    char pStr[iMaxStr];
    while (ifs.getline(pStr, iMaxStr, ',')) {
        char * pEnd = 0;
        const long lCode = strtol(pStr, &pEnd, 10);
        if (pEnd == pStr)
            continue;   // token holds no digits (e.g. trailing whitespace)
        text.push_back((unsigned char) lCode);
    }
    ifs.close();

    if (text.empty()) {
        cerr << "cipher1.txt contains no byte codes" << endl;
        return 1;
    }

    const int iLines = (int) text.size();
    const size_t nTextBytes = text.size() * sizeof(unsigned char);

    // copy text data to gpu
    unsigned char * dpText = 0;
    checkCuda(cudaMalloc(&dpText, nTextBytes), "cudaMalloc text");
    checkCuda(cudaMemcpy(dpText, &text[0], nTextBytes, cudaMemcpyHostToDevice), "copy text to device");

    // calculate the number of possible variants
    const int iDiff = (int) 'z' - (int) 'a' + 1;
    const int iVariants = iDiff * iDiff * iDiff;
    cout << "variants: " << iVariants << endl;

    // calculate the scaling factors (ceiling division: no spare block when it divides evenly)
    const int threadsPerBlock = 128;
    const int blocksNum = (iVariants + threadsPerBlock - 1) / threadsPerBlock;

    // one result slot per key, zeroed so that every entry has a defined value
    const size_t nSyncBytes = iVariants * sizeof(unsigned int);
    unsigned int * dpSyncArray = 0;
    checkCuda(cudaMalloc(&dpSyncArray, nSyncBytes), "cudaMalloc results");
    checkCuda(cudaMemset(dpSyncArray, 0, nSyncBytes), "clear results");

    cout << "threadsPerBlock " << threadsPerBlock << endl;
    cout << "blocksNum " << blocksNum << endl;

    // ################################################
    // ################# kernel call ##################
    // ################################################
    decipher <<< blocksNum, threadsPerBlock >>> (dpText, iLines, dpSyncArray);
    checkCuda(cudaGetLastError(), "kernel launch");
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    // copy synchronization array (with result flags)
    vector<unsigned int> syncArray(iVariants);
    checkCuda(cudaMemcpy(&syncArray[0], dpSyncArray, nSyncBytes, cudaMemcpyDeviceToHost), "copy results to host");

    // copy the text decoded by the kernel back to the host
    checkCuda(cudaMemcpy(&text[0], dpText, nTextBytes, cudaMemcpyDeviceToHost), "copy text to host");

    // free device memory
    checkCuda(cudaFree(dpSyncArray), "cudaFree results");
    checkCuda(cudaFree(dpText), "cudaFree text");

    // the first non-zero entry is the checksum of the decoded text
    int iMatch = -1;
    for (int i = 0; i < iVariants; i++)
        if (syncArray[i]) {
            iMatch = i;
            break;
        }

    if (iMatch < 0) {
        cerr << "no key produced readable text" << endl;
        return 1;
    }

    cout << syncArray[iMatch] << endl;

    for (int i = 0; i < iLines; i++)
        cout << text[i];
    cout << endl;

    return 0;
}

Comments

Popular Posts