Files
project_euler/e_59.py
Julien Lengrand-Lambert d6a04fbe42 Solves problem 59
2014-05-25 11:39:57 +02:00

255 lines
7.8 KiB
Python

#!/usr/bin/env python
'''
Created on 10 feb. 2012
@author: Julien Lengrand-Lambert
DESCRIPTION: Solves problem 59 of Project Euler
Each character on a computer is assigned a unique code
and the preferred standard is ASCII (American Standard Code for Information Interchange).
For example, uppercase A = 65, asterisk (*) = 42, and lowercase k = 107.
A modern encryption method is to take a text file, convert the bytes to ASCII,
then XOR each byte with a given value, taken from a secret key.
The advantage with the XOR function is that using the same encryption key
on the cipher text, restores the plain text; for example, 65 XOR 42 = 107,
then 107 XOR 42 = 65.
For unbreakable encryption, the key is the same length as the plain text message,
and the key is made up of random bytes.
The user would keep the encrypted message and the encryption key in different locations,
and without both "halves", it is impossible to decrypt the message.
Unfortunately, this method is impractical for most users,
so the modified method is to use a password as a key. If the password is
shorter than the message, which is likely, the key is repeated cyclically
throughout the message. The balance for this method is using a sufficiently
long password key for security, but short enough to be memorable.
Your task has been made easy, as the encryption key consists of three lower
case characters. Using cipher1.txt (right click and 'Save Link/Target As...'),
a file containing the encrypted ASCII codes, and the knowledge that the plain
text must contain common English words, decrypt the message and find
the sum of the ASCII values in the original text.
'''
import itertools
import string
base = 16 # hey, its hex!
printable_letters = string.printable # list of all printable characters
#frequency of english letters
en_freq = {'a': 0.08167,
'b': 0.01492,
'c':0.02782,
'd':0.04253,
'e':0.12702,
'f':0.02228,
'g':0.02015,
'h':0.06094,
'i':0.06966,
'j':0.00153,
'k':0.00772,
'l':0.04025,
'm':0.02406,
'n':0.06749,
'o':0.07507,
'p':0.01929,
'q':0.00095,
'r':0.05987,
's':0.06327,
't':0.09056,
'u':0.02758,
'v':0.00978,
'w':0.02360,
'x':0.00150,
'y':0.01974,
'z':0.00074}
def _xorhex(a, b):
"""
XORs a against b and returns the result
a and b are expected to be int values
XOR Truth Table
Input Output
A B
0 0 0
0 1 1
1 0 1
1 1 0
"""
long_res = a ^ b
string_res = '%x' % long_res # returns a string version
return long_res
def load_data(filename):
"Loads the data of the filename into a table"
file = open(filename, "r")
stuff = file.read()
return [int(val) for val in stuff.split(',')]
def all_passwords(range, length):
'''
Returns all possible passwords given a range of ascii_values, and the length of the password
'''
return list(set(itertools.combinations(range*3, length)))
def apply_password(value, password):
"""
Applies a given password to the message.
"""
long_pass = repeat_password(password, len(value))
return [_xorhex(a, b) for a, b in zip(value, long_pass)]
def repeat_password(password, length):
"""
Repeats a password to match the requested size.
Ex : [1, 2, 3], 7 => [1, 2, 3, 1, 2, 3, 1]
"""
repeat = length / len(password)
rest = length % len(password)
return password * repeat + password[:rest]
def all_solutions(data, pass_range, length):
"""
Prints all possible solutions
"""
res = []
all_pass = all_passwords(pass_range, length)
total = len(all_pass)
print total
idx = 0
for password in all_pass:
idx += 1
print "%d / %d" % (idx, total)
sol = apply_password(data, password)
res.append([to_ascii(sol), password])
return res
def to_ascii(data):
"""
Turns a list of ascii values into a proper string
"""
res = ""
for el in data:
res += chr(el)
return res
def discard_non_printable(solutions):
"""
We discard from a list of hex all the elements that cannot be printed.
#FIXME: Do better
"""
res = []
for val in solutions:
is_printable = True
for v in val[0]:
if not(v in printable_letters):
is_printable = False
if is_printable:
res.append(val)
return res
def letter_freq(work_val):
"""
Calculate the frequency of the elements in the string val
Returns a dictionary of the form {element: frequency} for all elements of val.
Only present elements will be present in the dictionary (frequency > 0)
"""
counts = map(work_val.count ,work_val) # gets count of each element of hex_val
freqs = [(float(x) / len(work_val)) for x in counts]
#freqs = [x for x in counts]
return dict(zip(work_val,freqs)) # transforms into dict of unique values associated to frequencies
def chi2(ref_dict, test_dict, length):
"""
Performs a chi2 adequation testing.
Given a reference dictionary and a corresponding dctionary to be tested, returns a score corresponding to
the similitude between the test data and the reference data.
The lower the score, the higher the similitude.
Dictionaries are expected to be under the {value:frequency} form.
Example : chi2({a:0.08, b:0.92}, {a:0.80, b:0.20}, 12)
length corresponds to the number of elements needed to acquire test_dict
Returns a float value
"""
my_keys = ref_dict.keys()
score = 0
for m in my_keys:
if test_dict.has_key(m): # testing if occurence happened
freq_val = length * test_dict[m]
else:
freq_val = 0
ref_val = length * ref_dict[m]
score += pow(freq_val - ref_val, 2) / ref_val
return score
def strip_string_to_lowercase(s):
"""
Returns s in lowercase, with all its special characters stripped
Taken from here: http://stackoverflow.com/questions/638893/what-is-the-most-efficient-way-in-python-to-convert-a-string-to-all-lowercase-st#639325
"""
tmpStr = s.lower().strip()
retStrList = []
for x in tmpStr:
if x in string.ascii_lowercase:
retStrList.append(x)
return ''.join(retStrList)
def close_to_english(element):
"""
Returns a score of likeliness for element to be english
The computed score is the chi2 adequation test.
The closest to 0, the most probable the sentence is to be in english
Table can be found here, solutions at row 25 (english is 26 degrees of liberty).
http://www.medcalc.org/manual/chi-square-table.php to get the values to compare to
"""
# cleans the hex val of special characters, and puts to lowercase
stripped_el = strip_string_to_lowercase(element)
length_el = len(stripped_el)
#calculates the letters frequency of element
freqs = letter_freq(stripped_el)
return chi2(en_freq, freqs, length_el)
def find_most_probable(data, pass_range, length):
solutions = all_solutions(data, pass_range, length)
# discards easy fuck ups
solutions = discard_non_printable(solutions)
best_candidate = ""
best_chi = 1000000
for sol in solutions:
score = close_to_english(sol[0])
if score < best_chi:
best_chi = score
best_candidate = sol
return best_candidate
def get_solution(data, pass_range, length):
solution = find_most_probable(data, ascii_range, 3)[0]
sum = 0
for val in solution:
sum+=ord(val)
return sum
if __name__ == '__main__':
data = load_data("e_59_cipher1.txt")
ascii_range = range(ord('a'), ord('z') + 1 )
print get_solution(data, ascii_range, 3)