# Huffman encoding trees def huffman_leaf(letter, weight): """A leaf of a Huffman tree, which has a weight at the root.""" return tree(weight, [tree(letter)]) def huffman_tree(left, right): """A Huffman encoding tree; left and right are also Huffman trees.""" return tree(root(left) + root(right), [left, right]) def weight(tree): """The weight of a Huffman encoding tree.""" return root(tree) def is_huffman_leaf(tree): """Whether this Huffman tree is a Huffman leaf.""" return not is_leaf(tree) and is_leaf(branches(tree)[0]) def letter(leaf): """The letter of a Huffman leaf.""" return root(branches(leaf)[0]) # Trees (from lecture) def tree(root, branches=[]): for branch in branches: assert is_tree(branch), 'branches must be trees' return [root] + list(branches) def root(tree): return tree[0] def branches(tree): return tree[1:] def is_tree(tree): if type(tree) != list or len(tree) < 1: return False for branch in branches(tree): if not is_tree(branch): return False return True def is_leaf(tree): return not branches(tree) CD = huffman_tree(huffman_leaf('c', 1), huffman_leaf('d', 1)) EF = huffman_tree(huffman_leaf('e', 1), huffman_leaf('f', 1)) GH = huffman_tree(huffman_leaf('g', 1), huffman_leaf('h', 1)) EFGH = huffman_tree(EF, GH) BCD = huffman_tree(huffman_leaf('b', 3), CD) BCDEFGH = huffman_tree(BCD, EFGH) example_tree = huffman_tree(huffman_leaf('a', 8), BCDEFGH) def letters(tree): """Return a list of all letters encoded in Huffman encoding TREE. >>> letters(example_tree) ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] """ "*** YOUR CODE HERE ***" def decode(tree, code): """Decode CODE, a list of 0's and 1's using the Huffman encoding TREE. >>> decode(example_tree, [1, 0, 0, 0, 1, 1, 1, 1]) 'bah' """ word = '' while code: word += decode_one(tree, code) return word def decode_one(tree, code): """Decode and remove the first letter in CODE, using TREE. >>> code = [1, 0, 0, 0, 1, 1, 1, 1] >>> decode_one(example_tree, code) 'b' >>> code # The initial 1, 0, and 0 are removed by decode_one [0, 1, 1, 1, 1] """ "*** YOUR CODE HERE ***" def encodings(tree): """Return all encodings in a TREE as a dictionary that maps symbols to bit lists. >>> e = encodings(example_tree) >>> set(e.keys()) == set('abcdefgh') True >>> e['a'] [0] >>> e['c'] [1, 0, 1, 0] >>> e['h'] [1, 1, 1, 1] """ "*** YOUR CODE HERE ***" def huffman(frequencies): """Return a Huffman encoding for FREQUENCIES, a list of (symbol, frequency) pairs. >>> frequencies = [('a', 8), ('b', 3), ('c', 1), ('d', 1)] >>> h = huffman(frequencies) >>> for letter, code in sorted(encodings(h).items()): ... print(letter + ':', code) a: [1] b: [0, 1] c: [0, 0, 0] d: [0, 0, 1] """ frequencies.sort(key=lambda freq: freq[1]) # lowest frequencies first leaves = [huffman_leaf(letter, freq) for letter, freq in frequencies] "*** YOUR CODE HERE ***" def huffman_wiki(): """Return a Huffman encoding tree for the text of the Huffman coding page on Wikipedia. (Internet connection required!) >>> e = encodings(huffman_wiki()) >>> [[letter, e[letter]] for letter in ['a', 'b', 'c']] [['a', [0, 0, 1, 0]], ['b', [1, 0, 0, 0, 1, 0]], ['c', [0, 1, 0, 1, 1]]] """ from urllib.request import urlopen from json import loads from collections import Counter huff = urlopen('http://goo.gl/w1Jdjj').read().decode() content = loads(huff)['query']['pages']['13883']['revisions'][0]['*'] return huffman(list(Counter(content).items()))