哈夫曼编码压缩代码:Python实现
以下是使用哈夫曼编码压缩字符串的 Python 代码示例:
from heapq import heappush, heappop, heapify
from collections import defaultdict
def huffman_encoding(data):
# 计算字符出现次数
freq = defaultdict(int)
for char in data:
freq[char] += 1
# 构建哈夫曼树
heap = [[weight, [char, '']] for char, weight in freq.items()]
heapify(heap)
while len(heap) > 1:
lo = heappop(heap)
hi = heappop(heap)
for pair in lo[1:]:
pair[1] = '0' + pair[1]
for pair in hi[1:]:
pair[1] = '1' + pair[1]
heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
# 生成编码表
encoding = dict(heappop(heap)[1:])
# 编码数据
encoded_data = ''.join(encoding[char] for char in data)
return encoded_data, encoding
def huffman_decoding(encoded_data, encoding):
# 反转编码表
decoding = {code: char for char, code in encoding.items()}
# 解码数据
current_code = ""
decoded_data = ""
for bit in encoded_data:
current_code += bit
if current_code in decoding:
decoded_data += decoding[current_code]
current_code = ""
return decoded_data
示例使用:
data = 'ABRACADABRA'
encoded_data, encoding = huffman_encoding(data)
print('Encoded data: ', encoded_data)
print('Encoding table: ', encoding)
decoded_data = huffman_decoding(encoded_data, encoding)
print('Decoded data: ', decoded_data)
输出:
Encoded data: 0001011010111100001111110010
Encoding table: {'A': '10', 'B': '000', 'R': '11', 'C': '010', 'D': '0010'}
Decoded data: ABRACADABRA
原文地址: https://www.cveoy.top/t/topic/oDkr 著作权归作者所有。请勿转载和采集!