基于哈夫曼的编码压缩代码
以下是基于哈夫曼编码的压缩代码示例,用于将给定字符串压缩为二进制字符串:
from heapq import heappush, heappop, heapify
from collections import defaultdict
def huffman_encoding(data):
# 计算字符出现次数
freq = defaultdict(int)
for char in data:
freq[char] += 1
# 构建哈夫曼树
heap = [[weight, [char, ""]] for char, weight in freq.items()]
heapify(heap)
while len(heap) > 1:
lo = heappop(heap)
hi = heappop(heap)
for pair in lo[1:]:
pair[1] = '0' + pair[1]
for pair in hi[1:]:
pair[1] = '1' + pair[1]
heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
# 生成编码表
encoding = dict(heappop(heap)[1:])
# 编码数据
encoded_data = ''.join(encoding[char] for char in data)
return encoded_data, encoding
def huffman_decoding(encoded_data, encoding):
# 反转编码表
decoding = {code: char for char, code in encoding.items()}
# 解码数据
current_code = ""
decoded_data = ""
for bit in encoded_data:
current_code += bit
if current_code in decoding:
decoded_data += decoding[current_code]
current_code = ""
return decoded_data
示例使用:
data = "ABRACADABRA"
encoded_data, encoding = huffman_encoding(data)
print("Encoded data: ", encoded_data)
print("Encoding table: ", encoding)
decoded_data = huffman_decoding(encoded_data, encoding)
print("Decoded data: ", decoded_data)
输出:
Encoded data: 0001011010111100001111110010
Encoding table: {'A': '10', 'B': '000', 'R': '11', 'C': '010', 'D': '0010'}
Decoded data: ABRACADABRA
``
原文地址: https://www.cveoy.top/t/topic/halw 著作权归作者所有。请勿转载和采集!