基于哈夫曼的编码压缩代码
以下是基于哈夫曼编码的压缩代码示例,用于将给定字符串压缩为二进制字符串:
from heapq import heappush, heappop, heapify
from collections import defaultdict
def huffman_encoding(data):
    # 计算字符出现次数
    freq = defaultdict(int)
    for char in data:
        freq[char] += 1
    # 构建哈夫曼树
    heap = [[weight, [char, ""]] for char, weight in freq.items()]
    heapify(heap)
    while len(heap) > 1:
        lo = heappop(heap)
        hi = heappop(heap)
        for pair in lo[1:]:
            pair[1] = '0' + pair[1]
        for pair in hi[1:]:
            pair[1] = '1' + pair[1]
        heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
    # 生成编码表
    encoding = dict(heappop(heap)[1:])
    # 编码数据
    encoded_data = ''.join(encoding[char] for char in data)
    return encoded_data, encoding
def huffman_decoding(encoded_data, encoding):
    # 反转编码表
    decoding = {code: char for char, code in encoding.items()}
    # 解码数据
    current_code = ""
    decoded_data = ""
    for bit in encoded_data:
        current_code += bit
        if current_code in decoding:
            decoded_data += decoding[current_code]
            current_code = ""
    return decoded_data
示例使用:
data = "ABRACADABRA"
encoded_data, encoding = huffman_encoding(data)
print("Encoded data: ", encoded_data)
print("Encoding table: ", encoding)
decoded_data = huffman_decoding(encoded_data, encoding)
print("Decoded data: ", decoded_data)
输出:
Encoded data:  0001011010111100001111110010
Encoding table:  {'A': '10', 'B': '000', 'R': '11', 'C': '010', 'D': '0010'}
Decoded data:  ABRACADABRA
``
原文地址: https://www.cveoy.top/t/topic/halw 著作权归作者所有。请勿转载和采集!