C语言实现的哈夫曼编码算法详解

#include <stdio.h> #include <stdlib.h> #include <string.h>

// 定义链表节点结构体 typedef struct ListNode { int c; // 存储字符（使用 int 类型，能够正确处理特殊字符） int frequency; // 存储字符出现频度 char *code; // 存储字符编码（在后续实现哈夫曼编码时使用） int order; // 存储节点插入先后顺序 struct ListNode *parent; //结点的双亲结点(对哈夫曼树结点有效) struct ListNode *left; //结点的左子树(对哈夫曼树结点有效) struct ListNode *right; // 结点的右子树(对哈夫曼树结点有效) struct ListNode *next; // 结点的后继结点(对频度链表结点有效) int frequencyCount; // 存储节点个数计数器 } ListNode, HuffmanTree;

// 将一个字符插入到链表中，如果已存在则更新频度 void insert(ListNode *head, int c) { ListNode *p = head; while (p->next != NULL) { if (p->next->c == c) { // 如果已存在该字符，则更新频度 p->next->frequency++; return; } p = p->next; } ListNode *newNode = (ListNode *) malloc(sizeof(ListNode)); // 新建一个节点 newNode->c = c; newNode->frequency = 1; newNode->code = NULL; newNode->next = NULL; newNode->order = ++head->frequencyCount; p->next = newNode; }// 将新节点插入到链表尾部

// 对链表进行选择排序，按照字符频度和插入先后顺序排序 void sort(ListNode *head) { ListNode *p, *q; int tmpc; // 定义临时变量存储字符 int tmpfrequency, tmporder; p = head->next; while(p != NULL) { q = p->next; while(q != NULL) { if(p->frequency < q->frequency || (p->frequency == q->frequency && p->order > q->order)) { // 根据频度和先后顺序进行比较 // 如果 p 节点的频度小于 q 节点的频度，或者 p 和 q 节点的频度相等但 p 的插入先后顺序大于 q 的，则交换两个节点的数据域和指针域 tmpc = q->c; q->c = p->c; p->c = tmpc; tmpfrequency = q->frequency; q->frequency = p->frequency; p->frequency = tmpfrequency; tmporder = q->order; q->order = p->order; p->order = tmporder; p->code += q->code - (q->code = p->code); } q = q->next; } p = p->next; } }

// 建立哈夫曼树 void buildHuffmanTree(ListNode *head) { while (head->next->next != NULL) { // 循环执行直到除根结点外的所有结点都具有双亲指针 ListNode *min1 = head->next; // 频度最小的结点 ListNode *min2 = min1->next; // 频度次小的结点 head->next = min2->next; // 将 min2 从链表中删除 min1->next = NULL; min2->next = NULL; ListNode *newNode = (ListNode *) malloc(sizeof(ListNode)); // 新建一个节点作为 min1 和 min2 的双亲节点 newNode->c = -1; // 标记为非字符节点 newNode->frequency = min1->frequency + min2->frequency; newNode->code = NULL; newNode->parent = NULL; newNode->left = min1; newNode->right = min2; min1->parent = newNode; min2->parent = newNode; ListNode *p = head; while (p->next != NULL && p->next->frequency < newNode->frequency) { // 将新节点插入到频度链表中 p = p->next; } newNode->next = p->next; p->next = newNode; } }

// 先序遍历哈夫曼树，得到各个叶子结点的哈夫曼编码 void getHuffmanCode(HuffmanTree *root, char *code, int len) { if (root->left == NULL && root->right == NULL) { // 叶子结点 root->code = (char *) malloc(sizeof(char) * (len + 1)); strcpy(root->code, code); } else { if (root->left != NULL) { char *leftCode = (char *) malloc(sizeof(char) * (len + 2)); // 左子树编码加上 '0' strcpy(leftCode, code); strcat(leftCode, '0'); getHuffmanCode(root->left, leftCode, len + 1); } if (root->right != NULL) { char *rightCode = (char *) malloc(sizeof(char) * (len + 2)); // 右子树编码加上 '1' strcpy(rightCode, code); strcat(rightCode, '1'); getHuffmanCode(root->right, rightCode, len + 1); } } }

// 按所给出的顺序输出相应的哈夫曼编码 void printHuffmanCode(ListNode *head) { ListNode *p = head->next; while (p != NULL) { if (p->c == '\n') { // 对换行符进行特殊处理 printf(''\n' %s\n', p->code); } else { printf(''%c' %s\n', p->c, p->code); } p = p->next; } }

// 计算哈夫曼树的带权路径长度，权重为字符的频度 int getWeightedPathLength(ListNode *head) { int sum = 0; ListNode *p = head->next; while (p != NULL) { sum += p->frequency * strlen(p->code); p = p->next; } return sum; }

int main() { ListNode *head = (ListNode *) malloc(sizeof(ListNode)); head->next = NULL; head->frequency = 0; int c; while ((c = getchar()) != EOF) { //使用 getchar() 函数读取输入字符流 insert(head, c); } sort(head); buildHuffmanTree(head); getHuffmanCode(head->next, '', 0); printHuffmanCode(head); int weightedPathLength = getWeightedPathLength(head); printf('%d', weightedPathLength); return 0; }