手记

Huffman编码使用介绍

1.完整代码——Python语言实现:

#节点类,当树中叶节点最大数为n时,霍夫曼树的总节点数为2n-1
class Node(object):
    def __init__(self,name=None,value=None):
        self._name=name
        self._value=value
        self._left=None
        self._right=None
        self._codevalue='0'

#哈夫曼树类
class HuffmanTree(object):
    #根据Huffman树的思想:以叶子节点为基础,反向建立Huffman树
    def __init__(self,char_weights):
        self.codeway = {}#存储节点对应的编码数,1or0
        self.a=[Node(part[0],part[1]) for part in char_weights]  #根据输入的字符及其频数生成叶子节点
        while len(self.a)!=1:#如果没有到达根节点
            self.a.sort(key=lambda node:node._value,reverse=True)#根据节点的value进行从大到小的排序
            c=Node(value=(self.a[-1]._value+self.a[-2]._value))#对于权值最小的两个节点相加作为新节点c的value
            c._left=self.a.pop(-1)#把排序完的树叶节点挂靠在c上,作为左右节点
            c._right=self.a.pop(-1)
            self.a.append(c)#bac作为节点连在树上
        self.root=self.a[0]
#方法一
    def pre(self, tree, length,l):
        if l==0:
            self.b=[0]*length
        node = tree
        if (not node):
            return
        elif node._name:
            print(node._name + '的编码为:',end=' ')
            for i in range(l):
                print(self.b[i],end=' ')
            print('\n')
            return

        #是树的左子节点的话赋值为0,直到到达叶结点之后print了编码,再对考虑右子树
        self.b[l] = 0
        self.pre(node._left, length,l + 1)
        self.b[l] = 1
        self.pre(node._right,length,l + 1)

#方法二
    # 函数得到每一个叶结点的路径
    def binaryTreePaths(self, root):
        if not root:#如果root不存在
            return []
        if not root._left and not root._right:#如果root就是一个叶节点
            return [str(root._value)]
        pathList = []#用来存储路径
        if root._left:#沿左子树下行递归遍历直到到达了叶节点
            root._left._codevalue = '0'
            pathList+=self.binaryTreePaths(root._left)
            self.codeway[root._left._value]=root._left._codevalue
        if root._right:
            root._right._codevalue='1'
            pathList+=self.binaryTreePaths(root._right)
            self.codeway[root._right._value] = root._right._codevalue
        for index, path in enumerate(pathList):#枚举pathList,得到对应节点路径的存放路径
            pathList[index]=str(root._value) + ' ' + path
        # for index,path in enumerate(self.codeway):

            # self.codeway[index] =str(root._codevalue) + ' ' + path
            # print(root._value,root._codevalue)
            #用这个打印每一节点对应的编码
        return pathList
     #生成哈夫曼编码
    def code_way(self):
        return self.codeway
if __name__=='__main__':
    #输入的是字符及其频数
    char_weights=[('a',9),('b',12),('c',6),('d',3),('e',5),('f',15)]
    tree=HuffmanTree(char_weights)
    length=len(char_weights)
#方法二
    pathList = tree.binaryTreePaths(tree.root)
    codeway=tree.code_way()
    print(codeway)
    print("路径输出为: ",pathList)
    for i in pathList:
        i=i.split(' ')[1:]#根节点不参与编码,所以切掉
        print("数字 ",i[-1],"的编码是:",end='')
        for j in i:
            print(codeway[int(j)],end='')
        print('\n')
#方法一
    tree.pre(tree.root,length,0)

运行结果:

2.完整代码——C语言实现:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>

#define MAX_CHAR_KINDS 128//字符种类最大值。。
#define MAX_NUM 1000//字符串最大长度。。
FILE *in, *ou;

typedef struct TreeNode
{
	int weight;
	int id;
	short isLeaf;
	char data;
	char bin;//0或者1。。 
	struct TreeNode *parent;
	struct TreeNode *lChild, *rChild;
} TreeNode;

typedef struct
{
	char data;
	char code[MAX_CHAR_KINDS];
} Code;

//字符串倒置。。 
void ReverseStr(char *str)
{
    int i;
    char c;
    int length;
    length = strlen(str);
    for (i = 0; i < length / 2; i++)
    {
        c = str[length - 1 - i];
        str[length - 1 - i] = str[i];
        str[i] = c;
    }
}

void PreOrderTraverse(TreeNode *t)
{
    if ((t->rChild == NULL) && (t->lChild == NULL))
    {
        t->isLeaf = 1;
    }
    else
    {
        t->isLeaf = 0;
    }
    if (t->parent != NULL)
    {
        t->id = 2 * t->parent->id + (int)t->bin - 48;
    }
    else
    {
        t->id = 1;
        t->bin = ' ';
    }
    if (t->isLeaf == 1)
    {
        fprintf(ou, "%6d%5c%8d   '%c'\n", t->id, t->bin, t->isLeaf, t->data);
    }
    else
    {
        fprintf(ou, "%6d%5c%8d\n", t->id, t->bin, t->isLeaf);
    }
    if (t->lChild != NULL)
    {
        PreOrderTraverse(t->lChild);
    }
    if (t->rChild != NULL)
    {
        PreOrderTraverse(t->rChild);
    }
}

int main()
{
	char str[MAX_NUM];
	char pwd[MAX_NUM];
	TreeNode *HFTree;
	int i, j;
	int length;//字符串长度。。
	int count;//不同字符个数。。
	TreeNode *tree[MAX_CHAR_KINDS];//初始的几个小树。。
	TreeNode *eachChar[MAX_CHAR_KINDS];
	TreeNode *temp, *p;
	Code *HFCode;
	int codeBit;
	short existed;

	//输入,初始化。。
	printf("Input string to encode:\n"); 
	gets(str);
	printf("\n");
	length = strlen(str);
	count = 0;

	//开始统计字符串中各个字符出现的次数。。
	for (i = 0; i < length; i++)
	{
		existed = 0;
		for (j = 0; j < count; j++)
		{
			if (str[i] == tree[j]->data)
			{
				tree[j]->weight++;
				existed = 1;
				break;
			}
		}
		//如果不是现有的字符,拿个新盒子装。。
		if (existed == 0)
		{
			tree[count] = (TreeNode *)malloc(sizeof(TreeNode));
			tree[count]->weight = 1;
			tree[count]->data = str[i];
			tree[count]->parent = NULL;
			tree[count]->lChild = NULL;
			tree[count]->rChild = NULL;
			eachChar[count] = tree[count];//备份。。 
			count++;
		}
	}
	
	//非法输入。。 
	if (count == 0)
	{
        printf("No char!\n"); 
        getch();
        return (0);
    }
    else if (count == 1)
    {
        printf("At least 2 kinds of char!\n"); 
        getch();
        return (0);
    }

	//冒泡,升序。。
	for (i = 0; i < count - 1; i++)
	{
		for (j = 0; j < count - 1 - i; j++)
		{
			if (tree[j]->weight > tree[j+1]->weight)
			{
				temp = tree[j];
				tree[j] = tree[j+1];
				tree[j+1] = temp;
			}
		}
	}

	//生成Huffman树。。
	for (i = 1; i < count; i++)
	{
        temp = (TreeNode *)malloc(sizeof(TreeNode));
        temp->lChild = tree[i-1];
        temp->rChild = tree[i];
        temp->parent = NULL;
        temp->weight = tree[i-1]->weight + tree[i]->weight;
        tree[i-1]->parent = temp;
        tree[i-1]->bin = '0';
        tree[i]->parent = temp;
        tree[i]->bin = '1';
        tree[i] = temp;
        for (j = i; j < count - 1; j++)
        {
            if (tree[j]->weight > tree[j+1]->weight)
            {
                temp = tree[j];
				tree[j] = tree[j+1];
				tree[j+1] = temp;
            }
            else
            {
                break;
            }
        }
	}
	HFTree = tree[count-1];
	
	//保存Huffman Tree到HuffmanTree.txt文件中。。
	if ((ou = fopen("HuffmanTree.txt", "w")) == NULL)
	{
        printf("Cannot open file HuffmanTree.txt\n");
        getch();
        exit(0);
    }
    fprintf(ou, "%6s%5s%8s%6s\n", "NodeID", "Bin", "IsLeaf", "Data");
    PreOrderTraverse(HFTree);
    fclose(ou);

    //冒泡后求各个字符的哈夫曼编码。。
    for (i = 0; i < count - 1; i++)
	{
		for (j = 0; j < count - 1 - i; j++)
		{
			if (eachChar[j]->weight > eachChar[j+1]->weight)
			{
				temp = eachChar[j];
				eachChar[j] = eachChar[j+1];
				eachChar[j+1] = temp;
			}
		}
	}
    HFCode = (Code *)malloc(count * sizeof(Code));
    for (i = 0; i < count; i++)
    {
        temp = eachChar[i];
        HFCode[i].data = temp->data;
        codeBit = 0;
        while (temp->parent != NULL)
        {
            HFCode[i].code[codeBit] = temp->bin;
            temp = temp->parent;
            codeBit++;
        }
        HFCode[i].code[codeBit] = '\0';
        ReverseStr(HFCode[i].code);
    }
    
	//输出,同时保存到HuffmanCode.txt文件里。。
	if ((ou = fopen("HuffmanCode.txt", "w")) == NULL)
	{
        printf("Cannot open file HuffmanCode.txt\n");
        getch();
        exit(0);
    }
	printf("%5s%8s%15s\n", "char", "count", "Huffman code");
	fprintf(ou, "%5s%8s%15s\n", "char", "count", "Huffman code");
	for (i = 0; i < count; i++)
	{
		printf("  '%c'%8d%15s\n", eachChar[i]->data, eachChar[i]->weight, HFCode[i].code);
		fprintf(ou, "  '%c'%8d%15s\n", eachChar[i]->data, eachChar[i]->weight, HFCode[i].code);
	}
	printf("\n");
	fclose(ou);
    
    //解码。。 
    printf("Input string to decode(only including '0' & '1'):\n");
    gets(str);
    length = strlen(str);
    p = HFTree;
    j = 0;
    codeBit = 0;
    for (i = 0; i < length; i++)
    {
        if (str[i] == '0')
        {
            p = p->lChild;
        }
        else if (str[i] == '1')
        {
            p = p->rChild;
        }
        else
        {
            printf("\nInput illegal..\n");
            getch();
            exit(0);
        }
        if ((p->lChild == NULL) && (p->lChild == NULL))
        {
            pwd[codeBit] = p->data;
            p = HFTree;
            codeBit++;
        }
    }
    pwd[codeBit] = '\0';
    printf("\nThe code is: \"%s\"\n", pwd);
    
    getch();
	return (0);
}

运行结果:

3.步骤分解:

原始text文件:

1.以字典的形式统计每个字母出现的频率:

2.创建Huffman树.首先定义一个节点的类,包含名称,概率,左子节点和右子节点
输入的是上一步输出的概率表
输出的是Huffman树的根节点,因为只要知道根节点,其实整棵树的信息就都知道了

3.根据Huffman树进行编码

运行结果:

4.解码,根据编码返回文本

解码的结果:

完整代码:

def findTheFrequency(text):
    result = dict()
    with open(text,'r') as f:
        for line in f.readlines():
            line = line.lower()
            for i in line:
                if i.isalpha():
                    if i in result:
                        result[i] += 1
                    else:
                        result.update({i:1})
    return result


text = "Huffman_origin.txt"
result = findTheFrequency(text)

class Node:
    def __init__(self):
        self.frequency = 0 
        self.name = None 
        self.lchild = None 
        self.rchild = None 
        self.code   = None 
    #判断self对象是否小于other对象
    def __lt__(self,other):
        return self.frequency < other.frequency

def establishHuffmanTree(info_dict):
    node_list = []
    for i in info_dict:
        a = Node()
        a.frequency = info_dict[i]
        a.name = i 
        node_list.append(a)
    while len(node_list) > 1:
        node_list.sort(reverse = True)
        node_1 = node_list.pop()
        node_2 = node_list.pop()
        new_node = Node()
        new_node.frequency = node_1.frequency + node_2.frequency
        new_node.lchild = node_1
        new_node.rchild = node_2
        node_list.append(new_node)
    return new_node 


base_node = establishHuffmanTree(result)  

def encode(node,rst_dict,code):
    if node.name:
        rst_dict.update({node.name:code})
        return 
    code += '0'
    #回溯思想
    encode(node.lchild,rst_dict,code)
    code = code[:-1]
    code += '1'
    encode(node.rchild,rst_dict,code)
    print (rst_dict)
    return rst_dict

code_dict = encode(base_node,{},'')
code_text = "Huffman_code.txt"

def encode_text(code_dict,text,code_text):
    string = ''
    with open(text,'r') as f:
        for line in f.readlines():
            line = line.lower()
            for i in line:
                if i.isalpha():
                    string += code_dict[i]
                else:
                    string += '\n'
    with open(code_text,'w') as f:
        f.write(string)

encode_text(code_dict,text,code_text)

def decode(text_raw,result_address,base_node):
    text_string = ''
    a = base_node
    with open(text_raw,'r') as f:
        for line in f.readlines():
            for i in line:
                if i == '0':
                    b = a.lchild
                    if b.name:
                        text_string += b.name
                        a = base_node
                    else:
                        a = b 

                elif i == '1':
                    b = a.rchild
                    if b.name:
                        text_string += b.name 
                        a = base_node
                    else:
                        a = b 

                else:
                    text_string += '\n'

    with open(result_address,'w') as f:
        f.write(text_string)

result_address = "Huffman_result.txt"
text_raw = code_text
decode(text_raw,result_address,base_node)







0人推荐
随时随地看视频
慕课网APP