Java 实现LZ78压缩算法的示例代码
作者:简简单单OnlineZuozuo
这篇文章主要介绍了 Java 实现 LZ78 压缩算法的示例代码，代码简单易懂，对大家的学习或工作具有一定的参考借鉴价值，需要的朋友可以参考下。
LZ78 压缩算法的 Java 实现
1、压缩算法的实现
通过多路搜索树提高检索速度
package com.wretchant.lz78;

import java.util.*;

/** Multi-way (character-keyed) prefix tree used to test whether a phrase is already known. */
class Trie {

  private final TrieNode root;

  public Trie() {
    root = new TrieNode();
  }

  /**
   * Adds {@code word} to the tree, creating any missing nodes along its path.
   *
   * @param word the phrase to register
   */
  public void insert(String word) {
    TrieNode node = root;
    for (int i = 0; i < word.length(); i++) {
      node = node.childdren.computeIfAbsent(word.charAt(i), key -> new TrieNode());
    }
    node.wordEnd = true;
  }

  /**
   * Tells whether exactly {@code word} (not merely a prefix of a longer word) was inserted.
   *
   * @param word the phrase to look up
   * @return {@code true} if {@code word} was previously inserted
   */
  public boolean search(String word) {
    TrieNode node = root;
    for (int i = 0; i < word.length(); i++) {
      node = node.childdren.get(word.charAt(i));
      if (node == null) {
        return false;
      }
    }
    return node.wordEnd;
  }
}

/** One node of {@link Trie}: its outgoing edges plus an end-of-word marker. */
class TrieNode {

  // The spelling of this field name is kept as-is because it is visible to the whole package.
  Map<Character, TrieNode> childdren;
  boolean wordEnd;

  public TrieNode() {
    childdren = new HashMap<Character, TrieNode>();
    wordEnd = false;
  }
}

/** One entry of the encoded output: a dictionary index and the character that follows it. */
class Output {

  private final Integer index;
  private final Character character;

  Output(Integer index, Character character) {
    this.index = index;
    this.character = character;
  }

  public Integer getIndex() {
    return index;
  }

  public Character getCharacter() {
    return character;
  }
}

/** LZ78 encoder. */
class LZencode {

  /**
   * Placeholder character emitted when the input ends exactly on a known phrase.
   *
   * <p>NOTE: this is a real space, so such an entry is indistinguishable from one whose trailing
   * literal is a space; a decoder that appends every character will emit it.
   */
  private static final char NO_CHARACTER = ' ';

  @FunctionalInterface
  interface Encode {
    /**
     * @param message the text to compress
     * @return the list of (index, character) entries, in emission order
     */
    List<Output> encode(String message);
  }

  /**
   * Builds a prefix tree containing every string in {@code keys}.
   *
   * @param keys the phrases to insert
   * @return a new tree holding all of {@code keys}
   */
  static Trie buildTree(Set<String> keys) {
    Trie trie = new Trie();
    keys.forEach(trie::insert);
    return trie;
  }

  /**
   * Encodes a message into (index, character) entries. Index 0 means "no prefix phrase"; other
   * indices are the 1-based insertion order of the phrase in the dictionary.
   */
  public static final Encode ENCODE =
      message -> {
        List<Output> outputs = new ArrayList<>();
        Map<String, Integer> treeDict = new HashMap<>();
        // A single tree is grown alongside the dictionary instead of being rebuilt from all
        // keys on every step; lookups see exactly the same set of phrases.
        Trie trie = new Trie();
        int mLen = message.length();
        int i = 0;
        while (i < mLen) {
          char messageI = message.charAt(i);
          String messageIStr = String.valueOf(messageI);
          if (!trie.search(messageIStr)) {
            // Unknown single character: emit it with index 0 and register it.
            outputs.add(new Output(0, messageI));
            treeDict.put(messageIStr, treeDict.size() + 1);
            trie.insert(messageIStr);
            i++;
          } else if (i == mLen - 1) {
            // Known character at the very end of the input: nothing follows it.
            outputs.add(new Output(treeDict.get(messageIStr), NO_CHARACTER));
            i++;
          } else {
            // Extend the phrase starting at i until it is no longer in the dictionary.
            for (int j = i + 1; j < mLen; j++) {
              String substring = message.substring(i, j + 1);
              if (!trie.search(substring)) {
                String str = message.substring(i, j);
                outputs.add(new Output(treeDict.get(str), message.charAt(j)));
                treeDict.put(substring, treeDict.size() + 1);
                trie.insert(substring);
                i = j + 1;
                break;
              }
              if (j == mLen - 1) {
                // Input ended while the phrase was still a known one.
                outputs.add(new Output(treeDict.get(substring), NO_CHARACTER));
                i = j + 1;
              }
            }
          }
        }
        return outputs;
      };
}
2、解压缩算法的实现
package com.wretchant.lz78;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/** LZ78 decoder: rebuilds the text from a list of (index, character) entries. */
public class LZdecode {

  @FunctionalInterface
  interface Decode {
    /**
     * @param outputs the encoded entries, in emission order
     * @return the decoded string
     * @throws IllegalArgumentException if an entry refers to a dictionary index that has not been
     *     defined by an earlier entry
     */
    String decode(List<Output> outputs);
  }

  /**
   * Decodes entries by replaying the dictionary construction: each entry expands to the phrase
   * stored under its index (empty for index 0) followed by its character, and that expansion is
   * stored as the next dictionary phrase.
   */
  public static final Decode DECODE =
      (List<Output> outputs) -> {
        StringBuilder unpacked = new StringBuilder();
        Map<Integer, String> treeDict = new HashMap<>();
        for (Output output : outputs) {
          Integer index = output.getIndex();
          Character character = output.getCharacter();

          String prefix = "";
          if (index != 0) {
            prefix = treeDict.get(index);
            if (prefix == null) {
              // Concatenating a missing phrase would silently write the text "null".
              throw new IllegalArgumentException("Unknown dictionary index: " + index);
            }
          }
          // A null character is treated as "no trailing literal" rather than the text "null".
          String suffix = character == null ? "" : character.toString();

          String term = prefix + suffix;
          unpacked.append(term);
          treeDict.put(treeDict.size() + 1, term);
        }
        return unpacked.toString();
      };
}
3、测试和使用
package com.wretchant.lz78;

import java.io.InputStream;
import java.util.List;
import java.util.Scanner;
import java.util.function.ToIntFunction;

/** Command-line demo: reads one line from standard input and prints its LZ78 encoding. */
public class LZpack {

  /** Prints every entry as an index/character pair, one per line, and returns 1. */
  public static final ToIntFunction<List<Output>> DICT_PRINT =
      outputs -> {
        outputs.forEach(
            output ->
                System.out.println(
                    "index :" + output.getIndex() + " char :" + output.getCharacter()));
        return 1;
      };

  /**
   * Prompts for a line of text, encodes it and prints the resulting entries.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // try-with-resources releases the scanner even if encoding or printing throws.
    try (Scanner scanner = new Scanner(System.in)) {
      System.out.println("Please input text ");
      // On immediate end-of-input, encode the empty string instead of failing in nextLine().
      String input = scanner.hasNextLine() ? scanner.nextLine() : "";
      LZencode.Encode encode = LZencode.ENCODE;
      List<Output> outputs = encode.encode(input);
      DICT_PRINT.applyAsInt(outputs);
    }
  }
}
测试结果如下
4、Python 版本的实现代码
from typing import Iterable, Iterator, Tuple


def compress(message: str) -> Iterator[Tuple[int, str]]:
    """Lazily encode ``message`` as LZ78 ``(index, char)`` pairs.

    ``index`` is 0 when the pair carries a brand-new single character, otherwise
    it is the 1-based insertion order of the phrase that precedes ``char``.
    ``char`` is the empty string when the input ends exactly on a known phrase.
    """
    tree_dict = {}
    m_len = len(message)
    i = 0
    while i < m_len:
        if message[i] not in tree_dict:
            # Case I: unseen single character.
            yield (0, message[i])
            tree_dict[message[i]] = len(tree_dict) + 1
            i += 1
        elif i == m_len - 1:
            # Case III: known character at the very end of the input.
            yield (tree_dict[message[i]], '')
            i += 1
        else:
            for j in range(i + 1, m_len):
                phrase = message[i:j + 1]
                if phrase not in tree_dict:
                    # Case II: longest known phrase plus one new character.
                    yield (tree_dict[message[i:j]], message[j])
                    tree_dict[phrase] = len(tree_dict) + 1
                    i = j + 1
                    break
                elif j == m_len - 1:
                    # Case III: input ended while the phrase was still known.
                    yield (tree_dict[phrase], '')
                    i = j + 1


def uncompress(packed: Iterable[Tuple[int, str]]) -> str:
    """Rebuild the original text from the pairs produced by ``compress``."""
    pieces = []
    tree_dict = {}
    for index, ch in packed:
        if index == 0:
            term = ch
        else:
            term = tree_dict.get(index) + ch
        # Collect pieces and join once instead of growing a string in the loop.
        pieces.append(term)
        tree_dict[len(tree_dict) + 1] = term
    return ''.join(pieces)


if __name__ == '__main__':
    messages = ['ABBCBCABABCAABCAAB', 'BABAABRRRA', 'AAAAAAAAA']
    for m in messages:
        pack = compress(m)
        unpack = uncompress(pack)
        print(unpack == m)
到此这篇关于 Java 实现 LZ78 压缩算法的文章就介绍到这了。更多相关 Java LZ78 压缩算法内容，请搜索脚本之家以前的文章或继续浏览下面的相关文章。希望大家以后多多支持脚本之家！