gpt4 book ai didi

swift - 霍夫曼压缩:存储树、填充位数以及由 0 和 1 编码而成的字节后,总字节数并未减少

转载 作者:行者123 更新时间:2023-11-30 10:54:04 26 4
gpt4 key购买 nike

我正在用 Swift 实现一种名为霍夫曼编码的无损压缩算法。我最初的版本使用由 "1" 和 "0" 组成的字符串,但后来了解到单个字符(例如 "0")本身就要占用整整一个字节。现在,我已将该字符串转换为 [UInt8](请参阅 packBits 函数)。有些人似乎是通过编写 BitWriter 和 BitReader 来实现的,我以后可以重构成那样,但我认为我的方法最终至少能得到相同的 [UInt8] 结果。与文本的字节数相比,这确实减小了 HuffData.code 的大小,但我还需要存储用于遍历的树以及解码所需的填充位数(pad)。由于 encode 函数返回 HuffData(一个包含这三者的结构体),我注意到其大小实际上大于我传给 encode 的文本。

例如,某段文本的 paragraph.utf8.count 等于 8959 字节,而编码后的总数据约为 21099 字节!其中树占 2763 字节,代码占 5050 字节,pad 占 2 字节。我猜剩下的来自结构体本身?为什么这些数字加起来对不上?

let huff = try? Huffman.encode(paragraph) 

huff.count = 总数据字节

我希望了解为什么这些字节数加起来对不上,以及我的代码中是什么导致总大小明显大于原文本。我是否需要使用更大的文本才能看到真正的效果?我能否以更节省空间的方式存储树、代码和填充位数?为什么总数不等于 树 + 代码 + pad?序列化数据会额外增加字节吗?感谢您的建议!

import Foundation

/// Serialized result of Huffman-encoding a string: the packed bits plus everything needed to decode them.
struct HuffData: Codable {
/// Encoded bit stream packed eight bits per byte (built by `Huffman.packBits`).
var code: [UInt8]
/// Root of the Huffman tree that `Huffman.decode` walks to turn the bits back into text.
var tree: Node
/// Number of zero bits appended to the bit stream to fill the last byte; dropped again when decoding.
var pad: Int
}

/// Huffman coding: a lossless compressor that assigns shorter bit codes to more frequent characters.
///
/// `encode(_:)` produces JSON-serialized `HuffData` (packed bits, the code tree and the padding
/// length); `decode(_:)` reverses it. Empty and single-symbol inputs round-trip correctly.
class Huffman {
    /// Decodes data produced by `encode(_:)` back into the original string.
    ///
    /// - Parameter data: JSON-encoded `HuffData`.
    /// - Returns: The decoded text.
    /// - Throws: Any `JSONDecoder` error, or `DecodingError.dataCorrupted` when the stored
    ///   padding length is negative or larger than the number of packed bits.
    static func decode(_ data: Data) throws -> String {
        let huff = try JSONDecoder().decode(HuffData.self, from: data)
        let bitCount = huff.code.count * 8
        // A negative pad would trap inside `dropLast`; reject corrupt input instead of crashing.
        guard huff.pad >= 0, huff.pad <= bitCount else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Invalid pad \(huff.pad) for \(bitCount) packed bits"
                )
            )
        }
        // Expand every byte back into a string of "0" and "1".
        var bits = ""
        bits.reserveCapacity(bitCount)
        for byte in huff.code {
            let binary = String(byte, radix: 2)
            // `String(_:radix:)` drops leading zeros (e.g. 32 -> "100000"); restore them to 8 digits.
            bits += String(repeating: "0", count: 8 - binary.count) + binary
        }
        return Huffman.traverse(tree: huff.tree, with: String(bits.dropLast(huff.pad)))
    }

    /// Huffman-encodes `input` and serializes the result.
    ///
    /// - Parameter input: Text to compress; may be empty.
    /// - Returns: JSON-encoded `HuffData` holding the packed code, the tree and the pad length.
    /// - Throws: Any error thrown by `JSONEncoder`.
    static func encode(_ input: String) throws -> Data {
        // Count how often each character occurs.
        let frequencies = input.reduce(into: [String: Int]()) { freq, char in
            freq[String(char), default: 0] += 1
        }
        // One leaf node per distinct character.
        let leaves = frequencies.map { Node(name: $0.key, value: $0.value) }
        let tree = Huffman.createTree(with: leaves)
        // Map every character to its bit string by walking the tree.
        let key = Huffman.generateKey(for: tree, prefix: "")
        let code = input.compactMap { key[String($0)] }.joined()
        // Pack the "0"/"1" characters eight to a byte.
        let buffer = Huffman.packBits(for: code)
        let huff = HuffData(code: buffer.code, tree: tree, pad: buffer.pad)
        return try JSONEncoder().encode(huff)
    }

    /// Builds the character -> bit-string table for the subtree rooted at `node`.
    ///
    /// - Parameters:
    ///   - node: Root of the (sub)tree to walk.
    ///   - prefix: Bits accumulated on the path from the tree root to `node`.
    /// - Returns: A dictionary from leaf name to its code.
    static private func generateKey(for node: Node, prefix: String) -> [String: String] {
        guard let left = node.left, let right = node.right else {
            // A leaf reached with an empty prefix means the whole tree is a single symbol.
            // It still needs a one-bit code, otherwise nothing would be emitted for the input.
            return [node.name: prefix.isEmpty ? "0" : prefix]
        }
        var key = generateKey(for: left, prefix: prefix + "0")
        key.merge(generateKey(for: right, prefix: prefix + "1"), uniquingKeysWith: { current, _ in current })
        return key
    }

    /// Builds the Huffman tree by repeatedly joining the two least frequent subtrees.
    ///
    /// - Parameter queue: Leaf nodes, one per distinct symbol, in any order.
    /// - Returns: The root of the tree. For an empty `queue` a placeholder leaf with an empty
    ///   name is returned so that empty input can be encoded without crashing.
    static private func createTree(with queue: [Node]) -> Node {
        guard !queue.isEmpty else {
            return Node(name: "", value: 0)
        }
        // The queue keeps nodes sorted by decreasing count, so `dequeue` yields the rarest.
        var pending = PriorityQueue(queue: queue)
        // Until a single root remains, join the two subtrees of least frequency.
        while pending.count > 1 {
            let node1 = pending.dequeue()
            let node2 = pending.dequeue()
            pending.enqueue(node: Huffman.createRoot(with: node1, and: node2))
        }
        return pending.queue[0]
    }

    /// Decodes a string of "0"/"1" characters by walking `tree` from the root.
    ///
    /// - Parameters:
    ///   - tree: Root of the Huffman tree.
    ///   - code: Bit string with the padding already removed.
    /// - Returns: The concatenated names of the leaves reached.
    static private func traverse(tree: Node, with code: String) -> String {
        var result = ""
        var node = tree
        for bit in code {
            if bit == "0", let left = node.left {
                node = left
            } else if bit == "1", let right = node.right {
                node = right
            }
            // On a leaf: emit its symbol and restart at the root. When the root itself is a
            // leaf (single-symbol tree) no step is taken and every bit emits that symbol.
            if node.left == nil && node.right == nil {
                result += node.name
                node = tree
            }
        }
        return result
    }

    /// Joins two subtrees under a new internal node whose count is the sum of both.
    ///
    /// Internal nodes get an empty name: only leaf names are read when generating codes or
    /// decoding, and concatenating the children's names into every ancestor made the
    /// serialized tree grow with each level.
    static private func createRoot(with first: Node, and second: Node) -> Node {
        return Node(name: "", value: first.value + second.value, left: first, right: second)
    }

    /// Packs a string of "0"/"1" characters into bytes, most significant bit first.
    ///
    /// - Parameter s: Bit string; any character other than "1" is treated as a zero bit.
    /// - Returns: `pad`, the number of zero bits (0...7) appended to fill the last byte, and
    ///   `code`, the packed bytes.
    static private func packBits(for s: String) -> (pad: Int, code: [UInt8]) {
        let bitCount = s.count
        // The outer `% 8` keeps an already byte-aligned stream from gaining a useless extra byte.
        let padding = (8 - bitCount % 8) % 8
        var result = [UInt8]()
        result.reserveCapacity((bitCount + padding) / 8)
        var current: UInt8 = 0
        var filled = 0
        // Single pass over the bits instead of re-copying the remaining string for each byte.
        for bit in s {
            current = (current << 1) | (bit == "1" ? 1 : 0)
            filled += 1
            if filled == 8 {
                result.append(current)
                current = 0
                filled = 0
            }
        }
        if filled > 0 {
            // Left-align the final partial byte; the low `padding` bits are the zero padding.
            result.append(current << padding)
        }
        return (pad: padding, code: result)
    }
}

/// Array-backed priority queue of nodes, kept sorted by decreasing `value` so that the node
/// with the smallest value is always the last element.
struct PriorityQueue {
    /// Backing storage, ordered from largest to smallest `value`.
    var queue: [Node]

    /// Number of nodes currently in the queue.
    var count: Int {
        return queue.count
    }

    /// Inserts `node` while keeping the queue sorted by decreasing `value`.
    ///
    /// The node goes in front of the first element whose value is not larger than its own.
    /// When every element is larger (or the queue is empty) it belongs at the end, where the
    /// smallest values live; inserting it at the front would hide it from `dequeue`.
    mutating func enqueue(node: Node) {
        let position = queue.firstIndex(where: { $0.value <= node.value }) ?? queue.endIndex
        queue.insert(node, at: position)
    }

    /// Removes and returns the node with the smallest `value`.
    ///
    /// Traps if the queue is empty, like `Array.removeLast()`.
    mutating func dequeue() -> Node {
        return queue.removeLast()
    }

    /// Creates a queue from `queue`, sorting it by decreasing `value`.
    init(queue: [Node]) {
        self.queue = queue.sorted(by: { $0.value > $1.value })
    }
}

/// A node of the Huffman tree: a leaf stands for one symbol, an internal node joins two subtrees.
class Node: CustomStringConvertible, Codable {
    /// Symbol text carried by the node.
    let name: String
    /// Occurrence count (for a joined node, the count passed in by whoever built it).
    let value: Int
    /// Child followed for a "0" bit, if any.
    let left: Node?
    /// Child followed for a "1" bit, if any.
    let right: Node?

    /// Creates a node; omit `left` and `right` to create a leaf.
    init(name: String, value: Int, left: Node? = nil, right: Node? = nil) {
        self.name = name
        self.value = value
        self.left = left
        self.right = right
    }

    /// Debug text in the form `name: value`.
    var description: String {
        return name + ": " + String(value)
    }
}

最佳答案

您是在用一个 UInt8(1 字节 = 8 位)来存储编码序列中的每一个比特,所以您的“压缩”结果比应有的大小大了 8 倍。

首先创建一个每个字节可以存储 8 位的数据结构。

关于swift - 霍夫曼压缩不会导致存储树和填充编码为 0's and 1' 的字节数减少,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/54152202/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com