Compression algorithm (Compression)
Run-length Encoding (RLE)
RLE is probably the easiest way to do compression. The assumed data are as follows:
aaaaabbbcdeeeeeeef...
Copy the code
RLE then encodes it as:
5a3b1c1d7e1f...
Copy the code
When data has many duplicate bytes, RLE can save considerable space. It works fine on images.
The compression code
extension Data {
public func compressRLE(a) -> Data {
var data = Data(a)self.withUnsafeBytes { (uPtr: UnsafePointer<UInt8>) in
var ptr = uPtr
let end = ptr + count
while ptr < end {
var count = 0
var byte = ptr.pintee
var next = byte
while next = = byte && ptr < end && count < 64 {
ptr = ptr.advanced(by: 1)
next = ptr.pointee
count + = 1
}
if count > 1 || byte > = 192 {
var size = 191 + UInt8(count)
data.append(&size, count: 1)
data.append(&size, count: 1)}else {
data.append(&byte, count: 1)}}}return data
}
}
Copy the code
Decompression code
public func decompressRLE(a) -> Data {
var data = Data(a)self.withUnsafeBytes { (uPtr: UnsafePointer<UInt8>) in
var ptr = uPtr
let end = ptr + count
while ptr < end {
// Read the next byte. This is either a single value less than 192,
// or the start of a byte run.
var byte = ptr.pointee
ptr = ptr.advanced(by: 1)
if byte < 192 {
data.append(&byte, count: 1)}else if ptr < end {
// Read the actual data value.
var value = ptr.pointee
ptr = ptr.advanced(by: 1)
// And write it out repeatedly.
for _ in 0 ..< byte - 191 {
data.append(&value, count: 1)}}}}return data
}
Copy the code
Huffman Coding
An example is the most intuitive and simple illustration
Suppose you have the following text
so much words wow many compression
Copy the code
Calculate the frequency of each byte
space: 5 u: 1
0: 5 h: 1
s: 4 d: 1
m: 3 a: 1
w: 3 y: 1
c: 2 p: 1
r: 2 e: 1
n: 2 i: 1
Copy the code
You can code it like this
sapce: 5 010 u: 1 11001
0: 5 000 h: 1 10001
s: 4 101 d: 1 11010
m: 3 111 a: 1 11011
w: 3 0010 y: 1 01111
c: 2 0011 p: 1 11000
r: 2 1001 e: 1 01110
n: 2 0110 i: 1 10000
Copy the code
101 000 010 111 11001 0011 10001 010 0010 000 1001 11010 101
s o _ m u c h _ w o r d s
010 0010 000 0010 010 111 11011 0110 01111 010 0011 000 111
_ w o w _ m a n y _ c o m
11000 1001 01110 101 101 10000 000 0110 0
p r e s s i o n
Copy the code
The use of Huffman codes on small inputs is disadvantageous.
The auxiliary code
The minimum data that NSData understands is bytes, but we’re dealing with bits, so we need to convert between the two
Npublic class BitWriter {
public var data = NSMutableData(a)var outByte: UInt8 = 0
var outCount = 0
public func writeBit(bit: Bool) {
if outCount = = 8 {
data.append(&outByte, length: 1)
outCount = 0
}
outByte = (outByte << 1) | (bit ? 1 : 0)
outCount + = 1
}
public func flush(a) {
if outCount > 0 {
if outCount < 8 {
let diff = UInt8(8 - outCount)
outByte < < = diff
}
data.append(&outByte, length: 1)}}}Copy the code
Used to read bits from NSData:
public class BitReader {
var ptr: UnsafePointer<UInt8>
var inByte: UInt8 = 0
var inCount = 8
public init(data: NSData) {
ptr = data.bytes.assumingMemoryBound(to: UInt8.self)}public func readBit(a) -> Bool {
if inCount = = 8 {
inByte = ptr.pointee // load the next byte
inCount = 0
ptr = ptr.successor()
}
let bit = inByte & 0x80 // read the next bit
inByte < < = 1
inCount + = 1
return bit = = 0 ? false : true}}Copy the code
class Huffman {
typealias NodeIndex = Int
struct Node {
var count = 0
var index: NodeIndex = -1
var parent: NodeIndex = -1
var left: NodeIndex = -1
var right: NodeIndex = -1
}
var tree = [Node](repeating: Node(), count: 256)
var root: NodeIndex = -1
}
Copy the code
Calculate the frequency of each byte in the input data
fileprivate func countByteFrequency(inData data: NSData) {
var ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
for _ in 0..<data.length {
let i = Int(ptr.pointee)
tree[i].count + = 1
tree[i].index = i
ptr = ptr.successor()
}
}
Copy the code
Export frequency table
struct Freq {
var byte: UInt8 = 0
var count = 0
}
func frequencyTable(a)- > [Freq] {
var a = [Freq] ()for i in 0..<256 where tree[i].count > 0 {
a.append(Freq(byte: UInt8(i), count: tree[i].count))
}
return a
}
Copy the code
Use a priority queue
fileprivate func buildTree(a) {
var queue = PriortyQueue<Node>(sort: { $0.count < The $1.count })
for node in tree where node.count > 0 {
queue.enqueue(node)
}
while queue.count > 1 {
let node1 = queue.dequeue()!
let node2 = queue.dequeue()!
var parentNode = Node()
parentNode.count = node1.count + node2.count
parentNode.left = node1.index
parentNode.right = node2.index
parentNode.index = tree.count
tree.append(parentNode)
tree[node1.index].parent = parentNode.index
tree[node2.index].parent = parentNode.index
queue.enqueue(parentNode)
}
let rootNode = queue.dequeue()!
root = rootNode.index
}
Copy the code
Now that we know how to build a compression tree from the frequency table, we can use it to compress the contents of the NSData object.
public func compressData(data: NSData) -> NSData {
countByteFrequency(inData: data)
buildTree()
let writer = BitWriter(a)var ptr = data.bytes.assumingMemoryBound(to: UInt8.self)
for _ in 0..<data.length {
let c = ptr.pointee
let i = Int(c)
traverseTree(writer: writer, nodeIndex: i, childIndex: -1)
ptr = ptr.successor()
}
writer.flush()
return writer.data
}
Copy the code
Note: Compression always involves traversing the entire input data twice: first building the frequency table and then converting the bytes into a compressed sequence of bits.
Something interesting happens in traverseTree(). This is a recursive approach:
private func traverseTree(writer: BitWriter.nodeIndex h: Int.childIndex child: Int) {
if tree[h].parent ! = -1 {
traverseTree(writer: writer, nodeIndex: tree[h].parent, childIndex: h)
}
if child ! = -1 {
if child = = tree[h].left {
writer.writeBit(bit: true)}else if child = = tree[h].right {
writer.writeBit(bit: false)}}}Copy the code
Use the compressData() method like this:
let s1 = "so much words wow many compression"
if let originalData = s1.dataUsingEncoding(NSUTF8StringEncoding) {
let huffman1 = Huffman(a)let compressedData = huffman1.compressData(originalData)
print(compressedData.length)
}
Copy the code
unzip
First we need some methods to convert the [Freq] array back to the compression tree:
fileprivate func restoreTree(fromTable frequencyTable: [Freq]) {
for freq in frequencyTable {
let i = Int(freq.byte)
tree[i].count = freq.count
tree[i].index = i
}
buildTree()
}
Copy the code
func decompressData(data: NSData.frequencyTable: [Freq]) -> NSData {
restoreTree(fromTable: frequencyTable)
let reader = BitReader(data: data)
let outData = NSMutableData(a)let byteCount = tree[root].count
var i = 0
while i < byteCount {
var b = findLeafNode(reader: reader, nodeIndex: root)
outData.append(&b, length: 1)
i + = 1
}
return outData
}
Copy the code
Also use helper methods to traverse the tree
private func findLeafNode(reader reader: BitReader.nodeIndex: Int) -> UInt8 {
var h = nodeIndex
while tree[h].right ! = -1 {
if reader.readBit() {
h = tree[h].left
} else {
h = tree[h].right
}
}
return UInt8(h)
}
Copy the code
How to use the decompression method:
let frequencyTable = huffman1.frequencyTable()
let huffman2 = Huffman(a)let decompressedData = huffman2.decompressData(compressedData, frequencyTable: frequencyTable: frequencyTable)
let s2 = String(data: decompressedData, encoding: NSUTF8StringEncoding)!
Copy the code