2.70
练习 2.70 下面带有相对频度的8个符号的字母表,是为了有效编码20世纪50年代的摇滚歌曲中的词语而设计的。(请注意,“字母表”中的“符号”不必是单个字母。)
A 2 NA 16
BOOM 1 SHA 3
GET 2 YIP 9
JOB 2 WAH 1
请用(练习2.69的)generate-huffman-tree过程生成对应的Huffman树,用(练习2.68的)encode过程编码下面的消息:
Get a job
Sha na na na na na na na na
Get a job
Sha na na na na na na na na
Wah yip yip yip yip yip yip yip yip yip
Sha boom
这一编码需要多少个二进制位?如果对这8个符号的字母表采用定长编码,完成这个歌曲的编码最少需要多少个二进制位?
make-leaf-set的代码如下:
(define (make-leaf-set pairs)
(if (null? pairs)
'()
(let ((pair (car pairs)))
(adjoin-set
(make-leaf (car pair) (cadr pair))
(make-leaf-set (cdr pairs))
)
)
)
)
x#<undef>
adjoin-set 代码如下:
(define (adjoin-set x set)
(cond
((null? set) (list x))
((<= (weight x) (weight (car set))) (cons x set))
(else (cons (car set) (adjoin-set x (cdr set))))
)
)
#<undef>
上一练习中的代码:
(define (leaf? object) (eq? (car object) 'leaf))
(define (symbol-leaf x) (cadr x))
(define (weight-leaf x) (caddr x))
(define (left-branch tree)
(car tree)
)
(define (right-branch tree)
(if (null? (cdr tree))
'()
(cadr tree)
)
)
(define (encode-symbol-with-code char tree code)
(if (leaf? tree)
(if (eq? (symbol-leaf tree) char)
(list code)
'()
)
(let
(
(left (encode-symbol-with-code char (left-branch tree) '0))
(right (encode-symbol-with-code char (right-branch tree) '1))
)
(let
(
(res (append left right))
)
(if (null? res)
'()
(append (list code) res)
)
)
)
)
)
(define (encode-symbol char tree)
; (encode-symbol-with-code char tree '0)
(if (leaf? tree)
(if (eq? (symbol-leaf tree) char)
(list '0)
'()
)
(let
(
(left-res (encode-symbol-with-code char (left-branch tree) '0))
(right-res (encode-symbol-with-code char (right-branch tree) '1))
)
(append left-res right-res)
)
)
)
#<undef>
decode 的代码如下:
(define (decode bits tree)
(define (decode-1 bits current-branch)
(if (null? bits)
'()
(let
(
(next-branch
(choose-branch
(car bits)
current-branch
)
)
)
(if (leaf? next-branch)
(cons
(symbol-leaf next-branch)
(decode-1 (cdr bits) tree)
)
(decode-1 (cdr bits) next-branch)
)
)
)
)
(decode-1 bits tree)
)
(define (choose-branch bit branch)
(cond
((= bit 0) (left-branch branch))
((= bit 1) (right-branch branch))
(else
(error "bad bit -- CHOOSE-BRANCH" bit)
)
)
)
(define (make-leaf symbol weight)
(list 'leaf symbol weight)
)
(define (leaf? object)
(eq? (car object) 'leaf)
)
(define (symbol-leaf x) (cadr x))
(define (weight-leaf x) (caddr x))
(define (make-code-tree left right)
(list
left
right
(append (symbols left) (symbols right))
(+ (weight left) (weight right))
)
)
(define (symbols tree)
(if (leaf? tree)
(list (symbol-leaf tree))
(caddr tree))
)
(define (weight tree)
(if
(leaf? tree)
(weight-leaf tree)
(cadddr tree)
)
)
(define sample-tree
(make-code-tree
(make-leaf 'A 4)
(make-code-tree
(make-leaf 'B 2)
(make-code-tree
(make-leaf 'D 1)
(make-leaf 'C 1)
)
)
)
)
(define sample-message '(0 1 1 0 0 1 0 1 0 1 1 1 0))
(decode sample-message sample-tree)
('A 'D 'A 'B 'B 'C 'A)
(define sample-tree
(make-code-tree
(make-leaf 'A 4)
(make-code-tree
(make-leaf 'B 2)
(make-code-tree
(make-leaf 'D 1)
(make-leaf 'C 1)
)
)
)
)
(encode-symbol 'A sample-tree)
(0)
测试一下 make-leaf-set:
(define sorted (make-leaf-set (list (list 'A 8) (list 'B' 4) (list 'C' 2) (list 'D 1))))
sorted
(('leaf 'D 1) ('leaf 'C' 2) ('leaf 'B' 4) ('leaf 'A 8))
果然能将对偶表变成叶的有序集合!现在来写successive-merge:
(define (successive-merge sorted-leaf-set)
(if (<= (length sorted-leaf-set) 1)
sorted-leaf-set
(let
(
(left (car sorted-leaf-set))
(right (cadr sorted-leaf-set))
)
(let
(
(new-leaf (make-code-tree left right))
(rest (cddr sorted-leaf-set))
)
(if (null? rest)
new-leaf
(successive-merge
(adjoin-set new-leaf rest)
)
)
)
)
)
)
(successive-merge sorted)
(((('leaf 'D 1) ('leaf 'C' 2) ('D 'C') 3) ('leaf 'B' 4) ('D 'C' 'B') 7) ('leaf 'A 8) ('D 'C' 'B' 'A) 15)
生成 Huffman 树:
(define (generate-huffman-tree pairs)
(successive-merge (make-leaf-set pairs))
)
(define p (list
(list 'A 2)
(list 'NA 16)
(list 'BOOM 1)
(list 'SHA 3)
(list 'GET 2)
(list 'YIP 9)
(list 'JOB 2)
(list 'WAH 1)
))
(define huffman (generate-huffman-tree p))
huffman
(('leaf 'NA 16) (('leaf 'YIP 9) (((('leaf 'BOOM 1) ('leaf 'WAH 1) ('BOOM 'WAH) 2) ('leaf 'A 2) ('BOOM 'WAH 'A) 4) (('leaf 'SHA 3) (('leaf 'GET 2) ('leaf 'JOB 2) ('GET 'JOB) 4) ('SHA 'GET 'JOB) 7) ('BOOM 'WAH 'A 'SHA 'GET 'JOB) 11) ('YIP 'BOOM 'WAH 'A 'SHA 'GET 'JOB) 20) ('NA 'YIP 'BOOM 'WAH 'A 'SHA 'GET 'JOB) 36)
编码
(define (encode message tree)
(if (null? message)
'()
(append
(encode-symbol (car message) tree)
(encode (cdr message) tree)
)
)
)
(define message (list
'GET 'A 'JOB 'SHA 'NA 'NA 'NA 'NA 'NA 'NA 'NA 'NA 'GET 'A 'JOB 'SHA 'NA 'NA 'NA 'NA 'NA 'NA 'NA 'NA 'WAH 'YIP 'YIP 'YIP 'YIP 'YIP 'YIP 'YIP 'YIP 'YIP 'SHA 'BOOM
))
(define m (encode message huffman))
m
(1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 1 1 0 1 1 0 0 0)
需要的二进制位:
(length m)
84
如果采用定长编码,由于一共有 8 个不同的符号,为了能够区别它们,对于每个符号需要至少 3 个二进制位。对于要编码的信息,需要使用的二进制位是该信息长度的 3 倍,即需要:
(* 3 (length message))
108
个二进制位。