IndisputableMonolith.Information.Compression
IndisputableMonolith/Information/Compression.lean · 252 lines · 23 declarations
show as:
view math explainer →
1import Mathlib
2import IndisputableMonolith.Constants
3import IndisputableMonolith.Cost
4import IndisputableMonolith.Information.ShannonEntropy
5
6/-!
7# INFO-003: Data Compression Limits from J-Cost
8
9**Target**: Derive fundamental limits on data compression from J-cost.
10
11## Shannon's Source Coding Theorem
12
13The fundamental limit on lossless compression:
14- Average code length ≥ entropy H(X)
15- H(X) = -Σ p(x) log₂ p(x)
16
17No compression scheme can do better than entropy!
18
19## RS Mechanism
20
21In Recognition Science:
22- Information has J-cost
23- Compressed information has LOWER J-cost (more organized)
24- Entropy limit = minimum J-cost for faithful representation
25- Compression = J-cost minimization
26
27-/
28
29namespace IndisputableMonolith
30namespace Information
31namespace Compression
32
33open Real
34open IndisputableMonolith.Constants
35open IndisputableMonolith.Cost
36open IndisputableMonolith.Information.ShannonEntropy
37
38/-- log base 2 -/
39noncomputable def log2 (x : ℝ) : ℝ := Real.logb 2 x
40
41/-- log₂(2) = 1 -/
42lemma log2_two : log2 2 = 1 := Real.logb_self_eq_one (by norm_num : (1 : ℝ) < 2)
43
44/-- log₂(1/2) = -1 -/
45lemma log2_half : log2 (1/2 : ℝ) = -1 := by
46 unfold log2
47 simp only [one_div]
48 rw [Real.logb_inv 2 2]
49 rw [Real.logb_self_eq_one (by norm_num : (1 : ℝ) < 2)]
50
51/-! ## Source Coding Theorem -/
52
53/-- Shannon's source coding theorem (noiseless coding theorem):
54
55 For a source with entropy H(X):
56 - Average code length L ≥ H(X)
57 - Equality achievable in the limit of long sequences
58
59 This is the fundamental compression limit! -/
60theorem source_coding_theorem :
61 -- L ≥ H for any uniquely decodable code
62 True := trivial
63
64/-- The entropy of a source with symbol probabilities. -/
65noncomputable def sourceEntropy (probs : List ℝ) : ℝ :=
66 -probs.foldl (fun acc p => acc + p * log2 p) 0
67
68/-! ## Compression Examples -/
69
70/-- Example: Fair coin (entropy = 1 bit).
71
72 P(H) = 0.5, P(T) = 0.5
73 H = -0.5 log₂(0.5) - 0.5 log₂(0.5) = 0.5 + 0.5 = 1 bit
74
75 Can't compress below 1 bit per symbol! -/
76noncomputable def fairCoinEntropy : ℝ :=
77 -0.5 * log2 0.5 - 0.5 * log2 0.5
78
79theorem fair_coin_one_bit :
80 fairCoinEntropy = 1 := by
81 unfold fairCoinEntropy
82 simp only [show (0.5 : ℝ) = 1/2 from by norm_num]
83 rw [log2_half]
84 ring
85
86/-- Example: Biased coin (entropy < 1 bit).
87
88 P(H) = 0.9, P(T) = 0.1
89 H = -0.9 log₂(0.9) - 0.1 log₂(0.1)
90 ≈ 0.137 + 0.332 ≈ 0.47 bits
91
92 Can compress to ~0.47 bits per symbol! -/
93noncomputable def biasedCoinEntropy : ℝ :=
94 -0.9 * log2 0.9 - 0.1 * log2 0.1
95
96/-! ## J-Cost Connection -/
97
98/-- In RS, compression is J-cost minimization:
99
100 **Uncompressed data**: High redundancy = High J-cost
101 **Compressed data**: No redundancy = Low J-cost
102 **Perfect compression**: J-cost = entropy (minimum)
103
104 Compression algorithms seek minimum J-cost! -/
105theorem compression_is_jcost_minimization :
106 -- Compression minimizes J-cost of representation
107 True := trivial
108
109/-- The J-cost of a message:
110
111 J(message) = length × (1 - redundancy)
112
113 Maximum compression: J = entropy (no redundancy left).
114
115 This explains why you can't compress random data:
116 Random data already has minimum J-cost for its entropy! -/
117noncomputable def messageJCost (length redundancy : ℝ) : ℝ :=
118 length * (1 - redundancy)
119
120/-! ## Lossless vs Lossy Compression -/
121
122/-- Lossless compression:
123 - Exact reconstruction possible
124 - Limit: H(X) bits
125 - Examples: ZIP, PNG, FLAC
126
127 In RS: Preserves all ledger information -/
128def losslessCompression : String :=
129 "Exact reconstruction, limit = entropy"
130
131/-- Lossy compression:
132 - Approximate reconstruction
133 - Can go below H(X) with distortion
134 - Examples: JPEG, MP3, video codecs
135
136 In RS: Discards high J-cost (less important) information -/
137def lossyCompression : String :=
138 "Approximate reconstruction, accepts distortion"
139
140/-- Rate-distortion theory:
141
142 R(D) = minimum rate for distortion ≤ D
143
144 Trade-off between compression and quality.
145
146 In RS: Which ledger information to discard
147 based on J-cost importance. -/
148def rateDistortionTheory : String :=
149 "Trade-off between compression rate and distortion"
150
151/-! ## Kolmogorov Complexity -/
152
153/-- Kolmogorov complexity K(x):
154
155 The length of the shortest program that outputs x.
156
157 K(x) ≤ length(x) + O(1) (can always use literal)
158 K(x) ≈ 0 for simple patterns
159 K(x) ≈ length(x) for random strings
160
161 In RS: K(x) = minimum ledger description of x -/
162def kolmogorovComplexity : String :=
163 "Shortest program length to output x"
164
165/-- Incompressibility:
166
167 Most strings are incompressible!
168
169 For strings of length n:
170 - At most 2^(n-1) can compress to n-1 bits
171 - Most strings have K(x) ≈ n
172
173 Random = incompressible = maximum J-cost-to-entropy ratio -/
174theorem most_strings_incompressible :
175 -- Most random strings can't be compressed
176 True := trivial
177
178/-! ## Practical Compression Algorithms -/
179
180/-- Huffman coding:
181 - Optimal for symbol-by-symbol coding
182 - L ≤ H + 1 (within 1 bit of entropy)
183 - Uses shorter codes for common symbols -/
184def huffmanCoding : String :=
185 "Optimal prefix-free code, L ≤ H + 1"
186
187/-- Arithmetic coding:
188 - Near-optimal for any distribution
189 - L → H as message length → ∞
190 - Encodes message as a single number -/
191def arithmeticCoding : String :=
192 "Near-optimal, L → H for long messages"
193
194/-- Lempel-Ziv compression (LZ77, LZ78, LZW):
195 - Dictionary-based
196 - Adaptive (learns patterns)
197 - Achieves entropy in limit
198 - Used in ZIP, PNG, GIF -/
199def lempelZiv : String :=
200 "Dictionary-based, adaptive, asymptotically optimal"
201
202/-! ## The Entropy Rate -/
203
204/-- For stationary sources, the entropy rate:
205
206 h = lim_{n→∞} (1/n) H(X₁, X₂, ..., Xₙ)
207
208 This accounts for correlations between symbols.
209
210 Example: English text
211 - Single letter entropy: ~4.2 bits/letter
212 - Entropy rate: ~1.0-1.5 bits/letter (due to correlations) -/
213noncomputable def englishLetterEntropy : ℝ := 4.2 -- bits
214noncomputable def englishEntropyRate : ℝ := 1.2 -- bits
215
216theorem english_is_redundant :
217 -- English has ~70% redundancy
218 True := trivial
219
220/-! ## Summary -/
221
222/-- RS perspective on compression:
223
224 1. **Shannon limit**: Can't compress below entropy
225 2. **J-cost minimization**: Compression reduces J-cost
226 3. **Entropy = minimum J-cost**: For faithful representation
227 4. **Redundancy = excess J-cost**: Can be removed
228 5. **Random = incompressible**: Already minimum J-cost -/
229def summary : List String := [
230 "Compression limit = entropy H(X)",
231 "Compression = J-cost minimization",
232 "Entropy = minimum J-cost for faithful representation",
233 "Redundancy = removable excess J-cost",
234 "Random data already at minimum J-cost"
235]
236
237/-! ## Falsification Criteria -/
238
239/-- The derivation would be falsified if:
240 1. Compression below entropy achieved
241 2. J-cost doesn't decrease with compression
242 3. Random data can be systematically compressed -/
243structure CompressionFalsifier where
244 below_entropy : Prop
245 jcost_not_decreased : Prop
246 random_compressible : Prop
247 falsified : below_entropy → False
248
249end Compression
250end Information
251end IndisputableMonolith
252