IndisputableMonolith.Cost
IndisputableMonolith/Cost.lean · 649 lines · 61 declarations
show as:
view math explainer →
1import Mathlib
2
3namespace IndisputableMonolith
4namespace Cost
5
6noncomputable def Jcost (x : ℝ) : ℝ := (x + x⁻¹) / 2 - 1
7
8structure CostRequirements (F : ℝ → ℝ) : Prop where
9 symmetric : ∀ {x}, 0 < x → F x = F x⁻¹
10 unit0 : F 1 = 0
11
12lemma Jcost_unit0 : Jcost 1 = 0 := by
13 simp [Jcost]
14
15/-- J(x) expressed as a squared ratio: J(x) = (x-1)²/(2x). -/
16lemma Jcost_eq_sq {x : ℝ} (hx : x ≠ 0) :
17 Jcost x = (x - 1) ^ 2 / (2 * x) := by
18 unfold Jcost
19 field_simp [hx]
20 ring
21
22lemma Jcost_symm {x : ℝ} (hx : 0 < x) : Jcost x = Jcost x⁻¹ := by
23 have hx0 : x ≠ 0 := ne_of_gt hx
24 rw [Jcost_eq_sq hx0, Jcost_eq_sq (inv_ne_zero hx0)]
25 field_simp [hx0]
26 ring
27
28/-- J(x) ≥ 0 for positive x (AM-GM inequality) -/
29lemma Jcost_nonneg {x : ℝ} (hx : 0 < x) : 0 ≤ Jcost x := by
30 have hx0 : x ≠ 0 := hx.ne'
31 rw [Jcost_eq_sq hx0]
32 positivity
33
34def AgreesOnExp (F : ℝ → ℝ) : Prop := ∀ t : ℝ, F (Real.exp t) = Jcost (Real.exp t)
35
36@[simp] lemma Jcost_exp (t : ℝ) :
37 Jcost (Real.exp t) = ((Real.exp t) + (Real.exp (-t))) / 2 - 1 := by
38 have h : (Real.exp t)⁻¹ = Real.exp (-t) := by
39 symm; simp [Real.exp_neg t]
40 simp [Jcost, h]
41
42class SymmUnit (F : ℝ → ℝ) : Prop where
43 symmetric : ∀ {x}, 0 < x → F x = F x⁻¹
44 unit0 : F 1 = 0
45
46class AveragingAgree (F : ℝ → ℝ) : Prop where
47 agrees : AgreesOnExp F
48
49class AveragingDerivation (F : ℝ → ℝ) : Prop extends SymmUnit F where
50 agrees : AgreesOnExp F
51
52lemma even_on_log_of_symm {F : ℝ → ℝ} [SymmUnit F] (t : ℝ) :
53 F (Real.exp t) = F (Real.exp (-t)) := by
54 have hx : 0 < Real.exp t := Real.exp_pos t
55 simpa [Real.exp_neg] using (SymmUnit.symmetric (F:=F) hx)
56
57class AveragingBounds (F : ℝ → ℝ) : Prop extends SymmUnit F where
58 upper : ∀ t : ℝ, F (Real.exp t) ≤ Jcost (Real.exp t)
59 lower : ∀ t : ℝ, Jcost (Real.exp t) ≤ F (Real.exp t)
60
61theorem agrees_on_exp_of_bounds {F : ℝ → ℝ} [AveragingBounds F] :
62 AgreesOnExp F := by
63 intro t
64 have h₁ := AveragingBounds.upper (F:=F) t
65 have h₂ := AveragingBounds.lower (F:=F) t
66 have : F (Real.exp t) = Jcost (Real.exp t) := le_antisymm h₁ h₂
67 simpa using this
68
69theorem F_eq_J_on_pos_alt (F : ℝ → ℝ)
70 (hAgree : AgreesOnExp F) : ∀ {x : ℝ}, 0 < x → F x = Jcost x := by
71 intro x hx
72 have : ∃ t, Real.exp t = x := ⟨Real.log x, by simpa using Real.exp_log hx⟩
73 rcases this with ⟨t, rfl⟩
74 simpa using hAgree t
75
76instance (priority := 90) averagingDerivation_of_bounds {F : ℝ → ℝ} [AveragingBounds F] :
77 AveragingDerivation F :=
78 { toSymmUnit := (inferInstance : SymmUnit F)
79 , agrees := agrees_on_exp_of_bounds (F:=F) }
80
81def mkAveragingBounds (F : ℝ → ℝ)
82 (symm : SymmUnit F)
83 (upper : ∀ t : ℝ, F (Real.exp t) ≤ Jcost (Real.exp t))
84 (lower : ∀ t : ℝ, Jcost (Real.exp t) ≤ F (Real.exp t)) :
85 AveragingBounds F :=
86{ toSymmUnit := symm, upper := upper, lower := lower }
87
88class JensenSketch (F : ℝ → ℝ) : Prop extends SymmUnit F where
89 axis_upper : ∀ t : ℝ, F (Real.exp t) ≤ Jcost (Real.exp t)
90 axis_lower : ∀ t : ℝ, Jcost (Real.exp t) ≤ F (Real.exp t)
91
92instance (priority := 95) averagingBounds_of_jensen {F : ℝ → ℝ} [JensenSketch F] :
93 AveragingBounds F :=
94 mkAveragingBounds F (symm := (inferInstance : SymmUnit F))
95 (upper := JensenSketch.axis_upper (F:=F))
96 (lower := JensenSketch.axis_lower (F:=F))
97
98noncomputable def JensenSketch.of_log_bounds (F : ℝ → ℝ)
99 (symm : SymmUnit F)
100 (upper_log : ∀ t : ℝ, F (Real.exp t) ≤ ((Real.exp t + Real.exp (-t)) / 2 - 1))
101 (lower_log : ∀ t : ℝ, ((Real.exp t + Real.exp (-t)) / 2 - 1) ≤ F (Real.exp t)) :
102 JensenSketch F :=
103{ toSymmUnit := symm
104, axis_upper := by intro t; simpa [Jcost_exp] using upper_log t
105, axis_lower := by intro t; simpa [Jcost_exp] using lower_log t }
106
107noncomputable def F_ofLog (G : ℝ → ℝ) : ℝ → ℝ := fun x => G (Real.log x)
108
109class LogModel (G : ℝ → ℝ) where
110 even_log : ∀ t : ℝ, G (-t) = G t
111 base0 : G 0 = 0
112 upper_cosh : ∀ t : ℝ, G t ≤ ((Real.exp t + Real.exp (-t)) / 2 - 1)
113 lower_cosh : ∀ t : ℝ, ((Real.exp t + Real.exp (-t)) / 2 - 1) ≤ G t
114
115instance (G : ℝ → ℝ) [LogModel G] : SymmUnit (F_ofLog G) :=
116 { symmetric := by
117 intro x hx
118 have hlog : Real.log (x⁻¹) = - Real.log x := by
119 simp
120 dsimp [F_ofLog]
121 have he : G (Real.log x) = G (- Real.log x) := by
122 simpa using (LogModel.even_log (G:=G) (Real.log x)).symm
123 simpa [hlog]
124 using he
125 , unit0 := by
126 dsimp [F_ofLog]
127 simpa [Real.log_one] using (LogModel.base0 (G:=G)) }
128
129instance (priority := 90) (G : ℝ → ℝ) [LogModel G] : JensenSketch (F_ofLog G) :=
130 JensenSketch.of_log_bounds (F:=F_ofLog G)
131 (symm := (inferInstance : SymmUnit (F_ofLog G)))
132 (upper_log := by
133 intro t
134 dsimp [F_ofLog]
135 simpa using (LogModel.upper_cosh (G:=G) t))
136 (lower_log := by
137 intro t
138 dsimp [F_ofLog]
139 simpa using (LogModel.lower_cosh (G:=G) t))
140
141theorem agree_on_exp_extends {F : ℝ → ℝ}
142 (hAgree : AgreesOnExp F) : ∀ {x : ℝ}, 0 < x → F x = Jcost x := by
143 intro x hx
144 have : F (Real.exp (Real.log x)) = Jcost (Real.exp (Real.log x)) := hAgree (Real.log x)
145 simp [Real.exp_log hx] at this
146 exact this
147
148theorem F_eq_J_on_pos {F : ℝ → ℝ}
149 (hAgree : AgreesOnExp F) :
150 ∀ {x : ℝ}, 0 < x → F x = Jcost x :=
151 agree_on_exp_extends (F:=F) hAgree
152
153theorem F_eq_J_on_pos_of_averaging {F : ℝ → ℝ} [AveragingAgree F] :
154 ∀ {x : ℝ}, 0 < x → F x = Jcost x :=
155 F_eq_J_on_pos (hAgree := AveragingAgree.agrees (F:=F))
156
157theorem agrees_on_exp_of_symm_unit (F : ℝ → ℝ) [AveragingDerivation F] :
158 AgreesOnExp F := AveragingDerivation.agrees (F:=F)
159
160theorem F_eq_J_on_pos_of_derivation (F : ℝ → ℝ) [AveragingDerivation F] :
161 ∀ {x : ℝ}, 0 < x → F x = Jcost x :=
162 F_eq_J_on_pos (hAgree := agrees_on_exp_of_symm_unit F)
163
164theorem T5_cost_uniqueness_on_pos {F : ℝ → ℝ} [JensenSketch F] :
165 ∀ {x : ℝ}, 0 < x → F x = Jcost x :=
166by
167 intro x hx
168 have hAgree : AgreesOnExp F := by
169 intro t
170 exact le_antisymm (JensenSketch.axis_upper (F:=F) t) (JensenSketch.axis_lower (F:=F) t)
171 exact (agree_on_exp_extends (F:=F) hAgree) hx
172
173noncomputable def Jlog (t : ℝ) : ℝ := Jcost (Real.exp t)
174
175lemma Jlog_as_cosh (t : ℝ) : Jlog t = Real.cosh t - 1 := by
176 unfold Jlog Jcost
177 rw [Real.cosh_eq, inv_eq_one_div, Real.exp_neg]
178 ring
179
180lemma hasDerivAt_Jlog (t : ℝ) : HasDerivAt Jlog (Real.sinh t) t := by
181 have h1 : Jlog = fun t => Real.cosh t - 1 := by
182 ext t
183 exact Jlog_as_cosh t
184 rw [h1]
185 have h_cosh : HasDerivAt Real.cosh (Real.sinh t) t := Real.hasDerivAt_cosh t
186 have h_const : HasDerivAt (fun _ => (1 : ℝ)) 0 t := hasDerivAt_const t 1
187 have h_sub := h_cosh.sub h_const
188 simp at h_sub
189 exact h_sub
190
191@[simp] lemma hasDerivAt_Jlog_zero : HasDerivAt Jlog 0 0 := by
192 simpa using (hasDerivAt_Jlog 0)
193
194@[simp] lemma deriv_Jlog_zero : deriv Jlog 0 = 0 := by
195 classical
196 simpa using (hasDerivAt_Jlog_zero).deriv
197
198theorem hasDerivAt_Jcost (x : ℝ) (hx : x ≠ 0) :
199 HasDerivAt Jcost ((1 - x⁻¹ ^ 2) / 2) x := by
200 unfold Jcost
201 -- The derivative of f(x) = (x + 1/x)/2 - 1 is f'(x) = (1 - 1/x²)/2
202 have h1 : HasDerivAt (fun y => y + y⁻¹) (1 + (-(x^2)⁻¹ : ℝ)) x := by
203 apply HasDerivAt.add
204 · exact hasDerivAt_id x
205 · exact hasDerivAt_inv hx
206 have h2 : HasDerivAt (fun y => (y + y⁻¹) / 2) ((1 + (-(x^2)⁻¹)) / 2) x :=
207 h1.div_const 2
208 have h3 : HasDerivAt (fun y => (y + y⁻¹) / 2 - 1) ((1 + (-(x^2)⁻¹)) / 2 - 0) x :=
209 h2.sub (hasDerivAt_const x (1 : ℝ))
210 have h_eq : (1 + (-(x^2)⁻¹)) / 2 - 0 = (1 - x⁻¹ ^ 2) / 2 := by
211 have : (x^2)⁻¹ = x⁻¹ ^ 2 := by rw [inv_pow]
212 linarith [this]
213 rw [← h_eq]
214 exact h3
215
216theorem deriv_Jcost_one : deriv Jcost 1 = 0 := by
217 have h : HasDerivAt Jcost ((1 - 1⁻¹ ^ 2) / 2) 1 := hasDerivAt_Jcost 1 (by norm_num)
218 simp at h
219 exact h.deriv
220
221/-!
222## Strict Convexity of Jcost
223
224The theorem `Jcost_strictConvexOn_pos : StrictConvexOn ℝ (Set.Ioi 0) Jcost`
225is proven in `Cost/Convexity.lean` using the second derivative approach:
226J''(x) = x⁻³ > 0 for x > 0.
227
228Import `IndisputableMonolith.Cost.Convexity` to access this theorem.
229-/
230
231@[simp] lemma Jlog_zero : Jlog 0 = 0 := by
232 simp [Jlog, Jcost]
233
234lemma Jlog_nonneg (t : ℝ) : 0 ≤ Jlog t :=
235 Jcost_nonneg (Real.exp_pos t)
236
237/-- J(x) > 0 for x ≠ 1 and x > 0. -/
238lemma Jcost_pos_of_ne_one (x : ℝ) (hx : 0 < x) (hx1 : x ≠ 1) : 0 < Jcost x := by
239 have hx0 : x ≠ 0 := ne_of_gt hx
240 rw [Jcost_eq_sq hx0]
241 apply div_pos
242 · have hne : (x - 1) ≠ 0 := sub_ne_zero.mpr hx1
243 exact sq_pos_of_ne_zero hne
244 · have h2 : (0 : ℝ) < 2 := by norm_num
245 exact mul_pos h2 hx
246
247/-- J(x) = 0 iff x = 1, for positive x. -/
248lemma Jcost_eq_zero_iff (x : ℝ) (hx : 0 < x) : Jcost x = 0 ↔ x = 1 := by
249 constructor
250 · intro h
251 by_contra h1
252 exact absurd h (ne_of_gt (Jcost_pos_of_ne_one x hx h1))
253 · intro h
254 rw [h]
255 exact Jcost_unit0
256
257/-- **THEOREM**: Jcost is surjective onto [0, ∞). -/
258theorem Jcost_surjective_on_nonneg : ∀ y : ℝ, 0 ≤ y → ∃ x : ℝ, 1 ≤ x ∧ Jcost x = y := by
259 intro y hy
260 -- J(x) = (x + 1/x)/2 - 1
261 -- Solve (x + 1/x)/2 - 1 = y => x + 1/x = 2(y+1)
262 -- x^2 - 2(y+1)x + 1 = 0
263 -- x = [(2(y+1)) + sqrt(4(y+1)^2 - 4)] / 2 = (y+1) + sqrt((y+1)^2 - 1)
264 let x := (y + 1) + Real.sqrt ((y + 1) ^ 2 - 1)
265 use x
266 have h_y1_ge_1 : 1 ≤ y + 1 := by linarith
267 have h_sq_ge_0 : 0 ≤ (y + 1) ^ 2 - 1 := by nlinarith
268 constructor
269 · have : 0 ≤ Real.sqrt ((y + 1) ^ 2 - 1) := Real.sqrt_nonneg _
270 linarith
271 · unfold Jcost
272 have hx_pos : 0 < x := by
273 have : 0 ≤ Real.sqrt ((y + 1) ^ 2 - 1) := Real.sqrt_nonneg _
274 linarith
275 field_simp [hx_pos.ne']
276 -- Goal after field_simp: x^2 + 1 - x*2 = x*2*y
277 -- Since x = y+1 + sqrt((y+1)^2 - 1), we have x^2 - 2(y+1)x + 1 = 0
278 -- Thus x^2 + 1 = 2(y+1)x = 2x + 2xy, so x^2 + 1 - 2x = 2xy
279 let s := Real.sqrt ((y + 1) ^ 2 - 1)
280 have hs_sq : s ^ 2 = (y + 1) ^ 2 - 1 := Real.sq_sqrt h_sq_ge_0
281 have hx_eq : x = y + 1 + s := rfl
282 -- Key equation: x^2 - 2(y+1)x + 1 = 0
283 have h_quad : x ^ 2 - 2 * (y + 1) * x + 1 = 0 := by
284 have h1 : x ^ 2 = (y + 1 + s) ^ 2 := by rw [hx_eq]
285 have h2 : (y + 1 + s) ^ 2 = (y + 1) ^ 2 + 2 * (y + 1) * s + s ^ 2 := by ring
286 have h3 : s ^ 2 = (y + 1) ^ 2 - 1 := hs_sq
287 calc x ^ 2 - 2 * (y + 1) * x + 1
288 = (y + 1 + s) ^ 2 - 2 * (y + 1) * (y + 1 + s) + 1 := by simp only [hx_eq]
289 _ = ((y + 1) ^ 2 + 2 * (y + 1) * s + s ^ 2) - 2 * (y + 1) * (y + 1 + s) + 1 := by rw [h2]
290 _ = ((y + 1) ^ 2 + 2 * (y + 1) * s + ((y + 1) ^ 2 - 1)) - 2 * (y + 1) * (y + 1 + s) + 1 := by rw [h3]
291 _ = 0 := by ring
292 -- From h_quad: x^2 + 1 = 2(y+1)x = 2x(y+1) = 2x + 2xy
293 -- So: x^2 + 1 - 2x = 2xy
294 linarith [h_quad]
295
296lemma Jlog_eq_zero_iff (t : ℝ) : Jlog t = 0 ↔ t = 0 := by
297 constructor
298 · intro h
299 have hxpos : 0 < Real.exp t := Real.exp_pos t
300 have hxne : Real.exp t ≠ 0 := ne_of_gt hxpos
301 have hrepr := Jcost_eq_sq hxne
302 rw [Jlog, hrepr] at h
303 have hden_pos : 0 < 2 * Real.exp t := by
304 apply mul_pos
305 · norm_num
306 · exact hxpos
307 have hden_ne : 2 * Real.exp t ≠ 0 := ne_of_gt hden_pos
308 rw [div_eq_zero_iff] at h
309 cases h with
310 | inl h_num =>
311 have hexp1 : Real.exp t - 1 = 0 := by nlinarith [sq_nonneg (Real.exp t - 1)]
312 have hexp_eq : Real.exp t = 1 := by linarith
313 rw [Real.exp_eq_one_iff] at hexp_eq
314 exact hexp_eq
315 | inr h_den =>
316 exact absurd h_den hden_ne
317 · intro h
318 rw [h]
319 exact Jlog_zero
320
321
322theorem EL_stationary_at_zero : deriv Jlog 0 = 0 := by
323 simp
324
325theorem EL_global_min (t : ℝ) : Jlog 0 ≤ Jlog t := by
326 simpa [Jlog_zero] using Jlog_nonneg t
327
328/-- From J(x) = 0 and x > 0, conclude x = 1. -/
329lemma Jcost_zero_iff_one {x : ℝ} (hx : 0 < x) (h : Jcost x = 0) : x = 1 :=
330 (Jcost_eq_zero_iff x hx).mp h
331
332/-! ## Triangle Inequality for J -/
333
334/-- J in terms of cosh: J(exp(t)) = cosh(t) - 1 -/
335lemma Jcost_exp_cosh (t : ℝ) : Jcost (Real.exp t) = Real.cosh t - 1 :=
336 Jlog_as_cosh t
337
338/-- The sqrt of 2*J gives |log|, which IS a metric. -/
339noncomputable def Jmetric (x : ℝ) : ℝ := Real.sqrt (2 * Jcost x)
340
341/-- Jmetric(1) = 0 -/
342lemma Jmetric_one : Jmetric 1 = 0 := by
343 simp [Jmetric, Jcost_unit0]
344
345/-- Jmetric is symmetric: Jmetric(x) = Jmetric(1/x) -/
346lemma Jmetric_symm {x : ℝ} (hx : 0 < x) : Jmetric x = Jmetric x⁻¹ := by
347 simp only [Jmetric, Jcost_symm hx]
348
349/-- Jmetric is non-negative -/
350lemma Jmetric_nonneg {x : ℝ} (_ : 0 < x) : 0 ≤ Jmetric x :=
351 Real.sqrt_nonneg _
352
353/-- Key identity: 2(cosh(t) - 1) = 4sinh²(t/2)
354
355 Proof: cosh(2u) = cosh²(u) + sinh²(u), and cosh²(u) = 1 + sinh²(u).
356 So cosh(2u) = 1 + 2sinh²(u), hence cosh(2u) - 1 = 2sinh²(u). -/
357lemma cosh_minus_one_eq (t : ℝ) : 2 * (Real.cosh t - 1) = 4 * Real.sinh (t / 2) ^ 2 := by
358 -- Use the double-angle formula: cosh(2u) = cosh²(u) + sinh²(u)
359 have h := Real.cosh_two_mul (t / 2)
360 -- h : cosh(2*(t/2)) = cosh²(t/2) + sinh²(t/2)
361 simp only [two_mul, add_halves] at h
362 -- So h : cosh(t) = cosh²(t/2) + sinh²(t/2)
363 -- From cosh²(t/2) - sinh²(t/2) = 1, we get cosh²(t/2) = 1 + sinh²(t/2)
364 have h2 := Real.cosh_sq_sub_sinh_sq (t / 2)
365 have h3 : Real.cosh (t / 2) ^ 2 = 1 + Real.sinh (t / 2) ^ 2 := by linarith
366 -- Substitute: cosh(t) = (1 + sinh²(t/2)) + sinh²(t/2) = 1 + 2sinh²(t/2)
367 -- So cosh(t) - 1 = 2sinh²(t/2)
368 calc 2 * (Real.cosh t - 1)
369 = 2 * ((Real.cosh (t / 2) ^ 2 + Real.sinh (t / 2) ^ 2) - 1) := by rw [h]
370 _ = 2 * (((1 + Real.sinh (t / 2) ^ 2) + Real.sinh (t / 2) ^ 2) - 1) := by rw [h3]
371 _ = 2 * (2 * Real.sinh (t / 2) ^ 2) := by ring
372 _ = 4 * Real.sinh (t / 2) ^ 2 := by ring
373
374/-- **THEOREM: Quadratic Lower Bound for cosh**
375 cosh x ≥ 1 + x²/2 for all x.
376
377 Proof: From the definition cosh x = (e^x + e^(-x)) / 2 and the Taylor series,
378 we have cosh x = 1 + x²/2 + x⁴/24 + ... where all terms are non-negative.
379
380 Alternative proof via convexity: cosh is convex (cosh'' = cosh > 0), and
381 the tangent line at 0 gives cosh x ≥ cosh 0 + cosh'(0) * x = 1 + 0 = 1.
382 The quadratic bound follows from cosh'' = cosh ≥ 1. -/
383theorem cosh_quadratic_lower_bound (x : ℝ) : Real.cosh x ≥ 1 + x^2 / 2 := by
384 -- Use the Taylor series expansion
385 -- cosh x = ∑' n, x ^ (2 * n) / (2 * n)!
386 -- The first two partial sums are: n=0 → 1, n=1 → 1 + x²/2
387 -- Since all terms are non-negative, cosh x ≥ 1 + x²/2
388 have h := Real.hasSum_cosh x
389 -- Extract first two terms and show tail is non-negative
390 have h_term0 : (fun n => x ^ (2 * n) / ↑(2 * n).factorial) 0 = 1 := by simp
391 have h_term1 : (fun n => x ^ (2 * n) / ↑(2 * n).factorial) 1 = x^2 / 2 := by simp
392 -- Each term x^(2n)/(2n)! is non-negative because even powers are non-negative
393 have h_nn : ∀ n, 0 ≤ x ^ (2 * n) / ↑(2 * n).factorial := fun n => by
394 apply div_nonneg
395 · -- x^(2n) = (x^2)^n ≥ 0
396 rw [pow_mul]
397 exact pow_nonneg (sq_nonneg x) n
398 · exact Nat.cast_nonneg _
399 -- The sum is at least the sum of the first two terms
400 have h_ge : Real.cosh x ≥ 1 + x^2 / 2 := by
401 rw [← h.tsum_eq]
402 calc ∑' n, x ^ (2 * n) / ↑(2 * n).factorial
403 ≥ (x ^ (2 * 0) / ↑(2 * 0).factorial) + (x ^ (2 * 1) / ↑(2 * 1).factorial) := by
404 have hs := h.summable
405 have h01 : ({0, 1} : Finset ℕ).sum (fun n => x ^ (2 * n) / ↑(2 * n).factorial) ≤
406 ∑' n, x ^ (2 * n) / ↑(2 * n).factorial :=
407 hs.sum_le_tsum _ (fun i _ => h_nn i)
408 simp only [Finset.sum_pair (by decide : (0 : ℕ) ≠ 1)] at h01
409 exact h01
410 _ = 1 + x^2 / 2 := by simp
411 exact h_ge
412
413/-- Jmetric in terms of sinh: Jmetric(exp(t)) = 2|sinh(t/2)| -/
414lemma Jmetric_exp_sinh (t : ℝ) : Jmetric (Real.exp t) = 2 * |Real.sinh (t / 2)| := by
415 unfold Jmetric
416 rw [Jcost_exp_cosh, cosh_minus_one_eq]
417 -- sqrt(4 * sinh²(t/2)) = sqrt(4) * |sinh(t/2)| = 2 * |sinh(t/2)|
418 rw [show (4 : ℝ) * Real.sinh (t / 2) ^ 2 = (2 * Real.sinh (t / 2)) ^ 2 by ring]
419 rw [Real.sqrt_sq_eq_abs]
420 rw [abs_mul, abs_of_pos (by norm_num : (0 : ℝ) < 2)]
421
422/-! ### Note on Triangle Inequality
423
424The function `Jmetric(x) = sqrt(2 * Jcost(x))` does NOT satisfy the triangle inequality
425under multiplication. Numerical counterexample:
426- Jmetric(6) ≈ 2.04 > Jmetric(2) + Jmetric(3) ≈ 1.86
427
428This is expected: J-cost measures the "recognition strain" of a ratio, and strain
429compounds superlinearly when multiplying far-from-unity ratios.
430
431The key inequality that DOES hold is the d'Alembert identity, which gives
432`Jcost_submult`: J(xy) ≤ 2J(x) + 2J(y) + 2J(x)J(y). -/
433
434/-- **Standard Analysis**: Evaluation of Jmetric at specific points. -/
435theorem Jmetric_val_6 : Jmetric 6 = Real.sqrt (25 / 6) := by
436 unfold Jmetric Jcost
437 norm_num
438
439theorem Jmetric_val_2 : Jmetric 2 = Real.sqrt (1 / 2) := by
440 unfold Jmetric Jcost
441 norm_num
442
443theorem Jmetric_val_3 : Jmetric 3 = Real.sqrt (4 / 3) := by
444 unfold Jmetric Jcost
445 norm_num
446
447theorem sqrt_triangle_violation : Real.sqrt (25 / 6) > Real.sqrt (1 / 2) + Real.sqrt (4 / 3) := by
448 have h1 : 0 ≤ Real.sqrt (1 / 2) + Real.sqrt (4 / 3) := by positivity
449 change Real.sqrt (1 / 2) + Real.sqrt (4 / 3) < Real.sqrt (25 / 6)
450 rw [← Real.sqrt_sq h1]
451 rw [Real.sqrt_lt_sqrt_iff (by positivity)]
452 rw [add_sq, Real.sq_sqrt (by norm_num), Real.sq_sqrt (by norm_num)]
453 rw [mul_assoc, ← Real.sqrt_mul (by norm_num)]
454 norm_num
455 -- Goal: 1 / 2 + 2 * (√2 / √3) + 4 / 3 < 25 / 6
456 suffices 2 * (Real.sqrt 2 / Real.sqrt 3) < 7 / 3 by linarith
457 have h_lhs : 2 * (Real.sqrt 2 / Real.sqrt 3) = Real.sqrt (8 / 3) := by
458 rw [← Real.sqrt_div (by norm_num) 3]
459 rw [show (2 : ℝ) = Real.sqrt 4 by norm_num]
460 rw [← Real.sqrt_mul (by norm_num)]
461 norm_num
462 have h_rhs : (7 / 3 : ℝ) = Real.sqrt (49 / 9) := by
463 rw [Real.sqrt_div (by norm_num) 9]
464 norm_num
465 rw [h_lhs, h_rhs]
466 rw [Real.sqrt_lt_sqrt_iff (by positivity)]
467 norm_num
468
469/-- **DEPRECATED**: The naive triangle inequality does NOT hold for Jmetric.
470 Use `Jcost_submult` instead, which gives a valid submultiplicativity bound. -/
471theorem Jmetric_triangle_FALSE {x y : ℝ} (_hx : 0 < x) (_hy : 0 < y) :
472 ¬ (∀ a b : ℝ, 0 < a → 0 < b → Jmetric (a * b) ≤ Jmetric a + Jmetric b) := by
473 intro h
474 -- Counterexample: a = 2, b = 3
475 let a : ℝ := 2
476 let b : ℝ := 3
477 have ha : 0 < a := by norm_num
478 have hb : 0 < b := by norm_num
479 have h_tri := h a b ha hb
480 rw [show a * b = 6 by norm_num, Jmetric_val_6, Jmetric_val_2, Jmetric_val_3] at h_tri
481 have h_viol := sqrt_triangle_violation
482 linarith
483
484/-- **DEPRECATED**: The "weak triangle" J(xy) ≤ 2(J(x)+J(y)) + 2√(J(x)J(y)) is FALSE.
485
486 Counterexample: x = y = 10
487 - J(100) ≈ 49.005
488 - 2(J(10) + J(10)) + 2√(J(10)·J(10)) = 2(4.05 + 4.05) + 2·4.05 ≈ 24.3
489 - 49.005 > 24.3
490
491 Use `Jcost_submult` instead: J(xy) ≤ 2J(x) + 2J(y) + 2J(x)J(y), which IS proved. -/
492theorem Jcost_weak_triangle_FALSE :
493 ¬ (∀ x y : ℝ, 0 < x → 0 < y →
494 Jcost (x * y) ≤ 2 * (Jcost x + Jcost y) + 2 * Real.sqrt (Jcost x * Jcost y)) := by
495 intro h
496 -- Counterexample: x = y = 10, J(100) > 2(J(10)+J(10)) + 2*sqrt(J(10)*J(10))
497 -- J(100) = (100 + 1/100)/2 - 1 = 49.005
498 -- J(10) = (10 + 0.1)/2 - 1 = 4.05
499 -- RHS = 2*(4.05 + 4.05) + 2*sqrt(4.05*4.05) = 16.2 + 8.1 = 24.3
500 -- 49.005 > 24.3 is TRUE, not ≤, so the inequality fails
501 have hc := h 10 10 (by norm_num) (by norm_num)
502 -- The claim asserts ≤ but the counterexample shows >
503 -- J(100) = 49.005, RHS = 24.3, so 49.005 > 24.3
504 simp only [Jcost] at hc
505 nlinarith [sq_nonneg (10 : ℝ), Real.sqrt_nonneg (Jcost 10 * Jcost 10)]
506
507/-- The d'Alembert identity: J(xy) + J(x/y) = 2J(x) + 2J(y) + 2J(x)J(y) -/
508theorem dalembert_identity {x y : ℝ} (hx : 0 < x) (hy : 0 < y) :
509 Jcost (x * y) + Jcost (x / y) = 2 * Jcost x + 2 * Jcost y + 2 * Jcost x * Jcost y := by
510 have hx0 : x ≠ 0 := ne_of_gt hx
511 have hy0 : y ≠ 0 := ne_of_gt hy
512 have hxy : x * y ≠ 0 := mul_ne_zero hx0 hy0
513 have hxdy : x / y ≠ 0 := div_ne_zero hx0 hy0
514 simp only [Jcost_eq_sq hxy, Jcost_eq_sq hxdy, Jcost_eq_sq hx0, Jcost_eq_sq hy0]
515 field_simp
516 ring
517
518/-- From d'Alembert: J(xy) ≤ 2J(x) + 2J(y) + 2J(x)J(y) since J(x/y) ≥ 0 -/
519lemma Jcost_submult {x y : ℝ} (hx : 0 < x) (hy : 0 < y) :
520 Jcost (x * y) ≤ 2 * Jcost x + 2 * Jcost y + 2 * Jcost x * Jcost y := by
521 have h := dalembert_identity hx hy
522 have hnonneg : 0 ≤ Jcost (x / y) := Jcost_nonneg (div_pos hx hy)
523 linarith
524
525/-- Bound on J product: J(xy) ≤ 2*(1 + J(x))*(1 + J(y)) - 2
526 This follows from d'Alembert since (1+J(x))(1+J(y)) = 1 + J(x) + J(y) + J(x)J(y) -/
527lemma Jcost_prod_bound {x y : ℝ} (hx : 0 < x) (hy : 0 < y) :
528 Jcost (x * y) ≤ 2 * (1 + Jcost x) * (1 + Jcost y) - 2 := by
529 have h := Jcost_submult hx hy
530 -- 2*(1+J(x))*(1+J(y)) - 2 = 2*(1 + J(x) + J(y) + J(x)*J(y)) - 2
531 -- = 2 + 2*J(x) + 2*J(y) + 2*J(x)*J(y) - 2
532 -- = 2*J(x) + 2*J(y) + 2*J(x)*J(y)
533 calc Jcost (x * y) ≤ 2 * Jcost x + 2 * Jcost y + 2 * Jcost x * Jcost y := h
534 _ = 2 * (1 + Jcost x) * (1 + Jcost y) - 2 := by ring
535
536/-! ## Small-strain Taylor expansion -/
537
538lemma Jcost_one_plus_eps_quadratic (ε : ℝ) (hε : |ε| ≤ (1 : ℝ) / 2) :
539 ∃ (c : ℝ), Jcost (1 + ε) = ε ^ 2 / 2 + c * ε ^ 3 ∧ |c| ≤ 2 := by
540 classical
541 have hbounds := abs_le.mp hε
542 have hpos : 0 < 1 + ε := by
543 have : -(1 : ℝ) / 2 ≤ ε := by simpa [neg_div] using hbounds.1
544 linarith
545 have hne : 1 + ε ≠ 0 := ne_of_gt hpos
546 have hcalc : Jcost (1 + ε) = ε ^ 2 / (2 * (1 + ε)) := by
547 simpa [pow_two, add_comm, add_left_comm, add_assoc, sub_eq_add_neg]
548 using (Jcost_eq_sq hne)
549 let c : ℝ := -1 / (2 * (1 + ε))
550 have h_eq :
551 Jcost (1 + ε) = ε ^ 2 / 2 + c * ε ^ 3 := by
552 have : ε ^ 2 / (2 * (1 + ε)) = ε ^ 2 / 2 + (-1 / (2 * (1 + ε))) * ε ^ 3 := by
553 field_simp [hne]
554 ring
555 simpa [hcalc, c] using this
556 have hden_pos : 0 < 2 * (1 + ε) := by nlinarith [hpos]
557 have habs : |c| = 1 / (2 * (1 + ε)) := by
558 simp [c, div_eq_mul_inv, abs_mul, abs_inv, abs_of_pos hpos]
559 -- Use 1/(2(1+ε)) ≤ 1 from (1+ε) ≥ 1/2
560 have hone_le : (1 : ℝ) ≤ 2 * (1 + ε) := by
561 have : (1 / 2 : ℝ) ≤ 1 + ε := by linarith
562 simpa [two_mul] using mul_le_mul_of_nonneg_left this (by norm_num : (0 : ℝ) ≤ 2)
563 have hdiv_le_one : 1 / (2 * (1 + ε)) ≤ 1 := by
564 have hpos1 : 0 < (1 : ℝ) := by norm_num
565 simpa [one_div] using one_div_le_one_div_of_le hpos1 hone_le
566 have hbound : |c| ≤ 2 := by
567 have h1 : |c| ≤ 1 := by simpa [habs] using hdiv_le_one
568 have h12 : (1 : ℝ) ≤ 2 := by norm_num
569 exact le_trans h1 h12
570 exact ⟨c, h_eq, hbound⟩
571
572lemma Jcost_small_strain_bound (ε : ℝ) (hε : |ε| ≤ (1 : ℝ) / 10) :
573 |Jcost (1 + ε) - ε ^ 2 / 2| ≤ ε ^ 2 / 10 := by
574 classical
575 have hbounds := abs_le.mp hε
576 have hpos : 0 < 1 + ε := by
577 have : -(1 : ℝ) / 10 ≤ ε := by simpa [neg_div] using hbounds.1
578 linarith
579 have hne : 1 + ε ≠ 0 := ne_of_gt hpos
580 have hform : Jcost (1 + ε) = ε ^ 2 / (2 * (1 + ε)) := by
581 simpa [pow_two, add_comm, add_left_comm, add_assoc, sub_eq_add_neg]
582 using (Jcost_eq_sq hne)
583 have hden_pos : 0 < 2 * (1 + ε) := by nlinarith [hpos]
584 -- Exact difference and absolute value
585 have h1 : Jcost (1 + ε) - ε ^ 2 / 2
586 = ε ^ 2 / (2 * (1 + ε)) - ε ^ 2 / 2 := by
587 simp [hform]
588 have hx : (2 : ℝ) * (1 + ε) ≠ 0 := mul_ne_zero two_ne_zero hne
589 have h2 : ε ^ 2 / (2 * (1 + ε)) - ε ^ 2 / 2 = -ε ^ 3 / (2 * (1 + ε)) := by
590 field_simp [hx]
591 ring
592 have hdiff : Jcost (1 + ε) - ε ^ 2 / 2 = -ε ^ 3 / (2 * (1 + ε)) := h1.trans h2
593 have habs : |Jcost (1 + ε) - ε ^ 2 / 2| = |ε| ^ 3 / (2 * (1 + ε)) := by
594 have hposden : 0 < 2 * (1 + ε) := hden_pos
595 simpa [abs_div, abs_neg, abs_pow, abs_of_pos hposden] using
596 congrArg (fun z => |z|) hdiff
597 -- Now bound using |ε|/(2(1+ε)) ≤ 1/18 from below
598 have hx_lower : (9 : ℝ) / 10 ≤ 1 + ε := by linarith [show -(1 : ℝ) / 10 ≤ ε from by simpa [neg_div] using hbounds.1]
599 have hx_pos : 0 < (9 : ℝ) / 10 := by norm_num
600 have hx_inv : 1 / (1 + ε) ≤ (10 : ℝ) / 9 := by
601 have := one_div_le_one_div_of_le hx_pos hx_lower
602 simpa using this
603 have hrec_bound : 1 / (2 * (1 + ε)) ≤ (5 : ℝ) / 9 := by
604 have hmul : (1 / 2 : ℝ) * (1 / (1 + ε)) ≤ (1 / 2) * ((10 : ℝ) / 9) :=
605 mul_le_mul_of_nonneg_left hx_inv (by norm_num)
606 have hleft : 1 / (2 * (1 + ε)) = (1 / 2) * (1 / (1 + ε)) := by
607 simp [div_eq_mul_inv, mul_comm]
608 have hright : (5 : ℝ) / 9 = (1 / 2) * ((10 : ℝ) / 9) := by norm_num
609 simpa [hleft, hright] using hmul
610 have hrec_nonneg : 0 ≤ 1 / (2 * (1 + ε)) := by
611 have : 0 ≤ 2 * (1 + ε) := le_of_lt (by nlinarith [hpos])
612 exact one_div_nonneg.mpr this
613 have hA : |ε| / (2 * (1 + ε)) ≤ (1 : ℝ) / 10 * (1 / (2 * (1 + ε))) := by
614 simpa [div_eq_mul_inv, mul_comm, mul_left_comm, mul_assoc]
615 using mul_le_mul_of_nonneg_right hε hrec_nonneg
616 have hB : (1 : ℝ) / 10 * (1 / (2 * (1 + ε))) ≤ (1 : ℝ) / 18 := by
617 have hmul := mul_le_mul_of_nonneg_left hrec_bound (by norm_num : (0 : ℝ) ≤ (1 : ℝ) / 10)
618 have hright : (1 : ℝ) / 18 = (1 : ℝ) / 10 * ((5 : ℝ) / 9) := by norm_num
619 simpa [hright] using hmul
620 have hfrac : |ε| / (2 * (1 + ε)) ≤ (1 : ℝ) / 18 := hA.trans hB
621 -- Conclude
622 have hineq : |Jcost (1 + ε) - ε ^ 2 / 2| ≤ |ε| ^ 2 / 18 := by
623 have hnn : 0 ≤ |ε| ^ 2 := by
624 have := sq_nonneg (|ε|); simpa [pow_two] using this
625 have hmul := mul_le_mul_of_nonneg_left hfrac hnn
626 calc
627 |Jcost (1 + ε) - ε ^ 2 / 2| = |ε| ^ 3 / (2 * (1 + ε)) := by simp [habs]
628 _ ≤ |ε| ^ 2 * (1 / 18) := by
629 simpa [pow_succ, pow_two, mul_comm, mul_left_comm, mul_assoc, div_eq_mul_inv] using hmul
630 _ = |ε| ^ 2 / 18 := by simp [div_eq_mul_inv]
631 have hratio : (1 : ℝ) / 18 ≤ 1 / 10 := by norm_num
632 have hsq : |ε| ^ 2 = ε ^ 2 := by
633 have h1 : |ε| * |ε| = |ε * ε| := by simp [abs_mul]
634 calc
635 |ε| ^ 2 = |ε| * |ε| := by simp [pow_two]
636 _ = |ε * ε| := h1
637 _ = |ε ^ 2| := by simp [pow_two]
638 _ = ε ^ 2 := by simp [abs_of_nonneg (sq_nonneg ε)]
639 have hcompare : |ε| ^ 2 / 18 ≤ ε ^ 2 / 10 := by
640 have := mul_le_mul_of_nonneg_left hratio (by exact sq_nonneg ε)
641 simpa [hsq, pow_two] using this
642 exact (hineq.trans hcompare)
643
644/-- Alias: Jcost_reciprocal = Jcost_symm (for parallel-work compat). -/
645lemma Jcost_reciprocal {x : ℝ} (hx : 0 < x) : Jcost x = Jcost x⁻¹ := Jcost_symm hx
646
647end Cost
648end IndisputableMonolith
649