`IndisputableMonolith.Cost`

IndisputableMonolith/Cost.lean · 649 lines · 61 declarations
Explainer status: ready · generated 2026-05-12 10:14:17.875304+00:00
   1import Mathlib
   2
   3namespace IndisputableMonolith
   4namespace Cost
   5
   6noncomputable def Jcost (x : ℝ) : ℝ := (x + x⁻¹) / 2 - 1
   7
   8structure CostRequirements (F : ℝ → ℝ) : Prop where
   9  symmetric : ∀ {x}, 0 < x → F x = F x⁻¹
  10  unit0 : F 1 = 0
  11
  12lemma Jcost_unit0 : Jcost 1 = 0 := by
  13  simp [Jcost]
  14
  15/-- J(x) expressed as a squared ratio: J(x) = (x-1)²/(2x). -/
  16lemma Jcost_eq_sq {x : ℝ} (hx : x ≠ 0) :
  17    Jcost x = (x - 1) ^ 2 / (2 * x) := by
  18  unfold Jcost
  19  field_simp [hx]
  20  ring
  21
  22lemma Jcost_symm {x : ℝ} (hx : 0 < x) : Jcost x = Jcost x⁻¹ := by
  23  have hx0 : x ≠ 0 := ne_of_gt hx
  24  rw [Jcost_eq_sq hx0, Jcost_eq_sq (inv_ne_zero hx0)]
  25  field_simp [hx0]
  26  ring
  27
  28/-- J(x) ≥ 0 for positive x (AM-GM inequality) -/
  29lemma Jcost_nonneg {x : ℝ} (hx : 0 < x) : 0 ≤ Jcost x := by
  30  have hx0 : x ≠ 0 := hx.ne'
  31  rw [Jcost_eq_sq hx0]
  32  positivity
  33
  34def AgreesOnExp (F : ℝ → ℝ) : Prop := ∀ t : ℝ, F (Real.exp t) = Jcost (Real.exp t)
  35
  36@[simp] lemma Jcost_exp (t : ℝ) :
  37  Jcost (Real.exp t) = ((Real.exp t) + (Real.exp (-t))) / 2 - 1 := by
  38  have h : (Real.exp t)⁻¹ = Real.exp (-t) := by
  39    symm; simp [Real.exp_neg t]
  40  simp [Jcost, h]
  41
  42class SymmUnit (F : ℝ → ℝ) : Prop where
  43  symmetric : ∀ {x}, 0 < x → F x = F x⁻¹
  44  unit0 : F 1 = 0
  45
  46class AveragingAgree (F : ℝ → ℝ) : Prop where
  47  agrees : AgreesOnExp F
  48
  49class AveragingDerivation (F : ℝ → ℝ) : Prop extends SymmUnit F where
  50  agrees : AgreesOnExp F
  51
  52lemma even_on_log_of_symm {F : ℝ → ℝ} [SymmUnit F] (t : ℝ) :
  53    F (Real.exp t) = F (Real.exp (-t)) := by
  54  have hx : 0 < Real.exp t := Real.exp_pos t
  55  simpa [Real.exp_neg] using (SymmUnit.symmetric (F:=F) hx)
  56
  57class AveragingBounds (F : ℝ → ℝ) : Prop extends SymmUnit F where
  58  upper : ∀ t : ℝ, F (Real.exp t) ≤ Jcost (Real.exp t)
  59  lower : ∀ t : ℝ, Jcost (Real.exp t) ≤ F (Real.exp t)
  60
  61theorem agrees_on_exp_of_bounds {F : ℝ → ℝ} [AveragingBounds F] :
  62    AgreesOnExp F := by
  63  intro t
  64  have h₁ := AveragingBounds.upper (F:=F) t
  65  have h₂ := AveragingBounds.lower (F:=F) t
  66  have : F (Real.exp t) = Jcost (Real.exp t) := le_antisymm h₁ h₂
  67  simpa using this
  68
  69theorem F_eq_J_on_pos_alt (F : ℝ → ℝ)
  70  (hAgree : AgreesOnExp F) : ∀ {x : ℝ}, 0 < x → F x = Jcost x := by
  71  intro x hx
  72  have : ∃ t, Real.exp t = x := ⟨Real.log x, by simpa using Real.exp_log hx⟩
  73  rcases this with ⟨t, rfl⟩
  74  simpa using hAgree t
  75
  76instance (priority := 90) averagingDerivation_of_bounds {F : ℝ → ℝ} [AveragingBounds F] :
  77    AveragingDerivation F :=
  78  { toSymmUnit := (inferInstance : SymmUnit F)
  79  , agrees := agrees_on_exp_of_bounds (F:=F) }
  80
  81def mkAveragingBounds (F : ℝ → ℝ)
  82  (symm : SymmUnit F)
  83  (upper : ∀ t : ℝ, F (Real.exp t) ≤ Jcost (Real.exp t))
  84  (lower : ∀ t : ℝ, Jcost (Real.exp t) ≤ F (Real.exp t)) :
  85  AveragingBounds F :=
  86{ toSymmUnit := symm, upper := upper, lower := lower }
  87
  88class JensenSketch (F : ℝ → ℝ) : Prop extends SymmUnit F where
  89  axis_upper : ∀ t : ℝ, F (Real.exp t) ≤ Jcost (Real.exp t)
  90  axis_lower : ∀ t : ℝ, Jcost (Real.exp t) ≤ F (Real.exp t)
  91
  92instance (priority := 95) averagingBounds_of_jensen {F : ℝ → ℝ} [JensenSketch F] :
  93    AveragingBounds F :=
  94  mkAveragingBounds F (symm := (inferInstance : SymmUnit F))
  95    (upper := JensenSketch.axis_upper (F:=F))
  96    (lower := JensenSketch.axis_lower (F:=F))
  97
  98noncomputable def JensenSketch.of_log_bounds (F : ℝ → ℝ)
  99  (symm : SymmUnit F)
 100  (upper_log : ∀ t : ℝ, F (Real.exp t) ≤ ((Real.exp t + Real.exp (-t)) / 2 - 1))
 101  (lower_log : ∀ t : ℝ, ((Real.exp t + Real.exp (-t)) / 2 - 1) ≤ F (Real.exp t)) :
 102  JensenSketch F :=
 103{ toSymmUnit := symm
 104, axis_upper := by intro t; simpa [Jcost_exp] using upper_log t
 105, axis_lower := by intro t; simpa [Jcost_exp] using lower_log t }
 106
 107noncomputable def F_ofLog (G : ℝ → ℝ) : ℝ → ℝ := fun x => G (Real.log x)
 108
 109class LogModel (G : ℝ → ℝ) where
 110  even_log : ∀ t : ℝ, G (-t) = G t
 111  base0 : G 0 = 0
 112  upper_cosh : ∀ t : ℝ, G t ≤ ((Real.exp t + Real.exp (-t)) / 2 - 1)
 113  lower_cosh : ∀ t : ℝ, ((Real.exp t + Real.exp (-t)) / 2 - 1) ≤ G t
 114
 115instance (G : ℝ → ℝ) [LogModel G] : SymmUnit (F_ofLog G) :=
 116  { symmetric := by
 117      intro x hx
 118      have hlog : Real.log (x⁻¹) = - Real.log x := by
 119        simp
 120      dsimp [F_ofLog]
 121      have he : G (Real.log x) = G (- Real.log x) := by
 122        simpa using (LogModel.even_log (G:=G) (Real.log x)).symm
 123      simpa [hlog]
 124        using he
 125    , unit0 := by
 126      dsimp [F_ofLog]
 127      simpa [Real.log_one] using (LogModel.base0 (G:=G)) }
 128
 129instance (priority := 90) (G : ℝ → ℝ) [LogModel G] : JensenSketch (F_ofLog G) :=
 130  JensenSketch.of_log_bounds (F:=F_ofLog G)
 131    (symm := (inferInstance : SymmUnit (F_ofLog G)))
 132    (upper_log := by
 133      intro t
 134      dsimp [F_ofLog]
 135      simpa using (LogModel.upper_cosh (G:=G) t))
 136    (lower_log := by
 137      intro t
 138      dsimp [F_ofLog]
 139      simpa using (LogModel.lower_cosh (G:=G) t))
 140
 141theorem agree_on_exp_extends {F : ℝ → ℝ}
 142  (hAgree : AgreesOnExp F) : ∀ {x : ℝ}, 0 < x → F x = Jcost x := by
 143  intro x hx
 144  have : F (Real.exp (Real.log x)) = Jcost (Real.exp (Real.log x)) := hAgree (Real.log x)
 145  simp [Real.exp_log hx] at this
 146  exact this
 147
 148theorem F_eq_J_on_pos {F : ℝ → ℝ}
 149  (hAgree : AgreesOnExp F) :
 150  ∀ {x : ℝ}, 0 < x → F x = Jcost x :=
 151  agree_on_exp_extends (F:=F) hAgree
 152
 153theorem F_eq_J_on_pos_of_averaging {F : ℝ → ℝ} [AveragingAgree F] :
 154  ∀ {x : ℝ}, 0 < x → F x = Jcost x :=
 155  F_eq_J_on_pos (hAgree := AveragingAgree.agrees (F:=F))
 156
 157theorem agrees_on_exp_of_symm_unit (F : ℝ → ℝ) [AveragingDerivation F] :
 158  AgreesOnExp F := AveragingDerivation.agrees (F:=F)
 159
 160theorem F_eq_J_on_pos_of_derivation (F : ℝ → ℝ) [AveragingDerivation F] :
 161  ∀ {x : ℝ}, 0 < x → F x = Jcost x :=
 162  F_eq_J_on_pos (hAgree := agrees_on_exp_of_symm_unit F)
 163
 164theorem T5_cost_uniqueness_on_pos {F : ℝ → ℝ} [JensenSketch F] :
 165  ∀ {x : ℝ}, 0 < x → F x = Jcost x :=
 166by
 167  intro x hx
 168  have hAgree : AgreesOnExp F := by
 169    intro t
 170    exact le_antisymm (JensenSketch.axis_upper (F:=F) t) (JensenSketch.axis_lower (F:=F) t)
 171  exact (agree_on_exp_extends (F:=F) hAgree) hx
 172
 173noncomputable def Jlog (t : ℝ) : ℝ := Jcost (Real.exp t)
 174
 175lemma Jlog_as_cosh (t : ℝ) : Jlog t = Real.cosh t - 1 := by
 176  unfold Jlog Jcost
 177  rw [Real.cosh_eq, inv_eq_one_div, Real.exp_neg]
 178  ring
 179
 180lemma hasDerivAt_Jlog (t : ℝ) : HasDerivAt Jlog (Real.sinh t) t := by
 181  have h1 : Jlog = fun t => Real.cosh t - 1 := by
 182    ext t
 183    exact Jlog_as_cosh t
 184  rw [h1]
 185  have h_cosh : HasDerivAt Real.cosh (Real.sinh t) t := Real.hasDerivAt_cosh t
 186  have h_const : HasDerivAt (fun _ => (1 : ℝ)) 0 t := hasDerivAt_const t 1
 187  have h_sub := h_cosh.sub h_const
 188  simp at h_sub
 189  exact h_sub
 190
 191@[simp] lemma hasDerivAt_Jlog_zero : HasDerivAt Jlog 0 0 := by
 192  simpa using (hasDerivAt_Jlog 0)
 193
 194@[simp] lemma deriv_Jlog_zero : deriv Jlog 0 = 0 := by
 195  classical
 196  simpa using (hasDerivAt_Jlog_zero).deriv
 197
 198theorem hasDerivAt_Jcost (x : ℝ) (hx : x ≠ 0) :
 199    HasDerivAt Jcost ((1 - x⁻¹ ^ 2) / 2) x := by
 200  unfold Jcost
 201  -- The derivative of f(x) = (x + 1/x)/2 - 1 is f'(x) = (1 - 1/x²)/2
 202  have h1 : HasDerivAt (fun y => y + y⁻¹) (1 + (-(x^2)⁻¹ : ℝ)) x := by
 203    apply HasDerivAt.add
 204    · exact hasDerivAt_id x
 205    · exact hasDerivAt_inv hx
 206  have h2 : HasDerivAt (fun y => (y + y⁻¹) / 2) ((1 + (-(x^2)⁻¹)) / 2) x :=
 207    h1.div_const 2
 208  have h3 : HasDerivAt (fun y => (y + y⁻¹) / 2 - 1) ((1 + (-(x^2)⁻¹)) / 2 - 0) x :=
 209    h2.sub (hasDerivAt_const x (1 : ℝ))
 210  have h_eq : (1 + (-(x^2)⁻¹)) / 2 - 0 = (1 - x⁻¹ ^ 2) / 2 := by
 211    have : (x^2)⁻¹ = x⁻¹ ^ 2 := by rw [inv_pow]
 212    linarith [this]
 213  rw [← h_eq]
 214  exact h3
 215
 216theorem deriv_Jcost_one : deriv Jcost 1 = 0 := by
 217  have h : HasDerivAt Jcost ((1 - 1⁻¹ ^ 2) / 2) 1 := hasDerivAt_Jcost 1 (by norm_num)
 218  simp at h
 219  exact h.deriv
 220
 221/-!
 222## Strict Convexity of Jcost
 223
 224The theorem `Jcost_strictConvexOn_pos : StrictConvexOn ℝ (Set.Ioi 0) Jcost`
 225is proven in `Cost/Convexity.lean` using the second derivative approach:
 226J''(x) = x⁻³ > 0 for x > 0.
 227
 228Import `IndisputableMonolith.Cost.Convexity` to access this theorem.
 229-/
 230
 231@[simp] lemma Jlog_zero : Jlog 0 = 0 := by
 232  simp [Jlog, Jcost]
 233
 234lemma Jlog_nonneg (t : ℝ) : 0 ≤ Jlog t :=
 235  Jcost_nonneg (Real.exp_pos t)
 236
 237/-- J(x) > 0 for x ≠ 1 and x > 0. -/
 238lemma Jcost_pos_of_ne_one (x : ℝ) (hx : 0 < x) (hx1 : x ≠ 1) : 0 < Jcost x := by
 239  have hx0 : x ≠ 0 := ne_of_gt hx
 240  rw [Jcost_eq_sq hx0]
 241  apply div_pos
 242  · have hne : (x - 1) ≠ 0 := sub_ne_zero.mpr hx1
 243    exact sq_pos_of_ne_zero hne
 244  · have h2 : (0 : ℝ) < 2 := by norm_num
 245    exact mul_pos h2 hx
 246
 247/-- J(x) = 0 iff x = 1, for positive x. -/
 248lemma Jcost_eq_zero_iff (x : ℝ) (hx : 0 < x) : Jcost x = 0 ↔ x = 1 := by
 249  constructor
 250  · intro h
 251    by_contra h1
 252    exact absurd h (ne_of_gt (Jcost_pos_of_ne_one x hx h1))
 253  · intro h
 254    rw [h]
 255    exact Jcost_unit0
 256
 257/-- **THEOREM**: Jcost is surjective onto [0, ∞). -/
 258theorem Jcost_surjective_on_nonneg : ∀ y : ℝ, 0 ≤ y → ∃ x : ℝ, 1 ≤ x ∧ Jcost x = y := by
 259  intro y hy
 260  -- J(x) = (x + 1/x)/2 - 1
 261  -- Solve (x + 1/x)/2 - 1 = y => x + 1/x = 2(y+1)
 262  -- x^2 - 2(y+1)x + 1 = 0
 263  -- x = [(2(y+1)) + sqrt(4(y+1)^2 - 4)] / 2 = (y+1) + sqrt((y+1)^2 - 1)
 264  let x := (y + 1) + Real.sqrt ((y + 1) ^ 2 - 1)
 265  use x
 266  have h_y1_ge_1 : 1 ≤ y + 1 := by linarith
 267  have h_sq_ge_0 : 0 ≤ (y + 1) ^ 2 - 1 := by nlinarith
 268  constructor
 269  · have : 0 ≤ Real.sqrt ((y + 1) ^ 2 - 1) := Real.sqrt_nonneg _
 270    linarith
 271  · unfold Jcost
 272    have hx_pos : 0 < x := by
 273      have : 0 ≤ Real.sqrt ((y + 1) ^ 2 - 1) := Real.sqrt_nonneg _
 274      linarith
 275    field_simp [hx_pos.ne']
 276    -- Goal after field_simp: x^2 + 1 - x*2 = x*2*y
 277    -- Since x = y+1 + sqrt((y+1)^2 - 1), we have x^2 - 2(y+1)x + 1 = 0
 278    -- Thus x^2 + 1 = 2(y+1)x = 2x + 2xy, so x^2 + 1 - 2x = 2xy
 279    let s := Real.sqrt ((y + 1) ^ 2 - 1)
 280    have hs_sq : s ^ 2 = (y + 1) ^ 2 - 1 := Real.sq_sqrt h_sq_ge_0
 281    have hx_eq : x = y + 1 + s := rfl
 282    -- Key equation: x^2 - 2(y+1)x + 1 = 0
 283    have h_quad : x ^ 2 - 2 * (y + 1) * x + 1 = 0 := by
 284      have h1 : x ^ 2 = (y + 1 + s) ^ 2 := by rw [hx_eq]
 285      have h2 : (y + 1 + s) ^ 2 = (y + 1) ^ 2 + 2 * (y + 1) * s + s ^ 2 := by ring
 286      have h3 : s ^ 2 = (y + 1) ^ 2 - 1 := hs_sq
 287      calc x ^ 2 - 2 * (y + 1) * x + 1
 288          = (y + 1 + s) ^ 2 - 2 * (y + 1) * (y + 1 + s) + 1 := by simp only [hx_eq]
 289        _ = ((y + 1) ^ 2 + 2 * (y + 1) * s + s ^ 2) - 2 * (y + 1) * (y + 1 + s) + 1 := by rw [h2]
 290        _ = ((y + 1) ^ 2 + 2 * (y + 1) * s + ((y + 1) ^ 2 - 1)) - 2 * (y + 1) * (y + 1 + s) + 1 := by rw [h3]
 291        _ = 0 := by ring
 292    -- From h_quad: x^2 + 1 = 2(y+1)x = 2x(y+1) = 2x + 2xy
 293    -- So: x^2 + 1 - 2x = 2xy
 294    linarith [h_quad]
 295
 296lemma Jlog_eq_zero_iff (t : ℝ) : Jlog t = 0 ↔ t = 0 := by
 297  constructor
 298  · intro h
 299    have hxpos : 0 < Real.exp t := Real.exp_pos t
 300    have hxne : Real.exp t ≠ 0 := ne_of_gt hxpos
 301    have hrepr := Jcost_eq_sq hxne
 302    rw [Jlog, hrepr] at h
 303    have hden_pos : 0 < 2 * Real.exp t := by
 304      apply mul_pos
 305      · norm_num
 306      · exact hxpos
 307    have hden_ne : 2 * Real.exp t ≠ 0 := ne_of_gt hden_pos
 308    rw [div_eq_zero_iff] at h
 309    cases h with
 310    | inl h_num =>
 311      have hexp1 : Real.exp t - 1 = 0 := by nlinarith [sq_nonneg (Real.exp t - 1)]
 312      have hexp_eq : Real.exp t = 1 := by linarith
 313      rw [Real.exp_eq_one_iff] at hexp_eq
 314      exact hexp_eq
 315    | inr h_den =>
 316      exact absurd h_den hden_ne
 317  · intro h
 318    rw [h]
 319    exact Jlog_zero
 320
 321
 322theorem EL_stationary_at_zero : deriv Jlog 0 = 0 := by
 323  simp
 324
 325theorem EL_global_min (t : ℝ) : Jlog 0 ≤ Jlog t := by
 326  simpa [Jlog_zero] using Jlog_nonneg t
 327
 328/-- From J(x) = 0 and x > 0, conclude x = 1. -/
 329lemma Jcost_zero_iff_one {x : ℝ} (hx : 0 < x) (h : Jcost x = 0) : x = 1 :=
 330  (Jcost_eq_zero_iff x hx).mp h
 331
 332/-! ## Triangle Inequality for J -/
 333
 334/-- J in terms of cosh: J(exp(t)) = cosh(t) - 1 -/
 335lemma Jcost_exp_cosh (t : ℝ) : Jcost (Real.exp t) = Real.cosh t - 1 :=
 336  Jlog_as_cosh t
 337
 338/-- The sqrt of 2*J gives |log|, which IS a metric. -/
 339noncomputable def Jmetric (x : ℝ) : ℝ := Real.sqrt (2 * Jcost x)
 340
 341/-- Jmetric(1) = 0 -/
 342lemma Jmetric_one : Jmetric 1 = 0 := by
 343  simp [Jmetric, Jcost_unit0]
 344
 345/-- Jmetric is symmetric: Jmetric(x) = Jmetric(1/x) -/
 346lemma Jmetric_symm {x : ℝ} (hx : 0 < x) : Jmetric x = Jmetric x⁻¹ := by
 347  simp only [Jmetric, Jcost_symm hx]
 348
 349/-- Jmetric is non-negative -/
 350lemma Jmetric_nonneg {x : ℝ} (_ : 0 < x) : 0 ≤ Jmetric x :=
 351  Real.sqrt_nonneg _
 352
 353/-- Key identity: 2(cosh(t) - 1) = 4sinh²(t/2)
 354
 355    Proof: cosh(2u) = cosh²(u) + sinh²(u), and cosh²(u) = 1 + sinh²(u).
 356    So cosh(2u) = 1 + 2sinh²(u), hence cosh(2u) - 1 = 2sinh²(u). -/
 357lemma cosh_minus_one_eq (t : ℝ) : 2 * (Real.cosh t - 1) = 4 * Real.sinh (t / 2) ^ 2 := by
 358  -- Use the double-angle formula: cosh(2u) = cosh²(u) + sinh²(u)
 359  have h := Real.cosh_two_mul (t / 2)
 360  -- h : cosh(2*(t/2)) = cosh²(t/2) + sinh²(t/2)
 361  simp only [two_mul, add_halves] at h
 362  -- So h : cosh(t) = cosh²(t/2) + sinh²(t/2)
 363  -- From cosh²(t/2) - sinh²(t/2) = 1, we get cosh²(t/2) = 1 + sinh²(t/2)
 364  have h2 := Real.cosh_sq_sub_sinh_sq (t / 2)
 365  have h3 : Real.cosh (t / 2) ^ 2 = 1 + Real.sinh (t / 2) ^ 2 := by linarith
 366  -- Substitute: cosh(t) = (1 + sinh²(t/2)) + sinh²(t/2) = 1 + 2sinh²(t/2)
 367  -- So cosh(t) - 1 = 2sinh²(t/2)
 368  calc 2 * (Real.cosh t - 1)
 369      = 2 * ((Real.cosh (t / 2) ^ 2 + Real.sinh (t / 2) ^ 2) - 1) := by rw [h]
 370    _ = 2 * (((1 + Real.sinh (t / 2) ^ 2) + Real.sinh (t / 2) ^ 2) - 1) := by rw [h3]
 371    _ = 2 * (2 * Real.sinh (t / 2) ^ 2) := by ring
 372    _ = 4 * Real.sinh (t / 2) ^ 2 := by ring
 373
 374/-- **THEOREM: Quadratic Lower Bound for cosh**
 375    cosh x ≥ 1 + x²/2 for all x.
 376
 377    Proof: From the definition cosh x = (e^x + e^(-x)) / 2 and the Taylor series,
 378    we have cosh x = 1 + x²/2 + x⁴/24 + ... where all terms are non-negative.
 379
 380    Alternative proof via convexity: cosh is convex (cosh'' = cosh > 0), and
 381    the tangent line at 0 gives cosh x ≥ cosh 0 + cosh'(0) * x = 1 + 0 = 1.
 382    The quadratic bound follows from cosh'' = cosh ≥ 1. -/
 383theorem cosh_quadratic_lower_bound (x : ℝ) : Real.cosh x ≥ 1 + x^2 / 2 := by
 384  -- Use the Taylor series expansion
 385  -- cosh x = ∑' n, x ^ (2 * n) / (2 * n)!
 386  -- The first two partial sums are: n=0 → 1, n=1 → 1 + x²/2
 387  -- Since all terms are non-negative, cosh x ≥ 1 + x²/2
 388  have h := Real.hasSum_cosh x
 389  -- Extract first two terms and show tail is non-negative
 390  have h_term0 : (fun n => x ^ (2 * n) / ↑(2 * n).factorial) 0 = 1 := by simp
 391  have h_term1 : (fun n => x ^ (2 * n) / ↑(2 * n).factorial) 1 = x^2 / 2 := by simp
 392  -- Each term x^(2n)/(2n)! is non-negative because even powers are non-negative
 393  have h_nn : ∀ n, 0 ≤ x ^ (2 * n) / ↑(2 * n).factorial := fun n => by
 394    apply div_nonneg
 395    · -- x^(2n) = (x^2)^n ≥ 0
 396      rw [pow_mul]
 397      exact pow_nonneg (sq_nonneg x) n
 398    · exact Nat.cast_nonneg _
 399  -- The sum is at least the sum of the first two terms
 400  have h_ge : Real.cosh x ≥ 1 + x^2 / 2 := by
 401    rw [← h.tsum_eq]
 402    calc ∑' n, x ^ (2 * n) / ↑(2 * n).factorial
 403        ≥ (x ^ (2 * 0) / ↑(2 * 0).factorial) + (x ^ (2 * 1) / ↑(2 * 1).factorial) := by
 404          have hs := h.summable
 405          have h01 : ({0, 1} : Finset ℕ).sum (fun n => x ^ (2 * n) / ↑(2 * n).factorial) ≤
 406                     ∑' n, x ^ (2 * n) / ↑(2 * n).factorial :=
 407            hs.sum_le_tsum _ (fun i _ => h_nn i)
 408          simp only [Finset.sum_pair (by decide : (0 : ℕ) ≠ 1)] at h01
 409          exact h01
 410      _ = 1 + x^2 / 2 := by simp
 411  exact h_ge
 412
 413/-- Jmetric in terms of sinh: Jmetric(exp(t)) = 2|sinh(t/2)| -/
 414lemma Jmetric_exp_sinh (t : ℝ) : Jmetric (Real.exp t) = 2 * |Real.sinh (t / 2)| := by
 415  unfold Jmetric
 416  rw [Jcost_exp_cosh, cosh_minus_one_eq]
 417  -- sqrt(4 * sinh²(t/2)) = sqrt(4) * |sinh(t/2)| = 2 * |sinh(t/2)|
 418  rw [show (4 : ℝ) * Real.sinh (t / 2) ^ 2 = (2 * Real.sinh (t / 2)) ^ 2 by ring]
 419  rw [Real.sqrt_sq_eq_abs]
 420  rw [abs_mul, abs_of_pos (by norm_num : (0 : ℝ) < 2)]
 421
 422/-! ### Note on Triangle Inequality
 423
 424The function `Jmetric(x) = sqrt(2 * Jcost(x))` does NOT satisfy the triangle inequality
 425under multiplication. Numerical counterexample:
 426- Jmetric(6) ≈ 2.04 > Jmetric(2) + Jmetric(3) ≈ 1.86
 427
 428This is expected: J-cost measures the "recognition strain" of a ratio, and strain
 429compounds superlinearly when multiplying far-from-unity ratios.
 430
 431The key inequality that DOES hold is the d'Alembert identity, which gives
 432`Jcost_submult`: J(xy) ≤ 2J(x) + 2J(y) + 2J(x)J(y). -/
 433
 434/-- **Standard Analysis**: Evaluation of Jmetric at specific points. -/
 435theorem Jmetric_val_6 : Jmetric 6 = Real.sqrt (25 / 6) := by
 436  unfold Jmetric Jcost
 437  norm_num
 438
 439theorem Jmetric_val_2 : Jmetric 2 = Real.sqrt (1 / 2) := by
 440  unfold Jmetric Jcost
 441  norm_num
 442
 443theorem Jmetric_val_3 : Jmetric 3 = Real.sqrt (4 / 3) := by
 444  unfold Jmetric Jcost
 445  norm_num
 446
 447theorem sqrt_triangle_violation : Real.sqrt (25 / 6) > Real.sqrt (1 / 2) + Real.sqrt (4 / 3) := by
 448  have h1 : 0 ≤ Real.sqrt (1 / 2) + Real.sqrt (4 / 3) := by positivity
 449  change Real.sqrt (1 / 2) + Real.sqrt (4 / 3) < Real.sqrt (25 / 6)
 450  rw [← Real.sqrt_sq h1]
 451  rw [Real.sqrt_lt_sqrt_iff (by positivity)]
 452  rw [add_sq, Real.sq_sqrt (by norm_num), Real.sq_sqrt (by norm_num)]
 453  rw [mul_assoc, ← Real.sqrt_mul (by norm_num)]
 454  norm_num
 455  -- Goal: 1 / 2 + 2 * (√2 / √3) + 4 / 3 < 25 / 6
 456  suffices 2 * (Real.sqrt 2 / Real.sqrt 3) < 7 / 3 by linarith
 457  have h_lhs : 2 * (Real.sqrt 2 / Real.sqrt 3) = Real.sqrt (8 / 3) := by
 458    rw [← Real.sqrt_div (by norm_num) 3]
 459    rw [show (2 : ℝ) = Real.sqrt 4 by norm_num]
 460    rw [← Real.sqrt_mul (by norm_num)]
 461    norm_num
 462  have h_rhs : (7 / 3 : ℝ) = Real.sqrt (49 / 9) := by
 463    rw [Real.sqrt_div (by norm_num) 9]
 464    norm_num
 465  rw [h_lhs, h_rhs]
 466  rw [Real.sqrt_lt_sqrt_iff (by positivity)]
 467  norm_num
 468
 469/-- **DEPRECATED**: The naive triangle inequality does NOT hold for Jmetric.
 470    Use `Jcost_submult` instead, which gives a valid submultiplicativity bound. -/
 471theorem Jmetric_triangle_FALSE {x y : ℝ} (_hx : 0 < x) (_hy : 0 < y) :
 472    ¬ (∀ a b : ℝ, 0 < a → 0 < b → Jmetric (a * b) ≤ Jmetric a + Jmetric b) := by
 473  intro h
 474  -- Counterexample: a = 2, b = 3
 475  let a : ℝ := 2
 476  let b : ℝ := 3
 477  have ha : 0 < a := by norm_num
 478  have hb : 0 < b := by norm_num
 479  have h_tri := h a b ha hb
 480  rw [show a * b = 6 by norm_num, Jmetric_val_6, Jmetric_val_2, Jmetric_val_3] at h_tri
 481  have h_viol := sqrt_triangle_violation
 482  linarith
 483
 484/-- **DEPRECATED**: The "weak triangle" J(xy) ≤ 2(J(x)+J(y)) + 2√(J(x)J(y)) is FALSE.
 485
 486    Counterexample: x = y = 10
 487    - J(100) ≈ 49.005
 488    - 2(J(10) + J(10)) + 2√(J(10)·J(10)) = 2(4.05 + 4.05) + 2·4.05 ≈ 24.3
 489    - 49.005 > 24.3
 490
 491    Use `Jcost_submult` instead: J(xy) ≤ 2J(x) + 2J(y) + 2J(x)J(y), which IS proved. -/
 492theorem Jcost_weak_triangle_FALSE :
 493    ¬ (∀ x y : ℝ, 0 < x → 0 < y →
 494      Jcost (x * y) ≤ 2 * (Jcost x + Jcost y) + 2 * Real.sqrt (Jcost x * Jcost y)) := by
 495  intro h
 496  -- Counterexample: x = y = 10, J(100) > 2(J(10)+J(10)) + 2*sqrt(J(10)*J(10))
 497  -- J(100) = (100 + 1/100)/2 - 1 = 49.005
 498  -- J(10) = (10 + 0.1)/2 - 1 = 4.05
 499  -- RHS = 2*(4.05 + 4.05) + 2*sqrt(4.05*4.05) = 16.2 + 8.1 = 24.3
 500  -- 49.005 > 24.3 is TRUE, not ≤, so the inequality fails
 501  have hc := h 10 10 (by norm_num) (by norm_num)
 502  -- The claim asserts ≤ but the counterexample shows >
 503  -- J(100) = 49.005, RHS = 24.3, so 49.005 > 24.3
 504  simp only [Jcost] at hc
 505  nlinarith [sq_nonneg (10 : ℝ), Real.sqrt_nonneg (Jcost 10 * Jcost 10)]
 506
 507/-- The d'Alembert identity: J(xy) + J(x/y) = 2J(x) + 2J(y) + 2J(x)J(y) -/
 508theorem dalembert_identity {x y : ℝ} (hx : 0 < x) (hy : 0 < y) :
 509    Jcost (x * y) + Jcost (x / y) = 2 * Jcost x + 2 * Jcost y + 2 * Jcost x * Jcost y := by
 510  have hx0 : x ≠ 0 := ne_of_gt hx
 511  have hy0 : y ≠ 0 := ne_of_gt hy
 512  have hxy : x * y ≠ 0 := mul_ne_zero hx0 hy0
 513  have hxdy : x / y ≠ 0 := div_ne_zero hx0 hy0
 514  simp only [Jcost_eq_sq hxy, Jcost_eq_sq hxdy, Jcost_eq_sq hx0, Jcost_eq_sq hy0]
 515  field_simp
 516  ring
 517
 518/-- From d'Alembert: J(xy) ≤ 2J(x) + 2J(y) + 2J(x)J(y) since J(x/y) ≥ 0 -/
 519lemma Jcost_submult {x y : ℝ} (hx : 0 < x) (hy : 0 < y) :
 520    Jcost (x * y) ≤ 2 * Jcost x + 2 * Jcost y + 2 * Jcost x * Jcost y := by
 521  have h := dalembert_identity hx hy
 522  have hnonneg : 0 ≤ Jcost (x / y) := Jcost_nonneg (div_pos hx hy)
 523  linarith
 524
 525/-- Bound on J product: J(xy) ≤ 2*(1 + J(x))*(1 + J(y)) - 2
 526    This follows from d'Alembert since (1+J(x))(1+J(y)) = 1 + J(x) + J(y) + J(x)J(y) -/
 527lemma Jcost_prod_bound {x y : ℝ} (hx : 0 < x) (hy : 0 < y) :
 528    Jcost (x * y) ≤ 2 * (1 + Jcost x) * (1 + Jcost y) - 2 := by
 529  have h := Jcost_submult hx hy
 530  -- 2*(1+J(x))*(1+J(y)) - 2 = 2*(1 + J(x) + J(y) + J(x)*J(y)) - 2
 531  --                        = 2 + 2*J(x) + 2*J(y) + 2*J(x)*J(y) - 2
 532  --                        = 2*J(x) + 2*J(y) + 2*J(x)*J(y)
 533  calc Jcost (x * y) ≤ 2 * Jcost x + 2 * Jcost y + 2 * Jcost x * Jcost y := h
 534    _ = 2 * (1 + Jcost x) * (1 + Jcost y) - 2 := by ring
 535
 536/-! ## Small-strain Taylor expansion -/
 537
 538lemma Jcost_one_plus_eps_quadratic (ε : ℝ) (hε : |ε| ≤ (1 : ℝ) / 2) :
 539    ∃ (c : ℝ), Jcost (1 + ε) = ε ^ 2 / 2 + c * ε ^ 3 ∧ |c| ≤ 2 := by
 540  classical
 541  have hbounds := abs_le.mp hε
 542  have hpos : 0 < 1 + ε := by
 543    have : -(1 : ℝ) / 2 ≤ ε := by simpa [neg_div] using hbounds.1
 544    linarith
 545  have hne : 1 + ε ≠ 0 := ne_of_gt hpos
 546  have hcalc : Jcost (1 + ε) = ε ^ 2 / (2 * (1 + ε)) := by
 547    simpa [pow_two, add_comm, add_left_comm, add_assoc, sub_eq_add_neg]
 548      using (Jcost_eq_sq hne)
 549  let c : ℝ := -1 / (2 * (1 + ε))
 550  have h_eq :
 551      Jcost (1 + ε) = ε ^ 2 / 2 + c * ε ^ 3 := by
 552    have : ε ^ 2 / (2 * (1 + ε)) = ε ^ 2 / 2 + (-1 / (2 * (1 + ε))) * ε ^ 3 := by
 553      field_simp [hne]
 554      ring
 555    simpa [hcalc, c] using this
 556  have hden_pos : 0 < 2 * (1 + ε) := by nlinarith [hpos]
 557  have habs : |c| = 1 / (2 * (1 + ε)) := by
 558    simp [c, div_eq_mul_inv, abs_mul, abs_inv, abs_of_pos hpos]
 559  -- Use 1/(2(1+ε)) ≤ 1 from (1+ε) ≥ 1/2
 560  have hone_le : (1 : ℝ) ≤ 2 * (1 + ε) := by
 561    have : (1 / 2 : ℝ) ≤ 1 + ε := by linarith
 562    simpa [two_mul] using mul_le_mul_of_nonneg_left this (by norm_num : (0 : ℝ) ≤ 2)
 563  have hdiv_le_one : 1 / (2 * (1 + ε)) ≤ 1 := by
 564    have hpos1 : 0 < (1 : ℝ) := by norm_num
 565    simpa [one_div] using one_div_le_one_div_of_le hpos1 hone_le
 566  have hbound : |c| ≤ 2 := by
 567    have h1 : |c| ≤ 1 := by simpa [habs] using hdiv_le_one
 568    have h12 : (1 : ℝ) ≤ 2 := by norm_num
 569    exact le_trans h1 h12
 570  exact ⟨c, h_eq, hbound⟩
 571
 572lemma Jcost_small_strain_bound (ε : ℝ) (hε : |ε| ≤ (1 : ℝ) / 10) :
 573    |Jcost (1 + ε) - ε ^ 2 / 2| ≤ ε ^ 2 / 10 := by
 574  classical
 575  have hbounds := abs_le.mp hε
 576  have hpos : 0 < 1 + ε := by
 577    have : -(1 : ℝ) / 10 ≤ ε := by simpa [neg_div] using hbounds.1
 578    linarith
 579  have hne : 1 + ε ≠ 0 := ne_of_gt hpos
 580  have hform : Jcost (1 + ε) = ε ^ 2 / (2 * (1 + ε)) := by
 581    simpa [pow_two, add_comm, add_left_comm, add_assoc, sub_eq_add_neg]
 582      using (Jcost_eq_sq hne)
 583  have hden_pos : 0 < 2 * (1 + ε) := by nlinarith [hpos]
 584  -- Exact difference and absolute value
 585  have h1 : Jcost (1 + ε) - ε ^ 2 / 2
 586      = ε ^ 2 / (2 * (1 + ε)) - ε ^ 2 / 2 := by
 587    simp [hform]
 588  have hx : (2 : ℝ) * (1 + ε) ≠ 0 := mul_ne_zero two_ne_zero hne
 589  have h2 : ε ^ 2 / (2 * (1 + ε)) - ε ^ 2 / 2 = -ε ^ 3 / (2 * (1 + ε)) := by
 590    field_simp [hx]
 591    ring
 592  have hdiff : Jcost (1 + ε) - ε ^ 2 / 2 = -ε ^ 3 / (2 * (1 + ε)) := h1.trans h2
 593  have habs : |Jcost (1 + ε) - ε ^ 2 / 2| = |ε| ^ 3 / (2 * (1 + ε)) := by
 594    have hposden : 0 < 2 * (1 + ε) := hden_pos
 595    simpa [abs_div, abs_neg, abs_pow, abs_of_pos hposden] using
 596      congrArg (fun z => |z|) hdiff
 597  -- Now bound using |ε|/(2(1+ε)) ≤ 1/18 from below
 598  have hx_lower : (9 : ℝ) / 10 ≤ 1 + ε := by linarith [show -(1 : ℝ) / 10 ≤ ε from by simpa [neg_div] using hbounds.1]
 599  have hx_pos : 0 < (9 : ℝ) / 10 := by norm_num
 600  have hx_inv : 1 / (1 + ε) ≤ (10 : ℝ) / 9 := by
 601    have := one_div_le_one_div_of_le hx_pos hx_lower
 602    simpa using this
 603  have hrec_bound : 1 / (2 * (1 + ε)) ≤ (5 : ℝ) / 9 := by
 604    have hmul : (1 / 2 : ℝ) * (1 / (1 + ε)) ≤ (1 / 2) * ((10 : ℝ) / 9) :=
 605      mul_le_mul_of_nonneg_left hx_inv (by norm_num)
 606    have hleft : 1 / (2 * (1 + ε)) = (1 / 2) * (1 / (1 + ε)) := by
 607      simp [div_eq_mul_inv, mul_comm]
 608    have hright : (5 : ℝ) / 9 = (1 / 2) * ((10 : ℝ) / 9) := by norm_num
 609    simpa [hleft, hright] using hmul
 610  have hrec_nonneg : 0 ≤ 1 / (2 * (1 + ε)) := by
 611    have : 0 ≤ 2 * (1 + ε) := le_of_lt (by nlinarith [hpos])
 612    exact one_div_nonneg.mpr this
 613  have hA : |ε| / (2 * (1 + ε)) ≤ (1 : ℝ) / 10 * (1 / (2 * (1 + ε))) := by
 614    simpa [div_eq_mul_inv, mul_comm, mul_left_comm, mul_assoc]
 615      using mul_le_mul_of_nonneg_right hε hrec_nonneg
 616  have hB : (1 : ℝ) / 10 * (1 / (2 * (1 + ε))) ≤ (1 : ℝ) / 18 := by
 617    have hmul := mul_le_mul_of_nonneg_left hrec_bound (by norm_num : (0 : ℝ) ≤ (1 : ℝ) / 10)
 618    have hright : (1 : ℝ) / 18 = (1 : ℝ) / 10 * ((5 : ℝ) / 9) := by norm_num
 619    simpa [hright] using hmul
 620  have hfrac : |ε| / (2 * (1 + ε)) ≤ (1 : ℝ) / 18 := hA.trans hB
 621  -- Conclude
 622  have hineq : |Jcost (1 + ε) - ε ^ 2 / 2| ≤ |ε| ^ 2 / 18 := by
 623    have hnn : 0 ≤ |ε| ^ 2 := by
 624      have := sq_nonneg (|ε|); simpa [pow_two] using this
 625    have hmul := mul_le_mul_of_nonneg_left hfrac hnn
 626    calc
 627      |Jcost (1 + ε) - ε ^ 2 / 2| = |ε| ^ 3 / (2 * (1 + ε)) := by simp [habs]
 628      _ ≤ |ε| ^ 2 * (1 / 18) := by
 629        simpa [pow_succ, pow_two, mul_comm, mul_left_comm, mul_assoc, div_eq_mul_inv] using hmul
 630      _ = |ε| ^ 2 / 18 := by simp [div_eq_mul_inv]
 631  have hratio : (1 : ℝ) / 18 ≤ 1 / 10 := by norm_num
 632  have hsq : |ε| ^ 2 = ε ^ 2 := by
 633    have h1 : |ε| * |ε| = |ε * ε| := by simp [abs_mul]
 634    calc
 635      |ε| ^ 2 = |ε| * |ε| := by simp [pow_two]
 636      _ = |ε * ε| := h1
 637      _ = |ε ^ 2| := by simp [pow_two]
 638      _ = ε ^ 2 := by simp [abs_of_nonneg (sq_nonneg ε)]
 639  have hcompare : |ε| ^ 2 / 18 ≤ ε ^ 2 / 10 := by
 640    have := mul_le_mul_of_nonneg_left hratio (by exact sq_nonneg ε)
 641    simpa [hsq, pow_two] using this
 642  exact (hineq.trans hcompare)
 643
 644/-- Alias: Jcost_reciprocal = Jcost_symm (for parallel-work compat). -/
 645lemma Jcost_reciprocal {x : ℝ} (hx : 0 < x) : Jcost x = Jcost x⁻¹ := Jcost_symm hx
 646
 647end Cost
 648end IndisputableMonolith
 649
source mirrored from github.com/jonwashburn/shape-of-logic