{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:35Y2AJGVRPCVCPXENSTKMQCKUQ","short_pith_number":"pith:35Y2AJGV","schema_version":"1.0","canonical_sha256":"df71a024d58bc5513ee46ca6a6404aa40903d3aa4e58730dddd29c60a2f22e0f","source":{"kind":"arxiv","id":"2503.08679","version":5},"attestation_state":"computed","paper":{"title":"Chain-of-Thought Reasoning In The Wild Is Not Always Faithful","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.AI","authors_text":"Arthur Conmy, Iv\\'an Arcuschin, Jett Janiak, Neel Nanda, Robert Krzyzanowski, Senthooran Rajamanoharan","submitted_at":"2025-03-11T17:56:30Z","abstract_excerpt":"Recent studies indicate that when faced with explicit biases in prompts, models often omit mentioning these biases in their Chain-of-Thought (CoT) output, revealing that verbalized reasoning can give an incorrect picture of how models arrive at conclusions (unfaithfulness). In this work, we show that unfaithful CoT also occurs on naturally worded, non-adversarial prompts without adding artificial biases or editing model outputs. We find that when separately presented with the questions \"Is X bigger than Y?\" and \"Is Y bigger than X?\", models sometimes produce superficially coherent arguments to"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2503.08679","kind":"arxiv","version":5},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-03-11T17:56:30Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"6d7630c8408d438a7426dd9e6fbdb0e41c6f7c8c2074dadbf79737ed965447b4","abstract_canon_sha256":"aee0f7719fda4e10ee3eb2c57c02c4913041f33638c1b2ace858d9613e850bb0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T02:03:19.841886Z","signature_b64":"PpDE52KIb6JO09V8IhIlKAIFSibGmYqSz/6+2OBj+okNQmZ/n1R8DYNGIZtK/FFlPRQTQWmIYqRB+WxGF0ePDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"df71a024d58bc5513ee46ca6a6404aa40903d3aa4e58730dddd29c60a2f22e0f","last_reissued_at":"2026-06-01T02:03:19.840669Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T02:03:19.840669Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Chain-of-Thought Reasoning In The Wild Is Not Always Faithful","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.AI","authors_text":"Arthur Conmy, Iv\\'an Arcuschin, Jett Janiak, Neel Nanda, Robert Krzyzanowski, Senthooran Rajamanoharan","submitted_at":"2025-03-11T17:56:30Z","abstract_excerpt":"Recent studies indicate that when faced with explicit biases in prompts, models often omit mentioning these biases in their Chain-of-Thought (CoT) output, revealing that verbalized reasoning can give an incorrect picture of how models arrive at conclusions (unfaithfulness). In this work, we show that unfaithful CoT also occurs on naturally worded, non-adversarial prompts without adding artificial biases or editing model outputs. We find that when separately presented with the questions \"Is X bigger than Y?\" and \"Is Y bigger than X?\", models sometimes produce superficially coherent arguments to"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2503.08679","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2503.08679/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2503.08679","created_at":"2026-06-01T02:03:19.840848+00:00"},{"alias_kind":"arxiv_version","alias_value":"2503.08679v5","created_at":"2026-06-01T02:03:19.840848+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2503.08679","created_at":"2026-06-01T02:03:19.840848+00:00"},{"alias_kind":"pith_short_12","alias_value":"35Y2AJGVRPCV","created_at":"2026-06-01T02:03:19.840848+00:00"},{"alias_kind":"pith_short_16","alias_value":"35Y2AJGVRPCVCPXE","created_at":"2026-06-01T02:03:19.840848+00:00"},{"alias_kind":"pith_short_8","alias_value":"35Y2AJGV","created_at":"2026-06-01T02:03:19.840848+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":20,"internal_anchor_count":20,"sample":[{"citing_arxiv_id":"2605.22870","citing_title":"The Readout Shortcut: Positional Number Copying Dominates Arithmetic CoT Readout in Small Language Models","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2602.04003","citing_title":"When AI Persuades: Adversarial Explanation Attacks on Human Trust in AI-Assisted Decision Making","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2604.13392","citing_title":"ReSS: Learning Reasoning Models for Tabular Data Prediction via Symbolic Scaffold","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10930","citing_title":"Evaluating the False Trust Engendered by LLM Explanations","ref_index":52,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19228","citing_title":"Diagnosing Multi-step Reasoning Failures in Black-box LLMs via Stepwise Confidence Attribution","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"2506.19807","citing_title":"KnowRL: Exploring Knowledgeable Reinforcement Learning for Factuality","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2507.09788","citing_title":"TinyTroupe: An LLM-powered Multiagent Persona Simulation Toolkit","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2510.24941","citing_title":"Can Aha Moments Be Fake? Towards Quantifying Decorative and True Thinking in Chain-of-Thought","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2509.08827","citing_title":"A Survey of Reinforcement Learning for Large Reasoning Models","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2511.17069","citing_title":"Interpretability from the Ground Up: Stakeholder-Centric Design of Automated Scoring in Educational Assessments","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2504.21318","citing_title":"Phi-4-reasoning Technical Report","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2510.25741","citing_title":"Scaling Latent Reasoning via Looped Language Models","ref_index":74,"is_internal_anchor":true},{"citing_arxiv_id":"2506.14245","citing_title":"Reinforcement Learning with Verifiable Rewards Implicitly Incentivizes Correct Reasoning in Base LLMs","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11746","citing_title":"When Reasoning Traces Become Performative: Step-Level Evidence that Chain-of-Thought Is an Imperfect Oversight Channel","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11388","citing_title":"Deep Reasoning in General Purpose Agents via Structured Meta-Cognition","ref_index":88,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10930","citing_title":"Evaluating the False Trust Engendered by LLM Explanations","ref_index":52,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07353","citing_title":"Confidence-Aware Alignment Makes Reasoning LLMs More Reliable","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2604.13392","citing_title":"ReSS: Learning Reasoning Models for Tabular Data Prediction via Symbolic Scaffold","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2604.15726","citing_title":"LLM Reasoning Is Latent, Not the Chain of Thought","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01048","citing_title":"Compared to What? Baselines and Metrics for Counterfactual Prompting","ref_index":47,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/35Y2AJGVRPCVCPXENSTKMQCKUQ","json":"https://pith.science/pith/35Y2AJGVRPCVCPXENSTKMQCKUQ.json","graph_json":"https://pith.science/api/pith-number/35Y2AJGVRPCVCPXENSTKMQCKUQ/graph.json","events_json":"https://pith.science/api/pith-number/35Y2AJGVRPCVCPXENSTKMQCKUQ/events.json","paper":"https://pith.science/paper/35Y2AJGV"},"agent_actions":{"view_html":"https://pith.science/pith/35Y2AJGVRPCVCPXENSTKMQCKUQ","download_json":"https://pith.science/pith/35Y2AJGVRPCVCPXENSTKMQCKUQ.json","view_paper":"https://pith.science/paper/35Y2AJGV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2503.08679&json=true","fetch_graph":"https://pith.science/api/pith-number/35Y2AJGVRPCVCPXENSTKMQCKUQ/graph.json","fetch_events":"https://pith.science/api/pith-number/35Y2AJGVRPCVCPXENSTKMQCKUQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/35Y2AJGVRPCVCPXENSTKMQCKUQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/35Y2AJGVRPCVCPXENSTKMQCKUQ/action/storage_attestation","attest_author":"https://pith.science/pith/35Y2AJGVRPCVCPXENSTKMQCKUQ/action/author_attestation","sign_citation":"https://pith.science/pith/35Y2AJGVRPCVCPXENSTKMQCKUQ/action/citation_signature","submit_replication":"https://pith.science/pith/35Y2AJGVRPCVCPXENSTKMQCKUQ/action/replication_record"}},"created_at":"2026-06-01T02:03:19.840848+00:00","updated_at":"2026-06-01T02:03:19.840848+00:00"}