{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:2FXG3G62O4RSG5BAGWT4CI4T65","short_pith_number":"pith:2FXG3G62","canonical_record":{"source":{"id":"2605.11723","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T08:08:33Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"77d7cc7414059063102e3e5e96fe5eb7b57bdde140cb94450c41d496e33a7a51","abstract_canon_sha256":"fd99ad0981f7740e96478469d14ece693c9119a2593312aac538b19a4c329129"},"schema_version":"1.0"},"canonical_sha256":"d16e6d9bda772323742035a7c12393f75c84d9ed59c5152933eb1713be7c322d","source":{"kind":"arxiv","id":"2605.11723","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.11723","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"arxiv_version","alias_value":"2605.11723v2","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.11723","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"pith_short_12","alias_value":"2FXG3G62O4RS","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"pith_short_16","alias_value":"2FXG3G62O4RSG5BA","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"pith_short_8","alias_value":"2FXG3G62","created_at":"2026-05-29T02:05:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:2FXG3G62O4RSG5BAGWT4CI4T65","target":"record","payload":{"canonical_record":{"source":{"id":"2605.11723","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T08:08:33Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"77d7cc7414059063102e3e5e96fe5eb7b57bdde140cb94450c41d496e33a7a51","abstract_canon_sha256":"fd99ad0981f7740e96478469d14ece693c9119a2593312aac538b19a4c329129"},"schema_version":"1.0"},"canonical_sha256":"d16e6d9bda772323742035a7c12393f75c84d9ed59c5152933eb1713be7c322d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T02:05:46.618919Z","signature_b64":"sHDuSNQaFTHBf2pT0TX9ojmdhSwMtiFlLIDhciwZP0PRm/z+d4mkhaRhRMDFKYj+k3NMY3D1u7rlyjg3osC/Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d16e6d9bda772323742035a7c12393f75c84d9ed59c5152933eb1713be7c322d","last_reissued_at":"2026-05-29T02:05:46.618181Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T02:05:46.618181Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.11723","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T02:05:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"smY4Quf6GGg3Mid8Kk/HnTEtxImaZIiR3g3DsZrZce8AXWdp0F+f0mPuCvuBmSv+Yudut868VDb8X256c/DMCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T15:48:30.267832Z"},"content_sha256":"c8e1767830daef56a209e07065208424ccf2c26260214b6d7778cd0048cba68b","schema_version":"1.0","event_id":"sha256:c8e1767830daef56a209e07065208424ccf2c26260214b6d7778cd0048cba68b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:2FXG3G62O4RSG5BAGWT4CI4T65","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"CaC: Advancing Video Reward Models via Hierarchical Spatiotemporal Concentrating","license":"http://creativecommons.org/licenses/by/4.0/","headline":"CaC shows that a hierarchical temporal-then-spatial scan lets vision-language models detect subtle video anomalies more reliably for use as rewards.","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Boheng Zhang, Chunyu Lin, Dewen Fan, Fan Yang, Fei Zuo, Guosheng Lin, Haonan Fan, Honglie Wang, Huaiqing Wang, Huan Ouyang, Jia Sun, Jiuzhou Lin, Jiyuan Wang, Tingting Gao, Yiyang Fan, Yongrui Heng, Zhenlong Yuan, Zijun Li","submitted_at":"2026-05-12T08:08:33Z","abstract_excerpt":"In this paper, we propose Concentrate and Concentrate (CaC), a coarse-to-fine anomaly reward model based on Vision-Language Models. During inference, it first conducts a global temporal scan to anchor anomalous time windows, then performs fine-grained spatial grounding within the localized interval, and finally derives robust judgments via structured spatiotemporal Chain-of-Thought reasoning. To equip the model with these capabilities, we construct the first large-scale generated video anomaly dataset with per-frame bounding-box annotations, temporal anomaly windows, and fine-grained attributi"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"CaC can stably concentrate on subtle anomalies, achieving a 25.7% accuracy improvement on fine-grained anomaly benchmarks and, when used as a reward signal, reduces generated-video anomalies by 11.7% while improving overall video quality.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the authors' newly constructed generated-video anomaly dataset is sufficiently representative of real deployment distributions and that the added Temporal and Spatial IoU rewards in GRPO training produce generalizable improvements rather than dataset-specific fitting.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"CaC is a hierarchical spatiotemporal concentrating reward model for video anomalies that reports 25.7% accuracy gains on fine-grained benchmarks and 11.7% anomaly reduction in generated videos via a new dataset and GRPO training with temporal/spatial IoU rewards.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"CaC shows that a hierarchical temporal-then-spatial scan lets vision-language models detect subtle video anomalies more reliably for use as rewards.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"3f0ffae2e8f3b0b1f06381e2f981ec2c694d01bce536ea69aa6cb589db1a4dba"},"source":{"id":"2605.11723","kind":"arxiv","version":2},"verdict":{"id":"f18c56b4-c8d1-4b53-bda0-3a42092ff5ef","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-13T05:56:03.644666Z","strongest_claim":"CaC can stably concentrate on subtle anomalies, achieving a 25.7% accuracy improvement on fine-grained anomaly benchmarks and, when used as a reward signal, reduces generated-video anomalies by 11.7% while improving overall video quality.","one_line_summary":"CaC is a hierarchical spatiotemporal concentrating reward model for video anomalies that reports 25.7% accuracy gains on fine-grained benchmarks and 11.7% anomaly reduction in generated videos via a new dataset and GRPO training with temporal/spatial IoU rewards.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the authors' newly constructed generated-video anomaly dataset is sufficiently representative of real deployment distributions and that the added Temporal and Spatial IoU rewards in GRPO training produce generalizable improvements rather than dataset-specific fitting.","pith_extraction_headline":"CaC shows that a hierarchical temporal-then-spatial scan lets vision-language models detect subtle video anomalies more reliably for use as rewards."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.11723/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-20T23:31:31.958396Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-20T13:47:15.592145Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-20T03:42:00.439063Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T11:39:12.897039Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"192f9c482ad9e4a54f0455ff3e3d7617fa6ff85d73ab27e3ac0817f1c5c1a3cb"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"5c4d2e3acb0d79a08a38bdce028014ed0bb144e975204ccb5ce3c9f1ab6d0cbd"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"f18c56b4-c8d1-4b53-bda0-3a42092ff5ef"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T02:05:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"141ysuTUgPSkjuTkuU/Cz4kGKHHs3Hl99XPWI3rqnI3twpqjXU9EPQzq7BrZUmsuWmuX6SEqSAT9hxdmdp6ABw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T15:48:30.268411Z"},"content_sha256":"0526639826c8f97a5596aecef11e1611da86161f75bdf46a58547ae07603ecdf","schema_version":"1.0","event_id":"sha256:0526639826c8f97a5596aecef11e1611da86161f75bdf46a58547ae07603ecdf"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/2FXG3G62O4RSG5BAGWT4CI4T65/bundle.json","state_url":"https://pith.science/pith/2FXG3G62O4RSG5BAGWT4CI4T65/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/2FXG3G62O4RSG5BAGWT4CI4T65/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T15:48:30Z","links":{"resolver":"https://pith.science/pith/2FXG3G62O4RSG5BAGWT4CI4T65","bundle":"https://pith.science/pith/2FXG3G62O4RSG5BAGWT4CI4T65/bundle.json","state":"https://pith.science/pith/2FXG3G62O4RSG5BAGWT4CI4T65/state.json","well_known_bundle":"https://pith.science/.well-known/pith/2FXG3G62O4RSG5BAGWT4CI4T65/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:2FXG3G62O4RSG5BAGWT4CI4T65","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"fd99ad0981f7740e96478469d14ece693c9119a2593312aac538b19a4c329129","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T08:08:33Z","title_canon_sha256":"77d7cc7414059063102e3e5e96fe5eb7b57bdde140cb94450c41d496e33a7a51"},"schema_version":"1.0","source":{"id":"2605.11723","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.11723","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"arxiv_version","alias_value":"2605.11723v2","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.11723","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"pith_short_12","alias_value":"2FXG3G62O4RS","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"pith_short_16","alias_value":"2FXG3G62O4RSG5BA","created_at":"2026-05-29T02:05:46Z"},{"alias_kind":"pith_short_8","alias_value":"2FXG3G62","created_at":"2026-05-29T02:05:46Z"}],"graph_snapshots":[{"event_id":"sha256:0526639826c8f97a5596aecef11e1611da86161f75bdf46a58547ae07603ecdf","target":"graph","created_at":"2026-05-29T02:05:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"CaC can stably concentrate on subtle anomalies, achieving a 25.7% accuracy improvement on fine-grained anomaly benchmarks and, when used as a reward signal, reduces generated-video anomalies by 11.7% while improving overall video quality."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the authors' newly constructed generated-video anomaly dataset is sufficiently representative of real deployment distributions and that the added Temporal and Spatial IoU rewards in GRPO training produce generalizable improvements rather than dataset-specific fitting."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"CaC is a hierarchical spatiotemporal concentrating reward model for video anomalies that reports 25.7% accuracy gains on fine-grained benchmarks and 11.7% anomaly reduction in generated videos via a new dataset and GRPO training with temporal/spatial IoU rewards."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"CaC shows that a hierarchical temporal-then-spatial scan lets vision-language models detect subtle video anomalies more reliably for use as rewards."}],"snapshot_sha256":"3f0ffae2e8f3b0b1f06381e2f981ec2c694d01bce536ea69aa6cb589db1a4dba"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"5c4d2e3acb0d79a08a38bdce028014ed0bb144e975204ccb5ce3c9f1ab6d0cbd"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-20T23:31:31.958396Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-20T13:47:15.592145Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-20T03:42:00.439063Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T11:39:12.897039Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.11723/integrity.json","findings":[],"snapshot_sha256":"192f9c482ad9e4a54f0455ff3e3d7617fa6ff85d73ab27e3ac0817f1c5c1a3cb","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"In this paper, we propose Concentrate and Concentrate (CaC), a coarse-to-fine anomaly reward model based on Vision-Language Models. During inference, it first conducts a global temporal scan to anchor anomalous time windows, then performs fine-grained spatial grounding within the localized interval, and finally derives robust judgments via structured spatiotemporal Chain-of-Thought reasoning. To equip the model with these capabilities, we construct the first large-scale generated video anomaly dataset with per-frame bounding-box annotations, temporal anomaly windows, and fine-grained attributi","authors_text":"Boheng Zhang, Chunyu Lin, Dewen Fan, Fan Yang, Fei Zuo, Guosheng Lin, Haonan Fan, Honglie Wang, Huaiqing Wang, Huan Ouyang, Jia Sun, Jiuzhou Lin, Jiyuan Wang, Tingting Gao, Yiyang Fan, Yongrui Heng, Zhenlong Yuan, Zijun Li","cross_cats":["cs.AI"],"headline":"CaC shows that a hierarchical temporal-then-spatial scan lets vision-language models detect subtle video anomalies more reliably for use as rewards.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T08:08:33Z","title":"CaC: Advancing Video Reward Models via Hierarchical Spatiotemporal Concentrating"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.11723","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-13T05:56:03.644666Z","id":"f18c56b4-c8d1-4b53-bda0-3a42092ff5ef","model_set":{"reader":"grok-4.3"},"one_line_summary":"CaC is a hierarchical spatiotemporal concentrating reward model for video anomalies that reports 25.7% accuracy gains on fine-grained benchmarks and 11.7% anomaly reduction in generated videos via a new dataset and GRPO training with temporal/spatial IoU rewards.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"CaC shows that a hierarchical temporal-then-spatial scan lets vision-language models detect subtle video anomalies more reliably for use as rewards.","strongest_claim":"CaC can stably concentrate on subtle anomalies, achieving a 25.7% accuracy improvement on fine-grained anomaly benchmarks and, when used as a reward signal, reduces generated-video anomalies by 11.7% while improving overall video quality.","weakest_assumption":"That the authors' newly constructed generated-video anomaly dataset is sufficiently representative of real deployment distributions and that the added Temporal and Spatial IoU rewards in GRPO training produce generalizable improvements rather than dataset-specific fitting."}},"verdict_id":"f18c56b4-c8d1-4b53-bda0-3a42092ff5ef"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c8e1767830daef56a209e07065208424ccf2c26260214b6d7778cd0048cba68b","target":"record","created_at":"2026-05-29T02:05:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"fd99ad0981f7740e96478469d14ece693c9119a2593312aac538b19a4c329129","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T08:08:33Z","title_canon_sha256":"77d7cc7414059063102e3e5e96fe5eb7b57bdde140cb94450c41d496e33a7a51"},"schema_version":"1.0","source":{"id":"2605.11723","kind":"arxiv","version":2}},"canonical_sha256":"d16e6d9bda772323742035a7c12393f75c84d9ed59c5152933eb1713be7c322d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d16e6d9bda772323742035a7c12393f75c84d9ed59c5152933eb1713be7c322d","first_computed_at":"2026-05-29T02:05:46.618181Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T02:05:46.618181Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sHDuSNQaFTHBf2pT0TX9ojmdhSwMtiFlLIDhciwZP0PRm/z+d4mkhaRhRMDFKYj+k3NMY3D1u7rlyjg3osC/Bg==","signature_status":"signed_v1","signed_at":"2026-05-29T02:05:46.618919Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.11723","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c8e1767830daef56a209e07065208424ccf2c26260214b6d7778cd0048cba68b","sha256:0526639826c8f97a5596aecef11e1611da86161f75bdf46a58547ae07603ecdf"],"state_sha256":"ef3e15f0b69116b7029708ae52b83b2d17d409cecfa2ee698b5a1a3b6f63805b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IMS3DoI9nqGroD4W0Hpet65tvV4bSpRqERay9XfKoQeW4mqICvE0Aq2pI3DBINC26zVDmoWINY2ZK9a1irZfBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T15:48:30.270835Z","bundle_sha256":"e8bf645a6eb7c26b298d4d24602a45cc6f77262e2c87766479373b10d41b8c6d"}}