{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:DWHWGCVL76ID7DPJSH3QLZ7EDB","short_pith_number":"pith:DWHWGCVL","schema_version":"1.0","canonical_sha256":"1d8f630aabff903f8de991f705e7e41845787f03747d804b4dbc8a983caf075e","source":{"kind":"arxiv","id":"1612.03928","version":3},"attestation_state":"computed","paper":{"title":"Paying More Attention to Attention: Improving the Performance of Convolutional Neural Networks via Attention Transfer","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Nikos Komodakis, Sergey Zagoruyko","submitted_at":"2016-12-12T21:15:57Z","abstract_excerpt":"Attention plays a critical role in human visual experience. Furthermore, it has recently been demonstrated that attention can also play an important role in the context of applying artificial neural networks to a variety of tasks from fields such as computer vision and NLP. In this work we show that, by properly defining attention for convolutional neural networks, we can actually use this type of information in order to significantly improve the performance of a student CNN network by forcing it to mimic the attention maps of a powerful teacher network. To that end, we propose several novel m"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1612.03928","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-12-12T21:15:57Z","cross_cats_sorted":[],"title_canon_sha256":"06159371d76d020a8baf75178001776f1ed8641dae9a9c7cedb137bff183cc53","abstract_canon_sha256":"b471bce050db0ceb0667f37d266363af9b80c55a4317d95d20ffe582a392cc25"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:50:56.241820Z","signature_b64":"gKb8fjV4Q3fDBNO0yj4ouK+BdGnzH+W0iU40TqUOARiknkYhuP3mPIBjxMxgJE8wAifcl57MiRGz8cGqcTl9AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1d8f630aabff903f8de991f705e7e41845787f03747d804b4dbc8a983caf075e","last_reissued_at":"2026-05-18T00:50:56.241221Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:50:56.241221Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Paying More Attention to Attention: Improving the Performance of Convolutional Neural Networks via Attention Transfer","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Nikos Komodakis, Sergey Zagoruyko","submitted_at":"2016-12-12T21:15:57Z","abstract_excerpt":"Attention plays a critical role in human visual experience. Furthermore, it has recently been demonstrated that attention can also play an important role in the context of applying artificial neural networks to a variety of tasks from fields such as computer vision and NLP. In this work we show that, by properly defining attention for convolutional neural networks, we can actually use this type of information in order to significantly improve the performance of a student CNN network by forcing it to mimic the attention maps of a powerful teacher network. To that end, we propose several novel m"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1612.03928","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1612.03928","created_at":"2026-05-18T00:50:56.241330+00:00"},{"alias_kind":"arxiv_version","alias_value":"1612.03928v3","created_at":"2026-05-18T00:50:56.241330+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1612.03928","created_at":"2026-05-18T00:50:56.241330+00:00"},{"alias_kind":"pith_short_12","alias_value":"DWHWGCVL76ID","created_at":"2026-05-18T12:30:12.583610+00:00"},{"alias_kind":"pith_short_16","alias_value":"DWHWGCVL76ID7DPJ","created_at":"2026-05-18T12:30:12.583610+00:00"},{"alias_kind":"pith_short_8","alias_value":"DWHWGCVL","created_at":"2026-05-18T12:30:12.583610+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":12,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.15456","citing_title":"DIPA: Distilled Preconditioned Algorithms for Solving Imaging Inverse Problems","ref_index":73,"is_internal_anchor":true},{"citing_arxiv_id":"2511.13415","citing_title":"Attention Grounded Enhancement for Visual Document Retrieval","ref_index":61,"is_internal_anchor":true},{"citing_arxiv_id":"2303.17760","citing_title":"CAMEL: Communicative Agents for \"Mind\" Exploration of Large Language Model Society","ref_index":127,"is_internal_anchor":false},{"citing_arxiv_id":"2604.02509","citing_title":"Rapidly deploying on-device eye tracking by distilling visual foundation models","ref_index":43,"is_internal_anchor":false},{"citing_arxiv_id":"2605.11414","citing_title":"Generative Diffusion Prior Distillation for Long-Context Knowledge Transfer","ref_index":13,"is_internal_anchor":false},{"citing_arxiv_id":"2604.26255","citing_title":"GaitKD: A Universal Decoupled Distillation Framework for Efficient Gait Recognition","ref_index":28,"is_internal_anchor":false},{"citing_arxiv_id":"2604.25795","citing_title":"Improving Diversity in Black-box Few-shot Knowledge Distillation","ref_index":28,"is_internal_anchor":false},{"citing_arxiv_id":"2604.24313","citing_title":"Self-Abstraction Learning for Effective and Stable Training of Deep Neural Networks","ref_index":24,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01563","citing_title":"Multi-Dataset Cross-Domain Knowledge Distillation for Unified Medical Image Segmentation, Classification, and Detection","ref_index":90,"is_internal_anchor":false},{"citing_arxiv_id":"2604.19015","citing_title":"FedProxy: Federated Fine-Tuning of LLMs via Proxy SLMs and Heterogeneity-Aware Fusion","ref_index":75,"is_internal_anchor":false},{"citing_arxiv_id":"2605.04447","citing_title":"Deep Reprogramming Distillation for Medical Foundation Models","ref_index":25,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01931","citing_title":"SwiftChannel: Algorithm-Hardware Co-Design for Deep Learning-Based 5G Channel Estimation","ref_index":38,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DWHWGCVL76ID7DPJSH3QLZ7EDB","json":"https://pith.science/pith/DWHWGCVL76ID7DPJSH3QLZ7EDB.json","graph_json":"https://pith.science/api/pith-number/DWHWGCVL76ID7DPJSH3QLZ7EDB/graph.json","events_json":"https://pith.science/api/pith-number/DWHWGCVL76ID7DPJSH3QLZ7EDB/events.json","paper":"https://pith.science/paper/DWHWGCVL"},"agent_actions":{"view_html":"https://pith.science/pith/DWHWGCVL76ID7DPJSH3QLZ7EDB","download_json":"https://pith.science/pith/DWHWGCVL76ID7DPJSH3QLZ7EDB.json","view_paper":"https://pith.science/paper/DWHWGCVL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1612.03928&json=true","fetch_graph":"https://pith.science/api/pith-number/DWHWGCVL76ID7DPJSH3QLZ7EDB/graph.json","fetch_events":"https://pith.science/api/pith-number/DWHWGCVL76ID7DPJSH3QLZ7EDB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DWHWGCVL76ID7DPJSH3QLZ7EDB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DWHWGCVL76ID7DPJSH3QLZ7EDB/action/storage_attestation","attest_author":"https://pith.science/pith/DWHWGCVL76ID7DPJSH3QLZ7EDB/action/author_attestation","sign_citation":"https://pith.science/pith/DWHWGCVL76ID7DPJSH3QLZ7EDB/action/citation_signature","submit_replication":"https://pith.science/pith/DWHWGCVL76ID7DPJSH3QLZ7EDB/action/replication_record"}},"created_at":"2026-05-18T00:50:56.241330+00:00","updated_at":"2026-05-18T00:50:56.241330+00:00"}