{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:R5OTX27LNHMH4ZWKCJTEZRNGF2","short_pith_number":"pith:R5OTX27L","canonical_record":{"source":{"id":"1806.01742","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-06-05T15:18:47Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"2b194f0422d66d6a5ed1a473f5d8edf7c179eb636a7d989b77c63cf02505e0b2","abstract_canon_sha256":"4de53973d3761d1e3cd5be5a4766dd83db47afda4f00d7915d69c2a199e49f55"},"schema_version":"1.0"},"canonical_sha256":"8f5d3bebeb69d87e66ca12664cc5a62ebe436afabfbb193a0683784815ededfd","source":{"kind":"arxiv","id":"1806.01742","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.01742","created_at":"2026-05-18T00:13:09Z"},{"alias_kind":"arxiv_version","alias_value":"1806.01742v2","created_at":"2026-05-18T00:13:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.01742","created_at":"2026-05-18T00:13:09Z"},{"alias_kind":"pith_short_12","alias_value":"R5OTX27LNHMH","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"R5OTX27LNHMH4ZWK","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"R5OTX27L","created_at":"2026-05-18T12:32:50Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:R5OTX27LNHMH4ZWKCJTEZRNGF2","target":"record","payload":{"canonical_record":{"source":{"id":"1806.01742","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-06-05T15:18:47Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"2b194f0422d66d6a5ed1a473f5d8edf7c179eb636a7d989b77c63cf02505e0b2","abstract_canon_sha256":"4de53973d3761d1e3cd5be5a4766dd83db47afda4f00d7915d69c2a199e49f55"},"schema_version":"1.0"},"canonical_sha256":"8f5d3bebeb69d87e66ca12664cc5a62ebe436afabfbb193a0683784815ededfd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:13:09.935082Z","signature_b64":"RWOTgjo5AL3lT8J7N1dZxdnh3gOtXQPd2Xp5ZIb9IQckowcnW5G+HuCcraLUPiH6nJkG7a1I3Q7twm8GUKjkBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8f5d3bebeb69d87e66ca12664cc5a62ebe436afabfbb193a0683784815ededfd","last_reissued_at":"2026-05-18T00:13:09.934273Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:13:09.934273Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.01742","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Egjw1rFdmTspxkh7eYta4ijvq/iIC0k0RcIh8n78jy4nG1/1ZmVMlHL3MoFjmPOwbJJvOb8agScizjDNQdxlBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T21:14:32.876834Z"},"content_sha256":"58bfb9d5822dbcd93b0c670bd8d963e9d71f2add2c13bceee5023b8af95f96aa","schema_version":"1.0","event_id":"sha256:58bfb9d5822dbcd93b0c670bd8d963e9d71f2add2c13bceee5023b8af95f96aa"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:R5OTX27LNHMH4ZWKCJTEZRNGF2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Adapting Neural Text Classification for Improved Software Categorization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.SE","authors_text":"Alexander LeClair, Collin McMillan, Zachary Eberhart","submitted_at":"2018-06-05T15:18:47Z","abstract_excerpt":"Software Categorization is the task of organizing software into groups that broadly describe the behavior of the software, such as \"editors\" or \"science.\" Categorization plays an important role in several maintenance tasks, such as repository navigation and feature elicitation. Current approaches attempt to cast the problem as text classification, to make use of the rich body of literature from the NLP domain. However, as we will show in this paper, text classification algorithms are generally not applicable off-the-shelf to source code; we found that they work well when high-level project des"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.01742","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oEUU+qXzpSzBpntYM/cYUTxNGCP0GzgH6a1u5h/vXjJN4IpGwUlOWtmVr9n619GvnOf+KQTHblDCjLIhwOB8AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T21:14:32.877284Z"},"content_sha256":"39bb34ede86f372ef9d4eee594851bc7ddc27f069701caa80ee1ba6c6c986770","schema_version":"1.0","event_id":"sha256:39bb34ede86f372ef9d4eee594851bc7ddc27f069701caa80ee1ba6c6c986770"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/R5OTX27LNHMH4ZWKCJTEZRNGF2/bundle.json","state_url":"https://pith.science/pith/R5OTX27LNHMH4ZWKCJTEZRNGF2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/R5OTX27LNHMH4ZWKCJTEZRNGF2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T21:14:32Z","links":{"resolver":"https://pith.science/pith/R5OTX27LNHMH4ZWKCJTEZRNGF2","bundle":"https://pith.science/pith/R5OTX27LNHMH4ZWKCJTEZRNGF2/bundle.json","state":"https://pith.science/pith/R5OTX27LNHMH4ZWKCJTEZRNGF2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/R5OTX27LNHMH4ZWKCJTEZRNGF2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:R5OTX27LNHMH4ZWKCJTEZRNGF2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4de53973d3761d1e3cd5be5a4766dd83db47afda4f00d7915d69c2a199e49f55","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-06-05T15:18:47Z","title_canon_sha256":"2b194f0422d66d6a5ed1a473f5d8edf7c179eb636a7d989b77c63cf02505e0b2"},"schema_version":"1.0","source":{"id":"1806.01742","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.01742","created_at":"2026-05-18T00:13:09Z"},{"alias_kind":"arxiv_version","alias_value":"1806.01742v2","created_at":"2026-05-18T00:13:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.01742","created_at":"2026-05-18T00:13:09Z"},{"alias_kind":"pith_short_12","alias_value":"R5OTX27LNHMH","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_16","alias_value":"R5OTX27LNHMH4ZWK","created_at":"2026-05-18T12:32:50Z"},{"alias_kind":"pith_short_8","alias_value":"R5OTX27L","created_at":"2026-05-18T12:32:50Z"}],"graph_snapshots":[{"event_id":"sha256:39bb34ede86f372ef9d4eee594851bc7ddc27f069701caa80ee1ba6c6c986770","target":"graph","created_at":"2026-05-18T00:13:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Software Categorization is the task of organizing software into groups that broadly describe the behavior of the software, such as \"editors\" or \"science.\" Categorization plays an important role in several maintenance tasks, such as repository navigation and feature elicitation. Current approaches attempt to cast the problem as text classification, to make use of the rich body of literature from the NLP domain. However, as we will show in this paper, text classification algorithms are generally not applicable off-the-shelf to source code; we found that they work well when high-level project des","authors_text":"Alexander LeClair, Collin McMillan, Zachary Eberhart","cross_cats":["cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-06-05T15:18:47Z","title":"Adapting Neural Text Classification for Improved Software Categorization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.01742","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:58bfb9d5822dbcd93b0c670bd8d963e9d71f2add2c13bceee5023b8af95f96aa","target":"record","created_at":"2026-05-18T00:13:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4de53973d3761d1e3cd5be5a4766dd83db47afda4f00d7915d69c2a199e49f55","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-06-05T15:18:47Z","title_canon_sha256":"2b194f0422d66d6a5ed1a473f5d8edf7c179eb636a7d989b77c63cf02505e0b2"},"schema_version":"1.0","source":{"id":"1806.01742","kind":"arxiv","version":2}},"canonical_sha256":"8f5d3bebeb69d87e66ca12664cc5a62ebe436afabfbb193a0683784815ededfd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8f5d3bebeb69d87e66ca12664cc5a62ebe436afabfbb193a0683784815ededfd","first_computed_at":"2026-05-18T00:13:09.934273Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:13:09.934273Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RWOTgjo5AL3lT8J7N1dZxdnh3gOtXQPd2Xp5ZIb9IQckowcnW5G+HuCcraLUPiH6nJkG7a1I3Q7twm8GUKjkBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:13:09.935082Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.01742","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:58bfb9d5822dbcd93b0c670bd8d963e9d71f2add2c13bceee5023b8af95f96aa","sha256:39bb34ede86f372ef9d4eee594851bc7ddc27f069701caa80ee1ba6c6c986770"],"state_sha256":"4d2c37871aeba124bf90df98697f730ec38e6db3bc0bfabc67abbf6f68596053"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ARSquyiKrxAUfR+X5pdzl/Qu7xGVX/5qkQr8wpU0hUwZFnn4UKnNquqtZ5AEZpnvJT0xt2pOYm6LB4slsPQsBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T21:14:32.881941Z","bundle_sha256":"bbc99b4a54552e3e2a6f56235d6be665c3308196a046c417484c5b76cf0eec4a"}}