{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:MQSEVPF7FVVGEF6TOTTBORD4AE","short_pith_number":"pith:MQSEVPF7","canonical_record":{"source":{"id":"1705.01265","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-05-03T06:33:37Z","cross_cats_sorted":[],"title_canon_sha256":"b07a9d19bd239e7ee5def94f4f63b641cb395c0ed56c04113e32bfb163cea2e0","abstract_canon_sha256":"e2a9a9a3f7cbc0f03a505fb55d49a81b99cd2095e7eff42ced4013c711155ed0"},"schema_version":"1.0"},"canonical_sha256":"64244abcbf2d6a6217d374e617447c0112fe85827dcf8f9fb84c78c2fcf8b644","source":{"kind":"arxiv","id":"1705.01265","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.01265","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"arxiv_version","alias_value":"1705.01265v2","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.01265","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"pith_short_12","alias_value":"MQSEVPF7FVVG","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"MQSEVPF7FVVGEF6T","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"MQSEVPF7","created_at":"2026-05-18T12:31:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:MQSEVPF7FVVGEF6TOTTBORD4AE","target":"record","payload":{"canonical_record":{"source":{"id":"1705.01265","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-05-03T06:33:37Z","cross_cats_sorted":[],"title_canon_sha256":"b07a9d19bd239e7ee5def94f4f63b641cb395c0ed56c04113e32bfb163cea2e0","abstract_canon_sha256":"e2a9a9a3f7cbc0f03a505fb55d49a81b99cd2095e7eff42ced4013c711155ed0"},"schema_version":"1.0"},"canonical_sha256":"64244abcbf2d6a6217d374e617447c0112fe85827dcf8f9fb84c78c2fcf8b644","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:09:36.647487Z","signature_b64":"JdwimpDIrGkWHX7SeJrNcrT3CqxKGdL0V22/WHEoOMc+cyWP8TaDpr6eRJRHoyeYC761Lecs/uzsbHhsjxioCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"64244abcbf2d6a6217d374e617447c0112fe85827dcf8f9fb84c78c2fcf8b644","last_reissued_at":"2026-05-18T00:09:36.646903Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:09:36.646903Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1705.01265","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:09:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tgBXAOFOFbjP8bJveWXMpcbM7tOxAdPp7kXEERHBB5pvrPz85mo0ThuvkfTnfE2oh7wNhW3gyLjfAdSFNCOoCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-24T20:40:41.677713Z"},"content_sha256":"82c7ef8ad1c75f310846c11e97df71c469af831b7af7aeee6f179ba7b4eae459","schema_version":"1.0","event_id":"sha256:82c7ef8ad1c75f310846c11e97df71c469af831b7af7aeee6f179ba7b4eae459"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:MQSEVPF7FVVGEF6TOTTBORD4AE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"On the effectiveness of feature set augmentation using clusters of word embeddings","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Georgios Balikas, Ioannis Partalas","submitted_at":"2017-05-03T06:33:37Z","abstract_excerpt":"Word clusters have been empirically shown to offer important performance improvements on various tasks. Despite their importance, their incorporation in the standard pipeline of feature engineering relies more on a trial-and-error procedure where one evaluates several hyper-parameters, like the number of clusters to be used. In order to better understand the role of such features we systematically evaluate their effect on four tasks, those of named entity segmentation and classification as well as, those of five-point sentiment classification and quantification. Our results strongly suggest th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.01265","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:09:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SSeJSf8ewwGwRV5RHNyrsHsZwXLGcSLIX208CfqyWvImzatAYfoQ6mCFW0WUh0o405AIzCADNAoWtYSwYBcWCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-24T20:40:41.678049Z"},"content_sha256":"d2eb6ed9f2f5bb67226f09e44be9ef3b5a6d79a0c0b9f7f780298825a455c391","schema_version":"1.0","event_id":"sha256:d2eb6ed9f2f5bb67226f09e44be9ef3b5a6d79a0c0b9f7f780298825a455c391"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MQSEVPF7FVVGEF6TOTTBORD4AE/bundle.json","state_url":"https://pith.science/pith/MQSEVPF7FVVGEF6TOTTBORD4AE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MQSEVPF7FVVGEF6TOTTBORD4AE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-24T20:40:41Z","links":{"resolver":"https://pith.science/pith/MQSEVPF7FVVGEF6TOTTBORD4AE","bundle":"https://pith.science/pith/MQSEVPF7FVVGEF6TOTTBORD4AE/bundle.json","state":"https://pith.science/pith/MQSEVPF7FVVGEF6TOTTBORD4AE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MQSEVPF7FVVGEF6TOTTBORD4AE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:MQSEVPF7FVVGEF6TOTTBORD4AE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e2a9a9a3f7cbc0f03a505fb55d49a81b99cd2095e7eff42ced4013c711155ed0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-05-03T06:33:37Z","title_canon_sha256":"b07a9d19bd239e7ee5def94f4f63b641cb395c0ed56c04113e32bfb163cea2e0"},"schema_version":"1.0","source":{"id":"1705.01265","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.01265","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"arxiv_version","alias_value":"1705.01265v2","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.01265","created_at":"2026-05-18T00:09:36Z"},{"alias_kind":"pith_short_12","alias_value":"MQSEVPF7FVVG","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"MQSEVPF7FVVGEF6T","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"MQSEVPF7","created_at":"2026-05-18T12:31:31Z"}],"graph_snapshots":[{"event_id":"sha256:d2eb6ed9f2f5bb67226f09e44be9ef3b5a6d79a0c0b9f7f780298825a455c391","target":"graph","created_at":"2026-05-18T00:09:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Word clusters have been empirically shown to offer important performance improvements on various tasks. Despite their importance, their incorporation in the standard pipeline of feature engineering relies more on a trial-and-error procedure where one evaluates several hyper-parameters, like the number of clusters to be used. In order to better understand the role of such features we systematically evaluate their effect on four tasks, those of named entity segmentation and classification as well as, those of five-point sentiment classification and quantification. Our results strongly suggest th","authors_text":"Georgios Balikas, Ioannis Partalas","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-05-03T06:33:37Z","title":"On the effectiveness of feature set augmentation using clusters of word embeddings"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.01265","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:82c7ef8ad1c75f310846c11e97df71c469af831b7af7aeee6f179ba7b4eae459","target":"record","created_at":"2026-05-18T00:09:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e2a9a9a3f7cbc0f03a505fb55d49a81b99cd2095e7eff42ced4013c711155ed0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-05-03T06:33:37Z","title_canon_sha256":"b07a9d19bd239e7ee5def94f4f63b641cb395c0ed56c04113e32bfb163cea2e0"},"schema_version":"1.0","source":{"id":"1705.01265","kind":"arxiv","version":2}},"canonical_sha256":"64244abcbf2d6a6217d374e617447c0112fe85827dcf8f9fb84c78c2fcf8b644","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"64244abcbf2d6a6217d374e617447c0112fe85827dcf8f9fb84c78c2fcf8b644","first_computed_at":"2026-05-18T00:09:36.646903Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:09:36.646903Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"JdwimpDIrGkWHX7SeJrNcrT3CqxKGdL0V22/WHEoOMc+cyWP8TaDpr6eRJRHoyeYC761Lecs/uzsbHhsjxioCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:09:36.647487Z","signed_message":"canonical_sha256_bytes"},"source_id":"1705.01265","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:82c7ef8ad1c75f310846c11e97df71c469af831b7af7aeee6f179ba7b4eae459","sha256:d2eb6ed9f2f5bb67226f09e44be9ef3b5a6d79a0c0b9f7f780298825a455c391"],"state_sha256":"35ef92639ee4c87ffb221c5991a41ae94be222574c11195888ccb315c6c97a59"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TbKzWjTniQNuu6amLrtoRYhh6hKImPUAi0AIPShy+9jPwYAsj/unkrMMTilwSTZHKp6p0CH92pXIJPNl/dBSBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-24T20:40:41.679935Z","bundle_sha256":"1e3cd34e67d8bcc1cae8632733c2473c79446ce5511f26b1c41a4591038af1ec"}}