{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2014:VWX2QZSTWGQQQSFHLBGSGN3DIA","short_pith_number":"pith:VWX2QZST","canonical_record":{"source":{"id":"1404.4653","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2014-04-17T21:13:43Z","cross_cats_sorted":[],"title_canon_sha256":"b2dbfbbe00b54fdb08347a20531f37f690b557359fab8dd31af590e81162fa68","abstract_canon_sha256":"548b6944dc6274b85970c72217402f644ce97eea2c0c3a18920b241e27442bf3"},"schema_version":"1.0"},"canonical_sha256":"adafa86653b1a10848a7584d233763402658a00845f13f5be5b5cf26491fbc07","source":{"kind":"arxiv","id":"1404.4653","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1404.4653","created_at":"2026-05-18T02:53:55Z"},{"alias_kind":"arxiv_version","alias_value":"1404.4653v1","created_at":"2026-05-18T02:53:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1404.4653","created_at":"2026-05-18T02:53:55Z"},{"alias_kind":"pith_short_12","alias_value":"VWX2QZSTWGQQ","created_at":"2026-05-18T12:28:54Z"},{"alias_kind":"pith_short_16","alias_value":"VWX2QZSTWGQQQSFH","created_at":"2026-05-18T12:28:54Z"},{"alias_kind":"pith_short_8","alias_value":"VWX2QZST","created_at":"2026-05-18T12:28:54Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2014:VWX2QZSTWGQQQSFHLBGSGN3DIA","target":"record","payload":{"canonical_record":{"source":{"id":"1404.4653","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2014-04-17T21:13:43Z","cross_cats_sorted":[],"title_canon_sha256":"b2dbfbbe00b54fdb08347a20531f37f690b557359fab8dd31af590e81162fa68","abstract_canon_sha256":"548b6944dc6274b85970c72217402f644ce97eea2c0c3a18920b241e27442bf3"},"schema_version":"1.0"},"canonical_sha256":"adafa86653b1a10848a7584d233763402658a00845f13f5be5b5cf26491fbc07","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:53:55.561473Z","signature_b64":"NMQd3pgIPbhlSG/uu57jw1DOe0OxhAIXOuqzF5Fw5nlcuDekKdv43FleUeFTUHt+dpvZZbhQXPm9vZYf1X5eCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"adafa86653b1a10848a7584d233763402658a00845f13f5be5b5cf26491fbc07","last_reissued_at":"2026-05-18T02:53:55.560728Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:53:55.560728Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1404.4653","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:53:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZRQfh9PJr87YNTVKcaZaWP7UQsUOhO0/LydXRnxY2444GtVG0i3Isd7LYYlcrbE6JVhYh2ridwC1EIHJp9nNDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T17:06:05.575304Z"},"content_sha256":"9479c6f319ffbbc949340d2c1a50dc181b47bb87b56a3c4f7e7ec2a3e27f8162","schema_version":"1.0","event_id":"sha256:9479c6f319ffbbc949340d2c1a50dc181b47bb87b56a3c4f7e7ec2a3e27f8162"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2014:VWX2QZSTWGQQQSFHLBGSGN3DIA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"An Efficient and Balanced Platform for Data-Parallel Subsampling Workloads","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Christopher Stewart, Sundeep Kambhampati","submitted_at":"2014-04-17T21:13:43Z","abstract_excerpt":"With the advent of internet services, data started growing faster than it can be processed. To personalize user experience, this enormous data has to be processed in real time, in interactive fashion. In order to achieve faster data processing often a statistical method called subsampling. Subsampling workloads compute statistics from a random subset of sample data (i.e., a subsample). Data-parallel platforms group these samples into tasks; each task subsamples its data in parallel.\n  Current, state-of-the-art platforms such as Hadoop are built for large tasks that run for long periods of time"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1404.4653","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:53:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"I0rSfGDWnBLiLH6WEF6aMYh/uU59YV5FU5eNOV/MbEgfPSiHf98JKDeb2nTYHwWyLqJaIWf08eQbMBfZZBIsBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T17:06:05.575678Z"},"content_sha256":"7e8fb37f979a6b121a06775acd9e800a84837e64ca026ae03558032705444ab1","schema_version":"1.0","event_id":"sha256:7e8fb37f979a6b121a06775acd9e800a84837e64ca026ae03558032705444ab1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VWX2QZSTWGQQQSFHLBGSGN3DIA/bundle.json","state_url":"https://pith.science/pith/VWX2QZSTWGQQQSFHLBGSGN3DIA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VWX2QZSTWGQQQSFHLBGSGN3DIA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T17:06:05Z","links":{"resolver":"https://pith.science/pith/VWX2QZSTWGQQQSFHLBGSGN3DIA","bundle":"https://pith.science/pith/VWX2QZSTWGQQQSFHLBGSGN3DIA/bundle.json","state":"https://pith.science/pith/VWX2QZSTWGQQQSFHLBGSGN3DIA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VWX2QZSTWGQQQSFHLBGSGN3DIA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:VWX2QZSTWGQQQSFHLBGSGN3DIA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"548b6944dc6274b85970c72217402f644ce97eea2c0c3a18920b241e27442bf3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2014-04-17T21:13:43Z","title_canon_sha256":"b2dbfbbe00b54fdb08347a20531f37f690b557359fab8dd31af590e81162fa68"},"schema_version":"1.0","source":{"id":"1404.4653","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1404.4653","created_at":"2026-05-18T02:53:55Z"},{"alias_kind":"arxiv_version","alias_value":"1404.4653v1","created_at":"2026-05-18T02:53:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1404.4653","created_at":"2026-05-18T02:53:55Z"},{"alias_kind":"pith_short_12","alias_value":"VWX2QZSTWGQQ","created_at":"2026-05-18T12:28:54Z"},{"alias_kind":"pith_short_16","alias_value":"VWX2QZSTWGQQQSFH","created_at":"2026-05-18T12:28:54Z"},{"alias_kind":"pith_short_8","alias_value":"VWX2QZST","created_at":"2026-05-18T12:28:54Z"}],"graph_snapshots":[{"event_id":"sha256:7e8fb37f979a6b121a06775acd9e800a84837e64ca026ae03558032705444ab1","target":"graph","created_at":"2026-05-18T02:53:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"With the advent of internet services, data started growing faster than it can be processed. To personalize user experience, this enormous data has to be processed in real time, in interactive fashion. In order to achieve faster data processing often a statistical method called subsampling. Subsampling workloads compute statistics from a random subset of sample data (i.e., a subsample). Data-parallel platforms group these samples into tasks; each task subsamples its data in parallel.\n  Current, state-of-the-art platforms such as Hadoop are built for large tasks that run for long periods of time","authors_text":"Christopher Stewart, Sundeep Kambhampati","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2014-04-17T21:13:43Z","title":"An Efficient and Balanced Platform for Data-Parallel Subsampling Workloads"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1404.4653","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9479c6f319ffbbc949340d2c1a50dc181b47bb87b56a3c4f7e7ec2a3e27f8162","target":"record","created_at":"2026-05-18T02:53:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"548b6944dc6274b85970c72217402f644ce97eea2c0c3a18920b241e27442bf3","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2014-04-17T21:13:43Z","title_canon_sha256":"b2dbfbbe00b54fdb08347a20531f37f690b557359fab8dd31af590e81162fa68"},"schema_version":"1.0","source":{"id":"1404.4653","kind":"arxiv","version":1}},"canonical_sha256":"adafa86653b1a10848a7584d233763402658a00845f13f5be5b5cf26491fbc07","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"adafa86653b1a10848a7584d233763402658a00845f13f5be5b5cf26491fbc07","first_computed_at":"2026-05-18T02:53:55.560728Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:53:55.560728Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"NMQd3pgIPbhlSG/uu57jw1DOe0OxhAIXOuqzF5Fw5nlcuDekKdv43FleUeFTUHt+dpvZZbhQXPm9vZYf1X5eCw==","signature_status":"signed_v1","signed_at":"2026-05-18T02:53:55.561473Z","signed_message":"canonical_sha256_bytes"},"source_id":"1404.4653","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9479c6f319ffbbc949340d2c1a50dc181b47bb87b56a3c4f7e7ec2a3e27f8162","sha256:7e8fb37f979a6b121a06775acd9e800a84837e64ca026ae03558032705444ab1"],"state_sha256":"e10cd8ea749435cec70e167735d1f80243fd099fcf94db589d412ba6dc32a701"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"I2Sesorce1oZtufdLfVwaKe/YcoKkpy6pPzclevwQa2wKU2vdJiEjcGT3JHFE2T7ey0VmbvOTZQ7lmuU0Y5RDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T17:06:05.577611Z","bundle_sha256":"6604cf64ebff637624869c2671502408468ad562a5c01d216f75b4a2de2587e0"}}