{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:G3ZLJ3UVYTJY5D6MBGVGXWSPMU","short_pith_number":"pith:G3ZLJ3UV","schema_version":"1.0","canonical_sha256":"36f2b4ee95c4d38e8fcc09aa6bda4f650c999fa2a1d4d134117ad10825010d4d","source":{"kind":"arxiv","id":"1809.10274","version":1},"attestation_state":"computed","paper":{"title":"Semantically Invariant Text-to-Image Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ameya Shringi, Andreas Savakis, Chi Zhang, Dheeraj Peri, Miguel Dominguez, Ray Ptucha, Shagan Sah","submitted_at":"2018-09-27T00:11:25Z","abstract_excerpt":"Image captioning has demonstrated models that are capable of generating plausible text given input images or videos. Further, recent work in image generation has shown significant improvements in image quality when text is used as a prior. Our work ties these concepts together by creating an architecture that can enable bidirectional generation of images and text. We call this network Multi-Modal Vector Representation (MMVR). Along with MMVR, we propose two improvements to the text conditioned image generation. Firstly, a n-gram metric based cost function is introduced that generalizes the cap"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1809.10274","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2018-09-27T00:11:25Z","cross_cats_sorted":["cs.CL","cs.CV","stat.ML"],"title_canon_sha256":"8b0d202f75e96498b2010aaaa28e7349ec2deb2a2f2f66b48a7967f39c041621","abstract_canon_sha256":"dee3d07434530226ab31992df59f8c9337206917f32a565a42f020af29a2b774"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:04:38.713586Z","signature_b64":"lgKAa6kFloSG4aJzGze8Csh45LOo69U9uW4OKZ371jse6CY+XjaQkFwEe7Qt0cNn9VTSiHEvWJcv5ZSYjyVaDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"36f2b4ee95c4d38e8fcc09aa6bda4f650c999fa2a1d4d134117ad10825010d4d","last_reissued_at":"2026-05-18T00:04:38.713064Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:04:38.713064Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Semantically Invariant Text-to-Image Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Ameya Shringi, Andreas Savakis, Chi Zhang, Dheeraj Peri, Miguel Dominguez, Ray Ptucha, Shagan Sah","submitted_at":"2018-09-27T00:11:25Z","abstract_excerpt":"Image captioning has demonstrated models that are capable of generating plausible text given input images or videos. Further, recent work in image generation has shown significant improvements in image quality when text is used as a prior. Our work ties these concepts together by creating an architecture that can enable bidirectional generation of images and text. We call this network Multi-Modal Vector Representation (MMVR). Along with MMVR, we propose two improvements to the text conditioned image generation. Firstly, a n-gram metric based cost function is introduced that generalizes the cap"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.10274","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1809.10274","created_at":"2026-05-18T00:04:38.713144+00:00"},{"alias_kind":"arxiv_version","alias_value":"1809.10274v1","created_at":"2026-05-18T00:04:38.713144+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.10274","created_at":"2026-05-18T00:04:38.713144+00:00"},{"alias_kind":"pith_short_12","alias_value":"G3ZLJ3UVYTJY","created_at":"2026-05-18T12:32:25.280505+00:00"},{"alias_kind":"pith_short_16","alias_value":"G3ZLJ3UVYTJY5D6M","created_at":"2026-05-18T12:32:25.280505+00:00"},{"alias_kind":"pith_short_8","alias_value":"G3ZLJ3UV","created_at":"2026-05-18T12:32:25.280505+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/G3ZLJ3UVYTJY5D6MBGVGXWSPMU","json":"https://pith.science/pith/G3ZLJ3UVYTJY5D6MBGVGXWSPMU.json","graph_json":"https://pith.science/api/pith-number/G3ZLJ3UVYTJY5D6MBGVGXWSPMU/graph.json","events_json":"https://pith.science/api/pith-number/G3ZLJ3UVYTJY5D6MBGVGXWSPMU/events.json","paper":"https://pith.science/paper/G3ZLJ3UV"},"agent_actions":{"view_html":"https://pith.science/pith/G3ZLJ3UVYTJY5D6MBGVGXWSPMU","download_json":"https://pith.science/pith/G3ZLJ3UVYTJY5D6MBGVGXWSPMU.json","view_paper":"https://pith.science/paper/G3ZLJ3UV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1809.10274&json=true","fetch_graph":"https://pith.science/api/pith-number/G3ZLJ3UVYTJY5D6MBGVGXWSPMU/graph.json","fetch_events":"https://pith.science/api/pith-number/G3ZLJ3UVYTJY5D6MBGVGXWSPMU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/G3ZLJ3UVYTJY5D6MBGVGXWSPMU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/G3ZLJ3UVYTJY5D6MBGVGXWSPMU/action/storage_attestation","attest_author":"https://pith.science/pith/G3ZLJ3UVYTJY5D6MBGVGXWSPMU/action/author_attestation","sign_citation":"https://pith.science/pith/G3ZLJ3UVYTJY5D6MBGVGXWSPMU/action/citation_signature","submit_replication":"https://pith.science/pith/G3ZLJ3UVYTJY5D6MBGVGXWSPMU/action/replication_record"}},"created_at":"2026-05-18T00:04:38.713144+00:00","updated_at":"2026-05-18T00:04:38.713144+00:00"}