{"total":13,"items":[{"citing_arxiv_id":"2606.05868","ref_index":48,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"YouZhi: Towards High-Concurrency Financial LLMs via Adaptive GQA-to-MLA Transition","primary_cat":"cs.CL","submitted_at":"2026-06-04T08:44:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"YouZhi-LLM applies a layer-adaptive GQA-to-MLA transition plus Ascend-specific distillation and fine-tuning to reduce KV-cache size, yielding up to 2.69× higher concurrency and modest gains on financial benchmarks versus base models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.26074","ref_index":29,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"StakeBench: Evaluating Language Understanding Grounded in Market Commitment","primary_cat":"cs.CL","submitted_at":"2026-05-25T17:38:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"StakeBench is a new benchmark using market-derived supervision from resolved prediction markets to test LLMs on commitment detection, side identification, action anticipation, and odds projection, revealing partial success on sides but structural failures on higher tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.24910","ref_index":22,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Noise-Robust Financial Numerical Entity Attribute Tagging","primary_cat":"cs.AI","submitted_at":"2026-05-24T07:31:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"NORA applies task-aware weighting and NPK filtering to handle label noise in multi-attribute tagging of financial numerical entities, outperforming baselines on a new 6.6M-instance benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15092","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Monetary Policy in the Media Spotlight: Sentiments, Signals, and Economic Impact","primary_cat":"econ.EM","submitted_at":"2026-05-14T17:12:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Media sentiment indicators from Canadian news, when added to a New Keynesian model with endogenous central-bank response, improve out-of-sample forecasts and account for part of monetary-policy propagation to output and prices.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05409","ref_index":42,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Agentic Retrieval-Augmented Generation for Financial Document Question Answering","primary_cat":"cs.AI","submitted_at":"2026-05-06T19:59:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"FinAgent-RAG achieves 76.81-78.46% execution accuracy on financial QA benchmarks by combining contrastive retrieval, program-of-thought code generation, and adaptive strategy routing, outperforming baselines by 5.62-9.32 points.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01384","ref_index":14,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SBCA: Cross-Modal BERT-driven Actor-Critic for Multi-Asset Portfolio Optimization","primary_cat":"q-fin.CP","submitted_at":"2026-05-02T11:16:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SBCA is a reinforcement learning framework using BERT cross-modal fusion and Actor-Critic to integrate price data with sentiment text for multi-asset portfolio optimization with practical trading constraints.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.12047","ref_index":47,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Empirical Evaluation of PDF Parsing and Chunking for Financial Question Answering with RAG","primary_cat":"cs.CL","submitted_at":"2026-04-13T20:39:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Systematic tests show that specific PDF parsers combined with overlapping chunking strategies better preserve structure and improve RAG answer correctness on financial QA benchmarks including the new TableQuest dataset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08649","ref_index":18,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"PRAGMA: Revolut Foundation Model","primary_cat":"cs.LG","submitted_at":"2026-04-09T18:00:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"PRAGMA pre-trains a Transformer on heterogeneous banking events with a tailored self-supervised masked objective, yielding embeddings that support strong downstream performance on credit scoring, fraud detection, and lifetime value prediction using linear heads or light fine-tuning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16411","ref_index":33,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"CGCMA: Conditionally-Gated Cross-Modal Attention for Event-Conditioned Asynchronous Fusion","primary_cat":"cs.LG","submitted_at":"2026-04-01T14:06:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CGCMA separates text-conditioned grounding from lag-aware trust gating to fuse asynchronous price and web data, yielding the highest Sharpe ratio of +0.449 on a new crypto news corpus.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"operate in settings with tightly scheduled events and much easier synchronization. General-purpose fusion architectures therefore provide the main methodological reference points for our compar- isons: BiLSTM late fusion [29], directional cross-modal attention in MulT [24], multiplicative interaction modeling in TFN [32], modal- ity gating in GMU [2], memory-based sequential fusion in MFN [33], and the broader fusion taxonomy summarized by Baltrušaitis et al. [3]. In cryptocurrency prediction, prior multimodal datasets and systems remain mostly daily and do not expose within-day freshness. Sentiment from Twitter [1], Reddit [13], fear & greed in- dicators [4], and on-chain activity [14] has been studied separately, while integrated benchmarks such as PreBit [35], DAM [9], Cryp-"},{"citing_arxiv_id":"2512.13040","ref_index":50,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Understanding Structured Financial Data with LLMs: A Case Study on Fraud Detection","primary_cat":"cs.LG","submitted_at":"2025-12-15T07:09:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"FinFRE-RAG combines importance-guided feature reduction with label-aware retrieval-augmented generation to boost LLM performance on tabular fraud detection across four public datasets while providing human-readable rationales.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2503.22693","ref_index":114,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Bridging Language Models and Financial Analysis","primary_cat":"q-fin.ST","submitted_at":"2025-03-14T01:35:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"A survey synthesizing recent LLM research and assessing its applicability to financial data analysis.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2411.10915","ref_index":75,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Bias in Large Language Models: Origin, Evaluation, and Mitigation","primary_cat":"cs.CL","submitted_at":"2024-11-16T23:54:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"A literature review that categorizes bias in LLMs, surveys evaluation and mitigation techniques, and discusses ethical implications.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2406.12009","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"FinTruthQA: A Benchmark for AI-Driven Financial Disclosure Quality Assessment in Investor -- Firm Interactions","primary_cat":"cs.CL","submitted_at":"2024-06-17T18:25:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces FinTruthQA, a 6,000-entry annotated benchmark for AI assessment of financial disclosure quality across four criteria, with model evaluations showing strong results on question tasks but weaker on answer relevance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}