<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>ProbBrain - AI News Terminal</title>
    <link>https://probbrain.com/news</link>
    <atom:link href="https://probbrain.com/news.xml" rel="self" type="application/rss+xml"/>
    <description>AI lab announcements, arXiv papers, GitHub and HuggingFace trending, AI Hacker News, and independent writers - aggregated, deduped, and posted to @ProbBrain on Telegram.</description>
    <language>en</language>
    <lastBuildDate>Mon, 04 May 2026 12:23:31 +0000</lastBuildDate>
    <ttl>15</ttl>
    <item>
      <title>[GitHub Trending] sponsors/msitarzewski — A complete AI agency at your fingertips - From frontend wizards to Reddit community ninjas, from whimsy injectors to reality checkers. Each agent is a specialized expert with personality, processes, and proven deliverables.</title>
      <link>https://github.com/sponsors/msitarzewski</link>
      <guid isPermaLink="false">github|https://github.com/sponsors/msitarzewski</guid>
      <pubDate>Mon, 04 May 2026 10:51:20 +0000</pubDate>
      <category>code</category>
      <description><![CDATA[System of AI agents with different specializations for tasks such as frontend development, community engagement, and content validation.]]></description>
    </item>
    <item>
      <title>[GitHub Trending] virattt/dexter — An autonomous agent for deep financial research</title>
      <link>https://github.com/virattt/dexter</link>
      <guid isPermaLink="false">github|https://github.com/virattt/dexter</guid>
      <pubDate>Mon, 04 May 2026 10:51:20 +0000</pubDate>
      <category>code</category>
      <description><![CDATA[Dexter is an autonomous agent designed for conducting deep financial research.]]></description>
    </item>
    <item>
      <title>[GitHub Trending] fspecii/ace-step-ui — 🎵 The Ultimate Open Source Suno Alternative - Professional UI for ACE-Step 1.5 AI Music Generation. Free, local, unlimited. Stop paying for Suno!</title>
      <link>https://github.com/fspecii/ace-step-ui</link>
      <guid isPermaLink="false">github|https://github.com/fspecii/ace-step-ui</guid>
      <pubDate>Mon, 04 May 2026 10:51:20 +0000</pubDate>
      <category>code</category>
      <description><![CDATA[Open-source ace-step-ui provides a free, local UI for ACE-Step 1.5 AI music generation.]]></description>
    </item>
    <item>
      <title>[GitHub Trending] cocoindex-io/cocoindex — Incremental engine for long horizon agents 🌟 Star if you like it!</title>
      <link>https://github.com/cocoindex-io/cocoindex</link>
      <guid isPermaLink="false">github|https://github.com/cocoindex-io/cocoindex</guid>
      <pubDate>Mon, 04 May 2026 10:51:20 +0000</pubDate>
      <category>code</category>
      <description><![CDATA[Cocoindex is an incremental engine designed for long horizon agents.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Cloud Is Closer Than It Appears: Revisiting the Tradeoffs of Distributed Real-Time Inference</title>
      <link>https://arxiv.org/abs/2605.00005</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00005</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Cloud-based inference can match or exceed on-device performance for real-time control tasks like autonomous driving when adequately resourced.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] FedACT: Concurrent Federated Intelligence across Heterogeneous Data Sources</title>
      <link>https://arxiv.org/abs/2605.00011</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00011</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[FedACT proposes a resource-aware device scheduling approach for concurrent federated learning across heterogeneous devices, minimizing job completion time while ensuring fair participation.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] What Physics do Data-Driven MoCap-to-Radar Models Learn?</title>
      <link>https://arxiv.org/abs/2605.00018</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00018</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Physics-based metrics show MoCap-to-radar models often lack physical consistency despite low reconstruction error; temporal attention crucial for transformer learning.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] AirFM-DDA: Air-Interface Foundation Model in the Delay-Doppler-Angle Domain for AI-Native 6G</title>
      <link>https://arxiv.org/abs/2605.00020</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00020</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Foundation model for 6G wireless systems uses Delay-Doppler-Angle domain to untangle multipath components and reduce computational overhead.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Learning physically grounded traffic accident reconstruction from public accident reports</title>
      <link>https://arxiv.org/abs/2605.00050</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00050</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Framework reconstructs traffic accidents from public NHTSA reports using multimodal learning, improving accuracy for traffic safety and autonomous driving.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Smart Ensemble Learning Framework for Predicting Groundwater Heavy Metal Pollution</title>
      <link>https://arxiv.org/abs/2605.00056</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00056</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Ensemble machine learning with Gaussian copula transformation improves prediction of groundwater heavy metal pollution.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Information-Theoretic Generalization Bounds for Stochastic Gradient Descent with Predictable Virtual Noise</title>
      <link>https://arxiv.org/abs/2605.00064</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00064</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Generalization bounds for stochastic gradient descent now support adaptive virtual noise geometry based on past optimization trajectories.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Human-in-the-Loop Meta Bayesian Optimization for Fusion Energy and Scientific Applications</title>
      <link>https://arxiv.org/abs/2605.00068</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00068</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Human-in-the-Loop Meta Bayesian Optimization uses expert knowledge and few-shot learning to optimize experiments in data-scarce scientific domains like fusion energy.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Soft-MSM: Differentiable Context-Aware Elastic Alignment for Time Series</title>
      <link>https://arxiv.org/abs/2605.00069</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00069</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Researchers introduce Soft-MSM, a differentiable elastic distance metric for time series that enables gradient-based optimization with context-aware alignment costs.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] CRADIPOR: Crash Dispersion Predictor</title>
      <link>https://arxiv.org/abs/2605.00070</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00070</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[CRADIPOR uses a Rank Reduction Autoencoder to predict numerical dispersion in crash simulations without repeating computations.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Hyperspherical Forward-Forward with Prototypical Representations</title>
      <link>https://arxiv.org/abs/2605.00082</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00082</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Hyperspherical Forward-Forward reformulates layer objectives as multi-class classification using prototypes, achieving 40x faster inference and improved accuracy over standard Forward-Forward.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Comparative Analysis of Polygon-Based and Global Machine Learning Models for Bus Occupancy Prediction</title>
      <link>https://arxiv.org/abs/2605.00083</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00083</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Spatially-clustered local machine learning models predict bus ridership as accurately as global models for urban transit systems.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] SPLICE: Latent Diffusion over JEPA Embeddings for Conformal Time-Series Inpainting</title>
      <link>https://arxiv.org/abs/2605.00126</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00126</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[SPLICE combines latent diffusion with conformal prediction for time-series imputation, providing reliability guarantees and outperforming baselines in power systems.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Learning Fingerprints for Medical Time Series with Redundancy-Constrained Information Maximization</title>
      <link>https://arxiv.org/abs/2605.00130</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00130</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Framework learns fingerprint tokens for medical time series using redundancy-constrained information maximization for interpretable representations.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Smart Profit-Aware Crop Advisory System: Kisan AI</title>
      <link>https://arxiv.org/abs/2605.00133</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00133</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Kisan AI crop advisory system incorporates market prices with agronomic data to maximize farmer profitability.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Technical Report: Activation Residual Hessian Quantization (ARHQ) for Low-Bit LLM Quantization</title>
      <link>https://arxiv.org/abs/2605.00140</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00140</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[ARHQ is a post-training method that improves low-bit LLM quantization by isolating error-sensitive weights using activation residuals.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Wasserstein Distributionally Robust Regret Optimization for Reinforcement Learning from Human Feedback</title>
      <link>https://arxiv.org/abs/2605.00155</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00155</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Wasserstein distributionally robust regret optimization framework proposed to address reward over-optimization in reinforcement learning from human feedback.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Consistent Diffusion Language Models</title>
      <link>https://arxiv.org/abs/2605.00161</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00161</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Novel discrete diffusion training method using stochastic bridges achieves faster parallel text generation with fewer steps.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Towards A Generative Protein Evolution Machine with DPLM-Evo</title>
      <link>https://arxiv.org/abs/2605.00182</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00182</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[New discrete diffusion framework DPLM-Evo explicitly models substitution and insertion/deletion operations to improve protein sequence generation and mutation prediction.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Introducing WARM-VR: Benchmark Dataset for Multimodal Wearable Affect Recognition in Virtual Reality</title>
      <link>https://arxiv.org/abs/2605.00184</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00184</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[WARM-VR is a multimodal wearable sensor dataset for emotion recognition in virtual reality environments, benchmarked with machine learning algorithms from 31 participants.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Fair Dataset Distillation via Cross-Group Barycenter Alignment</title>
      <link>https://arxiv.org/abs/2605.00185</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00185</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Cross-group barycenter alignment reduces fairness gaps caused by dataset distillation across demographic groups.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] OTSS: Output-Targeted Soft Segmentation for Contextual Decision-Weight Learning</title>
      <link>https://arxiv.org/abs/2605.00193</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00193</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[OTSS employs soft segmentation to learn context-specific decision weights from logged data, achieving lower regret than hard-partition and mixture approaches.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Diversity in Large Language Models under Supervised Fine-Tuning</title>
      <link>https://arxiv.org/abs/2605.00195</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00195</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Researchers identify that supervised fine-tuning reduces LLM diversity due to low-frequency pattern neglect and knowledge forgetting, proposing TOFU loss to recover diversity while maintaining quality.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] State Stream Transformer (SST) V2: Parallel Training of Nonlinear Recurrence for Latent Space Reasoning</title>
      <link>https://arxiv.org/abs/2605.00206</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00206</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[State Stream Transformer V2 enables parameter-efficient reasoning in latent space through nonlinear recurrence with parallel training.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] CompleteRXN: Toward Completing Open Chemical Reaction Databases</title>
      <link>https://arxiv.org/abs/2605.00222</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00222</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[CompleteRXN introduces a benchmark for completing incomplete chemical reactions in USPTO, achieving 99.20% accuracy on standard tests but showing significant performance drops on real-world data.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Bayesian Optimization in Linear Time</title>
      <link>https://arxiv.org/abs/2605.00237</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00237</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Bayesian optimization method using recursive binary partitioning reduces computational complexity from cubic to linear while improving optimization performance.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] NLPOpt-Net: A Learning Method for Nonlinear Optimization with Feasibility Guarantees</title>
      <link>https://arxiv.org/abs/2605.00260</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00260</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[NLPOpt-Net is an unsupervised learning architecture that solves constrained nonlinear optimization problems with guaranteed constraint satisfaction.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Pessimism-Free Offline Learning in General-Sum Games via KL Regularization</title>
      <link>https://arxiv.org/abs/2605.00264</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00264</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[KL regularization enables pessimism-free offline multi-agent reinforcement learning, achieving accelerated equilibrium recovery rates in general-sum games.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Polaris: Coupled Orbital Polar Embeddings for Hierarchical Concept Learning</title>
      <link>https://arxiv.org/abs/2605.00265</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00265</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Polaris, a polar hyperspherical embedding framework, learns hierarchical representations by separating semantic meaning from structure, achieving significant improvements in taxonomy expansion tasks.]]></description>
    </item>
    <item>
      <title>[arXiv cs.LG] Jailbroken Frontier Models Retain Their Capabilities</title>
      <link>https://arxiv.org/abs/2605.00267</link>
      <guid isPermaLink="false">arxiv-cs.LG|https://arxiv.org/abs/2605.00267</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[More capable frontier models experience minimal performance degradation from jailbreaks, with Opus 4.6 losing only 7.7% versus Haiku 4.5 at 33.1%.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Putting HUMANS first: Efficient LAM Evaluation with Human Preference Alignment</title>
      <link>https://arxiv.org/abs/2605.00022</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00022</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Researchers found 50-example subsets effectively evaluate large audio models while regression-weighted subsets better predict human preferences than full benchmarks.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] NorBERTo: A ModernBERT Model Trained for Portuguese with 331 Billion Tokens Corpus</title>
      <link>https://arxiv.org/abs/2605.00086</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00086</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Researchers introduce NorBERTo, a modern Portuguese encoder trained on 331 billion tokens, achieving top PLUE and ASSIN 2 results.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] How Frontier LLMs Adapt to Neurodivergence Context: A Measurement Framework for Surface vs. Structural Change in System-Prompted Responses</title>
      <link>https://arxiv.org/abs/2605.00113</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00113</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[NDBench benchmark finds frontier LLMs significantly adapt outputs when prompted with neurodivergence context, producing longer and more structured responses.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] ViLegalNLI: Natural Language Inference for Vietnamese Legal Texts</title>
      <link>https://arxiv.org/abs/2605.00116</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00116</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Researchers introduce ViLegalNLI, the first large-scale Vietnamese legal NLI dataset containing 42,012 annotated premise-hypothesis pairs from statutory documents.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Cultural Benchmarking of LLMs in Standard and Dialectal Arabic Dialogues</title>
      <link>https://arxiv.org/abs/2605.00119</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00119</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[New cultural benchmark covering 13 Arabic-speaking countries reveals significant LLM performance gaps between Modern Standard Arabic and dialects.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Timing is Everything: Temporal Scaffolding of Semantic Surprise in Humor</title>
      <link>https://arxiv.org/abs/2605.00143</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00143</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Temporal features outweigh semantic incongruity in humor appreciation, with strategic pauses before punchlines driving audience engagement.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] RSAT: Structured Attribution Makes Small Language Models Faithful Table Reasoners</title>
      <link>https://arxiv.org/abs/2605.00199</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00199</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[RSAT method trains small language models to answer table questions with step-by-step reasoning backed by cell-level citations, achieving 3.7x improvement in faithfulness.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Confidence Estimation in Automatic Short Answer Grading with LLMs</title>
      <link>https://arxiv.org/abs/2605.00200</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00200</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Study proposes hybrid confidence framework combining model-based signals with dataset uncertainty for more reliable LLM-based automatic short answer grading.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Why Do LLMs Struggle in Strategic Play? Broken Links Between Observations, Beliefs, and Actions</title>
      <link>https://arxiv.org/abs/2605.00226</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00226</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[LLMs show gaps between internal beliefs and actions in strategic games; beliefs degrade under reasoning despite exceeding verbal accuracy.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Persona-Grounded Safety Evaluation of AI Companions in Multi-Turn Conversations</title>
      <link>https://arxiv.org/abs/2605.00227</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00227</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Researchers present a scalable safety evaluation framework for AI companions, demonstrating that Replika exhibits a narrow emotional range and normalizes unsafe content like self-harm.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Estimating LLM Grading Ability and Response Difficulty in Automatic Short Answer Grading via Item Response Theory</title>
      <link>https://arxiv.org/abs/2605.00238</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00238</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Researchers introduce item response theory framework for evaluating LLM-based automatic short answer grading, revealing performance differences across response difficulty levels.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Lost in State Space: Probing Frozen Mamba Representations</title>
      <link>https://arxiv.org/abs/2605.00253</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00253</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Mamba frozen representations fail to produce semantic sentence summaries without fine-tuning due to severe anisotropy and representational collapse.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Retrieval-Augmented Reasoning for Chartered Accountancy</title>
      <link>https://arxiv.org/abs/2605.00257</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00257</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[CA-ThinkFlow RAG framework achieves 68.75% of GPT-4o/Claude performance on Indian chartered accountancy benchmarks using quantized models.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] How Language Models Process Out-of-Distribution Inputs: A Two-Pathway Framework</title>
      <link>https://arxiv.org/abs/2605.00269</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00269</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Study reveals language model OOD detection methods are confounded by sequence length; proposes two-pathway framework using embeddings and hidden-state trajectories.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Are You the A-hole? A Fair, Multi-Perspective Ethical Reasoning Framework</title>
      <link>https://arxiv.org/abs/2605.00270</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00270</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[A neuro-symbolic framework using MaxSAT resolves conflicting moral judgments by converting natural language explanations into logical constraints, outperforming majority voting.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] What Don&apos;t You Understand? Using Large Language Models to Identify and Characterize Student Misconceptions About Challenging Topics</title>
      <link>https://arxiv.org/abs/2605.00294</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00294</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[LLMs identify student misconceptions in online biomedical courses by analyzing quiz data, response patterns, and lecture transcripts.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Structure-Aware Chunking for Tabular Data in Retrieval-Augmented Generation</title>
      <link>https://arxiv.org/abs/2605.00318</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00318</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Researchers propose structure-aware chunking method for tabular data in RAG systems that reduces chunk count by up to 56% and improves retrieval performance.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Prompt-Induced Score Variance in Zero-Shot Binary Vision-Language Safety Classification</title>
      <link>https://arxiv.org/abs/2605.00326</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00326</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Vision-language safety classifiers produce inconsistent scores across semantically equivalent prompts; prompt averaging improves reliability and calibration.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Budget-Aware Routing for Long Clinical Text</title>
      <link>https://arxiv.org/abs/2605.00336</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00336</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[RCD algorithm selects clinical text subsets under token budgets to reduce LLM deployment costs while balancing relevance, coverage, and diversity.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Making Every Verified Token Count: Adaptive Verification for MoE Speculative Decoding</title>
      <link>https://arxiv.org/abs/2605.00342</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00342</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[EVICT adaptively truncates draft trees in MoE speculative decoding to reduce verification costs, achieving 2.35x speedup over standard autoregressive generation.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] MemRouter: Memory-as-Embedding Routing for Long-Term Conversational Agents</title>
      <link>https://arxiv.org/abs/2605.00356</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00356</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[MemRouter uses embedding-based routing instead of autoregressive generation to improve memory management in conversational agents, achieving higher F1 scores and lower latency.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] From Backward Spreading to Forward Replay: Revisiting Target Construction in LLM Parameter Editing</title>
      <link>https://arxiv.org/abs/2605.00358</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00358</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Study proposes forward-propagation to replace backward spreading in LLM parameter editing, achieving more accurate targets with same computational cost.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Unlearning What Matters: Token-Level Attribution for Precise Language Model Unlearning</title>
      <link>https://arxiv.org/abs/2605.00364</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00364</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[TokenUnlearn selectively targets critical tokens for machine unlearning in language models, outperforming sequence-level approaches in forgetting and utility preservation.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Language-free Experience at Expo 2025 Osaka</title>
      <link>https://arxiv.org/abs/2605.00373</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00373</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Researchers developed multilingual translation and simultaneous interpretation technologies for Expo 2025 Osaka, emphasizing low-latency, high-quality translation using chunk-based segmentation and multi-engine approaches.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Agentic AI for Substance Use Education: Integrating Regulatory and Scientific Knowledge Sources</title>
      <link>https://arxiv.org/abs/2605.00383</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00383</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[An agentic AI system integrating DEA records and peer-reviewed literature for substance use education received high ratings from subject matter experts.]]></description>
    </item>
    <item>
      <title>[arXiv cs.CL] Agent Capsules: Quality-Gated Granularity Control for Multi-Agent LLM Pipelines</title>
      <link>https://arxiv.org/abs/2605.00410</link>
      <guid isPermaLink="false">arxiv-cs.CL|https://arxiv.org/abs/2605.00410</guid>
      <pubDate>Mon, 04 May 2026 04:00:00 +0000</pubDate>
      <category>paper</category>
      <description><![CDATA[Agent Capsules optimizes multi-agent LLM pipelines by adaptively selecting execution strategies within quality constraints, achieving 51% token savings.]]></description>
    </item>
  </channel>
</rss>
