Spaces:
Running
Running
| <!doctype html> | |
| <html lang="en"> | |
| <!-- The doctype selects standards mode; lang declares the page language for assistive technology and search engines. --> | |
| <head> | |
| <!-- Document metadata: encoding, description and keywords for search engines, responsive viewport, page title. --> | |
| <meta charset="utf-8"> | |
| <meta name="description" | |
| content="AUTOMOTIVE-ENV: Benchmarking Multimodal Agents in Vehicle Interface Systems. A high-fidelity benchmark and environment for in-vehicle GUIs with 185 parameterized tasks and reproducible checks."> | |
| <meta name="keywords" content="Automotive-ENV, multimodal agents, vehicle GUI, benchmark, ASURADA"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1"> | |
| <title>AUTOMOTIVE-ENV: Benchmarking Multimodal Agents in Vehicle Interface Systems</title> | |
| <!-- (Optional) Google Analytics - remove if not needed --> | |
| <!-- NOTE(review): confirm G-PYVRSFMDRL is this project's own measurement ID and was not carried over from the reused page template. --> | |
| <script async src="https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL"></script> | |
| <script> | |
| window.dataLayer = window.dataLayer || []; | |
| function gtag(){ dataLayer.push(arguments); } | |
| gtag('js', new Date()); | |
| gtag('config', 'G-PYVRSFMDRL'); | |
| </script> | |
| <!-- Stylesheets: web fonts, Bulma and its plugins, icon fonts, then the page-specific stylesheet. --> | |
| <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet"> | |
| <link rel="stylesheet" href="./static/css/bulma.min.css"> | |
| <link rel="stylesheet" href="./static/css/bulma-carousel.min.css"> | |
| <link rel="stylesheet" href="./static/css/bulma-slider.min.css"> | |
| <link rel="stylesheet" href="./static/css/fontawesome.all.min.css"> | |
| <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css"> | |
| <link rel="stylesheet" href="./static/css/index.css"> | |
| <link rel="icon" href="./static/images/favicon.svg"> | |
| <!-- Scripts are deferred so they do not block HTML parsing. Deferred scripts still execute in document order, so index.js runs after jQuery and the Bulma plugins have loaded. --> | |
| <script defer src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script> | |
| <script defer src="./static/js/fontawesome.all.min.js"></script> | |
| <script defer src="./static/js/bulma-carousel.min.js"></script> | |
| <script defer src="./static/js/bulma-slider.min.js"></script> | |
| <script defer src="./static/js/index.js"></script> | |
| <style> | |
| /* light paper-like tuning to match os-world vibe */ | |
| body { background: #ffffff; color: #111; } | |
| .hero { background: #fff; } | |
| .publication-title { letter-spacing: -0.02em; } | |
| .publication-links .button { margin: 0 6px 8px; } | |
| .system-figure img { max-width: 100%; height: auto; border: 1px solid #eee; border-radius: 6px; } | |
| .subtitle { color: #444; } | |
| .footer { background: #fafafa; } | |
| /* Ensure both system images line up nicely */ | |
| .sysimg { width: 100%; height: auto; display: block; } | |
| .system-overview-desc { margin-top: 12px; } | |
| </style> | |
| </head> | |
| <body> | |
| <!-- Minimal navbar with quick links to the paper and the code repository. --> | |
| <nav class="navbar" aria-label="main navigation"> | |
| <div class="navbar-brand"> | |
| <!-- NOTE(review): the burger is an anchor without href, so it cannot be reached or activated by keyboard; consider a button element once the bundled Bulma styles are confirmed to support it. --> | |
| <a role="button" class="navbar-burger" aria-label="menu" aria-expanded="false"> | |
| <span aria-hidden="true"></span> | |
| <span aria-hidden="true"></span> | |
| <span aria-hidden="true"></span> | |
| </a> | |
| </div> | |
| <div class="navbar-menu"> | |
| <div class="navbar-start" style="flex-grow: 1; justify-content: center;"> | |
| <!-- Icons are decorative (the link text names the destination), so they are hidden from assistive technology. --> | |
| <a class="navbar-item" href="https://arxiv.org/abs/2509.21143" target="_blank" rel="noopener"> | |
| <span class="icon"><i class="ai ai-arxiv" aria-hidden="true"></i></span> Paper | |
| </a> | |
| <a class="navbar-item" href="https://github.com/automotive-env/AutmotiveEnv.github.io" target="_blank" rel="noopener"> | |
| <span class="icon"><i class="fab fa-github" aria-hidden="true"></i></span> Code | |
| </a> | |
| </div> | |
| </div> | |
| </nav> | |
| <!-- Hero: page title (the single h1), author list with affiliation markers, affiliations, and the paper/code buttons. --> | |
| <section class="hero"> | |
| <div class="hero-body"> | |
| <div class="container is-max-desktop"> | |
| <div class="columns is-centered"> | |
| <div class="column has-text-centered"> | |
| <h1 class="title is-1 publication-title">AUTOMOTIVE-ENV: Benchmarking Multimodal Agents in Vehicle Interface Systems</h1> | |
| <!-- Authors; the superscript numbers refer to the affiliations listed below. --> | |
| <div class="is-size-5 publication-authors"> | |
| <span class="author-block"><strong>Junfeng Yan</strong><sup>*1</sup>,</span> | |
| <span class="author-block"><strong>Biao Wu</strong><sup>*1</sup>,</span> | |
| <span class="author-block"><strong>Meng Fang</strong><sup>2</sup>,</span> | |
| <span class="author-block"><strong>Ling Chen</strong><sup>1</sup></span> | |
| </div> | |
| <div class="is-size-6 publication-authors" style="margin-top:6px;"> | |
| <span class="author-block"><sup>1</sup>Australian Artificial Intelligence Institute, Sydney, Australia</span><br> | |
| <span class="author-block"><sup>2</sup>University of Liverpool, Liverpool, United Kingdom</span> | |
| </div> | |
| <!-- Link buttons; the icons are decorative because each button carries a text label, so they are hidden from assistive technology. --> | |
| <div class="column has-text-centered" style="margin-top:16px;"> | |
| <div class="publication-links"> | |
| <span class="link-block"> | |
| <a href="https://arxiv.org/abs/2509.21143" | |
| class="external-link button is-normal is-rounded is-dark" target="_blank" rel="noopener"> | |
| <span class="icon"><i class="ai ai-arxiv" aria-hidden="true"></i></span> | |
| <span>Paper</span> | |
| </a> | |
| </span> | |
| <span class="link-block"> | |
| <a href="https://github.com/automotive-env/AutmotiveEnv.github.io" | |
| class="external-link button is-normal is-rounded is-dark" target="_blank" rel="noopener"> | |
| <span class="icon"><i class="fab fa-github" aria-hidden="true"></i></span> | |
| <span>Code (coming soon)</span> | |
| </a> | |
| </span> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Teaser video (local mp4 like os-world's teaser section) | |
| Put your video at ./static/videos/demo.mp4 | |
| The clip autoplays muted and loops, so native controls are exposed to let visitors pause it. --> | |
| <section class="hero teaser"> | |
| <div class="container is-max-desktop"> | |
| <div class="hero-body"> | |
| <video id="teaser" autoplay muted loop playsinline controls aria-label="Automotive-ENV demo video"> | |
| <source src="./static/videos/demo.mp4" type="video/mp4"> | |
| </video> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- System overview (two stacked images + description) | |
| Place images at: | |
| ./static/images/demo_task.jpg | |
| ./static/images/demo_arch.jpg | |
| Both images sit below the hero and teaser, so they are lazy-loaded and decoded asynchronously. | |
| --> | |
| <section class="section" id="system-overview"> | |
| <div class="container is-max-desktop"> | |
| <div class="columns is-centered"> | |
| <div class="column is-four-fifths"> | |
| <h2 class="title is-3 has-text-centered">System Overview</h2> | |
| <figure class="system-figure has-text-centered" style="margin-bottom:16px;"> | |
| <img class="sysimg" src="./static/images/demo_task.jpg" alt="Automotive-ENV task overview" loading="lazy" decoding="async"> | |
| <figcaption class="subtitle is-6" style="margin-top:8px;"> | |
| Task instruction: Open the front windshield defroster, open the rear windshield defroster. | |
| </figcaption> | |
| </figure> | |
| <figure class="system-figure has-text-centered"> | |
| <img class="sysimg" src="./static/images/demo_arch.jpg" alt="Automotive-ENV system architecture overview" loading="lazy" decoding="async"> | |
| </figure> | |
| <div class="content has-text-justified system-overview-desc"> | |
| <p> | |
| Automotive OS-based environment where the agent observes the accessibility tree, screen, and GPS; | |
| optionally consults GPS-contextualized web knowledge; and acts through screen taps and API calls. | |
| Task success is determined by low-level programmatic checks of system signals. | |
| </p> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Abstract: three paragraphs of paper summary; the id gives this section a link anchor like its sibling sections. --> | |
| <section class="section" id="abstract"> | |
| <div class="container is-max-desktop"> | |
| <div class="columns is-centered has-text-centered"> | |
| <div class="column is-four-fifths"> | |
| <h2 class="title is-3">Abstract</h2> | |
| <div class="content has-text-justified"> | |
| <p> | |
| Multimodal agents have demonstrated strong performance in general GUI interactions, but their | |
| application in automotive systems has been largely unexplored. In-vehicle GUIs present distinct | |
| challenges: drivers’ limited attention, strict safety requirements, and complex location-based | |
| interaction patterns. To address these challenges, we introduce <strong>Automotive-ENV</strong>, | |
| the first high-fidelity benchmark and interaction environment tailored for vehicle GUIs. | |
| </p> | |
| <p> | |
| This platform defines <strong>185 parameterized tasks</strong> spanning explicit control, | |
| implicit intent understanding, and safety-aware tasks, and provides structured multimodal | |
| observations with precise programmatic checks for reproducible evaluation. Building on this | |
| benchmark, we propose <strong>ASURADA</strong>, a geo-aware multimodal agent that integrates | |
| GPS-informed context to dynamically adjust actions based on location, environmental conditions, | |
| and regional driving norms. | |
| </p> | |
| <p> | |
| Experiments show that geo-aware information significantly improves success on safety-aware tasks, | |
| highlighting the importance of location-based context in automotive environments. We will release | |
| Automotive-ENV, complete with all tasks and benchmarking tools, to further the development of | |
| safe and adaptive in-vehicle agents. | |
| </p> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- BibTeX: citation entry for the paper, rendered in a preformatted code block; whitespace inside the pre element is significant, so leave its contents untouched. --> | |
| <section class="section" id="BibTeX"> | |
| <div class="container is-max-desktop content"> | |
| <h2 class="title">BibTeX</h2> | |
| <pre><code>@misc{yan2025automotiveenvbenchmarkingmultimodalagents, | |
| title={Automotive-ENV: Benchmarking Multimodal Agents in Vehicle Interface Systems}, | |
| author={Junfeng Yan and Biao Wu and Meng Fang and Ling Chen}, | |
| year={2025}, | |
| eprint={2509.21143}, | |
| archivePrefix={arXiv}, | |
| primaryClass={cs.RO}, | |
| url={https://arxiv.org/abs/2509.21143} | |
| }</code></pre> | |
| </div> | |
| </section> | |
| <!-- Footer: icon shortcuts to the paper and repository, followed by template credit and copyright. --> | |
| <footer class="footer"> | |
| <div class="container"> | |
| <div class="content has-text-centered"> | |
| <!-- These links contain only an icon, so each gets an aria-label as its accessible name; the icon itself is hidden from assistive technology. --> | |
| <a class="icon-link" href="https://arxiv.org/abs/2509.21143" target="_blank" rel="noopener" aria-label="Paper on arXiv"> | |
| <i class="ai ai-arxiv" aria-hidden="true"></i> | |
| </a> | |
| <a class="icon-link" href="https://github.com/automotive-env/AutmotiveEnv.github.io" target="_blank" rel="noopener" aria-label="Code on GitHub"> | |
| <i class="fab fa-github" aria-hidden="true"></i> | |
| </a> | |
| </div> | |
| <div class="columns is-centered"> | |
| <div class="column is-8"> | |
| <div class="content has-text-centered"> | |
| <p>This website reuses the open-source Nerfies/OS-World page framework. Please remove analytics if you do not need it.</p> | |
| <p>© 2025 automotive-env — Hosted on GitHub Pages.</p> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </footer> | |
| </body> | |
| </html> |