2025

Ranjan Sapkota; Marco Flores-Calero; Rizwan Qureshi; Chetan Badgujar; Upesh Nepal; Alwin Poulose; Peter Zeno; Uday Bhanu Prakash Vaddevolu; Sheheryar Khan; Maged Shoman; Hong Yan; Manoj Karkee
YOLO advances to its genesis: a decadal and comprehensive review of the You Only Look Once (YOLO) series Journal Article
In: Artificial Intelligence Review, vol. 58, no. 9, pp. 274, 2025, ISSN: 1573-7462.
Abstract | Links | BibTeX | Tags: Agriculture, Artificial intelligence, Autonomous vehicles, CNN, Computer vision, Deep learning, Healthcare and medical imaging, Industrial manufacturing, Real-time object detection, Surveillance, Traffic safety, YOLO, YOLO configurations, YOLOv1 to YOLOv12, You Only Look Once
@article{sapkota_yolo_2025,
  title     = {{YOLO} advances to its genesis: a decadal and comprehensive review of the {You Only Look Once} ({YOLO}) series},
  author    = {Sapkota, Ranjan and Flores-Calero, Marco and Qureshi, Rizwan and Badgujar, Chetan and Nepal, Upesh and Poulose, Alwin and Zeno, Peter and Vaddevolu, Uday Bhanu Prakash and Khan, Sheheryar and Shoman, Maged and Yan, Hong and Karkee, Manoj},
  url       = {https://doi.org/10.1007/s10462-025-11253-3},
  doi       = {10.1007/s10462-025-11253-3},
  issn      = {1573-7462},
  year      = {2025},
  date      = {2025-06-01},
  urldate   = {2025-06-01},
  journal   = {Artificial Intelligence Review},
  volume    = {58},
  number    = {9},
  pages     = {274},
  abstract  = {This review systematically examines the progression of the You Only Look Once (YOLO) object detection algorithms from YOLOv1 to the recently unveiled YOLOv12. Employing a reverse chronological analysis, this study examines the advancements introduced by YOLO algorithms, beginning with YOLOv12 and progressing through YOLO11 (or YOLOv11), YOLOv10, YOLOv9, YOLOv8, and subsequent versions to explore each version’s contributions to enhancing speed, detection accuracy, and computational efficiency in real-time object detection. Additionally, this study reviews the alternative versions derived from YOLO architectural advancements of YOLO-NAS, YOLO-X, YOLO-R, DAMO-YOLO, and Gold-YOLO. Moreover, the study highlights the transformative impact of YOLO models across five critical application areas: autonomous vehicles and traffic safety, healthcare and medical imaging, industrial manufacturing, surveillance and security, and agriculture. By detailing the incremental technological advancements in subsequent YOLO versions, this review chronicles the evolution of YOLO, and discusses the challenges and limitations in each of the earlier versions. The evolution signifies a path towards integrating YOLO with multimodal, context-aware, and Artificial General Intelligence (AGI) systems for the next YOLO decade, promising significant implications for future developments in AI-driven applications.},
  keywords  = {Agriculture, Artificial intelligence, Autonomous vehicles, CNN, Computer vision, Deep learning, Healthcare and medical imaging, Industrial manufacturing, Real-time object detection, Surveillance, Traffic safety, YOLO, YOLO configurations, YOLOv1 to YOLOv12, You Only Look Once},
  pubstate  = {published},
  tppubtype = {article}
}

Ranjan Sapkota; Rizwan Qureshi; Muhammad Usman Hadi; Syed Zohaib Hassan; Ferhat Sadak; Maged Shoman; Muhammad Sajjad; Fayaz Ali Dharejo; Achyut Paudel; Jiajia Li; Zhichao Meng; John Shutske; Manoj Karkee
Multi-Modal LLMs in Agriculture: A Comprehensive Review Journal Article
In: IEEE Transactions on Automation Science and Engineering, vol. 22, pp. 22510–22540, 2025, ISSN: 1558-3783.
Abstract | Links | BibTeX | Tags: Agriculture, Analytical models, ChatGPT, Computational modeling, Computer vision, Data models, Deep learning, Farming, generative artificial intelligence, Hidden Markov models, Large language models (LLMs), Machine Learning, Precision agriculture, Reviews, Training, Transformers, Translation, Vision-language models
@article{sapkota_multi-modal_2025,
  title     = {Multi-Modal {LLMs} in Agriculture: A Comprehensive Review},
  author    = {Sapkota, Ranjan and Qureshi, Rizwan and Hadi, Muhammad Usman and Hassan, Syed Zohaib and Sadak, Ferhat and Shoman, Maged and Sajjad, Muhammad and Dharejo, Fayaz Ali and Paudel, Achyut and Li, Jiajia and Meng, Zhichao and Shutske, John and Karkee, Manoj},
  url       = {https://ieeexplore.ieee.org/document/11173627},
  doi       = {10.1109/TASE.2025.3612154},
  issn      = {1558-3783},
  year      = {2025},
  date      = {2025-01-01},
  urldate   = {2025-01-01},
  journal   = {IEEE Transactions on Automation Science and Engineering},
  volume    = {22},
  pages     = {22510--22540},
  abstract  = {Given the rapid emergence and applications of Multi-Modal Large Language Models (MM-LLMs) across various scientific fields, insights regarding their applicability in agriculture are still only partially explored. This paper conducts an in-depth review of MM-LLMs in agriculture, focusing on understanding how MM-LLMs can be developed and implemented to optimize agricultural processes, increase efficiency, and reduce costs. Recent studies have explored the capabilities of MM-LLMs in agricultural information processing and decision-making. Despite these advancements, significant gaps persist, particularly in addressing domain-specific challenges such as variable data quality and availability, integration with existing agricultural systems, and the creation of robust training datasets that accurately represent complex agricultural environments. Moreover, a comprehensive understanding of the capabilities, challenges, and limitations of MM-LLMs in agricultural information processing and application is still missing. Exploring these areas is crucial to providing the community with a broader perspective and a clearer understanding of MM-LLMs’ applications, establishing a benchmark for the current state and emerging trends in this field. To bridge this gap, this survey reviews the progress of MM-LLMs and their utilization in agriculture, with an additional focus on 11 key research questions (RQs), where 4 RQs are general and 7 RQs are agriculture focused. By addressing these RQs, this review outlines the current opportunities and challenges, limitations, and future roadmap for MM-LLMs in agriculture. The findings indicate that multi-modal MM-LLMs not only simplify complex agricultural challenges but also significantly enhance decision-making and improve the efficiency of agricultural image processing. These advancements position MM-LLMs as an essential tool for the future of farming. 
For continued research and understanding, an organized and regularly updated list of papers on MM-LLMs is available at https://github.com/JiajiaLi04/Multi-Modal-LLMs-in-Agriculture Note to Practitioners\textemdash{}Motivated by the need to optimize agricultural practices, this paper investigates the use of Large Language Models (MM-LLMs) to improve efficiency and decision-making in agriculture. We delve into critical RQs to reveal the capabilities and challenges of MM-LLMs, and their potential applications in the agricultural sector. Looking ahead, our findings suggest a promising future for the integration of MM-LLMs in agriculture, potentially revolutionizing how we manage and operate farms.},
  keywords  = {Agriculture, Analytical models, ChatGPT, Computational modeling, Computer vision, Data models, Deep learning, Farming, generative artificial intelligence, Hidden Markov models, Large language models (LLMs), Machine Learning, Precision agriculture, Reviews, Training, Transformers, Translation, Vision-language models},
  pubstate  = {published},
  tppubtype = {article}
}
