Publications

Smaller, Faster, Cheaper: Architectural Designs for Efficient Machine Learning

Steven Walton

BibTeX

% PhD dissertation (University of Oregon, June 2025).
% The granting body is stored in `school`, the field classic BibTeX requires
% for thesis entries; biblatex reads `school` as an alias of `institution`.
% `month` uses the built-in jun macro so each style can render it itself.
@phdthesis{Walton2025Dissertation,
  author = {Walton, Steven},
  title  = {Smaller, Faster, Cheaper: Architectural Designs for Efficient Machine Learning},
  school = {College of Arts and Sciences, University of Oregon},
  type   = {Dissertation Defense},
  month  = jun,
  year   = {2025},
  note   = {Available at \url{https://www.cs.uoregon.edu/Reports/PHD-202506-Walton.pdf}},
}

Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light

Ali Hassani, Fengzhe Zhou, Aditya Kane, Jiannan Huang, Chieh-Yun Chen, Min Shi, Steven Walton, Markus Hoehnerbach, Vijay Thakkar, Michael Isaev, Qinsheng Zhang, Bing Xu, Haicheng Wu, Wen-mei Hwu, Ming-Yu Liu, Humphrey Shi

GitHub
arXiv

BibTeX

% arXiv preprint 2504.16922 (primary class cs.CV).
% Names are written in the unambiguous "Last, First" form.
@misc{hassani2025generalizedneighborhoodattentionmultidimensional,
  author        = {Hassani, Ali and Zhou, Fengzhe and Kane, Aditya and Huang, Jiannan and Chen, Chieh-Yun and Shi, Min and Walton, Steven and Hoehnerbach, Markus and Thakkar, Vijay and Isaev, Michael and Zhang, Qinsheng and Xu, Bing and Wu, Haicheng and Hwu, Wen-mei and Liu, Ming-Yu and Shi, Humphrey},
  title         = {Generalized Neighborhood Attention: Multi-dimensional Sparse Attention at the Speed of Light},
  year          = {2025},
  eprint        = {2504.16922},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2504.16922},
}

Efficient Image Generation with Variadic Attention Heads (StyleNAT)

Steven Walton, Ali Hassani, Xingqian Xu, Zhangyang Wang, Humphrey Shi

eLVM @ CVPR 2025

GitHub
arXiv

BibTeX

% CVPR 2025 Workshops paper.
% `month` uses the built-in jun macro (unquoted) instead of a literal string,
% so the bibliography style decides how the month is printed.
@inproceedings{WaltonStyleNAT2025CVPR,
  author    = {Walton, Steven and Hassani, Ali and Xu, Xingqian and Wang, Zhangyang and Shi, Humphrey},
  title     = {Efficient Image Generation with Variadic Attention Heads},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  pages     = {3239--3250},
  month     = jun,
  year      = {2025},
}

###### Formerly ######
% arXiv preprint 2211.05770 (primary class cs.CV).
% {StyleNAT} is brace-protected so sentence-casing styles keep its capitals.
@misc{walton2023stylenatgivingheadnew,
  title         = {{StyleNAT}: Giving Each Head a New Perspective},
  author        = {Steven Walton and Ali Hassani and Xingqian Xu and Zhangyang Wang and Humphrey Shi},
  year          = {2023},
  eprint        = {2211.05770},
  url           = {https://arxiv.org/abs/2211.05770},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}

Distilling Normalizing Flows

Steven Walton, Valeriy Klyukin, Maksim Artemev, Denis Derkach, Nikita Orlov, Humphrey Shi

eLVM @ CVPR 2025

arXiv

BibTeX

% CVPR 2025 Workshops paper.
% `month` uses the built-in jun macro (unquoted) instead of a literal string,
% so the bibliography style decides how the month is printed.
@inproceedings{WaltonDNF2025CVPR,
  author    = {Walton, Steven and Klyukin, Valeriy and Artemev, Maksim and Derkach, Denis and Orlov, Nikita and Shi, Humphrey},
  title     = {Distilling Normalizing Flows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  pages     = {3337--3346},
  month     = jun,
  year      = {2025},
}

ZeroBench: An Impossible Visual Benchmark for Contemporary Large Multimodal Models

Jonathan Roberts, Mohammad Reza Taesiri, Ansh Sharma, Akash Gupta, Samuel Roberts, Ioana Croitoru, Simion-Vlad Bogolin, Jialu Tang, Florian Langer, Vyas Raina, Vatsal Raina, Hanyi Xiong, Vishaal Udandarao, Jingyi Lu, Shiyang Chen, Sam Purkis, Tianshuo Yan, Wenye Lin, Gyungin Shin, Qiaochu Yang, Anh Totti Nguyen, David I. Atkinson, Aaditya Baranwal, Alexandru Coca, Mikah Dang, Sebastian Dziadzio, Jakob D. Kunz, Kaiqu Liang, Alexander Lo, Brian Pulfer, Steven Walton, Charig Yang, Kai Han, Samuel Albanie

GitHub
arXiv

BibTeX

% arXiv preprint 2502.09696 (primary class cs.CV).
% {ZeroBench} is brace-protected so sentence-casing styles keep its capitals.
@misc{roberts2025zerobenchimpossiblevisualbenchmark,
  title         = {{ZeroBench}: An Impossible Visual Benchmark for Contemporary Large Multimodal Models},
  author        = {Jonathan Roberts and Mohammad Reza Taesiri and Ansh Sharma and Akash Gupta and Samuel Roberts and Ioana Croitoru and Simion-Vlad Bogolin and Jialu Tang and Florian Langer and Vyas Raina and Vatsal Raina and Hanyi Xiong and Vishaal Udandarao and Jingyi Lu and Shiyang Chen and Sam Purkis and Tianshuo Yan and Wenye Lin and Gyungin Shin and Qiaochu Yang and Anh Totti Nguyen and David I. Atkinson and Aaditya Baranwal and Alexandru Coca and Mikah Dang and Sebastian Dziadzio and Jakob D. Kunz and Kaiqu Liang and Alexander Lo and Brian Pulfer and Steven Walton and Charig Yang and Kai Han and Samuel Albanie},
  year          = {2025},
  eprint        = {2502.09696},
  url           = {https://arxiv.org/abs/2502.09696},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}

Neighborhood Attention Transformer

Ali Hassani, Steven Walton, Jiachen Li, Shen Li, Humphrey Shi

CVPR 2023

GitHub
NATTEN Repo

BibTeX

% CVPR 2023 main-conference paper.
% The page range uses a double hyphen (rendered as an en dash) and `month`
% uses the built-in jun macro rather than a literal string.
@inproceedings{Hassani_2023_CVPR,
  author    = {Hassani, Ali and Walton, Steven and Li, Jiachen and Li, Shen and Shi, Humphrey},
  title     = {Neighborhood Attention Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {6185--6194},
  month     = jun,
  year      = {2023},
}

Design Amortization for Bayesian Optimal Experimental Design

Noble Kennamer, Steven Walton, Alexander Ihler

AAAI 2023

BibTeX

% AAAI 2023 paper, volume 37, number 7 of the AAAI proceedings series.
% {Bayesian} is brace-protected against sentence-casing styles, the page
% range uses a double hyphen, `month` uses the built-in jun macro, and the
% abstract is stored under the conventional `abstract` field name.
@article{Kennamer_Walton_Ihler_2023,
  author   = {Kennamer, Noble and Walton, Steven and Ihler, Alexander},
  title    = {Design Amortization for {Bayesian} Optimal Experimental Design},
  journal  = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume   = {37},
  number   = {7},
  pages    = {8220--8227},
  month    = jun,
  year     = {2023},
  doi      = {10.1609/aaai.v37i7.25992},
  url      = {https://ojs.aaai.org/index.php/AAAI/article/view/25992},
  abstract = {Bayesian optimal experimental design is a sub-field of statistics focused on developing methods to make efficient use of experimental resources. Any potential design is evaluated in terms of a utility function, such as the (theoretically well-justified) expected information gain (EIG); unfortunately however, under most circumstances the EIG is intractable to evaluate. In this work we build off of successful variational approaches, which optimize a parameterized variational model with respect to bounds on the EIG. Past work focused on learning a new variational model from scratch for each new design considered. Here we present a novel neural architecture that allows experimenters to optimize a single variational model that can estimate the EIG for potentially infinitely many designs. To further improve computational efficiency, we also propose to train the variational model on a significantly cheaper-to-evaluate lower bound, and show empirically that the resulting model provides an excellent guide for more accurate, but expensive to evaluate bounds on the EIG. We demonstrate the effectiveness of our technique on generalized linear models, a class of statistical models that is widely used in the analysis of controlled experiments. Experiments show that our method is able to greatly improve accuracy over existing approximation strategies, and achieve these results with far better sample efficiency.},
}

Isomorphism, Normalizing Flows, and Density Estimation: Preserving Relationships Between Data

Steven Walton

(UO) Area Exam

BibTeX

% University of Oregon area exam report AREA-202307-Walton (July 2023).
% Every field is terminated by a comma; a missing separator after `author`
% makes the whole entry unparseable. `month` uses the built-in jul macro.
@techreport{walton2023isomorphism,
  type        = {Area Exam},
  author      = {Steven Walton},
  title       = {Isomorphism, Normalizing Flows, and Density Estimation: Preserving Relationships Between Data},
  institution = {University of Oregon, Computer and Information Sciences Department},
  number      = {AREA-202307-Walton},
  month       = jul,
  year        = {2023},
  note        = {Available at \url{https://www.cs.uoregon.edu/Reports/AREA-202307-Walton.pdf}},
}

SeMask: Semantically Masked Transformers for Semantic Segmentation

Jitesh Jain, Anukriti Singh, Nikita Orlov, Zilong Huang, Jiachen Li, Steven Walton, Humphrey Shi

NIVT @ ICCV 2023

GitHub
arXiv

BibTeX

% ICCV 2023 Workshops paper.
% {SeMask} is brace-protected against sentence-casing styles, the page range
% uses a double hyphen, and `month` uses the built-in oct macro.
@inproceedings{Jain_2023_ICCV,
  author    = {Jain, Jitesh and Singh, Anukriti and Orlov, Nikita and Huang, Zilong and Li, Jiachen and Walton, Steven and Shi, Humphrey},
  title     = {{SeMask}: Semantically Masked Transformers for Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
  pages     = {752--761},
  month     = oct,
  year      = {2023},
}

ConvMLP: Hierarchical Convolutional MLPs for Vision

Jiachen Li, Ali Hassani, Steven Walton, Humphrey Shi

WFM @ CVPR 2023

GitHub
arXiv

BibTeX

% CVPR 2023 Workshops paper.
% {ConvMLP} and {MLPs} are brace-protected against sentence-casing styles,
% the page range uses a double hyphen, and `month` uses the jun macro.
@inproceedings{Li_2023_CVPR,
  author    = {Li, Jiachen and Hassani, Ali and Walton, Steven and Shi, Humphrey},
  title     = {{ConvMLP}: Hierarchical Convolutional {MLPs} for Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  pages     = {6307--6316},
  month     = jun,
  year      = {2023},
}

Escaping the Big Data Paradigm with Compact Transformers

Steven Walton, Ali Hassani, Nikhil Shah, Abulikemu Abuduweili, Jiachen Li, Humphrey Shi

GitHub
arXiv
PyTorch Blog

BibTeX

% arXiv preprint 2104.05704 (primary class cs.CV).
% Names are written in the unambiguous "Last, First" form.
@misc{hassani2022escapingbigdataparadigm,
  author        = {Hassani, Ali and Walton, Steven and Shah, Nikhil and Abuduweili, Abulikemu and Li, Jiachen and Shi, Humphrey},
  title         = {Escaping the Big Data Paradigm with Compact Transformers},
  year          = {2022},
  eprint        = {2104.05704},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2104.05704},
}

###### Blog Post ######
% Blog post hosted at medium.com/pytorch.
% {PyTorch} is brace-protected so sentence-casing styles keep its capitals.
@article{walton2021Escaping,
  author  = {Steven Walton and Ali Hassani and Abulikemu Abuduweili and Humphrey Shi},
  title   = {Training Compact Transformers from Scratch in 30 Minutes with {PyTorch}},
  journal = {medium.com/pytorch},
  year    = {2021},
  url     = {https://medium.com/pytorch/training-compact-transformers-from-scratch-in-30-minutes-with-pytorch-ff5c21668ed5},
}

Visualization as a Service for Scientific Data

David Pugmire, James Kress, Jieyang Chen, Hank Childs, Jong Choi, Dmitry Ganyushin, Berk Geveci, Mark Kim, Scott Klasky, Xin Liang, Jeremy Logan, Nicole Marsaglia, Kshitij Mehta, Norbert Podhorszki, Caitlin Ross, Eric Suchyta, Nick Thompson, Steven Walton, Lipeng Wan, Matthew Wolf

SMC 2020

BibTeX

% Chapter in a Springer proceedings volume (Cham, 2020), pages 157--174.
% Individual names in `author` and `editor` are separated by the word "and";
% without it BibTeX cannot split the list into people. `editor` is its own
% field on its own line.
@inproceedings{pugmire_vaas,
  author    = {Pugmire, David and Kress, James and Chen, Jieyang and Childs, Hank and Choi, Jong and Ganyushin, Dmitry and Geveci, Berk and Kim, Mark and Klasky, Scott and Liang, Xin and Logan, Jeremy and Marsaglia, Nicole and Mehta, Kshitij and Podhorszki, Norbert and Ross, Caitlin and Suchyta, Eric and Thompson, Nick and Walton, Steven and Wan, Lipeng and Wolf, Matthew},
  editor    = {Nichols, Jeffrey and Verastegui, Becky and Maccabe, Arthur `Barney' and Hernandez, Oscar and Parete-Koon, Suzanne and Ahearn, Theresa},
  title     = {Visualization as a Service for Scientific Data},
  booktitle = {Driving Scientific and Engineering Discoveries Through the Convergence of HPC, Big Data and AI},
  year      = {2020},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {157--174},
  isbn      = {978-3-030-63393-6},
}

DATUM: Dotted Attention Temporal Upscaling Method

Steven Walton

(UO) DRP

BibTeX

% University of Oregon directed research project report DRP-202006-Walton.
% The acronym {DATUM} is brace-protected against sentence-casing styles and
% `month` uses the built-in jun macro rather than a literal string.
@techreport{waltondatum,
  type        = {Directed Research Project},
  author      = {Steven Walton},
  title       = {{DATUM}: Dotted Attention Temporal Upscaling Method},
  institution = {University of Oregon, Computer and Information Sciences Department},
  number      = {DRP-202006-Walton},
  month       = jun,
  year        = {2020},
  note        = {Available at \url{https://www.cs.uoregon.edu/Reports/DRP-202006-Walton.pdf}},
}

Publications

Trending Tags