title={Finding Point with Image: An End-to-End Benchmark for Vision-based UAV Localization},
author={Dai, Ming and Chen, Jiahao and Lu, Yusheng and Hao, Wenlong and Zheng, Enhui},
journal={arXiv preprint arXiv:2208.06561},
year={2022}
}
% Vaswani et al., 2017. Recorded as an article-type entry with the venue in
% the journal field, the same way as the other NeurIPS entry (chu2021twins).
% {\L} is a braced special character so BibTeX sorts and labels it as one letter.
@article{vaswani2017attention,
title={Attention Is All You Need},
author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
journal={Advances in Neural Information Processing Systems},
volume={30},
year={2017}
}
% Liu et al., 2021 (ICCV). The title is stored in title case; {Swin} and
% {IEEE/CVF} are braced so sentence-casing styles keep their capitals.
@inproceedings{liu2021swin,
title={{Swin} Transformer: Hierarchical Vision Transformer Using Shifted Windows},
author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},
booktitle={Proceedings of the {IEEE/CVF} International Conference on Computer Vision},
pages={10012--10022},
year={2021}
}
@article{chu2021twins,
title={Twins: Revisiting the design of spatial attention in vision transformers},
author={Chu, Xiangxiang and Tian, Zhi and Wang, Yuqing and Zhang, Bo and Ren, Haibing and Wei, Xiaolin and Xia, Huaxia and Shen, Chunhua},
journal={Advances in Neural Information Processing Systems},