Q-learning with UCB Exploration is Sample Efficient for Infinite-Horizon MDP.
http://arxiv.org/abs/1901.09311
% arXiv preprint record as exported from DBLP (see the biburl / bibsource fields).
@article{DBLP:journals/corr/abs-1901-09311,
  author        = {Kefan Dong and Yuanhao Wang and Xiaoyu Chen and Liwei Wang},
  title         = {Q-learning with {UCB} Exploration is Sample Efficient for Infinite-Horizon {MDP}},
  journal       = {CoRR},
  volume        = {abs/1901.09311},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1901.09311},
  url           = {http://arxiv.org/abs/1901.09311},
  timestamp     = {Sat, 02 Feb 2019 16:56:00 +0100},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1901-09311.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
本页面没有标签