Q-learning with UCB Exploration is Sample Efficient for Infinite-Horizon MDP.
https://openreview.net/forum?id=BkglSTNFDB
% Conference paper (ICLR 2020); record exported from the dblp bibliography.
@inproceedings{DBLP:conf/iclr/WangDCW20,
  author    = {Wang, Yuanhao and
               Dong, Kefan and
               Chen, Xiaoyu and
               Wang, Liwei},
  title     = {Q-learning with {UCB} Exploration is Sample Efficient for Infinite-Horizon {MDP}},
  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher = {OpenReview.net},
  year      = {2020},
  url       = {https://openreview.net/forum?id=BkglSTNFDB},
  timestamp = {Thu, 07 May 2020 17:11:47 +0200},
  biburl    = {https://dblp.org/rec/conf/iclr/WangDCW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
本页面没有标签