% Conference paper in the PRIS 2023 proceedings (ACM International Conference
% Proceeding Series). The citation key is kept exactly as exported so existing
% citations continue to resolve.
% - "Q" in the title is brace-protected so sentence-casing styles keep it upper case.
% - Author and editor names use the unambiguous "Last, First" form.
% - The exporter's copyright / conference-date remark is stored verbatim in the
%   non-printing internal-note field so it no longer appears in the rendered
%   bibliography.
% NOTE(review): the day field says 28 while the exported remark gives a
% conference date of 29-07-2023 -- confirm against the publisher record.
@inproceedings{3465f4e91ae14205960e0e2c0407ea2f,
  author        = {Xu, Jianing and Zhou, Bei and Jin, Nanlin},
  title         = {Policy Updating Methods of {Q} Learning for Two Player Bargaining Game},
  booktitle     = {Proceedings - 2023 5th International Conference on Pattern Recognition and Intelligent Systems, PRIS 2023},
  editor        = {Zhao, Wenbing and Yu, Xinguo},
  series        = {ACM International Conference Proceeding Series},
  publisher     = {Association for Computing Machinery},
  year          = {2023},
  month         = jul,
  day           = {28},
  pages         = {51--58},
  doi           = {10.1145/3609703.3609722},
  language      = {English},
  keywords      = {bargaining game, Q learning, self-play},
  abstract      = {Reinforcement learning algorithms have been used to discover the strategies in game theory. This study investigates whether Q learning, one of the classic reinforcement learning methods, is capable of training bargaining players via self-play, a training paradigm used by AlphaGo, to maximum their profit. We also compare our empirical results with the known theoretic solutions and perform an comprehensive analysis upon their differences. To accomplish these, we come up with two policy updating methods used in the training process, namely alternate update and simultaneous update, which are tailored for two players who propose offers and counter-offers in an alternating manner under a time constraint enforced by the discount factors. Our experimental results have demonstrated that the values of the discount factor actually have tangible impact on how far the bargaining outcomes deviate from the game theoretic solutions.},
  internal-note = {Publisher Copyright: {\textcopyright} 2023 ACM.; 5th International Conference on Pattern Recognition and Intelligent Systems, PRIS 2023 ; Conference date: 29-07-2023},
}