{"id":1582,"date":"2025-02-18T23:12:35","date_gmt":"2025-02-18T14:12:35","guid":{"rendered":"https:\/\/miniqr.com\/?p=1582"},"modified":"2025-02-18T23:12:35","modified_gmt":"2025-02-18T14:12:35","slug":"%e3%80%8c%e5%bc%b7%e5%8c%96%e5%ad%a6%e7%bf%92%e3%81%ae%e5%9f%ba%e7%a4%8e%e3%81%a8%e5%bf%9c%e7%94%a8%e3%80%80%e4%b8%bb%e8%a6%81%e3%82%a2%e3%83%ab%e3%82%b4%e3%83%aa%e3%82%ba%e3%83%a0%e3%81%8b%e3%82%89","status":"publish","type":"post","link":"https:\/\/miniqr.com\/?p=1582","title":{"rendered":"**\u300c\u5f37\u5316\u5b66\u7fd2\u306e\u57fa\u790e\u3068\u5fdc\u7528\u3000\u4e3b\u8981\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u304b\u3089\u672a\u6765\u3078\u306e\u5c55\u671b\u300d**"},"content":{"rendered":"<p>**\u5f37\u5316\u5b66\u7fd2\u3068\u306f\u3069\u306e\u3088\u3046\u306a\u5b66\u7fd2\u65b9\u6cd5\u3067\u3059\u304b\uff1f**<br \/>\n\u5f37\u5316\u5b66\u7fd2\u3068\u306f<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\uff08Reinforcement Learning, RL\uff09\u306f\u3001\u4eba\u5de5\u77e5\u80fd\uff08AI\uff09\u306e\u4e00\u5206\u91ce\u3067\u3042\u308a\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u547c\u3070\u308c\u308b\u5b66\u7fd2\u8005\u304c\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3057\u306a\u304c\u3089\u3001\u5831\u916c\u3092\u6700\u5927\u5316\u3059\u308b\u884c\u52d5\u3092\u5b66\u7fd2\u3059\u308b\u624b\u6cd5\u3067\u3059\u3002<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306f\u3001\u8a66\u884c\u932f\u8aa4\u3092\u901a\u3058\u3066\u6700\u9069\u306a\u6226\u7565\uff08\u30dd\u30ea\u30b7\u30fc\uff09\u3092\u898b\u3064\u3051\u51fa\u3059\u70b9\u3067\u7279\u5fb4\u7684\u3067\u3042\u308a\u3001\u4ed6\u306e\u6a5f\u68b0\u5b66\u7fd2\u624b\u6cd5\uff08\u4f8b\u3048\u3070\u6559\u5e2b\u3042\u308a\u5b66\u7fd2\u3084\u6559\u5e2b\u306a\u3057\u5b66\u7fd2\uff09\u3068\u306f\u7570\u306a\u308b\u30a2\u30d7\u30ed\u30fc\u30c1\u3092\u53d6\u308a\u307e\u3059\u3002<\/p>\n<p>\u57fa\u672c\u7684\u306a\u69cb\u6210\u8981\u7d20<\/p>\n<p>\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\uff08Agent\uff09 
\u5b66\u7fd2\u3092\u884c\u3046\u4e3b\u4f53\u3002<\/p>\n<p>\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3057\u3001\u884c\u52d5\u3092\u9078\u629e\u3059\u308b\u3002<\/p>\n<p>\u74b0\u5883\uff08Environment\uff09 \u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u64cd\u4f5c\u3059\u308b\u5bfe\u8c61\u3002<\/p>\n<p>\u72b6\u614b\uff08State\uff09\u3084\u5831\u916c\uff08Reward\uff09\u306e\u63d0\u4f9b\u5143\u3002<\/p>\n<p>\u72b6\u614b\uff08State\uff09 \u74b0\u5883\u306e\u73fe\u5728\u306e\u72b6\u6cc1\u3092\u8868\u3059\u60c5\u5831\u3002<\/p>\n<p>\u884c\u52d5\uff08Action\uff09 \u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u9078\u629e\u3067\u304d\u308b\u9078\u629e\u80a2\u3002<\/p>\n<p>\u5831\u916c\uff08Reward\uff09 \u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u884c\u52d5\u306b\u5bfe\u3059\u308b\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3002<\/p>\n<p>\u76ee\u6a19\u306f\u7d2f\u7a4d\u5831\u916c\u306e\u6700\u5927\u5316\u3002<\/p>\n<p>\u5b66\u7fd2\u306e\u6d41\u308c<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u3067\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u3042\u308b\u72b6\u614b\u306b\u304a\u3044\u3066\u884c\u52d5\u3092\u9078\u629e\u3057\u3001\u305d\u306e\u7d50\u679c\u3068\u3057\u3066\u65b0\u3057\u3044\u72b6\u614b\u3068\u5831\u916c\u3092\u53d7\u3051\u53d6\u308a\u307e\u3059\u3002<\/p>\n<p>\u3053\u306e\u30d7\u30ed\u30bb\u30b9\u3092\u7e70\u308a\u8fd4\u3059\u3053\u3068\u3067\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u3069\u306e\u884c\u52d5\u304c\u9ad8\u3044\u5831\u916c\u3092\u3082\u305f\u3089\u3059\u304b\u3092\u5b66\u7fd2\u3057\u3001\u6700\u9069\u306a\u884c\u52d5\u6226\u7565\uff08\u30dd\u30ea\u30b7\u30fc\uff09\u3092\u5f62\u6210\u3057\u307e\u3059\u3002<\/p>\n<p>\u5b66\u7fd2\u306e\u67a0\u7d44\u307f\u3068\u3057\u3066\u306f\u3001\u4e3b\u306b\u30de\u30eb\u30b3\u30d5\u6c7a\u5b9a\u904e\u7a0b\uff08Markov Decision Process, 
MDP\uff09\u304c\u7528\u3044\u3089\u308c\u3001\u72b6\u614b\u9077\u79fb\u3084\u5831\u916c\u304c\u78ba\u7387\u7684\u306b\u5b9a\u7fa9\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>\u4e3b\u8981\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0<\/p>\n<p>Q\u5b66\u7fd2\uff08Q-Learning\uff09 \u5404\u72b6\u614b\u3068\u884c\u52d5\u306e\u7d44\u307f\u5408\u308f\u305b\u306b\u5bfe\u3057\u3066\u4fa1\u5024\uff08Q\u5024\uff09\u3092\u5272\u308a\u5f53\u3066\u3001\u6700\u9069\u306a\u884c\u52d5\u3092\u9078\u629e\u3059\u308b\u624b\u6cd5\u3002<\/p>\n<p>\u30aa\u30d5\u30dd\u30ea\u30b7\u30fc\u5b66\u7fd2\u6cd5\u3068\u3057\u3066\u77e5\u3089\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u30b5\u30eb\u30b5\u6cd5\uff08SARSA\uff09 \u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u5b9f\u969b\u306b\u53d6\u308b\u884c\u52d5\u306b\u57fa\u3065\u3044\u3066\u4fa1\u5024\u3092\u66f4\u65b0\u3059\u308b\u30aa\u30f3\u30dd\u30ea\u30b7\u30fc\u5b66\u7fd2\u6cd5\u3002<\/p>\n<p>\u30dd\u30ea\u30b7\u30fc\u52fe\u914d\u6cd5\uff08Policy Gradient Methods\uff09 \u76f4\u63a5\u30dd\u30ea\u30b7\u30fc\u3092\u30d1\u30e9\u30e1\u30fc\u30bf\u5316\u3057\u3001\u305d\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u3092\u6700\u9069\u5316\u3059\u308b\u624b\u6cd5\u3002<\/p>\n<p>\u6df1\u5c64\u5f37\u5316\u5b66\u7fd2\uff08Deep RL\uff09\u3067\u5e83\u304f\u7528\u3044\u3089\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u30c7\u30a3\u30fc\u30d7Q\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\uff08DQN\uff09 
Q\u5b66\u7fd2\u3068\u6df1\u5c64\u5b66\u7fd2\u3092\u7d44\u307f\u5408\u308f\u305b\u305f\u624b\u6cd5\u3067\u3001\u9ad8\u6b21\u5143\u306e\u72b6\u614b\u7a7a\u9593\u306b\u3082\u5bfe\u5fdc\u53ef\u80fd\u3002<\/p>\n<p>\u5fdc\u7528\u4f8b<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306f\u3001\u30b2\u30fc\u30e0\uff08\u30c1\u30a7\u30b9\u3001\u56f2\u7881\u3001\u30d3\u30c7\u30aa\u30b2\u30fc\u30e0\uff09\u3067\u306e\u6226\u7565\u5b66\u7fd2\u3001\u81ea\u5f8b\u30ed\u30dc\u30c3\u30c8\u306e\u5236\u5fa1\u3001\u7269\u6d41\u306e\u6700\u9069\u5316\u3001\u91d1\u878d\u53d6\u5f15\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u8a2d\u8a08\u306a\u3069\u3001\u5e45\u5e83\u3044\u5206\u91ce\u3067\u5b9f\u7528\u5316\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u7279\u306b\u3001Google DeepMind\u306b\u3088\u308bAlphaGo\u3084AlphaZero\u306f\u3001\u5f37\u5316\u5b66\u7fd2\u306e\u6709\u52b9\u6027\u3092\u793a\u3059\u4ee3\u8868\u7684\u306a\u4f8b\u3067\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3068\u7406\u8ad6\u7684\u57fa\u76e4<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306e\u7406\u8ad6\u7684\u57fa\u76e4\u306f\u3001\u30de\u30eb\u30b3\u30d5\u6c7a\u5b9a\u904e\u7a0b\uff08MDP\uff09\u306b\u57fa\u3065\u3044\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>MDP\u306f\u3001\u72b6\u614b\u9077\u79fb\u304c\u30de\u30eb\u30b3\u30d5\u6027\uff08\u6b21\u306e\u72b6\u614b\u304c\u73fe\u5728\u306e\u72b6\u614b\u3068\u884c\u52d5\u306e\u307f\u306b\u4f9d\u5b58\u3057\u3001\u904e\u53bb\u306e\u72b6\u614b\u3084\u884c\u52d5\u306b\u306f\u4f9d\u5b58\u3057\u306a\u3044\uff09\u3092\u6301\u3064\u3068\u4eee\u5b9a\u3057\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u884c\u52d5\u304c\u5831\u916c\u306b\u4e0e\u3048\u308b\u5f71\u97ff\u3092\u6570\u5b66\u7684\u306b\u30e2\u30c7\u30eb\u5316\u3057\u307e\u3059\u3002<\/p>\n<p>\u6700\u9069\u4fa1\u5024\u95a2\u6570\u306e\u5b58\u5728\u3068\u4e00\u610f\u6027\u3001\u6700\u9069\u30dd\u30ea\u30b7\u30fc\u306e\u5b58\u5728\u3001\u53ce\u675f\u6027\u306e\u4fdd\u8a3c\u306a\u3069\u3001\u7406\u8ad6\u7684\u306a\u89e3\u6790\u304c\u8c4a\u5bcc\u306b\u5b58\u5728\u3057\u3001\u3053\u308c\u304c\u5f37\u5316\u5b66\u7fd2\u306e\u4fe1\u983c\u6027\u3068\u6709\u52b9\u6027\u306e\u6839\u62e0\u3068\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u30
55\u3089\u306b\u3001\u52d5\u7684\u8a08\u753b\u6cd5\u3084\u30d9\u30eb\u30de\u30f3\u65b9\u7a0b\u5f0f\u3068\u3044\u3063\u305f\u6570\u5b66\u7684\u624b\u6cd5\u304c\u5f37\u5316\u5b66\u7fd2\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u8a2d\u8a08\u306b\u6d3b\u7528\u3055\u308c\u3066\u304a\u308a\u3001\u3053\u308c\u3089\u306e\u7406\u8ad6\u7684\u6210\u679c\u304c\u5b9f\u8df5\u7684\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u6027\u80fd\u5411\u4e0a\u306b\u5bc4\u4e0e\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u8fd1\u5e74\u3067\u306f\u3001\u6df1\u5c64\u5b66\u7fd2\u3068\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u8907\u96d1\u3067\u9ad8\u6b21\u5143\u306a\u554f\u984c\u306b\u3082\u5bfe\u5fdc\u53ef\u80fd\u3068\u306a\u308a\u3001\u5b9f\u4e16\u754c\u3067\u306e\u5fdc\u7528\u7bc4\u56f2\u304c\u3055\u3089\u306b\u5e83\u304c\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u307e\u3068\u3081<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3057\u306a\u304c\u3089\u5831\u916c\u3092\u6700\u5927\u5316\u3059\u308b\u305f\u3081\u306e\u884c\u52d5\u6226\u7565\u3092\u5b66\u7fd2\u3059\u308b\u624b\u6cd5\u3067\u3042\u308a\u3001\u305d\u306e\u7406\u8ad6\u7684\u57fa\u76e4\u3068\u5b9f\u8df5\u7684\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306b\u652f\u3048\u3089\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u591a\u69d8\u306a\u5fdc\u7528\u5206\u91ce\u3067\u306e\u6210\u529f\u4e8b\u4f8b\u304c\u5b58\u5728\u3057\u3001\u4eca\u5f8c\u3082AI\u6280\u8853\u306e\u767a\u5c55\u306b\u304a\u3044\u3066\u91cd\u8981\u306a\u5f79\u5272\u3092\u679c\u305f\u3059\u3053\u3068\u304c\u671f\u5f85\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>**\u4e3b\u8981\u306a\u5f37\u5316\u5b66\u7fd2\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306b\u306f\u3069\u3093\u306a\u3082\u306e\u304c\u3042\u308a\u307e\u3059\u304b\uff1f**<br \/>\n\u5f37\u5316\u5b66\u7fd2\uff08Reinforcement Learning, 
RL\uff09\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3057\u306a\u304c\u3089\u6700\u9069\u306a\u884c\u52d5\u65b9\u91dd\uff08\u30dd\u30ea\u30b7\u30fc\uff09\u3092\u5b66\u7fd2\u3059\u308b\u6a5f\u68b0\u5b66\u7fd2\u306e\u4e00\u5206\u91ce\u3067\u3059\u3002<\/p>\n<p>\u3053\u3053\u3067\u306f\u3001\u4e3b\u8981\u306a\u5f37\u5316\u5b66\u7fd2\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306b\u3064\u3044\u3066\u8a73\u3057\u304f\u8aac\u660e\u3057\u3001\u305d\u308c\u305e\u308c\u306e\u6839\u62e0\u306b\u3064\u3044\u3066\u3082\u89e6\u308c\u307e\u3059\u3002<\/p>\n<p>1. Q\u5b66\u7fd2\uff08Q-Learning\uff09<\/p>\n<p>\u6982\u8981\u3000<br \/>\nQ\u5b66\u7fd2\u306f\u3001\u30aa\u30d5\u30dd\u30ea\u30b7\u30fc\u306e\u5024\u30d9\u30fc\u30b9\u306e\u5f37\u5316\u5b66\u7fd2\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3067\u3059\u3002<\/p>\n<p>\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u72b6\u614b-\u884c\u52d5\u4fa1\u5024\u95a2\u6570\uff08Q\u95a2\u6570\uff09\u3092\u5b66\u7fd2\u3057\u3001\u3053\u308c\u3092\u57fa\u306b\u6700\u9069\u306a\u884c\u52d5\u3092\u9078\u629e\u3057\u307e\u3059\u3002<\/p>\n<p>\u5177\u4f53\u7684\u306b\u306f\u3001\u30d9\u30eb\u30de\u30f3\u65b9\u7a0b\u5f0f\u3092\u7528\u3044\u3066Q\u5024\u3092\u66f4\u65b0\u3057\u307e\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\nWatkins\uff081989\u5e74\uff09\u306b\u3088\u3063\u3066\u63d0\u6848\u3055\u308c\u305fQ\u5b66\u7fd2\u306f\u3001\u7406\u8ad6\u7684\u306b\u53ce\u675f\u6027\u304c\u4fdd\u8a3c\u3055\u308c\u3066\u304a\u308a\u3001\u30e2\u30c7\u30eb\u30d5\u30ea\u30fc\u3067\u74b0\u5883\u306e\u52d5\u4f5c\u30e2\u30c7\u30eb\u3092\u5fc5\u8981\u3068\u3057\u307e\u305b\u3093\u3002<\/p>\n<p>\u3053\u306e\u305f\u3081\u3001\u30b7\u30f3\u30d7\u30eb\u3067\u3042\u308a\u306a\u304c\u3089\u591a\u304f\u306e\u554f\u984c\u3067\u6709\u52b9\u306b\u6a5f\u80fd\u3059\u308b\u57fa\u672c\u7684\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3068\u3057\u3066\u5e83\u304f\u5229\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>2. 
SARSA\uff08State-Action-Reward-State-Action\uff09<\/p>\n<p>\u6982\u8981\u3000<br \/>\nSARSA\u306f\u3001\u30aa\u30f3\u30dd\u30ea\u30b7\u30fc\u306e\u5024\u30d9\u30fc\u30b9\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3067\u3001Q\u5b66\u7fd2\u3068\u4f3c\u3066\u3044\u307e\u3059\u304c\u3001\u5b9f\u969b\u306b\u63a1\u7528\u3059\u308b\u30dd\u30ea\u30b7\u30fc\u306b\u57fa\u3065\u3044\u3066Q\u5024\u3092\u66f4\u65b0\u3057\u307e\u3059\u3002<\/p>\n<p>\u5177\u4f53\u7684\u306b\u306f\u3001\u73fe\u5728\u306e\u30dd\u30ea\u30b7\u30fc\u306b\u5f93\u3063\u305f\u6b21\u306e\u884c\u52d5\u3092\u9078\u629e\u3057\u3001\u305d\u308c\u306b\u57fa\u3065\u3044\u3066\u66f4\u65b0\u3092\u884c\u3044\u307e\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\nSARSA\u306f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u884c\u52d5\u304c\u30dd\u30ea\u30b7\u30fc\u306b\u4f9d\u5b58\u3059\u308b\u305f\u3081\u3001\u63a2\u7d22\u3068\u5229\u7528\u306e\u30d0\u30e9\u30f3\u30b9\u3092\u8003\u616e\u3057\u305f\u5b66\u7fd2\u304c\u53ef\u80fd\u3067\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u7279\u5b9a\u306e\u30dd\u30ea\u30b7\u30fc\u4e0b\u3067\u306e\u6027\u80fd\u3092\u5411\u4e0a\u3055\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u7279\u306b\u3001\u74b0\u5883\u304c\u5909\u52d5\u3059\u308b\u5834\u5408\u3084\u5b89\u5168\u6027\u304c\u91cd\u8996\u3055\u308c\u308b\u5834\u5408\u306b\u6709\u52b9\u3067\u3059\u3002<\/p>\n<p>3. 
\u30e2\u30f3\u30c6\u30ab\u30eb\u30ed\u6cd5\uff08Monte Carlo Methods\uff09<\/p>\n<p>\u6982\u8981\u3000<br \/>\n\u30e2\u30f3\u30c6\u30ab\u30eb\u30ed\u6cd5\u306f\u3001\u30a8\u30d4\u30bd\u30fc\u30c9\u5168\u4f53\u306e\u7d4c\u9a13\u3092\u57fa\u306b\u4fa1\u5024\u95a2\u6570\u3092\u66f4\u65b0\u3059\u308b\u624b\u6cd5\u3067\u3059\u3002<\/p>\n<p>\u30a8\u30d4\u30bd\u30fc\u30c9\u304c\u7d42\u4e86\u3059\u308b\u307e\u3067\u5f85\u3061\u3001\u305d\u306e\u5f8c\u306b\u7d2f\u7a4d\u5831\u916c\u3092\u8a08\u7b97\u3057\u3066\u66f4\u65b0\u3057\u307e\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\n\u30e2\u30f3\u30c6\u30ab\u30eb\u30ed\u6cd5\u306f\u3001\u30e2\u30c7\u30eb\u30d5\u30ea\u30fc\u3067\u3042\u308a\u3001\u72b6\u614b\u9077\u79fb\u78ba\u7387\u3092\u5fc5\u8981\u3068\u3057\u306a\u3044\u305f\u3081\u3001\u8907\u96d1\u306a\u74b0\u5883\u3067\u3082\u9069\u7528\u53ef\u80fd\u3067\u3059\u3002<\/p>\n<p>\u307e\u305f\u3001\u30d0\u30a4\u30a2\u30b9\u306e\u5c11\u306a\u3044\u63a8\u5b9a\u3092\u63d0\u4f9b\u3059\u308b\u305f\u3081\u3001\u6b63\u78ba\u306a\u4fa1\u5024\u8a55\u4fa1\u304c\u53ef\u80fd\u3067\u3059\u3002<\/p>\n<p>\u305f\u3060\u3057\u3001\u30a8\u30d4\u30bd\u30fc\u30c9\u304c\u9577\u3044\u5834\u5408\u306b\u306f\u8a08\u7b97\u30b3\u30b9\u30c8\u304c\u9ad8\u304f\u306a\u308b\u6b20\u70b9\u3082\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>4. 
\u6642\u9593\u5dee\u5206\u5b66\u7fd2\uff08Temporal Difference Learning, TD\uff09<\/p>\n<p>\u6982\u8981\u3000<br \/>\nTD\u5b66\u7fd2\u306f\u3001\u6b21\u306e\u72b6\u614b\u306e\u4fa1\u5024\u3092\u5229\u7528\u3057\u3066\u73fe\u5728\u306e\u4fa1\u5024\u3092\u66f4\u65b0\u3059\u308b\u624b\u6cd5\u3067\u3059\u3002<\/p>\n<p>Q\u5b66\u7fd2\u3084SARSA\u3082TD\u5b66\u7fd2\u306e\u4e00\u7a2e\u3067\u3059\u3002<\/p>\n<p>TD(0)\u306f1\u30b9\u30c6\u30c3\u30d7\u5148\u306e\u60c5\u5831\u3092\u7528\u3044\u308b\u57fa\u672c\u7684\u306a\u65b9\u6cd5\u3067\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\nTD\u5b66\u7fd2\u306f\u3001\u5b8c\u5168\u306a\u30a8\u30d4\u30bd\u30fc\u30c9\u3092\u5f85\u305f\u305a\u306b\u30aa\u30f3\u30e9\u30a4\u30f3\u3067\u5b66\u7fd2\u3067\u304d\u308b\u305f\u3081\u3001\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u306a\u5236\u5fa1\u554f\u984c\u306b\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u307e\u305f\u3001\u30d0\u30a4\u30a2\u30b9\u3068\u5206\u6563\u306e\u30d0\u30e9\u30f3\u30b9\u304c\u53d6\u308c\u305f\u52b9\u7387\u7684\u306a\u5b66\u7fd2\u3092\u53ef\u80fd\u306b\u3057\u307e\u3059\u3002<\/p>\n<p>Sutton\u3068Barto\uff081998\u5e74\uff09\u306e\u7814\u7a76\u304c\u57fa\u790e\u3092\u7bc9\u3044\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>5. 
\u65b9\u7b56\u52fe\u914d\u6cd5\uff08Policy Gradient Methods\uff09<\/p>\n<p>\u6982\u8981\u3000<br \/>\n\u65b9\u7b56\u52fe\u914d\u6cd5\u306f\u3001\u76f4\u63a5\u7684\u306b\u30d1\u30e9\u30e1\u30c8\u30ea\u30c3\u30af\u306a\u65b9\u7b56\u3092\u6700\u9069\u5316\u3059\u308b\u624b\u6cd5\u3067\u3059\u3002<\/p>\n<p>\u78ba\u7387\u7684\u306a\u65b9\u7b56\u3092\u7528\u3044\u308b\u3053\u3068\u3067\u3001\u9023\u7d9a\u884c\u52d5\u7a7a\u9593\u3084\u9ad8\u6b21\u5143\u306e\u554f\u984c\u306b\u3082\u9069\u5fdc\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u4ee3\u8868\u7684\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3000<br \/>\n&#8211; REINFORCE \u57fa\u672c\u7684\u306a\u65b9\u7b56\u52fe\u914d\u6cd5\u3067\u3001\u30a8\u30d4\u30bd\u30fc\u30c9\u5168\u4f53\u306e\u5831\u916c\u3092\u57fa\u306b\u30d1\u30e9\u30e1\u30fc\u30bf\u3092\u66f4\u65b0\u3057\u307e\u3059\u3002<\/p>\n<p>&#8211; Actor-Critic \u65b9\u7b56\uff08Actor\uff09\u3068\u4fa1\u5024\u95a2\u6570\uff08Critic\uff09\u3092\u540c\u6642\u306b\u5b66\u7fd2\u3059\u308b\u30cf\u30a4\u30d6\u30ea\u30c3\u30c9\u624b\u6cd5\u3067\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\n\u65b9\u7b56\u52fe\u914d\u6cd5\u306f\u3001\u9023\u7d9a\u7684\u306a\u884c\u52d5\u7a7a\u9593\u3084\u9ad8\u6b21\u5143\u306e\u72b6\u614b\u7a7a\u9593\u306b\u5bfe\u3057\u3066\u67d4\u8edf\u306b\u5bfe\u5fdc\u3067\u304d\u308b\u305f\u3081\u3001\u69d8\u3005\u306a\u5b9f\u4e16\u754c\u306e\u30bf\u30b9\u30af\u3067\u6709\u52b9\u3067\u3059\u3002<\/p>\n<p>\u307e\u305f\u3001\u52fe\u914d\u306b\u57fa\u3065\u304f\u6700\u9069\u5316\u624b\u6cd5\u306f\u3001\u52b9\u7387\u7684\u306a\u30d1\u30e9\u30e1\u30fc\u30bf\u66f4\u65b0\u3092\u53ef\u80fd\u306b\u3057\u307e\u3059\u3002<\/p>\n<p>6. 
Deep Q-Network\uff08DQN\uff09<\/p>\n<p>\u6982\u8981\u3000<br \/>\nDQN\u306f\u3001\u6df1\u5c64\u30cb\u30e5\u30fc\u30e9\u30eb\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u3092\u7528\u3044\u3066Q\u95a2\u6570\u3092\u8fd1\u4f3c\u3059\u308b\u624b\u6cd5\u3067\u3059\u3002<\/p>\n<p>\u7d4c\u9a13\u518d\u751f\uff08Experience Replay\uff09\u3084\u30bf\u30fc\u30b2\u30c3\u30c8\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u306e\u5c0e\u5165\u306b\u3088\u308a\u3001\u5b66\u7fd2\u306e\u5b89\u5b9a\u6027\u3068\u52b9\u7387\u3092\u5411\u4e0a\u3055\u305b\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\nDQN\u306f\u3001Atari\u30b2\u30fc\u30e0\u306a\u3069\u306e\u8907\u96d1\u306a\u30bf\u30b9\u30af\u3067\u4eba\u9593\u3068\u540c\u7b49\u4ee5\u4e0a\u306e\u6027\u80fd\u3092\u9054\u6210\u3057\u305f\u3053\u3068\u3067\u6ce8\u76ee\u3092\u6d74\u3073\u307e\u3057\u305f\uff08Mnih et al., 2015\uff09\u3002<\/p>\n<p>\u6df1\u5c64\u5b66\u7fd2\u306e\u529b\u3092\u5f37\u5316\u5b66\u7fd2\u306b\u7d71\u5408\u3059\u308b\u3053\u3068\u3067\u3001\u5927\u898f\u6a21\u3067\u9ad8\u6b21\u5143\u306a\u554f\u984c\u306b\u5bfe\u3059\u308b\u89e3\u6c7a\u80fd\u529b\u3092\u98db\u8e8d\u7684\u306b\u5411\u4e0a\u3055\u305b\u307e\u3057\u305f\u3002<\/p>\n<p>7. 
Double DQN<\/p>\n<p>\u6982\u8981\u3000<br \/>\nDouble DQN\u306f\u3001DQN\u306e\u904e\u5927\u8a55\u4fa1\u554f\u984c\u3092\u89e3\u6c7a\u3059\u308b\u305f\u3081\u306b\u63d0\u6848\u3055\u308c\u305f\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3067\u3059\u3002<\/p>\n<p>\u30a2\u30af\u30b7\u30e7\u30f3\u9078\u629e\u3068\u8a55\u4fa1\u3092\u5206\u96e2\u3059\u308b\u3053\u3068\u3067\u3001Q\u5024\u306e\u30d0\u30a4\u30a2\u30b9\u3092\u4f4e\u6e1b\u3057\u307e\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\nDouble DQN\u306f\u3001DQN\u304c\u62b1\u3048\u308bQ\u5024\u306e\u904e\u5927\u8a55\u4fa1\u3092\u6291\u5236\u3057\u3001\u3088\u308a\u6b63\u78ba\u306a\u4fa1\u5024\u95a2\u6570\u306e\u63a8\u5b9a\u3092\u53ef\u80fd\u306b\u3057\u307e\u3059\uff08Van Hasselt et al., 2016\uff09\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u5b66\u7fd2\u306e\u5b89\u5b9a\u6027\u3068\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u304c\u5411\u4e0a\u3057\u307e\u3059\u3002<\/p>\n<p>8. Dueling DQN<\/p>\n<p>\u6982\u8981\u3000<br \/>\nDueling DQN\u306f\u3001Q\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u3092\u72b6\u614b\u4fa1\u5024\uff08Value\uff09\u3068\u30a2\u30c9\u30d0\u30f3\u30c6\u30fc\u30b8\uff08Advantage\uff09\u306e2\u3064\u306e\u7d4c\u8def\u306b\u5206\u96e2\u3059\u308b\u30a2\u30fc\u30ad\u30c6\u30af\u30c1\u30e3\u3092\u63a1\u7528\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\n\u3053\u306e\u5206\u5272\u306b\u3088\u308a\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u72b6\u614b\u4fa1\u5024\u306e\u8a55\u4fa1\u306b\u96c6\u4e2d\u3067\u304d\u308b\u305f\u3081\u3001\u52b9\u7387\u7684\u304b\u3064\u52b9\u679c\u7684\u306b\u5b66\u7fd2\u304c\u9032\u307f\u307e\u3059\uff08Wang et al., 2016\uff09\u3002<\/p>\n<p>\u7279\u306b\u3001\u7279\u5b9a\u306e\u72b6\u614b\u306b\u304a\u3044\u3066\u91cd\u8981\u306a\u884c\u52d5\u304c\u660e\u78ba\u3067\u306a\u3044\u5834\u5408\u306b\u6709\u52b9\u3067\u3059\u3002<\/p>\n<p>9. 
Proximal Policy Optimization\uff08PPO\uff09<\/p>\n<p>\u6982\u8981\u3000<br \/>\nPPO\u306f\u3001\u4fe1\u983c\u9818\u57df\u3092\u7dad\u6301\u3057\u306a\u304c\u3089\u65b9\u7b56\u3092\u66f4\u65b0\u3059\u308b\u624b\u6cd5\u3067\u3001Clipped Surrogate Objective\u3092\u7528\u3044\u3066\u5b66\u7fd2\u306e\u5b89\u5b9a\u6027\u3092\u4fdd\u3061\u307e\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\nPPO\u306f\u3001\u5f93\u6765\u306eTRPO\uff08Trust Region Policy Optimization\uff09\u3068\u6bd4\u8f03\u3057\u3066\u8a08\u7b97\u30b3\u30b9\u30c8\u304c\u4f4e\u304f\u3001\u5b9f\u88c5\u304c\u5bb9\u6613\u3067\u3042\u308a\u306a\u304c\u3089\u9ad8\u3044\u6027\u80fd\u3068\u5b89\u5b9a\u6027\u3092\u793a\u3057\u307e\u3059\uff08Schulman et al., 2017\uff09\u3002<\/p>\n<p>\u591a\u304f\u306e\u5b9f\u4e16\u754c\u306e\u30bf\u30b9\u30af\u3067\u6210\u529f\u3092\u53ce\u3081\u3066\u304a\u308a\u3001\u73fe\u5728\u6700\u3082\u5e83\u304f\u4f7f\u7528\u3055\u308c\u3066\u3044\u308b\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u4e00\u3064\u3067\u3059\u3002<\/p>\n<p>10. 
Soft Actor-Critic\uff08SAC\uff09<\/p>\n<p>\u6982\u8981\u3000<br \/>\nSAC\u306f\u3001\u6700\u5927\u30a8\u30f3\u30c8\u30ed\u30d4\u30fc\u5f37\u5316\u5b66\u7fd2\u306e\u67a0\u7d44\u307f\u3092\u5229\u7528\u3057\u3001\u65b9\u7b56\u304c\u9ad8\u3044\u30a8\u30f3\u30c8\u30ed\u30d4\u30fc\uff08\u591a\u69d8\u6027\uff09\u3092\u6301\u3064\u3088\u3046\u306b\u5b66\u7fd2\u3057\u307e\u3059\u3002<\/p>\n<p>\u9023\u7d9a\u884c\u52d5\u7a7a\u9593\u306b\u9069\u3057\u305f\u30aa\u30d5\u30dd\u30ea\u30b7\u30fc\u306e\u30a2\u30af\u30bf\u30fc-\u30af\u30ea\u30c6\u30a3\u30c3\u30af\u624b\u6cd5\u3067\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3000<br \/>\nSAC\u306f\u3001\u9ad8\u30a8\u30f3\u30c8\u30ed\u30d4\u30fc\u65b9\u7b56\u306b\u3088\u3063\u3066\u63a2\u7d22\u304c\u4fc3\u9032\u3055\u308c\u3001\u5c40\u6240\u7684\u306a\u6700\u9069\u89e3\u306b\u9665\u308a\u306b\u304f\u304f\u306a\u308a\u307e\u3059\uff08Haarnoja et al., 2018\uff09\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u5b89\u5b9a\u3057\u305f\u5b66\u7fd2\u3068\u9ad8\u3044\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u3092\u5b9f\u73fe\u3057\u3001\u591a\u69d8\u306a\u9023\u7d9a\u5236\u5fa1\u30bf\u30b9\u30af\u3067\u6709\u52b9\u3067\u3059\u3002<\/p>\n<p>\u307e\u3068\u3081<\/p>\n<p>\u4ee5\u4e0a\u306b\u8ff0\u3079\u305f\u5f37\u5316\u5b66\u7fd2\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306f\u3001\u305d\u308c\u305e\u308c\u7570\u306a\u308b\u7279\u6027\u3068\u5229\u70b9\u3092\u6301\u3061\u3001\u69d8\u3005\u306a\u554f\u984c\u306b\u5bfe\u3057\u3066\u9069\u7528\u53ef\u80fd\u3067\u3059\u3002<\/p>\n<p>\u57fa\u672c\u7684\u306aQ\u5b66\u7fd2\u3084SARSA\u304b\u3089\u3001\u6df1\u5c64\u5b66\u7fd2\u3092\u7d71\u5408\u3057\u305fDQN\u3084PPO\u3001SAC\u306a\u3069\u306e\u9ad8\u5ea6\u306a\u624b\u6cd5\u307e\u3067\u3001\u7528\u9014\u306b\u5fdc\u3058\u3066\u6700\u9069\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u9078\u629e\u3059\u308b\u3053\u3068\u304c\u91cd\u8981\u3067\u3059\u3002<\/p>\n<p>\u3053\u308c\u3089\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u767a\u5c55\u306f\u3001\u7406\u8
ad6\u7684\u306a\u7814\u7a76\u3068\u5b9f\u8df5\u7684\u306a\u5fdc\u7528\u306e\u4e21\u9762\u3067\u9032\u3093\u3067\u304a\u308a\u3001\u4eca\u5f8c\u3082\u5f37\u5316\u5b66\u7fd2\u306e\u5fdc\u7528\u7bc4\u56f2\u306f\u3055\u3089\u306b\u5e83\u304c\u308b\u3068\u671f\u5f85\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>**\u5f37\u5316\u5b66\u7fd2\u306f\u3069\u306e\u3088\u3046\u306a\u5206\u91ce\u3067\u5fdc\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u304b\uff1f**<br \/>\n\u5f37\u5316\u5b66\u7fd2\uff08Reinforcement Learning, RL\uff09\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3057\u306a\u304c\u3089\u5831\u916c\u3092\u6700\u5927\u5316\u3059\u308b\u305f\u3081\u306e\u884c\u52d5\u3092\u5b66\u7fd2\u3059\u308b\u6a5f\u68b0\u5b66\u7fd2\u306e\u4e00\u5206\u91ce\u3067\u3059\u3002<\/p>\n<p>\u3053\u306e\u5f37\u529b\u306a\u5b66\u7fd2\u624b\u6cd5\u306f\u3001\u591a\u5c90\u306b\u308f\u305f\u308b\u5206\u91ce\u3067\u5fdc\u7528\u3055\u308c\u3066\u304a\u308a\u3001\u305d\u306e\u6210\u529f\u4f8b\u3084\u7406\u8ad6\u7684\u6839\u62e0\u306b\u57fa\u3065\u3044\u3066\u5e83\u304f\u63a1\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u4ee5\u4e0b\u306b\u4e3b\u8981\u306a\u5fdc\u7528\u5206\u91ce\u3068\u305d\u306e\u6839\u62e0\u306b\u3064\u3044\u3066\u8a73\u3057\u304f\u8aac\u660e\u3057\u307e\u3059\u3002<\/p>\n<p>1. 
\u30b2\u30fc\u30e0\u30d7\u30ec\u30a4<\/p>\n<p>\u5fdc\u7528\u4f8b AlphaGo\u3001OpenAI Five\u3001DeepMind\u306eAtari\u30b2\u30fc\u30e0\u30d7\u30ec\u30a4<br \/>\n\u6839\u62e0 \u5f37\u5316\u5b66\u7fd2\u306f\u3001\u8907\u96d1\u306a\u610f\u601d\u6c7a\u5b9a\u30d7\u30ed\u30bb\u30b9\u3092\u5fc5\u8981\u3068\u3059\u308b\u30b2\u30fc\u30e0\u3067\u306e\u6226\u7565\u5b66\u7fd2\u306b\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>AlphaGo\u306f\u3001\u56f2\u7881\u306e\u3088\u3046\u306a\u9ad8\u6b21\u5143\u3067\u6226\u7565\u7684\u306a\u30b2\u30fc\u30e0\u3067\u4eba\u9593\u306e\u30c1\u30e3\u30f3\u30d4\u30aa\u30f3\u3092\u6253\u3061\u7834\u308b\u6210\u679c\u3092\u4e0a\u3052\u3001RL\u306e\u6709\u52b9\u6027\u3092\u5b9f\u8a3c\u3057\u307e\u3057\u305f\u3002<\/p>\n<p>\u3053\u308c\u3089\u306e\u6210\u679c\u306f\u3001\u63a2\u7d22\u3068\u6700\u9069\u5316\u80fd\u529b\u3092\u6301\u3064RL\u624b\u6cd5\u304c\u8907\u96d1\u306a\u554f\u984c\u89e3\u6c7a\u306b\u512a\u308c\u3066\u3044\u308b\u3053\u3068\u3092\u793a\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>2. 
\u30ed\u30dc\u30c6\u30a3\u30af\u30b9<\/p>\n<p>\u5fdc\u7528\u4f8b \u30ed\u30dc\u30c3\u30c8\u30a2\u30fc\u30e0\u306e\u64cd\u4f5c\u3001\u6b69\u884c\u5236\u5fa1\u3001\u81ea\u5f8b\u79fb\u52d5<br \/>\n\u6839\u62e0 \u30ed\u30dc\u30c6\u30a3\u30af\u30b9\u306b\u304a\u3044\u3066\u306f\u3001\u74b0\u5883\u3068\u306e\u7d99\u7d9a\u7684\u306a\u76f8\u4e92\u4f5c\u7528\u3068\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u304c\u91cd\u8981\u3067\u3059\u3002<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306f\u3001\u8a66\u884c\u932f\u8aa4\u3092\u901a\u3058\u3066\u6700\u9069\u306a\u884c\u52d5\u30dd\u30ea\u30b7\u30fc\u3092\u5b66\u7fd2\u3059\u308b\u305f\u3081\u3001\u67d4\u8edf\u3067\u9069\u5fdc\u7684\u306a\u30ed\u30dc\u30c3\u30c8\u5236\u5fa1\u3092\u5b9f\u73fe\u3057\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001OpenAI\u306e\u30ed\u30dc\u30c3\u30c8\u30cf\u30f3\u30c9\u306f\u7269\u4f53\u306e\u628a\u6301\u3092RL\u3067\u5b66\u7fd2\u3057\u3001\u8907\u96d1\u306a\u64cd\u4f5c\u3092\u53ef\u80fd\u306b\u3057\u307e\u3057\u305f\u3002<\/p>\n<p>3. \u81ea\u52d5\u904b\u8ee2\u8eca<\/p>\n<p>\u5fdc\u7528\u4f8b \u7d4c\u8def\u8a08\u753b\u3001\u969c\u5bb3\u7269\u56de\u907f\u3001\u8eca\u4e21\u5236\u5fa1<br \/>\n\u6839\u62e0 \u81ea\u52d5\u904b\u8ee2\u8eca\u306f\u3001\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u3067\u306e\u610f\u601d\u6c7a\u5b9a\u3068\u9069\u5fdc\u304c\u6c42\u3081\u3089\u308c\u307e\u3059\u3002<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306f\u3001\u30b7\u30df\u30e5\u30ec\u30fc\u30b7\u30e7\u30f3\u74b0\u5883\u3067\u5b89\u5168\u306b\u8a13\u7df4\u3067\u304d\u3001\u9ad8\u901f\u3067\u5909\u5316\u3059\u308b\u4ea4\u901a\u72b6\u6cc1\u306b\u9069\u5fdc\u3067\u304d\u308b\u30dd\u30ea\u30b7\u30fc\u3092\u5b66\u3076\u306e\u306b\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>Waymo\u3084Tesla\u306a\u3069\u304cRL\u3092\u6d3b\u7528\u3057\u3066\u904b\u8ee2\u6226\u7565\u306e\u6700\u9069\u5316\u3092\u56f3\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>4. 
\u91d1\u878d<\/p>\n<p>\u5fdc\u7528\u4f8b \u30dd\u30fc\u30c8\u30d5\u30a9\u30ea\u30aa\u7ba1\u7406\u3001\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u53d6\u5f15\u3001\u30ea\u30b9\u30af\u7ba1\u7406<br \/>\n\u6839\u62e0 \u91d1\u878d\u5e02\u5834\u306f\u8907\u96d1\u3067\u30c0\u30a4\u30ca\u30df\u30c3\u30af\u306a\u74b0\u5883\u3067\u3042\u308a\u3001RL\u306e\u63a2\u7d22\u3068\u6700\u9069\u5316\u80fd\u529b\u306f\u3001\u53ce\u76ca\u6027\u306e\u9ad8\u3044\u53d6\u5f15\u6226\u7565\u306e\u958b\u767a\u306b\u6709\u52b9\u3067\u3059\u3002<\/p>\n<p>RL\u3092\u7528\u3044\u308b\u3053\u3068\u3067\u3001\u904e\u53bb\u306e\u30c7\u30fc\u30bf\u304b\u3089\u5b66\u7fd2\u3057\u3001\u5c06\u6765\u306e\u5e02\u5834\u5909\u52d5\u306b\u5bfe\u3059\u308b\u9069\u5fdc\u7684\u306a\u6226\u7565\u3092\u69cb\u7bc9\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u8907\u6570\u306e\u30d8\u30c3\u30b8\u30d5\u30a1\u30f3\u30c9\u304cRL\u3092\u53d6\u308a\u5165\u308c\u3066\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u306e\u5411\u4e0a\u3092\u56f3\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>5. \u30d8\u30eb\u30b9\u30b1\u30a2<\/p>\n<p>\u5fdc\u7528\u4f8b \u6cbb\u7642\u65b9\u91dd\u306e\u6700\u9069\u5316\u3001\u85ac\u5264\u6295\u4e0e\u7ba1\u7406\u3001\u60a3\u8005\u30e2\u30cb\u30bf\u30ea\u30f3\u30b0<br \/>\n\u6839\u62e0 \u30d8\u30eb\u30b9\u30b1\u30a2\u3067\u306f\u3001\u60a3\u8005\u306e\u72b6\u614b\u306b\u5fdc\u3058\u305f\u6700\u9069\u306a\u6cbb\u7642\u7b56\u3092\u6c7a\u5b9a\u3059\u308b\u3053\u3068\u304c\u91cd\u8981\u3067\u3059\u3002<\/p>\n<p>RL\u306f\u3001\u60a3\u8005\u3054\u3068\u306e\u52d5\u7684\u306a\u72b6\u614b\u5909\u5316\u3092\u8003\u616e\u3057\u3001\u9577\u671f\u7684\u306a\u5065\u5eb7\u7d50\u679c\u3092\u6700\u5927\u5316\u3059\u308b\u6cbb\u7642\u65b9\u91dd\u3092\u5b66\u7fd2\u3059\u308b\u306e\u306b\u9069\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001\u764c\u6cbb\u7642\u306b\u304a\u3051\u308b\u5316\u5b66\u7642\u6cd5\u306e\u6700\u9069\u5316\u306bRL\u304c\u5fdc\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>6. 
\u30ec\u30b3\u30e1\u30f3\u30c7\u30fc\u30b7\u30e7\u30f3\u30b7\u30b9\u30c6\u30e0<\/p>\n<p>\u5fdc\u7528\u4f8b \u30b3\u30f3\u30c6\u30f3\u30c4\u63a8\u85a6\u3001\u5e83\u544a\u914d\u4fe1\u3001\u30d1\u30fc\u30bd\u30ca\u30e9\u30a4\u30ba\u30c9\u30de\u30fc\u30b1\u30c6\u30a3\u30f3\u30b0<br \/>\n\u6839\u62e0 \u30ec\u30b3\u30e1\u30f3\u30c7\u30fc\u30b7\u30e7\u30f3\u30b7\u30b9\u30c6\u30e0\u3067\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u306e\u884c\u52d5\u306b\u57fa\u3065\u3044\u3066\u6700\u9069\u306a\u30a2\u30a4\u30c6\u30e0\u3092\u63d0\u6848\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>RL\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u306e\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3092\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u3067\u53d6\u308a\u5165\u308c\u3001\u9577\u671f\u7684\u306a\u30a8\u30f3\u30b2\u30fc\u30b8\u30e1\u30f3\u30c8\u3092\u6700\u5927\u5316\u3059\u308b\u63a8\u85a6\u30dd\u30ea\u30b7\u30fc\u3092\u5b66\u7fd2\u3057\u307e\u3059\u3002<\/p>\n<p>Netflix\u3084Amazon\u304cRL\u3092\u6d3b\u7528\u3057\u3066\u30d1\u30fc\u30bd\u30ca\u30e9\u30a4\u30ba\u30c9\u63a8\u85a6\u3092\u5f37\u5316\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>7. 
\u30a8\u30cd\u30eb\u30ae\u30fc\u30b7\u30b9\u30c6\u30e0<\/p>\n<p>\u5fdc\u7528\u4f8b \u30b9\u30de\u30fc\u30c8\u30b0\u30ea\u30c3\u30c9\u7ba1\u7406\u3001\u30a8\u30cd\u30eb\u30ae\u30fc\u9700\u8981\u4e88\u6e2c\u3001\u518d\u751f\u53ef\u80fd\u30a8\u30cd\u30eb\u30ae\u30fc\u306e\u6700\u9069\u5316<br \/>\n\u6839\u62e0 \u30a8\u30cd\u30eb\u30ae\u30fc\u5206\u91ce\u3067\u306f\u3001\u9700\u8981\u3068\u4f9b\u7d66\u306e\u30d0\u30e9\u30f3\u30b9\u3092\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u3067\u8abf\u6574\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>RL\u306f\u3001\u30a8\u30cd\u30eb\u30ae\u30fc\u9700\u8981\u306e\u5909\u52d5\u306b\u5bfe\u5fdc\u3057\u3001\u52b9\u7387\u7684\u306a\u30a8\u30cd\u30eb\u30ae\u30fc\u914d\u5206\u3084\u30b3\u30b9\u30c8\u524a\u6e1b\u3092\u5b9f\u73fe\u3059\u308b\u305f\u3081\u306e\u6700\u9069\u5316\u624b\u6cd5\u3068\u3057\u3066\u6709\u52b9\u3067\u3059\u3002<\/p>\n<p>\u30b9\u30de\u30fc\u30c8\u30db\u30fc\u30e0\u3084\u7523\u696d\u7528\u30a8\u30cd\u30eb\u30ae\u30fc\u7ba1\u7406\u30b7\u30b9\u30c6\u30e0\u3067RL\u306e\u5fdc\u7528\u304c\u9032\u3093\u3067\u3044\u307e\u3059\u3002<\/p>\n<p>8. 
\u30ca\u30c1\u30e5\u30e9\u30eb\u30e9\u30f3\u30b2\u30fc\u30b8\u30d7\u30ed\u30bb\u30c3\u30b7\u30f3\u30b0\uff08NLP\uff09<\/p>\n<p>\u5fdc\u7528\u4f8b \u5bfe\u8a71\u30b7\u30b9\u30c6\u30e0\u3001\u6a5f\u68b0\u7ffb\u8a33\u3001\u30c6\u30ad\u30b9\u30c8\u751f\u6210<br \/>\n\u6839\u62e0 NLP\u306b\u304a\u3044\u3066\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u3068\u306e\u30a4\u30f3\u30bf\u30e9\u30af\u30b7\u30e7\u30f3\u3092\u901a\u3058\u3066\u6700\u9069\u306a\u5fdc\u7b54\u3092\u751f\u6210\u3059\u308b\u3053\u3068\u304c\u6c42\u3081\u3089\u308c\u307e\u3059\u3002<\/p>\n<p>RL\u306f\u3001\u5bfe\u8a71\u306e\u8cea\u3092\u5411\u4e0a\u3055\u305b\u308b\u305f\u3081\u306b\u5831\u916c\u3092\u57fa\u306b\u3057\u305f\u5b66\u7fd2\u304c\u53ef\u80fd\u3067\u3042\u308a\u3001\u30e6\u30fc\u30b6\u30fc\u6e80\u8db3\u5ea6\u3092\u9ad8\u3081\u308b\u5fdc\u7b54\u751f\u6210\u306b\u5bc4\u4e0e\u3057\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001\u30c1\u30e3\u30c3\u30c8\u30dc\u30c3\u30c8\u306e\u5fdc\u7b54\u6700\u9069\u5316\u306bRL\u304c\u6d3b\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>9. 
\u4ea4\u901a\u7ba1\u7406<\/p>\n<p>\u5fdc\u7528\u4f8b \u4fe1\u53f7\u5236\u5fa1\u3001\u4ea4\u901a\u6d41\u6700\u9069\u5316\u3001\u516c\u5171\u4ea4\u901a\u8a08\u753b<br \/>\n\u6839\u62e0 \u90fd\u5e02\u90e8\u306e\u4ea4\u901a\u7ba1\u7406\u306f\u8907\u96d1\u3067\u591a\u69d8\u306a\u8981\u7d20\u304c\u7d61\u307f\u5408\u3044\u307e\u3059\u3002<\/p>\n<p>RL\u306f\u3001\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u3067\u306e\u4ea4\u901a\u72b6\u6cc1\u3092\u8003\u616e\u3057\u3001\u4fe1\u53f7\u30bf\u30a4\u30df\u30f3\u30b0\u3084\u30eb\u30fc\u30c8\u6848\u5185\u3092\u6700\u9069\u5316\u3059\u308b\u3053\u3068\u3067\u3001\u6e0b\u6ede\u306e\u7de9\u548c\u3084\u4ea4\u901a\u52b9\u7387\u306e\u5411\u4e0a\u306b\u5bc4\u4e0e\u3057\u307e\u3059\u3002<\/p>\n<p>\u30b7\u30f3\u30ac\u30dd\u30fc\u30eb\u3084\u4e0a\u6d77\u306a\u3069\u306e\u90fd\u5e02\u3067RL\u3092\u7528\u3044\u305f\u4ea4\u901a\u4fe1\u53f7\u5236\u5fa1\u30b7\u30b9\u30c6\u30e0\u304c\u8a66\u9a13\u904b\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>10. \u88fd\u9020\u696d<\/p>\n<p>\u5fdc\u7528\u4f8b \u751f\u7523\u30d7\u30ed\u30bb\u30b9\u306e\u6700\u9069\u5316\u3001\u54c1\u8cea\u7ba1\u7406\u3001\u5728\u5eab\u7ba1\u7406<br \/>\n\u6839\u62e0 
\u88fd\u9020\u696d\u3067\u306f\u3001\u52b9\u7387\u7684\u306a\u751f\u7523\u30e9\u30a4\u30f3\u306e\u7dad\u6301\u3084\u54c1\u8cea\u7ba1\u7406\u304c\u91cd\u8981\u3067\u3059\u3002<\/p>\n<p>RL\u306f\u3001\u751f\u7523\u30d7\u30ed\u30bb\u30b9\u306e\u5404\u30b9\u30c6\u30fc\u30b8\u3067\u306e\u6700\u9069\u306a\u64cd\u4f5c\u30d1\u30e9\u30e1\u30fc\u30bf\u3092\u5b66\u7fd2\u3057\u3001\u5168\u4f53\u306e\u751f\u7523\u52b9\u7387\u3092\u5411\u4e0a\u3055\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u307e\u305f\u3001\u5728\u5eab\u7ba1\u7406\u306b\u304a\u3044\u3066\u3082\u9700\u8981\u4e88\u6e2c\u3068\u5728\u5eab\u88dc\u5145\u306e\u6700\u9069\u5316\u306bRL\u304c\u6d3b\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u6839\u62e0\u3068\u5c06\u6765\u5c55\u671b<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u304c\u3053\u308c\u3089\u306e\u5206\u91ce\u3067\u6210\u529f\u3092\u53ce\u3081\u3066\u3044\u308b\u80cc\u666f\u306b\u306f\u3001\u4ee5\u4e0b\u306e\u8981\u56e0\u304c\u3042\u308a\u307e\u3059\u3000<\/p>\n<p>\u63a2\u7d22\u3068\u6700\u9069\u5316\u306e\u80fd\u529b RL\u306f\u3001\u8a66\u884c\u932f\u8aa4\u3092\u901a\u3058\u3066\u6700\u9069\u306a\u6226\u7565\u3092\u898b\u3064\u3051\u51fa\u3059\u80fd\u529b\u304c\u3042\u308a\u3001\u8907\u96d1\u3067\u52d5\u7684\u306a\u74b0\u5883\u3067\u306e\u9069\u5fdc\u306b\u512a\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u5927\u898f\u6a21\u30c7\u30fc\u30bf\u3068\u8a08\u7b97\u8cc7\u6e90\u306e\u6d3b\u7528 \u30c7\u30a3\u30fc\u30d7\u30e9\u30fc\u30cb\u30f3\u30b0\u3068\u306e\u878d\u5408\uff08Deep Reinforcement Learning\uff09\u306b\u3088\u308a\u3001\u5927\u898f\u6a21\u306a\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3068\u8a08\u7b97\u8cc7\u6e90\u3092\u6d3b\u7528\u3057\u3066\u9ad8\u5ea6\u306a\u30dd\u30ea\u30b7\u30fc\u3092\u5b66\u7fd2\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u30b7\u30df\u30e5\u30ec\u30fc\u30b7\u30e7\u30f3\u74b0\u5883\u306e\u767a\u5c55 
\u73fe\u5b9f\u4e16\u754c\u306e\u8907\u96d1\u306a\u554f\u984c\u3092\u6a21\u5023\u3057\u305f\u30b7\u30df\u30e5\u30ec\u30fc\u30b7\u30e7\u30f3\u74b0\u5883\u306e\u69cb\u7bc9\u304c\u9032\u307f\u3001\u5b89\u5168\u304b\u3064\u52b9\u7387\u7684\u306bRL\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u8a13\u7df4\u30fb\u8a55\u4fa1\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u7406\u8ad6\u7684\u9032\u5c55 RL\u306e\u7406\u8ad6\u7684\u57fa\u76e4\u306e\u7814\u7a76\u304c\u9032\u307f\u3001\u53ce\u675f\u6027\u3084\u5b89\u5b9a\u6027\u306b\u95a2\u3059\u308b\u7406\u89e3\u304c\u6df1\u307e\u308b\u3053\u3068\u3067\u3001\u5b9f\u7528\u5fdc\u7528\u3078\u306e\u4fe1\u983c\u6027\u304c\u5411\u4e0a\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u5c06\u6765\u7684\u306b\u306f\u3001RL\u306e\u5fdc\u7528\u7bc4\u56f2\u306f\u3055\u3089\u306b\u62e1\u5927\u3057\u3001\u7279\u306b\u30de\u30eb\u30c1\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u30b7\u30b9\u30c6\u30e0\u3084\u4eba\u9593\u3068\u306e\u5354\u8abf\u30ed\u30dc\u30c6\u30a3\u30af\u30b9\u3001\u6301\u7d9a\u53ef\u80fd\u306a\u30a8\u30cd\u30eb\u30ae\u30fc\u7ba1\u7406\u306a\u3069\u3001\u65b0\u305f\u306a\u9818\u57df\u3067\u306e\u6d3b\u7528\u304c\u671f\u5f85\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>\u307e\u305f\u3001\u502b\u7406\u7684\u306a\u554f\u984c\u3084\u5b89\u5168\u6027\u306e\u78ba\u4fdd\u3068\u3044\u3063\u305f\u8ab2\u984c\u306b\u5bfe\u5fdc\u3059\u308b\u305f\u3081\u306e\u7814\u7a76\u3082\u9032\u5c55\u3057\u3001RL\u306e\u5b9f\u793e\u4f1a\u3078\u306e\u9069\u7528\u304c\u3088\u308a\u4e00\u5c64\u4fc3\u9032\u3055\u308c\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n<p>\u4ee5\u4e0a\u306e\u3088\u3046\u306b\u3001\u5f37\u5316\u5b66\u7fd2\u306f\u305d\u306e\u67d4\u8edf\u6027\u3068\u5f37\u529b\u306a\u5b66\u7fd2\u80fd\u529b\u306b\u3088\u308a\u3001\u591a\u5c90\u306b\u308f\u305f\u308b\u5206\u91ce\u3067\u5b9f\u7e3e\u3092\u4e0a\u3052\u3066\u304a\u308a\u3001\u4eca\u5f8c\u3082\u3055\u3089\u306a\u308b\u5fdc\u7528\u304c\u671f\u5f85\u3055\u308c\u308b\
u91cd\u8981\u306a\u6280\u8853\u3067\u3059\u3002<\/p>\n<p><strong>\u5f37\u5316\u5b66\u7fd2\u306e\u8ab2\u984c\u3068\u4eca\u5f8c\u306e\u5c55\u671b\u306f\u4f55\u3067\u3059\u304b\uff1f<\/strong><br \/>\n\u5f37\u5316\u5b66\u7fd2\uff08Reinforcement Learning\u3001RL\uff09\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3057\u306a\u304c\u3089\u6700\u9069\u306a\u884c\u52d5\u3092\u5b66\u7fd2\u3059\u308b\u6a5f\u68b0\u5b66\u7fd2\u306e\u4e00\u5206\u91ce\u3067\u3059\u3002<\/p>\n<p>\u8fd1\u5e74\u3001AlphaGo\u3084OpenAI\u306e\u30b2\u30fc\u30e0\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306a\u3069\u3001\u6570\u3005\u306e\u6210\u529f\u4f8b\u304c\u5831\u544a\u3055\u308c\u3066\u3044\u307e\u3059\u304c\u3001\u540c\u6642\u306b\u3044\u304f\u3064\u304b\u306e\u8ab2\u984c\u3082\u5b58\u5728\u3057\u307e\u3059\u3002<\/p>\n<p>\u672c\u7a3f\u3067\u306f\u3001\u5f37\u5316\u5b66\u7fd2\u306e\u4e3b\u306a\u8ab2\u984c\u3068\u4eca\u5f8c\u306e\u5c55\u671b\u306b\u3064\u3044\u3066\u8a73\u8ff0\u3057\u3001\u305d\u306e\u6839\u62e0\u3092\u793a\u3057\u307e\u3059\u3002<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306e\u8ab2\u984c<\/p>\n<p>1. 
\u30b5\u30f3\u30d7\u30eb\u52b9\u7387\u306e\u4f4e\u3055<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306f\u591a\u304f\u306e\u5834\u5408\u3001\u8a66\u884c\u932f\u8aa4\u3092\u7e70\u308a\u8fd4\u3057\u306a\u304c\u3089\u5b66\u7fd2\u3092\u9032\u3081\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u81a8\u5927\u306a\u30c7\u30fc\u30bf\u3084\u6642\u9593\u304c\u5fc5\u8981\u3068\u306a\u308b\u5834\u5408\u304c\u591a\u3044\u3067\u3059\u3002<\/p>\n<p>\u7279\u306b\u5b9f\u4e16\u754c\u3067\u306e\u5fdc\u7528\u3067\u306f\u3001\u30c7\u30fc\u30bf\u53ce\u96c6\u304c\u30b3\u30b9\u30c8\u9ad8\u3068\u306a\u308b\u305f\u3081\u3001\u30b5\u30f3\u30d7\u30eb\u52b9\u7387\u306e\u5411\u4e0a\u304c\u6c42\u3081\u3089\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001\u30ed\u30dc\u30c3\u30c8\u5236\u5fa1\u3067\u306f\u73fe\u5b9f\u4e16\u754c\u3067\u306e\u8a66\u884c\u932f\u8aa4\u304c\u6642\u9593\u3068\u8cc7\u6e90\u3092\u6d6a\u8cbb\u3059\u308b\u305f\u3081\u3001\u30b7\u30df\u30e5\u30ec\u30fc\u30b7\u30e7\u30f3\u74b0\u5883\u3067\u306e\u5b66\u7fd2\u3068\u8ee2\u79fb\u5b66\u7fd2\u306e\u6d3b\u7528\u304c\u7814\u7a76\u3055\u308c\u3066\u3044\u307e\u3059\uff08\u53c2\u8003\u3000Sutton &amp; Barto, 2018\uff09\u3002<\/p>\n<p>2. 
\u63a2\u7d22\u3068\u6d3b\u7528\u306e\u30c8\u30ec\u30fc\u30c9\u30aa\u30d5<\/p>\n<p>\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u65b0\u3057\u3044\u884c\u52d5\u3092\u63a2\u7d22\u3059\u308b\u300c\u63a2\u7d22\u300d\u3068\u3001\u65e2\u77e5\u306e\u6700\u9069\u884c\u52d5\u3092\u9078\u629e\u3059\u308b\u300c\u6d3b\u7528\u300d\u306e\u30d0\u30e9\u30f3\u30b9\u3092\u53d6\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>\u3057\u304b\u3057\u3001\u3053\u306e\u30d0\u30e9\u30f3\u30b9\u3092\u9069\u5207\u306b\u53d6\u308b\u3053\u3068\u306f\u96e3\u3057\u304f\u3001\u4e0d\u9069\u5207\u306a\u30d0\u30e9\u30f3\u30b9\u306f\u5b66\u7fd2\u306e\u52b9\u7387\u3084\u6027\u80fd\u306b\u60aa\u5f71\u97ff\u3092\u53ca\u307c\u3057\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001\u03b5-\u30b0\u30ea\u30fc\u30c7\u30a3\u6cd5\u3067\u306f\u63a2\u7d22\u306e\u5272\u5408\u3092\u56fa\u5b9a\u3057\u307e\u3059\u304c\u3001\u3053\u308c\u304c\u6700\u9069\u306a\u30d0\u30e9\u30f3\u30b9\u3092\u4fdd\u8a3c\u3059\u308b\u308f\u3051\u3067\u306f\u3042\u308a\u307e\u305b\u3093\u3002<\/p>\n<p>3. 
\u74b0\u5883\u306e\u8907\u96d1\u3055\u3068\u30b9\u30b1\u30fc\u30e9\u30d3\u30ea\u30c6\u30a3<\/p>\n<p>\u73fe\u5b9f\u4e16\u754c\u306e\u554f\u984c\u306f\u9ad8\u6b21\u5143\u3067\u8907\u96d1\u306a\u5834\u5408\u304c\u591a\u304f\u3001\u5f37\u5316\u5b66\u7fd2\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u304c\u30b9\u30b1\u30fc\u30eb\u3057\u306b\u304f\u3044\u3053\u3068\u304c\u8ab2\u984c\u3067\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001\u81ea\u52d5\u904b\u8ee2\u8eca\u306e\u5236\u5fa1\u3067\u306f\u3001\u591a\u6570\u306e\u30bb\u30f3\u30b5\u30fc\u304b\u3089\u5f97\u3089\u308c\u308b\u60c5\u5831\u3092\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u3067\u51e6\u7406\u3057\u3001\u9069\u5207\u306a\u884c\u52d5\u3092\u9078\u629e\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>\u9ad8\u6b21\u5143\u30c7\u30fc\u30bf\u306e\u51e6\u7406\u80fd\u529b\u3068\u52b9\u7387\u7684\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u958b\u767a\u304c\u6c42\u3081\u3089\u308c\u307e\u3059\u3002<\/p>\n<p>4. \u30ea\u30ef\u30fc\u30c9\u8a2d\u8a08\u306e\u96e3\u3057\u3055<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u3067\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u76ee\u7684\u3092\u9054\u6210\u3059\u308b\u305f\u3081\u306e\u5831\u916c\uff08\u30ea\u30ef\u30fc\u30c9\uff09\u3092\u8a2d\u8a08\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>\u3057\u304b\u3057\u3001\u9069\u5207\u306a\u30ea\u30ef\u30fc\u30c9\u8a2d\u8a08\u306f\u96e3\u6613\u5ea6\u304c\u9ad8\u304f\u3001\u8aa4\u3063\u305f\u30ea\u30ef\u30fc\u30c9\u8a2d\u8a08\u306f\u671b\u307e\u3057\u304f\u306a\u3044\u884c\u52d5\u3092\u5f15\u304d\u8d77\u3053\u3059\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001\u30a8\u30cd\u30eb\u30ae\u30fc\u52b9\u7387\u3092\u6700\u5927\u5316\u3059\u308b\u30ed\u30dc\u30c3\u30c8\u306b\u5bfe\u3057\u3066\u5358\u7d14\u306a\u30a8\u30cd\u30eb\u30ae\u30fc\u6d88\u8cbb\u306e\u6700\u5c0f\u5316\u3092\u30ea\u30ef\u30fc\u30c9\u3068\u3059\u308b\u3068\u3001\u5b89\u5168\u6027\u304c\u72a0\u7272\u306b\
u306a\u308b\u3053\u3068\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>5. \u4e00\u822c\u5316\u80fd\u529b\u306e\u4e0d\u8db3<\/p>\n<p>\u591a\u304f\u306e\u5f37\u5316\u5b66\u7fd2\u30e2\u30c7\u30eb\u306f\u7279\u5b9a\u306e\u74b0\u5883\u306b\u5bfe\u3057\u3066\u6700\u9069\u5316\u3055\u308c\u3066\u304a\u308a\u3001\u65b0\u3057\u3044\u74b0\u5883\u3084\u72b6\u6cc1\u306b\u9069\u5fdc\u3059\u308b\u4e00\u822c\u5316\u80fd\u529b\u304c\u4e0d\u5341\u5206\u3067\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u5b9f\u4e16\u754c\u3067\u306e\u6c4e\u7528\u6027\u304c\u5236\u7d04\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001\u30c1\u30a7\u30b9\u3084\u56f2\u7881\u3067\u306f\u9ad8\u3044\u6027\u80fd\u3092\u793a\u3059\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3082\u3001\u30eb\u30fc\u30eb\u304c\u5c11\u3057\u5909\u308f\u308b\u3060\u3051\u3067\u6027\u80fd\u304c\u4f4e\u4e0b\u3059\u308b\u3053\u3068\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>6. \u5b89\u5168\u6027\u3068\u4fe1\u983c\u6027\u306e\u554f\u984c<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u4e88\u671f\u305b\u306c\u884c\u52d5\u3092\u53d6\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u3001\u7279\u306b\u5b89\u5168\u6027\u304c\u91cd\u8981\u306a\u5206\u91ce\uff08\u533b\u7642\u3001\u81ea\u52d5\u904b\u8ee2\u306a\u3069\uff09\u3067\u306f\u4fe1\u983c\u6027\u304c\u8ab2\u984c\u3068\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u5b89\u5168\u306a\u884c\u52d5\u3092\u5b66\u7fd2\u3059\u308b\u305f\u3081\u306e\u67a0\u7d44\u307f\u3084\u4fdd\u8a3c\u304c\u5fc5\u8981\u3067\u3059\uff08\u53c2\u8003\u3000Amodei et al., 2016\uff09\u3002<\/p>\n<p>7. 
\u8a08\u7b97\u8cc7\u6e90\u306e\u6d88\u8cbb<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306f\u7279\u306b\u30c7\u30a3\u30fc\u30d7\u30e9\u30fc\u30cb\u30f3\u30b0\u3068\u7d44\u307f\u5408\u308f\u305b\u305f\u5834\u5408\u3001\u81a8\u5927\u306a\u8a08\u7b97\u8cc7\u6e90\u3092\u5fc5\u8981\u3068\u3057\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u7814\u7a76\u3084\u5b9f\u7528\u5316\u304c\u9ad8\u30b3\u30b9\u30c8\u3068\u306a\u308b\u554f\u984c\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>\u52b9\u7387\u7684\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3084\u4e26\u5217\u51e6\u7406\u6280\u8853\u306e\u958b\u767a\u304c\u6c42\u3081\u3089\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>8. \u89e3\u91c8\u53ef\u80fd\u6027\u306e\u6b20\u5982<\/p>\n<p>\u591a\u304f\u306e\u5f37\u5316\u5b66\u7fd2\u30e2\u30c7\u30eb\u3001\u7279\u306b\u30c7\u30a3\u30fc\u30d7\u30e9\u30fc\u30cb\u30f3\u30b0\u3092\u7528\u3044\u305f\u3082\u306e\u306f\u300c\u30d6\u30e9\u30c3\u30af\u30dc\u30c3\u30af\u30b9\u300d\u3068\u306a\u308a\u304c\u3061\u3067\u3001\u306a\u305c\u7279\u5b9a\u306e\u884c\u52d5\u3092\u9078\u629e\u3057\u305f\u306e\u304b\u7406\u89e3\u3057\u3065\u3089\u3044\u3068\u3044\u3046\u554f\u984c\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u30e2\u30c7\u30eb\u306e\u4fe1\u983c\u6027\u3084\u5b89\u5168\u6027\u306e\u8a55\u4fa1\u304c\u56f0\u96e3\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306e\u4eca\u5f8c\u306e\u5c55\u671b<\/p>\n<p>1. 
\u30b5\u30f3\u30d7\u30eb\u52b9\u7387\u306e\u5411\u4e0a<\/p>\n<p>\u30e2\u30c7\u30eb\u30d9\u30fc\u30b9\u306e\u5f37\u5316\u5b66\u7fd2\u3084\u8ee2\u79fb\u5b66\u7fd2\u3001\u30e1\u30bf\u5b66\u7fd2\u306e\u624b\u6cd5\u304c\u7814\u7a76\u3055\u308c\u3066\u304a\u308a\u3001\u5c11\u306a\u3044\u30c7\u30fc\u30bf\u3067\u52b9\u7387\u7684\u306b\u5b66\u7fd2\u3059\u308b\u65b9\u6cd5\u304c\u9032\u5c55\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u5b9f\u4e16\u754c\u3067\u306e\u5fdc\u7528\u304c\u4e00\u5c64\u4fc3\u9032\u3055\u308c\u308b\u3053\u3068\u304c\u671f\u5f85\u3055\u308c\u307e\u3059\uff08\u53c2\u8003\u3000Schmidt &amp; Lipson, 2009\uff09\u3002<\/p>\n<p>2. \u63a2\u7d22\u6226\u7565\u306e\u6539\u5584<\/p>\n<p>\u9ad8\u5ea6\u306a\u63a2\u7d22\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u958b\u767a\u306b\u3088\u308a\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u3088\u308a\u52b9\u679c\u7684\u306b\u63a2\u7d22\u3068\u6d3b\u7528\u306e\u30d0\u30e9\u30f3\u30b9\u3092\u53d6\u308b\u3053\u3068\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001\u30d9\u30a4\u30ba\u7684\u30a2\u30d7\u30ed\u30fc\u30c1\u3084\u60c5\u5831\u7406\u8ad6\u306b\u57fa\u3065\u304f\u65b9\u6cd5\u304c\u6ce8\u76ee\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>3. 
\u30b9\u30b1\u30fc\u30e9\u30d3\u30ea\u30c6\u30a3\u306e\u5411\u4e0a<\/p>\n<p>\u5206\u6563\u30b3\u30f3\u30d4\u30e5\u30fc\u30c6\u30a3\u30f3\u30b0\u3084\u52b9\u7387\u7684\u306a\u30cb\u30e5\u30fc\u30e9\u30eb\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u30a2\u30fc\u30ad\u30c6\u30af\u30c1\u30e3\u306e\u63a1\u7528\u306b\u3088\u308a\u3001\u5f37\u5316\u5b66\u7fd2\u306e\u30b9\u30b1\u30fc\u30e9\u30d3\u30ea\u30c6\u30a3\u304c\u5411\u4e0a\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u307e\u305f\u3001\u74b0\u5883\u306e\u62bd\u8c61\u5316\u3084\u968e\u5c64\u7684\u5f37\u5316\u5b66\u7fd2\uff08HRL\uff09\u306b\u3088\u308a\u3001\u8907\u96d1\u306a\u30bf\u30b9\u30af\u3092\u5206\u89e3\u3057\u3066\u5b66\u7fd2\u3059\u308b\u30a2\u30d7\u30ed\u30fc\u30c1\u3082\u6709\u671b\u3067\u3059\u3002<\/p>\n<p>4. \u30ea\u30ef\u30fc\u30c9\u8a2d\u8a08\u306e\u81ea\u52d5\u5316<\/p>\n<p>\u9006\u5f37\u5316\u5b66\u7fd2\u3084\u4eba\u9593\u306e\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3092\u6d3b\u7528\u3057\u305f\u624b\u6cd5\u306b\u3088\u308a\u3001\u30ea\u30ef\u30fc\u30c9\u8a2d\u8a08\u306e\u81ea\u52d5\u5316\u304c\u9032\u3093\u3067\u3044\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u4eba\u9593\u306e\u610f\u56f3\u3092\u3088\u308a\u6b63\u78ba\u306b\u7406\u89e3\u3057\u3001\u671b\u307e\u3057\u3044\u884c\u52d5\u3092\u5b66\u7fd2\u3059\u308b\u3053\u3068\u304c\u53ef\u80fd\u3068\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>5. 
\u6c4e\u7528\u6027\u3068\u4e00\u822c\u5316\u80fd\u529b\u306e\u5f37\u5316<\/p>\n<p>\u30e1\u30bf\u5b66\u7fd2\u3084\u81ea\u5df1\u6559\u5e2b\u3042\u308a\u5b66\u7fd2\u306e\u6280\u8853\u3092\u7d44\u307f\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u591a\u69d8\u306a\u74b0\u5883\u306b\u9069\u5fdc\u3059\u308b\u6c4e\u7528\u7684\u306a\u80fd\u529b\u3092\u7372\u5f97\u3059\u308b\u65b9\u5411\u306b\u9032\u3093\u3067\u3044\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u5b9f\u4e16\u754c\u3067\u306e\u5e83\u7bc4\u306a\u5fdc\u7528\u304c\u671f\u5f85\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>6. \u5b89\u5168\u6027\u3068\u502b\u7406\u306e\u78ba\u7acb<\/p>\n<p>\u5b89\u5168\u306a\u5f37\u5316\u5b66\u7fd2\uff08Safe RL\uff09\u306e\u7814\u7a76\u304c\u9032\u3093\u3067\u304a\u308a\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u5b89\u5168\u3067\u4fe1\u983c\u6027\u306e\u9ad8\u3044\u884c\u52d5\u3092\u5b66\u7fd2\u3059\u308b\u305f\u3081\u306e\u65b9\u6cd5\u304c\u958b\u767a\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001\u5236\u7d04\u4ed8\u304d\u5f37\u5316\u5b66\u7fd2\u3084\u9006\u5f37\u5316\u5b66\u7fd2\u306b\u3088\u308b\u5b89\u5168\u306a\u30ea\u30ef\u30fc\u30c9\u8a2d\u8a08\u304c\u6ce8\u76ee\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>7. \u8a08\u7b97\u52b9\u7387\u306e\u6539\u5584<\/p>\n<p>\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u6700\u9069\u5316\u3084\u30cf\u30fc\u30c9\u30a6\u30a7\u30a2\u306e\u9032\u5316\u306b\u3088\u308a\u3001\u8a08\u7b97\u52b9\u7387\u306e\u5411\u4e0a\u304c\u56f3\u3089\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u307e\u305f\u3001\u91cf\u5b50\u5f37\u5316\u5b66\u7fd2\u306a\u3069\u65b0\u3057\u3044\u6280\u8853\u306e\u5c0e\u5165\u3082\u691c\u8a0e\u3055\u308c\u3066\u304a\u308a\u3001\u3055\u3089\u306a\u308b\u6027\u80fd\u5411\u4e0a\u304c\u671f\u5f85\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>8. 
\u89e3\u91c8\u53ef\u80fd\u6027\u306e\u5411\u4e0a<\/p>\n<p>\u8aac\u660e\u53ef\u80fd\u306aAI\uff08XAI\uff09\u306e\u624b\u6cd5\u3092\u5f37\u5316\u5b66\u7fd2\u306b\u9069\u7528\u3059\u308b\u7814\u7a76\u304c\u9032\u3093\u3067\u304a\u308a\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u610f\u601d\u6c7a\u5b9a\u30d7\u30ed\u30bb\u30b9\u3092\u53ef\u8996\u5316\u30fb\u7406\u89e3\u3059\u308b\u6280\u8853\u304c\u958b\u767a\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u3088\u308a\u3001\u30e2\u30c7\u30eb\u306e\u900f\u660e\u6027\u3068\u4fe1\u983c\u6027\u304c\u5411\u4e0a\u3057\u307e\u3059\u3002<\/p>\n<p>\u6839\u62e0<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306e\u8ab2\u984c\u3068\u5c55\u671b\u306b\u3064\u3044\u3066\u306f\u3001\u591a\u6570\u306e\u7814\u7a76\u8ad6\u6587\u3084\u5b9f\u7528\u4f8b\u304c\u5b58\u5728\u3057\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001Google DeepMind\u306eAlphaGo\u306f\u3001\u5f37\u5316\u5b66\u7fd2\u306e\u53ef\u80fd\u6027\u3092\u793a\u3059\u4e00\u65b9\u3067\u3001\u8a08\u7b97\u8cc7\u6e90\u306e\u5927\u91cf\u6d88\u8cbb\u3084\u7279\u5b9a\u74b0\u5883\u3078\u306e\u4f9d\u5b58\u3092\u660e\u3089\u304b\u306b\u3057\u307e\u3057\u305f\uff08Silver et al., 2016\uff09\u3002<\/p>\n<p>\u307e\u305f\u3001OpenAI\u306e\u7814\u7a76\u3067\u306f\u3001\u6c4e\u7528\u6027\u3092\u6301\u3064\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u958b\u767a\u306b\u5411\u3051\u305f\u8a66\u307f\u304c\u884c\u308f\u308c\u3066\u304a\u308a\u3001\u30e1\u30bf\u5b66\u7fd2\u3084\u8ee2\u79fb\u5b66\u7fd2\u306e\u6709\u52b9\u6027\u304c\u793a\u3055\u308c\u3066\u3044\u307e\u3059\uff08OpenAI, 2019\uff09\u3002<\/p>\n<p>\u3055\u3089\u306b\u3001\u5b89\u5168\u6027\u306b\u95a2\u3057\u3066\u306f\u3001Amodei\u3089\uff082016\uff09\u306e\u300cConcrete Problems in AI 
Safety\u300d\u306a\u3069\u304c\u53c2\u8003\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u3089\u306e\u7814\u7a76\u306f\u3001\u5f37\u5316\u5b66\u7fd2\u304c\u76f4\u9762\u3059\u308b\u8ab2\u984c\u3068\u305d\u308c\u306b\u5bfe\u3059\u308b\u5bfe\u7b56\u306e\u5fc5\u8981\u6027\u3092\u660e\u78ba\u306b\u793a\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u7d50\u8ad6<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306f\u3001\u6a5f\u68b0\u5b66\u7fd2\u306e\u4e2d\u3067\u3082\u7279\u306b\u5fdc\u7528\u7bc4\u56f2\u304c\u5e83\u304f\u3001\u4eca\u5f8c\u306eAI\u767a\u5c55\u306b\u304a\u3044\u3066\u91cd\u8981\u306a\u5f79\u5272\u3092\u679c\u305f\u3059\u3053\u3068\u304c\u671f\u5f85\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u3057\u304b\u3057\u3001\u30b5\u30f3\u30d7\u30eb\u52b9\u7387\u3084\u63a2\u7d22\u6226\u7565\u3001\u30ea\u30ef\u30fc\u30c9\u8a2d\u8a08\u3001\u5b89\u5168\u6027\u306a\u3069\u591a\u304f\u306e\u8ab2\u984c\u304c\u5b58\u5728\u3057\u307e\u3059\u3002<\/p>\n<p>\u3053\u308c\u3089\u306e\u8ab2\u984c\u306b\u5bfe\u3059\u308b\u7814\u7a76\u3068\u6280\u8853\u306e\u9032\u5c55\u306b\u3088\u308a\u3001\u5f37\u5316\u5b66\u7fd2\u306e\u5b9f\u7528\u5316\u3068\u666e\u53ca\u304c\u3055\u3089\u306b\u9032\u3080\u3053\u3068\u304c\u898b\u8fbc\u307e\u308c\u307e\u3059\u3002<\/p>\n<p>\u7279\u306b\u3001\u4ed6\u306eAI\u6280\u8853\u3068\u306e\u7d71\u5408\u3084\u65b0\u3057\u3044\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u958b\u767a\u304c\u9375\u3068\u306a\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n<p>\u3010\u8981\u7d04\u3011<br 
\/>\n\u5f37\u5316\u5b66\u7fd2\uff08RL\uff09\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3057\u3001\u7d2f\u7a4d\u5831\u916c\u3092\u6700\u5927\u5316\u3059\u308b\u884c\u52d5\u3092\u8a66\u884c\u932f\u8aa4\u3067\u5b66\u7fd2\u3059\u308bAI\u624b\u6cd5\u3067\u3059\u3002\u4e3b\u8981\u306a\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306b\u306fQ\u5b66\u7fd2\u3001SARSA\u3001\u30dd\u30ea\u30b7\u30fc\u52fe\u914d\u6cd5\u3001DQN\u306a\u3069\u304c\u3042\u308a\u3001\u30b2\u30fc\u30e0\u6226\u7565\u3001\u81ea\u5f8b\u30ed\u30dc\u30c3\u30c8\u5236\u5fa1\u3001\u7269\u6d41\u6700\u9069\u5316\u3001\u91d1\u878d\u53d6\u5f15\u306a\u3069\u5e45\u5e83\u3044\u5206\u91ce\u3067\u5fdc\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>**\u5f37\u5316\u5b66\u7fd2\u3068\u306f\u3069\u306e\u3088\u3046\u306a\u5b66\u7fd2\u65b9\u6cd5\u3067\u3059\u304b\uff1f** \u5f37\u5316\u5b66\u7fd2\u3068\u306f \u5f37\u5316\u5b66\u7fd2\uff08Reinforcement Learning, RL\uff09\u306f\u3001\u4eba\u5de5\u77e5\u80fd\uff08AI\uff09\u306e\u4e00\u5206\u91ce\u3067\u3042\u308a\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u547c\u3070\u308c\u308b\u5b66\u7fd2\u8005\u304c\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3057\u306a\u304c\u3089\u3001\u5831\u916c 
[&hellip;]<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":{"0":"post-1582","1":"post","2":"type-post","3":"status-publish","4":"format-standard","6":"category-seo"},"_links":{"self":[{"href":"https:\/\/miniqr.com\/index.php?rest_route=\/wp\/v2\/posts\/1582","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/miniqr.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/miniqr.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/miniqr.com\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/miniqr.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1582"}],"version-history":[{"count":0,"href":"https:\/\/miniqr.com\/index.php?rest_route=\/wp\/v2\/posts\/1582\/revisions"}],"wp:attachment":[{"href":"https:\/\/miniqr.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1582"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/miniqr.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1582"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/miniqr.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1582"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}