<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<!--
  RSS 2.0 feed for the "Derivation" tag listing of this site.
  NOTE(review): the generator element below names Hugo, so this file is
  presumably build output; changes likely belong in the site content or
  feed template rather than here. Confirm before hand-editing.
-->
<!-- The atom prefix is declared on the root; it is used only by the atom:link self-reference below. -->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel metadata: title, link and description all describe the tag page, not the site root. -->
    <title>Derivation on PANG Kaicheng&#39;s Homepage</title>
    <link>http://pangkaicheng.com/tags/derivation/</link>
    <description>Recent content in Derivation on PANG Kaicheng&#39;s Homepage</description>
    <generator>Hugo</generator>
    <language>en-us</language>
    <!-- Same timestamp as the pubDate of the only item in this feed. -->
    <lastBuildDate>Tue, 30 Sep 2025 00:00:00 +0000</lastBuildDate>
    <!-- rel="self" points at this feed's own URL (the tag page URL plus index.xml). -->
    <atom:link href="http://pangkaicheng.com/tags/derivation/index.xml" rel="self" type="application/rss+xml" />
    <!-- Single entry: one blog post carrying the "Derivation" tag. -->
    <item>
      <title>Derivation for Action-Value Function in Off-Policy Learning</title>
      <link>http://pangkaicheng.com/blog/derivation-for-action-value-function-in-off-policy-learning/</link>
      <pubDate>Tue, 30 Sep 2025 00:00:00 +0000</pubDate>
      <!-- The guid repeats the link URL verbatim; altering either string would presumably make readers treat the post as new. -->
      <guid>http://pangkaicheng.com/blog/derivation-for-action-value-function-in-off-policy-learning/</guid>
      <!-- NOTE(review): the text contains $...$ math delimiters; feed readers will presumably show them literally. Confirm that is acceptable. -->
      <description>Detailed derivation of the action-value function $Q(s, a)$ in off-policy learning using importance sampling, and an explanation of the backward loop implementation in Monte Carlo prediction.</description>
    </item>
  </channel>
</rss>
