<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel metadata: this feed covers the site's /tags/afterstate/ listing page (see link). -->
    <title>Afterstate on PANG Kaicheng&#39;s Homepage</title>
    <link>http://pangkaicheng.com/tags/afterstate/</link>
    <description>Recent content in Afterstate on PANG Kaicheng&#39;s Homepage</description>
    <generator>Hugo</generator>
    <language>en-us</language>
    <!-- Same timestamp as the pubDate of the first item in this channel. -->
    <lastBuildDate>Tue, 11 Nov 2025 00:00:00 +0000</lastBuildDate>
    <!-- rel="self": the URL of this feed document itself, as opposed to the HTML page in link. -->
    <atom:link href="http://pangkaicheng.com/tags/afterstate/index.xml" rel="self" type="application/rss+xml" />
    <!-- Entry for the SARSA / Expected SARSA / Q-learning comparison post; guid repeats the link URL. -->
    <!-- NOTE(review): the URL slug is spelled "comparision". link and guid agree, so the spelling
         presumably matches the published page path; confirm before changing either one. -->
    <item>
      <title>Temporal Difference (TD) Control Algorithms Comparison: SARSA, Expected SARSA, and Q-learning</title>
      <link>http://pangkaicheng.com/blog/comparision-of-three-td-method-approximation/</link>
      <pubDate>Tue, 11 Nov 2025 00:00:00 +0000</pubDate>
      <guid>http://pangkaicheng.com/blog/comparision-of-three-td-method-approximation/</guid>
      <description>Comparative analysis of major one-step Temporal Difference (TD) control algorithms: SARSA, Expected SARSA, and Q-learning, focusing on their policy nature and target construction.</description>
    </item>
    <!-- Entry for the outfit-compatibility MDP post; guid repeats the link URL. -->
    <item>
      <title>Reinforcement Learning for Outfit Compatibility</title>
      <link>http://pangkaicheng.com/blog/reinforcement-learning-for-outfit/</link>
      <pubDate>Mon, 15 Sep 2025 00:00:00 +0000</pubDate>
      <guid>http://pangkaicheng.com/blog/reinforcement-learning-for-outfit/</guid>
      <description>Modeling the outfit compatibility problem as a Markov Decision Process (MDP), defining the state space, action space, and afterstate formulation for sequential item selection.</description>
    </item>
    <!-- Entry for the afterstate formalization post; guid repeats the link URL. -->
    <item>
      <title>Afterstate Formulation</title>
      <link>http://pangkaicheng.com/blog/afterstate-formulation/</link>
      <pubDate>Sun, 01 Sep 2024 00:00:00 +0000</pubDate>
      <guid>http://pangkaicheng.com/blog/afterstate-formulation/</guid>
      <description>Formalization of the afterstate concept in Reinforcement Learning, including value functions and Dynamic Programming / Temporal Difference algorithms.</description>
    </item>
  </channel>
</rss>
