$$\begin{align}
Q^\pi(s,a) &= \mathbb{E}[r_{t+1} + \gamma r_{t+2} + \gamma^2 r_{t+3} + \dots | s,a ]\\
&= \mathbb{E}_{s'}[r + \gamma Q^\pi(s',a') | s,a ]
\end{align}
$$
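The first line is just the discounted sum of future rewards, and the second rewrites it recursively: the value of acting now is the immediate reward plus the discounted value of whatever comes next. As a quick sanity check, here is a minimal Python sketch (the function names and the sample reward list are illustrative, not from any library) showing that the direct sum and the one-step recursion give the same number.

```python
def discounted_return(rewards, gamma):
    """Direct sum: r_{t+1} + gamma * r_{t+2} + gamma^2 * r_{t+3} + ..."""
    return sum(gamma ** k * r for k, r in enumerate(rewards))

def discounted_return_recursive(rewards, gamma):
    """Same quantity via the one-step recursion G = r + gamma * G'."""
    if not rewards:
        return 0.0
    return rewards[0] + gamma * discounted_return_recursive(rewards[1:], gamma)

rewards = [1.0, 0.0, 2.0, 1.0]  # hypothetical sampled rewards
assert abs(discounted_return(rewards, 0.9)
           - discounted_return_recursive(rewards, 0.9)) < 1e-12
```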
$$ Q^*(s,a) = \mathbb{E}_{s'}[r + \gamma \max_{a'} Q^*(s',a') | s,a ]$$
$$\begin{align} &Q_{t+1}(s_{t},a_{t}) = \underbrace{Q_t(s_t,a_t)}_{\rm old~value} + \underbrace{\alpha_t(s_t,a_t)}_{\rm learning~rate} \cdot \\ &\left( \overbrace{\underbrace{R_{t+1}}_{\rm reward} + \underbrace{\gamma}_{\rm discount~factor} \underbrace{\max_{a}Q_t(s_{t+1}, a)}_{\rm estimate~of~optimal~future~value}}^{\rm learned~value} - \underbrace{Q_t(s_t,a_t)}_{\rm old~value} \right) \end{align}$$
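In code, the update above is a single line applied to a table of Q-values: move the old estimate a step of size $\alpha$ toward the bootstrapped target $r + \gamma \max_a Q(s_{t+1}, a)$. The sketch below is a minimal tabular version, assuming a Gym-style environment whose `reset()` returns a state index and whose `step()` returns `(next_state, reward, done, info)`; it is an illustration of the update, not a reference implementation.

```python
import numpy as np

def q_learning(env, n_states, n_actions, episodes=500,
               alpha=0.1, gamma=0.99, epsilon=0.1):
    """Tabular Q-learning with an epsilon-greedy behaviour policy."""
    Q = np.zeros((n_states, n_actions))
    for _ in range(episodes):
        s = env.reset()
        done = False
        while not done:
            # epsilon-greedy action selection
            if np.random.rand() < epsilon:
                a = np.random.randint(n_actions)
            else:
                a = int(np.argmax(Q[s]))
            s_next, r, done, _ = env.step(a)
            # learned value: reward + discounted estimate of optimal future value
            target = r + gamma * np.max(Q[s_next]) * (not done)
            # old value + learning rate * (learned value - old value)
            Q[s, a] += alpha * (target - Q[s, a])
            s = s_next
    return Q
```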