理解“backward()”：如何从头开始编写 PyTorch 函数“.backward()”？

# backward() computes gradients with respect to (wrt) the leaves of the graph.
# The grad() function is more general: it can compute the gradient with
# respect to any input (leaves included). I implemented grad() a while ago;
# you can take a look. It uses the power of automatic differentiation (AD).
import math


class ADNumber:
    """Scalar that records the operations applied to it for differentiation.

    Every operation creates a new ``ADNumber`` and registers it on each
    operand's ``_children`` list as a ``(local_derivative, result_node)``
    pair, where ``local_derivative`` is d(result)/d(operand) at the current
    values. ``grad`` later walks these edges and applies the chain rule.

    Node names are plain concatenations of the operand names (no parentheses
    are inserted), so they are labels for printing, not valid expressions.
    """

    def __init__(self, val, name=""):
        """Create a node holding the number ``val`` with display label ``name``."""
        self.name = name
        self._val = val
        # (local_derivative, result_node) for every node computed from this one.
        self._children = []

    @staticmethod
    def _lift(value):
        """Wrap a plain ``int``/``float`` as a constant node; pass nodes through."""
        if isinstance(value, (int, float)):
            return ADNumber(value, str(value))
        return value

    def __truediv__(self, other):
        """Return ``self / other`` and record both partial derivatives."""
        other = self._lift(other)
        new = ADNumber(self._val / other._val, name=f"{self.name}/{other.name}")
        self._children.append((1.0 / other._val, new))
        # d(a/b)/db = -a / b^2
        other._children.append((-self._val / other._val**2, new))
        return new

    def __mul__(self, other):
        """Return ``self * other`` and record both partial derivatives."""
        other = self._lift(other)
        new = ADNumber(self._val * other._val, name=f"{self.name}*{other.name}")
        self._children.append((other._val, new))
        other._children.append((self._val, new))
        return new

    def __add__(self, other):
        """Return ``self + other`` and record both partial derivatives."""
        other = self._lift(other)
        new = ADNumber(self._val + other._val, name=f"{self.name}+{other.name}")
        self._children.append((1.0, new))
        other._children.append((1.0, new))
        return new

    def __sub__(self, other):
        """Return ``self - other`` and record both partial derivatives."""
        other = self._lift(other)
        new = ADNumber(self._val - other._val, name=f"{self.name}-{other.name}")
        self._children.append((1.0, new))
        other._children.append((-1.0, new))
        return new

    # NOTE: ``exp`` and ``sin`` are static methods whose single parameter is
    # still called ``self``; the name is kept so existing callers are unaffected.
    @staticmethod
    def exp(self):
        """Return ``exp(self)`` as a new node."""
        new = ADNumber(math.exp(self._val), name=f"exp({self.name})")
        # d/dx exp(x) = exp(x), i.e. the value just computed (not x itself).
        self._children.append((new._val, new))
        return new

    @staticmethod
    def sin(self):
        """Return ``sin(self)`` as a new node."""
        new = ADNumber(math.sin(self._val), name=f"sin({self.name})")
        # d/dx sin(x) = cos(x)
        self._children.append((math.cos(self._val), new))
        return new

    def grad(self, other):
        """Return the derivative of ``self`` with respect to ``other``.

        The result is the sum, over every recorded path from ``other`` to
        ``self``, of the product of the local derivatives along that path.
        Returns ``1.0`` when ``other`` is ``self`` and ``0.0`` when ``self``
        was not computed from ``other``.
        """
        # Cache per-node results so that sub-graphs shared by several paths
        # are evaluated once instead of once per path.
        cache = {}

        def derivative_from(node):
            if node is self:
                return 1.0
            key = id(node)
            if key not in cache:
                result = 0.0
                for weight, child in node._children:
                    result += weight * derivative_from(child)
                cache[key] = result
            return cache[key]

        return derivative_from(other)


A = ADNumber  # shortcuts
sin = A.sin
exp = A.exp


def print_childs(f, wrt):  # with respect to
    """Recursively print every edge reachable from ``f``.

    ``wrt`` is the label printed as the source of each edge leaving ``f``;
    the recursion uses each child's own name as the label for its edges.
    """
    for weight, child in f._children:
        print("child:", wrt, "->", child.name, "grad: ", weight)
        print_childs(child, child.name)


x1 = A(1.5, name="x1")
x2 = A(0.5, name="x2")
f = (sin(x2) + 1) / (x2 + exp(x1)) + x1 * x2
print_childs(x2, "x2")
print("\ncalculated gradient for the function f with respect to x2:", f.grad(x2))

# Output (as recorded on the original page):
#
# child: x2 -> sin(x2) grad: 0.8775825618903728
# child: sin(x2) -> sin(x2)+1 grad: 1.0
# child: sin(x2)+1 -> sin(x2)+1/x2+exp(x1) grad: 0.20073512936690338
# child: sin(x2)+1/x2+exp(x1) -> sin(x2)+1/x2+exp(x1)+x1*x2 grad: 1.0
# child: x2 -> x2+exp(x1) grad: 1.0
# child: x2+exp(x1) -> sin(x2)+1/x2+exp(x1) grad: -0.05961284871202578
# child: sin(x2)+1/x2+exp(x1) -> sin(x2)+1/x2+exp(x1)+x1*x2 grad: 1.0
# child: x2 -> x1*x2 grad: 1.5
# child: x1*x2 -> sin(x2)+1/x2+exp(x1)+x1*x2 grad: 1.0
#
# calculated gradient for the function f with respect to x2: 1.6165488003791766

理解“backward()”：如何从头开始编写 PyTorch 函数“.backward()”？

1 个回答