Source code for layered.optimization

class GradientDecent:
    """
    Adapt the weights in the opposite direction of the gradient to reduce
    the error.
    """

    def __call__(self, weights, gradient, learning_rate=0.1):
        return weights - learning_rate * gradient
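
For illustration, one update step could look like the following sketch; the numpy import and the example values are assumptions, not part of the module::

    import numpy as np

    descent = GradientDecent()
    weights = np.array([0.5, -1.2, 0.3])
    gradient = np.array([0.1, -0.4, 0.0])
    # One step: weights <- weights - learning_rate * gradient.
    weights = descent(weights, gradient, learning_rate=0.1)
    # weights is now [0.49, -1.16, 0.3].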

class Momentum:
    """
    Slow down changes of direction in the gradient by mixing in previous
    values of the gradient, scaled by a decay rate.
    """

    def __init__(self):
        self.previous = None

    def __call__(self, gradient, rate=0.9):
        gradient = gradient.copy()
        if self.previous is None:
            self.previous = gradient.copy()
        else:
            assert self.previous.shape == gradient.shape
            gradient += rate * self.previous
            self.previous = gradient.copy()
        return gradient
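
A minimal sketch of how Momentum might be combined with GradientDecent; numpy and the toy gradients are assumptions of this example::

    import numpy as np

    momentum = Momentum()
    descent = GradientDecent()
    weights = np.zeros(3)
    for gradient in (np.array([1.0, 0.0, -1.0]), np.array([0.5, 0.0, -0.5])):
        # Mix the decayed previous gradient into the current one,
        # then take a descent step with the smoothed gradient.
        smoothed = momentum(gradient, rate=0.9)
        weights = descent(weights, smoothed, learning_rate=0.1)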

class WeightDecay:
    """
    Slowly move each weight closer to zero for regularization. This can
    help the model to find simpler solutions.
    """

    def __call__(self, weights, rate=1e-4):
        return (1 - rate) * weights
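
Multiplying by (1 - rate) once per training step shrinks the weights geometrically, so after n steps an otherwise untouched weight is scaled by (1 - rate) ** n. A sketch under the assumption that numpy is available::

    import numpy as np

    decay = WeightDecay()
    weights = np.array([1.0, -2.0])
    for _ in range(100):
        weights = decay(weights, rate=1e-4)
    # Scaled by 0.9999 ** 100, roughly 0.99, so weights is
    # approximately [0.99, -1.98].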

class WeightTying:
    """
    Constrain groups of slices of the gradient to have the same value by
    averaging them. Should be applied to the initial weights and to each
    gradient.
    """

    def __init__(self, *groups):
        for group in groups:
            assert group and hasattr(group, '__len__')
            assert all([isinstance(x[0], int) for x in group])
            assert all([isinstance(y, (slice, int))
                        for x in group for y in x])
        self.groups = groups

    def __call__(self, matrices):
        matrices = matrices.copy()
        for group in self.groups:
            slices = [matrices[slice_] for slice_ in group]
            assert all([x.shape == slices[0].shape for x in slices]), (
                'All slices within a group must have the same shape. '
                'Shapes are ' + ', '.join(str(x.shape) for x in slices) + '.')
            average = sum(slices) / len(slices)
            assert average.shape == slices[0].shape
            for slice_ in group:
                matrices[slice_] = average
        return matrices
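
A sketch of tying two blocks of a weight matrix; modeling the stack of weight matrices as a plain numpy array is an assumption of this example::

    import numpy as np

    # Each index tuple selects matrix 0, then a block of rows and columns.
    tying = WeightTying([
        (0, slice(0, 2), slice(0, 2)),
        (0, slice(2, 4), slice(0, 2)),
    ])
    weights = np.arange(16.0).reshape(1, 4, 4)
    weights = tying(weights)
    # The two 2x2 blocks now hold their elementwise average.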