Source code for layered.optimization

class GradientDecent:
    """
    Adapt the weights in the opposite direction of the gradient to reduce
    the error.
    """

    def __call__(self, weights, gradient, learning_rate=0.1):
        return weights - learning_rate * gradient
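
For illustration, one update step could look like the following sketch; the numpy import and the example values are assumptions, not part of the module::

    import numpy as np

    descent = GradientDecent()
    weights = np.array([0.5, -1.2, 0.3])
    gradient = np.array([0.1, -0.4, 0.0])
    # One step: weights <- weights - learning_rate * gradient.
    weights = descent(weights, gradient, learning_rate=0.1)
    # weights is now [0.49, -1.16, 0.3].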

class Momentum:
    """
    Slow down changes of direction in the gradient by mixing in previous
    values of the gradient, scaled by a decay rate.
    """

    def __init__(self):
        self.previous = None

    def __call__(self, gradient, rate=0.9):
        gradient = gradient.copy()
        if self.previous is None:
            self.previous = gradient.copy()
        else:
            assert self.previous.shape == gradient.shape
            gradient += rate * self.previous
            self.previous = gradient.copy()
        return gradient
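
A minimal sketch of how Momentum might be combined with GradientDecent; numpy and the toy gradients are assumptions of this example::

    import numpy as np

    momentum = Momentum()
    descent = GradientDecent()
    weights = np.zeros(3)
    for gradient in (np.array([1.0, 0.0, -1.0]), np.array([0.5, 0.0, -0.5])):
        # Mix the decayed previous gradient into the current one,
        # then take a descent step with the smoothed gradient.
        smoothed = momentum(gradient, rate=0.9)
        weights = descent(weights, smoothed, learning_rate=0.1)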

class WeightDecay:
    """
    Slowly move each weight closer to zero for regularization. This can
    help the model to find simpler solutions.
    """

    def __call__(self, weights, rate=1e-4):
        return (1 - rate) * weights
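
Multiplying by (1 - rate) once per training step shrinks the weights geometrically, so after n steps an otherwise untouched weight is scaled by (1 - rate) ** n. A sketch under the assumption that numpy is available::

    import numpy as np

    decay = WeightDecay()
    weights = np.array([1.0, -2.0])
    for _ in range(100):
        weights = decay(weights, rate=1e-4)
    # Scaled by 0.9999 ** 100, roughly 0.99, so weights is
    # approximately [0.99, -1.98].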

class WeightTying:
    """
    Constrain groups of slices of the gradient to have the same value by
    averaging them. Should be applied to the initial weights and to each
    gradient.
    """

    def __init__(self, *groups):
        for group in groups:
            assert group and hasattr(group, '__len__')
            assert all([isinstance(x[0], int) for x in group])
            assert all([isinstance(y, (slice, int))
                        for x in group for y in x])
        self.groups = groups

    def __call__(self, matrices):
        matrices = matrices.copy()
        for group in self.groups:
            slices = [matrices[slice_] for slice_ in group]
            assert all([x.shape == slices[0].shape for x in slices]), (
                'All slices within a group must have the same shape. '
                'Shapes are ' + ', '.join(str(x.shape) for x in slices) + '.')
            average = sum(slices) / len(slices)
            assert average.shape == slices[0].shape
            for slice_ in group:
                matrices[slice_] = average
        return matrices
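
A sketch of tying two blocks of a weight matrix; modeling the stack of weight matrices as a plain numpy array is an assumption of this example::

    import numpy as np

    # Each index tuple selects matrix 0, then a block of rows and columns.
    tying = WeightTying([
        (0, slice(0, 2), slice(0, 2)),
        (0, slice(2, 4), slice(0, 2)),
    ])
    weights = np.arange(16.0).reshape(1, 4, 4)
    weights = tying(weights)
    # The two 2x2 blocks now hold their elementwise average.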