Source code for source_localization.gradient_descent

import numpy as np



[docs]
def gradient_descent(fun, x0, args=(), options=None, callback=None, f_old=np.inf):
    r"""
    Minimize a function :math:`f(x)` using the gradient descent method.

    The steps of the algorithme are the following:

        - Initialization: Starting from an initial point :math:`x_0`.
        - For each iteration :math:`k`:

            1. Compute the gradient of the function :math:`\nabla f(x_k)` at the current point :math:`x_k`.
            2. Update the estimate of :math:`x` using the rule :math:`x_{k+1} = x_k - \alpha \nabla f(x_k)`
               where :math:`\alpha` is the step size.
        - Convergence Check: the algorithm stops
          when the change in the function value or the gradient is below a predefined threshold, or
          when the maximum number of iterations is reached.


    Parameters
    ----------
    fun : callable
        Method that, given :math:`x`, returns the evaluation of the function :math:`f(x)` and its gradient :math:`\nabla f(x)`.
    x0 : ~numpy.ndarray
        Initial point :math:`x_0` from which the iterative optimization algorithm starts.
    args : tuple
        Arguments that should be given to the callable :meth:`fun` besides the array containing the current value of :math:`x`.
    options : dict, optional, default: None
        Dictionary containing several options to customize the gradient descent method and its stopping criteria.
        These options include:

            - 'step size': float or int, default: 1, step size of the gradient descent :math:`\alpha`, also called learning rate.
            - 'nit_max': int, default: 50, maximum number of iterations.
            - 'ftol': float, default: -1,
              tolerance such that the algorithm terminates successfully when :math:`|f(x_{k+1})-f(x_{k})|<\text{ftol}`
            - 'gtol': float, default: -1,
              tolerance such that the algorithm terminates successfully when :math:`|\nabla f(x_{k})|<\text{gtol}`

    callback : callable, optional
        Method called at each optimization iteration.
        The method should take as sole input the array containing the current value of :math:`x`.
    f_old : float, optional, default: numpy.inf
        Previous value of the function to optimize :math:`f(x)` if the optimization process is restarted.

    Returns
    -------
    x_opt : ~numpy.ndarray
        Optimal value :math:`x^*`.
    f_opt : float
        Evaluation of the function to minimize at the optimal value :math:`f(x^*)`.
    grad_opt : ~numpy.ndarray
        Evaluation of the gradient of the function to minimize at the optimal value :math:`\nabla f(x^*)`.
    n_iter : int
        Number of iterations needed to converge.
    """

    # To do:
    # Add exceptions to check input types.

    if not isinstance(args, tuple):
        args = (args,)

    # if callable(jac):
    #    pass
    # elif jac is True:
    #    # fun returns func and grad
    #    jac = lambda x, *args : fun(x, *args)[1]
    #    fct = lambda x, *args : fun(x, *args)[0]

    if options is None:
        options = {}

    if 'ftol' in options.keys():
        ftol = options['ftol']
    else:
        ftol = -1

    if 'gtol' in options.keys():
        gtol = options['gtol']
    else:
        gtol = -1

    if 'nit_max' in options.keys():
        nit_max = options['nit_max']
    else:
        nit_max = 50

    if 'step size' in options.keys():
        step_size = options['step size']
    else:
        step_size = 1

    if callback is None:

        def callback(x):
            pass

    # initilization of the iteration number, of x, the stopping criteria and the previous evaluation of the function
    nit = 0
    x = x0
    flag_stop = True

    # loop until the stopping criteria is reached
    while flag_stop:
        # evaluation of the function to minimize and its gradient
        f, df = fun(x, *args)

        if f > f_old:
            raise ValueError(
                "The cost function at the current iteration is larger than the cost function at the previous iteration."
                + " This is likely to be du to a too large step."
            )

        # updating x following the descent gradient method
        x -= df * step_size

        # call the callback function and update the iteration number
        callback(x)
        nit += 1

        # update the flag of the stopping criteria and the previous evaluation of the function
        flag_ite = nit < nit_max
        flag_ftol = np.abs(f - f_old) > ftol
        flag_gtol = np.max(np.abs(df)) > gtol
        flag_stop = flag_ite and flag_ftol and flag_gtol
        f_old = np.copy(f)

    return x, f, df, nit