Source code for source_localization.proximal_gradient

import numpy as np


[docs] def proximal_gradient(x0, fun1, proximal_fun2, args=(), options=None, callback=None): r""" Minimize a function of the shape :math:`f(x)+g(x)` using the proximal gradient method, with :math:`f` a differentiable function and :math:`g` a function whose proximal operator is known, e.g. a LASSO or group-LASSO regularization term). The proximal gradient method, also refered to as ISTA, combines the gradient-descent step for the differentiable part with the proximal step for the non-differentiable part as following: - Initialization: Starting from an initial point :math:`x_0`. - For each iteration :math:`k`: 1. compute the gradient of the function :math:`\nabla f(x_k)` at the current point :math:`x_k`, 2. Update the estimate of :math:`x` using the rule :math:`x_{k+1} = \text{prox}^g_\alpha(x_k - \alpha \nabla f(x_k))` where :math:`\text{prox}^g_\lambda` is the proximal operator of :math:`g` and :math:`\alpha` is the step size of the gradient descent step. - Convergence Check: the algorithm stops when the maximum number of iterations is reached. An accelerated version, called FISTA, incorporates an inertial term, that takes into account not only :math:`x_k` but also :math:`x_{k-1}`, to speed up convergence. Parameters ---------- x0 : ~numpy.ndarray Initial point :math:`x_0` from which the iterative optimization algorithm starts. fun1 : callable Method that, given :math:`x`, returns the evaluation of the function :math:`f(x)` and its gradient :math:`\nabla f(x)`. proximal_fun2 : callable Method that, given :math:`x` and a parameter :math:`\lambda`, returns the evaluation of the function :math:`\text{prox}^g_\lambda(x)`. args : tuple, optional Arguments that should be given to the callable :meth:`fun1` besides the array containing the current value of :math:`x`. options : dict, optional, default: `None` Dictionary containing several options to customize the proximal gradient method and its stopping criteria. These options include: - 'algorithm': str, default: `'ISTA`', the type of algorithm used (`'ISTA'` or `'FISTA'`). - 'step_size': float, default: `1`, the step size of the gradient descent. - 'nit_max': int, default: `50`, the maximal number of iterations. callback : callable, optional Method called at each optimization iteration. The method should take as sole input the array containing the current value of :math:`x`. Returns ------- x: ~numpy.ndarray Optimal value :math:`x^*`. f: float Evaluation of the function :math:`f` at the optimal value :math:`f(x^*)`. df: ~numpy.ndarray Evaluation of the gradient of the function :math:`f` at the optimal value :math:`\nabla f(x^*)`. nit: int The number of iterations needed to converge. """ # TO DO # Improve stopping criteria, add ftol and gtol. # Add exceptions to check input types. if not isinstance(args, tuple): args = (args,) if options is None: options = {} if 'nit_max' in options.keys(): nit_max = options['nit_max'] else: nit_max = 50 if 'algorithm' in options.keys(): algorithm = options['algorithm'] else: algorithm = 'ISTA' if 'step size' in options.keys(): step_size = options['step size'] else: step_size = 1 if callback is None: def callback(x): pass # initilization of the iteration number, of x, the stopping criteria and the previous evaluation of the function x = np.copy(x0) nit = 0 flag_convergence = False # if the used algorithm is the FISTA algorithm # the step size should be 1/L with L the Lipschitz constant in the case of the FISTA algorithm ! if algorithm == 'FISTA': # initialization of the inertial coefficient t = 1 x_old = np.copy(x) # loop until the stopping criteria is reached while not (flag_convergence): # store the inertial coefficient at the previous iteration # and computation of the inertial coefficient at the current iteration (see Beck and Teboule, 2009) told = np.copy(t) t = 0.5 * (1 + np.sqrt(1 + 4 * told**2)) alpha = (told - 1) / t # adding the inertial terme to x z = x + alpha * (x - x_old) x_old = np.copy(x) # computation of the differentiable function f and its gradient f, df = fun1(z, *args) # computation of the proximal part of the algorithm and update of x x = proximal_fun2(z - step_size * df, step_size) # call the callback function and update the iteration number callback(x) nit += 1 # update the flag of the stopping criteria and the previous evaluation of the function flag_ite = nit >= nit_max flag_convergence = flag_ite # if the used algorithm is the ISTA algorithm elif algorithm == 'ISTA': # loop until the stopping criteria is reached f_old = np.inf while not (flag_convergence): # computation of the differentiable function f and its gradient f, df = fun1(x, *args) if f > f_old: raise ValueError( "The cost function at the current iteration is larger than the cost function at the previous iteration." + " This is likely to be du to a too large step." ) # computation of the proximal part of the algorithm and update of x x = proximal_fun2(x - step_size * df, step_size) # call the callback function and update the iteration number callback(x) nit += 1 # update the flag of the stopping criteria and the previous evaluation of the function flag_ite = nit >= nit_max flag_convergence = flag_ite f_old = np.copy(f) else: raise ValueError("The given type of algorithm has not been implemented. It should be ISTA or FISTA ") return x, f, df, nit