{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from __future__ import division, print_function\n", "import numpy as np\n", "from matplotlib import pyplot as plt " ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# NOTE: make sure that for the following, the\n", "# input x is either a 1-dimensional numpy.array\n", "# or a list \n", "\n", "# sphere function\n", "def sphere(x, alpha=0.5):\n", " return alpha * np.sum(np.array(x)**2)\n", "\n", "# derivative of sphere function\n", "def sphere_der(x, alpha=0.5):\n", " return 2*alpha*np.array(x)\n", "\n", "# ellipsoid with condition number 10^alpha\n", "# make sure to have python 3 or 'division' imported\n", "def elli(x, alpha=1):\n", " s = np.array(x)\n", " n = len(s) # problem dimension\n", " sum = 0\n", " for i in range(n):\n", " sum = sum + s[i]*s[i]*(10**alpha)**(i/(n-1))\n", " sum = sum / 2\n", " return sum\n", "\n", "# derivative of ellipsoid function\n", "def elli_der(x, alpha=1):\n", " s = np.array(x)\n", " n = len(s) # problem dimension\n", " der = np.zeros(n) \n", " for i in range(n):\n", " der[i] = s[i] * (10**alpha)**(i/(n-1))\n", " return der\n", "\n", "# Hessian matrix of ellipsoid function at solution x\n", "def elli_hess(x, alpha=1):\n", " n = len(x)\n", " H = np.eye(n)\n", " for i in range(n):\n", " H[i,i] = (10**alpha)**(i/(n-1))\n", " return H\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def gradientdescent_fixedstepsize(func, grad, xinit, sigma, numfevals, alpha=1):\n", " \"\"\" Gradient descent with fixed step size.\n", " \n", " Input parameters:\n", " - func: objective function\n", " - grad: gradient of objective function \n", " - xinit: initial search point (dimension of the problem = number of variables in xinit)\n", " - sigma: constant stepsize used throughout the optimization\n", " - numfevals: number of function evaluations until stop\n", " - alpha: potential parameter of objective and gradient functions\n", " \n", " \"\"\"\n", " # storing all solutions and function values:\n", " solutions = []\n", " functionvalues = []\n", " bestsofarvalues = []\n", " \n", " # initialization:\n", " x = xinit\n", " \n", " solutions.append(x)\n", " functionvalues.append(func(x, alpha))\n", " bestsofarvalues.append(func(x, alpha))\n", " \n", " t = 1\n", " while (t < numfevals) and (np.linalg.norm(grad(x, alpha)) > 1e-12):\n", " x = x - sigma * grad(x, alpha)\n", " \n", " solutions.append(x)\n", " feval = func(x, alpha)\n", " functionvalues.append(feval)\n", " if feval < bestsofarvalues[-1]:\n", " bestsofarvalues.append(feval)\n", " else:\n", " bestsofarvalues.append(bestsofarvalues[-1])\n", " \n", " t = t + 1\n", " \n", " return (solutions, functionvalues, bestsofarvalues)\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 10. 10. ]\n", " [ 9. 9. ]\n", " [ 8.1 8.1 ]\n", " [ 7.29 7.29 ]\n", " [ 6.561 6.561]]\n", "...\n", "[[ 1.01128294e-12 1.01128294e-12]\n", " [ 9.10154646e-13 9.10154646e-13]\n", " [ 8.19139181e-13 8.19139181e-13]\n", " [ 7.37225263e-13 7.37225263e-13]]\n", "number of function evaluations: 289\n", "[[ -5. 10. ]\n", " [ -4.5 9. ]\n", " [ -4.05 8.1 ]\n", " [ -3.645 7.29 ]\n", " [ -3.2805 6.561 ]]\n", "...\n", "[[ -6.24248728e-13 1.24849746e-12]\n", " [ -5.61823855e-13 1.12364771e-12]\n", " [ -5.05641470e-13 1.01128294e-12]\n", " [ -4.55077323e-13 9.10154646e-13]]\n", "number of function evaluations: 287\n" ] } ], "source": [ "# test gradient descent with fixed step size:\n", "n = 2\n", "xfixed = 10 * np.ones(n)\n", "#xfixed = [-5, 10]\n", "numfevals = 1e6\n", "sigma = 0.1\n", "alpha = 0.5\n", "(sols, functs, bestsofars) = gradientdescent_fixedstepsize(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n", "sss = np.array(sols)\n", "print(sss[0:5,:])\n", "print('...')\n", "print(sss[-5:-1,:])\n", "print(\"number of function evaluations: %d\" % len(functs))\n", "\n", "# second run from another initial search point:\n", "xfixed = [-5, 10]\n", "(sols, functs, bestsofars) = gradientdescent_fixedstepsize(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n", "ttt = np.array(sols)\n", "print(ttt[0:5,:])\n", "print('...')\n", "print(ttt[-5:-1,:])\n", "print(\"number of function evaluations: %d\" % len(functs))\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.figure(figsize=(16,10))\n", "plt.plot(sss[:,0], sss[:,1], 'x--')\n", "plt.plot(ttt[:,0], ttt[:,1], 'o--')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 10. 10. 10. 10. 10. 10. 10. 10. 10. 10. ]\n", " [ 9. 9. 9. 9. 9. 9. 9. 9. 9. 9. ]\n", " [ 8.1 8.1 8.1 8.1 8.1 8.1 8.1 8.1 8.1\n", " 8.1 ]\n", " [ 7.29 7.29 7.29 7.29 7.29 7.29 7.29 7.29 7.29\n", " 7.29 ]\n", " [ 6.561 6.561 6.561 6.561 6.561 6.561 6.561 6.561 6.561\n", " 6.561]]\n", "...\n", "[[ 4.35324146e-13 4.35324146e-13 4.35324146e-13 4.35324146e-13\n", " 4.35324146e-13 4.35324146e-13 4.35324146e-13 4.35324146e-13\n", " 4.35324146e-13 4.35324146e-13]\n", " [ 3.91791731e-13 3.91791731e-13 3.91791731e-13 3.91791731e-13\n", " 3.91791731e-13 3.91791731e-13 3.91791731e-13 3.91791731e-13\n", " 3.91791731e-13 3.91791731e-13]\n", " [ 3.52612558e-13 3.52612558e-13 3.52612558e-13 3.52612558e-13\n", " 3.52612558e-13 3.52612558e-13 3.52612558e-13 3.52612558e-13\n", " 3.52612558e-13 3.52612558e-13]\n", " [ 3.17351302e-13 3.17351302e-13 3.17351302e-13 3.17351302e-13\n", " 3.17351302e-13 3.17351302e-13 3.17351302e-13 3.17351302e-13\n", " 3.17351302e-13 3.17351302e-13]]\n", "number of function evaluations: 297\n" ] } ], "source": [ "# test gradient descent with fixed step size:\n", "n = 10\n", "xfixed = 10 * np.ones(n)\n", "#xfixed = [-5, 10]\n", "numfevals = 1e6\n", "sigma = 0.1\n", "alpha = 0.5 # 1/2, 1/20, 1/200\n", "(sols, functs, bestsofars) = gradientdescent_fixedstepsize(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n", "sss = np.array(sols)\n", "print(sss[0:5,:])\n", "print('...')\n", "print(sss[-5:-1,:])\n", "print(\"number of function evaluations: %d\" % len(functs))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def line_search_Armijorule(x, d, func, f_x, grad_x, sigma, alpha=0.5):\n", " \"\"\" This line search algorithm gives back a tuple \n", " (xt, fxt, sigma, numevals) with the chosen solution xt\n", " its function value fxt = func(xt), the chosen\n", " stepsize and the number of used function\n", " evaluations when optimizing along a search space\n", " direction d from search point x.\n", " 'func' and 'grad' give the objective function and\n", " its gradient respectively where alpha is their\n", " (optional) parameter. f_x and grad_x give the function\n", " value and gradient at the initial starting point x\n", " in order to not compute it again here.\n", " \"\"\"\n", " theta = 0.5 # internal parameter in [0,1]\n", " beta = 0.5 # internal parameter in [0,1]\n", " \n", " t = theta * np.dot(d, grad_x)\n", " \n", " fxt = func(x + sigma*d, alpha)\n", " ftotal = 1 # number of used function evaluations\n", " \n", " while (fxt > f_x + sigma * t):\n", " sigma = beta * sigma\n", " fxt = func(x + sigma*d, alpha)\n", " ftotal = ftotal + 1\n", " \n", " return (x + sigma*d, fxt, sigma, ftotal)\n", "\n", "\n", "\n", "def gradientdescent_Armijorule(func, grad, x, sigma, numfevals, alpha):\n", " \"\"\" Gradient descent with Armijo rule to find right step size.\n", " \n", " Input parameters:\n", " - func: objective function\n", " - grad: gradient of objective function \n", " - x: initial search point (dimension of the problem = number of variables in x)\n", " - sigma: initial stepsize\n", " - numfevals: number of function evaluations until stop\n", " \n", " \"\"\"\n", " # storing all solutions and function values:\n", " solutions = [x]\n", " functionvalues = [func(x, alpha)]\n", " funevals = [1]\n", " \n", " while funevals[-1] < numfevals:\n", " \n", " grad_x = grad(x, alpha)\n", " if np.linalg.norm(grad_x) < 1e-12:\n", " break\n", "\n", " # Armijo rule line search, giving back the proposed stepsize and\n", " # the number of used function evaluations when optimizing\n", " # along a search space direction d = -grad(x, alpha)\n", " # from search point x with an initial stepsize of 10.\n", " (x, fval, stepsize, numfunevalsArmijo) = line_search_Armijorule(x, -grad_x, func,\n", " functionvalues[-1], grad_x,\n", " sigma, alpha=alpha)\n", " \n", " solutions.append(x)\n", " functionvalues.append(fval) \n", " funevals.append(funevals[-1] + numfunevalsArmijo)\n", " \n", " return (solutions, functionvalues, funevals)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 10. 10. ]\n", " [ 9. 9. ]\n", " [ 8.1 8.1 ]\n", " [ 7.29 7.29 ]\n", " [ 6.561 6.561]]\n", "...\n", "[[ 1.01128294e-12 1.01128294e-12]\n", " [ 9.10154646e-13 9.10154646e-13]\n", " [ 8.19139181e-13 8.19139181e-13]\n", " [ 7.37225263e-13 7.37225263e-13]]\n", "number of function evaluations: 289\n" ] } ], "source": [ "# test gradient descent with Armijo rule for step size:\n", "lb = -5\n", "ub = 5\n", "n = 2\n", "#xfixed = (ub-lb) * nprand.rand(n) + lb\n", "xfixed = 10 * np.ones(n)\n", "numfevals = 1e6\n", "sigma = 0.1\n", "alpha = 0.5 # 0.5, 0.05, 0.005\n", "\n", "(sols, functs, FEs) = gradientdescent_Armijorule(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n", "aaa = np.array(sols)\n", "print(aaa[0:5,:])\n", "print('...')\n", "print(aaa[-5:-1,:])\n", "print(\"number of function evaluations: %d\" % FEs[-1])\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Armijo rule:\n", "[[ 10. 10. ]\n", " [ 9. 9. ]\n", " [ 8.1 8.1]]\n", "...\n", "[[ 8.19139181e-13 8.19139181e-13]\n", " [ 7.37225263e-13 7.37225263e-13]]\n", "number of function evaluations for Armijo: 289\n", "-------\n", "Gradient descent:\n", "[[ 10. 10. ]\n", " [ 9. 9. ]\n", " [ 8.1 8.1]]\n", "...\n", "[[ 8.19139181e-13 8.19139181e-13]\n", " [ 7.37225263e-13 7.37225263e-13]]\n", "number of function evaluations for gradient descent: 289\n" ] } ], "source": [ "# compare both descent algorithms on sphere function:\n", "n = 2\n", "xfixed = 10 * np.ones(n)\n", "numfevals = 1e6\n", "sigma = 0.1 # 0.1, 0.7, 1.7\n", "alpha = 0.5\n", "\n", "(sols, functs, FEs) = gradientdescent_Armijorule(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n", "arm = np.array(sols)\n", "print(\"Armijo rule:\")\n", "print(arm[0:3,:])\n", "print('...')\n", "print(arm[-3:-1,:])\n", "print(\"number of function evaluations for Armijo: %d\" % FEs[-1])\n", "print(\"-------\")\n", "(sols, functs, bestsofars) = gradientdescent_fixedstepsize(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n", "fss = np.array(sols)\n", "print(\"Gradient descent:\")\n", "print(fss[0:3,:])\n", "print('...')\n", "print(fss[-3:-1,:])\n", "print(\"number of function evaluations for gradient descent: %d\" % len(functs))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.plot(fss[:,0], fss[:,1], 'xb--')\n", "plt.plot(arm[:,0], arm[:,1], 'or--')\n", "\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def newtondescent_Armijorule(func, grad, hess, x, sigma, numfevals, alpha=0.5):\n", " \"\"\" Newton descent with Armijo rule to find right step size.\n", " \n", " Input parameters:\n", " - func: objective function\n", " - gradient of objective function \n", " - Hessian of objective function\n", " - x: initial search point (dimension of the problem = number of variables in x)\n", " - sigma: initial stepsize\n", " - numfevals: number of function evaluations until stop\n", " - alpha: parameter of objective and gradient functions\n", " \n", " \"\"\"\n", " # storing all solutions and function values:\n", " solutions = [x]\n", " functionvalues = [func(x, alpha=alpha)]\n", " funevals = [1]\n", " \n", " t = 1\n", " while (t < numfevals) and (np.linalg.norm(grad(x, alpha=alpha)) > 1e-12):\n", " inverseofhessianatx = np.linalg.inv(hess(x, alpha=alpha))\n", " grad_x = grad(x, alpha=alpha)\n", " (x, feval, stepsize, numfunevalsArmijo) = line_search_Armijorule(x, np.dot(-inverseofhessianatx, grad_x),\n", " func, functionvalues[-1], grad_x,\n", " sigma, alpha=alpha)\n", " \n", " solutions.append(x)\n", " functionvalues.append(feval) \n", " funevals.append(funevals[-1] + numfunevalsArmijo)\n", " t = t + numfunevalsArmijo\n", " \n", " return (solutions, functionvalues, funevals)\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Armijo rule, gradient descent:\n", "[[ 10. 10. ]\n", " [ 9.9375 3.75 ]\n", " [ 9.87539062 1.40625 ]]\n", "...\n", "[[ 9.38907590e-13 2.89035551e-14]\n", " [ 9.33039417e-13 1.08388332e-14]]\n", "number of function evaluations for gradient descent (with Armijo rule): 2468\n", "number of gradient evaluations for gradient descent (with Armijo rule): 741\n", "-------\n", "Armijo rule, Newton descent:\n", "[[ 10. 10. ]\n", " [ 9. 9. ]\n", " [ 8.1 8.1]]\n", "...\n", "[[ 1.21076003e-14 1.21076003e-14]\n", " [ 1.08968403e-14 1.08968403e-14]]\n", "number of function evaluations for Newton descent (with Armijo rule): 329\n", "number of gradient evaluations for Newton descent (with Armijo rule): 329\n" ] } ], "source": [ "# compare both Armijo rule descent algorithms on ellipsoid function:\n", "n = 2\n", "xfixed = 10 * np.ones(n)\n", "#xfixed = [1,3]\n", "\n", "numfevals = 1e6\n", "sigma = 0.1\n", "alpha = 2 # 2, 3, 4\n", "\n", "(sols, functs, FEs) = gradientdescent_Armijorule(elli, elli_der, xfixed, sigma, numfevals, alpha)\n", "grd = np.array(sols)\n", "print(\"Armijo rule, gradient descent:\")\n", "print(grd[0:3,:])\n", "print('...')\n", "print(grd[-3:-1,:])\n", "print(\"number of function evaluations for gradient descent (with Armijo rule): %d\" % FEs[-1])\n", "print(\"number of gradient evaluations for gradient descent (with Armijo rule): %d\" % len(FEs))\n", "print(\"-------\")\n", "(sols, functs, FEs) = newtondescent_Armijorule(elli, elli_der, elli_hess, xfixed, sigma, numfevals, alpha)\n", "nwt = np.array(sols)\n", "print(\"Armijo rule, Newton descent:\")\n", "print(nwt[0:3,:])\n", "print('...')\n", "print(nwt[-3:-1,:])\n", "print(\"number of function evaluations for Newton descent (with Armijo rule): %d\" % FEs[-1])\n", "print(\"number of gradient evaluations for Newton descent (with Armijo rule): %d\" % len(FEs))" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.plot(grd[:,0], grd[:,1], 'xb--', label='Gradient')\n", "plt.plot(nwt[:,0], nwt[:,1], 'or--', label='Newton')\n", "plt.legend(loc='upper left')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }