{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import division, print_function\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# NOTE: make sure that for the following, the\n",
    "# input x is either a 1-dimensional numpy.array\n",
    "# or a list \n",
    "\n",
    "# sphere function\n",
    "def sphere(x, alpha=0.5):\n",
    "    return alpha * np.sum(np.array(x)**2)\n",
    "\n",
    "# derivative of sphere function\n",
    "def sphere_der(x, alpha=0.5):\n",
    "    return 2*alpha*np.array(x)\n",
    "\n",
    "# ellipsoid with condition number 10^alpha\n",
    "# make sure to have python 3 or 'division' imported\n",
    "def elli(x, alpha=1):\n",
    "    s = np.array(x)\n",
    "    n = len(s)   # problem dimension\n",
    "    sum = 0\n",
    "    for i in range(n):\n",
    "        sum = sum + s[i]*s[i]*(10**alpha)**(i/(n-1))\n",
    "    sum = sum / 2\n",
    "    return sum\n",
    "\n",
    "# derivative of ellipsoid function\n",
    "def elli_der(x, alpha=1):\n",
    "    s = np.array(x)\n",
    "    n = len(s)   # problem dimension\n",
    "    der = np.zeros(n)    \n",
    "    for i in range(n):\n",
    "        der[i] = s[i] * (10**alpha)**(i/(n-1))\n",
    "    return der\n",
    "\n",
    "# Hessian matrix of ellipsoid function at solution x\n",
    "def elli_hess(x, alpha=1):\n",
    "    n = len(x)\n",
    "    H = np.eye(n)\n",
    "    for i in range(n):\n",
    "        H[i,i] = (10**alpha)**(i/(n-1))\n",
    "    return H\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def gradientdescent_fixedstepsize(func, grad, xinit, sigma, numfevals, alpha=1):\n",
    "    \"\"\" Gradient descent with fixed step size.\n",
    "    \n",
    "    Input parameters:\n",
    "       - func: objective function\n",
    "       - grad: gradient of objective function \n",
    "       - xinit: initial search point (dimension of the problem = number of variables in xinit)\n",
    "       - sigma: constant stepsize used throughout the optimization\n",
    "       - numfevals: number of function evaluations until stop\n",
    "       - alpha: potential parameter of objective and gradient functions\n",
    "        \n",
    "    \"\"\"\n",
    "    # storing all solutions and function values:\n",
    "    solutions = []\n",
    "    functionvalues = []\n",
    "    bestsofarvalues = []\n",
    "    \n",
    "    # initialization:\n",
    "    x = xinit\n",
    "    \n",
    "    solutions.append(x)\n",
    "    functionvalues.append(func(x, alpha))\n",
    "    bestsofarvalues.append(func(x, alpha))\n",
    "    \n",
    "    t = 1\n",
    "    while (t < numfevals) and (np.linalg.norm(grad(x, alpha)) > 1e-12):\n",
    "        x = x - sigma * grad(x, alpha)\n",
    "        \n",
    "        solutions.append(x)\n",
    "        feval = func(x, alpha)\n",
    "        functionvalues.append(feval)\n",
    "        if feval < bestsofarvalues[-1]:\n",
    "            bestsofarvalues.append(feval)\n",
    "        else:\n",
    "            bestsofarvalues.append(bestsofarvalues[-1])\n",
    "        \n",
    "        t = t + 1\n",
    "    \n",
    "    return (solutions, functionvalues, bestsofarvalues)\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 10.     10.   ]\n",
      " [  9.      9.   ]\n",
      " [  8.1     8.1  ]\n",
      " [  7.29    7.29 ]\n",
      " [  6.561   6.561]]\n",
      "...\n",
      "[[  1.01128294e-12   1.01128294e-12]\n",
      " [  9.10154646e-13   9.10154646e-13]\n",
      " [  8.19139181e-13   8.19139181e-13]\n",
      " [  7.37225263e-13   7.37225263e-13]]\n",
      "number of function evaluations: 289\n",
      "[[ -5.      10.    ]\n",
      " [ -4.5      9.    ]\n",
      " [ -4.05     8.1   ]\n",
      " [ -3.645    7.29  ]\n",
      " [ -3.2805   6.561 ]]\n",
      "...\n",
      "[[ -6.24248728e-13   1.24849746e-12]\n",
      " [ -5.61823855e-13   1.12364771e-12]\n",
      " [ -5.05641470e-13   1.01128294e-12]\n",
      " [ -4.55077323e-13   9.10154646e-13]]\n",
      "number of function evaluations: 287\n"
     ]
    }
   ],
   "source": [
    "# test gradient descent with fixed step size:\n",
    "n = 2\n",
    "xfixed = 10 * np.ones(n)\n",
    "#xfixed = [-5, 10]\n",
    "numfevals = 1e6\n",
    "sigma = 0.1\n",
    "alpha = 0.5\n",
    "(sols, functs, bestsofars) = gradientdescent_fixedstepsize(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n",
    "sss = np.array(sols)\n",
    "print(sss[0:5,:])\n",
    "print('...')\n",
    "print(sss[-5:-1,:])\n",
    "print(\"number of function evaluations: %d\" % len(functs))\n",
    "\n",
    "# second run from another initial search point:\n",
    "xfixed = [-5, 10]\n",
    "(sols, functs, bestsofars) = gradientdescent_fixedstepsize(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n",
    "ttt = np.array(sols)\n",
    "print(ttt[0:5,:])\n",
    "print('...')\n",
    "print(ttt[-5:-1,:])\n",
    "print(\"number of function evaluations: %d\" % len(functs))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.figure(figsize=(16,10))\n",
    "plt.plot(sss[:,0], sss[:,1], 'x--')\n",
    "plt.plot(ttt[:,0], ttt[:,1], 'o--')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 10.     10.     10.     10.     10.     10.     10.     10.     10.     10.   ]\n",
      " [  9.      9.      9.      9.      9.      9.      9.      9.      9.      9.   ]\n",
      " [  8.1     8.1     8.1     8.1     8.1     8.1     8.1     8.1     8.1\n",
      "    8.1  ]\n",
      " [  7.29    7.29    7.29    7.29    7.29    7.29    7.29    7.29    7.29\n",
      "    7.29 ]\n",
      " [  6.561   6.561   6.561   6.561   6.561   6.561   6.561   6.561   6.561\n",
      "    6.561]]\n",
      "...\n",
      "[[  4.35324146e-13   4.35324146e-13   4.35324146e-13   4.35324146e-13\n",
      "    4.35324146e-13   4.35324146e-13   4.35324146e-13   4.35324146e-13\n",
      "    4.35324146e-13   4.35324146e-13]\n",
      " [  3.91791731e-13   3.91791731e-13   3.91791731e-13   3.91791731e-13\n",
      "    3.91791731e-13   3.91791731e-13   3.91791731e-13   3.91791731e-13\n",
      "    3.91791731e-13   3.91791731e-13]\n",
      " [  3.52612558e-13   3.52612558e-13   3.52612558e-13   3.52612558e-13\n",
      "    3.52612558e-13   3.52612558e-13   3.52612558e-13   3.52612558e-13\n",
      "    3.52612558e-13   3.52612558e-13]\n",
      " [  3.17351302e-13   3.17351302e-13   3.17351302e-13   3.17351302e-13\n",
      "    3.17351302e-13   3.17351302e-13   3.17351302e-13   3.17351302e-13\n",
      "    3.17351302e-13   3.17351302e-13]]\n",
      "number of function evaluations: 297\n"
     ]
    }
   ],
   "source": [
    "# test gradient descent with fixed step size:\n",
    "n = 10\n",
    "xfixed = 10 * np.ones(n)\n",
    "#xfixed = [-5, 10]\n",
    "numfevals = 1e6\n",
    "sigma = 0.1\n",
    "alpha = 0.5 # 1/2, 1/20, 1/200\n",
    "(sols, functs, bestsofars) = gradientdescent_fixedstepsize(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n",
    "sss = np.array(sols)\n",
    "print(sss[0:5,:])\n",
    "print('...')\n",
    "print(sss[-5:-1,:])\n",
    "print(\"number of function evaluations: %d\" % len(functs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def line_search_Armijorule(x, d, func, f_x, grad_x, sigma, alpha=0.5):\n",
    "    \"\"\" This line search algorithm gives back a tuple \n",
    "        (xt, fxt, sigma, numevals) with the chosen solution xt\n",
    "        its function value fxt = func(xt), the chosen\n",
    "        stepsize and the number of used function\n",
    "        evaluations when optimizing along a search space\n",
    "        direction d from search point x.\n",
    "        'func' and 'grad' give the objective function and\n",
    "        its gradient respectively where alpha is their\n",
    "        (optional) parameter. f_x and grad_x give the function\n",
    "        value and gradient at the initial starting point x\n",
    "        in order to not compute it again here.\n",
    "    \"\"\"\n",
    "    theta = 0.5   # internal parameter in [0,1]\n",
    "    beta = 0.5 # internal parameter in [0,1]\n",
    "    \n",
    "    t = theta * np.dot(d, grad_x)\n",
    "    \n",
    "    fxt = func(x + sigma*d, alpha)\n",
    "    ftotal = 1   # number of used function evaluations\n",
    "    \n",
    "    while (fxt > f_x + sigma * t):\n",
    "        sigma = beta * sigma\n",
    "        fxt = func(x + sigma*d, alpha)\n",
    "        ftotal = ftotal + 1\n",
    "        \n",
    "    return (x + sigma*d, fxt, sigma, ftotal)\n",
    "\n",
    "\n",
    "\n",
    "def gradientdescent_Armijorule(func, grad, x, sigma, numfevals, alpha):\n",
    "    \"\"\" Gradient descent with Armijo rule to find right step size.\n",
    "    \n",
    "    Input parameters:\n",
    "       - func: objective function\n",
    "       - grad: gradient of objective function \n",
    "       - x: initial search point (dimension of the problem = number of variables in x)\n",
    "       - sigma: initial stepsize\n",
    "       - numfevals: number of function evaluations until stop\n",
    "        \n",
    "    \"\"\"\n",
    "    # storing all solutions and function values:\n",
    "    solutions = [x]\n",
    "    functionvalues = [func(x, alpha)]\n",
    "    funevals = [1]\n",
    "    \n",
    "    while funevals[-1] < numfevals:\n",
    "        \n",
    "        grad_x = grad(x, alpha)\n",
    "        if np.linalg.norm(grad_x) < 1e-12:\n",
    "              break\n",
    "\n",
    "        # Armijo rule line search, giving back the proposed stepsize and\n",
    "        # the number of used function evaluations when optimizing\n",
    "        # along a search space direction d = -grad(x, alpha)\n",
    "        # from search point x with an initial stepsize of 10.\n",
    "        (x, fval, stepsize, numfunevalsArmijo) = line_search_Armijorule(x, -grad_x, func,\n",
    "                                                                        functionvalues[-1], grad_x,\n",
    "                                                                        sigma, alpha=alpha)\n",
    "        \n",
    "        solutions.append(x)\n",
    "        functionvalues.append(fval)     \n",
    "        funevals.append(funevals[-1] + numfunevalsArmijo)\n",
    "    \n",
    "    return (solutions, functionvalues, funevals)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 10.     10.   ]\n",
      " [  9.      9.   ]\n",
      " [  8.1     8.1  ]\n",
      " [  7.29    7.29 ]\n",
      " [  6.561   6.561]]\n",
      "...\n",
      "[[  1.01128294e-12   1.01128294e-12]\n",
      " [  9.10154646e-13   9.10154646e-13]\n",
      " [  8.19139181e-13   8.19139181e-13]\n",
      " [  7.37225263e-13   7.37225263e-13]]\n",
      "number of function evaluations: 289\n"
     ]
    }
   ],
   "source": [
    "# test gradient descent with Armijo rule for step size:\n",
    "lb = -5\n",
    "ub = 5\n",
    "n = 2\n",
    "#xfixed = (ub-lb) * nprand.rand(n) + lb\n",
    "xfixed = 10 * np.ones(n)\n",
    "numfevals = 1e6\n",
    "sigma = 0.1\n",
    "alpha = 0.5 # 0.5, 0.05, 0.005\n",
    "\n",
    "(sols, functs, FEs) = gradientdescent_Armijorule(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n",
    "aaa = np.array(sols)\n",
    "print(aaa[0:5,:])\n",
    "print('...')\n",
    "print(aaa[-5:-1,:])\n",
    "print(\"number of function evaluations: %d\" % FEs[-1])\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Armijo rule:\n",
      "[[ 10.   10. ]\n",
      " [  9.    9. ]\n",
      " [  8.1   8.1]]\n",
      "...\n",
      "[[  8.19139181e-13   8.19139181e-13]\n",
      " [  7.37225263e-13   7.37225263e-13]]\n",
      "number of function evaluations for Armijo: 289\n",
      "-------\n",
      "Gradient descent:\n",
      "[[ 10.   10. ]\n",
      " [  9.    9. ]\n",
      " [  8.1   8.1]]\n",
      "...\n",
      "[[  8.19139181e-13   8.19139181e-13]\n",
      " [  7.37225263e-13   7.37225263e-13]]\n",
      "number of function evaluations for gradient descent: 289\n"
     ]
    }
   ],
   "source": [
    "# compare both descent algorithms on sphere function:\n",
    "n = 2\n",
    "xfixed = 10 * np.ones(n)\n",
    "numfevals = 1e6\n",
    "sigma = 0.1 # 0.1, 0.7, 1.7\n",
    "alpha = 0.5\n",
    "\n",
    "(sols, functs, FEs) = gradientdescent_Armijorule(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n",
    "arm = np.array(sols)\n",
    "print(\"Armijo rule:\")\n",
    "print(arm[0:3,:])\n",
    "print('...')\n",
    "print(arm[-3:-1,:])\n",
    "print(\"number of function evaluations for Armijo: %d\" % FEs[-1])\n",
    "print(\"-------\")\n",
    "(sols, functs, bestsofars) = gradientdescent_fixedstepsize(sphere, sphere_der, xfixed, sigma, numfevals, alpha)\n",
    "fss = np.array(sols)\n",
    "print(\"Gradient descent:\")\n",
    "print(fss[0:3,:])\n",
    "print('...')\n",
    "print(fss[-3:-1,:])\n",
    "print(\"number of function evaluations for gradient descent: %d\" % len(functs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.plot(fss[:,0], fss[:,1], 'xb--')\n",
    "plt.plot(arm[:,0], arm[:,1], 'or--')\n",
    "\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def newtondescent_Armijorule(func, grad, hess, x, sigma, numfevals, alpha=0.5):\n",
    "    \"\"\" Newton descent with Armijo rule to find right step size.\n",
    "    \n",
    "    Input parameters:\n",
    "       - func: objective function\n",
    "       - gradient of objective function \n",
    "       - Hessian of objective function\n",
    "       - x: initial search point (dimension of the problem = number of variables in x)\n",
    "       - sigma: initial stepsize\n",
    "       - numfevals: number of function evaluations until stop\n",
    "       - alpha: parameter of objective and gradient functions\n",
    "        \n",
    "    \"\"\"\n",
    "    # storing all solutions and function values:\n",
    "    solutions = [x]\n",
    "    functionvalues = [func(x, alpha=alpha)]\n",
    "    funevals = [1]\n",
    "    \n",
    "    t = 1\n",
    "    while (t < numfevals) and (np.linalg.norm(grad(x, alpha=alpha)) > 1e-12):\n",
    "        inverseofhessianatx = np.linalg.inv(hess(x, alpha=alpha))\n",
    "        grad_x = grad(x, alpha=alpha)\n",
    "        (x, feval, stepsize, numfunevalsArmijo) = line_search_Armijorule(x, np.dot(-inverseofhessianatx, grad_x),\n",
    "                                                                         func, functionvalues[-1], grad_x,\n",
    "                                                                         sigma, alpha=alpha)\n",
    "        \n",
    "        solutions.append(x)\n",
    "        functionvalues.append(feval)        \n",
    "        funevals.append(funevals[-1] + numfunevalsArmijo)\n",
    "        t = t + numfunevalsArmijo\n",
    "    \n",
    "    return (solutions, functionvalues, funevals)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Armijo rule, gradient descent:\n",
      "[[ 10.          10.        ]\n",
      " [  9.9375       3.75      ]\n",
      " [  9.87539062   1.40625   ]]\n",
      "...\n",
      "[[  9.38907590e-13   2.89035551e-14]\n",
      " [  9.33039417e-13   1.08388332e-14]]\n",
      "number of function evaluations for gradient descent (with Armijo rule): 2468\n",
      "number of gradient evaluations for gradient descent (with Armijo rule): 741\n",
      "-------\n",
      "Armijo rule, Newton descent:\n",
      "[[ 10.   10. ]\n",
      " [  9.    9. ]\n",
      " [  8.1   8.1]]\n",
      "...\n",
      "[[  1.21076003e-14   1.21076003e-14]\n",
      " [  1.08968403e-14   1.08968403e-14]]\n",
      "number of function evaluations for Newton descent (with Armijo rule): 329\n",
      "number of gradient evaluations for Newton descent (with Armijo rule): 329\n"
     ]
    }
   ],
   "source": [
    "# compare both Armijo rule descent algorithms on ellipsoid function:\n",
    "n = 2\n",
    "xfixed = 10 * np.ones(n)\n",
    "#xfixed = [1,3]\n",
    "\n",
    "numfevals = 1e6\n",
    "sigma = 0.1\n",
    "alpha = 2 # 2, 3, 4\n",
    "\n",
    "(sols, functs, FEs) = gradientdescent_Armijorule(elli, elli_der, xfixed, sigma, numfevals, alpha)\n",
    "grd = np.array(sols)\n",
    "print(\"Armijo rule, gradient descent:\")\n",
    "print(grd[0:3,:])\n",
    "print('...')\n",
    "print(grd[-3:-1,:])\n",
    "print(\"number of function evaluations for gradient descent (with Armijo rule): %d\" % FEs[-1])\n",
    "print(\"number of gradient evaluations for gradient descent (with Armijo rule): %d\" % len(FEs))\n",
    "print(\"-------\")\n",
    "(sols, functs, FEs) = newtondescent_Armijorule(elli, elli_der, elli_hess, xfixed, sigma, numfevals, alpha)\n",
    "nwt = np.array(sols)\n",
    "print(\"Armijo rule, Newton descent:\")\n",
    "print(nwt[0:3,:])\n",
    "print('...')\n",
    "print(nwt[-3:-1,:])\n",
    "print(\"number of function evaluations for Newton descent (with Armijo rule): %d\" % FEs[-1])\n",
    "print(\"number of gradient evaluations for Newton descent (with Armijo rule): %d\" % len(FEs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.plot(grd[:,0], grd[:,1], 'xb--', label='Gradient')\n",
    "plt.plot(nwt[:,0], nwt[:,1], 'or--', label='Newton')\n",
    "plt.legend(loc='upper left')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}