#!/usr/bin/env python
"""Module barecmaes2 implements the CMA-ES without using `numpy`.  

The Covariance Matrix Adaptation Evolution Strategy, CMA-ES, serves for
nonlinear function minimization.

The **main functionality** is implemented in

  - class `CMAES` and
  - function `fmin()` that is a single-line-usage wrapper around `CMAES`.

This code has two **purposes**: 

1. it might be used for READING and UNDERSTANDING the basic flow and
   the details of the CMA-ES *algorithm*. The source code is meant to
   be read. For short, study the class `CMAES`, in particular its doc
   string and the code of the method `CMAES.tell()`, where all the real
   work is done in about 20 lines (see "def tell" in the source).
   Otherwise, reading from the top is a feasible option.

2. it might be used when the python module `numpy` is not available.
   When `numpy` is available, rather use `cma.py` (see http://www.lri
   .fr/~hansen/cmaes_inmatlab.html#python) to run serious simulations:
   the latter code has many more lines, but executes much faster
   (roughly ten times), offers a richer user interface, far better
   termination options, supposedly quite useful output, and automatic
   restarts.
    
Dependencies: `math.exp`, `math.log` and `random.normalvariate` (modules
`matplotlib.pylab` and `sys` are optional).

Testing: call ``python barecmaes2.py`` at the OS shell. Tested with 
Python 2.5 (only with removed import of print_function), 2.6, 2.7, 3.2. 
Small deviations between the versions are expected. 

URL: http://www.lri.fr/~hansen/barecmaes2.py

Last change: May, 2014, version 2.00

:Author: Nikolaus Hansen, 2010-2011, hansen[at-symbol]lri.fr
:License: This code is released into the public domain (that is, you may 
    use and modify it however you like). 

"""
# import future syntax as for Python version >= 3.0
from __future__ import division  # such that 0 != 1/2 == 0.5
from __future__ import print_function  # available since 2.6, not needed

from math import log, exp
from random import normalvariate as random_normalvariate
# see randn keyword argument to CMAES.__init__

# Optional imports, can be out-commented, if not available
import sys
try:
    import matplotlib.pylab as pylab
    pylab.ion()  # prevents that execution stops after plotting
    # for plotting, scitools.easyfiz might be an alternative
except ImportError:
    pylab = None
    print('  pylab could not be imported  ')

__version__ = '1.10'
__author__ = 'Nikolaus Hansen'
__docformat__ = 'reStructuredText'


def fmin(objectivefct, xstart, sigma, args=(),
         max_eval='1e3*N**2', ftarget=None,
         verb_disp=100, verb_log=1, verb_plot=100):
    """non-linear non-convex minimization procedure. 
    The functional interface to CMA-ES. 
    
    Parameters
    ==========
        `objectivefct`
            a function that takes as input a list of floats (like
            [3.0, 2.2, 1.1]) and returns a single float (a scalar).
            The objective is to find ``x`` with ``objectivefct(x)``
            to be as small as possible.
        `xstart`
            list of numbers (like `[3,2,1.2]`), initial solution vector
        `sigma`
            float, initial step-size, standard deviation in any coordinate
        `args`
            arguments to `objectivefct`
        `ftarget`
            float, target function value
        `max_eval`
            int or str, maximal number of function evaluations, a string
            is evaluated with N being the search space dimension
        `verb_disp`
            int, display on console every verb_disp iteration, 0 for never
        `verb_log`
            int, data logging every verb_log iteration, 0 for never
        `verb_plot`
            int, plot logged data every verb_plot iteration, 0 for never
            
    Returns
    =======
    ``return es.result() + (es.stop(), es, logger)``, that is, 
    ``(xbest, fbest, evalsbest, evals, iterations, xmean,``
    `` termination_condition, CMAES_object_instance, data_logger)``
 
    Example
    =======
    The following example minimizes the function `Fcts.elli`:: 
     
        >> import barecmaes2 as cma
        >> res = cma.fmin(cma.Fcts.elli, 10 * [0.5], 0.3, verb_disp=100)
        evals: ax-ratio max(std)   f-value
           10:     1.0  2.8e-01  198003.585517
         1000:     8.4  5.5e-02  95.9162313173
         2000:    40.5  3.6e-02  5.07618122556
         3000:   149.1  8.5e-03  0.271537247667
         4000:   302.2  4.2e-03  0.0623570374451
         5000:   681.1  5.9e-03  0.000485971681802
         6000:  1146.4  9.5e-06  5.26919100476e-10
         6510:  1009.1  2.3e-07  3.34128914738e-13
        termination by {'tolfun': 1e-12}
        best f-value = 3.34128914738e-13
        
        >> print(res[0])
        [2.1187532328944602e-07, 6.893386424102321e-08, -2.008255256456535e-09, 4.472078873398156e-09, -9.421306741003398e-09, 7.331265238205156e-09, 2.4804701814730273e-10, -6.030651566971234e-10, -6.063921614755129e-10, -1.066906137937511e-10]

        >> print(res[1])
        3.34128914738e-13

        >> res[-1].plot()  # needs pylab/matplotlib to be installed
        
    Details
    =======
    By default `fmin()` tries to plot some output. This works only with
    Python module `matplotlib` being installed. The first and last lines ::
    
        cma.fmin(cma.Fcts.elli, 10 * [0.5], 0.3, verb_plot=0)
        logger = cma.CMADataLogger()
        cma.CMAES(10 * [0.5], 0.3).optimize(cma.Fcts.elli, logger=logger)
        
    do the same. `fmin()` adds the possibility to plot *during* the
    execution.
            
    :See: `CMAES`, `OOOptimizer`
    
    """
    es = CMAES(xstart, sigma, max_eval, ftarget)  # new optimizer instance
    logger = CMAESDataLogger(verb_log).add(es, True)  # add data row
    while not es.stop():
        es.optimize(objectivefct, args=args,
                    iterations=verb_plot if verb_plot else None,
                    verb_disp=verb_disp,
                    logger=logger)
        if verb_plot:  # and es.counteval/es.lam % verb_plot == 0:
            logger.plot()

    return es.result() + (es.stop(), es, logger)


class OOOptimizer(object):
    """"abstract" base class for an OO optimizer interface.

    """
    def __init__(self, xstart, **more_kwargs):
        """abstract method, ``xstart`` is a mandatory argument """ 
        raise NotImplementedError('method to be implemented in subclass')

    def ask(self): 
        """abstract method, AKA get, deliver new candidate solution(s),
        a list of "vectors" """
        raise NotImplementedError('method to be implemented in subclass')

    def tell(self, solutions, function_values): 
        """abstract method, AKA update, prepare for next iteration"""
        raise NotImplementedError('method to be implemented in subclass')

    def stop(self): 
        """abstract method, return satisfied termination conditions in
        a dictionary like ``{'termination reason':value, ...}``,
        for example ``{'tolfun':1e-12}``, or the empty dictionary ``{}``.
        The implementation of `stop()` should prevent an infinite loop.

        """
        raise NotImplementedError('method to be implemented in subclass')

    def result(self): 
        """abstract method, return ``(x, f(x), ...)``, that is the
        minimizer, its function value, ..."""
        raise NotImplementedError('method to be implemented in subclass')

    def disp(self, verbosity_modulo=1):
        """display of some iteration info"""
        print("method disp of " + str(type(self)) + " is not implemented")

    def optimize(self, objectivefct, iterations=None, min_iterations=1,
                 args=(), verb_disp=20, logger=None):
        """iterate at least ``min_iterations`` and at most ``iterations``
        over ``OOOptimizer self`` using objective function
        ``objectivefct``.  Prints current information every ``verb_disp``,
        uses ``OptimDataLogger logger``, and returns the number of 
        conducted iterations.

        Example
        =======
        ::

            import barecmaes2 as cma
            es = cma.CMAES(7 * [0.1], 0.5).optimize(cma.Fcts.rosenbrock)
            print(es.result()[0])
            
        """
        if iterations is not None and iterations < min_iterations:
            raise ValueError("iterations was smaller than min_iterations")
        if iterations == 0:
            raise ValueError("parameter iterations = 0")

        iteration = 0
        while not self.stop() or iteration < min_iterations:
            if iterations and iteration >= iterations:
                return self
            iteration += 1

            X = self.ask()         # deliver candidate solutions
            fitvals = [objectivefct(x, *args) for x in X]
            self.tell(X, fitvals)  # all the work is done here

            self.disp(verb_disp)
            logger.add(self) if logger else None
        
        logger.add(self, force=True) if logger else None
        if verb_disp:
            self.disp(1)    
            print('termination by', self.stop())
            print('best f-value =', self.result()[1])
            print('solution =', self.result()[0])

        return self

class CMAES(OOOptimizer):
    """class for non-linear non-convex minimization. The class implements
    the interface define in `OOOptimizer`, namely the methods
    `__init__()`, `ask()`, `tell()`, `stop()`, `result()`, and `disp()`.
        
    Examples
    --------
    All examples minimize the function `elli`, the output is not shown. 
    (A preferred environment to execute all examples is
    ``ipython -pylab``.)

    First we need to ::
    
        import barecmaes2 as cma

    The shortest example uses the inherited method
    `OOOptimizer.optimize()`::
        
        res = cma.CMAES(8 * [0.1], 0.5).optimize(cma.Fcts.elli)
    
    See method `__init__()` for the input parameters to `CMAES`. We might
    have a look at the result::
    
        print(res[0])  # best solution and
        print(res[1])  # its function value
    
    `res` is the return value from method `CMAES.results()` appended with
    `None` (no logger). In order to display more exciting output we rather
    do ::
    
        logger = cma.CMAESDataLogger()
        res = cma.CMAES(9 * [0.5], 0.3).optimize(cma.Fcts.elli, logger)
        logger.plot()  # if matplotlib is available

    Virtually the same example can be written with an explicit loop
    instead of using `optimize()`. This gives the necessary insight
    into the `CMAES` class interface and gives entire control over the
    iteration loop ::
     
        optim = cma.CMAES(9 * [0.5], 0.3)  # calls CMAES.__init__()
        logger = cma.CMAESDataLogger().register(optim)  # logger instance
 
        # this loop resembles optimize() 
        while not optim.stop(): # iterate
            X = optim.ask()     # get candidate solutions
            #  do whatever needs to be done, however rather don't 
            #  change X unless like for example X[2] = optim.ask()[0]
            f = [cma.Fcts.elli(x) for x in X]  # evaluate solutions
            optim.tell(X, f)    # do all the real work
            optim.disp(20)      # display info every 20th iteration
            logger.add()        # log another "data line"
 
        # final output
        print('termination by', optim.stop())
        print('best f-value =', optim.result()[1])
        print('best solution =', optim.result()[0])
        
        print('potentially better solution xmean =', optim.result()[5])
        print('let\'s check f(xmean) =', cma.Fcts.elli(optim.result()[5])) 
        logger.plot()  # if matplotlib is available
        raw_input('press enter to continue')  # prevents closing figures
    
    A slightly longer example, which also plots within the loop, is 
    the implementation of function `fmin(...)`. 
    
    Details
    -------
    Most of the work is done in the method `tell(...)`. The method
    `result()` returns more useful output.
         
    :See: `fmin()`, `OOOptimizer.optimize()`
    
    """
    def __init__(self, xstart, sigma,  # mandatory
                 max_eval='1e3*N**2',
                 ftarget=None,
                 popsize="4 + int(3 * log(N))",
                 randn=random_normalvariate):
        """Initialize` CMAES` object instance, the first two arguments are
        mandatory.
         
        Parameters
        ----------
            `xstart`
                ``list`` of numbers (like ``[3, 2, 1.2]``), initial
                solution vector
            `sigma`
                ``float``, initial step-size (standard deviation in each
                coordinate)
            `max_eval`
                ``int`` or ``str``, maximal number of function
                evaluations, a string is evaluated with ``N`` being the
                search space dimension
            `ftarget`
                `float`, target function value
            `randn`
                normal random number generator, by default
                ``random.normalvariate``
        
        """
        # process input parameters
        N = len(xstart)  # number of objective variables/problem dimension
        self.xmean = xstart[:]  # initial point, distribution mean, a copy
        self.sigma = sigma
        self.ftarget = ftarget # stop if fitness < ftarget
        self.max_eval = eval(str(max_eval))  # eval a string
        self.randn = randn
        
        # Strategy parameter setting: Selection  
        self.lam = eval(str(popsize))  # population size, offspring number
        self.mu = int(self.lam / 2)  # number of parents/points for recombination
        self.weights = [log(self.mu+0.5) - log(i+1) for i in range(self.mu)]  # recombination weights
        self.weights = [w / sum(self.weights) for w in self.weights]  # normalize recombination weights array
        self.mueff = sum(self.weights)**2 / sum(w**2 for w in self.weights)  # variance-effectiveness of sum w_i x_i
    
        # Strategy parameter setting: Adaptation
        self.cc = (4 + self.mueff/N) / (N+4 + 2 * self.mueff/N)  # time constant for cumulation for C
        self.cs = (self.mueff + 2) / (N + self.mueff + 5)  # t-const for cumulation for sigma control
        self.c1 = 2 / ((N + 1.3)**2 + self.mueff)  # learning rate for rank-one update of C
        self.cmu = min([1 - self.c1, 2 * (self.mueff - 2 + 1/self.mueff) / ((N + 2)**2 + self.mueff)])  # and for rank-mu update
        self.damps = 2 * self.mueff/self.lam + 0.3 + self.cs  # damping for sigma, usually close to 1
    
        # Initialize dynamic (internal) state variables 
        self.pc, self.ps = N * [0], N * [0]  # evolution paths for C,sigma
        self.B = eye(N)   # B defines the coordinate system 
        self.D = N * [1]  # diagonal D defines the scaling
        self.C = eye(N)   # covariance matrix 
        self.invsqrtC = eye(N)  # C^-1/2 
        self.eigeneval = 0      # tracking the update of B and D
        self.counteval = 0
        self.fitvals = []  # for bookkeeping output and termination

        self.best = BestSolution()
        
    def stop(self):
        """return satisfied termination conditions in a dictionary like 
        {'termination reason':value, ...}, for example {'tolfun':1e-12}, 
        or the empty dict {}""" 
        res = {}
        if self.counteval > 0: 
            if self.counteval >= self.max_eval:
                res['evals'] = self.max_eval
            if self.ftarget is not None and len(self.fitvals) > 0 \
                    and self.fitvals[0] <= self.ftarget:
                res['ftarget'] = self.ftarget
            if max(self.D) > 1e7 * min(self.D):
                res['condition'] = 1e7
            if len(self.fitvals) > 1 \
                    and self.fitvals[-1] - self.fitvals[0] < 1e-12:
                res['tolfun'] = 1e-12 
            if self.sigma * max(self.D) < 1e-11:
                # remark: max(D) >= max(diag(C))**0.5
                res['tolx'] = 1e-11
        return res

    def ask(self):
        """return a list of lambda candidate solutions according to 
        m + sig * Normal(0,C) = m + sig * B * D * Normal(0,I)"""

        # Eigendecomposition: first update B, D and invsqrtC from C
        # postpone in case to achieve O(N**2)
        if self.counteval - self.eigeneval > \
                self.lam/(self.c1+self.cmu)/len(self.C)/10:
            self.eigeneval = self.counteval
            self.D, self.B = eig(self.C)       # eigen decomposition, B==normalized eigenvectors, O(N**3)
            self.D = [d**0.5 for d in self.D]  # D contains standard deviations now
            rg = range(len(self.C))
            for i in rg:  # compute invsqrtC = C**(-1/2) = B D**(-1/2) B'
                for j in rg:
                    self.invsqrtC[i][j] = sum(self.B[i][k] * self.B[j][k]
                                              / self.D[k] for k in rg)

        # lam vectors x_k = m  +          sigma * B * D * randn_k(N)
        res = []
        for k in range(self.lam):  # repeat lam times
            z = [d * self.randn(0, 1) for d in self.D]
            res.append(plus(self.xmean, dot1(self.sigma, dot(self.B, z))))
        return res

    def tell(self, arx, fitvals):
        """update the evolution paths and the distribution parameters m,
        sigma, and C within CMA-ES.
        
        Parameters
        ----------
            `arx` 
                a list of solutions, presumably from `ask()`
            `fitvals` 
                the corresponding objective function values
        
        """
        
        # bookkeeping, preparation
        self.counteval += len(fitvals)  # slightly artificial to do here
        N = len(self.xmean)             # convenience short cuts
        iN = range(N)  
        xold = self.xmean
        
        # Sort by fitness and compute weighted mean into xmean
        self.fitvals, arindex = sorted(fitvals), argsort(fitvals)  # min
        arx = [arx[arindex[k]] for k in range(self.mu)]  # sorted arx
        del fitvals, arindex  # to prevent misuse
        # self.fitvals is kept for termination and display only
        self.best.update([arx[0]], [self.fitvals[0]], self.counteval)
        
        # xmean = [x_1=best, x_2, ..., x_mu] * weights
        self.xmean = dot(arx[0:self.mu], self.weights, t=True)
        # recombination, new mean value
        # == [sum(self.weights[k] * arx[k][i] for k in range(self.mu))
        #                                     for i in iN]
        
        # Cumulation: update evolution paths
        y = minus(self.xmean, xold)
        z = dot(self.invsqrtC, y)  # == C**(-1/2) * (xnew - xold)
        
        c = (self.cs * (2-self.cs) * self.mueff)**0.5 / self.sigma
        for i in iN:
            self.ps[i] -= self.cs * self.ps[i]  # exponential decay on ps
            self.ps[i] += c * z[i]
        hsig = (sum(x**2 for x in self.ps)
                / (1-(1-self.cs)**(2*self.counteval/self.lam)) / N
                < 2 + 4./(N+1))
        c = (self.cc * (2-self.cc) * self.mueff)**0.5 / self.sigma
        for i in iN:
            self.pc[i] -= self.cc * self.pc[i]  # exponential decay on pc
            self.pc[i] += c * hsig * y[i]

        # Adapt covariance matrix C
        c1a = self.c1 - (1-hsig**2) * self.c1 * self.cc * (2-self.cc)
        # for a minor adjustment to the variance loss by hsig
        for i in iN:
            for j in iN:
                Cmuij = sum(self.weights[k] * (arx[k][i]-xold[i])
                            * (arx[k][j]-xold[j]) for k in range(self.mu)
                            ) / self.sigma**2  # rank-mu update
                self.C[i][j] += (-c1a - self.cmu) * self.C[i][j] \
                    + self.c1 * self.pc[i]*self.pc[j] + self.cmu * Cmuij

        # Adapt step-size sigma with factor <= exp(0.6) \approx 1.82
        self.sigma *= exp(min(0.6, (self.cs / self.damps) *
                                   (sum(x**2 for x in self.ps)/N - 1)/2))
        # chiN = (1-1./(4.*N)+1./(21.*N**2))
        # self.sigma *= exp(min(0.6, (self.cs/self.damps) *
        #     ((sum(x**2 for x in self.ps) / N)**0.5 / chiN - 1)))

    def result(self):
        """return (xbest, f(xbest), evaluations_xbest, evaluations,
        iterations, xmean)

        """
        return self.best.get() + (self.counteval,
                                  int(self.counteval / self.lam),
                                  self.xmean)
        
    def disp(self, verb_modulo=1):
        """display some iteration info"""
        iteration = self.counteval / self.lam 

        if iteration == 1 or iteration % (10*verb_modulo) == 0:
            print('evals: ax-ratio max(std)   f-value')
        if iteration <= 2 or iteration % verb_modulo == 0:
            max_std = max([self.C[i][i] for i in range(len(self.C))])**0.5
            print(repr(self.counteval).rjust(5) + ': ' +
                  ' %6.1f %8.1e  ' % (max(self.D) / min(self.D),
                                      self.sigma * max_std) +
                  str(self.fitvals[0]))
            sys.stdout.flush()

        return None


# -----------------------------------------------
class BestSolution(object):
    """container to keep track of the best solution seen"""
    def __init__(self, x=None, f=None, evals=None):
        """take `x`, `f`, and `evals` to initialize the best solution.
        The better solutions have smaller `f`-values. """
        self.x, self.f, self.evals = x, f, evals

    def update(self, arx, arf, evals=None):
        """initialize the best solution with `x`, `f`, and `evals`.
        Better solutions have smaller `f`-values.""" 
        if self.f is None or min(arf) < self.f:
            i = arf.index(min(arf))
            self.x, self.f = arx[i], arf[i]
            self.evals = None if not evals else evals - len(arf) + i + 1
        return self

    def get(self):
        """return ``(x, f, evals)`` """
        return self.x, self.f, self.evals


# -----------------------------------------------
class OptimDataLogger(object):
    """"abstract" base class for a data logger that can be used with an
    `OOOptimizer`"""
    def __init__(self):
        self.optim = None
        self.dat = None

    def register(self, optim):
        self.optim = optim
        return self

    def add(self, optim=None, force=False):
        """abstract method, add a "data point" from the state of optim
        into the logger"""
        raise NotImplementedError('to be implemented in a derived class')

    def disp(self):
        """display some data trace (not implemented)"""
        print('method OptimDataLogger.disp() is not implemented in class '
              + str(type(self)))

    def plot(self):
        """plot data"""
        print('method OptimDataLogger.plot() is not implemented in class '
              + str(type(self)))

    def data(self):
        """return logged data in a dictionary"""
        return self.dat
        

# -----------------------------------------------
class CMAESDataLogger(OptimDataLogger):
    """data logger for class CMAES, that can store and plot data. 
    An even more useful logger would write the data to files. 
    
    """
    
    plotted = 0  
    "plot count for all instances"
    
    def __init__(self, verb_modulo=1):
        """cma is the `OOOptimizer` class instance that is to be logged, 
        `verb_modulo` controls whether logging takes place for each call 
        to the method `add()` 
        
        """
        OptimDataLogger.__init__(self)  # not necessary
        self.modulo = verb_modulo
        self.dat = {'eval': [], 'iter': [], 'stds': [], 'D': [],
                    'sig': [], 'fit': [], 'xm': []}
        self.counter = 0  # number of calls of add
    
    def add(self, cma=None, force=False):
        """append some logging data from CMAES class instance cma, 
        if ``number_of_times_called modulo verb_modulo`` equals zero"""
        cma = self.optim if cma is None else cma
        if type(cma) is not CMAES:
            raise TypeError('logged object must be a CMAES instance')
        dat = self.dat
        self.counter += 1 
        if force and self.counter == 1:
            self.counter = 0 
        if (self.modulo
                and
            (len(dat['eval']) == 0 or cma.counteval != dat['eval'][-1])
                and
            (self.counter < 4 or force or
                int(self.counter) % self.modulo == 0)):
            dat['eval'].append(cma.counteval)
            dat['iter'].append(cma.counteval/cma.lam)
            dat['stds'].append([cma.C[i][i]**0.5
                                for i in range(len(cma.C))])
            dat['D'].append(sorted(cma.D))
            dat['sig'].append(cma.sigma)
            dat['fit'].append(cma.fitvals[0] if hasattr(cma, 'fitvals')
                              and cma.fitvals
                              else None)
            dat['xm'].append([x for x in cma.xmean])
        return self
        
    def plot(self, fig_number=322):
        """plot the stored data in figure fig_number.  
    
        Dependencies: matlabplotlib/pylab. 
        
        Example
        =======
        ::
        
            >> import barecmaes2 as cma
            >> es = cma.CMAES(3 * [0.1], 1)
            >> logger = cma.CMAESDataLogger().register(es)
            >> while not es.stop():
            >>     X = es.ask()
            >>     es.tell(X, [bc.Fcts.elli(x) for x in X])
            >>     logger.add()
            >> logger.plot() 
    
        """
        if pylab is None:
            return None
        pylab.figure(fig_number)

        from pylab import text, hold, plot, ylabel, grid, semilogy, \
            xlabel, draw, show, subplot
            
        dat = self.dat  # dictionary with entries as given in __init__
        if not dat:
            return
        try:  # a hack to get the presumable population size lambda
            strpopsize = ' (popsize~' + str(dat['eval'][-2] -
                                            dat['eval'][-3]) + ')'
        except IndexError:
            strpopsize = ''
         
        # plot fit, Delta fit, sigma
        subplot(221)
        hold(False)
        if dat['fit'][0] is None:
            dat['fit'][0] = dat['fit'][1]
            # should be reverted later, but let's be lazy
        assert dat['fit'].count(None) == 0
        dmin = min(dat['fit'])
        i = dat['fit'].index(dmin)
        dat['fit'][i] = max(dat['fit'])
        dmin2 = min(dat['fit'])
        dat['fit'][i] = dmin 
        semilogy(dat['iter'], [d - dmin + 1e-19 if d >= dmin2 else 
                               dmin2 - dmin for d in dat['fit']], 'c',
                 linewidth=1)
        hold(True)
        semilogy(dat['iter'], [abs(d) for d in dat['fit']], 'b')
        semilogy(dat['iter'][i], abs(dmin), 'r*')
        semilogy(dat['iter'], dat['sig'], 'g')
        ylabel('f-value, Delta-f-value, sigma')
        grid(True)

        # plot xmean
        subplot(222)
        hold(False)
        plot(dat['iter'], dat['xm'])
        hold(True)
        for i in range(len(dat['xm'][-1])):
            text(dat['iter'][0], dat['xm'][0][i], str(i))
            text(dat['iter'][-1], dat['xm'][-1][i], str(i))
        ylabel('mean solution', ha='center')
        grid(True)
        
        # plot D
        subplot(223)
        hold(False)
        semilogy(dat['iter'], dat['D'], 'm')
        xlabel('iterations' + strpopsize)
        ylabel('axes lengths')
        grid(True)

        # plot stds
        subplot(224)
        # if len(gcf().axes) > 1:
        #     sca(pylab.gcf().axes[1])
        # else:
        #     twinx()
        hold(False)
        semilogy(dat['iter'], dat['stds'])
        for i in range(len(dat['stds'][-1])):
            text(dat['iter'][-1], dat['stds'][-1][i], str(i))
        ylabel('coordinate stds disregarding sigma', ha='center')
        grid(True)
        xlabel('iterations' + strpopsize)
        sys.stdout.flush()
        draw()
        show()
        CMAESDataLogger.plotted += 1


#_____________________________________________________________________
#_______________________ Helper Functions _____________________________
#
def eye(dimension):
    """return identity matrix as list of "vectors" """
    m = [dimension * [0] for i in range(dimension)]
    # m = N * [N * [0]] fails because it gives N times the same reference
    for i in range(dimension):
        m[i][i] = 1
    return m


def dot(A, b, t=False):
    """ usual dot product of "matrix" A with "vector" b,
    where A[i] is the i-th row of A. With t=True, A transposed is used.

    :rtype : "vector" (list of float)
    """
    n = len(b)
    if not t:
        m = len(A)  # number of rows, like printed by pprint 
        v = m * [0]
        for i in range(m):
            v[i] = sum(b[j] * A[i][j] for j in range(n))
    else:
        m = len(A[0])  # number of columns 
        v = m * [0]
        for i in range(m):
            v[i] = sum(b[j] * A[j][i] for j in range(n))
    return v


def dot1(a, b):
    """scalar a times vector b"""
    return [a * c for c in b]


def plus(a, b):
    """add vectors, return a + b """
    return [a[i] + b[i] for i in range(len(a))]


def minus(a, b):
    """substract vectors, return a - b"""
    return [a[i] - b[i] for i in range(len(a))]


def argsort(a):
    """return index list to get a in order, ie
    a[argsort(a)[i]] == sorted(a)[i]"""
    return [a.index(val) for val in sorted(a)]


#_____________________________________________________________________
#_________________ Fitness (Objective) Functions _____________________

class Fcts(object):  # instead of a submodule
    """versatile collection of test functions"""
    
    @staticmethod  # syntax available since 2.4 
    def elli(x):
        """ellipsoid test objective function"""
        n = len(x)
        aratio = 1e3
        return sum(x[i]**2 * aratio**(2.*i/(n-1)) for i in range(n))

    @staticmethod
    def sphere(x):
        """sphere, ``sum(x**2)``, test objective function"""
        return sum(x[i]**2 for i in range(len(x)))

    @staticmethod
    def rosenbrock(x):
        """Rosenbrock test objective function"""
        n = len(x)
        if n < 2:
            raise ValueError('dimension must be greater one')
        return sum(100 * (x[i]**2 - x[i+1])**2 + (x[i] - 1)**2 for i
                   in range(n-1))


#____________________________________________________________
#____________________________________________________________
# 
# C and B are arrays rather than matrices, because they are
# addressed via B[i][j], matrices can only be addressed via B[i,j] 

# tred2(N, B, diagD, offdiag);
# tql2(N, diagD, offdiag, B);

# Symmetric Householder reduction to tridiagonal form, translated from
#   JAMA package.

def eig(C):
    """eigendecomposition of a symmetric matrix, much slower than 
    numpy.linalg.eigh, return the eigenvalues and an orthonormal basis 
    of the corresponding eigenvectors, ``(EVals, Basis)``, where 
    
        ``Basis[i]``
            the i-th row of ``Basis`` and columns of ``Basis``, 
        ``[Basis[j][i] for j in range(len(Basis))]``
            the i-th eigenvector with eigenvalue ``EVals[i]``
    
    """
    # class eig(object):
    #     def __call__(self, C):
    
    # Householder transformation of a symmetric matrix V into tridiagonal
    #   form.
    # -> n             : dimension
    # -> V             : symmetric nxn-matrix
    # <- V             : orthogonal transformation matrix:
    #                    tridiag matrix == V * V_in * V^t
    # <- d             : diagonal
    # <- e[0..n-1]     : off diagonal (elements 1..n-1)

    # Symmetric tridiagonal QL algorithm, iterative 
    # Computes the eigensystem from a tridiagonal matrix in roughtly 3N^3
    #    operations
    # -> n     : Dimension. 
    # -> d     : Diagonale of tridiagonal matrix. 
    # -> e[1..n-1] : off-diagonal, output from Householder
    # -> V     : matrix output von Householder
    # <- d     : eigenvalues
    # <- e     : garbage?
    # <- V     : basis of eigenvectors, according to d

    #  tred2(N, B, diagD, offdiag); B=C on input
    #  tql2(N, diagD, offdiag, B); 
    
    #  private void tred2 (int n, double V[][], double d[], double e[]) {
    def tred2(n, V, d, e):
        #  This is derived from the Algol procedures tred2 by
        #  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
        #  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
        #  Fortran subroutine in EISPACK.

        num_opt = False  # factor 1.5 in 30-D
        if num_opt:
            import numpy as np

        for j in range(n): 
            d[j] = V[n-1][j]  # d is output argument

        # Householder reduction to tridiagonal form.
    
        for i in range(n-1, 0, -1):
            # Scale to avoid under/overflow.
            h = 0.0
            if not num_opt:
                scale = 0.0
                for k in range(i):
                    scale = scale + abs(d[k])
            else:
                scale = sum(abs(d[0:i]))
  
            if scale == 0.0:
                e[i] = d[i-1]
                for j in range(i):
                    d[j] = V[i-1][j]
                    V[i][j] = 0.0
                    V[j][i] = 0.0
            else:
  
                # Generate Householder vector.
                if not num_opt:
                    for k in range(i):
                        d[k] /= scale
                        h += d[k] * d[k]
                else:
                    d[:i] /= scale
                    h = np.dot(d[:i], d[:i])
   
                f = d[i-1]
                g = h**0.5
   
                if f > 0:
                    g = -g
   
                e[i] = scale * g
                h -= f * g
                d[i-1] = f - g
                if not num_opt:
                    for j in range(i):
                        e[j] = 0.0
                else:
                    e[:i] = 0.0 
       
                # Apply similarity transformation to remaining columns.
       
                for j in range(i): 
                    f = d[j]
                    V[j][i] = f
                    g = e[j] + V[j][j] * f
                    if not num_opt:
                        for k in range(j+1, i):
                            g += V[k][j] * d[k]
                            e[k] += V[k][j] * f
                        e[j] = g
                    else:
                        e[j+1:i] += V.T[j][j+1:i] * f
                        e[j] = g + np.dot(V.T[j][j+1:i], d[j+1:i])
   
                f = 0.0
                if not num_opt:
                    for j in range(i):
                        e[j] /= h
                        f += e[j] * d[j]
                else:
                    e[:i] /= h
                    f += np.dot(e[:i], d[:i])
   
                hh = f / (h + h)
                if not num_opt:
                    for j in range(i): 
                        e[j] -= hh * d[j]
                else:
                    e[:i] -= hh * d[:i]
                    
                for j in range(i): 
                    f = d[j]
                    g = e[j]
                    if not num_opt:
                        for k in range(j, i): 
                            V[k][j] -= (f * e[k] + g * d[k])
                    else:
                        V.T[j][j:i] -= (f * e[j:i] + g * d[j:i])
                        
                    d[j] = V[i-1][j]
                    V[i][j] = 0.0
  
            d[i] = h
        # end for i--
    
        # Accumulate transformations.

        for i in range(n-1): 
            V[n-1][i] = V[i][i]
            V[i][i] = 1.0
            h = d[i+1]
            if h != 0.0:
                if not num_opt:
                    for k in range(i+1): 
                        d[k] = V[k][i+1] / h
                else:
                    d[:i+1] = V.T[i+1][:i+1] / h
                   
                for j in range(i+1): 
                    if not num_opt:
                        g = 0.0
                        for k in range(i+1): 
                            g += V[k][i+1] * V[k][j]
                        for k in range(i+1): 
                            V[k][j] -= g * d[k]
                    else:
                        g = np.dot(V.T[i+1][0:i+1], V.T[j][0:i+1])
                        V.T[j][:i+1] -= g * d[:i+1]
 
            if not num_opt:
                for k in range(i+1): 
                    V[k][i+1] = 0.0
            else:
                V.T[i+1][:i+1] = 0.0

        if not num_opt:
            for j in range(n): 
                d[j] = V[n-1][j]
                V[n-1][j] = 0.0
        else:
            d[:n] = V[n-1][:n]
            V[n-1][:n] = 0.0

        V[n-1][n-1] = 1.0
        e[0] = 0.0

    # Symmetric tridiagonal QL algorithm, taken from JAMA package.
    # private void tql2 (int n, double d[], double e[], double V[][]) {
    # needs roughly 3N^3 operations
    def tql2(n, d, e, V):
        #  This is derived from the Algol procedures tql2, by
        #  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
        #  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
        #  Fortran subroutine in EISPACK.
    
        num_opt = False  # using vectors from numpy make it faster

        if not num_opt:
            for i in range(1, n):  # (int i = 1; i < n; i++):
                e[i-1] = e[i]
        else:
            e[0:n-1] = e[1:n]
        e[n-1] = 0.0
    
        f = 0.0
        tst1 = 0.0
        eps = 2.0**-52.0
        for l in range(n):  # (int l = 0; l < n; l++) {

            # Find small subdiagonal element
     
            tst1 = max(tst1, abs(d[l]) + abs(e[l]))
            m = l
            while m < n: 
                if abs(e[m]) <= eps*tst1:
                    break
                m += 1
 
            # If m == l, d[l] is an eigenvalue,
            # otherwise, iterate.
     
            if m > l:
                iiter = 0
                while 1:  # do {
                    iiter += 1  # (Could check iteration count here.)
     
                    # Compute implicit shift
     
                    g = d[l]
                    p = (d[l+1] - g) / (2.0 * e[l])
                    r = (p**2 + 1)**0.5  # hypot(p, 1.0)
                    if p < 0:
                        r = -r
 
                    d[l] = e[l] / (p + r)
                    d[l+1] = e[l] * (p + r)
                    dl1 = d[l+1]
                    h = g - d[l]
                    if not num_opt: 
                        for i in range(l+2, n):
                            d[i] -= h
                    else:
                        d[l+2:n] -= h
 
                    f = f + h
     
                    # Implicit QL transformation.
     
                    p = d[m]
                    c = 1.0
                    c2 = c
                    c3 = c
                    el1 = e[l+1]
                    s = 0.0
                    s2 = 0.0
 
                    # hh = V.T[0].copy()  # only with num_opt
                    for i in range(m-1, l-1, -1):
                        # (int i = m-1; i >= l; i--) {
                        c3 = c2
                        c2 = c
                        s2 = s
                        g = c * e[i]
                        h = c * p
                        r = (p**2 + e[i]**2)**0.5  # hypot(p,e[i])
                        e[i+1] = s * r
                        s = e[i] / r
                        c = p / r
                        p = c * d[i] - s * g
                        d[i+1] = h + s * (c * g + s * d[i])
     
                        # Accumulate transformation.
     
                        if not num_opt:  # overall factor 3 in 30-D
                            for k in range(n):  # (int k = 0; k < n; k++){
                                h = V[k][i+1]
                                V[k][i+1] = s * V[k][i] + c * h
                                V[k][i] = c * V[k][i] - s * h
                        else:  # about 20% faster in 10-D
                            hh = V.T[i+1].copy()
                            # hh[:] = V.T[i+1][:]
                            V.T[i+1] = s * V.T[i] + c * hh
                            V.T[i] = c * V.T[i] - s * hh
                            # V.T[i] *= c
                            # V.T[i] -= s * hh
 
                    p = -s * s2 * c3 * el1 * e[l] / dl1
                    e[l] = s * p
                    d[l] = c * p
     
                    # Check for convergence.
                    if abs(e[l]) <= eps*tst1:
                        break
                # } while (Math.abs(e[l]) > eps*tst1);
 
            d[l] += f
            e[l] = 0.0

        # Sort eigenvalues and corresponding vectors.
        if 11 < 3:
            for i in range(n-1):  # (int i = 0; i < n-1; i++) {
                k = i
                p = d[i]
                for j in range(i+1, n):  # (int j = i+1; j < n; j++) {
                    if d[j] < p:  # NH find smallest k>i
                        k = j
                        p = d[j]

                if k != i: 
                    d[k] = d[i]  # swap k and i
                    d[i] = p   
                    for j in range(n):  # (int j = 0; j < n; j++) {
                        p = V[j][i]
                        V[j][i] = V[j][k]
                        V[j][k] = p
    # tql2
    N = len(C[0])
    V = [C[i][:] for i in range(N)]
    d = N * [0]
    e = N * [0]
    tred2(N, V, d, e)
    tql2(N, d, e, V)
    return d, V


def _test():
    """
    >>> import barecmaes2 as cma
    >>> import random
    >>> random.seed(5)
    >>> x = cma.fmin(cma.Fcts.rosenbrock, 4 * [0.5], 0.5, verb_plot=0)
    evals: ax-ratio max(std)   f-value
        8:     1.0  4.3e-01  8.17705253283
       16:     1.1  4.2e-01  66.2003009423
      800:    19.1  3.8e-02  0.0423501042891
     1600:    46.1  4.7e-06  1.14801950628e-11
     1736:    59.2  5.4e-07  1.26488753189e-13
    termination by {'tolfun': 1e-12}
    best f-value = 4.93281796387e-14
    solution = [0.9999999878867273, 0.9999999602211, 0.9999999323618144, 0.9999998579200512]

  """
  
    import doctest
    print('launching doctest')
    doctest.testmod(report=True)  # module test
    print("""done (ideally no line between launching and done was printed,
        however slight deviations are possible)""")

#_____________________________________________________________________
#_____________________________________________________________________
#
if __name__ == "__main__":

    _test()

    # fmin(Fcts.rosenbrock, 10 * [0.5], 0.5)