from pylab import *
import random

def bootstrap(data,target):
  """Draw one bootstrap resample of the example set D = (data,target).

  len(data) indices are drawn uniformly, with replacement, and the same
  indices are applied to both data and target so every sampled input
  stays paired with its own target.

  Args:
    data: indexable sequence of inputs.
    target: indexable sequence of targets; must be at least as long as
      data, since it is indexed with positions drawn from range(len(data)).

  Returns:
    [data*,target*]: a two-element list of arrays, each holding
    len(data) sampled entries (both empty when data is empty).
  """
  n_examples = len(data)
  # randrange yields an integer in [0, n_examples) directly.  The previous
  # int(random.uniform(0, n)) relied on the upper endpoint never being
  # returned, which the random module documentation does not guarantee.
  picks = [random.randrange(n_examples) for _ in range(n_examples)]
  sampled_in = [data[k] for k in picks]
  sampled_target = [target[k] for k in picks]
  return [array(sampled_in),array(sampled_target)]

def bagging(train_algo,data,target,n_bootstraps):
  """Fit train_algo on n_bootstraps independent bootstrap resamples of
  the set (data,target).

  Each round draws a fresh resample with bootstrap(data,target) and calls
  train_algo(resampled_data, resampled_target) on it.

  Returns: the list [param_1,...,param_n_bootstraps] of whatever
  train_algo returned for each resample, in training order.
  """
  def fit_one_resample():
    # Resample first, then train: one bootstrap draw per fitted model.
    sample_in, sample_target = bootstrap(data,target)
    return train_algo(sample_in,sample_target)

  return [fit_one_resample() for _ in range(n_bootstraps)]

def bagging_test(test_algo,models,data,target,cost_function):
  """Compute the error of the aggregated (averaged) model incrementally.

  For k = 1..N the predictions of the first k models are averaged per
  example, and the mean of cost_function(target, averaged_prediction)
  over all examples is recorded.  One progress line is printed per k
  (the label says "mse_err", but the value is whatever cost_function
  returns).

  Args:
    test_algo: callable (input, params) -> indexable result; only element
      [0] of the result is used as the prediction.
    models: sequence of N fitted parameter sets, e.g. from bagging().
    data: sequence of inputs.
    target: sequence of targets, indexed in parallel with data.
    cost_function: callable (target, prediction) -> cost.

  Returns: [cost of model1, cost of avg(model1; model2),...,
            cost of avg(model1;...;modelN)].

  Raises:
    ZeroDivisionError: if data is empty while models is not (the mean
      over zero examples is undefined).
  """
  n_examples = len(data)
  # Running per-example sum of the predictions of the models seen so far.
  summed_out = [0 for _ in range(n_examples)]
  err = []
  for n_models, params in enumerate(models, 1):
    # Explicit float divisors: under Python 2, int/int floors, which would
    # silently corrupt the averages for integer predictions or costs.
    n_avg = float(n_models)
    total_cost = 0
    for i in range(n_examples):
      prediction = test_algo(data[i],params)[0]
      summed_out[i] = summed_out[i] + prediction
      total_cost = total_cost + cost_function(target[i],summed_out[i]/n_avg)
    mean_cost = total_cost/float(n_examples)
    # Parenthesised single-argument print emits the same text on Python 2
    # and Python 3.
    print("n_boot %s mse_err %s"%(n_models,mean_cost))
    err.append(mean_cost)
  return err
