Source code for MCD

import numpy as np
from ctypes import *
from scipy.stats import chi2
from depth.multivariate.import_CDLL import libExact

# Python wrapper around the native ``MinimumCovarianceDeterminantEstim``
# routine exposed by ``libExact``.  It marshals the inputs into ctypes
# objects, calls the routine, and copies the d*d output buffer into a
# (d, d) NumPy array.  The user-facing docstring is attached to
# ``MCD.__doc__`` right after this definition.
#
# Parameters: ``data`` (array-like, 1-D or 2-D, one point per row),
# ``h`` (int), ``seed`` (int or None), ``mfull`` (int), ``nstep`` (int),
# ``hiRegimeCompleteLastComp`` (bool).
# Returns: ``numpy.ndarray`` of shape (d, d).
# Raises: ``ValueError`` if ``data`` is neither 1-D nor 2-D.
def MCD(data, h, seed=None, mfull=10, nstep=7, hiRegimeCompleteLastComp=True):
    # Accept any array-like and work on a float view of it.
    data = np.asarray(data, dtype=float)

    # Check the dimensionality explicitly.  Unpacking ``data.shape`` inside a
    # ``try/except ValueError`` would also swallow the "too many values"
    # error raised for >2-D input and silently treat it as univariate data.
    if data.ndim == 1:
        n, d = data.shape[0], 1
    elif data.ndim == 2:
        n, d = data.shape
    else:
        raise ValueError(
            "data must be a 1-D or 2-D array, got %d dimension(s)" % data.ndim
        )

    # Scalars are handed to the native routine through pointers.
    c_h = pointer(c_int(h))
    c_n = pointer(c_int(n))
    c_d = pointer(c_int(d))

    # Row-major copy of the points into a C double array; the copy keeps the
    # caller's data out of reach of the native code.
    flat = data.ravel()
    c_points = pointer((c_double * flat.size)(*flat))

    # A missing seed is signalled by the ``seeded`` flag; the seed slot is
    # then filled with 0 as a placeholder.
    seeded = seed is not None
    c_seed = pointer(c_int(seed if seeded else 0))
    c_seeded = c_bool(seeded)

    # Output buffer for the d*d covariance entries (ctypes zero-initialises).
    cov_buffer = (c_double * (d * d))()
    c_cov = pointer(cov_buffer)

    # Upper-tail chi-square quantiles with d degrees of freedom
    # (tail probabilities 0.5 and 0.025), forwarded to the native routine.
    chi2_d = chi2(d)
    c_chisqr05 = c_double(chi2_d.isf(0.5))
    c_chisqr0975 = c_double(chi2_d.isf(0.025))

    libExact.MinimumCovarianceDeterminantEstim(
        c_points,
        c_n,
        c_d,
        c_h,
        c_seed,
        c_cov,
        c_chisqr05,
        c_chisqr0975,
        c_int(mfull),
        c_int(nstep),
        c_bool(hiRegimeCompleteLastComp),
        c_seeded,
    )

    # The buffer is read row by row: entry (i, j) sits at index i*d + j.
    return np.array(cov_buffer[:], dtype=float).reshape(d, d)
# User-facing documentation for MCD, attached after the definition.
MCD.__doc__ = """

Description
    Calculates the Minimum Covariance Determinant (MCD) covariance matrix.

Arguments
    data
        Matrix of data where each row contains a d-variate point.

    h
        Size of the data subset to use during estimation.

    seed
        Optional integer seed forwarded to the underlying estimation routine.
        If ``None`` (the default), no seed is supplied.

    mfull
        In the high regime n>600, number of best results we keep before
        computing on the full dataset (cf. paper by Rousseeuw and Van Driessen).

    nstep
        In the high n regime, fixed number of steps used for the last
        computations of the final solutions when we do not want to compute
        until convergence (``hiRegimeCompleteLastComp`` is set to ``False``).

    hiRegimeCompleteLastComp
        ``True`` if, in the high n regime, the last computation is carried on
        until convergence of the solutions; ``False`` if a fixed number of
        ``nstep`` steps is used instead.

Returns
    A d-by-d array containing the estimated covariance matrix.

References
    * Peter J. Rousseeuw & Katrien Van Driessen (1999) A Fast Algorithm for
      the Minimum Covariance Determinant Estimator, Technometrics, 41:3,
      212-223

"""