Source code for SimplicialVolume

import numpy as np
from ctypes import *
from multiprocessing import *
import sklearn.covariance as sk
import scipy.special as scspecial
import sys, os, glob
import platform

if sys.platform=='linux':
    
    for i in sys.path :
        if i.split('/')[-1]=='site-packages':
            ddalpha_exact=glob.glob(i+'/*ddalpha*.so')
            ddalpha_approx=glob.glob(i+'/*depth_wrapper*.so')
    


    libr=CDLL(ddalpha_exact[0])
    libRom=CDLL(ddalpha_approx[0])
    
if sys.platform=='darwin':
    for i in sys.path :
        if i.split('/')[-1]=='site-packages':
            ddalpha_exact=glob.glob(i+'/*ddalpha*.so')
            ddalpha_approx=glob.glob(i+'/*depth_wrapper*.so')
  
    libr=CDLL(ddalpha_exact[0])
    libRom=CDLL(ddalpha_approx[0])

if sys.platform=='win32' and platform.architecture()[0] == "64bit":
    site_packages = next(p for p in sys.path if 'site-packages' in p)
    
    os.add_dll_directory(site_packages)
    ddalpha_exact=glob.glob(site_packages+'/depth/src/*ddalpha*.dll')
    ddalpha_approx=glob.glob(site_packages+'/depth/src/*depth_wrapper*.dll')
    libr=CDLL(r""+ddalpha_exact[0])
    libRom=CDLL(r""+ddalpha_approx[0])
    
if sys.platform=='win32' and platform.architecture()[0] == "32bit":
    site_packages = next(p for p in sys.path if 'site-packages' in p)
    
    os.add_dll_directory(site_packages)
    ddalpha_exact=glob.glob(site_packages+'/depth/src/*ddalpha*.dll')
    ddalpha_approx=glob.glob(site_packages+'/depth/src/*depth_wrapper*.dll')
    libr=CDLL(r""+ddalpha_exact[0])
    libRom=CDLL(r""+ddalpha_approx[0])

def longtoint(k):
  limit = 2000000000
  k1 = int(k/limit)
  k2 = int(k - k1*limit)
  return np.array([k1,k2])

def MCD_fun(data,alpha,NeedLoc=False):
    cov = sk.MinCovDet(support_fraction=alpha).fit(data)
    if NeedLoc:return([cov.covariance_,cov.location_])
    else:return(cov.covariance_)

[docs]def simplicialVolume(x,data,exact=True,k=0.05,mah_estimate="moment", mah_parMCD=0.75,seed=0): points_list=data.flatten() objects_list=x.flatten() if (mah_estimate == "none"): useCov = 0 covEst =np.eye(len(data[0])).flatten() elif (mah_estimate == "moment"): useCov = 1 covEst=np.cov(np.transpose(data)) elif (mah_estimate == "MCD") : useCov = 2 covEst = MCD_fun(data, mah_parMCD) else: print("Wrong argument \"mah.estimate\", should be one of \"moment\", \"MCD\", \"none\"") print("moment is use") useCov = 1 covEst=np.cov(data) points=(c_double*len(points_list))(*points_list) objects=(c_double*len(objects_list))(*objects_list) points=pointer(points) objects=pointer(objects) numPoints=pointer(c_int(len(data))) numObjects=pointer(c_int(len(x))) dimension=pointer(c_int(len(data[0]))) seed=pointer((c_int(seed))) exact=pointer((c_int(exact))) if k<=0: print("k must be positive") print("k=1") k=scspecial.comb(len(data),len(data[0]),exact=True)*k k=pointer((c_int*2)(*longtoint(k))) elif k<=1: k=scspecial.comb(len(data),len(data[0]),exact=True)*k k1=k k=pointer((c_int*2)(*longtoint(k))) else: k=pointer((c_int*2)(*longtoint(k))) useCov=pointer(c_int(useCov)) covEst=covEst.flatten() covEst=pointer((c_double*len(covEst))(*covEst)) depths=pointer((c_double*len(x))(*np.zeros(len(x)))) libr.OjaDepth(points,objects,numPoints,numObjects,dimension,seed, exact, k, useCov, covEst, depths) res=np.zeros(len(x)) for i in range(len(x)): res[i]=depths[0][i] return res
simplicialVolume.__doc__=""" Description Calculates the simpicial volume depth of points w.r.t. a multivariate data set. Arguments x Matrix of objects (numerical vector as one object) whose depth is to be calculated; each row contains a d-variate point. Should have the same dimension as data. data Matrix of data where each row contains a d-variate point, w.r.t. which the depth is to be calculated. exact ``exact=True`` (by default) implies the **exact algorithm**, ``exact=False`` implies the **approximative algorithm**, considering k simplices. k | Number (``k > 1``) or portion (if ``0 < k < 1``) of simplices that are considered if ``exact = F``. | If ``k > 1``, then the algorithmic complexity is polynomial in d but is independent of the number of observations in data, given k. | If ``0 < k < 1``, then the algorithmic complexity is exponential in the number of observations in data, but the calculation precision stays approximately the same. mah_estimate A character string specifying affine-invariance adjustment; can be ``'none'``, ``'moment'`` or ``'MCD'``, determining whether no affine-invariance adjustemt or moment or Minimum Covariance Determinant (MCD) estimates of the covariance are used. By default ``'moment'`` is used. mah_parMcd The value of the argument alpha for the function covMcd is used when, ``mah.estimate='MCD'``. seed The random seed. The default value ``seed=0`` makes no changes. References * Oja, H. (1983). Descriptive statistics for multivariate distributions. *Statistics and Probability Letters*, 1, 327–332. Examples >>> import numpy as np >>> from depth.multivariate import * >>> mat1=[[1, 0, 0],[0, 2, 0],[0, 0, 1]] >>> mat2=[[1, 0, 0],[0, 1, 0],[0, 0, 1]] >>> x = np.random.multivariate_normal([1,1,1], mat2, 10) >>> data = np.random.multivariate_normal([0,0,0], mat1, 20) >>> simplicalVolume(x, data, exact=True) [0.45749049 0.34956166 0.2263421 0.68742137 0.94796538 0.51112415 0.85250931 0.67914988 0.79165292 0.33192247] >>> simplicalVolume(x, data, exact=False, k=0.2) [0.46826813 0.40138917 0.23189724 0.69025277 0.938543 0.56005713 0.8113647 0.72220103 0.82036139 0.33908597] """