# Authors: Leonardo Leone
import numpy as np
from . import docHelp
from . import multivariate as mtv
from typing import Literal, List
try:import torch
except:torch=None
import sys, os
try:os.environ['CUDA_HOME']=os.environ.get('CUDA_PATH').split(";")[0] # Force add cuda path
except:pass
[docs]
class DepthEucl():
"""
Statistical Data-Depth.
Return the depth of each sample w.r.t. a dataset, D(x,data), using a chosen depth notion.
Data depth computes the centrality (similarity, belongness) of a sample 'x' given a dataset 'data.
Parameters
----------
data : {array-like} of shape (n,d).
Reference dataset to compute the depth of a sample x
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
exact : bool, delfaut=True
Whether the depth computation is exact.
mah_estimate : str, {"none", "moment", "mcd"}, default="moment"
Specifying which estimates to use when calculating the depth
mah_parMcd : float, default=0.75
Value of the argument alpha for the function covMcd
solver : str, default="neldermead"
The type of solver used to approximate the depth.
NRandom : int, default=1000
Total number of directions used for approximate depth
n_refinements : int, default = 10
Number of iterations used to approximate the depth
For ``solver='refinedrandom'`` or ``'refinedgrid'``
sphcap_shrink : float, default = 0.5
For ``solver`` = ``refinedrandom`` or `refinedgrid`, it's the shrinking of the spherical cap.
alpha_Dirichlet : float, default = 1.25
For ``solver`` = ``randomsimplices``. it's the parameter of the Dirichlet distribution.
cooling_factor : float, default = 0.95
For ``solver`` = ``randomsimplices``, it's the cooling factor.
cap_size : int | float, default = 1
For ``solver`` = ``simulatedannealing`` or ``neldermead``, it's the size of the spherical cap.
start : str {'mean', 'random'}, default = mean
For ``solver`` = ``simulatedannealing`` or ``neldermead``, it's the method used to compute the first depth.
space : str {'sphere', 'euclidean'}, default = sphere
For ``solver`` = ``coordinatedescent`` or ``neldermead``, it's the type of spacecin which
line_solver : str {'uniform', 'goldensection'}, default = goldensection
For ``solver`` = ``coordinatedescent``, it's the line searh strategy used by this solver.
bound_gc : bool, default = True
For ``solver`` = ``neldermead``, it's ``True`` if the search is limited to the closed hemispher
output_option : str {"lowest_depth","final_depth_dir","all_depth","all_depth_directions}, default = final_depth_dir
Determines what will be computated alongside with the final depth
evaluate_dataset: bool, default=False,
Boolean to determine if the loaded dataset will be evaluate
Attributes
----------
data: {array-like}, default=None,
Returns loaded dataset
{depth-name}Depth : {array-like}, default=None,
Returns the computed depth using {depth-name} notion.
Available for all depth notions.
Example: halfspaceDepth, projectionDepth
{depth-name}Dir : {array-like}, default=None,
Returns the directoion whose {depth-name}Depth corresponds using {depth-name} notion.
Available only for projection-based depths.
Example: halfspaceDir, projectionDir
{depth-name}DepthDS : {array-like}, default=None,
Returns the computed depth of the loaded dataset using {depth-name} notion.
Available for all depth notions.
Example: halfspaceDepthDS, projectionDepthDS
{depth-name}DirDS : {array-like}, default=None,
Returns the directoion whose {depth-name}DepthDS corresponds using {depth-name} notion.
Available only for projection-based depths.
Example: halfspaceDirDS, projectionDirDS
"""
def __init__(self,):
"""
Initialize depthModel instance for statistical depth computation.
"""
self.data=None
self.approxOption=["lowest_depth","final_depth_dir","all_depth","all_depth_directions"]
self.set_seed() # set initial seed
self._create_selfRef() # create self. referecnces for storing depth and directions
[docs]
def load_dataset(self,data:np.ndarray=None,distribution:np.ndarray=None, CUDA:bool=False,y:np.ndarray=None):
"""
Load the dataset X for reference calculations. Depth is computed with respect to this dataset.
Parameters
----------
data : {array-like} of shape (n,d).
Dataset that will be used for depth computation
distribution : Ignored, default=None
Not used, present for API consistency by convention.
CUDA : bool, default=False
Determine with device CUDA will be used
y : Ignored, default=None
Not used, present for API consistency by convention.
Returns
---------
self : DepthEucl model object.
Returns the instance itself.
"""
if type(data)==None:
raise Exception("You must load a dataset")
assert(type(data)==np.ndarray), "The dataset must be a numpy array"
self._nSamples=data.shape[0] # define dataset size - n
self._spaceDim=data.shape[1] # define space dimension - d
if type(distribution)!=type(None):
if distribution.shape[0]!=data.shape[0]:
raise Exception(f"distribution and dataset must have same length, {distribution.shape[0]}!={data.shape[0]}")
self.distribution=distribution # define distributions
self.distRef=np.unique(distribution) # define unique dist
else:
self.distribution=np.repeat(0,data.shape[0])
self.distRef=np.array([0]) # define unique dist
if type(y)!=type(None):
if y.shape[0]!=data.shape[0]:
raise Exception(f"y and dataset must have same length, {y.shape[0]}!={data.shape[0]}")
self.y=y # define y
else:self.y=None
self.CUDA=CUDA
if self.CUDA==False:
self.data=data
if type(torch)!=type(None):
self.device = torch.device("cpu")
elif self.CUDA==True and type(torch)!=type(None):
if torch.cuda.is_available() or torch.backends.mps.is_available():
if torch.backends.mps.is_available():
self.device = torch.device("mps")
elif torch.cuda.is_available():
self.device = torch.device("cuda")
else:
self.device = torch.device("cpu")
self.data=data
# Tensor is transposed to facilitate projection and depth computation
else:
self.device = torch.device("cpu")
self.data=data
print("CUDA is set to True, but cuda is not available")
self.dataCuda=torch.tensor(data.T,device=self.device,dtype=torch.float32)
return self
[docs]
def mahalanobis(self, x: np.ndarray = None, exact: bool = True, mah_estimate: Literal["none", "moment", "mcd"] = "moment",
mah_parMcd: float = 0.75,solver= "neldermead", NRandom= 1000,
n_refinements= 10, sphcap_shrink=0.5,
alpha_Dirichlet= 1.25, cooling_factor=0.95,
cap_size=1, start="mean", space= "sphere",
line_solver="goldensection", bound_gc= True,
output_option:Literal["lowest_depth","final_depth_dir",
"all_depth","all_depth_directions"]="lowest_depth", evaluate_dataset:bool=False):
"""
Mahalanobis depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
output_option : str
Determines the format of the output.
- ``"lowest_depth"`` : single numpy array
- ``"final_depth_dir"`` : tuple of numpy arrays
- ``"all_depth"`` : tuple of numpy arrays
- ``"all_depth_directions"`` : tuple of numpy arrays
Returns
----------
array_like or tuple of array_like
The first return is the lowest comuted depth regarding all explored directions in space.
The second return is the direction that best represents the analyzed point, the direction corresponfing to the lowest depth.
The third return is all the computed depth values with respect to all approximative directions.
The fourth return is all corresponding directions.
If ``output_option=="lowest_depth"`` returns:
array_like
- Lowest Mahalanobis Detph
If ``output_option=="final_depth_dir"`` returns:
Tuple of array_like
- Lowest Mahalanobis Detph
- Lowest depth respective sirection
If ``output_option=="all_depth"`` returns:
array_like
- Lowest Mahalanobis Detph
- Lowest depth respective sirection
- All computed depths
If ``output_option=="all_depth_directions"`` returns:
array_like
- Lowest Mahalanobis Detph
- Lowest depth respective sirection
- All computed depths
- All respective directions
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
if output_option=="all_depth"or output_option=="all_depth_directions":
print(f"output_option is set to {output_option}, only possible for lowest_depth or final_depth_dir, \
automaticaly set to lowest_depth")
output_option="lowest_depth"
self.mahalanobisDepthDS=np.empty((self.distRef.shape[0],x.shape[0]))
else:self.mahalanobisDepth=np.empty((self.distRef.shape[0],x.shape[0]))
self._check_variables(x=x,exact=exact,mah_estimate=mah_estimate,mah_parMcd=mah_parMcd,
NRandom=NRandom, n_refinements=n_refinements, sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet, cooling_factor=cooling_factor,
cap_size=cap_size, output_option=output_option, ) # check if parameters are valid
option=self._determine_option(x,NRandom,output_option,exact=exact) # determine option number
if option>=2:
if evaluate_dataset:self.mahalanobisDirDS=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
else:self.mahalanobisDir=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
if option>=3:
self.allDepth=np.empty((self.distRef.shape[0],x.shape[0],NRandom))
if option==4:
self.allDirections=np.empty((self.distRef.shape[0],x.shape[0],NRandom,x.shape[1]))
for ind, d in enumerate(self.distRef):
DM=mtv.mahalanobis(
x,self.data[self.distribution==d],exact,mah_estimate.lower(),mah_parMcd,
solver=solver, NRandom=NRandom,
n_refinements=n_refinements, sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet, cooling_factor=cooling_factor,
cap_size=cap_size, start=start, space=space,
line_solver=line_solver, bound_gc=bound_gc,option=option,
) #compute depth value
if evaluate_dataset==False:
if exact or option==1:self.mahalanobisDepth[ind]=DM # assign value - exact or option 1
elif option==2:self.mahalanobisDepth[ind],self.mahalanobisDir[ind]=DM # assign value option 2
elif option==3:self.mahalanobisDepth[ind],self.mahalanobisDir[ind],self.allDepth[ind]=DM # assign value option 3
elif option==4:self.mahalanobisDepth[ind],self.mahalanobisDir[ind],self.allDepth[ind],self.allDirections[ind],_=DM # assign value option 4
elif evaluate_dataset==True:
if exact or option==1:self.mahalanobisDepthDS[ind]=DM # assign value - exact or option 1
elif option==2:self.mahalanobisDepthDS[ind],self.mahalanobisDirDS[ind]=DM # assign value option 2
if self.distRef.shape[0]==1: #fix for one distribution
if evaluate_dataset:
self.mahalanobisDepthDS=self.mahalanobisDepthDS[0]
if option==2:self.mahalanobisDirDS=self.mahalanobisDirDS[0]
else:
self.mahalanobisDepth=self.mahalanobisDepth[0]
if option>=2:self.mahalanobisDir=self.mahalanobisDir[0]
if option>=3:self.allDepth=self.allDepth[0]
if option>=4:self.allDirections=self.allDirections[0]
if evaluate_dataset==False:
if exact or option==1:return self.mahalanobisDepth
if option==2:return self.mahalanobisDepth,self.mahalanobisDir
if option==3:return self.mahalanobisDepth,self.mahalanobisDir,self.allDepth
if option==4:return self.mahalanobisDepth,self.mahalanobisDir,self.allDepth,self.allDirections
elif evaluate_dataset==True:
if exact or option==1:return self.mahalanobisDepthDS
if option==2:return self.mahalanobisDepthDS,self.mahalanobisDirDS
[docs]
def aprojection(self,x:np.ndarray=None,solver: str = "neldermead", NRandom: int = 1000,
n_refinements: int = 10, sphcap_shrink: float = 0.5, alpha_Dirichlet: float = 1.25,
cooling_factor: float = 0.95,cap_size: int = 1, start: str = "mean", space: str = "sphere",
line_solver: str = "goldensection", bound_gc: bool = True,
output_option:Literal["lowest_depth","final_depth_dir",
"all_depth","all_depth_directions"]="lowest_depth", evaluate_dataset:bool=False,
CUDA:bool=False):
"""
Compute asymmetric projection depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
output_option : str
Determines the format of the output.
- ``"lowest_depth"`` : single numpy array
- ``"final_depth_dir"`` : tuple of numpy arrays
- ``"all_depth"`` : tuple of numpy arrays
- ``"all_depth_directions"`` : tuple of numpy arrays
Returns
----------
array_like or tuple of array_like
The first return is the lowest comuted depth regarding all explored directions in space.
The second return is the direction that best represents the analyzed point, the direction corresponfing to the lowest depth.
The third return is all the computed depth values with respect to all approximative directions.
The fourth return is all corresponding directions.
If ``output_option=="lowest_depth"`` returns:
array_like
- Lowest Asymmetrical Projection Detph
If ``output_option=="final_depth_dir"`` returns:
Tuple of array_like
- Lowest Asymmetrical Projection Detph
- Lowest depth respective sirection
If ``output_option=="all_depth"`` returns:
array_like
- Lowest Asymmetrical Projection Detph
- Lowest depth respective sirection
- All computed depths
If ``output_option=="all_depth_directions"`` returns:
array_like
- Lowest Asymmetrical Projection Detph
- Lowest depth respective sirection
- All computed depths
- All respective directions
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
if output_option=="all_depth"or output_option=="all_depth_directions":
print(f"output_option is set to {output_option}, only possible for lowest_depth or final_depth_dir, \
automaticaly set to lowest_depth")
output_option="lowest_depth"
self.aprojectionDepthDS=np.empty((self.distRef.shape[0],x.shape[0]))
else:self.aprojectionDepth=np.empty((self.distRef.shape[0],x.shape[0]))
self._check_variables(x=x, solver=solver, NRandom=NRandom,n_refinements=n_refinements, sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet, cooling_factor=cooling_factor,cap_size=cap_size,
) # check if parameters are valid
option=self._determine_option(x,NRandom,output_option) # determine option number
if option>=2:
if evaluate_dataset:self.aprojectionDirDS=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
else:self.aprojectionDir=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
if option>=3:
self.allDepth=np.empty((self.distRef.shape[0],x.shape[0],NRandom))
if option==4:
self.allDirections=np.empty((self.distRef.shape[0],x.shape[0],NRandom,x.shape[1]))
for ind,d in enumerate(self.distRef):
if CUDA and self.CUDA:DAP=mtv.aprojection(x=x,data=self.dataCuda[:,self.distribution==d],solver=solver,NRandom=NRandom,option=option,
n_refinements=n_refinements, sphcap_shrink=sphcap_shrink, alpha_Dirichlet=alpha_Dirichlet, cooling_factor=cooling_factor,
cap_size=cap_size,start=start,space=space,line_solver=line_solver,bound_gc=bound_gc,CUDA=CUDA,device=self.device) #compute depth value
else:DAP=mtv.aprojection(x=x,data=self.data[self.distribution==d],solver=solver,NRandom=NRandom,option=option,
n_refinements=n_refinements, sphcap_shrink=sphcap_shrink, alpha_Dirichlet=alpha_Dirichlet, cooling_factor=cooling_factor,
cap_size=cap_size,start=start,space=space,line_solver=line_solver,bound_gc=bound_gc,CUDA=CUDA,device=self.device) #compute depth value
if evaluate_dataset==False:
if option==1:self.aprojectionDepth[ind]=DAP # assign val option 1
elif option==2:self.aprojectionDepth[ind],self.aprojectionDir[ind]=DAP # assign value option 2
elif option==3:self.aprojectionDepth[ind],self.aprojectionDir[ind],self.allDepth[ind]=DAP # assign value option 3
elif option==4:self.aprojectionDepth[ind],self.aprojectionDir[ind],self.allDepth[ind],self.allDirections[ind],_=DAP # assign value option 4
elif evaluate_dataset==True:
if option==1:self.aprojectionDepthDS[ind]=DAP # assign val option 1
elif option==2:self.aprojectionDepthDS[ind],self.aprojectionDirDS[ind]=DAP # assign value option 2
if self.distRef.shape[0]==1: #fix for one distribution
if evaluate_dataset:
self.aprojectionDepthDS=self.aprojectionDepthDS[0]
if option==2:self.aprojectionDirDS=self.aprojectionDirDS[0]
else:
self.aprojectionDepth=self.aprojectionDepth[0]
if option>=2:self.aprojectionDir=self.aprojectionDir[0]
if option>=3:self.allDepth=self.allDepth[0]
if option>=4:self.allDirections=self.allDirections[0]
if evaluate_dataset==False: # return correct value
if option==1:return self.aprojectionDepth
if option==2:return self.aprojectionDepth,self.aprojectionDir
if option==3:return self.aprojectionDepth,self.aprojectionDir,self.allDepth
if option==4:return self.aprojectionDepth,self.aprojectionDir,self.allDepth,self.allDirections
elif evaluate_dataset==True:
if option==1:return self.aprojectionDepthDS
if option==2:return self.aprojectionDepthDS,self.aprojectionDirDS
[docs]
def betaSkeleton(self,x:np.ndarray=None, beta:int=2,distance: str = "Lp",
Lp_p: int = 2, mah_estimate: str = "moment", mah_parMcd: float = 0.75, evaluate_dataset:bool=False):
"""
Calculates the beta-skeleton depth.
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
Results
----------
Beta-skeleton depth : array_like
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
self.betaSkeletonDepthDS=np.empty((self.distRef.shape[0],x.shape[0]))
else:
self.betaSkeletonDepth=np.empty((self.distRef.shape[0],x.shape[0]))
self._check_variables(x=x,mah_estimate=mah_estimate,mah_parMcd=mah_parMcd) #check validity
for ind,d in enumerate(self.distRef):
DB=mtv.betaSkeleton(x=x,data=self.data[self.distribution==d],beta=beta,distance=distance, Lp_p=Lp_p,
mah_estimate=mah_estimate,mah_parMcd=mah_parMcd) # compute depth
if evaluate_dataset==False: self.betaSkeletonDepth[ind]=DB
if evaluate_dataset==True: self.betaSkeletonDepthDS[ind]=DB
if self.distRef.shape[0]==1:
if evaluate_dataset==True:self.betaSkeletonDepthDS=self.betaSkeletonDepthDS[0]
else: self.betaSkeletonDepth=self.betaSkeletonDepth[0]
return self.betaSkeletonDepthDS if evaluate_dataset==True else self.betaSkeletonDepth
[docs]
def cexpchull(self,x: np.ndarray=None,solver:str= "neldermead",NRandom:int = 1000,
n_refinements:int = 10, sphcap_shrink:float = 0.5,
alpha_Dirichlet:float = 1.25,cooling_factor:float = 0.95,
cap_size:float = 1,start:str = "mean",space:str = "sphere",
line_solver:str = "goldensection",bound_gc:bool = True,
output_option:Literal["lowest_depth","final_depth_dir",
"all_depth","all_depth_directions"]="lowest_depth", evaluate_dataset:bool=False):
"""
Compute approximately the continuous explected convex hull depth of all samples w.r.t. the dataset.
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
Results
----------
Continuous explected convex hull depth : array_like
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
if output_option=="all_depth"or output_option=="all_depth_directions":
print(f"output_option is set to {output_option}, only possible for lowest_depth or final_depth_dir, \
automaticaly set to lowest_depth")
output_option="lowest_depth"
self.cexpchullDepthDS=np.empty((self.distRef.shape[0],x.shape[0]))
else:self.cexpchullDepth=np.empty((self.distRef.shape[0],x.shape[0]))
self._check_variables(
x=x,NRandom =NRandom,output_option =output_option,n_refinements =n_refinements,
sphcap_shrink=sphcap_shrink,alpha_Dirichlet =alpha_Dirichlet,
cooling_factor=cooling_factor,cap_size =cap_size,
) # check if parameters are valid
option=self._determine_option(x,NRandom,output_option) # determine option number
if option>=2:
if evaluate_dataset:self.cexpchullDirDS=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
else:self.cexpchullDir=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
if option>=3:
self.allDepth=np.empty((self.distRef.shape[0],x.shape[0],NRandom))
if option==4:
self.allDirections=np.empty((self.distRef.shape[0],x.shape[0],NRandom,x.shape[1]))
for ind,d in enumerate(self.distRef):
DC=mtv.cexpchull(
x=x, data=self.data[self.distribution==d],solver=solver,NRandom=NRandom,option=option,n_refinements=n_refinements,
sphcap_shrink=sphcap_shrink,alpha_Dirichlet =alpha_Dirichlet,cooling_factor=cooling_factor,
cap_size =cap_size,start =start,space =space,line_solver =line_solver,bound_gc =bound_gc,
) # compute depth
if evaluate_dataset==False:
if option==1:self.cexpchullDepth[ind]=DC # assign value
elif option==2:self.cexpchullDepth[ind],self.cexpchullDir[ind]=DC # assign value
elif option==3:self.cexpchullDepth[ind],self.cexpchullDir[ind],self.allDepth[ind]=DC # assign value
elif option==4:self.cexpchullDepth[ind],self.cexpchullDir[ind],self.allDepth[ind],self.allDirections[ind],_=DC # assign value
if evaluate_dataset==True:
if option==1:self.cexpchullDepthDS[ind]=DC # assign value
elif option==2:self.cexpchullDepthDS[ind],self.cexpchullDirDS[ind]=DC # assign value
if self.distRef.shape[0]==1: #fix for one distribution
if evaluate_dataset:
self.cexpchullDepthDS=self.cexpchullDepthDS[0]
if option==2:self.cexpchullDirDS=self.cexpchullDirDS[0]
else:
self.cexpchullDepth=self.cexpchullDepth[0]
if option>=2:self.cexpchullDir=self.cexpchullDir[0]
if option>=3:self.allDepth=self.allDepth[0]
if option>=4:self.allDirections=self.allDirections[0]
if evaluate_dataset==False: # return correct value
if option==1:return self.cexpchullDepth
if option==2:return self.cexpchullDepth,self.cexpchullDir
if option==3:return self.cexpchullDepth,self.cexpchullDir,self.allDepth
if option==4:return self.cexpchullDepth,self.cexpchullDir,self.allDepth,self.allDirections
elif evaluate_dataset==True:
if option==1:return self.cexpchullDepthDS
if option==2:return self.cexpchullDepthDS,self.cexpchullDirDS
[docs]
def cexpchullstar(self,x: np.ndarray=None, solver: str = "neldermead", NRandom: int = 1000,
option: int = 1, n_refinements: int = 10, sphcap_shrink: float = 0.5,
alpha_Dirichlet: float = 1.25, cooling_factor: float = 0.95, cap_size: int = 1,
start: str = "mean", space: str = "sphere", line_solver: str = "goldensection", bound_gc: bool = True,
output_option:Literal["lowest_depth","final_depth_dir",
"all_depth","all_depth_directions"]="lowest_depth", evaluate_dataset:bool=False):
"""
Calculates approximately the continuous modified explected convex hull depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
output_option : str
Determines the format of the output.
- ``"lowest_depth"`` : single numpy array
- ``"final_depth_dir"`` : tuple of numpy arrays
- ``"all_depth"`` : tuple of numpy arrays
- ``"all_depth_directions"`` : tuple of numpy arrays
Returns
----------
array_like or tuple of array_like
The first return is the lowest comuted depth regarding all explored directions in space.
The second return is the direction that best represents the analyzed point, the direction corresponfing to the lowest depth.
The third return is all the computed depth values with respect to all approximative directions.
The fourth return is all corresponding directions.
If ``output_option=="lowest_depth"`` returns:
array_like
- Lowest Continuous Modified Explected Convex Hull Detph
If ``output_option=="final_depth_dir"`` returns:
Tuple of array_like
- Lowest Continuous Modified Explected Convex Hull Detph
- Lowest depth respective sirection
If ``output_option=="all_depth"`` returns:
array_like
- Lowest Continuous Modified Explected Convex Hull Detph
- Lowest depth respective sirection
- All computed depths
If ``output_option=="all_depth_directions"`` returns:
array_like
- Lowest Continuous Modified Explected Convex Hull Detph
- Lowest depth respective sirection
- All computed depths
- All respective directions
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
if output_option=="all_depth"or output_option=="all_depth_directions":
print(f"output_option is set to {output_option}, only possible for lowest_depth or final_depth_dir, \
automaticaly set to lowest_depth")
output_option="lowest_depth"
self.cexpchullstarDepthDS=np.empty((self.distRef.shape[0],x.shape[0]))
else:self.cexpchullstarDepth=np.empty((self.distRef.shape[0],x.shape[0]))
self._check_variables(x=x,NRandom=NRandom, n_refinements=n_refinements, sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet, cooling_factor= cooling_factor, cap_size=cap_size,
) # check if parameters are valid
option=self._determine_option(x,NRandom,output_option) # determine option number
if option>=2:
if evaluate_dataset:self.cexpchullstarDirDS=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
else:self.cexpchullstarDir=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
if option>=3:
self.allDepth=np.empty((self.distRef.shape[0],x.shape[0],NRandom))
if option==4:
self.allDirections=np.empty((self.distRef.shape[0],x.shape[0],NRandom,x.shape[1]))
for ind,d in enumerate(self.distRef):
DC=mtv.cexpchullstar(x=x,data=self.data[self.distribution==d], solver=solver, NRandom=NRandom, option=option, n_refinements=n_refinements,
sphcap_shrink=sphcap_shrink, alpha_Dirichlet=alpha_Dirichlet, cooling_factor=cooling_factor,
cap_size=cap_size,start=start, space=space, line_solver=line_solver, bound_gc=bound_gc)
if evaluate_dataset==False:
if option==1:self.cexpchullstarDepth[ind]=DC # assign value
elif option==2:self.cexpchullstarDepth[ind],self.cexpchullstarDir[ind]=DC # assign value
elif option==3:self.cexpchullstarDepth[ind],self.cexpchullstarDir[ind],self.allDepth[ind]=DC # assign value
elif option==4:self.cexpchullstarDepth[ind],self.cexpchullstarDir[ind],self.allDepth[ind],self.allDirections[ind],_=DC # assign value
if evaluate_dataset==True:
if option==1:self.cexpchullstarDepthDS[ind]=DC # assign value
elif option==2:self.cexpchullstarDepthDS[ind],self.cexpchullstarDirDS[ind]=DC # assign value
if self.distRef.shape[0]==1: #fix for one distribution
if evaluate_dataset:
self.cexpchullstarDepthDS=self.cexpchullstarDepthDS[0]
if option==2:self.cexpchullstarDirDS=self.cexpchullstarDirDS[0]
else:
self.cexpchullstarDepth=self.cexpchullstarDepth[0]
if option>=2:self.cexpchullstarDir=self.cexpchullstarDir[0]
if option>=3:self.allDepth=self.allDepth[0]
if option>=4:self.allDirections=self.allDirections[0]
if evaluate_dataset==False: # return correct value
if option==1:return self.cexpchullstarDepth
if option==2:return self.cexpchullstarDepth,self.cexpchullstarDir
if option==3:return self.cexpchullstarDepth,self.cexpchullstarDir,self.allDepth
if option==4:return self.cexpchullstarDepth,self.cexpchullstarDir,self.allDepth,self.allDirections
elif evaluate_dataset==True:
if option==1:return self.cexpchullstarDepthDS
if option==2:return self.cexpchullstarDepthDS,self.cexpchullstarDirDS
[docs]
def geometrical(self,x:np.ndarray=None,solver: str = "neldermead", NRandom: int = 1000, n_refinements: int = 10,
sphcap_shrink: float = 0.5, alpha_Dirichlet: float = 1.25, cooling_factor: float = 0.95,
cap_size: int = 1, start: str = "mean", space: str = "sphere", line_solver: str = "goldensection", bound_gc: bool = True,
output_option:Literal["lowest_depth","final_depth_dir",
"all_depth","all_depth_directions"]="lowest_depth", evaluate_dataset:bool=False):
"""
Compute geometrical depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
output_option : str
Determines the format of the output.
- ``"lowest_depth"`` : single numpy array
- ``"final_depth_dir"`` : tuple of numpy arrays
- ``"all_depth"`` : tuple of numpy arrays
- ``"all_depth_directions"`` : tuple of numpy arrays
Returns
----------
array_like or tuple of array_like
The first return is the lowest comuted depth regarding all explored directions in space.
The second return is the direction that best represents the analyzed point, the direction corresponfing to the lowest depth.
The third return is all the computed depth values with respect to all approximative directions.
The fourth return is all corresponding directions.
If ``output_option=="lowest_depth"`` returns:
array_like
- Lowest Geometrical Detph
If ``output_option=="final_depth_dir"`` returns:
Tuple of array_like
- Lowest Geometrical Detph
- Lowest depth respective sirection
If ``output_option=="all_depth"`` returns:
array_like
- Lowest Geometrical Detph
- Lowest depth respective sirection
- All computed depths
If ``output_option=="all_depth_directions"`` returns:
array_like
- Lowest Geometrical Detph
- Lowest depth respective sirection
- All computed depths
- All respective directions
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
if output_option=="all_depth"or output_option=="all_depth_directions":
print(f"output_option is set to {output_option}, only possible for lowest_depth or final_depth_dir, \
automaticaly set to lowest_depth")
output_option="lowest_depth"
self.geometricalDepthDS=np.empty((self.distRef.shape[0],x.shape[0]))
else:self.geometricalDepth=np.empty((self.distRef.shape[0],x.shape[0]))
self._check_variables(
x=x, NRandom=NRandom, n_refinements=n_refinements, sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet, cooling_factor=cooling_factor,cap_size=cap_size,
)# check if parameters are valid
option=self._determine_option(x,NRandom,output_option) # determine option number
if option>=2:
if evaluate_dataset:self.geometricalDirDS=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
else:self.geometricalDir=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
if option>=3:
self.allDepth=np.empty((self.distRef.shape[0],x.shape[0],NRandom))
if option==4:
self.allDirections=np.empty((self.distRef.shape[0],x.shape[0],NRandom,x.shape[1]))
for ind,d in enumerate(self.distRef):
DG=mtv.geometrical(x=x,data=self.data[self.distribution==d], solver=solver, NRandom=NRandom, option=option, n_refinements=n_refinements,
sphcap_shrink=sphcap_shrink, alpha_Dirichlet=alpha_Dirichlet, cooling_factor=cooling_factor,
cap_size=cap_size,start=start, space=space, line_solver=line_solver, bound_gc=bound_gc)
if evaluate_dataset==False:
if option==1:self.geometricalDepth[ind]=DG # assign value
elif option==2:self.geometricalDepth[ind],self.geometricalDir[ind]=DG # assign value
elif option==3:self.geometricalDepth[ind],self.geometricalDir[ind],self.allDepth[ind]=DG # assign value
elif option==4:self.geometricalDepth[ind],self.geometricalDir[ind],self.allDepth[ind],self.allDirections[ind],_=DG # assign value
if evaluate_dataset==True:
if option==1:self.geometricalDepthDS[ind]=DG # assign value
elif option==2:self.geometricalDepthDS[ind],self.geometricalDirDS[ind]=DG # assign value
if self.distRef.shape[0]==1: #fix for one distribution
if evaluate_dataset:
self.geometricalDepthDS=self.geometricalDepthDS[0]
if option==2:self.geometricalDirDS=self.geometricalDirDS[0]
else:
self.geometricalDepth=self.geometricalDepth[0]
if option>=2:self.geometricalDir=self.geometricalDir[0]
if option>=3:self.allDepth=self.allDepth[0]
if option>=4:self.allDirections=self.allDirections[0]
if evaluate_dataset==False: # return correct value
if option==1:return self.geometricalDepth
if option==2:return self.geometricalDepth,self.geometricalDir
if option==3:return self.geometricalDepth,self.geometricalDir,self.allDepth
if option==4:return self.geometricalDepth,self.geometricalDir,self.allDepth,self.allDirections
elif evaluate_dataset==True:
if option==1:return self.geometricalDepthDS
if option==2:return self.geometricalDepthDS,self.geometricalDirDS
[docs]
def halfspace(self, x:np.ndarray=None,exact: bool = True,method: str = "recursive",solver: str = "neldermead",
NRandom: int = 1000,n_refinements: int = 10,sphcap_shrink: float = 0.5,alpha_Dirichlet: float = 1.25,cooling_factor: float = 0.95,
cap_size: int = 1,start: str = "mean",space: str = "sphere",line_solver: str = "goldensection",bound_gc: bool = True,
CUDA:bool=False,output_option:Literal["lowest_depth","final_depth_dir",
"all_depth","all_depth_directions"]="lowest_depth", evaluate_dataset:bool=False):
"""
Compute Halfspace depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
output_option : str
Determines the format of the output.
- ``"lowest_depth"`` : single numpy array
- ``"final_depth_dir"`` : tuple of numpy arrays
- ``"all_depth"`` : tuple of numpy arrays
- ``"all_depth_directions"`` : tuple of numpy arrays
Returns
----------
array_like or tuple of array_like
The first return is the lowest comuted depth regarding all explored directions in space.
The second return is the direction that best represents the analyzed point, the direction corresponfing to the lowest depth.
The third return is all the computed depth values with respect to all approximative directions.
The fourth return is all corresponding directions.
If ``output_option=="lowest_depth"`` returns:
array_like
- Lowest Halfspace (Tukey) Detph
If ``output_option=="final_depth_dir"`` returns:
Tuple of array_like
- Lowest Halfspace (Tukey) Detph
- Lowest depth respective sirection
If ``output_option=="all_depth"`` returns:
array_like
- Lowest Halfspace (Tukey) Detph
- Lowest depth respective sirection
- All computed depths
If ``output_option=="all_depth_directions"`` returns:
array_like
- Lowest Halfspace (Tukey) Detph
- Lowest depth respective sirection
- All computed depths
- All respective directions
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
if output_option=="all_depth"or output_option=="all_depth_directions":
print(f"output_option is set to {output_option}, only possible for lowest_depth or final_depth_dir, \
automaticaly set to lowest_depth")
output_option="lowest_depth"
self.halfspaceDepthDS=np.empty((self.distRef.shape[0],x.shape[0]))
else:self.halfspaceDepth=np.empty((self.distRef.shape[0],x.shape[0]))
CUDA=self._check_CUDA(CUDA,solver)
if CUDA:exact=False
self._check_variables(x=x,NRandom=NRandom,
n_refinements=n_refinements,sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet,cooling_factor=cooling_factor,cap_size=cap_size,) # check if parameters are valid
option=self._determine_option(x,NRandom,output_option, CUDA=CUDA,exact=exact) # determine option number
if option>=2:
if evaluate_dataset:self.halfspaceDirDS=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
else:self.halfspaceDir=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
if option>=3:
self.allDepth=np.empty((self.distRef.shape[0],x.shape[0],NRandom))
if option==4:
self.allDirections=np.empty((self.distRef.shape[0],x.shape[0],NRandom,x.shape[1]))
for ind,d in enumerate(self.distRef):
if CUDA==True and self.CUDA==True:DH=mtv.halfspace(x=x,data=self.dataCuda[:,self.distribution==d],exact=exact,method=method,
solver=solver,NRandom=NRandom,option=option,n_refinements=n_refinements,sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet,cooling_factor=cooling_factor,cap_size=cap_size,start=start,
space=space,line_solver=line_solver,bound_gc=bound_gc,CUDA=CUDA, device=self.device,
)
elif CUDA==False:DH=mtv.halfspace(x=x,data=self.data[self.distribution==d],exact=exact,method=method,
solver=solver,NRandom=NRandom,option=option,n_refinements=n_refinements,sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet,cooling_factor=cooling_factor,cap_size=cap_size,start=start,
space=space,line_solver=line_solver,bound_gc=bound_gc,CUDA=CUDA,
)
if evaluate_dataset==False:
if option==1 or exact==True:self.halfspaceDepth[ind]=DH # assign value
elif option==2:self.halfspaceDepth[ind],self.halfspaceDir[ind]=DH # assign value
elif option==3:self.halfspaceDepth[ind],self.halfspaceDir[ind],self.allDepth[ind]=DH # assign value
elif option==4:self.halfspaceDepth[ind],self.halfspaceDir[ind],self.allDepth[ind],self.allDirections[ind],_=DH # assign value
if evaluate_dataset==True:
if option==1:self.halfspaceDepthDS[ind]=DH # assign value
elif option==2:self.halfspaceDepthDS[ind],self.halfspaceDirDS[ind]=DH # assign value
if self.distRef.shape[0]==1: #fix for one distribution
if evaluate_dataset:
self.halfspaceDepthDS=self.halfspaceDepthDS[0]
if option==2:self.halfspaceDirDS=self.halfspaceDirDS[0]
else:
self.halfspaceDepth=self.halfspaceDepth[0]
if option>=2:self.halfspaceDir=self.halfspaceDir[0]
if option>=3:self.allDepth=self.allDepth[0]
if option>=4:self.allDirections=self.allDirections[0]
if evaluate_dataset==False: # return correct value
if option==1:return self.halfspaceDepth
if option==2:return self.halfspaceDepth,self.halfspaceDir
if option==3:return self.halfspaceDepth,self.halfspaceDir,self.allDepth
if option==4:return self.halfspaceDepth,self.halfspaceDir,self.allDepth,self.allDirections
elif evaluate_dataset==True:
if option==1:return self.halfspaceDepthDS
if option==2:return self.halfspaceDepthDS,self.halfspaceDirDS
[docs]
def L2(self,x: np.ndarray=None, mah_estimate: str = 'moment', mah_parMcd: float = 0.75, evaluate_dataset:bool=False):
"""
Compute L2 depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
Results
----------
L2 depth : array_like
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
self.L2DepthDS=np.zeros((self.distRef.shape[0], x.shape[0]))
else: # create self
self.L2Depth=np.zeros((self.distRef.shape[0], x.shape[0]))
self._check_variables(x=x,mah_estimate=mah_estimate, mah_parMcd=mah_parMcd) # check if parameters are valid
for ind,d in enumerate(self.distRef): # run distributions
DL2=mtv.L2(x=x,data=self.data[self.distribution==d],
mah_estimate=mah_estimate, mah_parMcd=mah_parMcd)
if evaluate_dataset:self.L2DepthDS[ind]=DL2
else:self.L2Depth[ind]=DL2
if self.distRef.shape[0]==1: # Fix size
if evaluate_dataset==True: self.L2DepthDS=self.L2DepthDS[0]
else: self.L2Depth=self.L2Depth[0]
return self.L2DepthDS if evaluate_dataset==True else self.L2Depth
[docs]
def potential(self,x:np.ndarray=None, pretransform: str = "1Mom", kernel: str = "EDKernel",
mah_parMcd: float = 0.75, kernel_bandwidth: int = 0, evaluate_dataset:bool=False):
"""
Compute potential depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
pretransform: str, default="1Mom"
The method of data scaling.
``'1Mom'`` or ``'NMom'`` for scaling using data moments.
``'1MCD'`` or ``'NMCD'`` for scaling using robust data moments (Minimum Covariance Determinant (MCD).
kernel: str, default="EDKernel"
``'EDKernel'`` for the kernel of type 1/(1+kernel.bandwidth*EuclidianDistance2(x,y)),
``'GKernel'`` [default and recommended] for the simple Gaussian kernel,
``'EKernel'`` exponential kernel: exp(-kernel.bandwidth*EuclidianDistance(x, y)),
``'VarGKernel'`` variable Gaussian kernel, where kernel.bandwidth is proportional to the depth.zonoid of a point.
kernel_bandwidth: int, default=0
the single bandwidth parameter of the kernel. If ``0`` - the Scott`s rule of thumb is used.
Results
----------
Potential depth : array_like
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
self.potentialDepthDS=np.zeros((self.distRef.shape[0], x.shape[0]))
else: # create self
self.potentialDepth=np.zeros((self.distRef.shape[0], x.shape[0]))
self._check_variables(x=x,mah_parMcd=mah_parMcd)# check if parameters are valid
#x: Any, data: Any, pretransform: str = "1Mom", kernel: str = "EDKernel", mah_parMcd: float = 0.75, kernel_bandwidth: int = 0
for ind,d in enumerate(self.distRef):
DP=mtv.potential(x=x, data=self.data[self.distribution==d], pretransform=pretransform, kernel=kernel, mah_parMcd=mah_parMcd, kernel_bandwidth=kernel_bandwidth)
if evaluate_dataset==True: # Dataset evaluation
self.potentialDepthDS[ind]=DP
else:self.potentialDepth[ind]=DP
if self.distRef.shape[0]==1: # Fix size
if evaluate_dataset==True:self.potentialDepthDS=self.potentialDepthDS[0]
else: self.potentialDepth=self.potentialDepth[0]
return self.potentialDepthDS if evaluate_dataset==True else self.potentialDepth
[docs]
def projection(self,x:np.ndarray=None,solver: str = "neldermead",NRandom: int = 1000,n_refinements: int = 10,
sphcap_shrink: float = 0.5,alpha_Dirichlet: float = 1.25,cooling_factor: float = 0.95,
cap_size: int = 1,start: str = "mean",space: str = "sphere",line_solver: str = "goldensection",bound_gc: bool = True,
CUDA:bool=False, output_option:Literal["lowest_depth","final_depth_dir",
"all_depth","all_depth_directions"]="lowest_depth", evaluate_dataset:bool=False):
"""
Compute projection depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
output_option : str
Determines the format of the output.
- ``"lowest_depth"`` : single numpy array
- ``"final_depth_dir"`` : tuple of numpy arrays
- ``"all_depth"`` : tuple of numpy arrays
- ``"all_depth_directions"`` : tuple of numpy arrays
Returns
----------
array_like or tuple of array_like
The first return is the lowest comuted depth regarding all explored directions in space.
The second return is the direction that best represents the analyzed point, the direction corresponfing to the lowest depth.
The third return is all the computed depth values with respect to all approximative directions.
The fourth return is all corresponding directions.
If ``output_option=="lowest_depth"`` returns:
array_like
- Lowest Projection Detph
If ``output_option=="final_depth_dir"`` returns:
Tuple of array_like
- Lowest Projection Detph
- Lowest depth respective sirection
If ``output_option=="all_depth"`` returns:
array_like
- Lowest Projection Detph
- Lowest depth respective sirection
- All computed depths
If ``output_option=="all_depth_directions"`` returns:
array_like
- Lowest Projection Detph
- Lowest depth respective sirection
- All computed depths
- All respective directions
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
if output_option=="all_depth"or output_option=="all_depth_directions":
print(f"output_option is set to {output_option}, only possible for lowest_depth or final_depth_dir, \
automaticaly set to lowest_depth")
output_option="lowest_depth"
self.projectionDepthDS=np.empty((self.distRef.shape[0],x.shape[0]))
else:self.projectionDepth=np.empty((self.distRef.shape[0],x.shape[0]))
self._check_variables(x=x,NRandom=NRandom,
n_refinements=n_refinements,sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet,cooling_factor=cooling_factor,cap_size=cap_size,) # check if parameters are valid
CUDA=self._check_CUDA(CUDA,solver)
option=self._determine_option(x,NRandom,output_option,CUDA) # determine option number
if option>=2:
if evaluate_dataset:self.projectionDirDS=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
else:self.projectionDir=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
if option>=3:
self.allDepth=np.empty((self.distRef.shape[0],x.shape[0],NRandom))
if option==4:
self.allDirections=np.empty((self.distRef.shape[0],x.shape[0],NRandom,x.shape[1]))
for ind,d in enumerate(self.distRef):
if CUDA and self.CUDA:DP=mtv.projection(x=x,data=self.dataCuda[:,self.distribution==d],solver=solver,NRandom=NRandom,option=option,
n_refinements=n_refinements,sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet,cooling_factor=cooling_factor,cap_size=cap_size,start=start,
space=space,line_solver=line_solver,bound_gc=bound_gc,CUDA=CUDA,device=self.device,
)
else:DP=mtv.projection(x=x,data=self.data[self.distribution==d],solver=solver,NRandom=NRandom,option=option,
n_refinements=n_refinements,sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet,cooling_factor=cooling_factor,cap_size=cap_size,start=start,
space=space,line_solver=line_solver,bound_gc=bound_gc,CUDA=CUDA
)
if evaluate_dataset==False:
if option==1:self.projectionDepth[ind]=DP # assign value
elif option==2:self.projectionDepth[ind],self.projectionDir[ind]=DP # assign value
elif option==3:self.projectionDepth[ind],self.projectionDir[ind],self.allDepth[ind]=DP # assign value
elif option==4:self.projectionDepth[ind],self.projectionDir[ind],self.allDepth[ind],self.allDirections[ind],_=DP # assign value
if evaluate_dataset==True:
if option==1:self.projectionDepthDS[ind]=DP # assign value
elif option==2:self.projectionDepthDS[ind],self.projectionDirDS[ind]=DP # assign value
if self.distRef.shape[0]==1: #fix for one distribution
if evaluate_dataset:
self.projectionDepthDS=self.projectionDepthDS[0]
if option==2:self.projectionDirDS=self.projectionDirDS[0]
else:
self.projectionDepth=self.projectionDepth[0]
if option>=2:self.projectionDir=self.projectionDir[0]
if option>=3:self.allDepth=self.allDepth[0]
if option>=4:self.allDirections=self.allDirections[0]
if evaluate_dataset==False: # return correct value
if option==1:return self.projectionDepth
if option==2:return self.projectionDepth,self.projectionDir
if option==3:return self.projectionDepth,self.projectionDir,self.allDepth
if option==4:return self.projectionDepth,self.projectionDir,self.allDepth,self.allDirections
elif evaluate_dataset==True:
if option==1:return self.projectionDepthDS
if option==2:return self.projectionDepthDS,self.projectionDirDS
[docs]
def qhpeeling(self,x:np.ndarray=None, evaluate_dataset:bool=False):
"""
Calculates the convex hull peeling depth.
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
Results
----------
Convex hull peeling depth : array_like
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
self.qhpeelingDepthDS=np.zeros((self.distRef.shape[0], x.shape[0]))
else: # create self
self.qhpeelingDepth=np.zeros((self.distRef.shape[0], x.shape[0]))
self._check_variables(x=x)# check if parameters are valid
for ind,d in enumerate(self.distRef): # run distribution
DQ=mtv.qhpeeling(x=x,data=self.data[self.distribution==d])
if evaluate_dataset==True:self.qhpeelingDepthDS[ind]=DQ
if evaluate_dataset==False:self.qhpeelingDepth[ind]=DQ
if self.distRef.shape[0]==1: # Fix size
if evaluate_dataset==True: self.qhpeelingDepthDS=self.qhpeelingDepthDS[0]
else: self.qhpeelingDepth=self.qhpeelingDepth[0]
return self.qhpeelingDepthDS if evaluate_dataset==True else self.qhpeelingDepth
[docs]
def simplicial(self,x:np.ndarray=None,exact:bool=True,k:float=0.05,evaluate_dataset:bool=False):
"""
Compute simplicial depth.
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
k: float, default=0.05
Number (``k > 1``) or portion (if ``0 < k < 1``) of simplices that are considered if ``exact=False``.
If ``k > 1``, then the algorithmic complexity is polynomial in d but is independent of the number of observations in data, given k.
If ``0 < k < 1``,then the algorithmic complexity is exponential in the number of observations in data,
but the calculation precision stays approximately the same.
Results
----------
Simplicial depth : array_like
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
self.simplicialDepthDS=np.zeros((self.distRef.shape[0], x.shape[0]))
else: # create self
self.simplicialDepth=np.zeros((self.distRef.shape[0], x.shape[0]))
self._check_variables(x=x)# check if parameters are valid
for ind,d in enumerate(self.distRef):
DS=mtv.simplicial(x=x,data=self.data[self.distribution==d],exact=exact,k=k,seed=self.seed)
if evaluate_dataset==False:
self.simplicialDepth[ind]=DS
if evaluate_dataset==True:
self.simplicialDepthDS[ind]=DS
if self.distRef.shape[0]==1: # Fix size
if evaluate_dataset==True: self.simplicialDepthDS=self.simplicialDepthDS[0]
else: self.simplicialDepth=self.simplicialDepth[0]
return self.simplicialDepthDS if evaluate_dataset==True else self.simplicialDepth
[docs]
def simplicialVolume(self,x:np.ndarray=None,exact: bool = True, k: float = 0.05,
mah_estimate: str = "moment", mah_parMCD: float = 0.75,
evaluate_dataset:bool=False):
"""
Compute simplicial volume depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
k: float, default=0.05
Number (``k > 1``) or portion (if ``0 < k < 1``) of simplices that are considered if ``exact=False``.
If ``k > 1``, then the algorithmic complexity is polynomial in d but is independent of the number of observations in data, given k.
If ``0 < k < 1``,then the algorithmic complexity is exponential in the number of observations in data,
but the calculation precision stays approximately the same.
Results
----------
Simplicial volume depth : array_like
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
self.simplicialVolumeDepthDS=np.zeros((self.distRef.shape[0], x.shape[0]))
else: # create self
self.simplicialVolumeDepth=np.zeros((self.distRef.shape[0], x.shape[0]))
self._check_variables(x=x)# check if parameters are valid
for ind,d in enumerate(self.distRef):
DS=mtv.simplicialVolume(x=x,data=self.data[self.distribution==d],
exact=exact,k=k,mah_estimate=mah_estimate,mah_parMCD=mah_parMCD,seed=self.seed)
if evaluate_dataset==True:self.simplicialVolumeDepthDS[ind]=DS
elif evaluate_dataset==False:self.simplicialVolumeDepth[ind]=DS
if self.distRef.shape[0]==1: # Fix size
if evaluate_dataset==True: self.simplicialVolumeDepthDS=self.simplicialVolumeDepthDS[0]
else: self.simplicialVolumeDepth=self.simplicialVolumeDepth[0]
return self.simplicialVolumeDepthDS if evaluate_dataset==True else self.simplicialVolumeDepth
[docs]
def spatial(self,x:np.ndarray=None,mah_estimate:str='moment',mah_parMcd:float=0.75,
evaluate_dataset:bool=False):
"""
Compute spatial depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
Results
----------
Spatial depth : array_like
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
self.spatialDepthDS=np.zeros((self.distRef.shape[0], x.shape[0]))
else: # create self
self.spatialDepth=np.zeros((self.distRef.shape[0], x.shape[0]))
self._check_variables(x=x,mah_estimate=mah_estimate,mah_parMcd=mah_parMcd) # check if parameters are valid
for ind,d in enumerate(self.distRef):
DS=mtv.spatial(x,self.data[self.distribution==d],mah_estimate=mah_estimate,mah_parMcd=mah_parMcd)
if evaluate_dataset==False:self.spatialDepth[ind]=DS
if evaluate_dataset==True:self.spatialDepthDS[ind]=DS
if self.distRef.shape[0]==1: # Fix size
if evaluate_dataset==True: self.spatialDepthDS=self.spatialDepthDS[0]
else: self.spatialDepth=self.spatialDepth[0]
return self.spatialDepthDS if evaluate_dataset==True else self.spatialDepth
[docs]
def zonoid(self,x:np.ndarray=None, exact:bool=True,
solver="neldermead",NRandom=1000,n_refinements=10,
sphcap_shrink=0.5,alpha_Dirichlet=1.25,cooling_factor=0.95,cap_size=1,
start="mean",space="sphere",line_solver="goldensection",bound_gc=True,
output_option:Literal["lowest_depth","final_depth_dir",
"all_depth","all_depth_directions"]="lowest_depth",
evaluate_dataset:bool=False):
"""
Compute zonoide depth
Parameters
----------
x : {array-like} of shape (n_samples,d).
Samples matrix to compute depth
output_option : str
Determines the format of the output.
- ``"lowest_depth"`` : single numpy array
- ``"final_depth_dir"`` : tuple of numpy arrays
- ``"all_depth"`` : tuple of numpy arrays
- ``"all_depth_directions"`` : tuple of numpy arrays
Returns
----------
array_like or tuple of array_like
The first return is the lowest comuted depth regarding all explored directions in space.
The second return is the direction that best represents the analyzed point, the direction corresponfing to the lowest depth.
The third return is all the computed depth values with respect to all approximative directions.
The fourth return is all corresponding directions.
If ``output_option=="lowest_depth"`` returns:
array_like
- Lowest Zonoid Detph
If ``output_option=="final_depth_dir"`` returns:
Tuple of array_like
- Lowest Zonoid Detph
- Lowest depth respective sirection
If ``output_option=="all_depth"`` returns:
array_like
- Lowest Zonoid Detph
- Lowest depth respective sirection
- All computed depths
If ``output_option=="all_depth_directions"`` returns:
array_like
- Lowest Zonoid Detph
- Lowest depth respective sirection
- All computed depths
- All respective directions
"""
if evaluate_dataset==True: # Dataset evaluation
print("x value is set to the loaded dataset")
x=self.data
if output_option=="all_depth"or output_option=="all_depth_directions":
print(f"output_option is set to {output_option}, only possible for lowest_depth or final_depth_dir, \
automaticaly set to lowest_depth")
output_option="lowest_depth"
self.zonoidDepthDS=np.empty((self.distRef.shape[0],x.shape[0]))
else:self.zonoidDepth=np.empty((self.distRef.shape[0],x.shape[0]))
self._check_variables(x=x,exact=exact,
NRandom=NRandom,n_refinements=n_refinements,
sphcap_shrink=sphcap_shrink,alpha_Dirichlet=alpha_Dirichlet,cooling_factor=cooling_factor,
cap_size=cap_size,output_option=output_option) # check if parameters are valid
# seedZ=seed if seed!=self.seed else self.seed #set seed value to default if seed is not passed
option=self._determine_option(x,NRandom,output_option,exact=exact) # determine option number
if option>=2:
if evaluate_dataset:self.zonoidDirDS=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
else:self.zonoidDir=np.empty((self.distRef.shape[0],x.shape[0],x.shape[1]))
if option>=3:
self.allDepth=np.empty((self.distRef.shape[0],x.shape[0],NRandom))
if option==4:
self.allDirections=np.empty((self.distRef.shape[0],x.shape[0],NRandom,x.shape[1]))
for ind,d in enumerate(self.distRef):
DZ=mtv.zonoid(
x,self.data[self.distribution==d],seed=self.seed,exact=exact,
solver=solver,NRandom=NRandom,n_refinements=n_refinements,
sphcap_shrink=sphcap_shrink,alpha_Dirichlet=alpha_Dirichlet,cooling_factor=cooling_factor,
cap_size=cap_size,start=start,space=space,line_solver=line_solver,
bound_gc=bound_gc,option=option) # compute zonoid depth
if evaluate_dataset==False:
if exact or option==1:self.zonoidDepth[ind]=DZ # assign value
elif option==2:self.zonoidDepth[ind],self.zonoidDir[ind]=DZ # assign value
elif option==3:self.zonoidDepth[ind],self.zonoidDir[ind],self.allDepth[ind]=DZ # assign value
elif option==4:self.zonoidDepth[ind],self.zonoidDir[ind],self.allDepth[ind],self.allDirections[ind],_=DZ # assign value
if evaluate_dataset==True:
if exact or option==1:self.zonoidDepthDS[ind]=DZ # assign value
elif option==2:self.zonoidDepthDS[ind],self.zonoidDirDS[ind]=DZ # assign value
if self.distRef.shape[0]==1: #fix for one distribution
if evaluate_dataset:
self.zonoidDepthDS=self.zonoidDepthDS[0]
if option==2:self.zonoidDirDS=self.zonoidDirDS[0]
else:
self.zonoidDepth=self.zonoidDepth[0]
if option>=2:self.zonoidDir=self.zonoidDir[0]
if option>=3:self.allDepth=self.allDepth[0]
if option>=4:self.allDirections=self.allDirections[0]
if evaluate_dataset==False: # return correct value
if option==1:return self.zonoidDepth
if option==2:return self.zonoidDepth,self.zonoidDir
if option==3:return self.zonoidDepth,self.zonoidDir,self.allDepth
if option==4:return self.zonoidDepth,self.zonoidDir,self.allDepth,self.allDirections
elif evaluate_dataset==True:
if option==1:return self.zonoidDepthDS
if option==2:return self.zonoidDepthDS,self.zonoidDirDS
[docs]
def ACA(self,dim:int=2,
sample_size: None = None,
sample: None = None,
notion: str = "projection",
solver: str = "neldermead",
NRandom: int = 100,
n_refinements: int = 10,
sphcap_shrink: float = 0.5,
alpha_Dirichlet: float = 1.25,
cooling_factor: float = 0.95,
cap_size: int = 1,
start: str = "mean",
space: str = "sphere",
line_solver: str = "goldensection",
bound_gc: bool = True):
"""
Computes the abnormal component analysis
Parameters
----------
dim: int, default=2
Number of dimensions to keep in the reduction
sample_size: int, default=None
Size of the dataset (uniform sampling) to be used in the ACA calculation
sample: list[int], default=None
Indices for the dataset to be used in the computation
notion: str, default="projection"
Chosen notion for depth computation
Results
--------
ACA directions for dimensional reduction : array_like
The return respresents directions that best represents anomalies in the dataset.
"""
ACA_tab=mtv.ACA(X=self.data,dim=dim,
sample_size=sample_size,
sample=sample,
notion=notion,
solver=solver,
NRandom=NRandom,
n_refinements=n_refinements,
sphcap_shrink=sphcap_shrink,
alpha_Dirichlet=alpha_Dirichlet,
cooling_factor=cooling_factor,
cap_size=cap_size,
start=start,
space=space,
line_solver=line_solver,
bound_gc=bound_gc)
return ACA_tab
## Det and MCD
def _calcDet(self,mat:np.ndarray):
"""
Computes the determinant of a matrix
Parametres
-----------
mat: {array-like}
Matrix to compute the determinant
Results
-----------
Det: float
determinant of the matrix
"""
# self._check_variables
return mtv.calcDet(mat)
[docs]
def computeMCD(self,mat:np.ndarray=None, h:float=1, mfull: int = 10, nstep: int = 7, hiRegimeCompleteLastComp: bool = True)->None:
"""
Compute Minimum Covariance Determinant (MCD)
Parametres
-----------
mat: {array-like} or None, default=None
Matrix to compute MCD. If set to None, compute the MCD of the loaded dataset
h: int or float, default=1
Represents the amount of data of the dataset used to compute the MCD.
If the value is in the interval [0,1], it is treated as the percentage of dataset,
if the value is in the interval [n/2,n], it is treated as the amount of sample points.
It in the interval ]1,n/2[, the amount is rounded to n/2.
mfull: int, default=10
nstep: int, default=7
Amount of steps to compute MCD
hiRegimeCompleteLastComp: bool, default=True
Results
-----------
Minimum Covariance Determinant (MCD): {array-like}
"""
self._check_variables(h) # check if h is in the acceptable range
if h>0 and h<=1: # transform h in the good value for MCD function
h=int(h*self._nSamples)
elif h<self._nSamples/2:
h=int(self._nSamples/2)
else:h=int(h)
self.MCD=mtv.MCD(self.data,h=h,seed=self.seed,mfull=mfull, nstep=nstep, hiRegimeCompleteLastComp=hiRegimeCompleteLastComp)
return self.MCD
[docs]
def change_dataset(self,newDataset:np.ndarray,newY:np.ndarray=None, newDistribution:np.ndarray=None,keepOld:bool=False,)->None:
"""
Description
------------
Modify loaded dataset.
Arguments
---------
newDataset:np.ndarray
New dataset
newDistribution:np.ndarray|None, default=None,
Distribution related to the dataset
newY:np.ndarray|None, default=None,
Only for convention.
keepOld:bool, default=False,
Boolean to determine if current dataset is kept or not.
If True, newDataset is added in the end of the old one.
Returns
-------
None
"""
if keepOld: # keep old dataset
if self.data.shape[1]!=newDataset.shape[1]:
raise Exception(f"Dimensions must be the same, current dimension is {self.data.shape[1]} and new dimension is {newDataset.shape[1]}")
self.data=np.concatenate((self.data,newDataset), axis=0)
try:self.y=np.concatenate((self.y,newY)) # try for y
except:pass
try:
self.distribution=np.concatenate((self.distribution,newDistribution)) # try for distribution
self.distRef=np.unique(self.distribution)
except:
self.distribution=np.concatenate((self.distribution,np.repeat(0,newDataset.shape[0]))) # try for distribution
else:
self.data=newDataset
try:self.y=newY # try for y
except:pass
try:
self.distribution=newDistribution # try for distribution
self.distRef=np.unique(self.distribution)
except:
self.distribution=np.repeat(0,newDataset.shape[0])
self.distRef=np.unique(self.distribution)
return self
#### auxiliar functions ####
[docs]
def set_seed(self,seed:int=2801)->None:
"""Set seed for computation"""
self.seed=seed
def _check_dataset(self,)->None:
"""Check if the dataset is loaded"""
if type(self.data)==None:
raise Exception("A dataset must be loaded before depth computation")
def _create_selfRef(self,)->None:
"""Initialize all self.depth and self.directions"""
# main direction and depth values
self.cexpchullDir,self.cexpchullDepth=None, None
self.cexpchullstarDir,self.cexpchullstarDepth=None, None
self.geometricalDir,self.geometricalDepth=None, None
self.halfspaceDir,self.halfspaceDepth=None, None
self.mahalanobisDir,self.mahalanobisDepth=None, None
self.projectionDir,self.projectionDepth=None, None
self.aprojectionDir,self.aprojectionDepth=None,None
self.zonoidDir,self.zonoidDepth=None,None
self.potentialDepth=None
self.qhpeelingDepth,self.betaSkeletonDepth,self.L2Depth=None,None,None
self.simplicialVolumeDepth,self.simplicialDepth,self.spatialDepth=None,None,None
# depth and directions for dataset
self.cexpchullDirDS,self.cexpchullDepthDS=None, None
self.cexpchullstarDirDS,self.cexpchullstarDepthDS=None, None
self.geometricalDirDS,self.geometricalDepthDS=None, None
self.halfspaceDirDS,self.halfspaceDepthDS=None, None
self.mahalanobisDirDS,self.mahalanobisDepthDS=None, None
self.projectionDirDS,self.projectionDepthDS=None, None
self.aprojectionDirDS,self.aprojectionDepthDS=None,None
self.zonoidDirDS,self.zonoidDepthDS=None,None
self.potentialDepthDS=None
self.qhpeelingDepthDS,self.betaSkeletonDepthDS,self.L2DepthDS=None,None,None
self.simplicialVolumeDepthDS,self.simplicialDepthDS,self.spatialDepthDS=None,None,None
# MCD
self.MCD=None
# approximate depth and direction
# self.allDepth,self.allDirections,self.dirIndiex=None,None,None
def _determine_option(self,x:np.ndarray,NRandom:int,output_option:str,CUDA:bool=False, exact:bool=False)->int:
"""Determine which is the option number (following the 1 to 4 convention),
with a created criteria to compute option 4 - all depths and directions - of 1Gb to the direction matrix"""
if exact==True: return 1 # only depth
option=self.approxOption.index(output_option)+1 # define option for function return
memorySize=x.size*x.itemsize*NRandom*self.distRef.shape[0]//1048576 # compute an estimate of the memory amount used for option 4
if type(self.distribution)==type(None):
if memorySize>1 and option==4:
print("output_option demands too much memory, output_option automatically set to 'final_direction'")
option=2
if CUDA and option>2:
option=1
print(f"{output_option} is not available for CUDA computation, output_option automatically set to 'lowest_depth'")
return option
def _check_variables(self,**kwargs)->None:
"""Check if passed parameters have valid values"""
self._check_dataset() #check if dataset is loaded
for key, value in kwargs.items():
if key=="x":
assert(type(value)==np.ndarray),f"x must be a numpy array, got {type(value)}"
if key=="exact":
if type(value)!=bool or value not in [0,1]:
raise ValueError(f"exact must be a boolean or [0,1], got {value}.")
if key=="mah_estimate":
assert(type(value)==str), f"mah_estimate must be a string, got {type(value)}"
if value.lower() not in {"none", "moment", "mcd"}:
raise ValueError(f"Only mah_estimate possibilities are {{'none', 'moment', 'mcd'}}, got {value}.")
if key in ["NRandom","n_refinements"]:
assert type(value)==int, f"{key} must be an integer, got {type(value)}"
if key in ["mah_parMcd","sphcap_shrink","alpha_Dirichlet","cooling_factor"]:
assert type(value)==float, f"{key} must be a float, got {type(value)}"
if key=="cap_size":
assert type(value)==float or type(value)==int, f"cap_size must be a float or integer, got {type(value)}"
if key=="output_option":
assert type(value)==str, f"output_option must be a str, got {type(value)}"
if value not in self.approxOption:
raise ValueError(f"Only output_option possibilities are {self.approxOption}, got {value}.")
if key=="h":
assert type(value)==int or type(value)==float, f"h must be a float or int, got {type(value)}"
if value<=0 or value>self._nSamples:
raise ValueError(f"h must be in the range from 0 to {self._nSamples}, got {value}.")
def _check_CUDA(self,CUDA,solver):
if solver not in ["simplerandom", "refinedrandom"] and CUDA==True:
print(f"CUDA is only available for 'simplerandom', 'refinedrandom', solver is {solver}, CUDA is set to False")
return False
return CUDA
DepthEucl.mahalanobis.__doc__=docHelp.mahalanobis__doc__
DepthEucl.aprojection.__doc__=docHelp.aprojection__doc__
DepthEucl.betaSkeleton.__doc__=docHelp.betaSkeleton__doc__
DepthEucl.cexpchull.__doc__=docHelp.cexpchull__doc__
DepthEucl.cexpchullstar.__doc__=docHelp.cexpchullstar__doc__
DepthEucl.geometrical.__doc__=docHelp.geometrical__doc__
DepthEucl.halfspace.__doc__=docHelp.halfspace__doc__
DepthEucl.L2.__doc__=docHelp.L2__doc__
DepthEucl.potential.__doc__=docHelp.potential__doc__
DepthEucl.projection.__doc__=docHelp.projection__doc__
DepthEucl.qhpeeling.__doc__=docHelp.qhpeeling__doc__
DepthEucl.simplicial.__doc__=docHelp.simplicial__doc__
DepthEucl.simplicialVolume.__doc__=docHelp.simplicialVolume__doc__
DepthEucl.spatial.__doc__=docHelp.spatial__doc__
DepthEucl.zonoid.__doc__=docHelp.zonoid__doc__
DepthEucl.ACA.__doc__=docHelp.ACA__doc__
DepthEucl.change_dataset.__doc__=docHelp.change_dataset__doc__
# depth_mesh.__doc__=mtv.depth_mesh.__doc__
# depth_plot2d.__doc__=mtv.depth_plot2d.__doc__
# _calcDet.__doc__=mtv.calcDet.__doc__