U
    9hѱ                     @   sb  d dl Zd dlmZmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d	d
lmZ d	dlmZ d	dlmZmZmZ d	dlmZ d	dlmZmZmZm Z m!Z! G dd de"Z#d4ddZ$d5ddZ%dd Z&dd Z'dd Z(d6d d!Z)d7d"d#Z*d8d$d%Z+d&d' Z,d(d) Z-d*d+ Z.d,d- Z/G d.d/ d/eeZ0d0d1 Z1G d2d3 d3Z2dS )9    N)BaseEstimatorClusterMixin)KDTreeBallTree)	coo_array)minimum_spanning_tree)Memory)Paralleldelayed)	cpu_count   )DistanceMetric)label)CondensedTreeSingleLinkageTreeApproximationGraph)approximate_predict)get_branchescondense_treerecurse_leaf_dfscompute_stabilitysimplify_branch_hierarchyc                   @   s$   e Zd ZdZeedZdddZdS )	BranchDetectionDataa  Input data for branch detection functionality.

    Recreates and caches internal data structures from the clustering stage.

    Parameters
    ----------

    data : array (n_samples, n_features)
        The original data set that was clustered.

    labels : array (n_samples)
        The cluster labels for every point in the data set.

    min_samples : int
        The min_samples value used in clustering.

    tree_type : string, optional
        Which type of space tree to use for core distance computation.
        One of:
            * ``kdtree``
            * ``balltree``

    metric : string, optional
        The metric used to determine distance for the clustering.
        This is the metric that will be used for the space tree to determine
        core distances etc.

    **kwargs :
        Any further arguments to the metric.

    Attributes
    ----------

    all_finite : bool
        Whether the data set contains any infinite or NaN values.

    finite_index : array (n_samples)
        The indices of the finite data points in the original data set.

    internal_to_raw : dict
        A mapping from the finite data set indices to the original data set.

    tree : KDTree or BallTree
        A space partitioning tree that can be queried for nearest neighbors if
        the metric is supported by a KDTree or BallTree.

    neighbors : array (n_samples, min_samples)
        The nearest neighbor for every non-noise point in the original data set.

    core_distances : array (n_samples)
        The core distance for every non-noise point in the original data set.

    dist_metric : callable
        Accelerated distance metric function.
    )kdtreeZballtreer   	euclideanc                 K   s  || _ || _|tj}	|sL|| }|	| }	dd ttt||D | _nd | _| j	| |	fd|i|| _
tj|f|| _t|	jd tj| _tj|	jd |fdtjd| _|dk}
|
 r| j
j|	|
 |d\}| j|
d d f< |d d df | j|
< d S )Nc                 S   s   i | ]\}}||qS  r   ).0xyr   r   c/var/www/html/CrowdFlow/HYROX/ble_analysis_env_py38/lib/python3.8/site-packages/hdbscan/branches.py
<dictcomp>e   s     z0BranchDetectionData.__init__.<locals>.<dictcomp>metricr   Zdtype)k)
all_finitefinite_indexastypenpZfloat64ziprangeleninternal_to_raw_tree_type_maptreer   Z
get_metricdist_metricfullshapenancore_distancesZint64	neighborsanyquery)selfdatar%   r&   labelsZmin_samplesZ	tree_typer!   kwargsZ
clean_dataZ
noise_maskZ	distancesr   r   r   __init__S   s*    
 zBranchDetectionData.__init__N)r   r   )__name__
__module____qualname____doc__r   r   r-   r;   r   r   r   r   r      s
   8
	  r   Fr0   eom        c                 C   sn  | j dkrtd| jdkr$td|dkr2| j}t|}tt|tjrT|dksdtd| dtt|tj	r~|dkstd| d|d	krtd
| d|dkrtd| d| j
}t|trt|dd}t| j}	| j}
| j}| jjs| jj}|
| }
|| }|dk}| j}|dk r>tt d | d}|rJt n
t|dd}|jtdgd|
|| j | jj| jj| jj| jj|	||d
\}}}}|jtdgd|||||||d\}}}}|t |
|||||||d\}
}}}}| jjsV| jj!}t"|| t#|| t| j}t$|
||}
t%|||}t$|||}t%|||}t%|||}|
|||||||||f
S )aR  
    Performs a flare-detection post-processing step to detect branches within
    clusters [1]_.

    For each cluster, a graph is constructed connecting the data points based on
    their mutual reachability distances. Each edge is given a centrality value
    based on how far it lies from the cluster's center. Then, the edges are
    clustered as if that centrality was a distance, progressively removing the
    'center' of each cluster and seeing how many branches remain.

    Parameters
    ----------

    clusterer : hdbscan.HDBSCAN
        The clusterer object that has been fit to the data with branch detection
        data generated.

    min_branch_size : int, optional (default=None)
        The minimum number of samples in a group for that group to be
        considered a branch; groupings smaller than this size will seen as
        points falling out of a branch. Defaults to the clusterer's min_cluster_size.

    allow_single_branch : bool, optional (default=False)
        Analogous to ``allow_single_cluster``.

    branch_detection_method : str, optional (default=``full``)
        Deteremines which graph is conctructed to detect branches with. Valid
        values are, ordered by increasing computation cost and decreasing
        sensitivity to noise:
        - ``core``: Contains the edges that connect each point to all other
          points within a mutual reachability distance lower than or equal to
          the point's core distance. This is the cluster's subgraph of the
          k-NN graph over the entire data set (with k = ``min_samples``).
        - ``full``: Contains all edges between points in each cluster with a
          mutual reachability distance lower than or equal to the distance of
          the most-distance point in each cluster. These graphs represent the
          0-dimensional simplicial complex of each cluster at the first point in
          the filtration where they contain all their points.

    branch_selection_method : str, optional (default='eom')
        The method used to select branches from the cluster's condensed tree.
        The standard approach for FLASC is to use the ``eom`` approach.
        Options are:
          * ``eom``
          * ``leaf``

    branch_selection_persistence: float, optional (default=0.0)
        An eccentricity persistence threshold. Branches with a persistence below
        this value will be merged. See [3]_ for more information. Note that this
        should not be used if we want to predict the cluster labels for new
        points in future (e.g. using approximate_predict), as the
        :func:`~flasc.prediction.approximate_predict` function is not aware of
        this argument.

    max_branch_size : int, optional (default=0)
        A limit to the size of clusters returned by the ``eom`` algorithm.
        Has no effect when using ``leaf`` clustering (where clusters are
        usually small regardless). Note that this should not be used if we
        want to predict the cluster labels for new points in future (e.g. using
        :func:`~flasc.prediction.approximate_predict`), as that function is
        not aware of this argument.

    label_sides_as_branches : bool, optional (default=False),
        When this flag is False, branches are only labelled for clusters with at
        least three branches (i.e., at least y-shapes). Clusters with only two
        branches represent l-shapes. The two branches describe the cluster's
        outsides growing towards each other. Enableing this flag separates these
        branches from each other in the produced labelling.

    Returns
    -------
    labels : np.ndarray, shape (n_samples, )
        Labels that differentiate all subgroups (clusters and branches). Noisy
        samples are given the label -1.

    probabilities : np.ndarray, shape (n_samples, )
        Probabilities considering both cluster and branch membership. Noisy
        samples are assigned 0.

    branch_labels : np.ndarray, shape (n_samples, )
        Branch labels for each point. Noisy samples are given the label -1.

    branch_probabilities : np.ndarray, shape (n_samples, )
        Branch membership strengths for each point. Noisy samples are
        assigned 0.

    branch_persistences : tuple (n_clusters)
        A branch persistence (eccentricity range) for each detected branch.

    cluster_approximation_graphs : tuple (n_clusters)
        The graphs used to detect branches in each cluster stored as a numpy
        array with four columns: source, target, centrality, mutual reachability
        distance. Points are labelled by their row-index into the input data.
        The edges contained in the graphs depend on the ``branch_detection_method``:
        - ``core``: Contains the edges that connect each point to all other
          points in a cluster within a mutual reachability distance lower than
          or equal to the point's core distance. This is an extension of the
          minimum spanning tree introducing only edges with equal distances. The
          reachability distance introduces ``num_points`` * ``min_samples`` of
          such edges.
        - ``full``: Contains all edges between points in each cluster with a
          mutual reachability distance lower than or equal to the distance of
          the most-distance point in each cluster. These graphs represent the
          0-dimensional simplicial complex of each cluster at the first point in
          the filtration where they contain all their points.

    cluster_condensed_trees : tuple (n_clusters)
        A condensed branch hierarchy for each cluster produced during the
        branch detection step. Data points are numbered with in-cluster ids.

    cluster_linkage_trees : tuple (n_clusters)
        A single linkage tree for each cluster produced during the branch
        detection step, in the scipy hierarchical clustering format.
        (see http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html).
        Data points are numbered with in-cluster ids.

    cluster_centralities : np.ndarray, shape (n_samples, )
        Centrality values for each point in a cluster. Overemphasizes points'
        eccentricity within the cluster as the values are based on minimum
        spanning trees that do not contain the equally distanced edges resulting
        from the mutual reachability distance.

    cluster_points : list (n_clusters)
        The data point row indices for each cluster.

    References
    ----------
    .. [1] Bot, D. M., Peeters, J., Liesenborgs J., & Aerts, J. (2023, November).
       FLASC: A Flare-Sensitive Clustering Algorithm: Extending HDBSCAN* for
       Detecting Branches in Clusters. arXiv:2311.15887
    NzClusterer does not have an explicit minimum spannning tree! Try fitting with branch_detection_data=True or gen_min_span_tree=True set.zClusterer does not have branch detection data! Try fitting with branch_detection_data=True set, or run generate_branch_detection_data on the clusterer   z;min_branch_size must be an integer greater or equal to 2,  z given.rA   zFbranch_selection_persistence must be a float greater or equal to 0.0, )r@   leafz!Invalid branch_selection_method: z!
Should be one of: "eom", "leaf"
)corer0   z%Invalid ``branch_detection_method``: z"
Should be one of: "core", "full"
r   )verboserD   r   )n_jobsZ
max_nbytesthread_pool)ignore)run_coremin_branch_sizeallow_single_branchbranch_selection_methodbranch_selection_persistencemax_branch_size)label_sides_as_branches)&Z_min_spanning_tree
ValueErrorbranch_detection_data_Zmin_cluster_sizefloatr(   Z
issubdtypetypeintegerZfloatingmemory
isinstancestrr   r+   Zcluster_persistence_labels_probabilities_r%   r&   Zcore_dist_n_jobsmaxr   SequentialPoolr	   cache_compute_branch_linkager.   r4   r3   r/   _compute_branch_segmentation_update_labellingr,   _remap_point_lists_remap_edge_lists_remap_labels_remap_probabilities)Z	clustererrK   rL   branch_detection_methodrM   rN   rO   rP   rV   num_clustersr9   probabilitiesr&   rI   Znum_jobsrG   cluster_pointsZcluster_centralitiescluster_linkage_treesZcluster_approximation_graphsbranch_labelsbranch_probabilitiesZbranch_persistencesZcluster_condensed_treesr,   
num_pointsr   r   r   detect_branches_in_clusters|   s     














    rm   c
              	      s@   | fddt |D }
t|
r<tt|
 S dS )Nc                 3   s*   | ]"}t t |	V  qd S N)r
   "_compute_branch_linkage_of_cluster)r   
cluster_idcluster_labelscluster_probabilitiesr3   r/   min_spanning_treer4   rI   
space_treer   r   	<genexpr>  s   z*_compute_branch_linkage.<locals>.<genexpr>r   r   r   r   )r*   r+   tupler)   )rr   rs   rt   ru   r4   r3   r/   rf   rG   rI   resultr   rq   r   r^     s    r^   c	                 C   sH  | |k}	t |	d }
t j| jd dt jd}t jt|
t jd||
< | |dddf t j |k}| |dddf t j |k}|||@  }|j	j
|
 }t j|||	 dd}||d |dddf }d| }|rt||||
 |}n|jd  }t|||
||}t ||dddf t j ||dddf t j |dddf  tt|dddf |dddf t j|dddf t jfft|
t|
fd }t |j|j|j	f}|t |jd ddf }t|}|
|dddf t j |dddf< |
|dddf t j |dddf< |
|||fS )	z#Detect branches within one cluster.r   r"   r#   Nr   weightsaxisrB   )r1   )r(   wherer0   r1   doublearanger+   r'   intpr8   baseaveragepairwise_extract_core_cluster_graphTr[   _extract_full_cluster_graphmaximumr   r   Zint32ZtocooZcolumn_stackrowcolZargsortr   )rr   rs   rt   ru   r4   r3   r/   rI   rp   Zcluster_maskrh   in_cluster_idsZparent_maskZ
child_maskZcluster_mstpointsZcentroidZcentralitiesedgesmax_distZcentrality_mstZlinkage_treer   r   r   ro     sZ             >((ro   c                 C   s  | }|j d }|j d }|j d }tj|||  dftjd}|| dddf tj }	|| dddf tj }
t|	|
|d|df  t|	|
|d|df  ttj	|tjd|}||
  }t||||ddf  t||||ddf  | dddf |d|df< t|||ddf tj |||ddf tj ||ddf  tj||dddf dkddf dd	}|S )
zGCreate a graph connecting all points within each point's core distance.r   r      r#   NrB            r|   )r1   r(   zerosr~   r'   r   minimumr   repeatr   flattenunique)Zcluster_spanning_treer3   r4   r   Zcluster_spanning_tree_viewrl   Znum_neighborscountr   Zmst_parentsZmst_childrenZcore_parentZcore_childrenr   r   r   r     s*    


*r   c              	   C   s@  | j | jj| |d dd\}}tjt|tjd}t|D ]\}}	||  t|	7  < q<ttj	t|tj
d|}
|t| }t|}|dk|
|k @ ||k@ }tj| dftj
d}|
| |d d df< || |d d df< tt||d d df tj ||d d df tj || |d d d	f  |S )
Ng:0yE>T)rZreturn_distancer#   r   r   r   r   r   )Zquery_radiusr8   r   r(   r   r+   r   	enumerater   r   r~   Zconcatenatesumr   r'   )ru   r3   rh   r   r   Zchildren_mapZdistances_mapZnum_childrenichildrenZfull_parentsZfull_childrenZfull_distancesmaskr   r   r   r   r   #  s@    
  
 
r      c                    s6   | fdd| D }t |r2tt| S dS )z/Extracts branches from the linkage hierarchies.c              	   3   s&   | ]}t t| d V  qdS )rJ   N)r
   '_compute_branch_segmentation_of_cluster)r   cluster_linkage_treerL   rM   rN   rO   rK   r   r   rv   U  s   	z/_compute_branch_segmentation.<locals>.<genexpr>rw   )r+   rx   r)   )ri   rG   rK   rL   rM   rN   rO   resultsr   r   r   r_   K  s    
	r_   c                 C   sX   t | |}|dkrt||}t|}t|||||d\}}	}
t|
||dk < ||	|
|fS )z#Select branches within one cluster.rA   )rL   rM   rO   r   )r   r   r   r   r+   )r   rK   rL   rM   rN   rO   Zcondensed_treeZ	stabilityr9   rg   Zpersistencesr   r   r   r   e  s     	
 r   c                 C   s   t | }dtj|tjd }	| }
tj|tjd}tj|tjd}tj|tjd}d}t|||||D ]\}}}}}t |}|||< ||rdndkr||	|< |d7 }qh|| |	|< |||< |||< |
|  |7  < |
|  d  < ||d 7 }qh|	|
|||fS )z1Updates the labelling with the detected branches.r"   r#   r   r   rB   )r+   r(   onesr   copyr   r~   r)   )rr   rs   cluster_points_cluster_centralities_branch_labels_branch_probabilities_branch_persistences_rP   rl   r9   rg   rj   rk   Zbranch_centralitiesZ
running_idZ_pointsZ_centralitiesZ_labelsZ_probsZ_persZnum_branchesr   r   r   r`     s>    
r`   c                 C   s8   | D ].}|D ]$}||d  |d< ||d  |d< qqdS )aU  
    Takes a list of edge lists and replaces the internal indices to raw indices.

    Parameters
    ----------
    edge_lists : list[np.ndarray]
        A list of numpy edgelists with the first two columns indicating
        datapoints.
    internal_to_raw: dict
        A mapping from internal integer index to the raw integer index.
    r   r   Nr   )Z
edge_listsr,   graphedger   r   r   rb     s    rb   c                 C   s0   | D ]&}t t|D ]}|||  ||< qqdS )a/  
    Takes a list of points lists and replaces the internal indices to raw indices.

    Parameters
    ----------
    point_lists : list[np.ndarray]
        A list of numpy arrays with point indices.
    internal_to_raw: dict
        A mapping from internal integer index to the raw integer index.
    N)r*   r+   )Zpoint_listsr,   r   idxr   r   r   ra     s    ra   c                 C   s   t |d}| ||< |S )z7Creates new label array with infinite points set to -1.r"   )r(   r0   )Z
old_labelsr&   rl   Z
new_labelsr   r   r   rc     s    rc   c                 C   s   t |}| ||< |S )z<Creates new probability array with infinite points set to 0.)r(   r   )Z	old_probsr&   rl   Z	new_probsr   r   r   rd     s    
rd   c                   @   sr   e Zd ZdZddd	Zdd
dZdddZdddZdddZe	dd Z
e	dd Ze	dd Ze	dd ZdS )BranchDetectora  Performs a flare-detection post-processing step to detect branches within
    clusters [1]_.

    For each cluster, a graph is constructed connecting the data points based on
    their mutual reachability distances. Each edge is given a centrality value
    based on how far it lies from the cluster's center. Then, the edges are
    clustered as if that centrality was a distance, progressively removing the
    'center' of each cluster and seeing how many branches remain.

    Parameters
    ----------
    min_branch_size : int, optional (default=None)
        The minimum number of samples in a group for that group to be
        considered a branch; groupings smaller than this size will seen as
        points falling out of a branch. Defaults to the clusterer's min_cluster_size.

    allow_single_branch : bool, optional (default=False)
        Analogous to ``allow_single_cluster``.

    branch_detection_method : str, optional (default=``full``)
        Deteremines which graph is conctructed to detect branches with. Valid
        values are, ordered by increasing computation cost and decreasing
        sensitivity to noise:
        - ``core``: Contains the edges that connect each point to all other
          points within a mutual reachability distance lower than or equal to
          the point's core distance. This is the cluster's subgraph of the
          k-NN graph over the entire data set (with k = ``min_samples``).
        - ``full``: Contains all edges between points in each cluster with a
          mutual reachability distance lower than or equal to the distance of
          the most-distance point in each cluster. These graphs represent the
          0-dimensional simplicial complex of each cluster at the first point in
          the filtration where they contain all their points.

    branch_selection_method : str, optional (default='eom')
        The method used to select branches from the cluster's condensed tree.
        The standard approach for FLASC is to use the ``eom`` approach.
        Options are:
          * ``eom``
          * ``leaf``

    branch_selection_persistence: float, optional (default=0.0)
        An eccentricity persistence threshold. Branches with a persistence below
        this value will be merged. See [3]_ for more information. Note that this
        should not be used if we want to predict the cluster labels for new
        points in future (e.g. using approximate_predict), as the
        :func:`~flasc.prediction.approximate_predict` function is not aware of
        this argument.

    max_branch_size : int, optional (default=0)
        A limit to the size of clusters returned by the ``eom`` algorithm.
        Has no effect when using ``leaf`` clustering (where clusters are
        usually small regardless). Note that this should not be used if we
        want to predict the cluster labels for new points in future (e.g. using
        :func:`~flasc.prediction.approximate_predict`), as that function is
        not aware of this argument.

    label_sides_as_branches : bool, optional (default=False),
        When this flag is False, branches are only labelled for clusters with at
        least three branches (i.e., at least y-shapes). Clusters with only two
        branches represent l-shapes. The two branches describe the cluster's
        outsides growing towards each other. Enableing this flag separates these
        branches from each other in the produced labelling.

    Attributes
    ----------
    labels_ : np.ndarray, shape (n_samples, )
        Labels that differentiate all subgroups (clusters and branches). Noisy
        samples are given the label -1.

    probabilities_ : np.ndarray, shape (n_samples, )
        Probabilities considering both cluster and branch membership. Noisy
        samples are assigned 0.

    branch_labels_ : np.ndarray, shape (n_samples, )
        Branch labels for each point. Noisy samples are given the label -1.

    branch_probabilities_ : np.ndarray, shape (n_samples, )
        Branch membership strengths for each point. Noisy samples are
        assigned 0.

    branch_persistences_ : tuple (n_clusters)
        A branch persistence (eccentricity range) for each detected branch.

    cluster_approximation_graphs_ : tuple (n_clusters)
        The graphs used to detect branches in each cluster stored as a numpy
        array with four columns: source, target, centrality, mutual reachability
        distance. Points are labelled by their row-index into the input data.
        The edges contained in the graphs depend on the ``branch_detection_method``:
        - ``core``: Contains the edges that connect each point to all other
          points in a cluster within a mutual reachability distance lower than
          or equal to the point's core distance. This is an extension of the
          minimum spanning tree introducing only edges with equal distances. The
          reachability distance introduces ``num_points`` * ``min_samples`` of
          such edges.
        - ``full``: Contains all edges between points in each cluster with a
          mutual reachability distance lower than or equal to the distance of
          the most-distance point in each cluster. These graphs represent the
          0-dimensional simplicial complex of each cluster at the first point in
          the filtration where they contain all their points.

    cluster_condensed_trees_ : tuple (n_clusters)
        A condensed branch hierarchy for each cluster produced during the
        branch detection step. Data points are numbered with in-cluster ids.

    cluster_linkage_trees_ : tuple (n_clusters)
        A single linkage tree for each cluster produced during the branch
        detection step, in the scipy hierarchical clustering format.
        (see http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html).
        Data points are numbered with in-cluster ids.

    cluster_centralities_ : np.ndarray, shape (n_samples, )
        Centrality values for each point in a cluster. Overemphasizes points'
        eccentricity within the cluster as the values are based on minimum
        spanning trees that do not contain the equally distanced edges resulting
        from the mutual reachability distance.

    cluster_points_ : list (n_clusters)
        The data point row indices for each cluster.

    References
    ----------
    .. [1] Bot, D. M., Peeters, J., Liesenborgs J., & Aerts, J. (2023, November).
       FLASC: A Flare-Sensitive Clustering Algorithm: Extending HDBSCAN* for
       Detecting Branches in Clusters. arXiv:2311.15887
    NFr0   r@   rA   r   c                 C   sF   || _ || _|| _|| _|| _|| _|| _d | _d | _d | _	d | _
d S rn   )rK   rL   re   rM   rN   rO   rP   _cluster_approximation_graphs_cluster_condensed_trees_cluster_linkage_trees_branch_exemplars)r7   rK   rL   re   rM   rN   rO   rP   r   r   r   r;   b  s    
zBranchDetector.__init__c                 C   sF   || _ |  }t|f|\
| _| _| _| _| _| _| _	| _
| _| _| S )aB  
        Perform a flare-detection post-processing step to detect branches within
        clusters.

        Parameters
        ----------
        X : HDBSCAN
            A fitted HDBSCAN object with branch detection data generated.

        Returns
        -------
        self : object
            Returns self.
        )
_clusterer
get_paramsrm   rY   rZ   r   r   r   r   r   r   r   r   )r7   Xr   r:   r   r   r   fity  s    
zBranchDetector.fitc                 C   s   |  || | jS )a  
        Perform a flare-detection post-processing step to detect branches within
        clusters [1]_.

        Parameters
        ----------
        X : HDBSCAN
            A fitted HDBSCAN object with branch detection data generated.

        Returns
        -------
        labels : ndarray, shape (n_samples, )
            subgroup labels differentiated by cluster and branch.
        )r   rY   )r7   r   r   r   r   r   fit_predict  s    zBranchDetector.fit_predictc                 C   sz   | j dkrtd| jjdkr.|dkr.td|dkr>td|dkrN| jj}| j |k}|| }| j| }tj||ddS )a  Provides an approximate representative point for a given branch.
        Note that this technique assumes a euclidean metric for speed of
        computation. For more general metrics use the ``weighted_medoid`` method
        which is slower, but can work with the metric the model trained with.

        Parameters
        ----------
        label_id: int
            The id of the cluster to compute a centroid for.

        data : np.ndarray (n_samples, n_features), optional (default=None)
            A dataset to use instead of the raw data that was clustered on.

        Returns
        -------
        centroid: array of shape (n_features,)
            A representative centroid for cluster ``label_id``.
        NModel has not been fit to dataRaw data not availabler"   MCannot calculate weighted centroid for -1 cluster since it is a noise clusterr   rz   )rY   AttributeErrorr   	_raw_datarQ   rZ   r(   r   )r7   label_idr8   r   cluster_datacluster_membership_strengthsr   r   r   weighted_centroid  s    


z BranchDetector.weighted_centroidc           	      C   s   | j dkrtd| jjdkr.|dkr.td|dkr>td|dkrN| jj}| j |k}|| }| j| }| jjj}||| }t	
|jdd}|| S )a  Provides an approximate representative point for a given branch.

        Note that this technique can be very slow and memory intensive for large
        clusters. For faster results use the ``weighted_centroid`` method which
        is faster, but assumes a euclidean metric.

        Parameters
        ----------
        label_id: int
            The id of the cluster to compute a medoid for.

        data : np.ndarray (n_samples, n_features), optional (default=None)
            A dataset to use instead of the raw data that was clustered on.

        Returns
        -------
        centroid: array of shape (n_features,)
            A representative medoid for cluster ``label_id``.
        Nr   r   r"   r   r   r   )rY   r   r   r   rQ   rZ   rR   r/   r   r(   Zargminr   )	r7   r   r8   r   r   r   r/   Zdist_matZmedoid_indexr   r   r   weighted_medoid  s"    



zBranchDetector.weighted_medoidc              
   C   sB   | j dkrtdt| j | j| j| jj| jj| j| j| j| jj		S )@See :class:`~hdbscan.branches.BranchDetector` for documentation.NzDNo cluster approximation graph was generated; try running fit first.)
r   r   r   rY   rZ   r   r   r   r   r   r7   r   r   r   cluster_approximation_graph_  s    
z+BranchDetector.cluster_approximation_graph_c                    s&    j dkrtd fdd j D S )r   NzANo cluster condensed trees were generated; try running fit first.c                    s   g | ]}t | j jqS r   )r   rM   rL   r   r.   r   r   r   
<listcomp>  s   z;BranchDetector.cluster_condensed_trees_.<locals>.<listcomp>)r   r   r   r   r   r   cluster_condensed_trees_
  s    

z'BranchDetector.cluster_condensed_trees_c                 C   s"   | j dkrtddd | j D S )r   Nz?No cluster linkage trees were generated; try running fit first.c                 S   s   g | ]}t |qS r   )r   r   r   r   r   r     s     z9BranchDetector.cluster_linkage_trees_.<locals>.<listcomp>)r   r   r   r   r   r   cluster_linkage_trees_  s
    
z%BranchDetector.cluster_linkage_trees_c                 C   s\  | j dk	r| j S | jjdkr$td| jdkr6tdt| j}dd | jD }dd | jD }dg| | _ t| jD ]\}}|| }t|| j	rdndkrqvg | j |< | j| }|D ]}t
jg t
jd	}	t|| t
|D ]J}
|d
 |d |
k  }|d |d |
k|d
 |k@  }t
|	|g}	q||	 }| j | | jj|ddf  qqv| j S )r   Nz:Branch exemplars not available with precomputed distances.z,No branches detected; try running fit first.c                 S   s   g | ]}||d  dk qS )Z
child_sizer   r   r   Zbranch_treer   r   r   r   ,  s   z4BranchDetector.branch_exemplars_.<locals>.<listcomp>c                 S   s   g | ]}t | qS r   )sortedZ_select_clustersr   r   r   r   r   0  s   r   rB   r#   Z
lambda_valparentchild)r   r   r   r   r   r+   r   r   r   rP   r(   arrayr   r   r[   Zhstackappend)r7   rf   Zbranch_cluster_treesZselected_branch_idsr   r   Zselected_branchesZraw_condensed_treebranchr   rC   Zleaf_max_lambda
candidatesZidsr   r   r   branch_exemplars_  sJ    







$z BranchDetector.branch_exemplars_)NFr0   r@   rA   r   F)N)N)N)N)r<   r=   r>   r?   r;   r   r   r   r   propertyr   r   r   r   r   r   r   r   r     s,           

 

$
(


r   c                 C   s  t | j|dd\}}}t|}tj|tjd}tj|tjd}tj|tjd}tj|tjd}	| j	shdnd}
t
t|||D ]\}\}}}|dk rd||< q|t| j| |
kr|||< |||< q|| j| ||< | j| ||< | j| |	|< ||	|  d ||< q|||||||	fS )aC  Predict the cluster and branch label of new points.

    Extends ``approximate_predict`` to also predict in which branch
    new points lie (if the cluster they are part of has branches).

    Parameters
    ----------
    branch_detector : BranchDetector
        A clustering object that has been fit to vector inpt data.

    points_to_predict : array, or array-like (n_samples, n_features)
        The new data points to predict cluster labels for. They should
        have the same dimensionality as the original dataset over which
        clusterer was fit.

    Returns
    -------
    labels : array (n_samples,)
        The predicted cluster and branch labels.

    probabilities : array (n_samples,)
        The soft cluster scores for each.

    cluster_labels : array (n_samples,)
        The predicted cluster labels.

    cluster_probabilities : array (n_samples,)
        The soft cluster scores for each.

    branch_labels : array (n_samples,)
        The predicted cluster labels.

    branch_probabilities : array (n_samples,)
        The soft cluster scores for each.
    T)Zreturn_connecting_pointsr#   rB   r   r   r"   )r   r   r+   r(   emptyr   r   r~   r   rP   r   r)   r   rY   r   r   )Zbranch_detectorZpoints_to_predictrr   rs   Zconnecting_pointsZnum_predictr9   rg   rj   rk   Zmin_num_branchesr   r   ZprobZconnecting_pointr   r   r   approximate_predict_branchP  s@    %  


r   c                   @   s    e Zd ZdZdd Zdd ZdS )r\   z6API of a Joblib Parallel pool but sequential executionc                 C   s
   d| _ d S )Nr   )rF   r   r   r   r   r;     s    zSequentialPool.__init__c                 C   s   dd |D S )Nc                 S   s   g | ]\}}}|||qS r   r   )r   Zfunargsr:   r   r   r   r     s     z+SequentialPool.__call__.<locals>.<listcomp>r   )r7   jobsr   r   r   __call__  s    zSequentialPool.__call__N)r<   r=   r>   r?   r;   r   r   r   r   r   r\     s   r\   )NFr0   r@   rA   r   F)F)r   Fr@   rA   r   )r   Fr@   rA   r   )F)3numpyr(   Zsklearn.baser   r   Zsklearn.neighborsr   r   Zscipy.sparser   Zscipy.sparse.csgraphr   Zjoblibr   r	   r
   Zjoblib.parallelr   Zdist_metricsr   Z_hdbscan_linkager   Zplotsr   r   r   Z
predictionr   Z_hdbscan_treer   r   r   r   r   objectr   rm   r^   ro   r   r   r_   r   r`   rb   ra   rc   rd   r   r   r\   r   r   r   r   <module>   sf   	f       
  + 
B'+     
     
# 
3  oI