U
    9hO                     @   s   d dl Zd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ ddlmZmZmZ d ZdZd	Zd
Zdd Zdd Zdd ZG dd deZdd Zdd ZG dd deZG dd deZG dd dZdS )    N)
dendrogram)TSNE)PCA)pairwise_distances)warn   )compute_stabilitylabelling_at_cutrecurse_leaf_dfs      c                 C   s:   g }|g}|r6| | | d t| d |  }q
|S )zK
    Perform a breadth first search on a tree in condensed tree format
    childparent)extendnpisintolist)treeZbfs_rootresultZ
to_process r   `/var/www/html/CrowdFlow/HYROX/ble_analysis_env_py38/lib/python3.8/site-packages/hdbscan/plots.py_bfs_from_cluster_tree   s    
r   c                    sB     d |k d }t |dkr&|gS t fdd|D g S d S )Nr   r   r   c                    s   g | ]}t  |qS r   )r
   .0r   cluster_treer   r   
<listcomp>(   s     z%_recurse_leaf_dfs.<locals>.<listcomp>)lensum)r   Zcurrent_nodechildrenr   r   r   _recurse_leaf_dfs#   s    r    c                 C   sB   | | d dk }|j d dkr,| d  gS |d  }t||S )N
child_sizer   r   r   )shapeminr    )Zcondensed_treer   rootr   r   r   _get_leaves*   s
    r%   c                	   @   sN   e Zd ZdZdddZddd	Zd
d ZdddZdd Zdd Z	dd Z
dS )CondensedTreeaa  The condensed tree structure, which provides a simplified or smoothed version
    of the :class:`~hdbscan.plots.SingleLinkageTree`.

    Parameters
    ----------
    condensed_tree_array : numpy recarray from :class:`~hdbscan.HDBSCAN`
        The raw numpy rec array version of the condensed tree as produced
        internally by hdbscan.

    cluster_selection_method : string, optional (default 'eom')
        The method of selecting clusters. One of 'eom' or 'leaf'

    allow_single_cluster : Boolean, optional (default False)
        Whether to allow the root cluster as the only selected cluster

    eomFc                 C   s   || _ || _|| _d S N)	_raw_treecluster_selection_methodallow_single_cluster)selfZcondensed_tree_arrayr*   r+   r   r   r   __init__D   s    zCondensedTree.__init__r      c           '         s  t | j}| jd  }| jd  }t|tjr<| i}n$tt| fddt	t
|D }|di}t	||d dD ]}	| jddg }
|
| jd |	k| jd	 dk@  }
t
|
d dkrx|
d \}}t|| || g||	< |
d d
 ||< |
d d ||< qxg }g }g }g }i }t| j| jd |k d	 }|rDt|}t	||d dD ]}d
d
d
d
g||< | j| jd |k }t|d	 }|| }|}|d  }t||d |k d	 }|rt|}t|}t|}t|| }|| }|| | |d  || t< || | |d  || t< || || t< t|d || t< |}|}t|d D ]}|| }|d |kr|| |ks|d |kr||| |  ||d |  || || |}|}|r"t||d	  } | dkrtt||d	  }nd}n||d	 8 }|d }qjqTg }!g }"| j| jd	 dk D ]}|d }#|d }$|d	 }%|rt|%}%t||$ ||#  }&|!||# | ||$ | |&|%d   g |"||$ ||$ g qZ|||||!|"|dS )a  Generates data for use in plotting the 'icicle plot' or dendrogram
        plot of the condensed tree generated by HDBSCAN.

        Parameters
        ----------
        leaf_separation : float, optional
                          How far apart to space the final leaves of the
                          dendrogram. (default 1)

        log_size : boolean, optional
                   Use log scale for the 'size' of clusters (i.e. number of
                   points in the cluster at a given lambda value).
                   (default False)

        max_rectangles_per_icicle : int, optional
            To simplify the plot this method will only emit
            ``max_rectangles_per_icicle`` bars per branch of the dendrogram.
            This ensures that we don't suffer from massive overplotting in
            cases with a lot of data points.

        Returns
        -------
        plot_data : dict
                    Data associated to bars in a bar plot:
                        `bar_centers` x coordinate centers for bars
                        `bar_tops` heights of bars in lambda scale
                        `bar_bottoms` y coordinate of bottoms of bars
                        `bar_widths` widths of the bars (in x coord scale)
                        `bar_bounds` a 4-tuple of [left, right, bottom, top]
                                     giving the bounds on a full set of
                                     cluster bars
                    Data associates with cluster splits:
                        `line_xs` x coordinates for horizontal dendrogram lines
                        `line_ys` y coordinates for horizontal dendrogram lines
        r   c                    s   g | ]} | qS r   r   r   xleaf_separationr   r   r   {   s   z/CondensedTree.get_plot_data.<locals>.<listcomp>g        r   r   
lambda_valr!   r          @g{Gz?)bar_centersbar_topsbar_bottoms
bar_widthsline_xsline_yscluster_bounds)r%   r)   maxr#   
isinstancer   Zint64dictzipranger   meanr   logfloatCB_LEFTCB_RIGHT	CB_BOTTOMCB_TOPZargsortappendexpsign)'r,   r2   log_sizemax_rectangle_per_icicleleavesZ	last_leafr$   Zcluster_x_coordsZcluster_y_coordsclustersplit
left_childright_childr6   r7   r8   r9   r<   ZscalingcZ
c_childrenZcurrent_sizeZcurrent_lambdaZcluster_max_sizeZcluster_max_lambdaZcluster_min_sizeZtotal_size_changeZstep_size_changeZlast_step_sizeZlast_step_lambdairowZexp_sizer:   r;   r   r   r!   rK   r   r1   r   get_plot_dataJ   s    '

















zCondensedTree.get_plot_datac                    s  | j dkrt| j| jr,t dd}nt ddd d }| j| jd dk }dd |D  |D ]n}|d	 |k}tfd
d|d | D }|| krd |< ||< qjt||D ]}||krd |< qqjt fdd D S | j dkrt	| jS t
dd S )Nr'   T)reverser3   r!   r   c                 S   s   i | ]
}|d qS )Tr   r   rO   r   r   r   
<dictcomp>   s      z2CondensedTree._select_clusters.<locals>.<dictcomp>r   c                    s   g | ]} | qS r   r   r   )	stabilityr   r   r      s   z2CondensedTree._select_clusters.<locals>.<listcomp>r   Fc                    s   g | ]} | r|qS r   r   rX   )
is_clusterr   r   r     s   leafzEInvalid Cluster Selection Method: %s
Should be one of: "eom", "leaf"
)r*   r   r)   r+   sortedkeysr   r   r   r%   
ValueError)r,   Z	node_listr   nodeZchild_selectionZsubtree_stabilityZsub_noder   )r[   rZ   r   _select_clusters   s0    




zCondensedTree._select_clustersviridisNTc
               	      s|  zddl m}
 W n tk
r,   tdY nX | j|||	d |dkr|
jj||
dt d d d  fdd	 d D }nd
}|dkr|
	 }|j
 d  d  d  d |ddd g }t d  d D ]\}}|| || q|j|d
dd |rzddlm} W n tk
rB   tdY nX |  }t fdd	|D }t| s|td t d  d g}|t| }tt|t|g}ttj|ddgd}t|D ].\}} d | }|t |t  }|t |t  }t|t |t gt|t |t gf}t|d sX|d |f}t|sh|}d| }||k r~|}|dk	rt |t |kr|| }nd}||d| d| d|d d!}|r|j!t"|||d d"|  |d d#|  fd$d%d& |#| q|r@|
j$|d'}|r4|j%&d( n|j%&d) |'g  d*D ]}|j(| )d+ qN|*  |&d, |S )-a
  Use matplotlib to plot an 'icicle plot' dendrogram of the condensed tree.

        Effectively this is a dendrogram where the width of each cluster bar is
        equal to the number of points (or log of the number of points) in the cluster
        at the given lambda value. Thus bars narrow as points progressively drop
        out of clusters. The make the effect more apparent the bars are also colored
        according the the number of points (or log of the number of points).

        Parameters
        ----------
        leaf_separation : float, optional (default 1)
                          How far apart to space the final leaves of the
                          dendrogram.

        cmap : string or matplotlib colormap, optional (default viridis)
               The matplotlib colormap to use to color the cluster bars.


        select_clusters : boolean, optional (default False)
                          Whether to draw ovals highlighting which cluster
                          bar represent the clusters that were selected by
                          HDBSCAN as the final clusters.

        label_clusters : boolean, optional (default False)
                         If select_clusters is True then this determines
                         whether to draw text labels on the clusters.

        selection_palette : list of colors, optional (default None)
                            If not None, and at least as long as
                            the number of clusters, draw ovals
                            in colors iterating through this palette.
                            This can aid in cluster identification
                            when plotting.

        axis : matplotlib axis or None, optional (default None)
               The matplotlib axis to render to. If None then a new axis
               will be generated. The rendered axis will be returned.


        colorbar : boolean, optional (default True)
                   Whether to draw a matplotlib colorbar displaying the range
                   of cluster sizes as per the colormap.

        log_size : boolean, optional (default False)
                   Use log scale for the 'size' of clusters (i.e. number of
                   points in the cluster at a given lambda value).


        max_rectangles_per_icicle : int, optional (default 20)
            To simplify the plot this method will only emit
            ``max_rectangles_per_icicle`` bars per branch of the dendrogram.
            This ensures that we don't suffer from massive overplotting in
            cases with a lot of data points.

         Returns
        -------
        axis : matplotlib axis
               The axis on which the 'icicle plot' has been rendered.
        r   NzYou must install the matplotlib library to plot the condensed tree.Use get_plot_data to calculate the relevant data without plotting.)r2   rL   rM   noner9   cmapZnormc                    s   g | ]}  |qS r   )to_rgbar/   )smr   r   r   Y  s     z&CondensedTree.plot.<locals>.<listcomp>blackr6   r7   r8   center)bottomwidthcoloralign	linewidthr:   r;   r   )rl   rn   )EllipsezEYou must have matplotlib.patches available to plot selected clusters.c                    s   g | ]} d  | qS )r<   r   )r   rS   )	plot_datar   r   r   }  s     zcInfinite lambda values encountered in chosen clusters. This might be due to duplicates in the data.
   Z   )qr<   g?rr5   g333333?r   )Z	facecolor	edgecolorrn   g      @g?leftrj   )ZxyZxytextZhorizontalalignmentZverticalalignmentaxlog(Number of points)zNumber of pointsrighttoprj   Fz$\lambda$ value)+matplotlib.pyplotpyplotImportErrorrV   cmScalarMappable	Normalizer=   	set_arraygcabarr@   rI   plotZmatplotlib.patchesro   ra   r   arrayisfiniteallr   ZhstackrB   r#   diffZ
percentile	enumeraterF   rE   rH   rG   r   Zannotatestr
add_artistcolorbarrx   
set_ylabel
set_xticksspinesset_visibleZinvert_yaxis) r,   r2   re   Zselect_clustersZlabel_clustersZselection_paletteaxisr   rL   Zmax_rectangles_per_iciclepltZ
bar_colorsZ	drawlinesZxsZysro   Zchosen_clustersr<   Z
plot_rangeZmean_y_center
max_heightrT   rS   Zc_boundsrk   heightri   Z
min_heightZ
oval_colorboxcbsider   )rp   rg   r   r     s    ?





	

zCondensedTree.plotc                 C   s
   | j  S )zNReturn a numpy structured array representation of the condensed tree.
        )r)   copyr,   r   r   r   to_numpy  s    zCondensedTree.to_numpyc                 C   s@   zddl m}m} W n tk
r0   tdY nX || j}|S )a  Return a pandas dataframe representation of the condensed tree.

        Each row of the dataframe corresponds to an edge in the tree.
        The columns of the dataframe are `parent`, `child`, `lambda_val`
        and `child_size`.

        The `parent` and `child` are the ids of the
        parent and child nodes in the tree. Node ids less than the number
        of points in the original dataset represent individual points, while
        ids greater than the number of points are clusters.

        The `lambda_val` value is the value (1/distance) at which the `child`
        node leaves the cluster.

        The `child_size` is the number of points in the `child` node.
        r   	DataFrameSeries:You must have pandas installed to export pandas DataFrames)pandasr   r   r   r)   )r,   r   r   r   r   r   r   	to_pandas  s    
zCondensedTree.to_pandasc                 C   s~   zddl m}m} W n tk
r0   tdY nX | }| jD ] }|j|d |d |d d q>||t| jddg d	 |S )
a  Return a NetworkX DiGraph object representing the condensed tree.

        Edge weights in the graph are the lamba values at which child nodes
        'leave' the parent cluster.

        Nodes have a `size` attribute attached giving the number of points
        that are in the cluster (or 1 if it is a singleton point) at the
        point of cluster creation (fewer points may be in the cluster at
        larger lambda values).
        r   DiGraphset_node_attributes:You must have networkx installed to export networkx graphsr   r   r4   weightr!   size)networkxr   r   r   r)   add_edger?   )r,   r   r   r   rU   r   r   r   to_networkx  s    
zCondensedTree.to_networkx)r'   F)r   Fr.   )	r   rb   FFNNTFr.   )__name__
__module____qualname____doc__r-   rV   ra   r   r   r   r   r   r   r   r   r&   3   s,     
   
 !!          
 :r&   c                 C   sJ   | |k rg S t t|| |  d ||t t|| |  d || | g S )Nr   r   )_get_dendrogram_orderingint)r   linkager$   r   r   r   r     s    r   c                 C   s   g }| D ]}|||  d |krB|t |||  d |  d }nd}|||  d |kr||t |||  d |  d }nd}|||f q|S )Nr   r   r   )r   rI   )Zorderingr   r$   
linewidthsr0   Z
left_widthZright_widthr   r   r   _calculate_linewidths  s    ""r   c                   @   sD   e Zd ZdZdd Zddd	Zd
d Zdd Zdd ZdddZ	dS )SingleLinkageTreea&  A single linkage format dendrogram tree, with plotting functionality
    and networkX support.

    Parameters
    ----------
    linkage : ndarray (n_samples, 4)
        The numpy array that holds the tree structure. As output by
        scipy.cluster.hierarchy, hdbscan, of fastcluster.

    c                 C   s
   || _ d S r(   )_linkage)r,   r   r   r   r   r-   #  s    zSingleLinkageTree.__init__Nr   Trb   c              
   C   sx  t | j||dd}|d }|d }	zddlm}
 W n tk
rN   tdY nX |dkr`|
 }|rtdt| j | jt| jd	 }t|| jt| jd	 }nd
gt|	 }|dkrt	
t	| }|
jj||
d| d}|| t||	|D ]$\}}}|dd }|dd }|dd }|dd }|d	d }|d	d }|dkr|j|||t	
|d t	
d	|d  ddd |j|||t	
|d	 t	
d	|d	  ddd nH|j||dt	
d	|d  ddd |j||dt	
d	|d	  ddd |j||ddddd q|rD|
j||d}|jd |g  dD ]}|j| d qR|d |S )a  Plot a dendrogram of the single linkage tree.

        Parameters
        ----------
        truncate_mode : str, optional
                        The dendrogram can be hard to read when the original
                        observation matrix from which the linkage is derived
                        is large. Truncation is used to condense the dendrogram.
                        There are several modes:

        ``None/'none'``
                No truncation is performed (Default).

        ``'lastp'``
                The last p non-singleton formed in the linkage are the only
                non-leaf nodes in the linkage; they correspond to rows
                Z[n-p-2:end] in Z. All other non-singleton clusters are
                contracted into leaf nodes.

        ``'level'/'mtica'``
                No more than p levels of the dendrogram tree are displayed.
                This corresponds to Mathematica(TM) behavior.

        p : int, optional
            The ``p`` parameter for ``truncate_mode``.

        vary_line_width : boolean, optional
            Draw downward branches of the dendrogram with line thickness that
            varies depending on the size of the cluster.

        cmap : string or matplotlib colormap, optional
               The matplotlib colormap to use to color the cluster bars.
               A value of 'none' will result in black bars.
               (default 'viridis')

        colorbar : boolean, optional
                   Whether to draw a matplotlib colorbar displaying the range
                   of cluster sizes as per the colormap. (default True)

        Returns
        -------
        axis : matplotlib axis
               The axis on which the dendrogram plot has been rendered.

        T)ptruncate_modeZno_plotZicoordZdcoordr   NzHYou must install the matplotlib library to plot the single linkage tree.r   r   )      ?r   rc   rd   r   ZmiterZbutt)rl   rn   Zsolid_joinstyleZsolid_capstylekr   rw   ry   rz   Fdistance)r   r   r}   r~   r   r   r   r   r   r   log2r   flattenr   r   r   r=   r   r@   r   rf   r   rx   r   r   r   r   )r,   r   r   r   vary_line_widthre   r   Zdendrogram_dataXYr   Zdendrogram_orderingr   Zcolor_arrayrg   r0   yZlwZleft_xZright_xZleft_yZright_yZhorizontal_xZhorizontal_yr   r   r   r   r   r   &  sv    /"

  
 
  

zSingleLinkageTree.plotc                 C   s
   | j  S )a>  Return a numpy array representation of the single linkage tree.

        This representation conforms to the scipy.cluster.hierarchy notion
        of a single linkage tree, and can be used with all the associated
        scipy tools. Please see the scipy documentation for more details
        on the format.
        )r   r   r   r   r   r   r     s    zSingleLinkageTree.to_numpyc                 C   s   zddl m}m} W n tk
r0   tdY nX d| jjd  }|| jjd d  }t||d }||| jjd | jjd | jjd | jjd ddd	d
ddg }|S )a  Return a pandas dataframe representation of the single linkage tree.

        Each row of the dataframe corresponds to an edge in the tree.
        The columns of the dataframe are `parent`, `left_child`,
        `right_child`, `distance` and `size`.

        The `parent`, `left_child` and `right_child` are the ids of the
        parent and child nodes in the tree. Node ids less than the number
        of points in the original dataset represent individual points, while
        ids greater than the number of points are clusters.

        The `distance` value is the at which the child nodes merge to form
        the parent node.

        The `size` is the number of points in the `parent` node.
        r   r   r   r   r   r   )r   rQ   rR   r   r   r   rQ   rR   r   r   )	r   r   r   r   r   r"   r   ZarangeT)r,   r   r   max_node
num_pointsZparent_arrayr   r   r   r   r     s"    



zSingleLinkageTree.to_pandasc           	      C   s   zddl m}m} W n tk
r0   tdY nX d| jjd  }|| jjd d  }| }t| j|D ]8\}}|j||d |d d |j||d |d d qhdd t| j|D }|||d	 |S )
a6  Return a NetworkX DiGraph object representing the single linkage tree.

        Edge weights in the graph are the distance values at which child nodes
        merge to form the parent cluster.

        Nodes have a `size` attribute attached giving the number of points
        that are in the cluster.
        r   r   r   r   r   r   c                 S   s   i | ]\}}||d  qS )r   r   )r   r   rU   r   r   r   rY     s      z1SingleLinkageTree.to_networkx.<locals>.<dictcomp>r   )r   r   r   r   r   r"   r   r   )	r,   r   r   r   r   r   r   rU   Z	size_dictr   r   r   r     s    	zSingleLinkageTree.to_networkx   c                 C   s   t | j||S )a?  Return a flat clustering from the single linkage hierarchy.

        This represents the result of selecting a cut value for robust single linkage
        clustering. The `min_cluster_size` allows the flat clustering to declare noise
        points (and cluster smaller than `min_cluster_size`).

        Parameters
        ----------

        cut_distance : float
            The mutual reachability distance cut value to use to generate a flat clustering.

        min_cluster_size : int, optional
            Clusters smaller than this value with be called 'noise' and remain unclustered
            in the resulting flat clustering.

        Returns
        -------

        labels : array [n_samples]
            An array of cluster labels, one per datapoint. Unclustered points are assigned
            the label -1.
        )r	   r   )r,   Zcut_distanceZmin_cluster_sizer   r   r   get_clusters  s    zSingleLinkageTree.get_clusters)NNr   Trb   T)r   )
r   r   r   r   r-   r   r   r   r   r   r   r   r   r   r     s   
    
l%r   c                	   @   s6   e Zd Zdd ZdddZdd Zdd Zdd ZdS )MinimumSpanningTreec                 C   s   || _ || _d S r(   )_mst_data)r,   Zmstdatar   r   r   r-     s    zMinimumSpanningTree.__init__N(   r   皙?      ?	viridis_rr   Tc
                 C   s  zddl m}
 ddlm} W n tk
r8   tdY nX | jjd dkrVtd dS |dkrf|
 }| jjd dkr| jjd d	krt	d	d

| j}n
| j }t 
|}n
| j }|r|t| jjd  | jjd  d  }n|}|| jddddf t }|||||d}|| jdddf j || |j|jd |jd |||d |g  |g  |	r|
j||d}|jd |S )a  Plot the minimum spanning tree (as projected into 2D by t-SNE if required).

        Parameters
        ----------

        axis : matplotlib axis, optional
               The axis to render the plot to

        node_size : int, optional
                The size of nodes in the plot (default 40).

        node_color : matplotlib color spec, optional
                The color to render nodes (default black).

        node_alpha : float, optional
                The alpha value (between 0 and 1) to render nodes with
                (default 0.8).

        edge_cmap : matplotlib colormap, optional
                The colormap to color edges by (varying color by edge
                    weight/distance). Can be a cmap object or a string
                    recognised by matplotlib. (default `viridis_r`)

        edge_alpha : float, optional
                The alpha value (between 0 and 1) to render edges with
                (default 0.5).

        edge_linewidth : float, optional
                The linewidth to use for rendering edges (default 2).

        vary_line_width : bool, optional
                Edge width is proportional to (log of) the inverse of the
                mutual reachability distance. (default True)

        colorbar : bool, optional
                Whether to draw a colorbar. (default True)

        Returns
        -------

        axis : matplotlib axis
                The axis used the render the plot.
        r   N)LineCollectionzJYou must install the matplotlib library to plot the minimum spanning tree.i  zDToo many data points for safe rendering of an minimal spanning tree!r   r       )Zn_componentsr   )rn   re   alpha)rS   r   srw   zMutual reachability distance)r}   r~   matplotlib.collectionsr   r   r   r"   r   r   r   Zfit_transformr   r   r   rC   r   r   r=   astyper   r   r   scatterr   Z
set_yticksr   rx   r   )r,   r   	node_size
node_color
node_alpha
edge_alpha	edge_cmapZedge_linewidthr   r   r   r   Zdata_for_projectionZ
projection
line_widthZline_coordsZline_collectionr   r   r   r   r     sB    .

,  
 

zMinimumSpanningTree.plotc                 C   s
   | j  S )zLReturn a numpy array of weighted edges in the minimum spanning tree
        )r   r   r   r   r   r   r   Y  s    zMinimumSpanningTree.to_numpyc                 C   sf   zddl m} W n tk
r,   tdY nX || jjd t| jjd t| jjd d}|S )a"  Return a Pandas dataframe of the minimum spanning tree.

        Each row is an edge in the tree; the columns are `from`,
        `to`, and `distance` giving the two vertices of the edge
        which are indices into the dataset, and the distance
        between those datapoints.
        r   r   r   r   r   )fromtor   )r   r   r   r   r   r   r   )r,   r   r   r   r   r   r   ^  s    
zMinimumSpanningTree.to_pandasc                 C   s   zddl m}m} W n tk
r0   tdY nX | }| jD ] }|j|d |d |d d q>dd t| jD }|||d	 |S )
a  Return a NetworkX Graph object representing the minimum spanning tree.

        Edge weights in the graph are the distance between the nodes they connect.

        Nodes have a `data` attribute attached giving the data vector of the
        associated point.
        r   )Graphr   r   r   r   r   c                 S   s   i | ]\}}|t |qS r   )tuple)r   indexrU   r   r   r   rY     s      z3MinimumSpanningTree.to_networkx.<locals>.<dictcomp>r   )r   r   r   r   r   r   r   r   )r,   r   r   r   rU   Z	data_dictr   r   r   r   p  s    
zMinimumSpanningTree.to_networkx)	Nr   r   r   r   r   r   TT)r   r   r   r-   r   r   r   r   r   r   r   r   r     s              
\r   c                   @   s>   e Zd ZdZdddZdd
dZdd Zdd ZdddZdS )ApproximationGrapha  
    Cluster approximation graph describing the connectivity in clusters
    that is used to detect branches.

    Parameters
    ----------
    approximation_graphs : list[np.ndarray], shape (n_clusters),

    labels : np.ndarray, shape (n_samples, )
        cluster and branches labelling.

    probabilities : np.ndarray, shape (n_samples, )
        cluster and branches probabilities.

    cluster_labels : np.ndarray, shape (n_samples, )
        HDBSCAN* labelling.

    cluster_probabilities : np.ndarray, shape (n_samples, )
        HDBSCAN* probabilities.

    cluster_centralities : np.ndarray, shape (n_samples, )
        Within cluster centrality values.

    branch_labels : np.ndarray, shape (n_samples, )
        Within cluster branch labels for each point.

    branch_probabilities : np.ndarray, shape (n_samples, )
        Within cluster branch membership strengths for each point.

    Attributes
    ----------
    point_mask : np.ndarray[bool], shape (n_samples)
        A mask to extract points within clusters from the raw data.
    Nc
           
         s   t jdd t|D dt jfdt jfdt jfdt jfdt jfgd| _d	k| _|	d k	rj|	| jd d f nd | _t j fd
dt | jd	 D dt jfdt jfdt jfdt jfdt jfdt jfdt jfdt jfgd| _	d | _
d S )Nc                 S   s8   g | ]0\}}|D ]"}|d  |d |d |d |fqqS )r   r   r   r   r   )r   rO   edgesedger   r   r   r     s    z/ApproximationGraph.__init__.<locals>.<listcomp>r   r   
centralitymutual_reachabilityrO   )dtyper   c                    s<   g | ]4}|| | | | |  | | fqS r   r   r   rT   branch_labelsbranch_probabilitiescluster_centralitiescluster_labelscluster_probabilitieslabelsprobabilitiesr   r   r     s   idlabelZprobabilityZcluster_labelZcluster_probabilityZcluster_centralityZbranch_labelZbranch_probability)r   r   r   ZintpZfloat64_edges
point_mask	_raw_datawhere_points_pos)
r,   Zapproximation_graphsr   r   r   r   r   r   r   raw_datar   r   r   r-     s8    
 zApproximationGraph.__init__r   rb   r   or   c                 C   s  zddl m} ddlm} W n tk
r8   tdY nX |dkrDnt|tr|| jjj	krd|krd}d}d}| j| d }q| j| }nn| j
dk	r|dk	r||kr||}| j
dd|f }n6| j
dk	r|drt|d	d }| j
dd|f }nt|t| jkr|| j }t|
tr@|
| jjj	kr@| j|
 }
tjtt| j | _tjtt| j | _|dkr.zddl}W n tk
r   td
Y nX | jdkr| }| jD ]&}|j|d |d d|d  d q|jj|dd| _| j D ]&\}}|d | j|< |d | j|< qn|jd t| jkrj|dddf | _|dddf | _nf|jd t| jkrt| jd D ]6\}}||df | j|df< ||df | j|df< qntd| jd }| jd }|j t!t"t"| j| | j| t"| j| | j| |||dd}|#|| t|
trJ|$|
 n
|%|
 |dk	rh|&| |' (| |j)| j| j  | j| j  |d|	|ddd |j)| j| j | j| j ||||	|dd||d |*d dS )aE	  
        Plots the Approximation graph, requires networkx and matplotlib.

        Parameters
        ----------
        positions : np.ndarray, shape (n_samples, 2) (default = None)
            A position for each data point in the graph or each data point in the
            raw data. When None, the function attempts to compute graphviz'
            sfdp layout, which requires pygraphviz to be installed and available.

        node_color : str (default = 'label')
            The point attribute to to color the nodes by. Possible values:
            - id
            - label
            - probability
            - cluster_label
            - cluster_probability
            - cluster_centrality
            - branch_label
            - branch_probability,
            - The input data's feature (if available) names if
            ``feature_names`` is specified or ``feature_x`` for the x-th feature
            if no ``feature_names`` are given, or anything matplotlib scatter
            interprets as a color.

        node_vmin : float, (default = None)
            The minimum value to use for normalizing node colors.

        node_vmax : float, (default = None)
            The maximum value to use for normalizing node colors.

        node_cmap : str, (default = 'tab10')
            The cmap to use for coloring nodes.

        node_alpha : float, (default = 1)
            The node transparency value.

        node_size : float, (default = 5)
            The node marker size value.

        node_marker : str, (default = 'o')
            The node marker string.

        edge_color : str (default = 'label')
            The point attribute to to color the nodes by. Possible values:
            - weight
            - mutual reachability
            - centrality,
            - cluster,
            or anything matplotlib linecollection interprets as color.

        edge_vmin : float, (default = None)
            The minimum value to use for normalizing edge colors.

        edge_vmax : float, (default = None)
            The maximum value to use for normalizing edge colors.

        edge_cmap : str, (default = viridis)
            The cmap to use for coloring edges.

        edge_alpha : float, (default = 1)
            The edge transparency value.

        edge_width : float, (default = 1)
            The edge line width size value.
        r   NzHYou must install the matplotlib library to plot the Approximation Graph.r   	   Ztab10rq   Zfeature_   z7You must install the networkx to compute a sfdp layout.r   r   r   r   r   Zsfdp)progr   z(Incorrect number of positions specified.)r   re   r   Zzordersilverrc   )rl   markerr   rn   ru   )re   r   r   rn   ru   ZvminZvmaxoff)+r}   r~   r   collectionsr   r>   r   r   r   namesr   r   
startswithr   r   r   r   r   nanZonesZ_xsZ_ysr   r   r   r   Z	nx_agraphZgraphviz_layoutitemsr"   r   r_   r   listr@   Zset_climZset_edgecolorr   Z	set_alphar   Zadd_collectionr   r   )r,   Z	positionsfeature_namesr   Z	node_vminZ	node_vmaxZ	node_cmapr   r   Znode_markerZ
edge_colorZ	edge_vminZ	edge_vmaxr   r   Z
edge_widthr   ZmcidxnxgrU   r   vrT   dsourcetargetlcr   r   r   r     s    U

















zApproximationGraph.plotc                 C   s   | j  | j fS )a  Converts the approximation graph to numpy arrays.

        Returns
        -------
        points : np.recarray, shape (n_points, 8)
            A numpy record array with for each point its:
            - id (row index),
            - label,
            - probability,
            - cluster label,
            - cluster probability,
            - cluster centrality,
            - branch label,
            - branch probability

        edges : np.recarray, shape (n_edges, 5)
            A numpy record array with for each edge its:
            - parent point,
            - child point,
            - cluster centrality,
            - mutual reachability,
            - cluster label
        )r   r   r   r   r   r   r   r     s    zApproximationGraph.to_numpyc                 C   sJ   zddl m} W n tk
r,   tdY nX || j}|| j}||fS )a  Converts the approximation graph to pandas data frames.

        Returns
        -------
        points : pd.DataFrame, shape (n_points, 8)
            A DataFrame with for each point its:
            - id (row index),
            - label,
            - probability,
            - cluster label,
            - cluster probability,
            - cluster centrality,
            - branch label,
            - branch probability

        edges : pd.DataFrame, shape (n_edges, 5)
            A DataFrame with for each edge its:
            - parent point,
            - child point,
            - cluster centrality,
            - mutual reachability,
            - cluster label
        r   r   r   )r   r   r   r   r   )r,   r   Zpointsr   r   r   r   r     s    


zApproximationGraph.to_pandasc              
   C   s  zddl }W n tk
r(   tdY nX | }| jD ]6}|j|d |d d|d  |d |d |d	 d
 q8| jjjdd D ] }||t	| jd|g | q| j
dk	r|dkrdd t| j
jd D }t|D ]4\}}||t	t| jd | j
dd|f | q|S )a  Convert to a NetworkX Graph object.

        Parameters
        ----------
        feature_names : list[n_features]
            Names to use for the data features if available.

        Returns
        -------
        g : nx.Graph
            A NetworkX Graph object containing the non-noise points and edges
            within clusters.

            Node attributes:
            - label,
            - probability,
            - cluster label,
            - cluster probability,
            - cluster centrality,
            - branch label,
            - branch probability,

            Edge attributes:
            - weight (1 / mutual_reachability),
            - mutual_reachability,
            - centrality,
            - cluster label,
            -
        r   Nr   r   r   r   r   r   rO   )r   r   r   rO   r   c                 S   s   g | ]}d | qS )zfeature r   r   r   r   r   r      s     z2ApproximationGraph.to_networkx.<locals>.<listcomp>)r   r   r   r   r   r   r   r  r   r?   r   rA   r"   r   r@   )r,   r  r
  r  rU   attrr	  namer   r   r   r     s8    



 zApproximationGraph.to_networkx)N)NNr   NNrb   r   r   r   r   NNrb   r   r   )N)	r   r   r   r   r-   r   r   r   r   r   r   r   r   r     s.   - 
9               
 J#r   )numpyr   Zscipy.cluster.hierarchyr   Zsklearn.manifoldr   Zsklearn.decompositionr   Zsklearn.metricsr   warningsr   Z_hdbscan_treer   r	   r
   rE   rF   rG   rH   r   r    r%   objectr&   r   r   r   r   r   r   r   r   r   <module>   s0   	   L a 