3
[hU                 @   s   d dl Z d dlZd dljZddlmZmZm	Z	 ddlm
Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ G dd de
eeZG dd de
e	eZdS )    N   )BaseEstimatorClassifierMixinRegressorMixin)MultiOutputMixin)check_random_state)_num_samples)check_array)check_consistent_length)check_is_fitted_check_sample_weight)_random_choice_csc)_weighted_percentile)class_distribution)_deprecate_positional_argsc                   sb   e Zd ZdZeddddddZdddZd	d
 Zdd Zdd Z	dd Z
d fdd	Z  ZS )DummyClassifiera
  
    DummyClassifier is a classifier that makes predictions using simple rules.

    This classifier is useful as a simple baseline to compare with other
    (real) classifiers. Do not use it for real problems.

    Read more in the :ref:`User Guide <dummy_estimators>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    strategy : {"stratified", "most_frequent", "prior", "uniform",             "constant"}, default="prior"
        Strategy to use to generate predictions.

        * "stratified": generates predictions by respecting the training
          set's class distribution.
        * "most_frequent": always predicts the most frequent label in the
          training set.
        * "prior": always predicts the class that maximizes the class prior
          (like "most_frequent") and ``predict_proba`` returns the class prior.
        * "uniform": generates predictions uniformly at random.
        * "constant": always predicts a constant label that is provided by
          the user. This is useful for metrics that evaluate a non-majority
          class

          .. versionchanged:: 0.24
             The default value of `strategy` has changed to "prior" in version
             0.24.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness to generate the predictions when
        ``strategy='stratified'`` or ``strategy='uniform'``.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    constant : int or str or array-like of shape (n_outputs,)
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,) or list of such arrays
        Class labels for each output.

    n_classes_ : int or list of int
        Number of label for each output.

    class_prior_ : ndarray of shape (n_classes,) or list of such arrays
        Probability of each class for each output.

    n_outputs_ : int
        Number of outputs.

    sparse_output_ : bool
        True if the array returned from predict is to be in sparse CSC format.
        Is automatically set to True if the input y is passed in sparse format.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.dummy import DummyClassifier
    >>> X = np.array([-1, 1, 1, 1])
    >>> y = np.array([0, 1, 1, 1])
    >>> dummy_clf = DummyClassifier(strategy="most_frequent")
    >>> dummy_clf.fit(X, y)
    DummyClassifier(strategy='most_frequent')
    >>> dummy_clf.predict(X)
    array([1, 1, 1, 1])
    >>> dummy_clf.score(X, y)
    0.75
    priorN)strategyrandom_stateconstantc            C   s   || _ || _|| _d S )N)r   r   r   )selfr   r   r    r   5/tmp/pip-build-zwgx3nbq/scikit-learn/sklearn/dummy.py__init__a   s    zDummyClassifier.__init__c                s  d}| j |kr td| j |f | j | _| jdkrPtj|rP|j }tjdt tj|| _	| j	svt
j|}t
j|}|jdkrt
j|d}|jd | _d	| _t|| |d	k	rt||}| jdkr| jd	krtd
n4t
jt
j| jd  jd | jkrtd| j t||\| _| _| _| jdkrxTt| jD ]Ft fdd| j D s<dj| jt| j }t|q<W | jdkr| jd | _| jd | _| jd | _| S )a  Fit the random classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : object
        most_frequent
stratifieduniformr   r   z.Unknown strategy type: %s, expected one of %s.zA local copy of the target data has been converted to a numpy array. Predicting on sparse target data with the uniform strategy would not save memory and would be slower.r   NzMConstant target value has to be specified when the constant strategy is used.r   z0Constant target value should have shape (%d, 1).c             3   s   | ]}  d  |kV  qdS )r   Nr   ).0c)r   kr   r   	<genexpr>   s    z&DummyClassifier.fit.<locals>.<genexpr>zrThe constant target value must be present in the training data. You provided constant={}. Possible values are: {}.)r   r   r   r   r   )r!   r   r!   )r!   r   )r   
ValueError	_strategyspissparseZtoarraywarningswarnUserWarningsparse_output_npZasarrayZ
atleast_1dndimreshapeshape
n_outputs_n_features_in_r
   r   r   r   classes_
n_classes_class_prior_rangeanyformatlist)r   Xysample_weightallowed_strategieserr_msgr   )r   r   r   fith   sP     








 zDummyClassifier.fitc                s  t |  t|t| j| j| j| j | j}| jdkrTgg g |g}| j	dkrx| j
|| jdkrxg| jrd}| j	dkrdd  D n<| j	dkr }n,| j	dkrtd	n| j	d
krdd |D t|| j}n| j	dkrtj fddt| jD dg}n| j	dkrNtjfddt| jD j}nV| j	dkrfddt| jD }tj|j}n| j	d
krtj| jdf}| jdkrtj|}|S )a;  Perform classification on test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        Returns
        -------
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Predicted target values for X.
        r   r   Nr   r   c             S   s   g | ]}t j|j gqS r   )r*   arrayargmax)r   cpr   r   r   
<listcomp>   s    z+DummyClassifier.predict.<locals>.<listcomp>r   zCSparse target prediction is not supported with the uniform strategyr   c             S   s   g | ]}t j|gqS r   )r*   r=   )r   r   r   r   r   r@      s    c                s    g | ]}|  | j   qS r   )r>   )r   r   )r2   r0   r   r   r@      s   c                s$   g | ]} | | j d d qS )r   )axis)r>   )r   r   )r0   probar   r   r@      s   c                s&   g | ]} | j | d  qS ))size)randint)r   r   )r0   r1   	n_samplesrsr   r   r@      s   )r   r   )r   r   )r   r   r   r   r1   r0   r2   r   r.   r#   predict_probar)   r"   r   r*   Ztiler3   ZvstackTravel)r   r7   r   Z
class_probr8   retr   )r2   r0   r1   rE   rB   rF   r   predict   sV    











zDummyClassifier.predictc             C   s  t |  t|}t| j}| j}| j}| j}| j}| jdkrT|g}|g}|g}|g}g }x(t	| jD ]}	| j
dkr||	 j }
tj|||	 ftjd}d|dd|
f< n| j
dkrtj|df||	  }n| j
dkr|jd||	 |d}|jtj}n|| j
d	kr,tj|||	 ftjd}|||	  }nJ| j
d
krvtj||	 ||	 k}
tj|||	 ftjd}d|dd|
f< |j| qfW | jdkr|d }|S )a  
        Return probability estimates for the test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        Returns
        -------
        P : ndarray of shape (n_samples, n_classes) or list of such arrays
            Returns the probability of the sample for each class in
            the model, where classes are ordered arithmetically, for each
            output.
        r   r   )dtypeg      ?Nr   r   )rC   r   r   r   )r   r   r   r   r1   r0   r2   r   r.   r3   r#   r>   r*   zerosZfloat64ZonesZmultinomialZastypewhereappend)r   r7   rE   rF   r1   r0   r2   r   Pr   indoutr   r   r   rG     sD    




zDummyClassifier.predict_probac             C   s0   | j |}| jdkrtj|S dd |D S dS )a  
        Return log probability estimates for the test vectors X.

        Parameters
        ----------
        X : {array-like, object with finite length or shape}
            Training data, requires length = n_samples

        Returns
        -------
        P : ndarray of shape (n_samples, n_classes) or list of such arrays
            Returns the log probability of the sample for each class in
            the model, where classes are ordered arithmetically for each
            output.
        r   c             S   s   g | ]}t j|qS r   )r*   log)r   pr   r   r   r@   [  s    z5DummyClassifier.predict_log_proba.<locals>.<listcomp>N)rG   r.   r*   rS   )r   r7   rB   r   r   r   predict_log_probaG  s    


z!DummyClassifier.predict_log_probac             C   s   ddddddS )NTzfails for the predict method)Zcheck_methods_subset_invarianceZ%check_methods_sample_order_invariance)
poor_scoreno_validationZ_xfail_checksr   )r   r   r   r   
_more_tags]  s    zDummyClassifier._more_tagsc                s,   |dkrt jt|dfd}t j|||S )ak  Returns the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy
        which is a harsh metric since you require for each sample that
        each label set be correctly predicted.

        Parameters
        ----------
        X : None or array-like of shape (n_samples, n_features)
            Test samples. Passing None as test samples gives the same result
            as passing real test samples, since DummyClassifier
            operates independently of the sampled observations.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) wrt. y.

        Nr   )r-   )r*   rM   lensuperscore)r   r7   r8   r9   )	__class__r   r   r[   h  s    zDummyClassifier.score)N)N)__name__
__module____qualname____doc__r   r   r<   rK   rG   rU   rX   r[   __classcell__r   r   )r\   r   r      s   I
UK?r   c                   sT   e Zd ZdZeddddddZdddZdd
dZdd Zd fdd	Z	  Z
S )DummyRegressoraQ  
    DummyRegressor is a regressor that makes predictions using
    simple rules.

    This regressor is useful as a simple baseline to compare with other
    (real) regressors. Do not use it for real problems.

    Read more in the :ref:`User Guide <dummy_estimators>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    strategy : {"mean", "median", "quantile", "constant"}, default="mean"
        Strategy to use to generate predictions.

        * "mean": always predicts the mean of the training set
        * "median": always predicts the median of the training set
        * "quantile": always predicts a specified quantile of the training set,
          provided with the quantile parameter.
        * "constant": always predicts a constant value that is provided by
          the user.

    constant : int or float or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.

    quantile : float in [0.0, 1.0], default=None
        The quantile to predict using the "quantile" strategy. A quantile of
        0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the
        maximum.

    Attributes
    ----------
    constant_ : ndarray of shape (1, n_outputs)
        Mean or median or quantile of the training targets or constant value
        given by the user.

    n_outputs_ : int
        Number of outputs.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.dummy import DummyRegressor
    >>> X = np.array([1.0, 2.0, 3.0, 4.0])
    >>> y = np.array([2.0, 3.0, 5.0, 10.0])
    >>> dummy_regr = DummyRegressor(strategy="mean")
    >>> dummy_regr.fit(X, y)
    DummyRegressor()
    >>> dummy_regr.predict(X)
    array([5., 5., 5., 5.])
    >>> dummy_regr.score(X, y)
    0.0
    meanN)r   r   quantilec            C   s   || _ || _|| _d S )N)r   r   rd   )r   r   r   rd   r   r   r   r     s    zDummyRegressor.__init__c                s  d}| j |kr td| j |f tddd| _td	krFtd
jdkr\tjdjd | _	t
| dk	rt|| j dkrtjd	d| _n8| j dkrdkrtjd	d| _nfddt| j	D | _n| j dkrf| jdkstj| j rtd| j | jd  dkrFtjd	 d| _n fddt| j	D | _nx| j dkr| jdkrtdt| jdddgdd	d| _| j	dkr| jjd	 jd krtdjd  | j| _tj| jd| _| S )a  Fit the random regressor.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : object
        rc   medianrd   r   z.Unknown strategy type: %s, expected one of %s.F)	ensure_2dNr   zy must not be empty.r   )rA   weights)rA   c                s&   g | ]}t d d |f  ddqS )Ng      I@)
percentile)r   )r   r   )r9   r8   r   r   r@     s   z&DummyRegressor.fit.<locals>.<listcomp>z>Quantile must be a scalar in the range [0.0, 1.0], but got %s.g      Y@)rA   qc                s&   g | ]}t d d |f  dqS )N)rh   )r   )r   r   )rh   r9   r8   r   r   r@     s   zMConstant target value has to be specified when the constant strategy is used.ZcsrZcscZcoo)Zaccept_sparserf   Zensure_min_samplesz0Constant target value should have shape (%d, 1).)rc   re   rd   r   r!   )r!   r   r!   )r   r!   )r   r"   r	   r/   rY   r+   r*   r,   r-   r.   r
   r   Zaverage	constant_re   r3   rd   Zisscalarrh   r   	TypeError)r   r7   r8   r9   r:   r   )rh   r9   r8   r   r<     sV    







$zDummyRegressor.fitFc             C   sp   t |  t|}tj|| jf| jtj| jjd}tj|| jf}| jdkr`tj	|}tj	|}|rl||fS |S )a  
        Perform classification on test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        return_std : bool, default=False
            Whether to return the standard deviation of posterior prediction.
            All zeros in this case.

            .. versionadded:: 0.20

        Returns
        -------
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Predicted target values for X.

        y_std : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Standard deviation of predictive distribution of query points.
        )rL   r   )
r   r   r*   fullr.   rj   r=   rL   rM   rI   )r   r7   Z
return_stdrE   r8   Zy_stdr   r   r   rK     s    


zDummyRegressor.predictc             C   s
   dddS )NT)rV   rW   r   )r   r   r   r   rX   9  s    zDummyRegressor._more_tagsc                s,   |dkrt jt|dfd}t j|||S )a  Returns the coefficient of determination R^2 of the prediction.

        The coefficient R^2 is defined as (1 - u/v), where u is the residual
        sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
        sum of squares ((y_true - y_true.mean()) ** 2).sum().
        The best possible score is 1.0 and it can be negative (because the
        model can be arbitrarily worse). A constant model that always
        predicts the expected value of y, disregarding the input features,
        would get a R^2 score of 0.0.

        Parameters
        ----------
        X : None or array-like of shape (n_samples, n_features)
            Test samples. Passing None as test samples gives the same result
            as passing real test samples, since DummyRegressor
            operates independently of the sampled observations.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True values for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            R^2 of self.predict(X) wrt. y.
        Nr   )r-   )r*   rM   rY   rZ   r[   )r   r7   r8   r9   )r\   r   r   r[   <  s    zDummyRegressor.score)N)F)N)r]   r^   r_   r`   r   r   r<   rK   rX   r[   ra   r   r   )r\   r   rb     s   7
P
$rb   )r&   Znumpyr*   Zscipy.sparsesparser$   baser   r   r   r   utilsr   Zutils.validationr   r	   r
   r   r   Zutils.randomr   Zutils.statsr   Zutils.multiclassr   r   r   rb   r   r   r   r   <module>   s"   
  r