import pytest
import numpy as np
from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                           assert_allclose)

from sklearn.datasets import load_linnerud
from sklearn.cross_decomposition._pls import (
    _center_scale_xy,
    _get_first_singular_vectors_power_method,
    _get_first_singular_vectors_svd,
    _svd_flip_1d
)
from sklearn.cross_decomposition import CCA
from sklearn.cross_decomposition import PLSSVD, PLSRegression, PLSCanonical
from sklearn.datasets import make_regression
from sklearn.utils import check_random_state
from sklearn.utils.extmath import svd_flip
from sklearn.exceptions import ConvergenceWarning


def assert_matrix_orthogonal(M):
    """Check that the columns of M are mutually orthogonal (M'M diagonal)."""
    K = np.dot(M.T, M)
    assert_array_almost_equal(K, np.diag(np.diag(K)))
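

def test_assert_matrix_orthogonal_helper():
    # A quick illustration of the helper above: the Q factor of a QR
    # decomposition has orthonormal columns, so Q.T @ Q is the identity,
    # which is in particular diagonal.
    Q, _ = np.linalg.qr(np.random.RandomState(0).randn(5, 3))
    assert_matrix_orthogonal(Q)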


def test_pls_canonical_basics():
    # Basic checks for PLSCanonical
    d = load_linnerud()
    X = d.data
    Y = d.target

    pls = PLSCanonical(n_components=X.shape[1])
    pls.fit(X, Y)

    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)
    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)

    # Check X = TP' and Y = UQ', i.e. that scores times loadings
    # reconstruct the (centered and scaled) data
    T = pls._x_scores
    P = pls.x_loadings_
    U = pls._y_scores
    Q = pls.y_loadings_
    # Need to center and scale the data first, as the model does internally
    Xc, Yc, x_mean, y_mean, x_std, y_std = _center_scale_xy(
        X.copy(), Y.copy(), scale=True)
    assert_array_almost_equal(Xc, np.dot(T, P.T))
    assert_array_almost_equal(Yc, np.dot(U, Q.T))

    # Check that rotations on training data lead to scores
    Xt = pls.transform(X)
    assert_array_almost_equal(Xt, pls._x_scores)
    Xt, Yt = pls.transform(X, Y)
    assert_array_almost_equal(Xt, pls._x_scores)
    assert_array_almost_equal(Yt, pls._y_scores)

    # Check that inverse_transform works (recovery is exact here because
    # n_components == n_features)
    X_back = pls.inverse_transform(Xt)
    assert_array_almost_equal(X_back, X)


def test_sanity_check_pls_regression():
    # Sanity check for PLSRegression
    # The results were checked against the R-packages plspm, mixOmics and pls

    d = load_linnerud()
    X = d.data
    Y = d.target

    pls = PLSRegression(n_components=X.shape[1])
    pls.fit(X, Y)

    expected_x_weights = np.array(
        [[-0.61330704, -0.00443647,  0.78983213],
         [-0.74697144, -0.32172099, -0.58183269],
         [-0.25668686,  0.94682413, -0.19399983]])

    expected_x_loadings = np.array(
        [[-0.61470416, -0.24574278,  0.78983213],
         [-0.65625755, -0.14396183, -0.58183269],
         [-0.51733059,  1.00609417, -0.19399983]])

    expected_y_weights = np.array(
        [[+0.32456184,  0.29892183,  0.20316322],
         [+0.42439636,  0.61970543,  0.19320542],
         [-0.13143144, -0.26348971, -0.17092916]])

    expected_y_loadings = np.array(
        [[+0.32456184,  0.29892183,  0.20316322],
         [+0.42439636,  0.61970543,  0.19320542],
         [-0.13143144, -0.26348971, -0.17092916]])

    assert_array_almost_equal(np.abs(pls.x_loadings_),
                              np.abs(expected_x_loadings))
    assert_array_almost_equal(np.abs(pls.x_weights_),
                              np.abs(expected_x_weights))
    assert_array_almost_equal(np.abs(pls.y_loadings_),
                              np.abs(expected_y_loadings))
    assert_array_almost_equal(np.abs(pls.y_weights_),
                              np.abs(expected_y_weights))

    # The R / Python difference in the signs should be consistent across
    # loadings, weights, etc. (each component is only defined up to a
    # global sign flip)
    x_loadings_sign_flip = np.sign(pls.x_loadings_ / expected_x_loadings)
    x_weights_sign_flip = np.sign(pls.x_weights_ / expected_x_weights)
    y_weights_sign_flip = np.sign(pls.y_weights_ / expected_y_weights)
    y_loadings_sign_flip = np.sign(pls.y_loadings_ / expected_y_loadings)
    assert_array_almost_equal(x_loadings_sign_flip, x_weights_sign_flip)
    assert_array_almost_equal(y_loadings_sign_flip, y_weights_sign_flip)


def test_sanity_check_pls_regression_constant_column_Y():
    # Check behavior when the first column of Y is constant
    # The results are checked against a modified version of plsreg2
    # from the R-package plsdepot
    d = load_linnerud()
    X = d.data
    Y = d.target
    Y[:, 0] = 1
    pls = PLSRegression(n_components=X.shape[1])
    pls.fit(X, Y)

    expected_x_weights = np.array(
        [[-0.6273573, 0.007081799, 0.7786994],
         [-0.7493417, -0.277612681, -0.6011807],
         [-0.2119194, 0.960666981, -0.1794690]])

    expected_x_loadings = np.array(
        [[-0.6273512, -0.22464538, 0.7786994],
         [-0.6643156, -0.09871193, -0.6011807],
         [-0.5125877, 1.01407380, -0.1794690]])

    expected_y_loadings = np.array(
        [[0.0000000, 0.0000000, 0.0000000],
         [0.4357300, 0.5828479, 0.2174802],
         [-0.1353739, -0.2486423, -0.1810386]])

    assert_array_almost_equal(np.abs(expected_x_weights),
                              np.abs(pls.x_weights_))
    assert_array_almost_equal(np.abs(expected_x_loadings),
                              np.abs(pls.x_loadings_))
    # For PLSRegression with default parameters, y_loadings == y_weights
    assert_array_almost_equal(np.abs(pls.y_loadings_),
                              np.abs(expected_y_loadings))
    assert_array_almost_equal(np.abs(pls.y_weights_),
                              np.abs(expected_y_loadings))

    x_loadings_sign_flip = np.sign(expected_x_loadings / pls.x_loadings_)
    x_weights_sign_flip = np.sign(expected_x_weights / pls.x_weights_)
    y_loadings_sign_flip = np.sign(expected_y_loadings / pls.y_loadings_)
    assert_array_equal(x_loadings_sign_flip, x_weights_sign_flip)
    assert_array_equal(x_loadings_sign_flip[1:], y_loadings_sign_flip[1:])


def test_sanity_check_pls_canonical():
    # Sanity check for PLSCanonical
    # The results were checked against the R-package plspm

    d = load_linnerud()
    X = d.data
    Y = d.target

    pls = PLSCanonical(n_components=X.shape[1])
    pls.fit(X, Y)

    expected_x_weights = np.array(
        [[-0.61330704,  0.25616119, -0.74715187],
         [-0.74697144,  0.11930791,  0.65406368],
         [-0.25668686, -0.95924297, -0.11817271]])

    expected_x_rotations = np.array(
        [[-0.61330704,  0.41591889, -0.62297525],
         [-0.74697144,  0.31388326,  0.77368233],
         [-0.25668686, -0.89237972, -0.24121788]])

    expected_y_weights = np.array(
        [[+0.58989127,  0.7890047,   0.1717553],
         [+0.77134053, -0.61351791,  0.16920272],
         [-0.23887670, -0.03267062,  0.97050016]])

    expected_y_rotations = np.array(
        [[+0.58989127,  0.7168115,  0.30665872],
         [+0.77134053, -0.70791757,  0.19786539],
         [-0.23887670, -0.00343595,  0.94162826]])

    assert_array_almost_equal(np.abs(pls.x_rotations_),
                              np.abs(expected_x_rotations))
    assert_array_almost_equal(np.abs(pls.x_weights_),
                              np.abs(expected_x_weights))
    assert_array_almost_equal(np.abs(pls.y_rotations_),
                              np.abs(expected_y_rotations))
    assert_array_almost_equal(np.abs(pls.y_weights_),
                              np.abs(expected_y_weights))

    x_rotations_sign_flip = np.sign(pls.x_rotations_ / expected_x_rotations)
    x_weights_sign_flip = np.sign(pls.x_weights_ / expected_x_weights)
    y_rotations_sign_flip = np.sign(pls.y_rotations_ / expected_y_rotations)
    y_weights_sign_flip = np.sign(pls.y_weights_ / expected_y_weights)
    assert_array_almost_equal(x_rotations_sign_flip, x_weights_sign_flip)
    assert_array_almost_equal(y_rotations_sign_flip, y_weights_sign_flip)

    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)

    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)


def test_sanity_check_pls_canonical_random():
    # Sanity check for PLSCanonical on random data
    # The results were checked against the R-package plspm
    n = 500
    p_noise = 10
    q_noise = 5
    # 2 latent variables:
    rng = check_random_state(11)
    l1 = rng.normal(size=n)
    l2 = rng.normal(size=n)
    latents = np.array([l1, l1, l2, l2]).T
    X = latents + rng.normal(size=4 * n).reshape((n, 4))
    Y = latents + rng.normal(size=4 * n).reshape((n, 4))
    X = np.concatenate(
        (X, rng.normal(size=p_noise * n).reshape(n, p_noise)), axis=1)
    Y = np.concatenate(
        (Y, rng.normal(size=q_noise * n).reshape(n, q_noise)), axis=1)

    pls = PLSCanonical(n_components=3)
    pls.fit(X, Y)

    expected_x_weights = np.array(
        [[0.65803719,  0.19197924,  0.21769083],
         [0.7009113,  0.13303969, -0.15376699],
         [0.13528197, -0.68636408,  0.13856546],
         [0.16854574, -0.66788088, -0.12485304],
         [-0.03232333, -0.04189855,  0.40690153],
         [0.1148816, -0.09643158,  0.1613305],
         [0.04792138, -0.02384992,  0.17175319],
         [-0.06781, -0.01666137, -0.18556747],
         [-0.00266945, -0.00160224,  0.11893098],
         [-0.00849528, -0.07706095,  0.1570547],
         [-0.00949471, -0.02964127,  0.34657036],
         [-0.03572177,  0.0945091,  0.3414855],
         [0.05584937, -0.02028961, -0.57682568],
         [0.05744254, -0.01482333, -0.17431274]])

    expected_x_loadings = np.array(
        [[0.65649254,  0.1847647,  0.15270699],
         [0.67554234,  0.15237508, -0.09182247],
         [0.19219925, -0.67750975,  0.08673128],
         [0.2133631, -0.67034809, -0.08835483],
         [-0.03178912, -0.06668336,  0.43395268],
         [0.15684588, -0.13350241,  0.20578984],
         [0.03337736, -0.03807306,  0.09871553],
         [-0.06199844,  0.01559854, -0.1881785],
         [0.00406146, -0.00587025,  0.16413253],
         [-0.00374239, -0.05848466,  0.19140336],
         [0.00139214, -0.01033161,  0.32239136],
         [-0.05292828,  0.0953533,  0.31916881],
         [0.04031924, -0.01961045, -0.65174036],
         [0.06172484, -0.06597366, -0.1244497]])

    expected_y_weights = np.array(
        [[0.66101097,  0.18672553,  0.22826092],
         [0.69347861,  0.18463471, -0.23995597],
         [0.14462724, -0.66504085,  0.17082434],
         [0.22247955, -0.6932605, -0.09832993],
         [0.07035859,  0.00714283,  0.67810124],
         [0.07765351, -0.0105204, -0.44108074],
         [-0.00917056,  0.04322147,  0.10062478],
         [-0.01909512,  0.06182718,  0.28830475],
         [0.01756709,  0.04797666,  0.32225745]])

    expected_y_loadings = np.array(
        [[0.68568625,  0.1674376,  0.0969508],
         [0.68782064,  0.20375837, -0.1164448],
         [0.11712173, -0.68046903,  0.12001505],
         [0.17860457, -0.6798319, -0.05089681],
         [0.06265739, -0.0277703,  0.74729584],
         [0.0914178,  0.00403751, -0.5135078],
         [-0.02196918, -0.01377169,  0.09564505],
         [-0.03288952,  0.09039729,  0.31858973],
         [0.04287624,  0.05254676,  0.27836841]])

    assert_array_almost_equal(np.abs(pls.x_loadings_),
                              np.abs(expected_x_loadings))
    assert_array_almost_equal(np.abs(pls.x_weights_),
                              np.abs(expected_x_weights))
    assert_array_almost_equal(np.abs(pls.y_loadings_),
                              np.abs(expected_y_loadings))
    assert_array_almost_equal(np.abs(pls.y_weights_),
                              np.abs(expected_y_weights))

    x_loadings_sign_flip = np.sign(pls.x_loadings_ / expected_x_loadings)
    x_weights_sign_flip = np.sign(pls.x_weights_ / expected_x_weights)
    y_weights_sign_flip = np.sign(pls.y_weights_ / expected_y_weights)
    y_loadings_sign_flip = np.sign(pls.y_loadings_ / expected_y_loadings)
    assert_array_almost_equal(x_loadings_sign_flip, x_weights_sign_flip)
    assert_array_almost_equal(y_loadings_sign_flip, y_weights_sign_flip)

    assert_matrix_orthogonal(pls.x_weights_)
    assert_matrix_orthogonal(pls.y_weights_)

    assert_matrix_orthogonal(pls._x_scores)
    assert_matrix_orthogonal(pls._y_scores)


def test_convergence_fail():
    # Make sure ConvergenceWarning is raised if max_iter is too small
    d = load_linnerud()
    X = d.data
    Y = d.target
    pls_nipals = PLSCanonical(n_components=X.shape[1], max_iter=2)
    with pytest.warns(ConvergenceWarning):
        pls_nipals.fit(X, Y)


@pytest.mark.filterwarnings('ignore:.*scores_ was deprecated')  # 1.1
@pytest.mark.parametrize('Est', (PLSSVD, PLSRegression, PLSCanonical))
def test_attributes_shapes(Est):
    # Make sure attributes are of the correct shape depending on n_components
    d = load_linnerud()
    X = d.data
    Y = d.target
    n_components = 2
    pls = Est(n_components=n_components)
    pls.fit(X, Y)
    assert all(attr.shape[1] == n_components
               for attr in (pls.x_scores_, pls.y_scores_, pls.x_weights_,
                            pls.y_weights_))


@pytest.mark.parametrize('Est', (PLSRegression, PLSCanonical, CCA))
def test_univariate_equivalence(Est):
    # Ensure 2D Y with 1 column is equivalent to 1D Y
    d = load_linnerud()
    X = d.data
    Y = d.target

    est = Est(n_components=1)
    one_d_coeff = est.fit(X, Y[:, 0]).coef_
    two_d_coeff = est.fit(X, Y[:, :1]).coef_

    assert one_d_coeff.shape == two_d_coeff.shape
    assert_array_almost_equal(one_d_coeff, two_d_coeff)


@pytest.mark.parametrize('Est', (PLSRegression, PLSCanonical, CCA, PLSSVD))
def test_copy(Est):
    # Check that the "copy" keyword works
    d = load_linnerud()
    X = d.data
    Y = d.target
    X_orig = X.copy()

    # copy=True does not modify X in place
    pls = Est(copy=True).fit(X, Y)
    assert_array_equal(X, X_orig)

    # copy=False may modify X in place; the equality check is then expected
    # to fail inside the pytest.raises block
    with pytest.raises(AssertionError):
        Est(copy=False).fit(X, Y)
        assert_array_almost_equal(X, X_orig)

    if Est is PLSSVD:
        return  # PLSSVD does not support copy param in predict or transform

    X_orig = X.copy()
    with pytest.raises(AssertionError):
        pls.transform(X, Y, copy=False)
        assert_array_almost_equal(X, X_orig)

    X_orig = X.copy()
    with pytest.raises(AssertionError):
        pls.predict(X, copy=False)
        assert_array_almost_equal(X, X_orig)

    # Make sure copy=True gives the same transform and predictions as
    # copy=False
    assert_array_almost_equal(pls.transform(X, Y, copy=True),
                              pls.transform(X.copy(), Y.copy(), copy=False))
    assert_array_almost_equal(pls.predict(X, copy=True),
                              pls.predict(X.copy(), copy=False))


def _generate_test_scale_and_stability_datasets():
    """Generate dataset for test_scale_and_stability"""
    # dataset for non-regression 7818
    rng = np.random.RandomState(0)
    n_samples = 1000
    n_targets = 5
    n_features = 10
    Q = rng.randn(n_targets, n_features)
    Y = rng.randn(n_samples, n_targets)
    X = np.dot(Y, Q) + 2 * rng.randn(n_samples, n_features) + 1
    X *= 1000
    yield X, Y

    # Dataset where one of the features is constant
    X, Y = load_linnerud(return_X_y=True)
    # causes X[:, -1].std() to be zero
    X[:, -1] = 1.0
    yield X, Y

    X = np.array([[0., 0., 1.],
                  [1., 0., 0.],
                  [2., 2., 2.],
                  [3., 5., 4.]])
    Y = np.array([[0.1, -0.2],
                  [0.9, 1.1],
                  [6.2, 5.9],
                  [11.9, 12.3]])
    yield X, Y

    # Seeds that provide a non-regression test for #18746, where CCA fails
    seeds = [530, 741]
    for seed in seeds:
        rng = np.random.RandomState(seed)
        X = rng.randn(4, 3)
        Y = rng.randn(4, 2)
        yield X, Y


@pytest.mark.parametrize('Est', (CCA, PLSCanonical, PLSRegression, PLSSVD))
@pytest.mark.parametrize('X, Y', _generate_test_scale_and_stability_datasets())
def test_scale_and_stability(Est, X, Y):
    """scale=True is equivalent to scale=False on centered/scaled data
    This allows to check numerical stability over platforms as well"""

    X_s, Y_s, *_ = _center_scale_xy(X, Y)

    X_score, Y_score = Est(scale=True).fit_transform(X, Y)
    X_s_score, Y_s_score = Est(scale=False).fit_transform(X_s, Y_s)

    assert_allclose(X_s_score, X_score, atol=1e-4)
    assert_allclose(Y_s_score, Y_score, atol=1e-4)


@pytest.mark.parametrize('Est', (PLSSVD, PLSCanonical, CCA))
@pytest.mark.parametrize('n_components', (0, 4))
def test_n_components_bounds(Est, n_components):
    # n_components should be in [1, min(n_samples, n_features, n_targets)]
    # TODO: catch error instead of warning in 1.1
    rng = np.random.RandomState(0)
    X = rng.randn(10, 5)
    Y = rng.randn(10, 3)
    est = Est(n_components=n_components)
    with pytest.warns(FutureWarning,
                      match="n_components=3 will be used instead"):
        est.fit(X, Y)
        # make sure upper bound of rank is used as a fallback
        assert est.transform(X).shape[1] == 3


@pytest.mark.parametrize('n_components', (0, 6))
def test_n_components_bounds_pls_regression(n_components):
    # For PLSRegression, the upper bound for n_components is n_features
    # TODO: catch error instead of warning in 1.1
    rng = np.random.RandomState(0)
    X = rng.randn(10, 5)
    Y = rng.randn(10, 3)
    est = PLSRegression(n_components=n_components)
    with pytest.warns(FutureWarning,
                      match="n_components=5 will be used instead"):
        est.fit(X, Y)
        # make sure upper bound of rank is used as a fallback
        assert est.transform(X).shape[1] == 5


@pytest.mark.parametrize('Est', (PLSSVD, CCA, PLSCanonical))
def test_scores_deprecations(Est):
    # Make sure x_scores_ and y_scores_ are deprecated.
    # It's not deprecated for PLSRegression because y_scores_ is different
    # from transform(Y_train)
    # TODO: remove attributes and test in 1.1
    rng = np.random.RandomState(0)
    X = rng.randn(10, 5)
    Y = rng.randn(10, 3)
    est = Est().fit(X, Y)
    with pytest.warns(FutureWarning, match="x_scores_ was deprecated"):
        assert_allclose(est.x_scores_, est.transform(X))
    with pytest.warns(FutureWarning, match="y_scores_ was deprecated"):
        assert_allclose(est.y_scores_, est.transform(X, Y)[1])


@pytest.mark.parametrize('Est', (PLSRegression, PLSCanonical, CCA))
def test_norm_y_weights_deprecation(Est):
    rng = np.random.RandomState(0)
    X = rng.randn(10, 5)
    Y = rng.randn(10, 3)
    est = Est().fit(X, Y)
    with pytest.warns(FutureWarning, match="norm_y_weights was deprecated"):
        est.norm_y_weights


# TODO: Remove test in 1.1
@pytest.mark.parametrize('Estimator',
                         (PLSRegression, PLSCanonical, CCA, PLSSVD))
@pytest.mark.parametrize('attribute',
                         ("x_mean_", "y_mean_", "x_std_", "y_std_"))
def test_mean_and_std_deprecation(Estimator, attribute):
    rng = np.random.RandomState(0)
    X = rng.randn(10, 5)
    Y = rng.randn(10, 3)
    estimator = Estimator().fit(X, Y)
    with pytest.warns(FutureWarning, match=f"{attribute} was deprecated"):
        getattr(estimator, attribute)


@pytest.mark.parametrize('n_samples, n_features', [(100, 10), (100, 200)])
@pytest.mark.parametrize('seed', range(10))
def test_singular_value_helpers(n_samples, n_features, seed):
    # Make sure SVD and power method give approximately the same results
    X, Y = make_regression(n_samples, n_features, n_targets=5,
                           random_state=seed)
    u1, v1, _ = _get_first_singular_vectors_power_method(X, Y,
                                                         norm_y_weights=True)
    u2, v2 = _get_first_singular_vectors_svd(X, Y)

    _svd_flip_1d(u1, v1)
    _svd_flip_1d(u2, v2)

    rtol = 1e-1
    assert_allclose(u1, u2, rtol=rtol)
    assert_allclose(v1, v2, rtol=rtol)
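

# For reference, a minimal sketch of the alternating power iteration that
# _get_first_singular_vectors_power_method is expected to approximate. This
# is not sklearn's private implementation; the name and defaults below are
# made up for illustration. The first singular vectors u, v of the
# cross-covariance matrix C = X.T @ Y satisfy u proportional to C @ v and
# v proportional to C.T @ u, so iterating the normalized updates converges
# to them:
def _power_method_sketch(X, Y, max_iter=500, tol=1e-6):
    rng = np.random.RandomState(0)
    v = rng.randn(Y.shape[1])
    v /= np.linalg.norm(v)
    for _ in range(max_iter):
        u = X.T @ (Y @ v)  # u <- C @ v, with C = X.T @ Y
        u /= np.linalg.norm(u)
        v_new = Y.T @ (X @ u)  # v <- C.T @ u
        v_new /= np.linalg.norm(v_new)
        if np.linalg.norm(v_new - v) < tol:
            v = v_new
            break
        v = v_new
    return u, v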


def test_one_component_equivalence():
    # PLSSVD, PLSRegression and PLSCanonical should all be equivalent when
    # n_components is 1
    X, Y = make_regression(100, 10, n_targets=5, random_state=0)
    svd = PLSSVD(n_components=1).fit(X, Y).transform(X)
    reg = PLSRegression(n_components=1).fit(X, Y).transform(X)
    canonical = PLSCanonical(n_components=1).fit(X, Y).transform(X)

    assert_allclose(svd, reg, rtol=1e-2)
    assert_allclose(svd, canonical, rtol=1e-2)


def test_svd_flip_1d():
    # Make sure _svd_flip_1d is equivalent to svd_flip on 1d arrays
    u = np.array([1, -4, 2])
    v = np.array([1, 2, 3])

    u_expected, v_expected = svd_flip(u.reshape(-1, 1), v.reshape(1, -1))
    _svd_flip_1d(u, v)  # inplace

    assert_allclose(u, u_expected.ravel())
    assert_allclose(u, [-1, 4, -2])

    assert_allclose(v, v_expected.ravel())
    assert_allclose(v, [-1, -2, -3])
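

# A minimal sketch of the sign convention that the expected values above
# encode (assuming the convention is: pick the sign that makes the
# largest-magnitude entry of u positive, and flip v by the same sign; the
# helper name below is made up for illustration):
def _sign_flip_sketch(u, v):
    sign = np.sign(u[np.argmax(np.abs(u))])
    return sign * u, sign * v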


def test_loadings_converges():
    """Test that CCA converges. Non-regression test for #19549."""
    X, y = make_regression(n_samples=200, n_features=20, n_targets=20,
                           random_state=20)

    cca = CCA(n_components=10, max_iter=500)

    with pytest.warns(None) as record:
        cca.fit(X, y)
    # ConvergenceWarning is not raised
    assert not record

    # Loadings converges to reasonable values
    assert np.all(np.abs(cca.x_loadings_) < 1)


def test_pls_constant_y():
    """Checks warning when y is constant. Non-regression test for #19831"""
    rng = np.random.RandomState(42)
    x = rng.rand(100, 3)
    y = np.zeros(100)

    pls = PLSRegression()

    msg = "Y residual is constant at iteration"
    with pytest.warns(UserWarning, match=msg):
        pls.fit(x, y)

    assert_allclose(pls.x_rotations_, 0)
