Coverage for model_workflow/utils/bibitransformer

1import numpy as np

2from sklearn.base import TransformerMixin

3from sklearn import mixture

6class BiBiTransformer(TransformerMixin):

7 """

8 Obtain binormality/bimodality information for a given a distribution.

9 This is accomplished by fitting the dataset to two Gaussian Mixture models with one and two components, respectively.

10 Then the BIC score together with Bayes Factor is used to assert binormality, and a modification of Helguero's theorem to assert bimodality.

11 Other parameters such as means, variances and weights are given for both distributions.

13 :param float confidence_level: confidence level to use in Bayes Factor when determining binormality. (Default 5.0)

14 :param **kwargs: other arguments to be passed to sklearn.mixture.GaussianMixture

16 :raises ValueError: if ``confidence_level`` is not between 0 and 100.

17 """

19 def __init__(self, confidence_level=5.0, **kwargs):

20 self.models = self.GMM_models(**kwargs)

22 if confidence_level < 0 and confidence_level > 100:

23 raise ValueError("confidence_level must be between 0 and 100")

24 self.confidence_level = confidence_level

26 def GMM_models(self, **kwargs):

27 """

28 Create two GaussianMixture models with the same hyperparameters,

29 one with one component and the other with two components.

31 :return: tuple of both Gaussian Mixture models

32 """

33 gmm_1 = mixture.GaussianMixture(

34 n_components=1,

35 **kwargs)

36 gmm_2 = mixture.GaussianMixture(

37 n_components=2,

38 **kwargs)

39 return gmm_1, gmm_2

41 def fit(self, X):

42 """Fits both models to the same data.

44 :param X: array of shape (n_samples, n_features)"""

45 self.X = X

46 self.models = tuple(map(

47 lambda model: model.fit(X),

48 self.models))

49 return self

51 def transform(self, X, y=None):

52 """Get parameters describing the distribution fitted to model with lowest BIC value.

53 The information returned is:

54 bimodality

55 binormality/uninormality/insuficient evidence

56 BIC values

57 mean(s)

58 variance(s)

59 weight(s)

61 :param X: array of shape (n_samples, n_features)

63 :return: Dictionary with distribution information."""

64 bics = []

65 means = []

66 variances = []

67 weights = []

68 for gmm in self.models:

69 m = gmm.means_.flatten()

70 v = gmm.covariances_.flatten()

71 b = gmm.bic(X)

72 w = gmm.weights_.flatten()

73 means.append(m)

74 variances.append(v)

75 bics.append(b)

76 weights.append(w)

78 # bayes factor criteria for normality

79 uninormal, binormal, insuf_ev, p = self.bayes_factor_criteria(bics[0],bics[1])

80 if binormal:

81 maxm = np.argmax(means[1])

82 minm = np.argmin(means[1])

83 mean1 = means[1][minm]

84 var1 = variances[1][minm]

85 w1 = weights[1][minm]

86 mean2 = means[1][maxm]

87 var2 = variances[1][maxm]

88 w2 = weights[1][maxm]

89 # Helgueros theorem for unimodality

90 unimodal = self.is_unimodal(

91 [mean1, mean2], [var1, var2])

92 else:

93 mean1 = means[0][0]

94 var1 = variances[0][0]

95 w1 = weights[0][0]

96 mean2, var2, w2 = np.nan, np.nan, 0

97 unimodal = True

98 info = dict(

99 binormal=binormal,

100 uninormal=uninormal,

101 insuf_ev=insuf_ev,

102 unimodal=unimodal,

103 bics=bics,

104 p=p,

105 mean1=mean1,

106 mean2=mean2,

107 var1=var1,

108 var2=var2,

109 w1=w1,

110 w2=w2)

111 return info

112

113 def bayes_factor_criteria(self, bics0, bics1):

114 """

115 Determine uninormality, bimodality and insufficient evidence parameters from Bayes Factor.

116

117 :param bics0: BIC value for 1-component model

118 :param bics1: BIC value for 2-component model

119

120 :return: Tuple with uninormal, binormal and insuficient evidence boolean values.

121 """

122 diff_bic = bics1 - bics0

123 # probability of a two-component model

124 p = 1 / (1+np.exp(0.5*diff_bic))

125 if p == np.nan:

126 if bics0 == np.nan:

127 p = 1

128 elif bics1 == np.nan:

129 p = 0

130

131 uninormal = p < (self.confidence_level / 100)

132 binormal = p > (1 - (self.confidence_level / 100))

133 insuf_ev = True if (not uninormal and not binormal) else False

134 return uninormal, binormal, insuf_ev, p

135

136 def is_unimodal(self, means, vars):

137 """implements P.Dans version of Helguero's theorem in order to detect unimodality.

138

139 :param means: array with values of means for both fitted models.

140 :param vars: array with values of variances for both fitted models.

141

142 :return: unimodal boolean value."""

143 r = vars[0] / vars[1]

144

145 # separation factor

146 s = np.sqrt(

147 -2 + 3*r + 3*r**2 - 2*r**3 + 2*(1 - r + r**2)**1.5

148 ) / (

149 np.sqrt(r)*(1+np.sqrt(r))

150 )

151

152 unimodal = abs(means[1]-means[0]) <= s * \

153 (np.sqrt(vars[0]) + np.sqrt(vars[1]))

154 return unimodal

Coverage for model_workflow/utils/bibitransformer_nassa.py: 92%

66 statements