Log-likelihood ratio (LLR) for significance testing of A/B test subgroups

# Symbolic model of one segment of an A/B test.
#
#   impact - global impact on conversion of going from A to B, expressed as
#            a shift in ln-odds (x2 = x1 + impact)
#   x1     - ln-odds of conversion in A in this segment: the value that
#            maximizes the likelihood and satisfies x2 = x1 + impact
#   x2     - same as x1, but for B
#   n1     - number of conversions in this segment in A
#   n2     - number of conversions in this segment in B
#   b1     - number of non-conversions in this segment in A
#   b2     - number of non-conversions in this segment in B
var('x1', 'impact', 'n1', 'b1', 'n2', 'b2')

# Sage needs help here, so substitute expx1 = e^x1 by hand and work in
# terms of expx1 instead of x1.
var('expx1')
expx2 = exp(log(expx1) + impact)  # e^x2 = e^(x1 + impact)

# Conversion probabilities recovered from the odds: p = odds / (1 + odds).
p1 = expx1 / (1 + expx1)
p2 = expx2 / (1 + expx2)

# Binomial log-likelihoods of A and B (combinatorial constants dropped).
ll1 = n1*log(p1) + (b1)*log(1 - p1)
ll2 = n2*log(p2) + (b2)*log(1 - p2)

# Display the joint log-likelihood of the segment.
ll1 + ll2
       
n1*log(expx1/(expx1 + 1)) + b1*log(-expx1/(expx1 + 1) + 1) +
n2*log(e^(impact + log(expx1))/(e^(impact + log(expx1)) + 1)) +
b2*log(-e^(impact + log(expx1))/(e^(impact + log(expx1)) + 1) + 1)
n1*log(expx1/(expx1 + 1)) + b1*log(-expx1/(expx1 + 1) + 1) + n2*log(e^(impact + log(expx1))/(e^(impact + log(expx1)) + 1)) + b2*log(-e^(impact + log(expx1))/(e^(impact + log(expx1)) + 1) + 1)
# Derivative of the joint log-likelihood with respect to x1 (not expx1).
# Since expx1 = e^x1, the chain rule gives d/dx1 = expx1 * d/dexpx1.
d = derivative(ll1 + ll2, expx1)*expx1

# Display the derivative.
d
       
(n2*(e^(impact + log(expx1))/(expx1*(e^(impact + log(expx1)) + 1)) -
e^(2*impact + 2*log(expx1))/(expx1*(e^(impact + log(expx1)) +
1)^2))*(e^(impact + log(expx1)) + 1)*e^(-impact - log(expx1)) + (expx1 +
1)*n1*(1/(expx1 + 1) - expx1/(expx1 + 1)^2)/expx1 + b1*(1/(expx1 + 1) -
expx1/(expx1 + 1)^2)/(expx1/(expx1 + 1) - 1) + b2*(e^(impact +
log(expx1))/(expx1*(e^(impact + log(expx1)) + 1)) - e^(2*impact +
2*log(expx1))/(expx1*(e^(impact + log(expx1)) + 1)^2))/(e^(impact +
log(expx1))/(e^(impact + log(expx1)) + 1) - 1))*expx1
(n2*(e^(impact + log(expx1))/(expx1*(e^(impact + log(expx1)) + 1)) - e^(2*impact + 2*log(expx1))/(expx1*(e^(impact + log(expx1)) + 1)^2))*(e^(impact + log(expx1)) + 1)*e^(-impact - log(expx1)) + (expx1 + 1)*n1*(1/(expx1 + 1) - expx1/(expx1 + 1)^2)/expx1 + b1*(1/(expx1 + 1) - expx1/(expx1 + 1)^2)/(expx1/(expx1 + 1) - 1) + b2*(e^(impact + log(expx1))/(expx1*(e^(impact + log(expx1)) + 1)) - e^(2*impact + 2*log(expx1))/(expx1*(e^(impact + log(expx1)) + 1)^2))/(e^(impact + log(expx1))/(e^(impact + log(expx1)) + 1) - 1))*expx1
# Stationary points of the log-likelihood: solve d == 0 for expx1.
# The recorded output is a list of two equations `expx1 == ...` that differ
# only in the sign in front of the square root.
f = solve(d, expx1)

# Display the solutions.
f
       
[expx1 == -1/2*(b2*e^impact - n1*e^impact + b1 - n2 +
sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 -
2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact
+ n1*e^impact - b1)*n2 + n2^2))*e^(-impact)/(b1 + b2), expx1 ==
-1/2*(b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) +
n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 +
2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 +
n2^2))*e^(-impact)/(b1 + b2)]
[expx1 == -1/2*(b2*e^impact - n1*e^impact + b1 - n2 + sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))*e^(-impact)/(b1 + b2), expx1 == -1/2*(b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))*e^(-impact)/(b1 + b2)]
# Joint log-likelihood of the segment as a callable function of expx1.
g(expx1) = ll1 + ll2

# Log-likelihood evaluated at the second root returned by solve();
# .right() extracts the right-hand side of the equation `expx1 == ...`.
# NOTE(review): in the recorded output f[1] is the root of the form
# (sqrt(...) - (...))/2 * e^(-impact)/(b1 + b2), which looks like the positive
# (i.e. valid odds) root for positive counts -- confirm that solve() keeps
# this ordering on the Sage version in use.
h(n1, b1, n2, b2, impact) = g(f[1].right())

# Display the closed form.
simplify(h(n1, b1, n2, b2, impact))
       
n1*log((b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) +
n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 +
2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 +
n2^2))*e^(-impact)/((b1 + b2)*((b2*e^impact - n1*e^impact + b1 - n2 -
sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 -
2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact
+ n1*e^impact - b1)*n2 + n2^2))*e^(-impact)/(b1 + b2) - 2))) +
b1*log(-(b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) +
n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 +
2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 +
n2^2))*e^(-impact)/((b1 + b2)*((b2*e^impact - n1*e^impact + b1 - n2 -
sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 -
2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact
+ n1*e^impact - b1)*n2 + n2^2))*e^(-impact)/(b1 + b2) - 2)) + 1) +
n2*log((b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) +
n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 +
2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 +
n2^2))/((b1 + b2)*((b2*e^impact - n1*e^impact + b1 - n2 -
sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 -
2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact
+ n1*e^impact - b1)*n2 + n2^2))/(b1 + b2) - 2))) + b2*log(-(b2*e^impact
- n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) +
2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1
+ 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))/((b1 +
b2)*((b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) +
n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 +
2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 +
n2^2))/(b1 + b2) - 2)) + 1)
n1*log((b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))*e^(-impact)/((b1 + b2)*((b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))*e^(-impact)/(b1 + b2) - 2))) + b1*log(-(b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))*e^(-impact)/((b1 + b2)*((b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))*e^(-impact)/(b1 + b2) - 2)) + 1) + n2*log((b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))/((b1 + b2)*((b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))/(b1 + b2) - 2))) + b2*log(-(b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))/((b1 + b2)*((b2*e^impact - n1*e^impact + b1 - n2 - sqrt(b2^2*e^(2*impact) + n1^2*e^(2*impact) + 2*b1*b2*e^impact + b1^2 - 2*(b2*e^(2*impact) - (b1 + 2*b2)*e^impact)*n1 + 2*((2*b1 + b2)*e^impact + n1*e^impact - b1)*n2 + n2^2))/(b1 + b2) - 2)) + 1)
def ll(n, b):
    """Return the maximized binomial log-likelihood of a single group.

    The conversion probability is set to its observed value
    p = n / (n + b) and the result is n*log(p) + b*log(1 - p).

    Args:
        n: number of conversions.
        b: number of non-conversions.

    Returns:
        The log-likelihood (<= 0). When either count is zero the observed
        rate is exactly 0 or 1, the data have likelihood 1, and 0.0 is
        returned instead of evaluating 0*log(0).
    """
    if n == 0 or b == 0:
        # Covers the empty group (n == b == 0) as well, avoiding 0/0.
        return 0.0
    # float() guards against integer floor division when both counts are
    # plain Python ints on a Python 2 based Sage.
    p = float(n) / (n + b)
    return n*log(p) + b*log(1 - p)


def _log_odds(n, b):
    """Return the observed ln-odds of conversion, log(p) - log(1 - p)."""
    p = float(n) / (n + b)
    return log(p) - log(1 - p)


def signif_for_2_way_split(n1a, b1a, n1b, b1b, n2a, b2a, n2b, b2b):
    """Log-likelihood ratio for an A/B test split into two segments.

    Compares a model in which every (segment, variant) cell has its own
    conversion rate against a model in which both segments share the same
    ln-odds shift between variants a and b.

    Args:
        n1a, b1a: conversions / non-conversions, segment 1, variant a.
        n1b, b1b: conversions / non-conversions, segment 1, variant b.
        n2a, b2a: conversions / non-conversions, segment 2, variant a.
        n2b, b2b: conversions / non-conversions, segment 2, variant b.

    Returns:
        Unconstrained log-likelihood minus the constrained log-likelihood
        ``h`` evaluated at the shared shift, summed over both segments.

    Note:
        The ln-odds of a cell with zero conversions or zero non-conversions
        is undefined, so all eight counts are expected to be positive.
        NOTE(review): the shared shift is a traffic-weighted average of the
        per-segment observed shifts rather than a maximum-likelihood
        estimate, and the result is not multiplied by 2 -- confirm before
        comparing against chi-square quantiles.
    """
    # Total traffic per segment, used as the averaging weights.
    t1 = n1a + b1a + n1b + b1b
    t2 = n2a + b2a + n2b + b2b

    # Observed ln-odds shift from a to b within each segment.
    shift1 = _log_odds(n1b, b1b) - _log_odds(n1a, b1a)
    shift2 = _log_odds(n2b, b2b) - _log_odds(n2a, b2a)

    # Shared shift: traffic-weighted mean of the per-segment shifts.
    x = (shift1*t1 + shift2*t2) / (t1 + t2)

    return (
        ll(n1a, b1a) + ll(n1b, b1b) - h(n1a, b1a, n1b, b1b, x) +
        ll(n2a, b2a) + ll(n2b, b2b) - h(n2a, b2a, n2b, b2b, x)
    )
       
# Worked example: two segments with different A -> B lifts.

# Total traffic per (segment, variant) cell.
t1a = 20000
t1b = 30000
t2a = 10000
t2b = 50000

# Conversion odds per cell and the matching probabilities p = odds/(1 + odds).
odds1a = 0.2
pp1a = odds1a / (1 + odds1a)
odds1b = 0.4
pp1b = odds1b / (1 + odds1b)
odds2a = 0.8
pp2a = odds2a / (1 + odds2a)
odds2b = 1.2
pp2b = odds2b / (1 + odds2b)

# Expected counts per cell: nn = conversions (truncated to an int),
# bb = non-conversions. bb is stored as a float, presumably so that
# ratios such as n / (n + b) are true divisions.
nn1a = int(pp1a*t1a)
bb1a = float(t1a - nn1a)
nn1b = int(pp1b*t1b)
bb1b = float(t1b - nn1b)
nn2a = int(pp2a*t2a)
bb2a = float(t2a - nn2a)
nn2b = int(pp2b*t2b)
bb2b = float(t2b - nn2b)

# Display the inputs, one probability as a sanity check, and the statistic.
[nn1a, bb1a, nn1b, bb1b, nn2a, bb2a, nn2b, bb2b, pp1a,
 signif_for_2_way_split(nn1a, bb1a, nn1b, bb1b, nn2a, bb2a, nn2b, bb2b)]
       
[3333, 16667.0, 8571, 21429.0, 4444, 5556.0, 27272, 22728.0,
0.166666666666667, 41.36653779781773]
[3333, 16667.0, 8571, 21429.0, 4444, 5556.0, 27272, 22728.0, 0.166666666666667, 41.36653779781773]