visit
{
'09/09/2023': [13, 17, 24, 30, 35, 37],
'07/09/2023': [7, 17, 19, 25, 35, 37],
'05/09/2023': [2, 3, 5, 9, 36, 37],
'02/09/2023': [4, 12, 22, 27, 30, 34],
'29/08/2023': [6, 8, 15, 19, 26, 31],
'26/08/2023': [6, 7, 14, 21, 25, 34],
'22/08/2023': [2, 6, 10, 23, 24, 29],
...
}
# Stack every draw into one array and flatten it into a single 1-D sequence
# of all drawn numbers.
numbers = np.array(list(lotto.values())).flatten()
[13, 17, 24, 30, 35, 37, 7, 17, 19, 25, 35, 37, 2, 3, 5, 9, 36, ...]
# Tally how often each value occurs. Bin 0 is sliced off, so count[i] holds
# the number of times the value (i + 1) was drawn.
count = np.bincount(numbers)[1:]
[268, 256, 257, 242, 255, 273, 247, 277, 260, 267, 289, 294,
271, 239, 254, 255, 263, 243, 246, 271, 265, 254, 252, 243,
291, 271, 258, 264, 275, 258, 251, 244, 263, 256, 267, 251, 264]
The numbers in the lottery results appear to be distributed fairly evenly. To check this more rigorously, we can run a chi-squared goodness-of-fit test against the uniform distribution.
def chi2(data, size, expect, p_value=0.05):
    """Two-sided chi-squared test of equiprobability over ``expect`` categories.

    Args:
        data: Indexable sequence of observed counts; entries
            ``0 .. expect - 1`` are read.
        size: Total number of observations (normally ``sum(data)``).
        expect: Number of equally likely categories.
        p_value: Probability mass cut off in *each* tail of the reference
            chi-squared distribution with ``expect - 1`` degrees of freedom.

    Returns:
        A tuple ``(x2, accepted)``: ``x2`` is the Pearson statistic and
        ``accepted`` is True when ``x2`` lies strictly between the lower and
        upper critical values, which happens with probability
        ``1 - 2 * p_value`` when the categories really are equiprobable.
    """
    # Imported locally so the function does not depend on where in the file
    # the module-level ``from scipy import stats`` happens to be executed.
    from scipy import stats

    expected = size / expect  # expected count per category under uniformity
    df = expect - 1
    # Sorting keeps the bounds in order even when p_value > 0.5.
    lower, upper = sorted(
        (stats.chi2.ppf(p_value, df), stats.chi2.ppf(1 - p_value, df))
    )
    x2 = sum((data[i] - expected) ** 2 / expected for i in range(expect))
    accepted = bool(lower < x2 < upper)
    return x2, accepted
This function returns a tuple consisting of the chi-squared statistic and a flag telling whether equiprobability is accepted with probability 1 - 2 * p_value, i.e., whether the statistic avoids both tails, where the extreme values under a discrete uniform distribution have low probability.
# Number of categories handed to the test: one per tally in count.
N = 37
# Arguments: observed tallies, total number of drawn values, category count.
chi2(count, len(numbers), N)
(25.0748, True)
from scipy import stats
# Cross-check with SciPy's built-in test; with no expected frequencies given,
# it treats all categories as equally likely.
chi2_statistic, p_value = stats.chisquare(count)
(25.074, 0.96053)
from itertools import combinations

# Drawn numbers run from 1 to N inclusive, so the upper bound must be N + 1;
# range(1, N) leaves every pair that contains N uncounted. Statistics obtained
# with the former bound covered only the numbers 1..N-1.
pairs = list(combinations(range(1, N + 1), 2))
# One extra row/column lets a number be used directly as an index.
pairs_count = np.zeros([N + 1] * 2, dtype=int)
# Sets give O(1) membership tests inside the pair-by-draw double loop.
draw_sets = [set(draw) for draw in lotto.values()]
for first, second in pairs:
    for drawn in draw_sets:
        if first in drawn and second in drawn:
            pairs_count[first][second] += 1
# Drop the unused index-0 row and column.
pairs_count = pairs_count[1:, 1:]
counts = pairs_count.flatten()
# Only cells with first < second are ever incremented; keeping counts > 0
# removes the empty diagonal and lower triangle (and any pair never drawn).
counts = counts[counts > 0]
chi2(counts, counts.sum(), len(counts))
(589.2721893491138, True)
# Cross-check the pair tallies with SciPy's built-in test, which treats all
# categories as equally likely when no expected frequencies are given.
chi2_statistic, p_value = stats.chisquare(counts)
(589.2721893491124, 0.8698507423203673)
# Drawn numbers run from 1 to N inclusive, so the upper bound must be N + 1;
# range(1, N) leaves every triple that contains N uncounted. Statistics
# obtained with the former bound covered only the numbers 1..N-1.
comb3 = list(combinations(range(1, N + 1), 3))
# One extra slot per axis lets a number be used directly as an index.
comb3_count = np.zeros([N + 1] * 3, dtype=int)
# Sets give O(1) membership tests inside the triple-by-draw double loop.
draw_sets = [set(draw) for draw in lotto.values()]
for first, second, third in comb3:
    for drawn in draw_sets:
        if first in drawn and second in drawn and third in drawn:
            comb3_count[first][second][third] += 1
# Drop the unused index-0 planes.
comb3_count = comb3_count[1:, 1:, 1:]
counts = comb3_count.flatten()
# Only cells with first < second < third are ever incremented; keeping
# counts > 0 removes all other cells (and any triple never drawn).
counts = counts[counts > 0]
chi2(counts, counts.sum(), len(counts))
(6457.575829383709, False)
6457.575829383709 < 6840.049842653838
# Cross-check the triple tallies with SciPy's built-in test, which treats all
# categories as equally likely when no expected frequencies are given.
chi2_statistic, p_value = stats.chisquare(counts)
(6457.575829383886, 0.9999997038479482)
# Two ways to locate the largest tally. Evaluated as one expression, `or`
# only runs the right-hand side when argmax() returns 0.
# NOTE(review): the position is zero-based into count, whose bin 0 was
# dropped, so the corresponding drawn number is this value + 1.
count.argmax() or list(count).index(max(count))
11
year_result = {}
for year in range(2009, 2024):
    year_tag = str(year)
    # Gather the draws whose date key mentions this year.
    drawn = [v for k, v in lotto.items() if year_tag in k]
    tallies = np.bincount(np.array(drawn).flatten())
    # NOTE(review): argmax over the [1:] slice is zero-based, so the stored
    # value is (number - 1); add 1 if the drawn number itself is wanted.
    year_result[year] = tallies[1:].argmax()
{
2009: 16,
2010: 10,
2011: 11,
2012: 24,
2013: 32,
2014: 34,
2015: 21,
2016: 25,
2017: 5,
2018: 10,
2019: 24,
2020: 11,
2021: 12,
2022: 14,
2023: 11
}
year_result = {}
arr = []
for year in range(2009, 2024):
    year_tag = str(year)
    # Append this year's drawn numbers to the running list since 2009.
    drawn = [v for k, v in lotto.items() if year_tag in k]
    arr.extend(np.array(drawn).flatten())
    label = year_tag if year <= 2009 else '2009 - ' + year_tag
    # NOTE(review): argmax over the [1:] slice is zero-based, so the stored
    # value is (number - 1); add 1 if the drawn number itself is wanted.
    year_result[label] = np.bincount(arr)[1:].argmax()
{
'2009': 16,
'2009 - 2010': 10,
'2009 - 2011': 11,
'2009 - 2012': 20,
'2009 - 2013': 20,
'2009 - 2014': 20,
'2009 - 2015': 34,
'2009 - 2016': 20,
'2009 - 2017': 10,
'2009 - 2018': 10,
'2009 - 2019': 10,
'2009 - 2020': 10,
'2009 - 2021': 10,
'2009 - 2022': 24,
'2009 - 2023': 11
}
lotto_counts = {}
for date, draw in lotto.items():
    # The printed form of the draw serves as a hashable grouping key.
    lotto_counts.setdefault(str(draw), []).append(date)
# Keep only the combinations that came up on more than one date.
result = {
    combo: dates for combo, dates in lotto_counts.items() if len(dates) > 1
}
{
'[13, 14, 26, 32, 33, 36]': ['16/10/2010', '21/09/2010']
}