import numpy as np
import matplotlib.pyplot as plt
# Simulation comparing two estimators of a population mean:
#   * "bootstrap": mean of a sample drawn with replacement;
#   * "dedupe":    mean of the distinct elements hit by the same kind of
#                  with-replacement draw (duplicates are dropped first).
# Both sampling distributions are plotted against the true population mean.

total_pop = 1000
sample_size = int(0.2 * total_pop)  # each sample is 20% of the population
n_trials = 10000  # number of repeated samples per estimator

population = np.random.uniform(0, 10, total_pop)
mean = population.mean()

mean_ests = []
deduplicated = []
for _ in range(n_trials):
    # Bootstrap estimate: sample values with replacement and average them.
    mean_ests.append(
        np.random.choice(population, size=sample_size, replace=True).mean()
    )

    # De-duplicated estimate: draw indices with replacement over the WHOLE
    # population (upper bound is total_pop, not a hard-coded 100, otherwise
    # only the first 100 elements could ever be selected), then keep each
    # selected element once.
    drawn_indices = np.random.randint(0, total_pop, size=sample_size)
    dedupe_sample = np.unique(drawn_indices)
    deduplicated.append(population[dedupe_sample].mean())

# Semi-transparent bars so the two overlapping distributions are both visible.
plt.hist(mean_ests, label="bootstrap", bins=50, alpha=0.5)
plt.hist(deduplicated, label="dedupe", bins=50, alpha=0.5)
plt.axvline(mean, color="black", label="true mean")
plt.legend()
plt.show()