In [ ]:
import random
import pandas as pd
In [ ]:
villes = ['Bordeaux', 'Paris', 'Marseille', 'Nantes', 'Lille']

# Relative sampling weights, aligned by position with `villes`
# (Bordeaux=1, Paris=4, Marseille=2, Nantes=2, Lille=5).
weights = [1, 4, 2, 2, 5]

# Number of draws per sample, and the seed that makes the draws repeatable.
N_DRAWS = 10000
SEED = 42

# Dedicated generator: seeding it lets "Restart & Run All" reproduce the same
# table without altering the state of the global `random` module.
rng = random.Random(SEED)

# Weighted sample: the weights directly bias how often each city is drawn.
col_villes_weights = rng.choices(villes, weights=weights, k=N_DRAWS)

# Unweighted sample: every city is equally likely.
col_villes = rng.choices(villes, k=N_DRAWS)

# Wrap each sample in a single-column pandas DataFrame.
df_weights = pd.DataFrame({'Villes': col_villes_weights})
df_without_weights = pd.DataFrame({'Villes': col_villes})


def _city_shares(df, suffix):
    """Count the rows of ``df`` per city and express each count as a share.

    Args:
        df: DataFrame with a 'Villes' column.
        suffix: Text appended to the names of the two computed columns
            (``num_<suffix>`` and ``percent_<suffix>``).

    Returns:
        DataFrame with one row per distinct city and the columns 'Villes',
        ``num_<suffix>`` (number of rows for that city) and
        ``percent_<suffix>`` (that count as a percentage of ``len(df)``,
        formatted as a string such as "6.78 %").
    """
    total = len(df)
    return df.groupby('Villes', as_index=False).agg(**{
        f'num_{suffix}': ('Villes', 'count'),
        # Fixed two-decimal formatting keeps every value aligned
        # ("19.30 %" rather than "19.3 %").
        f'percent_{suffix}': ('Villes', lambda x: f"{x.count() / total * 100:.2f} %"),
    })


# Group by city to check the distribution produced by each sampling mode.
percent_cities_weights = _city_shares(df_weights, 'weights')
percent_cities_without_weights = _city_shares(df_without_weights, 'without_weights')



# Inner join on the city name, so each row shows the weighted and the
# unweighted statistics of one city side by side.
demo = pd.merge(
    percent_cities_weights,
    percent_cities_without_weights,
    on='Villes',
    how='inner',
)
display(demo)
Villes num_weights percent_weights num_without_weights percent_without_weights
0 Bordeaux 678 6.78 % 2063 20.63 %
1 Lille 3625 36.25 % 1982 19.82 %
2 Marseille 1484 14.84 % 2027 20.27 %
3 Nantes 1395 13.95 % 1930 19.3 %
4 Paris 2818 28.18 % 1998 19.98 %