import random
import pandas as pd


villes = ['Bordeaux', 'Paris', 'Marseille', 'Nantes', 'Lille']

# Relative sampling weights, aligned by position with `villes`
# (e.g. 'Lille' is five times as likely to be drawn as 'Bordeaux').
weights = [1, 4, 2, 2, 5]

# Number of random draws in each sample.
SAMPLE_SIZE = 10000

# Weighted sample: `weights` directly biases how often each city is drawn.
col_villes_weights = random.choices(villes, weights=weights, k=SAMPLE_SIZE)

# Unweighted sample: every city is equally likely.
col_villes = random.choices(villes, k=SAMPLE_SIZE)

# Wrap each sample in a single-column pandas DataFrame.
df_weights = pd.DataFrame({'Villes': col_villes_weights})
df_without_weights = pd.DataFrame({'Villes': col_villes})


def _city_distribution(df: pd.DataFrame, suffix: str) -> pd.DataFrame:
    """Return per-city draw counts and their share of all rows in *df*.

    Args:
        df: DataFrame with a 'Villes' column holding one city name per draw.
        suffix: Text appended to the result column names
            (``num_<suffix>`` and ``percent_<suffix>``).

    Returns:
        A DataFrame with one row per distinct city and the columns
        'Villes', ``num_<suffix>`` (row count) and ``percent_<suffix>``
        (share of the total formatted as a string such as "6.78 %").
    """
    num_col = f"num_{suffix}"
    summary = df.groupby('Villes', as_index=False).agg(**{num_col: ('Villes', 'count')})
    total = len(df)
    # Derive the percentage from the counts already computed instead of
    # re-counting every group inside a Python-level lambda.
    summary[f"percent_{suffix}"] = [
        f"{round(count / total * 100, 2)} %" for count in summary[num_col]
    ]
    return summary


# Group by city to check the observed distribution of each sample.
percent_cities_weights = _city_distribution(df_weights, 'weights')
percent_cities_without_weights = _city_distribution(df_without_weights, 'without_weights')

# Side-by-side comparison of the weighted and unweighted distributions.
demo = percent_cities_weights.merge(percent_cities_without_weights, how='inner', on='Villes')

try:
    # `display` is only available as a builtin inside IPython/Jupyter sessions.
    display(demo)
except NameError:
    # Plain Python interpreter: fall back to a text rendering.
    print(demo)

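# Sample output of `display(demo)` from one run (exact counts vary between
# runs because the draws are random):
#
#       Villes  num_weights  percent_weights  num_without_weights  percent_without_weights
# 0   Bordeaux          678           6.78 %                 2063                  20.63 %
# 1      Lille         3625          36.25 %                 1982                  19.82 %
# 2  Marseille         1484          14.84 %                 2027                  20.27 %
# 3     Nantes         1395          13.95 %                 1930                   19.3 %
# 4      Paris         2818          28.18 %                 1998                  19.98 %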