Extracting and analyzing data from idescat.cat
#
Requirements#
import requests
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
sns.set_style("whitegrid")
Maria’s analysis in Catalonia#
Extracting and processing the data#
# Years analysed; kept as strings because they are interpolated into the API URL.
years = ['2018', '2019', '2020', '2021', '2022']

# One accumulator per indicator; each ends up with one entry per year,
# in the same order as `years`.
Maria_abs_freq = []
Maria_total_position = []
Maria_girls_position = []
Maria_total_percent = []
Maria_girls_percent = []

for year in years:
    url = f'https://api.idescat.cat/onomastica/v1/nadons/dades.json?id=40683&t={year}&lang=es'
    response = requests.get(url)
    data_json = response.json()
    # Without a territorial breakdown, ['ff']['f'] is read as a single record.
    record = data_json['onomastica_nadons']['ff']['f']
    Maria_abs_freq.append(record['pos1']['v'])  # absolute frequency of the name Maria among children born in Catalonia in {year}
    Maria_total_position.append(record['rank']['total'])  # rank of Maria among the names of all children born in {year}
    Maria_girls_position.append(record['rank']['sex'])  # rank of Maria among the names of girls born in {year}
    Maria_total_percent.append(record['pos1']['w']['total'])  # share of all children born in {year} named Maria
    Maria_girls_percent.append(record['pos1']['w']['sex'])  # share of girls born in {year} named Maria

Maria_dict = {
    'years': years,
    'Maria_abs_freq': Maria_abs_freq,
    'Maria_total_position': Maria_total_position,
    'Maria_girls_position': Maria_girls_position,
    'Maria_total_percent': Maria_total_percent,
    'Maria_girls_percent': Maria_girls_percent,
}

# Counts and ranks are cast to integers, the two shares to floats;
# the 'years' column is left untouched.
Maria_df = pd.DataFrame(Maria_dict).astype({
    'Maria_abs_freq': 'int64',
    'Maria_total_position': 'int64',
    'Maria_girls_position': 'int64',
    'Maria_total_percent': 'float64',
    'Maria_girls_percent': 'float64',
})

# Show the resulting data frame.
print('Maria\'s analysis in Catalonia\n')
display(Maria_df)
Maria's analysis in Catalonia
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 466 | 8 | 4 | 7.38 | 15.15 |
1 | 2019 | 387 | 13 | 6 | 6.36 | 13.00 |
2 | 2020 | 318 | 20 | 10 | 5.49 | 11.23 |
3 | 2021 | 327 | 19 | 8 | 5.67 | 11.65 |
4 | 2022 | 267 | 28 | 13 | 4.74 | 9.83 |
Plotting the results#
# Every column except the year axis gets its own subplot.
selected_columns = [column for column in Maria_df.columns if column != 'years']
titles = [
    "Absolute Frequencies Maria in Catalonia",
    "Ranking: Maria total position in Catalonia",
    "Ranking: Maria girls position in Catalonia",
    "Maria total % in Catalonia",
    "Maria girls % in Catalonia",
]

# Grid layout: fixed number of columns, as many rows as needed.
num_cols = 2
num_rows = int(np.ceil(len(selected_columns) / num_cols))
fig, axes = plt.subplots(num_rows, num_cols, figsize=(12, 10))
axes = axes.flatten()  # 1-D view so the axes can be paired with the columns

colors = sns.color_palette("tab10", len(selected_columns))

# One bar chart per indicator, each with its own colour and title.
for ax, col_name, color, title in zip(axes, selected_columns, colors, titles):
    sns.barplot(x="years", y=col_name, data=Maria_df, color=color, ax=ax)
    ax.set_title(title, fontsize=13)
    ax.set_xticks(np.arange(len(Maria_df)))
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('Year')
    ax.set_ylabel('')

# Drop the grid cells that received no indicator.
for unused_ax in axes[len(selected_columns):]:
    fig.delaxes(unused_ax)

# Overall title and spacing between subplots (hspace: vertical, wspace: horizontal).
plt.suptitle('Maria\'s born in Catalonia - Statistical information\n Last five years (2018-2022)', fontsize=15, y=0.98, weight='bold')
plt.subplots_adjust(hspace=0.5, wspace=0.25)
fig.savefig('Marias_Catalonia' + '.jpg', format='jpg', dpi=550)
plt.show()
Maria’s analysis in Catalonia by city (comarca)#
Extracting and processing the data#
print('Maria\'s analysis in Catalonia by city (comarca)\n')

years = ['2018', '2019', '2020', '2021', '2022']

# Number of leading entries of ['ff']['f'] that are processed.
# 42 = len(data_json['onomastica_nadons']['ff']['f']) - 1, i.e. the final entry
# of each payload is deliberately left out (presumably an aggregate row — TODO confirm).
num_comarcas = 42

# The URL depends only on the year, so each year's payload is downloaded once
# and reused for every comarca (5 requests instead of 42 * 5).
records_by_year = dict()
for year in years:
    url = f'https://api.idescat.cat/onomastica/v1/nadons/dades.json?id=40683&class=com&t={year}&lang=es'
    response = requests.get(url, timeout=30)  # never wait forever on a stalled connection
    response.raise_for_status()  # fail with the HTTP error rather than a confusing KeyError later
    data_json = response.json()
    records_by_year[year] = data_json['onomastica_nadons']['ff']['f']

# Maria_df_dict maps each comarca name to a data frame with one row per year.
Maria_df_dict = dict()
for com_id in range(num_comarcas):
    # Records of this comarca, one per year, in the same order as `years`.
    yearly_records = [records_by_year[year][com_id] for year in years]
    Maria_dict = {
        'years': years,
        # absolute frequency of the name Maria among children born in the comarca
        'Maria_abs_freq': [rec['pos1']['v'] for rec in yearly_records],
        # rank of Maria among the names of all children born in the comarca
        'Maria_total_position': [rec['rank']['total'] for rec in yearly_records],
        # rank of Maria among the names of girls born in the comarca
        'Maria_girls_position': [rec['rank']['sex'] for rec in yearly_records],
        # share of all children born in the comarca named Maria
        'Maria_total_percent': [rec['pos1']['w']['total'] for rec in yearly_records],
        # share of girls born in the comarca named Maria
        'Maria_girls_percent': [rec['pos1']['w']['sex'] for rec in yearly_records],
    }
    # The comarca label is read from the most recent year's payload.
    com = yearly_records[-1]['c']['content']
    Maria_df_dict[com] = pd.DataFrame(Maria_dict)
Maria's analysis in Catalonia by city (comarca)
# Comarca with the most babies named Maria, year by year.
years = ['2018', '2019', '2020', '2021', '2022']

# year -> {comarca: absolute frequency}, leaving out comarcas whose value is missing.
Maria_abs_freq_year = dict()
for year in years:
    Maria_abs_freq_city = dict()
    for com, com_df in Maria_df_dict.items():
        value = com_df.loc[com_df['years'] == year, 'Maria_abs_freq'].iloc[0]
        if value == '_':  # '_' marks a missing value
            continue
        Maria_abs_freq_city[com] = value
    Maria_abs_freq_year[year] = Maria_abs_freq_city

# Parallel arrays per year (frequencies and comarca names) plus the winning comarca.
Maria_abs_freq_values = dict()
Maria_abs_freq_index = dict()
city_max_Maria = dict()
for year in years:
    freq_by_city = Maria_abs_freq_year[year]
    Maria_abs_freq_values[year] = np.array(list(freq_by_city.values()), dtype=int)
    Maria_abs_freq_index[year] = np.array(list(freq_by_city.keys()))
    winner_position = np.argmax(Maria_abs_freq_values[year])
    city_max_Maria[year] = Maria_abs_freq_index[year][winner_position]
    print(f'Catalonia city (comarca) with more Maria\'s born in {year} -->', city_max_Maria[year])
Catalonia city (comarca) with more Maria's born in 2018 --> Barcelonès
Catalonia city (comarca) with more Maria's born in 2019 --> Barcelonès
Catalonia city (comarca) with more Maria's born in 2020 --> Barcelonès
Catalonia city (comarca) with more Maria's born in 2021 --> Barcelonès
Catalonia city (comarca) with more Maria's born in 2022 --> Barcelonès
Plotting the results#
# Grid layout: fixed number of columns, as many rows as the years require.
num_cols = 3
num_rows = int(np.ceil(len(years) / num_cols))
fig, axes = plt.subplots(num_rows, num_cols, figsize=(12, 13))
axes = axes.flatten()  # 1-D view so the axes can be paired with the years

colors = sns.color_palette("tab10", len(years))

# One horizontal bar chart per year: comarcas on the y axis, counts on the x axis.
for ax, color, year in zip(axes, colors, years):
    sns.barplot(x=Maria_abs_freq_values[year], y=Maria_abs_freq_index[year], color=color, ax=ax)
    ax.set_title(year, fontsize=13)
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('Count')
    ax.set_ylabel('Cities')

# Drop the grid cells that received no year.
for unused_ax in axes[len(years):]:
    fig.delaxes(unused_ax)

# Overall title and spacing between subplots (hspace: vertical, wspace: horizontal).
plt.suptitle('Absolute Frequencies Maria by Catalonia cities (comarcas)\n Last five years (2018-2022)', fontsize=15, y=0.95, weight='bold')
plt.subplots_adjust(hspace=0.25, wspace=0.8)
# To export the figure: fig.savefig('Marias_Areas_Catalonia' + '.jpg', format='jpg', dpi=550)
plt.show()
Gender info for the children born in Catalonia in the last nine years (2014-2022)#
Extracting and processing the data#
def try_float(x):
    """Convert ``x`` to ``float``, returning ``None`` when it is not numeric.

    Parameters
    ----------
    x : object
        Value to convert, typically a text token.

    Returns
    -------
    float or None
        ``float(x)`` when the conversion succeeds; ``None`` when ``float``
        rejects the value, either because the text is not a number
        (``ValueError``) or because the type is unsupported, e.g. ``None``
        (``TypeError``).
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        return None
# Scrape the births-by-sex table from the Idescat indicators page (English version).
url = 'https://www.idescat.cat/indicadors/?id=aec&n=15237&fil=43&lang=en'
response = requests.get(url, timeout=30)  # never wait forever on a stalled connection
response.raise_for_status()  # fail with the HTTP error rather than parsing an error page
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table')
if table is None:
    raise ValueError(f'No <table> element found at {url}')

# Column names: the first header cell is replaced by 'Year', the remaining
# header cells are used as they appear on the page.
columns = table.find(class_="cap")
columns_html = columns.select("thead th")
columns_list = ['Year'] + [cell.text for cell in columns_html[1:]]

# Data rows: the first <tr> and the last three <tr> elements are skipped.
tr_list = table.select('tr')
useful_info_index = range(1, len(tr_list) - 3)
rows = dict()
for i, r in enumerate(useful_info_index):
    tokens = tr_list[r].text.replace('\n', ' ').split()
    # The page is requested with lang=en, where ',' is a thousands separator
    # (e.g. 29,187 births); strip it instead of turning it into a decimal point.
    parsed = [try_float(token.replace(',', '')) for token in tokens]
    # Tokens that are not numbers come back as None and are discarded.
    rows[i] = [value for value in parsed if value is not None]

# `rows` maps row position -> list of values, so transpose to get one row per year.
df = pd.DataFrame(rows)
df = df.T
df.columns = columns_list
df['Year'] = df['Year'].astype('int')

# Share of boys and girls among all births of each year.
df['Boys_prop'] = (df['Boys'] / df['Total']).round(3)
df['Girls_prop'] = (df['Girls'] / df['Total']).round(3)

# Show the resulting data frame.
print('Gender info for the childs born in Catalonia in the last nine years (2014-2022)\n')
display(df)
Gender info for the childs born in Catalonia in the last nine years (2014-2022)
Year | Boys | Girls | Total | Boys_prop | Girls_prop | |
---|---|---|---|---|---|---|
0 | 2022 | 29.187 | 27.195 | 56.382 | 0.518 | 0.482 |
1 | 2021 | 29.604 | 28.030 | 57.634 | 0.514 | 0.486 |
2 | 2020 | 29.832 | 28.632 | 58.464 | 0.510 | 0.490 |
3 | 2019 | 31.453 | 30.095 | 61.548 | 0.511 | 0.489 |
4 | 2018 | 32.623 | 30.943 | 63.566 | 0.513 | 0.487 |
5 | 2017 | 34.462 | 32.341 | 66.803 | 0.516 | 0.484 |
6 | 2016 | 35.435 | 33.539 | 68.974 | 0.514 | 0.486 |
7 | 2015 | 36.406 | 34.044 | 70.450 | 0.517 | 0.483 |
Plotting the results#
selected_columns = [x for x in df.columns if x in ['Boys_prop', 'Girls_prop']]

# Subplot titles are looked up by column name so that each chart is labelled
# with the sex it actually shows, whatever the column order of `df` is.
titles_by_column = {'Boys_prop': 'Boys', 'Girls_prop': 'Girls'}
titles = [titles_by_column[col_name] for col_name in selected_columns]

# Grid layout: fixed number of columns, as many rows as needed.
num_cols = 2
num_rows = int(np.ceil(len(selected_columns) / num_cols))
fig, axes = plt.subplots(num_rows, num_cols, figsize=(8, 3))
axes = axes.flatten()  # 1-D view so the axes can be paired with the columns

colors = sns.color_palette("tab10", len(selected_columns))

# One bar chart per proportion column.
for ax, color, col_name, title in zip(axes, colors, selected_columns, titles):
    sns.barplot(x="Year", y=col_name, data=df, color=color, ax=ax)
    ax.set_title(title, fontsize=11)
    ax.set_xticks(np.arange(len(df)))
    ax.tick_params(axis='x', rotation=0, labelsize=9)
    ax.tick_params(axis='y', rotation=0, labelsize=9)
    ax.set_xlabel('Year')
    ax.set_ylabel(' ')
    # Same narrow y range on both charts so the two proportions are comparable.
    ax.set_ylim([0.45, 0.54])

# Drop the grid cells that received no column.
for unused_ax in axes[len(selected_columns):]:
    fig.delaxes(unused_ax)

# Overall title and spacing between subplots (hspace: vertical, wspace: horizontal).
plt.suptitle('Relative frequencies of the gender of childs born in Catalonia\n Last nine years (2014-2022)', fontsize=12, y=1.1, weight='bold')
plt.subplots_adjust(hspace=0.6, wspace=0.3)
# To export the figure: fig.savefig('Gender_born_Catalonia' + '.jpg', format='jpg', dpi=550)
plt.show()
Maria’s analysis in Catalonia by province#
Extracting and processing the data#
years = ['2018', '2019', '2020', '2021', '2022']

# Number of leading entries of ['ff']['f'] that are processed.
# 4 = len(data_json['onomastica_nadons']['ff']['f']) - 1, i.e. the final entry
# of each payload is deliberately left out (presumably an aggregate row — TODO confirm).
num_provinces = 4

# The URL depends only on the year, so each year's payload is downloaded once
# and reused for every province (5 requests instead of 4 * 5).
records_by_year = dict()
for year in years:
    url = f'https://api.idescat.cat/onomastica/v1/nadons/dades.json?id=40683&class=prov&t={year}&lang=es'
    response = requests.get(url, timeout=30)  # never wait forever on a stalled connection
    response.raise_for_status()  # fail with the HTTP error rather than a confusing KeyError later
    data_json = response.json()
    records_by_year[year] = data_json['onomastica_nadons']['ff']['f']

# Maria_df_dict maps each province name to a data frame with one row per year.
Maria_df_dict = dict()
for prov_id in range(num_provinces):
    # Records of this province, one per year, in the same order as `years`.
    yearly_records = [records_by_year[year][prov_id] for year in years]
    Maria_dict = {
        'years': years,
        # absolute frequency of the name Maria among children born in the province
        'Maria_abs_freq': [rec['pos1']['v'] for rec in yearly_records],
        # rank of Maria among the names of all children born in the province
        'Maria_total_position': [rec['rank']['total'] for rec in yearly_records],
        # rank of Maria among the names of girls born in the province
        'Maria_girls_position': [rec['rank']['sex'] for rec in yearly_records],
        # share of all children born in the province named Maria
        'Maria_total_percent': [rec['pos1']['w']['total'] for rec in yearly_records],
        # share of girls born in the province named Maria
        'Maria_girls_percent': [rec['pos1']['w']['sex'] for rec in yearly_records],
    }
    # The province label is read from the most recent year's payload.
    prov = yearly_records[-1]['c']['content']
    Maria_df_dict[prov] = pd.DataFrame(Maria_dict)

# Show one table per province.
print('Maria\'s analysis in Catalonia by province')
for prov in Maria_df_dict.keys():
    print('-----------------------------------------------------------------------------------------------')
    print(prov)
    display(Maria_df_dict[prov])
Maria's analysis in Catalonia by province
-----------------------------------------------------------------------------------------------
Barcelona
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 344 | 11 | 5 | 7.41 | 15.19 |
1 | 2019 | 288 | 14 | 7 | 6.46 | 13.17 |
2 | 2020 | 245 | 19 | 9 | 5.81 | 11.89 |
3 | 2021 | _ | _ | _ | _ | _ |
4 | 2022 | 189 | 33 | 14 | 4.59 | 9.56 |
-----------------------------------------------------------------------------------------------
Girona
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 47 | 6 | 2 | 7.00 | 14.43 |
1 | 2019 | 27 | 31 | 16 | 4.21 | 8.62 |
2 | 2020 | 33 | 21 | 11 | 5.24 | 10.79 |
3 | 2021 | 37 | 12 | 5 | 6.10 | 12.45 |
4 | 2022 | 19 | 47 | 22 | 3.21 | 6.52 |
-----------------------------------------------------------------------------------------------
Lleida
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 17 | 28 | 13 | 4.89 | 10.15 |
1 | 2019 | 23 | 8 | 3 | 6.69 | 13.71 |
2 | 2020 | 8 | 69 | 38 | 2.44 | 4.84 |
3 | 2021 | 17 | 19 | 7 | 5.30 | 11.06 |
4 | 2022 | 19 | 15 | 7 | 5.89 | 12.22 |
-----------------------------------------------------------------------------------------------
Tarragona
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 58 | 4 | 2 | 8.81 | 18.22 |
1 | 2019 | 49 | 6 | 4 | 7.60 | 15.85 |
2 | 2020 | 32 | 19 | 8 | 5.23 | 10.62 |
3 | 2021 | 30 | 26 | 12 | 5.00 | 10.33 |
4 | 2022 | 40 | 10 | 5 | 6.66 | 13.70 |
# Province with the most babies named Maria in 2022.
Maria_abs_freq_2022 = dict()
for prov, prov_df in Maria_df_dict.items():
    is_2022 = prov_df['years'] == '2022'
    Maria_abs_freq_2022[prov] = prov_df.loc[is_2022, 'Maria_abs_freq'].iloc[0]

# Parallel arrays (frequencies and province names) so argmax can pick the winner.
Maria_abs_freq_2022_values = np.array(list(Maria_abs_freq_2022.values()), dtype=int)
Maria_abs_freq_2022_index = np.array(list(Maria_abs_freq_2022.keys()))
winner_position = np.argmax(Maria_abs_freq_2022_values)
prov_max_Maria_2022 = Maria_abs_freq_2022_index[winner_position]
print('Catalonia province with more Maria\'s born in 2022 -->', prov_max_Maria_2022)
Catalonia province with more Maria's born in 2022 --> Barcelona
Plotting the results#
# Columns holding counts/ranks and columns holding percentages.
count_columns = ['Maria_abs_freq', 'Maria_total_position', 'Maria_girls_position']
percent_columns = ['Maria_total_percent', 'Maria_girls_percent']

for prov in Maria_df_dict.keys():
    prov_df = Maria_df_dict[prov]
    # Rows carrying the '_' missing-value marker cannot be cast to numbers, so
    # they are removed wherever they occur instead of dropping one hard-coded
    # row of one province. Re-running this cell is harmless: once the columns
    # are numeric no cell compares equal to '_'.
    has_missing = (prov_df[count_columns + percent_columns] == '_').any(axis=1)
    prov_df = prov_df[~has_missing]
    Maria_df_dict[prov] = prov_df.astype({
        **{col_name: 'int64' for col_name in count_columns},
        **{col_name: 'float64' for col_name in percent_columns},
    })

provinces = Maria_df_dict.keys()

# Grid layout: fixed number of columns, as many rows as the provinces require.
num_cols = 2
num_rows = int(np.ceil(len(provinces) / num_cols))
fig, axes = plt.subplots(num_rows, num_cols, figsize=(10, 8))
axes = axes.flatten()  # 1-D view so the axes can be paired with the provinces

colors = sns.color_palette("tab10", len(provinces))

# One bar chart of absolute frequencies per province.
for ax, color, prov in zip(axes, colors, provinces):
    sns.barplot(x="years", y='Maria_abs_freq', data=Maria_df_dict[prov], color=color, ax=ax)
    ax.set_title(f"{prov}", fontsize=12)
    ax.set_xticks(np.arange(len(Maria_df_dict[prov])))
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('Year')
    ax.set_ylabel('Count')

# Drop the grid cells that received no province.
for unused_ax in axes[len(provinces):]:
    fig.delaxes(unused_ax)

# Overall title and spacing between subplots (hspace: vertical, wspace: horizontal).
plt.suptitle('Absolute Frequencies Maria by Catalonia provinces\n Last five years (2018-2022)', fontsize=15, y=0.98, weight='bold')
plt.subplots_adjust(hspace=0.3, wspace=0.3)
# To export the figure: fig.savefig('Marias_Provinces_Catalonia' + '.jpg', format='jpg', dpi=550)
plt.show()
Maria’s analysis in Catalonia by area (region)#
Extracting and processing the data#
years = ['2018', '2019', '2020', '2021', '2022']

# Number of leading entries of ['ff']['f'] that are processed.
# 8 = len(data_json['onomastica_nadons']['ff']['f']) - 1, i.e. the final entry
# of each payload is deliberately left out (presumably an aggregate row — TODO confirm).
num_areas = 8

# The URL depends only on the year, so each year's payload is downloaded once
# and reused for every area (5 requests instead of 8 * 5).
records_by_year = dict()
for year in years:
    url = f'https://api.idescat.cat/onomastica/v1/nadons/dades.json?id=40683&class=at&t={year}&lang=es'
    response = requests.get(url, timeout=30)  # never wait forever on a stalled connection
    response.raise_for_status()  # fail with the HTTP error rather than a confusing KeyError later
    data_json = response.json()
    records_by_year[year] = data_json['onomastica_nadons']['ff']['f']

# Maria_df_dict maps each area name to a data frame with one row per year.
Maria_df_dict = dict()
for at_id in range(num_areas):
    # Records of this area, one per year, in the same order as `years`.
    yearly_records = [records_by_year[year][at_id] for year in years]
    Maria_dict = {
        'years': years,
        # absolute frequency of the name Maria among children born in the area
        'Maria_abs_freq': [rec['pos1']['v'] for rec in yearly_records],
        # rank of Maria among the names of all children born in the area
        'Maria_total_position': [rec['rank']['total'] for rec in yearly_records],
        # rank of Maria among the names of girls born in the area
        'Maria_girls_position': [rec['rank']['sex'] for rec in yearly_records],
        # share of all children born in the area named Maria
        'Maria_total_percent': [rec['pos1']['w']['total'] for rec in yearly_records],
        # share of girls born in the area named Maria
        'Maria_girls_percent': [rec['pos1']['w']['sex'] for rec in yearly_records],
    }
    # The area label is read from the most recent year's payload.
    at = yearly_records[-1]['c']['content']
    Maria_df_dict[at] = pd.DataFrame(Maria_dict)

# Show one table per area.
print('Maria\'s analysis in Catalonia by area (region)')
for at in Maria_df_dict.keys():
    print('-----------------------------------------------------------------------------------------------')
    print(at)
    display(Maria_df_dict[at])
Maria's analysis in Catalonia by area (region)
-----------------------------------------------------------------------------------------------
Metropolità
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 308 | 10 | 5 | 7.67 | 15.66 |
1 | 2019 | 246 | 15 | 7 | 6.38 | 13.01 |
2 | 2020 | 210 | 22 | 10 | 5.77 | 11.80 |
3 | 2021 | 211 | 20 | 8 | 5.77 | 11.86 |
4 | 2022 | 164 | 30 | 13 | 4.63 | 9.63 |
-----------------------------------------------------------------------------------------------
Comarques Gironines
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 47 | 6 | 2 | 7.11 | 14.67 |
1 | 2019 | 27 | 31 | 16 | 4.29 | 8.77 |
2 | 2020 | 33 | 21 | 11 | 5.30 | 10.90 |
3 | 2021 | 37 | 12 | 5 | 6.21 | 12.69 |
4 | 2022 | 19 | 44 | 20 | 3.27 | 6.65 |
-----------------------------------------------------------------------------------------------
Camp de Tarragona
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 39 | 5 | 3 | 8.72 | 18.03 |
1 | 2019 | 35 | 3 | 2 | 8.26 | 17.34 |
2 | 2020 | 18 | 28 | 13 | 4.42 | 8.89 |
3 | 2021 | 17 | 37 | 18 | 4.27 | 8.89 |
4 | 2022 | 30 | 5 | 3 | 7.60 | 15.50 |
-----------------------------------------------------------------------------------------------
Terres de l'Ebre
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 14 | 7 | 4 | 10.52 | 21.41 |
1 | 2019 | 11 | 9 | 6 | 8.12 | 16.69 |
2 | 2020 | 5 | 30 | 16 | 4.13 | 8.17 |
3 | 2021 | 10 | 12 | 6 | 8.26 | 16.34 |
4 | 2022 | 5 | 37 | 15 | 4.00 | 8.35 |
-----------------------------------------------------------------------------------------------
Ponent
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 14 | 31 | 13 | 4.65 | 9.60 |
1 | 2019 | 20 | 7 | 4 | 6.78 | 13.93 |
2 | 2020 | 7 | 68 | 34 | 2.46 | 4.87 |
3 | 2021 | 16 | 15 | 5 | 5.76 | 11.98 |
4 | 2022 | 16 | 15 | 8 | 5.71 | 11.85 |
-----------------------------------------------------------------------------------------------
Comarques Centrals
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 22 | 18 | 8 | 6.46 | 13.56 |
1 | 2019 | 29 | 7 | 2 | 8.73 | 17.91 |
2 | 2020 | 17 | 19 | 7 | 5.40 | 11.14 |
3 | 2021 | 17 | 23 | 11 | 5.52 | 11.09 |
4 | 2022 | 17 | 22 | 9 | 5.46 | 11.36 |
-----------------------------------------------------------------------------------------------
Alt Pirineu i Aran
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | _ | _ | _ | _ | _ |
1 | 2019 | _ | _ | _ | _ | _ |
2 | 2020 | _ | _ | _ | _ | _ |
3 | 2021 | 0 | _ | _ | 0.00 | 0.00 |
4 | 2022 | _ | _ | _ | _ | _ |
-----------------------------------------------------------------------------------------------
Penedès
years | Maria_abs_freq | Maria_total_position | Maria_girls_position | Maria_total_percent | Maria_girls_percent | |
---|---|---|---|---|---|---|
0 | 2018 | 20 | 23 | 12 | 5.38 | 11.22 |
1 | 2019 | 18 | 24 | 12 | 4.85 | 9.91 |
2 | 2020 | 27 | 4 | 3 | 7.59 | 15.86 |
3 | 2021 | 16 | 38 | 16 | 4.32 | 9.09 |
4 | 2022 | 15 | 38 | 18 | 4.26 | 8.87 |
# Area (region) with the most babies named Maria in 2022.
# 'Alt Pirineu i Aran' is left out of the comparison and of the later plots.
selected_at = [area for area in Maria_df_dict.keys() if area != 'Alt Pirineu i Aran']

Maria_abs_freq_2022 = dict()
for at in selected_at:
    area_df = Maria_df_dict[at]
    is_2022 = area_df['years'] == '2022'
    Maria_abs_freq_2022[at] = area_df.loc[is_2022, 'Maria_abs_freq'].iloc[0]

# Parallel arrays (frequencies and area names) so argmax can pick the winner.
Maria_abs_freq_2022_values = np.array(list(Maria_abs_freq_2022.values()), dtype=int)
Maria_abs_freq_2022_index = np.array(list(Maria_abs_freq_2022.keys()))
winner_position = np.argmax(Maria_abs_freq_2022_values)
prov_max_Maria_2022 = Maria_abs_freq_2022_index[winner_position]
print('Catalonia area with more Maria\'s born in 2022 -->', prov_max_Maria_2022)
Catalonia area with more Maria's born in 2022 --> Metropolità
Plotting the results#
# Cast the selected areas' columns: counts and ranks to integers, shares to floats.
column_dtypes = {
    'Maria_abs_freq': 'int64',
    'Maria_total_position': 'int64',
    'Maria_girls_position': 'int64',
    'Maria_total_percent': 'float64',
    'Maria_girls_percent': 'float64',
}
for at in selected_at:
    Maria_df_dict[at] = Maria_df_dict[at].astype(column_dtypes)

# Grid layout: fixed number of columns, as many rows as the areas require.
num_cols = 2
num_rows = int(np.ceil(len(selected_at) / num_cols))
fig, axes = plt.subplots(num_rows, num_cols, figsize=(11, 10))
axes = axes.flatten()  # 1-D view so the axes can be paired with the areas

colors = sns.color_palette("tab10", len(selected_at))

# One bar chart of absolute frequencies per area.
for ax, color, at in zip(axes, colors, selected_at):
    area_df = Maria_df_dict[at]
    sns.barplot(x="years", y='Maria_abs_freq', data=area_df, color=color, ax=ax)
    ax.set_title(f"{at}", fontsize=13)
    ax.set_xticks(np.arange(len(area_df)))
    ax.tick_params(axis='x', rotation=0)
    ax.set_xlabel('Year')
    ax.set_ylabel('Count')

# Drop the grid cells that received no area.
for unused_ax in axes[len(selected_at):]:
    fig.delaxes(unused_ax)

# Overall title and spacing between subplots (hspace: vertical, wspace: horizontal).
plt.suptitle('Absolute Frequencies Maria by Catalonia areas - Last five years (2018-2022)', fontsize=15, y=0.95, weight='bold')
plt.subplots_adjust(hspace=0.8, wspace=0.3)
# To export the figure: fig.savefig('Marias_Areas_Catalonia' + '.jpg', format='jpg', dpi=550)
plt.show()