Ting to add some percentage annotations to a population pyramid, but currently the percentages are duplicated, cannot, for the life of me figure out how to fix it.
#creating a count of each male in each age class
male_age_count = cleandf[cleandf['Gender'] == 'Male'].groupby('Age Class')['Age Class'].count()
#print(male_age_count)
#creating count of each female in each age class
female_age_count = cleandf[cleandf['Gender']=='Female'].groupby('Age Class')['Age Class'].count()
#print(f'The female age count of each age class:{female_age_count}')
#creating the negative male count to create correct format for age pyramid
negative_male_age_count = -male_age_count.abs()
#print(f'The negative male count of each age class:{negative_male_age_count}')
#to sort age classes in logical order on graph
ageclass = cleandf['Age Class'].unique()
ageclass = sorted(ageclass, key=lambda x: 200 if x == '100 +' else
int(x.split('-')[0]), reverse=True)
#ageclass = sorted(ageclass,key=lambda x:int(x.split('-')[0]),reverse=True)
#creating variable for graph
age_p = pd.DataFrame({'Age': ageclass,
'Male':-male_age_count.reindex(ageclass),
'Female':female_age_count.reindex(ageclass)})
plt.figure(figsize=(10,6))
#creating bar graph for male side
male_barplot = sns.barplot(x='Male', y= 'Age', data=age_p, color=('mediumblue'), label='Male')
#creating bar graph for female side
female_barplot = sns.barplot(x='Female',y= 'Age', data=age_p, color=('darkorange'), label='Female')
#obtaining entire population
population_total = cleandf.shape[0]
#adding percentage markers to male side of graph
for p in male_barplot.patches:
width = p.get_width()
if width != 0:
percentage = '{:.1f}%'.format(100* width/population_total)
x = p.get_x() + width - 10
y= p.get_y() + p.get_height() / 2
male_barplot.annotate(percentage, (x,y), ha='center',va='center')
#adding percentage markers to female side of graph
for p in female_barplot.patches:
width = p.get_width()
if width != 0:
percentage = '{:.1f}%'.format(100* abs(width)/population_total)
x = p.get_x() + width +10
y= p.get_y() + p.get_height() / 2
female_barplot.annotate(percentage, (x,y), ha='center',va='center')
plt.text(-400,5, 'Male', fontsize=15,fontweight='bold')
plt.text(300,5, 'Female', fontsize=15,fontweight='bold')
plt.legend(loc='best')
plt.xticks(range(-600,700,100))
plt.title('Population/Age Pyramid',fontsize=20,fontweight='bold')
plt.xlabel('Population',fontsize=15,fontweight='bold')
plt.ylabel('Age Range',fontsize=15,fontweight='bold')
plt.show()
Also tried an iterrows method of adding percentages, but that just broke everything. current output with above code