0

I'm trying to add percentage annotations to a population pyramid, but currently the percentages are duplicated, and I cannot, for the life of me, figure out how to fix it.

# Script purpose: draw a population pyramid from `cleandf` (male counts to the
# left of zero, female counts to the right) and annotate each bar with its
# share of the total population.

# Count the rows of each age class among the males.
male_age_count = cleandf[cleandf['Gender'] == 'Male'].groupby('Age Class')['Age Class'].count()
#print(male_age_count)

# Count the rows of each age class among the females.
female_age_count = cleandf[cleandf['Gender']=='Female'].groupby('Age Class')['Age Class'].count()
#print(f'The female age count of each age class:{female_age_count}')

# Negated male counts, so the male bars extend to the left of zero.
# NOTE(review): this variable is not used below; the DataFrame negates
# male_age_count directly instead.
negative_male_age_count = -male_age_count.abs()
#print(f'The negative male count of each age class:{negative_male_age_count}')


# Sort the age-class labels in descending order of their lower bound (the
# number before '-'); '100 +' has no '-' part, so it is given the key 200 to
# place it first.
ageclass = cleandf['Age Class'].unique()
ageclass = sorted(ageclass, key=lambda x: 200 if x == '100 +' else
                  int(x.split('-')[0]), reverse=True)

# Earlier sort key, without the special case for '100 +':
#ageclass = sorted(ageclass,key=lambda x:int(x.split('-')[0]),reverse=True)

# Table that feeds the plot: one row per age class in sorted order, with
# negated male counts and positive female counts. reindex() leaves NaN for an
# age class that is missing from one of the two count Series.
age_p = pd.DataFrame({'Age': ageclass, 
                      'Male':-male_age_count.reindex(ageclass), 
                      'Female':female_age_count.reindex(ageclass)})


plt.figure(figsize=(10,6))
# Horizontal bars for the male side (negative x values).
male_barplot = sns.barplot(x='Male', y= 'Age', data=age_p, color=('mediumblue'), label='Male')

# Horizontal bars for the female side (positive x values).
# NOTE(review): sns.barplot returns the matplotlib Axes it drew on, and both
# calls draw on the current axes, so male_barplot and female_barplot are
# presumably the same Axes object -- confirm against the seaborn docs.
female_barplot = sns.barplot(x='Female',y= 'Age', data=age_p, color=('darkorange'), label='Female')
# Total number of rows in cleandf; used as the percentage denominator.
population_total = cleandf.shape[0]

# Add percentage labels for the male side.
# NOTE(review): if both names refer to one Axes, .patches holds the male AND
# the female bars, so this loop labels every bar; together with the loop
# below, each bar is then labelled twice (the reported duplication).
for p in male_barplot.patches:
    width = p.get_width()
    if width != 0:
        # width is used with its sign here, so a bar with a negative width
        # (presumably the male bars, whose values were negated) produces a
        # negative percentage.
        percentage = '{:.1f}%'.format(100* width/population_total)
        # Label position: 10 data units to the left of the bar's end.
        x = p.get_x() + width - 10
        # Vertical centre of the bar.
        y= p.get_y() + p.get_height() / 2
        male_barplot.annotate(percentage, (x,y), ha='center',va='center')

# Add percentage labels for the female side (see the note on the loop above:
# this iterates the same kind of .patches list, not only the female bars).
for p in female_barplot.patches:
    width = p.get_width()
    if width != 0:
        percentage = '{:.1f}%'.format(100* abs(width)/population_total)
        # Label position: 10 data units to the right of the bar's end.
        x = p.get_x() + width +10
        y= p.get_y() + p.get_height() / 2
        female_barplot.annotate(percentage, (x,y), ha='center',va='center')

# Side captions, placed at hard-coded data coordinates.
plt.text(-400,5, 'Male', fontsize=15,fontweight='bold')
plt.text(300,5, 'Female', fontsize=15,fontweight='bold')

plt.legend(loc='best')
# Fixed tick positions from -600 to 600 in steps of 100.
plt.xticks(range(-600,700,100))
plt.title('Population/Age Pyramid',fontsize=20,fontweight='bold')
plt.xlabel('Population',fontsize=15,fontweight='bold')
plt.ylabel('Age Range',fontsize=15,fontweight='bold')


plt.show()

I also tried an iterrows method of adding the percentages, but that just broke everything. The current output with the above code:

1 Answer 1

0

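The annotations are duplicated because both `sns.barplot` calls draw on the same Axes and return it, so `male_barplot.patches` and `female_barplot.patches` are the same list containing every bar (male and female); each loop therefore labels all of the bars, not just the bars of one gender.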

# Population pyramid of `cleandf`: male counts are drawn to the left of zero,
# female counts to the right, and every non-empty bar is labelled once with
# its share of the total population.


def _annotate_percentages(ax, bars, total, offset):
    """Label each non-empty bar in *bars* with its share of *total*.

    ax: the Axes that receives the annotations.
    bars: the Rectangle patches to label (the bars of ONE gender only).
    total: population size used as the percentage denominator.
    offset: gap, in x data units, between a bar's tip and its label.
    """
    for bar in bars:
        width = bar.get_width()
        # Skip empty bars: `not width` catches 0, `width != width` catches NaN.
        if not width or width != width:
            continue
        start = bar.get_x()
        # The tip is whichever end of the bar lies farther from zero; this
        # works whether a left-pointing bar is stored with a negative width
        # or with a shifted origin.
        tip = max(start, start + width, key=abs)
        pointing_left = tip < 0
        ax.annotate(
            # abs(): male bars have negative widths, but a share of the
            # population is never negative.
            '{:.1f}%'.format(100 * abs(width) / total),
            (tip - offset if pointing_left else tip + offset,
             bar.get_y() + bar.get_height() / 2),
            # Anchor the text on the side facing the bar so that it grows
            # away from the bar instead of overlapping it.
            ha='right' if pointing_left else 'left',
            va='center',
        )


# Count the rows of each age class, per gender.
male_age_count = cleandf[cleandf['Gender'] == 'Male'].groupby('Age Class')['Age Class'].count()
female_age_count = cleandf[cleandf['Gender'] == 'Female'].groupby('Age Class')['Age Class'].count()

# Negated male counts, so the male bars extend to the left of zero.
negative_male_age_count = -male_age_count.abs()

# Sort the age-class labels in descending order of their lower bound;
# '100 +' has no '-' part, so it is given the key 200 to place it first.
ageclass = cleandf['Age Class'].unique()
ageclass = sorted(ageclass, key=lambda x: 200 if x == '100 +' else
                  int(x.split('-')[0]), reverse=True)

# Table that feeds the plot. fill_value=0 keeps an age class that is missing
# for one gender as an empty bar instead of NaN, so both genders always draw
# the same number of bars and no 'nan%' label can appear.
age_p = pd.DataFrame({'Age': ageclass,
                      'Male': negative_male_age_count.reindex(ageclass, fill_value=0),
                      'Female': female_age_count.reindex(ageclass, fill_value=0)})

plt.figure(figsize=(10, 6))

# sns.barplot returns the Axes it drew on, so male_barplot and female_barplot
# are the SAME Axes and its .patches list accumulates the bars of both calls.
# Snapshot the patches after the first call; whatever the second call appends
# after that point are the female bars.
male_barplot = sns.barplot(x='Male', y='Age', data=age_p, color='mediumblue', label='Male')
male_bars = list(male_barplot.patches)

female_barplot = sns.barplot(x='Female', y='Age', data=age_p, color='darkorange', label='Female')
female_bars = list(female_barplot.patches)[len(male_bars):]

# Total number of rows in cleandf; used as the percentage denominator.
population_total = cleandf.shape[0]

# Gap between a bar's tip and its label, in x data units.
label_annotate_step = 10
_annotate_percentages(male_barplot, male_bars, population_total, label_annotate_step)
_annotate_percentages(female_barplot, female_bars, population_total, label_annotate_step)

# Side captions, placed at hard-coded data coordinates.
plt.text(-400, 5, 'Male', fontsize=15, fontweight='bold')
plt.text(300, 5, 'Female', fontsize=15, fontweight='bold')

plt.legend(loc='best')
# Male counts are plotted as negative numbers only to mirror the bars; show
# the tick labels as absolute population values.
ticks = range(-600, 700, 100)
plt.xticks(ticks, [str(abs(tick)) for tick in ticks])
plt.title('Population/Age Pyramid', fontsize=20, fontweight='bold')
plt.xlabel('Population', fontsize=15, fontweight='bold')
plt.ylabel('Age Range', fontsize=15, fontweight='bold')

plt.show()
2
  • As it’s currently written, your answer is unclear. Please edit to add additional details that will help others understand how this addresses the question asked. You can find more information on how to write good answers in the help center.
    – Community Bot
    Commented Jul 3 at 16:34
  • thank you for answering :) Still not getting quite the desired output, it's now showing the percentage annotations all on the left of the graph instead of on either side
    – Leakie
    Commented Jul 4 at 9:14

Not the answer you're looking for? Browse other questions tagged or ask your own question.