As an exercise for learning more advanced Altair, I'm trying to generate a simplified version of this chart: https://climatereanalyzer.org/clim/t2_daily/?dm_id=world.
To simplify, I'm using gray for all years prior to 2023 and then red and black for 2023 and 2024, respectively. I'd like to have a legend that is either just for 2023 & 2024 or is "1940-2022", "2023", "2024".
Right now I'm focused on getting a compact legend that reflects either of those subsets of years, but I'd welcome any advice on how to improve the code or the approach.
import pandas as pd
import altair as alt
# Function to fetch and prepare the data
def fetch_and_prep_data():
    """Download the daily temperature JSON and return it as a long DataFrame.

    The endpoint is expected to return a list of records, each with a
    ``'name'`` (used as the year label) and a ``'data'`` list of daily
    values. ``None`` entries in ``'data'`` become NaN.

    Returns:
        pandas.DataFrame with columns ``'Year'`` (the record name),
        ``'Day'`` (1-based position within the record) and
        ``'Temperature'``. Records whose name is longer than four
        characters are dropped (presumably non-year summary series --
        confirm against the feed).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Imported here because the file's import header does not import
    # requests; without this the call below raises NameError.
    import requests

    url = "https://climatereanalyzer.org/clim/t2_daily/json/era5_world_t2_day.json"
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    all_temperatures = []
    for year_data in data:
        year = year_data['name']
        temperatures = [
            temp if temp is not None else float('nan')
            for temp in year_data['data']
        ]
        all_temperatures.append(pd.DataFrame({
            'Year': [year] * len(temperatures),
            'Day': list(range(1, len(temperatures) + 1)),
            'Temperature': temperatures,
        }))
    df_at = pd.concat(all_temperatures)
    # Drop all rows where Year is more than 4 characters long
    df_at = df_at[df_at['Year'].str.len() <= 4]
    return df_at
# Function to create the last day in month labels
def get_last_day_in_month_labels():
    """Map each month's last day-of-year to the month's abbreviated name.

    The calendar of 2023 (a non-leap year) is used as the reference, so the
    keys run from 31 (end of January) to 365 (end of December).

    Returns:
        dict mapping day-of-year to the ``'%b'`` month abbreviation
        (e.g. ``'Jan'`` under the default locale).
    """
    dates = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
    last_days = dates[dates.is_month_end]
    return dict(zip(last_days.day_of_year, last_days.strftime('%b')))
# Functions to determine opacity, color, and stroke width
def determine_opacity(year):
    """Return the line opacity for *year*.

    Years before 2023 get 0.01 so they fade into the background; 2023 and
    later get 1.0. A value that ``int()`` rejects with ``ValueError``
    (e.g. a non-numeric label) is treated as fully opaque.
    """
    try:
        year_int = int(year)
    except ValueError:
        return 1.0
    return 0.01 if year_int < 2023 else 1.0
def determine_color(year):
    """Return the line color name for *year*.

    2023 is ``'red'`` and 2024 is ``'black'``; every other numeric year
    (earlier years, and also any year after 2024) is ``'gray'``. A value
    that ``int()`` rejects with ``ValueError`` is ``'black'``.
    """
    try:
        year_int = int(year)
    except ValueError:
        return 'black'
    if year_int == 2023:
        return 'red'
    if year_int == 2024:
        return 'black'
    # NOTE(review): years after 2024 also land here and are drawn gray --
    # confirm that is intended once newer data appears in the feed.
    return 'gray'
def determine_strokewidth(year):
    """Return the stroke width for *year*.

    Years before 2023 get a thin line (1); 2023 and later get a thick
    line (4). A value that ``int()`` rejects with ``ValueError`` also
    gets 4.
    """
    try:
        year_int = int(year)
    except ValueError:
        return 4
    return 1 if year_int < 2023 else 4
# Fetch and prepare the data, then apply the styling functions to the
# 'Year' column so every row carries its own opacity, color and width.
df_at = fetch_and_prep_data()
df_all = df_at.copy()
df_all['Opacity'] = df_all['Year'].apply(determine_opacity)
df_all['Color'] = df_all['Year'].apply(determine_color)
df_all['Width'] = df_all['Year'].apply(determine_strokewidth)

# Ensure 'Day' is correctly interpreted as a quantitative variable
df_all['Day'] = pd.to_numeric(df_all['Day'], errors='coerce')

# Filter the data to ensure 'Day' values are within the desired range
# (drops day 366 of leap years so every year shares the same x extent)
df_filtered = df_all[df_all['Day'] <= 365]

# Create last day in month labels
last_day_in_month_labels = get_last_day_in_month_labels()

# Extract the keys and values for tick marks and labels
tick_values = list(last_day_in_month_labels.keys())
tick_labels = list(last_day_in_month_labels.values())

# Build a chained ternary for the axis labels:
#   datum.value == 31 ? 'Jan' : datum.value == 59 ? 'Feb' : ... : ''
# so each month-end tick shows its month abbreviation and anything else
# shows an empty label.
label_expr = " : ".join(
    f"datum.value == {tick} ? '{label}'"
    for tick, label in zip(tick_values, tick_labels)
) + " : ''"

# Plotting the main data using Altair with the existing Color and Opacity columns
line_chart = alt.Chart(df_filtered).mark_line().encode(
    x=alt.X(
        'Day:Q',
        title='Month',
        scale=alt.Scale(domain=(0, 365), clamp=True),
        axis=alt.Axis(
            labels=True,
            tickCount=12,
            values=tick_values,
            labelExpr=label_expr,
            labelOffset=-30,  # Shift the x-axis labels to the left by 30 units
        ),
    ),
    y=alt.Y(
        'Temperature:Q',
        title='Temperature (C)',
        scale=alt.Scale(domain=(11, 18), clamp=True),
    ),
    # scale=None makes Altair use the color names in the column literally
    color=alt.Color('Color:N', legend=None, scale=None),
    # NOTE(review): without scale=None the 'Opacity' and 'Width' values go
    # through a scale rather than being used literally -- confirm the
    # rendered result is what is wanted.
    opacity=alt.Opacity('Opacity:Q', legend=None),
    # Add detail encoding for Year, otherwise you get vertical lines
    detail=alt.Detail('Year:N'),
    # legend=None belongs to the StrokeWidth channel, not to encode()
    strokeWidth=alt.StrokeWidth('Width:N', legend=None),
).properties(
    width=800,
    height=600,
)
line_chart