Someone recently asked how to create monthly means from daily AQS data. Here’s an easy example that should work on any system with pandas and matplotlib.
import pandas as pd
import matplotlib.pyplot as plt
# Year of the AQS daily ozone file (daily_44201_<YYYY>.zip) to download
YYYY = 2016
# Key columns of a daily record; the monthly grouping further down uses all
# of them except 'Date Local'
index_cols = ['State Code', 'County Code', 'Site Num', 'POC', 'Date Local']
# Reading data and subsetting by event type to prevent duplicate records
# "None" means no events
# If there are events, there are two records per day:
# "Included" means has events and the event data is included
# "Excluded" means has events and the event data was not included
#
# pandas 2.x lists the literal string "None" among read_csv's default
# missing-value markers, so the "None" event type would be parsed as NaN and
# then thrown away by the filter below. Restricting the markers to the empty
# string keeps the text intact, so the no-event records survive the filter
# while empty numeric fields are still read as NaN.
# NOTE(review): 'Pollutant Standard' is loaded but not filtered on in this
# section -- confirm the file holds a single standard per monitor-day,
# otherwise records are still duplicated across standards.
data = pd.read_csv(
    f'https://aqs.epa.gov/aqsweb/airdata/daily_44201_{YYYY}.zip',
    usecols=index_cols + [
        'Longitude', 'Latitude', '1st Max Value', '1st Max Hour',
        'Event Type', 'Pollutant Standard',
    ],
    keep_default_na=False,
    na_values=[''],
).query('`Event Type` in ("None", "Included")')
# Make a numeric month variable from characters 5-6 of 'Date Local'
# (assumes ISO 'YYYY-MM-DD' date strings -- TODO confirm). Drop the
# `.astype('i')` if you would rather keep the month as a two-character string.
data['Month'] = data['Date Local'].str[5:7].astype('i')
# Create a group by object keyed on every index column except the date,
# plus the month
monthg = data.groupby(index_cols[:-1] + ['Month'])
# Create a monthly mean of the numeric columns only: the text columns
# (e.g. 'Date Local', 'Event Type') cannot be averaged, and pandas >= 2.0
# raises a TypeError instead of silently dropping them when numeric_only is
# left at its default
monthly = monthg.mean(numeric_only=True)
# Add count for completion checks (number of non-missing daily values that
# went into each monthly mean)
monthly['Count'] = monthg['1st Max Value'].count()
# Make a figure and save it: light grey bars show the summed record count per
# month (left axis) and box plots show the spread of the monthly means of
# '1st Max Value' across the grouped rows of each month (right axis)
fig, ax = plt.subplots(1, 1)
tax = ax.twinx()
# Total number of contributing daily records per month
mc = monthly[['Count']].groupby(level='Month').sum()
ax.bar(mc.index, mc['Count'], color='lightgrey', alpha=0.25)
# Group on the scalar index level rather than the list ['Month']: with a
# one-element list pandas >= 2.0 yields 1-tuples such as (1,) as group keys,
# which boxplot cannot use as positions. Building the groupby once also
# avoids regrouping the frame for each list.
month_groups = monthly.groupby(level='Month')
mvs = [mg['1st Max Value'] for _, mg in month_groups]
mms = [mm for mm, _ in month_groups]
ax.set_ylabel('Count')
tax.set_ylabel('Ozone ppm')
tax.boxplot(mvs, positions=mms)
ax.set_title(f'Ozone MDA8 from all observations in {YYYY}')
fig.savefig('OzoneMonthly.png')