In [1]:
# Import libraries
import earthpy # Manage local data
import holoviews as hv # Save interactive plots to file
import hvplot.pandas # Registers the .hvplot accessor on pandas objects
# Advanced options on matplotlib/seaborn/pandas plots
import matplotlib.pyplot as plt
# Addition for OLS Linear Regression
import numpy as np
import pandas as pd # Work with tabular data
# Common statistical plots for tabular data
import seaborn as sns
# Fit an OLS linear regression
from sklearn.linear_model import LinearRegression
In [2]:
# Build the NCEI data-service request for the Portland, OR station
# (station page: https://www.ncdc.noaa.gov/cdo-web/datasets/GHCND/stations/GHCND:USC00358634/detail).
# The query asks the daily-summaries dataset for TOBS values from station
# USC00358634 between 1948-01-01 and 2025-09-21 with units=standard.
por_url = '&'.join([
    'https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries',
    'dataTypes=TOBS',
    'stations=USC00358634',
    'startDate=1948-01-01',
    'endDate=2025-09-21',
    'units=standard',
])
por_url
Out[2]:
'https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=TOBS&stations=USC00358634&startDate=1948-01-01&endDate=2025-09-21&units=standard'
In [3]:
# Request the CSV from the data service and load it into a DataFrame,
# using the DATE column (parsed as dates) as the index
por_climate_df = pd.read_csv(
    por_url, index_col='DATE', parse_dates=True, na_values=['NaN'])

# Preview the first rows to confirm the request returned data
por_climate_df.head()
Out[3]:
STATION TOBS
DATE
1948-01-01 USC00358634 NaN
1948-01-02 USC00358634 NaN
1948-01-03 USC00358634 NaN
1948-01-04 USC00358634 NaN
1948-01-05 USC00358634 NaN
In [4]:
# Save the climate data to a CSV file (relative path, so it is written to the
# kernel's current working directory)
por_climate_df.to_csv('portland_temp_data.csv')
In [5]:
# Display the downloaded DataFrame (the notebook renders a truncated preview)
por_climate_df
Out[5]:
STATION TOBS
DATE
1948-01-01 USC00358634 NaN
1948-01-02 USC00358634 NaN
1948-01-03 USC00358634 NaN
1948-01-04 USC00358634 NaN
1948-01-05 USC00358634 NaN
... ... ...
2025-09-17 USC00358634 NaN
2025-09-18 USC00358634 NaN
2025-09-19 USC00358634 NaN
2025-09-20 USC00358634 NaN
2025-09-21 USC00358634 NaN

25869 rows × 2 columns

In [6]:
# Histogram of the DataFrame's values for a quick look at their distribution
por_climate_df.plot(kind='hist')
Out[6]:
<Axes: ylabel='Frequency'>
No description has been provided for this image
In [7]:
# Keep only the TOBS column (still as a DataFrame), which leaves STATION behind
portland_climate_df = por_climate_df.loc[:, ['TOBS']]
portland_climate_df
Out[7]:
TOBS
DATE
1948-01-01 NaN
1948-01-02 NaN
1948-01-03 NaN
1948-01-04 NaN
1948-01-05 NaN
... ...
2025-09-17 NaN
2025-09-18 NaN
2025-09-19 NaN
2025-09-20 NaN
2025-09-21 NaN

25869 rows × 1 columns

In [8]:
# Give the observation column a name that carries its unit of measure
portland_climate_df_units = portland_climate_df.rename(columns={'TOBS': 'temp_f'})

portland_climate_df_units
Out[8]:
temp_f
DATE
1948-01-01 NaN
1948-01-02 NaN
1948-01-03 NaN
1948-01-04 NaN
1948-01-05 NaN
... ...
2025-09-17 NaN
2025-09-18 NaN
2025-09-19 NaN
2025-09-20 NaN
2025-09-21 NaN

25869 rows × 1 columns

In [9]:
# Derive a Celsius column from the Fahrenheit readings: C = (F - 32) * 5 / 9
portland_climate_df_units = portland_climate_df_units.assign(
    temp_c=lambda obs: (obs['temp_f'] - 32) * 5 / 9
)
portland_climate_df_units
Out[9]:
temp_f temp_c
DATE
1948-01-01 NaN NaN
1948-01-02 NaN NaN
1948-01-03 NaN NaN
1948-01-04 NaN NaN
1948-01-05 NaN NaN
... ... ...
2025-09-17 NaN NaN
2025-09-18 NaN NaN
2025-09-19 NaN NaN
2025-09-20 NaN NaN
2025-09-21 NaN NaN

25869 rows × 2 columns

In [10]:
# Quick line plot of every column against the DATE index
portland_climate_df_units.plot()
Out[10]:
<Axes: xlabel='DATE'>
No description has been provided for this image
In [11]:
# Line plot of the full daily Celsius series with a title and axis labels
portland_climate_df_units.plot(
    y='temp_c',
    xlabel='Year',
    ylabel='Temperature (degrees C)',
    title='Daily Temperature in Portland OR',
)
Out[11]:
<Axes: title={'center': 'Daily Temperature in Portland OR'}, xlabel='Year', ylabel='Temperature (degrees C)'>
No description has been provided for this image
In [12]:
# Aggregate the daily values into one mean per year
# ('YE' = year-end frequency, so each row is labelled with 31 December)
portland_ann_climate_df = portland_climate_df_units.resample('YE').mean()
portland_ann_climate_df
Out[12]:
temp_f temp_c
DATE
1948-12-31 NaN NaN
1949-12-31 NaN NaN
1950-12-31 NaN NaN
1951-12-31 NaN NaN
1952-12-31 NaN NaN
... ... ...
2021-12-31 NaN NaN
2022-12-31 NaN NaN
2023-12-31 NaN NaN
2024-12-31 45.881356 7.711864
2025-12-31 46.192982 7.884990

78 rows × 2 columns

In [13]:
# Line plot of the annual mean Celsius series with a title and axis labels
portland_ann_climate_df.plot(
    y='temp_c',
    xlabel='Year',
    ylabel='Temperature (degrees C)',
    title='Annual Average Temperature in Portland OR',
)
Out[13]:
<Axes: title={'center': 'Annual Average Temperature in Portland OR'}, xlabel='Year', ylabel='Temperature (degrees C)'>
No description has been provided for this image
In [14]:
# Make an interactive plot of the annual means.
# The .hvplot accessor used here is registered by `import hvplot.pandas`,
# which lives with the other imports in the notebook's first cell.
portland_interactive = portland_ann_climate_df.hvplot(
    y='temp_c',
    title='Annual Average Temperature in Portland OR'
).opts(
    xlabel='Year',
    ylabel='Temperature (degrees C)'
)
portland_interactive
Out[14]:
In [15]:
# Write the interactive plot to an HTML file
# (holoviews is imported as `hv` with the other imports in the first cell)
hv.save(portland_interactive, 'portland_climate.html')
In [16]:
# Fit an OLS Linear Regression of annual mean temperature against year.

# Inclusive range of years used for the fit. This reproduces the earlier
# selection: keep 1959 and later, then leave out 2010 through 2025.
FIT_START_YEAR = 1959
FIT_END_YEAR = 2009

# Build the regression table in a single chain so every step returns a new
# DataFrame. Adding a column onto a boolean-filtered slice is what makes
# pandas emit SettingWithCopyWarning.
df = (
    portland_ann_climate_df
    # Read the year from the DatetimeIndex rather than assuming the rows are
    # consecutive years that start at 1948
    .assign(Year=lambda annual: annual.index.year)
    .loc[lambda annual: annual['Year'].between(FIT_START_YEAR, FIT_END_YEAR)]
    # LinearRegression rejects NaN targets, so skip years without a mean
    .dropna(subset=['temp_c'])
    .reset_index(drop=True)
    # Map Temperature to the correct column
    .assign(Temperature=lambda annual: annual['temp_c'])
)

# Prepare arrays for scikit-learn: X is 2-D (one row per year, one feature),
# y is the 1-D target
X = df[['Year']].to_numpy()
y = df['Temperature'].to_numpy()

# Fit the regression
model = LinearRegression()
model.fit(X, y)

# Get slope and R²
slope = model.coef_[0]
r_squared = model.score(X, y)

print(f"Slope (°C per year): {slope}")
print(f"R²: {r_squared:.4f}")
Slope (°C per year): 0.016131681212785833
R²: 0.0880
In [17]:
# Scatter the annual means against year and overlay seaborn's fitted
# regression line, drawn in red
ax = sns.regplot(data=df, x='Year', y='temp_c', line_kws={'color': 'red'})

ax.set_title('Annual Average Temperature over time in Portland OR with Trend')
ax.set_xlabel('Year')
ax.set_ylabel('Temperature (°C)')

plt.show()
No description has been provided for this image

Portland Warmed ~0.16 °C per Decade Between 1959 and 2009

A linear OLS regression fitted to the annual average temperatures for 1959–2009 (earlier years and 2010–2025 were left out of the fit) shows an average warming rate of 0.016 °C per year (≈ 0.16 °C per decade). While the R² value is low (0.088), indicating that year-to-year variability is large relative to the trend, the overall trend suggests a gradual rise in Portland’s annual average temperature over the second half of the 20th century.

In [ ]:
%%capture
%%bash
# Convert 09_Portfolio_Climate_Portland.ipynb to HTML; the surrounding
# %%capture magic keeps the command's output out of the notebook
jupyter nbconvert 09_Portfolio_Climate_Portland.ipynb --to html