Module 7 · Patterns · Free & interactive
Hot Spot Analysis & LISA in Python
Identify statistically significant hot and cold spots. Written by Eric Vaz. Read it here, or open the interactive version to write and run the Python yourself, in your browser, on real open data.
Open the interactive version (write & run the code) →Where the clusters are (LISA)
A single global statistic hides geography: it says clustering exists, not where. Local Moran's I (Anselin 1995) computes a contribution for every unit and classifies each into one of four types: high surrounded by high (HH, a hot spot), low by low (LL, a cold spot), or the spatial outliers HL and LH.
import numpy as np, json
from pyodide.http import open_url
from scipy.spatial import cKDTree
gj = json.load(open_url("https://cdn.jsdelivr.net/gh/PublicaMundi/MappingAPI@master/data/geojson/us-states.json"))
def feature_polys(f):
g = f["geometry"]; return g["coordinates"] if g["type"]=="MultiPolygon" else [g["coordinates"]]
def state_centroid(f):
best, ba = None, -1
for poly in feature_polys(f):
rr = np.array(poly[0]); aa = abs(0.5*np.sum(rr[:-1,0]*rr[1:,1]-rr[1:,0]*rr[:-1,1]))
if aa > ba: ba, best = aa, (rr[:,0].mean(), rr[:,1].mean())
return best
drop = {"Alaska","Hawaii","Puerto Rico"}
feats = [f for f in gj["features"] if f["properties"]["name"] not in drop]
dens = np.array([f["properties"]["density"] for f in feats]); y = np.log(dens)
cent = np.array([state_centroid(f) for f in feats])
def knn_W(coords, k=6):
n = len(coords); tree = cKDTree(coords); _, idx = tree.query(coords, k=k+1)
W = np.zeros((n, n))
for i in range(n):
for j in idx[i, 1:]: W[i, j] = 1.0
return W / W.sum(1, keepdims=True)
W = knn_W(cent, 6)
print(len(feats), "states ready | global Moran lag corr:", round(float(np.corrcoef(y, W@y)[0,1]), 3))np.sum(z**2)/len(y)) and the lag (W @ z).import numpy as np
def local_moran(y, W):
z = y - y.mean()
m2 = np.sum(z**2) / len(y)
Ii = (z / m2) * (W @ z)
return Ii
Ii = local_moran(y, W)
print("most positive local I:", round(float(Ii.max()), 2), "| most negative:", round(float(Ii.min()), 2))
In the interactive version this line is blanked out and you write it yourself, with a hint, a solution, and an automatic check.
Significance comes from a conditional permutation test: hold unit i fixed and reshuffle only the rest, many times. Run the example to classify each unit into a quadrant and flag the significant ones.
def local_moran_p(y, W, permutations=199, seed=0):
z = y - y.mean(); m2 = np.sum(z**2)/len(y); Ii = (z/m2)*(W@z)
rng = np.random.default_rng(seed); n = len(y); p = np.zeros(n)
for i in range(n):
nb = np.where(W[i] > 0)[0]; wi = W[i, nb]; others = np.delete(z, i)
sims = np.array([(z[i]/m2)*np.sum(wi*rng.choice(others, size=len(nb), replace=False)) for _ in range(permutations)])
p[i] = (np.sum(np.abs(sims) >= abs(Ii[i])) + 1) / (permutations + 1)
return Ii, p
Ii, p = local_moran_p(y, W, permutations=199)
zy = (y - y.mean())/y.std(); zlag = W @ zy
quad = np.where((zy>0)&(zlag>0), "HH", np.where((zy<0)&(zlag<0), "LL", np.where((zy>0)&(zlag<0), "HL", "LH")))
sig = p < 0.05
for q in ["HH","LL","HL","LH"]:
names = [f["properties"]["name"] for f,qq,s in zip(feats,quad,sig) if s and qq==q]
print(f"{q} ({len(names):2d}): {', '.join(names) if names else '-'}")colmap[q] if significant (s), else "#e9ecef".import matplotlib.pyplot as plt
from matplotlib.path import Path
from matplotlib.patches import PathPatch, Patch
from matplotlib.collections import PatchCollection
colmap = {"HH":"#b2182b", "LL":"#2166ac", "HL":"#ef8a62", "LH":"#67a9cf"}
cols = [colmap[q] if s else "#e9ecef" for q, s in zip(quad, sig)]
fig, ax = plt.subplots(figsize=(7.4, 4.4)); patches, pc = [], []
for f, c in zip(feats, cols):
for poly in feature_polys(f):
patches.append(PathPatch(Path(np.array(poly[0])))); pc.append(c)
ax.add_collection(PatchCollection(patches, facecolor=pc, edgecolor="white", linewidth=.4))
ax.autoscale(); ax.set_aspect(1.3); ax.axis("off")
ax.set_title("Local Moran clusters of log population density (p < 0.05)")
ax.legend(handles=[Patch(facecolor=colmap[k], label=k) for k in colmap], loc="lower left", ncol=4, fontsize=8, frameon=False)
In the interactive version this line is blanked out and you write it yourself, with a hint, a solution, and an automatic check.
Hot-spot and cluster analysis in regional science
- Eric Vaz — Google Scholar profile — LISA and local indicators applied to land use, tourism and urban change
- Selected publications on ericvaz.com