Pandas Cheatsheet
The pandas operations you actually use every day, grouped by intent. One-liner explanations, no filler.
language-tooling
#python
#pandas
#data
Read & write
import pandas as pd
pd.read_csv("data.csv") # CSV
pd.read_parquet("data.parquet") # Parquet — faster, typed, smaller
pd.read_json("data.json", lines=True) # JSON Lines
pd.read_sql(query, conn) # From SQLAlchemy connection
df.to_parquet("out.parquet") # Prefer Parquet over CSV for anything serious
df.to_csv("out.csv", index=False) # `index=False` 99% of the time
Inspect
df.head(10) # First 10 rows
df.tail(5) # Last 5
df.shape # (rows, cols)
df.dtypes # Column types
df.info() # Memory + nulls + types in one shot
df.describe() # Summary stats for numeric columns
df["col"].value_counts() # Frequency table
df.isna().sum() # Nulls per column
Select
df["col"] # One column → Series
df[["a", "b"]] # Many columns → DataFrame
df.loc[df["age"] > 30] # Boolean mask
df.loc[10:20, ["a", "b"]] # Label-based slice — both endpoints inclusive
df.iloc[10:20, 0:3] # Position-based slice — end exclusive, like Python
df.query("age > 30 and city == 'Paris'") # SQL-like expression
Filter
df[df["status"].isin(["paid", "shipped"])]
df[df["name"].str.startswith("A")]
df[df["created_at"].between("2026-01-01", "2026-03-31")]
df.dropna(subset=["email"]) # Drop rows missing email
df.fillna({"age": 0, "country": "NA"}) # Fill specific columns — beware: the string "NA" is parsed back as NaN by read_csv
Transform
df["age_years"] = df["age_days"] / 365
df["full_name"] = df["first"] + " " + df["last"]
df["bucket"] = pd.cut(df["age"], bins=[0, 18, 35, 65, 120])
df["score_z"] = (df["score"] - df["score"].mean()) / df["score"].std()
df.assign(profit=lambda d: d.revenue - d.cost) # Method chaining
df.rename(columns={"old": "new"})
df.drop(columns=["junk"])
df.astype({"id": "int64", "price": "float32"})
Group & aggregate
df.groupby("country")["revenue"].sum()
df.groupby("country").agg(
total=("revenue", "sum"),
avg=("revenue", "mean"),
n=("id", "count"),
)
df.groupby(["country", "year"])["revenue"].mean().unstack()
df.pivot_table(index="country", columns="month",
values="revenue", aggfunc="sum")
Merge & join
pd.merge(orders, users, on="user_id", how="left")
pd.merge(a, b, left_on="uid", right_on="id", how="inner")
pd.concat([df1, df2], axis=0) # Stack rows
pd.concat([df1, df2], axis=1) # Stack columns
df1.join(df2, on="key") # Matches df1["key"] against df2's index
Time series
df["ts"] = pd.to_datetime(df["ts"])
df = df.set_index("ts").sort_index()
df.resample("D")["sales"].sum() # Daily totals
df["sales"].rolling("7D").mean() # 7-day moving average
df["sales"].shift(1) # Lag 1 day
df["sales"].pct_change() # Day-over-day change
df.between_time("09:00", "17:00") # Working hours
df.tz_localize("UTC").tz_convert("Europe/Paris")
Performance one-liners
df.memory_usage(deep=True).sum() / 1e6 # MB used
df["cat_col"] = df["cat_col"].astype("category") # Save memory on low-cardinality strings
df.to_parquet("out.parquet") # Smaller, faster than CSV
When pandas is too slow, the answer is usually Polars or DuckDB, not “more pandas tricks”.