R
Beginner
1 min read
Subsetting Data Frames
Example
# Sample data: five employees described by name, age, salary and dept.
df <- data.frame(
  name = c("Alice", "Bob", "Carol", "Dave", "Eve"),
  age = c(28, 34, 22, 41, 30),
  salary = c(55000, 72000, 48000, 95000, 61000),
  dept = c("HR", "IT", "IT", "Finance", "HR")
)

# Dollar-sign: extract a column as a vector
df$name # character vector of names (a factor on R < 4.0)
df$salary # numeric vector of salaries

# Double-bracket: programmatic column access
# (df$col would look for a column literally named "col")
col <- "age"
df[[col]] # same as df$age

# Single-bracket: row/column matrix-style
df[1, ] # first row (a 1-row data frame)
df[, 2] # second column (drops to vector by default)
df[, 2, drop = FALSE] # keep as data frame
df[1:3, c("name", "age")] # rows 1-3, named columns

# Logical row filtering
# Note: an NA in the condition produces an all-NA row in the result;
# wrap the condition in which() to drop such rows.
df[df$age > 30, ] # rows where age > 30
df[df$dept == "IT", ] # IT employees
# Compound condition: combine masks with elementwise `&` (not `&&`).
# Every column named here must exist: `$` on a missing column yields NULL,
# which collapses the mask to length zero and silently returns no rows.
df[df$salary >= 60000 & df$dept != "HR", ] # well-paid, outside HR

# subset() — readable syntax (avoid inside functions)
subset(df, age < 35, select = c(name, salary))

# Negative indexing (drop columns by position)
df[, -1] # all columns except the first
df[, -c(1, 4)] # drop columns 1 and 4

# which() to get row indices
# (do not negate a which() result to exclude rows: if nothing matches,
# df[-integer(0), ] selects zero rows rather than all of them)
high_earners <- which(df$salary > 60000)
df[high_earners, ]

# Updating values in place via subsetting
# Build the mask once so both sides of the assignment use the same rows.
is_it <- df$dept == "IT"
df$salary[is_it] <- df$salary[is_it] * 1.05 # 5% raise for IT