Below is a reproducible example with a test dataframe:
ts<-structure(list(OP = c(1, 3, 5), x = c(0.405595375690609, 0.256223899079487,
0.0131878938991576), y = c(0.792096293298528, 0.0120686823502183,
0.518370501697063), z = c(0.400826650671661, 0.279657100560144,
0.409006189322099)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-3L))
I want to add a column using mutate that gets me for each row the column position of the second highest value within the selected columns.
So far I'm only able to get the index column of the maximum value within selected columns by doing:
td<-ts %>%
rowwise() %>%
mutate(second_max = which.max(c_across(c(2:4))))
However, for this specific dataframe, my desired output would be:
| OP | x | y | z | second_max |
|---|---|---|---|---|
| 1 | 0.4055954 | 0.7920963 | 0.4008267 | 1 |
| 3 | 0.2562239 | 0.0120687 | 0.2796571 | 1 |
| 5 | 0.0131879 | 0.5183705 | 0.4090062 | 3 |
One possibility could be:
ts %>%
rowwise() %>%
mutate(second_max = which(dense_rank(-c_across(c(2:4))) == 2))
OP x y z second_max
<dbl> <dbl> <dbl> <dbl> <int>
1 1 0.406 0.792 0.401 1
2 3 0.256 0.0121 0.280 1
3 5 0.0132 0.518 0.409 3
The same idea with the addition of purrr:
ts %>%
mutate(secod_max = pmap_int(across(c(2:4)), ~ which(dense_rank(-c(...)) == 2)))
You can use tapply + row + rank like below
ts %>%
mutate(second_max = c(tapply(
t(.[-1]),
t(row(.[-1])),
\(x) which(rank(x) == 2)
)))
which gives
# A tibble: 3 × 5
OP x y z second_max
<dbl> <dbl> <dbl> <dbl> <int>
1 1 0.406 0.792 0.401 1
2 3 0.256 0.0121 0.280 1
3 5 0.0132 0.518 0.409 3
f_tmfmnk <- function() {
ts %>%
rowwise() %>%
mutate(second_max = which(dense_rank(-c_across(c(2:4))) == 2))
}
f_jonspring <- function() {
ts |>
left_join(
ts |>
pivot_longer(-OP) |>
mutate(second_max = row_number(), .by = OP) |>
arrange(-value) |>
slice(2, .by = OP) |>
select(OP, second_max),
by = join_by(OP)
)
}
f_friede <- function() {
ts %>%
mutate(second_max = Rfast::rownth(
x = as.matrix(ts[-1L]),
elems = rep(2L, nrow(ts)),
index.return = TRUE,
descending = TRUE
))
}
f_tic <- function() {
ts %>%
mutate(second_max = c(tapply(
t(.[-1]),
t(row(.[-1])),
\(x) which(rank(x) == 2)
)))
}
f_onyambu <- function() {
ts %>%
mutate(second_max = max.col(replace(as.matrix(ts[-1]), cbind(seq(nrow(ts)), max.col(ts[-1])), -Inf)))
}
microbenchmark(
f_tmfmnk(),
f_jonspring(),
f_friede(),
f_tic(),
f_onyambu(),
unit = "relative",
check = "equivalent"
)
shows
Unit: relative
expr min lq mean median uq max neval
f_tmfmnk() 4.685591 4.768867 4.273080 4.387400 4.743804 3.296211 100
f_jonspring() 10.069732 10.311884 8.904696 9.589922 9.944748 5.210142 100
f_friede() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 100
f_tic() 1.199249 1.204067 1.712340 1.117963 1.052074 18.398534 100
f_onyambu() 1.168981 1.174615 1.083942 1.078781 1.004397 2.448716 100
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With