I have two dataframes - a dataframe of 7 bins, specifying the limits and name of each bin (called FJX_bins) and a frame of wavelength-sigma pairs (test_spectra). I want to create a new variable in Test_Spectra entitled bin_number based on the bin limits in the FJX_bins data. The dput of the two is included below.
It would be relatively easy to brute force this with mutate and case_when, but the key here is that I would like the solution to be extensible to an arbitrary number of bins. My feeling is that there is likely to be some sort of apply method in dplyr that could be of use here, but all I could think to do was use a for loop, as shown here:
df <- test_spectra %>%
  mutate(bin_number = case_when(
    for(ii in 1:nrow(FJX_bins)){
      Wavelength >= FJX_bins$Lambda_Start[ii] & Wavelength < FJX_bins$Lambda_End[ii] ~
        FJX_bins$Bin_Number[ii]}
    ))
This strategy fails, and throws the error
Case 1 (
for (ii in 1:nrow(FJX_bins)) {...) must be a two-sided formula, not a NULL
Is there a way to use dplyr to solve this question? Or do I need to step back and look at something like apply and cut? I'd rather stick within a dplyr framework for other reasons, but could go outside of it, too.
Thanks
FJX_bins <- structure(list(Bin_Number = 1:7, Lambda_Start = c(289, 298.25, 
307.45, 312.45, 320.3, 345, 412.45), Lambda_End = c(298.25, 307.45, 
312.45, 320.3, 345, 412.45, 850)), row.names = c(NA, -7L), class = c("tbl_df", 
"tbl", "data.frame"), spec = structure(list(cols = structure(list(
    Bin_Number = structure(list(), class = c("collector_integer", 
    "collector")), Lambda_Start = structure(list(), class = c("collector_double", 
    "collector")), Lambda_End = structure(list(), class = c("collector_double", 
    "collector")), Effective_Lambda = structure(list(), class = c("collector_integer", 
    "collector"))), .Names = c("Bin_Number", "Lambda_Start", 
"Lambda_End", "Effective_Lambda")), default = structure(list(), class = c("collector_guess", 
"collector"))), .Names = c("cols", "default"), class = "col_spec"), .Names = c("Bin_Number", 
"Lambda_Start", "Lambda_End"))
test_spectra <- structure(list(Wavelength = c(289L, 290L, 291L, 292L, 293L, 294L, 
295L, 296L, 297L, 298L, 299L, 300L, 289L, 290L, 291L, 292L, 293L, 
294L, 295L, 296L, 297L, 298L, 299L, 300L, 289L, 290L, 291L, 292L, 
293L, 294L, 295L, 296L, 297L, 298L, 299L, 300L, 289L, 290L, 291L, 
292L, 293L, 294L, 295L, 296L, 297L, 298L, 299L, 300L), Sigma = c(3.97790085259898e-20, 
3.88773011066234e-20, 3.77170497723194e-20, 3.63990173255768e-20, 
3.53611020195826e-20, 3.39379425027765e-20, 3.24540998352932e-20, 
3.08629426249589e-20, 2.93243925380076e-20, 2.80431593390348e-20, 
2.64345023340469e-20, 2.49597804268261e-20, 4.79587956800083e-20, 
4.67040607723134e-20, 4.5134283789068e-20, 4.32731814710643e-20, 
4.13196812361237e-20, 3.93856298421813e-20, 3.77050786831795e-20, 
3.62340670271797e-20, 3.49404344374885e-20, 3.36066462681245e-20, 
3.20871974271263e-20, 3.03438697547602e-20, 5.27803299371575e-20, 
5.12475486084599e-20, 4.99112054163632e-20, 4.86399784101602e-20, 
4.73236079731255e-20, 4.56798834656559e-20, 4.36887241590191e-20, 
4.13697643104457e-20, 3.89697643104457e-20, 3.66909671059429e-20, 
3.46634646072095e-20, 3.28648835305714e-20, 5.71590756444018e-20, 
5.57618648066173e-20, 5.44949261656802e-20, 5.33110977304272e-20, 
5.21177991137917e-20, 5.07478142704849e-20, 4.9100984463428e-20, 
4.70660943398542e-20, 4.47661068638463e-20, 4.24314737804269e-20, 
4.02176301884806e-20, 3.82570654305878e-20)), row.names = c(NA, 
-48L), class = c("tbl_df", "tbl", "data.frame"), .Names = c("Wavelength", 
"Sigma"))
fuzzyjoin implements dplyr range/interval joins:
library(fuzzyjoin)
interval_left_join(
    FJX_bins, 
    test_spectra,
    by = c('Wavelength' = 'Lambda_Start', 'Wavelength' = 'Lambda_End')
)
# A tibble: 52 x 5 Wavelength Sigma Bin_Number Lambda_Start Lambda_End <int> <dbl> <int> <dbl> <dbl> 1 289 3.98e-20 1 289 298. 2 290 3.89e-20 1 289 298. 3 291 3.77e-20 1 289 298. 4 292 3.64e-20 1 289 298. 5 293 3.54e-20 1 289 298. 6 294 3.39e-20 1 289 298. 7 295 3.25e-20 1 289 298. 8 296 3.09e-20 1 289 298. 9 297 2.93e-20 1 289 298. 10 298 2.80e-20 1 289 298. # … with 42 more rows
with dplyr:
To create bin number as a factor
library(dplyr)
Test_Spectra <- mutate(test_spectra, 
                       bin = cut(Wavelength, breaks = c(FJX_bins$Lambda_Start, 850), 
                                 labels = FJX_bins$Bin_Number, right = F))
Or to create bin number as a character variable
Test_Spectra <- mutate(test_spectra, 
                       bin = as.character(cut(Wavelength, 
                                          breaks = c(FJX_bins$Lambda_Start, 850), 
                                          labels = FJX_bins$Bin_Number, right = F)))
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With