Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

pivot_wider outputs <S3: vctrs_list_of> objects

Tags:

r

tidyr

I have the following data set that I am trying to spread.

    #create df    
# Example data in dput() form: a 140-row tibble with three character columns.
#   file_number - identifier of the file that was read
#   reader      - reader code ("aa", "ae", "db", "dl", "mk", "mm", "np")
#   event       - outcome of the reading, "pass" or "fail"
# NOTE: (file_number, reader) pairs are not all unique; e.g. "3098477" occurs
# twice in the rows for reader "mk". Confirm with
# dplyr::count(df, file_number, reader).
df <- structure(list(file_number = c("3098129", "3096451", "3096774", 
"3095276", "3095464", "3096846", "3097132", "3096355", "3096951", 
"3096328", "3095441", "3096325", "3094412", "3096366", "3096372", 
"3096507", "3098510", "3096335", "3096403", "3094343", "3096941", 
"3096419", "3094431", "3096495", "3094647", "3094487", "3094947", 
"3094398", "3094386", "3094367", "3097480", "3096425", "3095193", 
"3095839a", "3097197", "3098453", "3098549", "3098428", "3096427", 
"3096895", "3096434", "3094835", "3096312", "3094517", "3094372", 
"3096387", "3096480", "3098504", "3096338", "3094615", "3096382", 
"3096638", "3096750", "3096418", "3094734", "3098503", "3096311", 
"3097197", "3094353", "3098442", "3097111", "3097325", "3096531", 
"3096405", "3096301", "3096692", "3096495", "3098406", "3098422", 
"3096315", "3096951", "3094491", "3096304", "3098416", "3096332", 
"3098404", "3098419", "3095225", "3094404", "3096374", "3098411", 
"3098556", "3096398", "3094421b", "3098477", "3094369b", "3098463", 
"3096893", "3098514", "3098477", "3098465", "3094560", "3098409", 
"3096434", "3097557", "3095061", "3098419", "3096404", "3095441", 
"3096537", "3098503", "3098400", "3097808", "3096389b", "3098446", 
"3096330", "3095533", "3094421a", "3094339", "3095578", "3094404", 
"3098552", "3098514", "3096630", "3096941", "3097027", "3096322", 
"3096514", "3098484", "3097038", "3096672", "3098483", "3094373", 
"3096774", "3096677", "3096408", "3096664", "3096365", "3096491", 
"3096820", "3096514", "3096556", "3096292", "3096495", "3094781", 
"3094344", "3094487", "3094690", "3098504", "3096503"), reader = c("aa", 
"aa", "aa", "aa", "aa", "aa", "aa", "aa", "aa", "aa", "aa", "aa", 
"aa", "aa", "aa", "aa", "aa", "aa", "aa", "aa", "ae", "ae", "ae", 
"ae", "ae", "ae", "ae", "ae", "ae", "ae", "ae", "ae", "ae", "ae", 
"ae", "ae", "ae", "ae", "ae", "ae", "db", "db", "db", "db", "db", 
"db", "db", "db", "db", "db", "db", "db", "db", "db", "db", "db", 
"db", "db", "db", "db", "dl", "dl", "dl", "dl", "dl", "dl", "dl", 
"dl", "dl", "dl", "dl", "dl", "dl", "dl", "dl", "dl", "dl", "dl", 
"dl", "dl", "mk", "mk", "mk", "mk", "mk", "mk", "mk", "mk", "mk", 
"mk", "mk", "mk", "mk", "mk", "mk", "mk", "mk", "mk", "mk", "mk", 
"mm", "mm", "mm", "mm", "mm", "mm", "mm", "mm", "mm", "mm", "mm", 
"mm", "mm", "mm", "mm", "mm", "mm", "mm", "mm", "mm", "np", "np", 
"np", "np", "np", "np", "np", "np", "np", "np", "np", "np", "np", 
"np", "np", "np", "np", "np", "np", "np"), event = c("fail", 
"fail", "fail", "fail", "pass", "fail", "fail", "pass", "fail", 
"fail", "pass", "pass", "pass", "fail", "fail", "pass", "pass", 
"fail", "pass", "pass", "pass", "pass", "pass", "pass", "fail", 
"fail", "pass", "pass", "fail", "pass", "pass", "pass", "pass", 
"pass", "fail", "pass", "fail", "fail", "fail", "pass", "pass", 
"pass", "fail", "pass", "pass", "fail", "pass", "fail", "fail", 
"pass", "fail", "fail", "pass", "fail", "pass", "fail", "pass", 
"fail", "fail", "fail", "fail", "pass", "pass", "fail", "pass", 
"pass", "fail", "pass", "fail", "pass", "pass", "fail", "pass", 
"fail", "fail", "pass", "pass", "fail", "pass", "pass", "fail", 
"pass", "fail", "pass", "fail", "pass", "pass", "pass", "pass", 
"fail", "pass", "pass", "fail", "pass", "fail", "pass", "fail", 
"pass", "pass", "fail", "pass", "pass", "fail", "pass", "pass", 
"fail", "pass", "fail", "fail", "fail", "pass", "pass", "pass", 
"fail", "fail", "fail", "fail", "fail", "fail", "fail", "fail", 
"fail", "pass", "fail", "fail", "fail", "pass", "pass", "pass", 
"pass", "fail", "pass", "pass", "fail", "fail", "pass", "pass", 
"fail", "fail", "fail")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -140L))

>head(df)
file_number reader  event
3098129     aa      fail        
3096451     aa      fail        
3096774     aa      fail        
3095276     aa      fail

But when I run the following `tidyr::pivot_wider()` call, every value column in the output is shown as `<S3: vctrs_list_of>`. I think this has to do with the same value appearing multiple times in the `names_from` column.

# Reshape to wide format: one row per file_number, one column per reader,
# with each cell holding the event value(s) recorded for that pair.
tidyr::pivot_wider(
  df,
  id_cols = file_number,
  names_from = reader,
  values_from = event
)

id                       aa                 ae
3098129         <S3: vctrs_list_of> <S3: vctrs_list_of>     
3096451         <S3: vctrs_list_of> <S3: vctrs_list_of>     

Along with the following warning:

Values in `event` are not uniquely identified; output will contain list-cols.
* Use `values_fn = list(event = list)` to suppress this warning.
* Use `values_fn = list(event = length)` to identify where the duplicates arise
* Use `values_fn = list(event = summary_fun)` to summarise duplicates

My question is: Why does pivot_wider output S3 vector lists?

EDIT: added a better reproducible example and redefined the question.

like image 926
daszlosek Avatar asked Oct 31 '25 09:10

daszlosek


1 Answer

I was able to fix the issue by applying `tidyr::unnest()` to the list-columns (the `vctrs_list_of` objects) that `pivot_wider()` produces.

# Widen to one column per reader, then expand the resulting list-columns back
# into ordinary vectors. `cols` is given explicitly because calling unnest()
# without it is deprecated since tidyr 1.0.0 and emits a warning; the names
# listed are the reader columns that pivot_wider() creates from `reader`.
df %>%
  ungroup() %>%
  pivot_wider(names_from = reader, values_from = event) %>%
  tidyr::unnest(cols = c(aa, ae, db, dl, mk, mm, np))
id  aa  bb
1   0   0       
2   0   1       
3   1   0       
4   1   1       
5   0   1

NOTE: all variables are factors now

like image 68
daszlosek Avatar answered Nov 02 '25 00:11

daszlosek