I have global data with over 180,000 data points. For every pair of coordinates I have a value, and many times I have multiple values at the same coordinates (see example of the dataframe df). I want to plot this data using hexagonal grids onto a world map and have been struggling. The statbinhex option ggplot doesn't let me set the grids to be 1000 square kilometers and it only counts the number of points within a hexbin rather than the mean of all values within a hexbin so I have switched to other options. Right now I am trying to grid the world map using the spsample function from the package 'sp' but I keep running into errors.
Data:
df<-structure(list(Z = c(3.23, 3.518, 3.518, 3.518, 3.518, 3.518, 
3.518, 1.961, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 
0.9845, 0.9845, 0.9845, 0.9845, 0.9845, 0.9845), latitude = c(-5.333, 
-19.01134381, -16.81667, -20.578928, -20.578928, -20.578928, 
-20.578928, 11.068441, 42.65, 59.6111, 55.8498, 58.6388, 57.0064, 
56.0202, 57.2875, 59.5252, 63.8363, 59.6032, 60.6532, 63.7764, 
59.3615, 62.6603, 58.9813, 58.9988, 58.984, 63.5407, 62.3942, 
59.36, 59.7953, 68.3704, 57.3549, 59.6111, 59.6111, 59.6068, 
59.6068, 59.6068, 63.6589, 59.169, 59.7762, 59.7762, 56.6949, 
56.2811, 61.6237, 56.3035, 56.7949, 56.6454, 65.5021, 59.8754, 
59.0856, 55.7247, 56.7308, 59.5479, 56.7237, 56.7821, 58.5819, 
59.5112, 67.8864, 67.8864, 59.0272, 58.9797, 60.2414, 59.0464, 
59.0805, 59.7875, 55.6308, 42.64, 42.534, 42.60166667, 41.2874, 
65.256706, 42.68333333, 42.61138889, 47.12, 63.3, 49.13547, 66.287, 
66.336, 66.468, 66.697, 66.968, 67.076, 67.566, 67.668, 67.679, 
67.939, 68.033, 68.455, 68.455, 68.501, 68.576, 68.881, 68.992, 
69.117, 69.141, 69.141, 69.203, 69.406, 69.426, 69.458, 69.512
), longitude = c(141.6, 146.2214001, 145.6333, 145.483398, 145.483398, 
145.483398, 145.483398, 76.509568, 77.47, 13.9202, 14.2217, 16.0795, 
14.4578, 14.6835, 17.9708, 16.2606, 20.127, 13.8554, 15.7272, 
20.8167, 13.4909, 17.399, 15.1313, 15.0579, 14.7382, 19.7277, 
17.7196, 13.4549, 17.5859, 18.7693, 15.762, 13.9202, 13.9202, 
13.8814, 13.8814, 13.8814, 20.3222, 15.1416, 18.3233, 18.3233, 
13.1492, 16.0232, 17.4425, 14.7285, 16.5662, 12.7691, 22.0001, 
18.0014, 14.6461, 14.1954, 13.0661, 17.5769, 12.8976, 12.8581, 
14.8691, 16.883, 22.2536, 22.2536, 14.9963, 15.0096, 14.48, 15.0569, 
14.9042, 17.6261, 13.5288, 2.09, 2.465, 1.093611111, 24.6651, 
31.904297, 1.205833333, 1.063888889, 6.63555, -150.5, 2.63457, 
36.865, 36.014, 35.334, 34.347, 29.208, 41.126, 33.391, 33.617, 
33.654, 32.91, 34.921, 35.344, 35.344, 28.733, 29.408, 33.02, 
29.031, 36.062, 29.242, 29.242, 33.455, 30.21, 31.057, 31.5, 
30.464), country = c("New Guinea", "Australia", "Australia", 
"Australia", "Australia", "Australia", "Australia", "India", 
"Kyrgyzstan", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", 
"Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", 
"Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", 
"Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", 
"Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", 
"Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", 
"Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", 
"Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", "Sweden", 
"Sweden", "Sweden", "France", "France", "Spain", "Greece", "Russia", 
"Spain", "Spain", "France", "USA", "France", "Russia", "Russia", 
"Russia", "Russia", "Russia", "Russia", "Russia", "Russia", "Russia", 
"Russia", "Russia", "Russia", "Russia", "Russia", "Russia", "Russia", 
"Russia", "Russia", "Russia", "Russia", "Russia", "Russia", "Russia", 
"Russia", "Russia")), row.names = c(NA, -100L), class = c("tbl_df", 
"tbl", "data.frame"))
World map:
library(rnaturalearth)
world <- ne_countries(scale = "medium", returnclass = "sf")
Hexbinninng the world map (this is where my code doesn't work and the hexbinning can't be executed, I also don't know how to make sure the sizes of the hexbins are 1000 square kilometers):
size <- 100
hex_points <- spsample(world, type = "hexagonal", cellsize = size)
hex_grid <- HexPoints2SpatialPolygons(hex_points, dx = size)
Error in (function (classes, fdef, mtable)  : 
  unable to find an inherited method for function ‘spsample’ for signature ‘"sf"’
My plan is to create polygon that is a hexbin or hex grid of the world that are 1000 km squared per hexbin and then intersect my data points with the polygon shape file and then plot the mean of of all points within the a hexbin across the world.
Would anyone know how to do this?
The sf package should cover most of what you need.
If you want a hexagonal grid with equal sized hexagons, you'll have to use an equal-area projection. Illustrating the difference:
world_6933 <- st_transform(world, 6933)
world_grid_6933 <- st_make_grid(world_6933,
                                n = c(100,100),
                                what = 'polygons',
                                square = FALSE,
                                flat_topped = TRUE) %>%
  st_as_sf() %>%
  mutate(area = st_area(.))
p2 <- ggplot() + 
  geom_sf(data = world_grid_6933,
          aes(fill = units::drop_units(area))) +
  geom_sf(data = world_6933,
          fill = NA, color = 'white')
cowplot::plot_grid(p1, p2, nrow = 2, labels = c('unequal hexagons', 'equal hexagons'))
In the top plot, hexagons near the equator are significantly larger. (units in m^2)

Once you've settled on a grid and projection, create an sf object from your data and transform it to the same CRS projection.
# df <- from data pasted in your question
st_as_sf(df, coords = c('longitude', 'latitude'))
st_crs(df_sf) <- 4326
df_sf <- st_transform(df_sf, 6933) # 6933 is the same crs as the hex grid
head(df_sf)
Simple feature collection with 6 features and 2 fields
geometry type:  POINT
dimension:      XY
bbox:           xmin: 13662460 ymin: -2570656 xmax: 14108360 ymax: -679392.6
projected CRS:  WGS 84 / NSIDC EASE-Grid 2.0 Global
# A tibble: 6 x 3
      Z country                geometry
  <dbl> <chr>               <POINT [m]>
1  3.23 New Guinea (13662457 -679392.6)
2  3.52 Australia   (14108359 -2382208)
3  3.52 Australia   (14051615 -2115478)
4  3.52 Australia   (14037152 -2570656)
5  3.52 Australia   (14037152 -2570656)
6  3.52 Australia   (14037152 -2570656)
Now they're ready to be joined & manipulated:
oined_sf <- st_join(df_sf, world_grid_6933, join = st_within) %>%
  mutate(hex = floor(as.numeric(rownames(.)))) %>%
  select(-area)
head(joined_sf)
Soined_sf <- st_join(df_sf, world_grid_6933, join = st_within) %>%
  mutate(hex = floor(as.numeric(rownames(.)))) %>%
  select(-area)
head(joined_sf)
Simple feature collection with 6 features and 3 fields
geometry type:  POINT
dimension:      XY
bbox:           xmin: 13662460 ymin: -2570656 xmax: 14108360 ymax: -679392.6
projected CRS:  WGS 84 / NSIDC EASE-Grid 2.0 Global
      Z    country hex                   geometry
1 3.230 New Guinea 720 POINT (13662457 -679392.6)
2 3.518  Australia 485  POINT (14108359 -2382208)
3 3.518  Australia 509  POINT (14051615 -2115478)
4 3.518  Australia 462  POINT (14037152 -2570656)
5 3.518  Australia 462  POINT (14037152 -2570656)
6 3.518  Australia 462  POINT (14037152 -2570656)
The above (joined_df) is all of the points, the Z and the country from the original df, and the hexagon line number it belongs to in world_grid_6933.
This should give you a column to join on:
world_grid_6933$hex <- seq.int(nrow(world_grid_6933)
# Sum of Z by hexagon
summary <- joined_sf %>% 
              st_drop_geometry() %>% 
              group_by(hex) %>% 
              summarise(sum_z = sum(Z))
left_join(world_grid_6933, summary, by = 'hex') %>% 
  filter(!is.na(sum_z)) %>% 
  st_crop(box) %>%  # box is a bounding box I made to zoom in on a relevant area
   ggplot() + 
    geom_sf(aes(fill = sum_z)) + 
    geom_sf(data = st_crop(world_6933, box), fill = NA) + 
    geom_sf(data = st_crop(df_sf, box), color = 'white'[![enter image description here][2]][2])+              
    scale_fill_viridis_c(option = 'A')

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With