# ========================================================= # CVICENIE: # Je priemerna LST v meste (urban) vyssia ako v rural uzemi? # Hypoteza 0 = teploty urban vs. rural územie nie sú rozdielne, # Hypoteza 1/alternativna: teploty urban vs. rural územie sú rozdielne, # Vstupy: # - mask_urban_rural.shp # - LC08_LST_L2SP_20220622_clip.tif # # Priecinok: # C:/PSMG # ========================================================= # ----------------------------- # 1. Balicky # ----------------------------- packages <- c("terra", "sf", "dplyr", "ggplot2", "gstat", "sp") to_install <- packages[!packages %in% installed.packages()[, "Package"]] if(length(to_install) > 0) install.packages(to_install) library(terra) library(sf) library(dplyr) library(ggplot2) library(gstat) library(sp) # ----------------------------- # 2. Nastavenie pracoviska # ----------------------------- setwd("C:/PSMG") # VSTUPY raster_file <- "LC08_LST_L2SP_20220622_clip.tif" mask_file <- "mask_urban_rural.shp" # PARAMETRE CVICENIA n_random <- 500 seed_value <- 123 set.seed(seed_value) # ----------------------------- # 3. Nacitanie dat # ----------------------------- lst <- rast(raster_file) mask <- st_read(mask_file, quiet = TRUE) cat("\n--- ZAKLADNE INFO ---\n") print(lst) print(mask) print(crs(lst)) print(st_crs(mask)) # ----------------------------- # 4. Rozdelenie na urban a rural # ----------------------------- urban <- mask[mask$land_type == "urban", ] rural <- mask[mask$land_type == "rural", ] # Vizualna kontrola plot(lst) plot(mask["land_type"], add= T) # Prevod na terra vect urban_v <- vect(urban) rural_v <- vect(rural) # ----------------------------- # 5. Extrakcia pixelov z rastra # ----------------------------- lst_urban <- mask(crop(lst, urban_v), urban_v) lst_rural <- mask(crop(lst, rural_v), rural_v) vals_urban <- values(lst_urban, na.rm = TRUE) vals_rural <- values(lst_rural, na.rm = TRUE) df_all <- data.frame( temp = c(vals_urban, vals_rural), zone = c(rep("urban", length(vals_urban)), rep("rural", length(vals_rural))) ) cat("\n--- ZAKLADNE STATISTIKY ---\n") print( df_all |> group_by(zone) |> summarise( n = n(), mean = mean(temp, na.rm = TRUE), sd = sd(temp, na.rm = TRUE), min = min(temp, na.rm = TRUE), max = max(temp, na.rm = TRUE) ) ) # ----------------------------- # 6. Grafy # ----------------------------- ggplot(df_all, aes(x = temp, fill = zone)) + geom_density(alpha = 0.4) + theme_minimal() + labs(title = "Rozdelenie LST: urban vs rural", x = "LST", y = "Hustota") ggplot(df_all, aes(x = zone, y = temp, fill = zone)) + geom_boxplot(alpha = 0.7) + theme_minimal() + labs(title = "LST v urban a rural uzemi", x = "", y = "LST") # ========================================================= # PRIKLAD 1: VSETKY PIXLE # ========================================================= cat("\n==============================\n") cat("PRIKLAD 1: VSETKY PIXLE\n") cat("==============================\n") # H0: priemerna teplota v meste nie je vyssia ako v rural uzemi # H1: priemerna teplota v meste je vyssia ako v rural uzemi tt_all <- t.test( x = vals_urban, y = vals_rural, alternative = "greater", var.equal = FALSE ) print(tt_all) cat("\nInterpretacia:\n") cat("- Tento test pouziva vsetky pixle ako keby boli nezavisle.\n") cat("- To je metodicky problem, lebo susedne pixle mozu byt priestorovo autokorelovane.\n") # ========================================================= # PRIKLAD 2: NAHODNY VYBER PIXLOV # ========================================================= samp_urban <- sample(vals_urban, n_random) samp_rural <- sample(vals_rural, n_random) df_rand <- data.frame( temp = c(samp_urban, samp_rural), zone = c(rep("urban", length(samp_urban)), rep("rural", length(samp_rural))) ) print( df_rand |> group_by(zone) |> summarise( n = n(), mean = mean(temp), sd = sd(temp) ) ) tt_rand <- t.test( x = samp_urban, y = samp_rural, alternative = "greater", var.equal = FALSE ) print(tt_rand) cat("\nInterpretacia:\n") cat("- Nahodny vyber znizi pocet pozorovani.\n") cat("- Stale vsak nemusi odstranovat priestorovu autokorelaciu.\n") # ========================================================= # PRIKLAD 3: ZOHLADNENIE PRIESTOROVEJ AUTOKORELACIE # ========================================================= # Prevedieme raster na body pts_urban <- as.points(lst_urban, na.rm = TRUE) |> st_as_sf() pts_rural <- as.points(lst_rural, na.rm = TRUE) |> st_as_sf() # Zistenie nazvu atributu s hodnotou LST non_geom_cols <- setdiff(names(pts_urban), attr(pts_urban, "sf_column")) lst_col <- non_geom_cols[1] # Premenovanie atributu na temp names(pts_urban)[names(pts_urban) == lst_col] <- "temp" names(pts_rural)[names(pts_rural) == lst_col] <- "temp" pts_urban$zone <- "urban" pts_rural$zone <- "rural" pts_all <- rbind(pts_urban, pts_rural) cat("\n--- POCET BODOV PRE AUTOKORELACIU ---\n") cat("Urban body:", nrow(pts_urban), "\n") cat("Rural body:", nrow(pts_rural), "\n") # ----------------------------- # 3A. Podvzorka pre variogram # ----------------------------- # Odporucanie: # variogram pocitaj len pre urban, aby sa nemiesali dve odlisne zony pts_var_source <- pts_urban cat("\n--- VARIOGRAM LEN PRE URBAN ---\n") cat("Pocet dostupnych bodov:", nrow(pts_var_source), "\n") # Zvysime pocet bodov pre variogram # Ak mas vykonnejsi pocitac, mozes dat aj 10000 alebo 15000 n_for_variogram <- min(10000, nrow(pts_var_source)) pts_var <- pts_var_source[sample(seq_len(nrow(pts_var_source)), n_for_variogram), ] cat("Pocet bodov pouzitych pre variogram:", nrow(pts_var), "\n") # sf -> sp pts_var_sp <- as(pts_var, "Spatial") # Rozsah uzemia bb <- st_bbox(pts_var) diag_dist <- sqrt((bb$xmax - bb$xmin)^2 + (bb$ymax - bb$ymin)^2) # Nastavenie cutoff # Pre variogram je casto vhodne pouzit max. polovicu diagonalnej vzdialenosti cutoff_dist <- as.numeric(diag_dist) * 0.5 # Jemnejsie lagy -> viac bodov vo variograme n_lags <- 25 lag_width <- cutoff_dist / n_lags cat("Cutoff:", round(cutoff_dist, 2), "\n") cat("Lag width:", round(lag_width, 2), "\n") cat("Pocet lagov:", n_lags, "\n") # Experimentalny variogram vg_emp <- variogram( temp ~ 1, data = pts_var_sp, cutoff = cutoff_dist, width = lag_width ) print(head(vg_emp)) cat("Pocet bodov experimentalneho variogramu:", nrow(vg_emp), "\n") # Fit modelu vg_mod_init <- vgm( psill = var(pts_var$temp, na.rm = TRUE) * 0.8, model = "Sph", nugget = var(pts_var$temp, na.rm = TRUE) * 0.2, range = cutoff_dist / 3 ) vg_fit <- fit.variogram(vg_emp, vg_mod_init) cat("\n--- FITNUTY VARIOGRAMOVY MODEL ---\n") print(vg_fit) plot(vg_emp, vg_fit, main = "Experimentalny variogram (urban) a fitnuty model") # Odhad dosahu autokorelacie range_est <- vg_fit$range[2] if(is.na(range_est) || length(range_est) == 0) { range_est <- 5 * res(lst)[1] warning("Dosah autokorelacie sa nepodarilo spolahlivo odhadnut. Pouzivam nahradnu hodnotu.") } cat("\nOdhadnuty dosah priestorovej autokorelacie (range): ", round(range_est, 2), " m\n", sep = "") # ----------------------------- # 3C. Preriedenie bodov # ----------------------------- # Jednoducha metoda: # rozdelime priestor na grid s velkostou bunky = range_est # z kazdej bunky ponechame 1 bod thin_by_grid <- function(sf_points, cellsize) { coords <- st_coordinates(sf_points) sf_points$gx <- floor(coords[,1] / cellsize) sf_points$gy <- floor(coords[,2] / cellsize) sf_points$grid_id <- paste(sf_points$gx, sf_points$gy, sep = "_") sf_points |> group_by(grid_id) |> slice_sample(n = 1) |> ungroup() |> dplyr::select(temp, zone, geometry) } pts_urban_thin <- thin_by_grid(pts_urban[, c("temp", "zone", "geometry")], range_est) pts_rural_thin <- thin_by_grid(pts_rural[, c("temp", "zone", "geometry")], range_est) cat("\n--- POCET BODOV PO PRERIEDENI ---\n") cat("Urban:", nrow(pts_urban_thin), "\n") cat("Rural:", nrow(pts_rural_thin), "\n") thin_urban_vals <- pts_urban_thin$temp thin_rural_vals <- pts_rural_thin$temp df_thin <- data.frame( temp = c(thin_urban_vals, thin_rural_vals), zone = c(rep("urban", length(thin_urban_vals)), rep("rural", length(thin_rural_vals))) ) print( df_thin |> group_by(zone) |> summarise( n = n(), mean = mean(temp), sd = sd(temp) ) ) tt_thin <- t.test( x = thin_urban_vals, y = thin_rural_vals, alternative = "greater", var.equal = FALSE ) cat("\n--- T-TEST PO ZOHLADNENI AUTOKORELACIE ---\n") print(tt_thin) cat("\nInterpretacia:\n") cat("- Dosah autokorelacie bol odhadnuty z variogramu.\n") cat("- Body boli preriedene tak, aby boli priblizne dalej od seba ako odhadnuty range.\n") cat("- Tento pristup je metodicky vhodnejsi ako test nad vsetkymi pixlami.\n") # ========================================================= # 7. SUHRN VYSLEDKOV # ========================================================= cat("\n==============================\n") cat("SUHRN\n") cat("==============================\n") summary_table <- data.frame( approach = c("Vsetky pixle", "Nahodny vyber", "Po zohladneni autokorelacie"), urban_mean = c(mean(vals_urban), mean(samp_urban), mean(thin_urban_vals)), rural_mean = c(mean(vals_rural), mean(samp_rural), mean(thin_rural_vals)), urban_n = c(length(vals_urban), length(samp_urban), length(thin_urban_vals)), rural_n = c(length(vals_rural), length(samp_rural), length(thin_rural_vals)), p_value = c(tt_all$p.value, tt_rand$p.value, tt_thin$p.value) ) print(summary_table) cat("\nMozny zaver:\n") cat("Ak je urban_mean > rural_mean a p-hodnota < 0.05,\n") cat("mozeme tvrdit, ze priemerna LST v meste je statisticky vyznamne vyssia ako v rural uzemi.\n") cat("Najvhodnejsie je interpretovat vysledok po zohladneni priestorovej autokorelacie.\n")