library(stringr)
library(dplyr)
source("funcs.R")
# NOTE(review): read_html(), html_elements() and html_text2() are rvest
# functions, and get_urls(), get_rent() and rowbind() are not defined in this
# script; presumably funcs.R attaches/defines them -- confirm.

# Call `fetch(url)` repeatedly until it succeeds or fails with a 404.
#
# fetch: a function taking a single URL.
# url:   the URL handed to `fetch`.
#
# Returns the value of `fetch(url)` on success. When the error message
# contains "404", prints "404" and returns the "try-error" object so the
# caller can detect the failure with inherits(x, "try-error").
# When the error message contains "403", the CORVINUS NetworkManager
# connection is toggled (taken down if active, otherwise brought up) before
# waiting; presumably this is meant to change the outgoing IP -- confirm.
# Any other error is retried after 10 seconds, without an upper bound.
fetch_with_retry <- function(fetch, url) {
  repeat {
    result <- try(fetch(url), silent = TRUE)
    # inherits() is safe for results with more than one class (e.g. tibbles),
    # where `class(x) != "try-error"` yields a vector and breaks `if`.
    if (!inherits(result, "try-error")) {
      return(result)
    }
    if (grepl(".*404.*", result[1])) {
      print("404")
      return(result)
    }
    if (grepl(".*403.*", result[1])) {
      # NOTE(review): the message announces Sys.time() + 60, but the code
      # only sleeps 10 + 10 seconds before the next attempt.
      print(paste("next try:", paste(Sys.time() + 60), " ", result[1]))
      system("nmcli c show --active | grep CORVINUS && nmcli c down CORVINUS || nmcli c up CORVINUS")
      Sys.sleep(10)
    }
    Sys.sleep(10)
  }
}

# Get the number of result pages: the third space-separated token of the
# pagination element's text.
root_link <- "https://ingatlan.com/lista/kiado+lakas+budapest"
pagination_words <- read_html(root_link) %>%
  html_elements(".pagination__page-number") %>%
  html_text2() %>%
  strsplit(" ")
pageno <- as.numeric(unlist(pagination_words)[3])
if (length(pageno) != 1L || is.na(pageno) || pageno < 1) {
  stop("Could not determine the number of result pages from ", root_link,
       call. = FALSE)
}

# Collect the listing URLs page by page and scrape each listing.
berelt_url <- ""
data <- data.frame()
na_urls <- c("")  # URLs for which get_rent() did not yield a data frame
last <- 1         # updated to the page currently being processed
for (page in last:pageno) {
  last <- page
  page_url <- paste0(root_link, "?page=", page)
  print(paste0(page, ". oldal"))

  berelt_url <- fetch_with_retry(get_urls, page_url)
  if (inherits(berelt_url, "try-error")) {
    # 404 on the listing page: skip it instead of treating the error message
    # as if it were a listing URL.
    next
  }
  # Assumes get_urls() returns a character vector of URLs -- TODO confirm.
  berelt_url <- unique(berelt_url[!is.na(berelt_url) & nzchar(berelt_url)])
  if (length(berelt_url) == 0L) {
    # Nothing to scrape; txtProgressBar() also requires max > min.
    next
  }

  # One progress bar per page, created once rather than on every iteration.
  pb <- txtProgressBar(
    min = 0,
    max = length(berelt_url),
    style = 3,
    char = "="
  )
  lastrent <- 1  # updated to the listing index currently being processed
  for (j in seq_along(berelt_url)) {
    lastrent <- j
    setTxtProgressBar(pb, j)
    listing_url <- berelt_url[j]

    rent <- fetch_with_retry(get_rent, listing_url)
    if (is.data.frame(rent)) {
      data <- rowbind(data, rent)
    } else {
      # Covers both a 404 ("try-error") and any non-data-frame result.
      na_urls <- c(na_urls, listing_url)
    }
  }
  close(pb)
}

# NOTE(review): this drops the first row of the collected data. `data` starts
# as an empty data.frame(), so unless rowbind() inserts a placeholder row this
# discards a real listing -- confirm against rowbind() before changing.
data <- data[-1, ]
data <- data %>% distinct()
dt <- data.table::as.data.table(data)
dt
write.csv(dt, file = "data.csv")
nrow(data)
unique(na_urls)
str(data)