88 lines
2.1 KiB
R
88 lines
2.1 KiB
R
library(stringr)
|
|
library(dplyr)
|
|
source("funcs.R")
|
|
|
|
# Determine the number of result pages ----
root_link <- "https://ingatlan.com/lista/kiado+lakas+budapest"

# NOTE(review): read_html(), html_elements() and html_text2() are not provided
# by stringr/dplyr; presumably rvest is attached inside funcs.R -- confirm.
pagination_text <- read_html(root_link) %>%
  html_elements(".pagination__page-number") %>%
  html_text2()

# The page count is taken from the third space-separated token of the
# pagination text (presumably something like "1 / N ..." -- verify against the
# live page markup).
pagination_tokens <- unlist(strsplit(pagination_text, " "))
pageno <- as.numeric(pagination_tokens[3])

# Fail fast with a clear message: without this check a missing or non-numeric
# token only surfaces later as a cryptic error in `last:pageno`.
if (length(pageno) != 1 || is.na(pageno)) {
  stop("Could not read the number of pages from ", root_link, call. = FALSE)
}
|
|
|
|
# Collect the individual listing URLs page by page, then scrape each one ----

# Call `fetch(url)` until it succeeds or the failure looks permanent.
#
# @param fetch    Function taking a single URL (here: get_urls or get_rent).
# @param url      URL passed to `fetch`.
# @param wait_sec Seconds to sleep between attempts; after a 403 the same
#   pause is taken twice (once after toggling the network, once before the
#   retry).
# @return The value of `fetch(url)` on success. When the error message
#   contains "404" the "try-error" object itself is returned so the caller can
#   skip the URL. Every other error is retried with no upper bound.
fetch_with_retry <- function(fetch, url, wait_sec = 10) {
  repeat {
    result <- try(fetch(url), silent = TRUE)

    # inherits() always yields a single TRUE/FALSE; `class(x) != "try-error"`
    # has length > 1 for multi-class results (e.g. tibbles), which is an error
    # inside if() on current R versions.
    if (!inherits(result, "try-error")) {
      return(result)
    }

    error_text <- result[1]
    if (grepl(".*404.*", error_text)) {
      print("404")
      return(result)
    }
    if (grepl(".*403.*", error_text)) {
      # Report the time the next attempt will really happen (two pauses).
      print(paste("next try:", paste(Sys.time() + 2 * wait_sec), " ", error_text))
      # Toggle the CORVINUS connection: bring it down if active, else up.
      system("nmcli c show --active | grep CORVINUS && nmcli c down CORVINUS || nmcli c up CORVINUS")
      Sys.sleep(wait_sec)
    }
    Sys.sleep(wait_sec)
  }
}

berelt_url <- ""
data <- data.frame()
na_urls <- c("")
# `last` / `lastrent` record the page and listing currently being processed
# (presumably so an interrupted run can be resumed by hand -- confirm).
last <- 1

for (page in last:pageno) {
  last <- page
  url <- paste0(root_link, "?page=", as.character(page))
  print(paste0(as.character(page), ". oldal"))

  berelt_url <- fetch_with_retry(get_urls, url)
  if (inherits(berelt_url, "try-error")) {
    # Listing page returned 404: skip it instead of treating the error
    # message as if it were a listing URL.
    next
  }
  berelt_url <- unique(berelt_url[berelt_url != ""])

  lastrent <- 1
  n_urls <- length(berelt_url)
  if (n_urls == 0) {
    # Nothing to scrape; also avoids `1:0` iterating over indices 1 and 0.
    next
  }

  # One progress bar per listing page, created once and closed once.
  pb <- txtProgressBar(min = 0, max = n_urls, style = 3, char = "=")
  for (j in lastrent:n_urls) {
    lastrent <- j
    setTxtProgressBar(pb, j)
    url <- berelt_url[j]

    rent <- fetch_with_retry(get_rent, url)

    if (is.data.frame(rent)) {
      # NOTE(review): rowbind() is neither base R nor dplyr; presumably it is
      # defined in funcs.R -- if not, dplyr::bind_rows() is the likely intent.
      data <- rowbind(data, rent)
    } else {
      # Anything that is not a data frame (including 404 errors) is recorded
      # so the URL can be inspected afterwards.
      na_urls <- c(na_urls, url)
    }
  }
  close(pb)
}
|
|
|
|
# Post-process the scraped table and export it ----

# NOTE(review): the first row is dropped unconditionally; presumably it is a
# placeholder row produced while accumulating `data` -- confirm, otherwise a
# real listing is being discarded here.
data <- distinct(data[-1, ])

# Show the result as a data.table and persist it to disk.
dt <- data.table::as.data.table(data)
dt
write.csv(dt, file = "data.csv")

# Quick summary: row count, URLs that yielded no data frame, column structure.
nrow(data)
unique(na_urls)
str(data)
|