albérletes dolgok egyben
This commit is contained in:
parent
7988d6cbc5
commit
2b82d298ed
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
.Rproj.user
|
||||
.Rhistory
|
||||
.RData
|
||||
.Ruserdata
|
BIN
Determinants_of_Market_Rent.pdf
Normal file
BIN
Determinants_of_Market_Rent.pdf
Normal file
Binary file not shown.
46981
adat_pozicioval.csv
Normal file
46981
adat_pozicioval.csv
Normal file
File diff suppressed because it is too large
Load Diff
16
beolvas.R
Normal file
16
beolvas.R
Normal file
@ -0,0 +1,16 @@
|
||||
library(data.table)
library(dplyr)

# Load the geocoded listing table as a data.table.
# df <- read.csv("data.csv") %>% as.data.table()
df <- as.data.table(read.csv("adat_pozicioval.csv"))

# Drop the two index columns ("X", "X.1").
# NOTE(review): these look like row-name columns left by repeated
# write.csv() round trips - confirm.
df <- df[, -c("X", "X.1")]

# Columns that are treated as categorical variables.
factorlist <- c(
  "tipus", "Akadálymentesített", "Átlagáramfogyasztásinfo",
  "Átlaggázfogyasztásinfo", "Belmagasság", "Bútorozott", "Dohányzás",
  "Emelet", "Energiatanúsítvány", "Építéséve", "Épületszintjei",
  "Fürdőéswc", "Fűtés", "Gépesített", "Ingatlanállapota",
  "Kertkapcsolatos", "Kilátás", "Kisállat", "Komfort", "Költözhető",
  "Közösköltség", "Légkondicionáló", "Lift", "Minbérletiidő", "Parkolás",
  "Rezsiköltség", "Szigetelés", "Tájolás", "Tetőtér", "Panelprogram",
  "Parkolóhelyára"
)

# Replace the "nincs megadva" placeholder with NA, then convert each of the
# listed columns to a factor.
df[, factorlist] <- lapply(
  df[, ..factorlist],
  function(x) {
    as.factor(ifelse(x == "nincs megadva", NA, x))
  }
)

# Erkélymérete
# lapply(df,typeof)
### tavolsag.R comes next
|
5127
data/prc_fsc_idx.tsv
Normal file
5127
data/prc_fsc_idx.tsv
Normal file
File diff suppressed because it is too large
Load Diff
13
docs/bevezetes
Normal file
13
docs/bevezetes
Normal file
@ -0,0 +1,13 @@
|
||||
# Bevezetés
|
||||
Az előző pár évben az ingatlanok árai jelentős emelkedésen mentek keresztül (valami portfolio cikk vagy ilyesmi cite)
|
||||
, így egyre kevesebb embernek adatik meg, hogy a jelenlegi magas árak mellett is saját lakást tudjon venni.
|
||||
Ennek köszönhetően az ingatlan bérpiacnak nőtt a jelentősége, és az ezen a piacon megfigyelhető árazási faktorok jelentős hatással
|
||||
lehetnek ezrek mindennapi életére.
|
||||
Elemzésemben a budapesti ingatlanok áraira hatással levő változókat elemzem, mind a mennyiségi, minőségi és területi változókra
|
||||
kitérve a teljesség kedvéért.
|
||||
|
||||
Az elemzésben az Ingatlan.com-ról scrapelt adatokat kombináltam az OpenStreetMap nevű online térkép adataival, így megkapva a
|
||||
felhasznált bérlakás adathalmazt, amelyet "Geographically Weighted Regression" azaz földrajzilag súlyozott regresszióval elemzek.
|
||||
|
||||
# Absztrakt
|
||||
A közelmúltban jelentősen emelked
|
15793
epitett.json
Normal file
15793
epitett.json
Normal file
File diff suppressed because it is too large
Load Diff
98
funcs.R
Normal file
98
funcs.R
Normal file
@ -0,0 +1,98 @@
|
||||
#functions for scrape.R
|
||||
|
||||
library(rvest)
|
||||
library(stringr)
|
||||
library(sf)
|
||||
library(jsonlite)
|
||||
library(purrr)
|
||||
# library(dplyr)
|
||||
|
||||
#' Row-bind two data frames whose column sets may differ.
#'
#' Each input is first extended with an `NA` entry for every column name that
#' only the other input has, then the two padded frames are stacked with
#' `rbind()`.
#'
#' @param a A data frame (or named list of columns).
#' @param b A data frame (or named list of columns).
#' @return A data frame holding the rows of `a` followed by the rows of `b`.
rowbind <- function(a, b) {
  # Extend `x` with one NA entry per column name found only in `y`.
  pad_missing <- function(x, y) {
    only_in_y <- setdiff(names(y), names(x))
    data.frame(c(x, sapply(only_in_y, function(col) NA)))
  }

  rbind(pad_missing(a, b), pad_missing(b, a))
}
|
||||
|
||||
#' Collect the listing links found on one search-result page.
#'
#' Reads the page, takes the `href` of every `<a>` inside `.listing` elements,
#' de-duplicates them and prefixes each with the site root.
#'
#' @param url URL of a search-result page.
#' @return Character vector of absolute listing URLs.
get_urls <- function(url) {
  page <- read_html(url)
  anchors <- html_elements(html_elements(page, ".listing"), "a")
  hrefs <- unique(html_attr(anchors, "href"))
  paste0("https://ingatlan.com", hrefs)
}
|
||||
|
||||
#' Scrape one rental listing page into a single-row table.
#'
#' Reads the page at `url`, collects the unique `<span>` texts inside
#' `.listing-property` (dropping any that contain a euro sign), and - only
#' when exactly six texts remain - combines them with the description, the
#' `.card-title` texts and the page's HTML tables.
#'
#' @param url URL of a single listing page.
#' @return The `cbind()` of the headline fields and the transposed detail
#'   tables, or `NA` when the filtered span list does not have exactly six
#'   entries.
get_rent <- function(url) {
  # url <- "https://ingatlan.com/xiii-ker/kiado+lakas/csuszozsalus-lakas/33411349"
  html <- read_html(url)
  alapok <- html %>% html_elements(".listing-property") %>%
    html_elements("span") %>% html_text2() %>% unique()
  # Drop every entry that contains a euro sign.
  alapok <- alapok[!grepl(".*€.*", alapok)]
  if (length(alapok) == 6) {
    leiras <- html %>% html_element("#listing-description") %>% html_text2()
    tipus <- html %>% html_elements(".card-title") %>% html_text2() %>% unique()
    tablazat <- html %>% html_elements("table") %>% html_table()

    # Even positions of `alapok` are used as values, odd positions (with
    # whitespace and dots removed) as their names.
    # NOTE(review): `data` is a matrix at this point; the `$<-` assignments
    # below appear to rely on R coercing it to a list - confirm before
    # restructuring.
    data <- t(data.frame(alapok[seq(from = 2, to = length(alapok), by = 2)]))
    names(data) <- gsub("[[:space:].]+", "", alapok[seq(from = 1, to = length(alapok), by = 2)])
    rownames(data) <- 1
    data$ar <- alapok[grepl(".*(Ft)|€.*", alapok)][1]
    data$terulet <- alapok[grepl(".*(m2)", alapok)][1]
    data$szobak <- alapok[length(alapok)]
    data$leiras <- leiras
    data$cim <- tipus[grepl(".*[kK]erület.*", tipus)]
    data$tipus <- tipus[grepl("^Kiadó.*", tipus)]

    # Merge the first six tables of the page pairwise into one table.
    # NOTE(review): assumes the page always has at least six tables - confirm.
    tdt <- as.data.frame(
      merge(
        merge(
          merge(tablazat[1], tablazat[2], all = TRUE, no.dups = FALSE),
          merge(tablazat[3], tablazat[4], all = TRUE, no.dups = FALSE),
          all = TRUE, no.dups = FALSE
        ),
        merge(tablazat[5], tablazat[6], all = TRUE, no.dups = FALSE),
        all = TRUE, no.dups = FALSE
      ),
      no.dups = FALSE
    )
    # The first column of the merged table supplies the column names (cleaned
    # like the names above), the remaining column(s) the values.
    cbind(data, setNames(data.frame(t(tdt[, -1])), gsub("[[:space:].]+", "", tdt[, 1])))
  } else {
    NA
  }
}
|
||||
|
||||
#' Convert a textual price such as "120 ezer Ft" into a number.
#'
#' The first space-separated token is the amount (a decimal comma is
#' accepted), the second token is the magnitude word: "ezer" (x 1e3),
#' "millió" (x 1e6) or "milliárd" (x 1e9).
#'
#' The function is scalar: only the first element of `x` is used. Unlike the
#' previous implementation it no longer raises an error when the input is
#' `NA`, empty, or has fewer than two tokens; those cases return `NA_real_`.
#'
#' @param x A single price string.
#' @return The price as a double, or `NA_real_` when the text cannot be
#'   interpreted (missing input, missing or unknown magnitude word).
arconv <- function(x) {
  multipliers <- c("ezer" = 1e3, "millió" = 1e6, "milliárd" = 1e9)

  if (length(x) == 0L) {
    return(NA_real_)
  }
  tokens <- strsplit(as.character(x[[1]]), " ", fixed = TRUE)[[1]]
  if (length(tokens) < 2L || anyNA(tokens[1:2])) {
    return(NA_real_)
  }

  # Only the first comma is replaced, matching the original behaviour.
  amount <- as.numeric(sub(",", ".", tokens[1], fixed = TRUE))
  # An unknown magnitude word indexes to NA, so the product is NA as well.
  unname(amount * multipliers[tokens[2]])
}
|
||||
|
||||
#' Look up the coordinates of an address via the local search service.
#'
#' Sends the address to `http://localhost:8080/search.php` with `limit=1` and
#' returns the `lat` and `lon` fields of the parsed JSON response.
#'
#' Only the address itself is percent-encoded (with `reserved = TRUE`), so
#' characters such as `&`, `#`, `?` or `+` inside an address can no longer
#' break or truncate the query string.
#'
#' @param cim A single address string.
#' @return `c(lat, lon)` as returned by the service, or `NA` when `cim` is
#'   missing / not a single value or the service returns an empty result.
get_coords <- function(cim) {
  if (length(cim) != 1L || is.na(cim)) {
    return(NA)
  }
  url <- paste0(
    "http://localhost:8080/search.php?q=",
    URLencode(as.character(cim), reserved = TRUE),
    "&limit=1"
  )
  data <- read_json(url, simplifyVector = TRUE)
  if (length(data) == 0) {
    NA
  } else {
    c(data$lat, data$lon)
  }
}
|
||||
|
||||
# Split an address on single spaces and drop its first two tokens.
# Surrounding whitespace is trimmed first; only the first element of `x` is
# used. Returns character(0) for missing or too-short input.
# NOTE(review): the two dropped tokens are presumably the district prefix
# (e.g. "XIII. kerület,") - confirm against the scraped `cim` format.
.address_tokens <- function(x) {
  if (length(x) == 0L || is.na(x[[1]])) {
    return(character(0))
  }
  tokens <- strsplit(trimws(as.character(x[[1]])), " ", fixed = TRUE)[[1]]
  tokens[-(1:2)]
}

#' Extract the house number from an address string.
#'
#' After dropping the first two space-separated tokens, the last remaining
#' token is returned when it contains a digit.
#'
#' Addresses that are `NA` or have two or fewer tokens used to raise an error;
#' they now yield `NA_character_`. Trailing whitespace is ignored.
#'
#' @param x A single address string.
#' @return The last token if it contains a digit, otherwise `NA_character_`.
hazszamconv <- function(x) {
  tokens <- .address_tokens(x)
  n <- length(tokens)
  if (n > 0L && grepl("[0-9]", tokens[n])) {
    tokens[n]
  } else {
    NA_character_
  }
}

#' Extract the street part of an address string.
#'
#' After dropping the first two space-separated tokens, the remaining tokens
#' are joined with single spaces; a final token that contains a digit (the
#' house number) is left out.
#'
#' Addresses that are `NA` or have two or fewer tokens used to raise an error;
#' they now yield an empty string. Trailing whitespace is ignored.
#'
#' @param x A single address string.
#' @return The street text as a single string (possibly `""`).
utcaconv <- function(x) {
  tokens <- .address_tokens(x)
  n <- length(tokens)
  if (n > 0L && grepl("[0-9]", tokens[n])) {
    tokens <- tokens[-n]
  }
  paste(tokens, collapse = " ")
}
|
16
mapout.R
Normal file
16
mapout.R
Normal file
@ -0,0 +1,16 @@
|
||||
library(leaflet)
library(purrr)

# Data set shown on the map; enable the sampling line to plot a subset.
# df2 <- df[sample(1:nrow(df),4000),]
df2 <- df

# Price per square metre of the plotted rows. Bins and palette are both
# derived from `df2`, so they stay consistent when a sample is plotted.
ar_per_m2 <- df2$ar / df2$ater
bins <- c(0, quantile(ar_per_m2, probs = c(0.2, 0.4, 0.6, 0.8)), Inf) # quintiles
pal <- colorBin("YlOrRd", domain = ar_per_m2, bins = bins)

# Listing markers plus the metro and train station markers.
# NOTE(review): the legend describes `pal`, but the coloured circle-marker
# layer is commented out, so nothing on the map currently uses the palette.
leaflet(df2) %>%
  addTiles() %>%
  # addCircleMarkers(~lon,~lat,popup = df2$cim,radius = 10,fillColor = ~pal(ar/ater),fillOpacity = 0.8,stroke = F) %>%
  addMarkers(~lon, ~lat, popup = df2$cim) %>%
  addLegend(
    pal = pal, values = ar_per_m2, title = "Ft/m2/hó",
    position = "bottomright"
  ) %>%
  addMarkers(metro$lon, metro$lat, popup = metro$name) %>%
  addMarkers(vonat$lon, vonat$lat, popup = vonat$name)
|
4062
metro.html
Normal file
4062
metro.html
Normal file
File diff suppressed because one or more lines are too long
30
model.R
Normal file
30
model.R
Normal file
@ -0,0 +1,30 @@
|
||||
library(GWmodel)

# Working sample: drop rows whose `tipus` is "Kiadó szoba" and keep rows with
# `ar` of at most 400000.
df3 <- df[df$tipus != "Kiadó szoba", ]
df3 <- df3[df3$ar <= 400000, ]
df4 <- df3[, c("ar", "ater", "szoba", "felszoba", "ker", "metrotav", "vonattav")]

# Candidate OLS specifications. Every fit overwrites `lmmod`, so the
# summary() call below reports only the last one; run the lines selectively
# to inspect another specification.
lmmod <- lm(log(ar) ~ ater + log(tomkoz + 1) + szoba + felszoba + Emelet, df3)
lmmod <- lm(log(ar) ~ as.factor(ker) + ater + log(tomkoz + 1) + szoba + felszoba + Emelet, df3)
lmmod <- lm(log(ar) ~ as.factor(ker) + log(ater) + log(tomkoz + 1) + szoba + felszoba + Emelet, df3)
lmmod <- lm(log(ar / ater) ~ as.factor(ker) + log(tomkoz + 1) + szoba + felszoba + Emelet, df3)
lmmod <- lm((ar / ater) ~ as.factor(ker) + log(tomkoz + 1) + szoba + felszoba + Emelet, df3)

# Exploratory plots and summaries.
hist(df4$ar / df4$ater)
hist(log(df4$ar / df4$ater))
summary(lmmod)
str(df3)
hist(log(df4$ater))
min(df3$ar)
pairs(df3[, 39:48])
str(df3)
hist(log(df3$tomkoz))
hist(log(df3$vonattav))
hist(log(df3$metrotav))
hist(df3$ar)
hist(log(df3$ar))
hist(df3$ater)
hist(log(df3$ater))
plot(as.factor(df3$ker))
plot(df3$ar, df3$tomkoz)
cor(df3[, c("ater", "ar", "szoba", "felszoba", "metrotav", "vonattav")])
colnames(df3)
psych::describe(df3)
|
BIN
rossz/1-s2.0-0094119083900451-main.pdf
Normal file
BIN
rossz/1-s2.0-0094119083900451-main.pdf
Normal file
Binary file not shown.
BIN
rossz/65-162-4-PB.pdf
Normal file
BIN
rossz/65-162-4-PB.pdf
Normal file
Binary file not shown.
87
scrape.R
Normal file
87
scrape.R
Normal file
@ -0,0 +1,87 @@
|
||||
library(stringr)
library(dplyr)
source("funcs.R")

# Call `fetch(url)` until it stops failing.
#   - success:            the value returned by `fetch` is returned
#   - error mentions 404: prints "404" and returns NULL (no retry)
#   - error mentions 403: toggles the CORVINUS network connection via nmcli,
#                         waits, then retries
#   - any other error:    waits and retries
# `wait` is the pause in seconds; the 403 branch pauses twice, as before.
fetch_with_retry <- function(fetch, url, wait = 10) {
  repeat {
    result <- try(fetch(url), silent = TRUE)
    # inherits() stays a single TRUE/FALSE even for multi-class results.
    if (!inherits(result, "try-error")) {
      return(result)
    }
    if (grepl(".*404.*", result[1])) {
      print("404")
      return(NULL)
    }
    if (grepl(".*403.*", result[1])) {
      print(paste("next try:", paste(Sys.time() + 60), " ", result[1]))
      system("nmcli c show --active | grep CORVINUS && nmcli c down CORVINUS || nmcli c up CORVINUS")
      Sys.sleep(wait)
    }
    Sys.sleep(wait)
  }
}

# Determine the number of result pages.
root_link <- "https://ingatlan.com/lista/kiado+lakas+budapest"
pageno <- read_html(root_link) %>%
  html_elements(".pagination__page-number") %>%
  html_text2() %>%
  strsplit(" ")
pageno <- as.numeric(unlist(pageno)[3])

# Collect the listing URLs page by page and scrape every listing.
berelt_url <- ""
data <- data.frame()
na_urls <- c("")
# `last` / `lastrent` record the current position so an interrupted run can
# be resumed by hand from the same page.
last <- 1
for (i in last:pageno) {
  last <- i
  url <- paste0(root_link, "?page=", as.character(i))
  print(paste0(as.character(i), ". oldal"))

  berelt_url <- fetch_with_retry(get_urls, url)
  if (is.null(berelt_url)) {
    # Page not found: treat it as a page without listings instead of feeding
    # the error object to the listing loop.
    berelt_url <- character(0)
  }
  berelt_url <- unique(berelt_url[berelt_url != ""])
  if (length(berelt_url) == 0L) {
    next
  }

  lastrent <- 1
  # One progress bar per page.
  pb <- txtProgressBar(
    min = 0,
    max = length(berelt_url),
    style = 3,
    char = "="
  )
  for (j in lastrent:length(berelt_url)) {
    lastrent <- j
    setTxtProgressBar(pb, j)
    url <- berelt_url[j]
    # print(paste0(round(j/length(berelt_url)*100,digits = 2),"%"))
    rent <- fetch_with_retry(get_rent, url)
    if (is.data.frame(rent)) {
      data <- rowbind(data, rent)
    } else {
      # Listings that could not be parsed (or returned 404) are remembered.
      na_urls <- c(na_urls, url)
    }
  }
  close(pb)
}

# Drop the first row, then remove duplicate listings.
# NOTE(review): the first row is presumably the all-NA row that rowbind()
# creates when `data` starts out empty - confirm.
data <- data[-1, ]
data <- data %>% distinct()
dt <- data.table::as.data.table(data)
dt
write.csv(dt, file = "data.csv")

nrow(data)
unique(na_urls)
str(data)
|
21
scratchpad.R
Normal file
21
scratchpad.R
Normal file
@ -0,0 +1,21 @@
|
||||
# Ad-hoc experiments; expects `url`, `df` and the helpers from funcs.R to be
# present in the session.

# Headline fields of one listing page, without the euro entries.
html <- read_html(url)
alapok <- unique(
  html_text2(html_elements(html_elements(html, ".listing-property"), "span"))
)
alapok <- alapok[!grepl(".*€.*", alapok)]
alapok
ar <- alapok[grepl(".*(ft)|€.*", alapok)][1]
ar

# Geocode the first address through the helper ...
get_coords(df$cim[1])

# ... and by hand, keeping only the first element of the response.
url <- URLencode(paste0("http://localhost:8080/search.php?q=", df$cim[1]))
data <- head(read_json(url, simplifyVector = TRUE), n = 1)
if (length(data) == 0) {
  NA
} else {
  c(data$lat, data$lon)
}

# Dump `df` to an R source file, keep an in-memory copy, then load the dump.
dump("df", "adat.R")
dfbak <- df
source("adat.R")
|
2
spacialregr.R
Normal file
2
spacialregr.R
Normal file
@ -0,0 +1,2 @@
|
||||
library(sp)

# Spatial version of the model sample: lon/lat columns as coordinates, the
# full table as attribute data.
sdf <- SpatialPointsDataFrame(
  coords = df3[, c("lon", "lat")],
  data = df3
)
|
34
tavolsag.R
Normal file
34
tavolsag.R
Normal file
@ -0,0 +1,34 @@
|
||||
library(osmdata)
|
||||
library(sf)
|
||||
library(sfheaders)
|
||||
library(geosphere)
|
||||
|
||||
#' Distance from each point of one set to the nearest point of another set.
#'
#' Builds the full distance matrix between the two point sets with `distm()`
#' and takes the row-wise minimum.
#'
#' @param x1,y1 Coordinates of the points to measure from.
#' @param x2,y2 Coordinates of the candidate target points.
#' @return One minimum distance per point of the first set, in the unit
#'   returned by `distm()`.
getmindist <- function(x1, y1, x2, y2) {
  from <- data.frame(x1, y1)
  to <- data.frame(x2, y2)
  dist_matrix <- distm(from, to)
  apply(dist_matrix, 1, min, simplify = TRUE)
}
|
||||
|
||||
# Subway stations inside the Budapest bounding box (OpenStreetMap query).
metro <- osmdata_sf(
  add_osm_feature(opq(getbb("Budapest")), "station", "subway")
)
metro <- data.frame(metro$osm_points)

# Each geometry unlists to a coordinate pair; row 1 is stored as lon, row 2
# as lat.
metro_coords <- sapply(metro$geometry, unlist)
metro$lat <- metro_coords[2, ]
metro$lon <- metro_coords[1, ]

# Train stations inside the Budapest bounding box.
# NOTE(review): add_osm_feature() is given two keys and two values here -
# confirm that osmdata treats them as intended.
vonat <- osmdata_sf(
  add_osm_feature(
    opq(getbb("Budapest")),
    c("railway", "train"), c("station", "yes")
  )
)
vonat <- vonat$osm_points

vonat_coords <- sapply(vonat$geometry, unlist)
vonat$lat <- vonat_coords[2, ]
vonat$lon <- vonat_coords[1, ]

# Distance of every listing to its nearest metro and train station, and the
# smaller of the two.
df$metrotav <- getmindist(df$lon, df$lat, metro$lon, metro$lat)
df$vonattav <- getmindist(df$lon, df$lat, vonat$lon, vonat$lat)
df$tomkoz <- pmin(df$vonattav, df$metrotav)
|
16
tdk.Rproj
Normal file
16
tdk.Rproj
Normal file
@ -0,0 +1,16 @@
|
||||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Default
|
||||
SaveWorkspace: Default
|
||||
AlwaysSaveHistory: Default
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: No
|
||||
NumSpacesForTab: 2
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
||||
|
||||
AutoAppendNewline: Yes
|
||||
StripTrailingWhitespace: Yes
|
25
terkepek.R
Normal file
25
terkepek.R
Normal file
@ -0,0 +1,25 @@
|
||||
# Colour scale shared by all three maps below: cut points are the 2, 5, 10,
# 25, 35, 50, 75 and 90 % quantiles of the train-station distance.
# NOTE(review): the metro and combined maps reuse these train-based bins -
# confirm this is intended for comparability.
bins <- c(0, quantile(df$vonattav, probs = c(0.02, 0.05, 0.1, 0.25, 0.35, 0.5, 0.75, 0.9)), Inf)
pal <- colorBin("YlOrRd", domain = df$vonattav, bins = bins)

# Map 1: distance to the nearest train station. The popup now shows the
# train distance that the colours and legend refer to (it showed the metro
# distance before).
leaflet(df) |>
  addTiles() |>
  addCircles(
    ~lon, ~lat,
    popup = paste0(df$cim, "<br>", round(df$vonattav, 2), " m"),
    radius = 80, fillColor = ~pal(vonattav), fillOpacity = 0.8, stroke = FALSE
  ) |>
  addLegend(
    pal = pal, values = df$vonattav, title = "Távolság a vonattól",
    position = "bottomright"
  ) |>
  addMarkers(vonat$lon, vonat$lat, popup = vonat$name)

# Map 2: distance to the nearest metro station.
leaflet(df) |>
  addTiles() |>
  addCircles(
    ~lon, ~lat,
    popup = paste0(df$cim, "<br>", round(df$metrotav, 2), " m"),
    radius = 80, fillColor = ~pal(metrotav), fillOpacity = 0.8, stroke = FALSE
  ) |>
  addLegend(
    pal = pal, values = df$metrotav, title = "Távolság a metrótól",
    position = "bottomright"
  ) |>
  addMarkers(metro$lon, metro$lat, popup = metro$name)

# Map 3: distance to the nearest metro or train station.
leaflet(df) |>
  addTiles() |>
  addCircles(
    ~lon, ~lat,
    popup = paste0(df$cim, "<br>", round(df$tomkoz, 2), " m"),
    radius = 100, fillColor = ~pal(tomkoz), fillOpacity = 0.8, stroke = FALSE
  ) |>
  addLegend(
    pal = pal, values = df$tomkoz, title = "Távolság a legközelebbi <br> metrótól vagy vonattól",
    position = "bottomright"
  ) |>
  addMarkers(metro$lon, metro$lat, popup = metro$name) |>
  addMarkers(vonat$lon, vonat$lat, popup = vonat$name)
|
||||
|
20
tisztitas.R
Normal file
20
tisztitas.R
Normal file
@ -0,0 +1,20 @@
|
||||
source("funcs.R")

# Floor area: the text before the first space of `Alapterület`.
df$ater <- as.numeric(str_split(df$Alapterület, " ", n = 2, simplify = TRUE)[, 1])
# Monthly price converted from text to a number.
df$ar <- sapply(df$Árhavonta, arconv)
# District: the first token of the address, read as a Roman numeral.
df$ker <- as.factor(as.numeric(as.roman(str_remove(str_split(df$cim, " ", simplify = TRUE)[, 1], "\\."))))

# this takes a long time!!
# lapply() always yields a list column, even when every lookup succeeds
# (sapply() would collapse that case into a matrix).
df$point <- lapply(df$cim, get_coords)
# Keep only the rows for which a coordinate pair was returned.
has_coords <- vapply(
  df$point,
  function(p) length(p) == 2L && !anyNA(p),
  logical(1)
)
df <- df[has_coords, ]
df$lat <- vapply(df$point, function(p) as.double(p[[1]]), numeric(1))
df$lon <- vapply(df$point, function(p) as.double(p[[2]]), numeric(1))

# Whole rooms and half rooms.
# NOTE(review): assumes entries with half rooms look like "2 + 1 fél"
# (token 1 = whole rooms, token 3 = half rooms) - confirm against the data.
# Tokens are extracted with an explicit NA default so that rows with fewer
# than three tokens keep their position; dropping them shifted the half-room
# values onto the wrong rows.
szoba_tokens <- str_split(df$Szobák, " ")
van_felszoba <- grepl("fél", df$Szobák)
elso_token <- map_chr(szoba_tokens, 1, .default = NA_character_)
harmadik_token <- map_chr(szoba_tokens, 3, .default = NA_character_)
df$szoba <- ifelse(van_felszoba, as.numeric(elso_token), as.numeric(df$Szobák))
df$felszoba <- ifelse(van_felszoba, as.numeric(harmadik_token), 0)
# unlist(map(str_split(df$Szobák," "),3))
# df[felszoba==3,]$Szobák
# summary(grepl("fél",df$Szobák))

# NOTE(review): this writes `df3`, which is not created in this script -
# confirm whether `df` was meant.
write.csv(df3, file = "adat_pozicioval.csv")
|
4062
tomkoz.html
Normal file
4062
tomkoz.html
Normal file
File diff suppressed because one or more lines are too long
4062
vonat.html
Normal file
4062
vonat.html
Normal file
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user