Rental Information Analysis

import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns file_data = CSV ") file_data file_data.shape file_data.head() file_data.info() file_data.describe() # Duplicated () FILe_data = FILe_data. Drop_duplicates () file_data. Shape # Duplicated () File_data = file_data.dropna() file_data.shapeCopy the code

# Drop every row that contains a missing value and keep the result under the
# same name, so later steps work on the cleaned table.
file_data = file_data.dropna()

Values [0][:-2] # create an empty array data_new = np.array([]) data_area = file_data[" area (㎡)"]. Values for I  in data_area: data_new = np.append(data_new, Array (I [:-2])) # Convert data_new = data_new.astype(Np.float64) file_data.loc[:, Data_new house_data = file_data[" house_size "] temp_list = [] for I in house_data: # print(I) new_info = i.place (" room ", "room ") temp_list.appEnd (new_info) file_data.loc[:," house "] = temp_listCopy the code
! [](https://p9-tt-ipv6.byteimg.com/origin/pgc-image/07487e0bb48c4ad296910f15d4d862cf)

Analysis of the distribution of housing quantity and location

File_data [r]. "area" () new_df = to chassis pd. DataFrame ({" area ": file_data [r]." regional "unique (), Area_count = file_data.groupby(by=" region ").count() new_df[" quantity "] = area_count.values New_df. sort_values(by=" amount ", Ascending =False)Copy the code

Quantitative analysis of house types

def all_house(arr):
    """Count how many times each distinct value occurs in *arr*.

    Args:
        arr: Any array-like accepted by ``np.unique`` (NumPy array,
            pandas Series, list, ...).

    Returns:
        dict: Maps each distinct value (in sorted order) to its number of
        occurrences as a plain ``int``. Empty input yields ``{}``.
    """
    # A single np.unique call yields the distinct values and their counts,
    # avoiding one boolean-mask pass over the data per distinct value.
    keys, counts = np.unique(arr, return_counts=True)
    # tolist() turns the NumPy integer counts into built-in ints.
    return dict(zip(keys, counts.tolist()))
house_info = all_house(house_data)
Copy the code

Remove house types with few listings (keep only those that occur more than 50 times)

# Keep only the house types that occur more than 50 times.
house_data = {key: value for key, value in house_info.items() if value > 50}
# Two-column table: one row per remaining house type with its count.
show_houses = pd.DataFrame({
    "house_type": list(house_data.keys()),
    "quantity": list(house_data.values()),
})
! [](https://p1.pstatp.com/origin/pgc-image/d5431ca4e77d499c9cadd1e5afb4b04e)

A graphical representation

House_type = show_houses[" housetype "] house_type_num = show_houses[" quantity "] plt.barh(range(11), house_type_num) plt.yticks(range(11), house_type) plt.xlim(0, Xlabel (" quantity ") plt.ylabel(" housing type ") # add a specific number to each bar for x, Y in enumerate(house_type_num): # print(x, y) plt.text(y+0.5, x-0.2, "%s" %y) plt.show()Copy the code

# Show the (position, count) pairs that the bar-label loop iterates over.
for x, y in enumerate(house_type_num):
    print(x, y)

! [](https://p9-tt-ipv6.byteimg.com/origin/pgc-image/b7f18a1d95414def9d8092ebd0c2e3d0)
! [](https://p1.pstatp.com/origin/pgc-image/61ad9e5ac5fb41b8a396a89577af1e54)

Average rent analysis

Df_all = pd. DataFrame ({" area ": file_data [r]." regional "unique ()," the rent amount ": [0] * 13," total ": [0] * 13}) sum_price = file_data [" price (RMB/month) "]. Groupby (file_data [" regional "]), the sum () sum_area = Sum () df_all[" total rent "] = sum_price. Values df_all[" total rent "] = sum_area.valuesCopy the code

Calculate the rent per square meter for each area

# Rent per square meter for each region: total rent divided by total area,
# rounded to two decimals. Stored in its own column so the totals are kept.
df_all["rent per square meter"] = round(df_all["total rent"] / df_all["total area"], 2)
! [](https://p1.pstatp.com/origin/pgc-image/094c7b79493a4e6cbf863c86ca1c992e)

Merge the two tables

# Join the per-region listing counts with the per-region rent table; with no
# explicit key, the join uses the columns the two frames have in common.
df_merge = new_df.merge(df_all)
Copy the code
! [](https://p1.pstatp.com/origin/pgc-image/48e49e219a024f11ae79754c82a1c211)

Graphic visualization

Num = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge price = df_merge[" area "] l = [I for I in range(13)] FIG = Figure (figsize=(10, 8), dpi=100) ax1 = FIG. Add_subplot (111) ax1.plot(l, price, "or-", label=" price ") (_x, _y) in enumerate(zip(l, price)): PLT. Text (_x + 0.2 _y, price [I]) ax1. Set_ylim ([0, Ax2 = ax1.twinx() plt.bar(l, num, label=" number ", Alpha =0.2, color="green") ax2.set_ylabel("数 ") plt.legend(loc="upper left") plt.xticks(l, lx) plt.show()Copy the code

Create a second y-axis that shares the same x-axis (`ax1.twinx()`)

Add the x-axis ticks, mapping each tick position to its label string one-to-one (`plt.xticks(l, lx)`)

! [](https://p6-tt-ipv6.byteimg.com/origin/pgc-image/5b5daf3eca53431f9975238b28e3d8f6)

Basic area analysis

# Check the largest and smallest housing area.
print("The largest housing area is %d square meters" % file_data["area (㎡)"].max())
print("The smallest housing area is %d square meters" % file_data["area (㎡)"].min())
# Check the highest and lowest monthly rent.
print("The highest rent is %d RMB per month" % file_data["price (RMB/month)"].max())
print("The lowest rent is %d RMB per month" % file_data["price (RMB/month)"].min())

Divide the area into intervals

# Bin edges for the housing area, in square meters.
area_divide = [1, 30, 50, 70, 90, 120, 140, 160, 1200]
# Assign every listing's area to one of the intervals defined by the edges.
area_cut = pd.cut(list(file_data["area (㎡)"]), area_divide)
# Per-interval counts and frequencies.
area_cut_num = area_cut.describe()
! [](https://p1.pstatp.com/origin/pgc-image/a3a80883d862498882460362c5de1b92)

Display as a pie chart

Area_per = (area_cut_num["freqs"]. Values)*100 labels = [' less than 30 sq.m ', '30-50 sq.m ', '50-70 sq.m ', '70-90 sq.m ', Figure (figsize=(20, 8), figsize=(20, 8) dpi=100) # plt.axes(aspect=1) plt.pie(x=area_per, labels=labels, autopct="%.2f %%") plt.legend() plt.show()Copy the code

`plt.axes(aspect=1)` sets an equal aspect ratio, so the pie is drawn as a circle rather than an ellipse

! [](https://p1.pstatp.com/origin/pgc-image/ed35506698da4920abc44b196b2fbdbf)

PS: If you need Python learning materials, please click on the link below to obtain them

Free Python learning materials and group communication solutions click to join