> > < p style = “max-width: 100%; clear: both; min-height: 1em

Analysis of crawled comment data from Bingbing's Bilibili (station B) video

Data analysis

import pandas as pd 

# Load the crawled comments from the Excel workbook into a DataFrame.
data = pd.read_excel(r"bingbing.xlsx")
# Preview the first five rows (displayed as the notebook cell output).
data.head()
Copy the code

The user

gender

level

comments

give a like

0

Food being original

male

6

[stay][Stay][Stay] There you are hey!

158457

1

Never put off till tomorrow what you can guide THU

male

6

I am one of the only 3 followers of Bingbing

148439

2

Hello, teacher. My name is He

male

6

[add knowledge]

89634

3

Cctv.com look

A secret

6

Ice bing is coming! Are we gonna lose our jobs? [doge][doge]

118370

4

Xiamen university,

A secret

5

Welcome bingbing!!

66196

The original link

Data preprocessing

Data description

# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe() 
Copy the code

level

give a like

count

1180.000000

1180.000000

mean

4.481356

2200.617797

std

1.041379

10872.524850

min

2.000000

1.000000

25%

4.000000

4.000000

50%

5.000000

9.000000

75%

5.000000

203.750000

max

6.000000

158457.000000

Drop rows with null values

# dropna() returns a new DataFrame without rows containing nulls; the result is
# only displayed here and not assigned, so `data` itself is left unchanged.
data.dropna() 
Copy the code

The user

gender

level

comments

give a like

0

Food being original

male

6

[stay][Stay][Stay] There you are hey!

158457

1

Never put off till tomorrow what you can guide THU

male

6

I am one of the only 3 followers of Bingbing

148439

2

Hello, teacher. My name is He

male

6

[add knowledge]

89634

3

Cctv.com look

A secret

6

Ice bing is coming! Are we gonna lose our jobs? [doge][doge]

118370

4

Xiamen university,

A secret

5

Welcome bingbing!!

66196

.

.

.

.

.

.

1175

Black swordfish

A secret

5

A million dollars in 11 hours. How fast.

5

1176

It’s your pleasure

male

6

Bingbing fans rise rate: 1.073,000 in 11 hours, 97,500 in average per hour, 1,625 per minute, per second…

5

1177

Fast boy Treaster

male

4

I went to the bathroom during military training and forgot which team it was. After watching the video of UP, I realized that it was triple [doge]

5

1178

Very serious big bear

male

5

I think bingbing hosting the Spring Festival Gala should not be a problem. [OK]

5

1179

Fly slippers and roar

A secret

5

How a Tier 2 player became a Top 100 player on the last day of 2020

5

1180 rows × 5 columns

Remove duplicate rows

# drop_duplicates() returns a new DataFrame without repeated rows; the result is
# only displayed here and not assigned, so `data` itself is left unchanged.
data.drop_duplicates() 
Copy the code

The user

gender

level

comments

give a like

0

Food being original

male

6

[stay][Stay][Stay] There you are hey!

158457

1

Never put off till tomorrow what you can guide THU

male

6

I am one of the only 3 followers of Bingbing

148439

2

Hello, teacher. My name is He

male

6

[add knowledge]

89634

3

Cctv.com look

A secret

6

Ice bing is coming! Are we gonna lose our jobs? [doge][doge]

118370

4

Xiamen university,

A secret

5

Welcome bingbing!!

66196

.

.

.

.

.

.

1175

Black swordfish

A secret

5

A million dollars in 11 hours. How fast.

5

1176

It’s your pleasure

male

6

Bingbing fans rise rate: 1.073,000 in 11 hours, 97,500 in average per hour, 1,625 per minute, per second…

5

1177

Fast boy Treaster

male

4

I went to the bathroom during military training and forgot which team it was. After watching the video of UP, I realized that it was triple [doge]

5

1178

Very serious big bear

male

5

I think bingbing hosting the Spring Festival Gala should not be a problem. [OK]

5

1179

Fly slippers and roar

A secret

5

How a Tier 2 player became a Top 100 player on the last day of 2020

5

1179 rows × 5 columns

Visualization

Top 20 most-liked comments

from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.faker import Faker

# NOTE(review): column names follow the translated headers shown in the data
# preview ("comments", "give a like"); confirm against the actual spreadsheet.
# Keep the 20 comments with the most likes, most-liked first.
df1 = data.sort_values(by="give a like", ascending=False).head(20)

# Bar chart: one bar per comment, bar height = number of likes.
c1 = (
    Bar()
    .add_xaxis(df1["comments"].to_list())
    .add_yaxis("likes", df1["give a like"].to_list(), color=Faker.rand_color())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="comment hot Top20"),
        # Two zoom controls: a slider below the chart and mouse-wheel zoom inside it.
        datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")],
    )
    .render_notebook()
)
c1

Level distribution

# NOTE(review): the column name follows the translated header shown in the data
# preview ("level"); confirm against the actual spreadsheet.
# Number of commenters per user level, highest level first.
data["level"].value_counts().sort_index(ascending=False)
# Output:
# 6    165
# 5    502
# 4    312
# 3    138
# 2     63
# Name: level, dtype: int64

from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.faker import Faker

# Ring (donut) chart of the level distribution.
c2 = (
    Pie()
    .add(
        "",
        # Levels 2..6 paired with the counts printed above, in ascending level order.
        [list(z) for z in zip([str(i) for i in range(2, 7)], [63, 138, 312, 502, 165])],
        radius=["40%", "75%"],
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="distribution"),
        legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
    )
    # Label every slice as "<level>: <count>".
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    .render_notebook()
)
c2

Gender distribution

# NOTE(review): the column name follows the translated header shown in the data
# preview ("gender"); confirm against the actual spreadsheet.
# Number of commenters per gender value.
data["gender"].value_counts().sort_index(ascending=False)

from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.faker import Faker

# Ring (donut) chart of the gender distribution.
c4 = (
    Pie()
    .add(
        "",
        # The garbled source listed "male" twice; the three categories are
        # male / female / secret (undisclosed). Counts are hard-coded from the
        # value_counts() result and given as ints rather than numeric strings.
        [list(z) for z in zip(["male", "female", "secret"], [404, 103, 673])],
        radius=["40%", "75%"],
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="gender distribution"),
        legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
    )
    # Label every slice as "<gender>: <count>".
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    .render_notebook()
)
c4

Draw the word cloud

from wordcloud import WordCloud
import jieba
from tkinter import _flatten
from matplotlib.pyplot import imread
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt

# Load two stop-word lists and merge them into one list of words.
with open('stoplist.txt', 'r', encoding='utf-8') as f:
    stopWords = f.read()
# NOTE(review): the file name was lost in extraction -- only the ".txt"
# extension survives. Restore the real stop-word file name before running.
with open('.txt', 'r', encoding='utf-8') as t:
    stopWord = t.read()
total = stopWord.split() + stopWords.split()


def my_word_cloud(data=None, stopWords=None, img=None):
    """Draw a word cloud of the texts in *data*, shaped by the image *img*.

    Args:
        data: pandas Series of text strings (here, the comment column).
        stopWords: container of words to exclude from the cloud; ``None``
            means nothing is excluded.
        img: path of the image used as the word-cloud mask.
    """
    stopWords = stopWords if stopWords is not None else ()
    # Tokenize every text into a list of words.
    dataCut = data.apply(jieba.lcut)
    # Drop stop words from each token list.
    dataAfter = dataCut.apply(lambda x: [w for w in x if w not in stopWords])
    # Flatten all token lists and count word frequencies.
    wordFre = pd.Series(_flatten(list(dataAfter))).value_counts()
    # The mask image determines the shape of the cloud.
    mask = plt.imread(img)
    plt.figure(figsize=(20, 20))
    wc = WordCloud(
        scale=10,
        font_path='C:/Windows/Fonts/STXINGKA.TTF',
        mask=mask,
        background_color="white",
    )
    wc.fit_words(wordFre)
    plt.imshow(wc)
    plt.axis('off')


# Pass the merged stop-word collection: the source built `total` but then
# passed the raw text of one file, which turns the filter into a substring test.
# NOTE(review): "comments" follows the translated header shown in the data
# preview; confirm against the actual spreadsheet.
my_word_cloud(data=data["comments"], stopWords=set(total), img="1.jpeg")

It’s so cold, I’m all ice – ice

Well, that's all for today's sharing. If you're interested in Python, join our Python learning and discussion group to receive free learning materials and source code.