能绕赤道(4万公里)5圈的车是什么样的?Python爬取懂车帝网站数据,并做数据可视化展示二手车概况

2021年9月27日 24点热度 0条评论 来源: 松鼠爱出饼干

知识点:

  • requests 发送网络请求
  • parsel 解析数据
  • csv 保存数据

第三方库:

  • requests >>> pip install requests
  • parsel >>> pip install parsel

开发环境:

  • 版本:anaconda5.2.0(python3.6.5)
  • 编辑器:pycharm

 

【付费VIP完整版】只要看了就能学会的教程,80集Python基础入门视频教学

爬虫代码

导入模块

import requests
import parsel
import csv

 

发送请求

# Used-car listing page on dongchedi.com (sh_city_name is the URL-encoded
# "全国", i.e. nationwide; first result page).
# The URL contains no placeholders, so it is a plain string, not an f-string.
url = 'https://www.dongchedi.com/usedcar/x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x?sh_city_name=%E5%85%A8%E5%9B%BD&page=1'
# Without a timeout requests waits indefinitely on an unresponsive server;
# bound the wait so the script fails fast instead of hanging.
html_data = requests.get(url, timeout=10).text

 

解析数据 筛选数据

# Wrap the downloaded HTML in a parsel selector and collect one <li> per listing.
selector = parsel.Selector(text=html_data)
lis = selector.css(
    '#__next > div:nth-child(2) > div.new-main.new > div > div > div.wrap > ul > li'
)
for li in lis:
    # Listing title: text of the <p> inside <dt>.
    title = li.css('dl dt p::text').get()
    # Every <dd> row of the listing card.
    info_list = li.css('dl dd')
    # The text nodes of the second child are glued together and split on '|';
    # the pieces are read as car age, mileage and city, in that order.
    summary_texts = info_list.css('dd:nth-child(2)::text').getall()
    info = ''.join(summary_texts).split('|')
    car_age = info[0]
    # Strip the unit so only the number (in 万公里) remains.
    mileage = info[1].replace('万公里', '')
    city = info[2]

 

做一个判断:根据 dd 标签的数量区分该车源是否带有认证信息,并据此确定售价和新车含税价所在的标签位置

# Pick the certification label, asking price and new-car price of one listing.
# NOTE(review): this fragment reads per-listing names (info_list, title,
# car_age, mileage, city), so it presumably belongs inside the
# `for li in lis` loop body — confirm the intended indentation.
# NOTE(review): .get() presumably returns None when a selector matches nothing,
# in which case the .replace() calls below would raise AttributeError — confirm.
if len(info_list) == 4:
    # Four <dd> rows: the label is read from a <span>, and the two price rows
    # are taken from child positions 4 and 5.
    dcd_auth = info_list.css('dd span::text').get()
    price = info_list.css('dd:nth-child(4)::text').get()
    original_price = info_list.css('dd:nth-child(5)::text').get()
else:
    # Any other row count: use the fixed label '无认证' and read the price
    # rows from child positions 3 and 4.
    dcd_auth = '无认证'
    price = info_list.css('dd:nth-child(3)::text').get()
    original_price = info_list.css('dd:nth-child(4)::text').get()
# NOTE(review): replace('', '') returns the string unchanged; presumably the
# first argument was a special glyph that got lost — confirm against the
# original script.
price = price.replace('', '')
# Remove the '新车含税价: ' label prefix so only the value remains.
original_price = original_price.replace('新车含税价: ', '').replace('', '')
print(title, car_age, mileage, city, dcd_auth, price, original_price)

 

保存数据

# Open the output CSV in append mode; newline='' leaves line endings to the
# csv module.
# NOTE(review): the handle is not closed anywhere in the visible code — close
# csv_dcd after the last row is written so buffered rows reach the disk.
csv_dcd = open('dcd.csv', mode='a', encoding='utf-8', newline='')
csv_write = csv.writer(csv_dcd)
# In append mode the stream is positioned at the end of the file, so a
# position of 0 means the file is new or empty. Writing the header only then
# keeps re-runs from inserting another header row in the middle of the data.
if csv_dcd.tell() == 0:
    csv_write.writerow(['品牌', '车龄', '里程(万公里)', '城市', '认证', '售价(万元)', '原价(万元)'])

 

数据可视化

导入模块

import pandas as pd
from pyecharts.charts import *
from pyecharts.commons.utils import JsCode
from pyecharts import options as opts

 

读取数据

# Load the CSV file 'dcd.csv' into a DataFrame.
df = pd.read_csv('dcd.csv', encoding = 'utf-8')
# Preview the first rows (shown as cell output when run in a notebook).
df.head()

 

各省市二手车数量柱状图

# Bar chart: number of used-car listings per city.
# NOTE(review): `counts` is not defined in the visible code; it is used like a
# pandas Series (index -> x axis, values -> bar heights) — presumably a
# per-city listing count; confirm where it is computed.
bar = (
    Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
    .add_xaxis(counts.index.tolist())
    .add_yaxis(
        '城市二手车数量',
        counts.values.tolist(),
        label_opts=opts.LabelOpts(is_show=True, position='top'),
        # Vertical colour gradient applied to every bar.
        itemstyle_opts=opts.ItemStyleOpts(
            color=JsCode("""new echarts.graphic.LinearGradient(
            0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}])
            """
            )
        )
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各个城市二手车数量柱状图'),
        # The x axis lists cities, so it is named '城市名称'. The previous
        # '书籍名称' ("book title") was a copy-paste leftover.
        xaxis_opts=opts.AxisOpts(
            name='城市名称',
            type_='category',
            # Rotate the labels so long city lists stay readable.
            axislabel_opts=opts.LabelOpts(rotate=90),
        ),
        yaxis_opts=opts.AxisOpts(
            name='数量',
            min_=0,
            max_=1400.0,
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dash'),
            ),
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
    )
    # Reference lines for the average, maximum and minimum of the series.
    .set_series_opts(
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem(type_='average', name='均值'),
                opts.MarkLineItem(type_='max', name='最大值'),
                opts.MarkLineItem(type_='min', name='最小值'),
            ]
        )
    )
)
bar.render_notebook()

 

各省市二手车平均价格柱状图

# Bar chart: average used-car price per city.
# NOTE(review): `means` is not defined in the visible code; it is used like a
# pandas Series (index -> x axis, values -> bar heights) — confirm its origin.
# Each builder call returns the chart itself, so the steps are applied one by
# one to the same object instead of being chained.
bar = Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
bar.add_xaxis(means.index.tolist())
bar.add_yaxis(
    '城市二手车平均价格',
    means.values.tolist(),
    label_opts=opts.LabelOpts(is_show=True, position='top'),
    # Vertical colour gradient applied to every bar.
    itemstyle_opts=opts.ItemStyleOpts(
        color=JsCode("""new echarts.graphic.LinearGradient(
            0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}])
            """
        )
    ),
)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title='各个城市二手车平均价格柱状图'),
    xaxis_opts=opts.AxisOpts(
        name='城市名称',
        type_='category',
        axislabel_opts=opts.LabelOpts(rotate=90),
    ),
    yaxis_opts=opts.AxisOpts(
        name='平均价格',
        min_=0,
        max_=40.0,
        splitline_opts=opts.SplitLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(type_='dash'),
        ),
    ),
    tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
)
# Reference lines for the average, maximum and minimum of the series.
bar.set_series_opts(
    markline_opts=opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_='average', name='均值'),
            opts.MarkLineItem(type_='max', name='最大值'),
            opts.MarkLineItem(type_='min', name='最小值'),
        ]
    )
)
bar.render_notebook()

 

二手车品牌占比情况

# Ring (donut) chart drawn from `datas_pair_1`, with the title in the centre.
# NOTE(review): `datas_pair_1` is not defined in the visible code — presumably
# a list of (label, value) pairs; confirm where it is built.
# Each builder call returns the chart itself, so the steps are applied in
# sequence to the same object.
pie1 = Pie(init_opts=opts.InitOpts(theme='dark', width='1000px', height='600px'))
# An inner and an outer radius turn the pie into a ring.
pie1.add('', datas_pair_1, radius=['35%', '60%'])
# Label format: "{b}" is the item name, "{d}" its percentage.
pie1.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
pie1.set_global_opts(
    title_opts=opts.TitleOpts(
        title="懂车帝二手车\n\n数量占比区间",
        # Centre the title inside the ring.
        pos_left='center',
        pos_top='center',
        title_textstyle_opts=opts.TextStyleOpts(
            color='#F0F8FF',
            font_size=20,
            font_weight='bold',
        ),
    )
)
pie1.render_notebook()

 

二手车里程区间

# Ring (donut) chart titled as the mileage-range share, title in the centre.
# NOTE(review): this chart plots `datas_pair_1`, the same name the brand-share
# chart uses; unless that variable is rebuilt in between, both charts show the
# same data — confirm whether a separate mileage dataset was intended.
ring_init = opts.InitOpts(theme='dark', width='1000px', height='600px')
ring_title = opts.TitleOpts(
    title="懂车帝二手车\n\n里程占比区间",
    # Centre the title inside the ring.
    pos_left='center',
    pos_top='center',
    title_textstyle_opts=opts.TextStyleOpts(
        color='#F0F8FF',
        font_size=20,
        font_weight='bold',
    ),
)
pie1 = Pie(init_opts=ring_init)
# An inner and an outer radius turn the pie into a ring.
pie1.add('', datas_pair_1, radius=['35%', '60%'])
# Label format: "{b}" is the item name, "{d}" its percentage.
pie1.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
pie1.set_global_opts(title_opts=ring_title)
pie1.render_notebook()

 

对于本篇文章有疑问,或者想要数据集的同学也可以点这里

    原文作者:松鼠爱出饼干
    原文地址: https://www.cnblogs.com/qshhl/p/15334565.html
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系管理员进行删除。