基于Hadoop和Hive的聊天数据(FineBI)可视化分析 | 您所在的位置:网站首页 › hive数据分析项目PPT › 基于Hadoop和Hive的聊天数据(FineBI)可视化分析 |
目录 1. 准备工作 2. 新建数据库连接 3. 在Hive数据库中创建存放数据的表 4. ETL数据清洗 5. 指标 6. 进入Fine BI数据中心 参考内容https://www.bilibili.com/read/cv15490959/ 数据文件、jar包、插件 https://pan.baidu.com/s/1Mpquo0EgkyZtLHrCPIK2Qg?pwd=7w0k 1. 准备工作在FineBI6.0\webapps\webroot\WEB-INF\lib下放置jar包 启动FineBI服务器 安装hive隔离插件 选择该文件 重启服务器 2. 新建数据库连接在虚拟机后台启动metastore和hiveserver2服务(在hive目录下) 进入beeline客户端 --hive2://后可以是主机名-- !connect jdbc:hive2://192.168.224.112:10000回车然后输入用户名,我的是root,再回车 密码根据自己的填(我没有),回车 如果不成功,就先配置虚拟机中/hadoop父文件夹/hadoop/etc/hadoop/core-site.xml文件 和/hive父文件夹/hive/conf/hive-site.xml文件 然后重启sh,后台挂起metastore,hiveserver2,启动beeline。 在Fine BI上新建hive数据库连接 数据库名称为自己在hive中创建的数据库,主机为虚拟机IP,端口10000,用户名root 创建dgy_30w表(myhive为我自己的数据库),操作在hive和beeline中都可以 create table myhive.dgy_30w ( msg_time string comment "消息发送时间", sender_name string comment "发送人昵称", sender_account string comment "发送人账号", sender_sex string comment "发送人性别", sender_ip string comment "发送人ip地址", sender_os string comment "发送人操作系统", sender_phonetype string comment "发送人手机型号", sender_network string comment "发送人网络类型", sender_gps string comment "发送人的GPS定位", receiver_name string comment "接收人呢称", receiver_ip string comment "接收人IP", receiver_account string comment "接收人账号", receiver_os string comment "接收人操作系统", receiver_phonetype string comment"接收人手机型号", receiver_network string comment "接收人网络类型", receiver_gps string comment"接收人的GPS定位", receiver_sex string comment"接收人性别", msg_type string comment"消息类型", distance string comment"双方距离", message string comment"消息内容" );上传数据 方法一: 通过Xshell的Xftp把csv文件上传到虚拟机opt目录下 把csv文件数据上传到dgy_30w表中 LOAD DATA LOCAL INPATH '/opt/chat_data-30W.csv' OVERWRITE INTO TABLE dgy_30w;方法二: HDFS数据加载 将csv文件上传到hdfs /data下 hdfs dfs -put /opt/chat_data-30W.csv /data在终端beeline中输入load data inpath '/data/chat_data-30W.csv' into table dgy_30w; LOAD DATA INPATH '/data/chat_data-30W.csv' OVERWRITE INTO TABLE dgy_30w;导入成功。 4. ETL数据清洗建立dgy_30w_etl表 create table myhive.dgy_30w_etl ( msg_time string comment "消息发送时间", sender_name string comment "发送人昵称", sender_account string comment "发送人账号", sender_sex string comment "发送人性别", sender_ip string comment "发送人ip地址", sender_os string comment "发送人操作系统", sender_phonetype string comment "发送人手机型号", sender_network string comment "发送人网络类型", sender_gps string comment "发送人的GPS定位", receiver_name string comment "接收人呢称", receiver_ip string comment "接收人IP", receiver_account string comment "接收人账号", receiver_os string comment "接收人操作系统", receiver_phonetype string comment"接收人手机型号", receiver_network string comment "接收人网络类型", receiver_gps string comment"接收人的GPS定位", receiver_sex string comment"接收人性别", msg_type string comment"消息类型", distance string comment"双方距离", message string comment"消息内容", msg_day string comment"消息日期(日)", msg_hour string comment"消息时间(小时)", sender_lng double comment"经度", sender_lat double comment"纬度" );开始清洗 INSERT OVERWRITE TABLE myhive.dgy_30w_etl SELECT *, to_date(msg_time) As msg_day, HOUR(msg_time) As msg_hour, SPLIT(sender_gps,',')[0] As sender_lng, SPLIT(sender_gps,',')[1] As sender_lat FROM myhive. dgy_30w WHERE LENGTH(sender_gps)>0;运行成功,查询 统计今日消息总量 CREATE TABLE IF NOT EXISTS myhive.tb_rs_total_msg_cnt COMMENT"每日消息总量" AS SELECT msg_day,COUNT(*) AS total_msg_cnt FROM myhive.dgy_30w_etl GROUP BY msg_day;统计每小时消息量、发送和接收用户数 CREATE TABLE IF NOT EXISTS myhive.tb_rs_hour_msg_cnt COMMENT"每小时消息量趋势" AS SELECT msg_hour, COUNT(*)AS total_msg_cnt, COUNT(DISTINCT sender_account)AS sender_user_cnt, COUNT(DISTINCT receiver_account)AS receiver_user_cnt FROM myhive.dgy_30w_etl GROUP BY msg_hour;统计今日各地区发送消息总量 CREATE TABLE IF NOT EXISTS myhive.tb_rs_loc_cnt COMMENT"今日各地区发送消息总量"AS SELECT msg_day,sender_lng,sender_lat,sender_gps, COUNT(*)AS total_msg_cnt FROM myhive.dgy_30w_etl GROUP BY msg_day,sender_lng,sender_lat,sender_gps;统计今日发送和接收用户人数 CREATE TABLE IF NOT EXISTS myhive.tb_rs_user_cnt COMMENT"今日发送消息人数、接收消息人数"AS SELECT msg_day, COUNT(DISTINCT sender_account)AS sender_user_cnt, COUNT(DISTINCT receiver_account)AS receiver_user_cnt FROM myhive.dgy_30w_etl GROUP BY msg_day;统计发送消息条数最多的Top10用户 CREATE TABLE IF NOT EXISTS myhive.tb_rs_s_user_top10 COMMENT"发送消息条数最多的Top10用户"AS SELECT sender_name AS username, COUNT(*)AS sender_msg_cnt FROM myhive.dgy_30w_etl GROUP BY sender_name ORDER BY sender_msg_cnt DESC LIMIT 10;统计接收消息条数最多的Top10用户 CREATE TABLE IF NOT EXISTS myhive.tb_rs_r_user_top10 COMMENT"接收消息条数最多的Top10用户" AS SELECT receiver_name AS username, COUNT(*)AS receiver_msg_cnt FROM myhive.dgy_30w_etl GROUP BY receiver_name ORDER BY receiver_msg_cnt DESC LIMIT 10;统计发送人的手机型号分布情况 CREATE TABLE IF NOT EXISTS myhive.tb_rs_sender_phone COMMENT"发送人的手机型号分布"AS SELECT sender_phonetype, COUNT(sender_account)AS cnt FROM myhive.dgy_30w_etl GROUP BY sender_phonetype;统计发送人的手机操作系统分布 CREATE TABLE IF NOT EXISTS myhive.tb_rs_sender_os COMMENT"发送人的手机操作系统分布"AS SELECT sender_os, COUNT(sender_account)AS cnt FROM myhive.dgy_30w_etl GROUP BY sender_os;进入myhive数据库,查看创建的十个表 use myhive; show tables;![]() 启动服务器 进入FineBI 新建数据集,把数据库表导入FinBI中 更新数据 新建分析主题 选择数据表 底栏选择组件,对相应表选择合适的图表,添加仪表板 在组件中给每个表选择合适的图例,适当调整样式 最终展示 |
CopyRight 2018-2019 实验室设备网 版权所有 |