elasticsearch-hive外部表插入数据到elasticsearch
1.创建hive源表,并插入数据
1.1创建hive表
drop table if exists a_usr_overview_d;
CREATE TABLE
IF NOT EXISTS a_usr_overview_d
(
day_id string COMMENT 'Daily account period(for example:yyyymmdd)',
online_usr_cnt string COMMENT 'Number of users on the network',
offline_usr_cnt string COMMENT 'Number of users off the network',
new_usr_cnt string COMMENT 'New users'
)
comment 'es测试表'
partitioned by (p_day_id string comment 'Daily zoning')
row format delimited fields terminated by '\t' null defined as 'null' stored as textfile;
1.2往hive表插入数据
insert into table a_usr_overview_d partition(p_day_id='20200202')
select 20200201, 320, 2, 15
union all
select 20200202, 340, 3, 20
union all
select 20200203, 360, 6, 20;
2.elasticsearch-hive外部表创建
2.1修改es的端口,索引
drop table if exists a_usr_overview_d_es;
create external table a_usr_overview_d_es (
day_id string comment 'Daily account period(for example:yyyymmdd)'
,online_usr_cnt string comment 'Number of users on the network'
,offline_usr_cnt string comment 'Number of users off the network'
,new_usr_cnt string comment 'New users'
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES(
'es.resource' = 'a_usr_overview_d_20200202/a_usr_overview_d_20200202',
'es.nodes'='172.21.72.166',
'es.port'='9200',
'es.nodes.wan.only' = 'true',
'es.index.auto.create' = 'true',
'es.net.http.auth.pass'='',
'es.net.http.auth.user'='',
'es.index.refresh_interval' = '-1',
'es.index.number_of_replicas' = '0',
'es.batch.write.retry.count' = '6',
'es.batch.write.retry.wait' = '60s');
若报错如下,则说明hive中缺少elasticsearch-hadoop-6.8.2.jar对应的jar依赖,即可:
FAILED: SemanticException Cannot find class 'org.elasticsearch.hadoop.hive.EsStorageHandler'
hive> add jar /home/bdp/shangeshishi/bigdata/smartEs/elasticsearch-hadoop-6.8.2.jar;
Added [/home/bdp/shangeshishi/bigdata/smartEs/elasticsearch-hadoop-6.8.2.jar] to class path
Added resources: [/home/bdp/shangeshishi/bigdata/smartEs/elasticsearch-hadoop-6.8.2.jar]
hive> list jars;
/home/bdp/shangeshishi/bigdata/smartEs/elasticsearch-hadoop-6.8.2.jar
2.2插入数据到es
hive> insert overwrite table a_usr_overview_d_es select day_id,online_usr_cnt,offline_usr_cnt,new_usr_cnt from a_usr_overview_d;