오늘 해 볼 것은
뉴진스의 "Hive Boy",
즉 Hive를 설치하여 HDFS와 연동하는 것이다.
# Download and unpack Apache Hive 2.3.9 under /hive_home.
# -p keeps the step re-runnable when the directory already exists.
mkdir -p /hive_home
cd /hive_home
# archive.apache.org retains every past release, whereas dlcdn only carries
# the currently supported ones.
wget https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz
tar xvzf apache-hive-2.3.9-bin.tar.gz
rm apache-hive-2.3.9-bin.tar.gz

# Point HIVE_HOME at the extracted directory; the following commands use it.
export HIVE_HOME=/hive_home/apache-hive-2.3.9-bin

# Create hive-env.sh from the shipped template and edit it.
cd $HIVE_HOME/conf
cp hive-env.sh.template hive-env.sh
vi hive-env.sh
# Line to add inside hive-env.sh (path of the Hadoop installation):
HADOOP_HOME=/hadoop_home/hadoop-2.7.7
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Hive configuration: PostgreSQL-backed metastore, HDFS warehouse location,
  and the settings used for transactional (ACID) tables.
  NOTE(review): presumably saved as $HIVE_HOME/conf/hive-site.xml - confirm.
  Every <property> must sit inside the single <configuration> root element.
-->
<configuration>
  <!-- Metastore database connection (PostgreSQL over JDBC). -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:postgresql://localhost/hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>org.postgresql.Driver</value>
  </property>
  <!-- Warehouse directory on HDFS. -->
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs://master:9000/user/hive/warehouse</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>postgres</value>
  </property>
  <!-- NOTE(review): plaintext credential; replace with the real password and restrict file permissions. -->
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>mypassword</value>
  </property>

  <!-- Impersonation is switched off here: the value was changed from true to false. -->
  <property>
    <name>hive.server2.enable.doAs</name>
    <value>false</value>
    <description>
      Setting this property to true will have HiveServer2 execute
      Hive operations as the user making the calls to it.
    </description>
  </property>

  <!-- Transaction / locking / compaction settings. -->
  <property>
    <name>hive.support.concurrency</name>
    <value>true</value>
  </property>
  <!-- NOTE(review): hive.enforce.bucketing is believed to be obsolete in Hive 2.x - confirm whether it is still needed. -->
  <property>
    <name>hive.enforce.bucketing</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.exec.dynamic.partition.mode</name>
    <value>nonstrict</value>
  </property>
  <property>
    <name>hive.txn.manager</name>
    <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
  </property>
  <property>
    <name>hive.compactor.initiator.on</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.compactor.worker.threads</name>
    <value>1</value>
  </property>

  <!-- ZooKeeper quorum hosts. -->
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>master,slave1,slave2,slave3</value>
  </property>
</configuration>
# Fetch the PostgreSQL JDBC driver and copy it into Hive's lib directory.
JDBC_JAR=postgresql-42.5.1.jar
mkdir /postgresql
cd /postgresql
wget https://jdbc.postgresql.org/download/$JDBC_JAR
chmod 775 $JDBC_JAR
cp $JDBC_JAR $HIVE_HOME/lib

# Start sshd, then run start-all.sh.
/usr/sbin/sshd
start-all.sh

# Create the HDFS scratch and warehouse directories and make them group-writable.
hadoop fs -mkdir -p /tmp /user/hive/warehouse
hadoop fs -chmod g+w /tmp /user/hive/warehouse

# Initialise the metastore schema for PostgreSQL, then open the Hive CLI.
/hive_home/apache-hive-2.3.9-bin/bin/schematool -dbType postgres -initSchema
hive
-- Demo: create a database and a bucketed, transactional ORC table, then insert one row.
-- IF NOT EXISTS keeps the DDL re-runnable.
CREATE DATABASE IF NOT EXISTS test;

-- The table is bucketed on col1 into 3 buckets and flagged transactional.
-- No ROW FORMAT DELIMITED clause: the table is STORED AS ORC, a binary columnar
-- format, so a text field delimiter would not describe how rows are stored.
CREATE TABLE IF NOT EXISTS test.tab1 (
    col1 INTEGER,
    col2 STRING
)
CLUSTERED BY (col1) INTO 3 BUCKETS
STORED AS ORC
TBLPROPERTIES (
    'transactional' = 'true',
    'orc.compress' = 'SNAPPY'
);

-- Smoke test: write a single row into the table.
INSERT INTO TABLE test.tab1
SELECT
    1 AS col1,
    'avasdf' AS col2;