Hadoop之Hive

Hive基本介绍

查询管理存储在HDFS中的大数据集合,提供像SQL的查询语句HiveQLHive表的元数据存储在Derby数据库中,MysqlOracle也可以作为Hive的存储源

Hive操作

use:切换数据库

hive> use default;
OK
Time taken: 0.454 seconds
hive> show tables;
OK
categories
customers
departments
intermediate_access_logs
order_items
orders
products
tokenized_access_logs
wlslog
Time taken: 0.404 seconds, Fetched: 9 row(s)

 CREATE DATABASE:创建数据库

CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name //数据库名
[COMMENT database_comment] //数据库说明
[LOCATION hdfs_path] //存储路径
[WITH DBPROPERTIES (property_name=property_value, ...)]; //键值

  直接创建数据库 

hive> CREATE DATABASE IF NOT  EXISTS testdb;
OK
Time taken: 1.45 seconds
hive> DESC database extended testdb;
OK
testdb          hdfs://quickstart.cloudera:8020/user/hive/warehouse/testdb.db   cloudera        USER
Time taken: 0.046 seconds, Fetched: 1 row(s)

 CREATE TABLE:创建表

 ROW FORMAT:DELIMITED,用于分割文件

 FIELDS TERMINATED BY:指定字段

 LINES TERMINATE BY:行标识

CREATE TABLE wlslog(time_stamp STRING,category STRING, type STRING,servername STRING,code
STRING,msg STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';
hive> show tables;
OK
wlslog
Time taken: 0.019 seconds, Fetched: 1 row(s)

 LOAD DATA:导入数据

 LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION
(partcol1=val1, partcol2=val2 ...)]

  导入文件

hive> LOAD DATA LOCAL INPATH 'cdh/wlslog.log' OVERWRITE INTO TABLE wlslog;
Loading data to table testdb.wlslog
Table testdb.wlslog stats: [numFiles=1, numRows=0, totalSize=717, rawDataSize=0]
OK
Time taken: 0.757 seconds
hive> select * from wlslog;
OK
Apr-8-2014-7:06:16-PM-PDT       Notice  WebLogicServer  AdminServer     BEA-000365      Server state changed to STANDBY
Apr-8-2014-7:06:17-PM-PDT       Notice  WebLogicServer  AdminServer     BEA-000365      Server state changed to STARTING
Apr-8-2014-7:06:18-PM-PDT       Notice  WebLogicServer  AdminServer     BEA-000365      Server state changed to ADMIN
Apr-8-2014-7:06:19-PM-PDT       Notice  WebLogicServer  AdminServer     BEA-000365      Server state changed to RESUMING
Apr-8-2014-7:06:20-PM-PDT       Notice  WebLogicServer  AdminServer     BEA-000331      Started WebLogic AdminServer
Apr-8-2014-7:06:21-PM-PDT       Notice  WebLogicServer  AdminServer     BEA-000365      Server state changed to RUNNING
Apr-8-2014-7:06:22-PM-PDT       Notice  WebLogicServer  AdminServer     BEA-000360      Server started in RUNNING mode
Time taken: 0.41 seconds, Fetched: 7 row(s)

 复制表结构

CREATE TABLE wlslog_2 LIKE wlslog;
 插入数据

 INSERT OVERWRITE TABLE tablename1 [PARTITION(partcol1=val1,partcol2=val2 ...) [IF NOT   EXISTS]]  select_statement1 FROM from_statement;

 INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1,partcol2=val2 ...)] select_statement1 FROM from_statement;

 创建表时插入数据

CREATE TABLE wlslog_copy
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'
STORED AS TEXTFILE(SEQUENCEFILE/ORC/PARQUET)
AS
SELECT time_stamp,category,type,servername,code,msg
FROM wlslog 

 ALTER TABLE:修改表名、注释、属性、存储

 TRUNCATE TABLE:删除数据行

  TRUNCATE TABLE table_name [PARTITION partition_spec];

 DROP TABLE:删除表

 DROP TABLE [IF EXISTS] table_name;

猜你喜欢

转载自theseus.iteye.com/blog/2425096