sql server和oracle 创建分区表
sql server:
1:首先我们创建test1数据库和shop表
2:创建文件组和文件
3:创建分区
①:右键Shop表,弹出菜单中选择 “存储” => "创建分区"
②:创建“分区函数”名 和 “分区方案”名。
③:创建分区映射,也就是将”分区函数“和“文件组”进行关联。
④: 最后我们可以看一下界面给我生成的分区函数以及分区方案,蛮有意思的。
1 USE [Test1] 2 GO 3 BEGIN TRANSACTION 4 CREATE PARTITION FUNCTION [MyRangeCreatTime](datetime) AS RANGE LEFT FOR VALUES (N'2012-12-31T00:00:00', N'2013-12-31T00:00:00', N'2014-12-31T00:00:00') 5 6 7 CREATE PARTITION SCHEME [MySchemeCreateTime] AS PARTITION [MyRangeCreatTime] TO ([Before2013], [T2013], [T2014], [After2014]) 8 9 10 ALTER TABLE [dbo].[Shop] DROP CONSTRAINT [PK__Shop__3214EC277F60ED59] 11 12 13 ALTER TABLE [dbo].[Shop] ADD PRIMARY KEY NONCLUSTERED 14 ( 15 [ID] ASC 16 )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] 17 18 19 CREATE CLUSTERED INDEX [ClusteredIndex_on_MySchemeCreateTime_635288828144372217] ON [dbo].[Shop] 20 ( 21 [CreateTime] 22 )WITH (SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF) ON [MySchemeCreateTime]([CreateTime]) 23 24 25 DROP INDEX [ClusteredIndex_on_MySchemeCreateTime_635288828144372217] ON [dbo].[Shop] WITH ( ONLINE = OFF ) 26 27 28 29 30 COMMIT TRANSACTION
从图中可以看到生成好的分区函数名”[MyRangeCreatTime]“ 和分区架构名“[MySchemeCreateTime]”,最后我们执行下该sql就ok了。
⑤ 插入测试数据并进行简单的测试
这里测试下“2013-1-1”是在哪个分区下。
总结一下可能用到的所有sql语句:
1,新建默认的 filegroup
alter database Test add filegroup Before2013
2, 新建默认的 file
alter database Test add file
(Name=N'Before2013',filename='D:\Program Files\Microsoft SQL Server\MSSQL10.MSSQLSERVER\MSSQL\DATA\Before20131.ndf',size=5mb,maxsize=100Mb,filegrowth=5mb)
to filegroup Before2013
3,新建分区函数
create partition function RangeTime (datetime) as range left for values ('2012-12-31','2013-12-31','2014-12-31')
4,新建分区方案
create partition scheme RangeSchema_CreateTime as partition RangeTime to (before2013,T2013,T2014,after2014)
5,新建表
create table equipment_n
(
[dataId] [int] NOT NULL,
[operaDate] datetime NULL,
[eqId] [int] NULL,
[l1] [varchar](30) NULL,
[l2] [varchar](30) NULL,
[lc] [varchar](30) NULL,
) on RangeSchema_CreateTime(operaDate)
6, 自动添加数据
declare @i int
set @i = 4
while @i < 1000
begin insert into equipment_n ([operaDate]
,[eqId]
,[l1]
,[l2]
,[lc]) values(dateadd(dd,datediff(dd,'2012-01-01','2015-01-30')*RAND(),'2012-01-01'),1,@i+1,@i+2,@i+3)
set @i = @i +1
end
7, 查询分区表中数据分布情况
select $partition.RangeTime(operaDate) as number, COUNT(*) as count
from equipment_n group by $partition.RangeTime(operaDate)
8, 设置自增ID
alter table equipment_n drop column dataId alter table equipment_n add dataId int identity(1,1)
ALTER TABLE dbo.equipment_n ADD CONSTRAINT
PK_equipment_n PRIMARY KEY NONCLUSTERED
(
dataId
) WITH( STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
9, 定制计划
DECLARE @maxValue INT,
@secondMaxValue INT,
@differ INT,
@fileGroupName VARCHAR(200),
@fileNamePath VARCHAR(200),
@fileName VARCHAR(200),
@sql NVARCHAR(1000)
if(MONTH(GETDATE()) != '12')--计划每月执行,如果是12月则可以更新分局
begin
--生成新的文件组
SET @fileGroupName='IN_'+ convert(varchar(4),(YEAR(GETDATE())+1))
PRINT @fileGroupName
SET @sql='ALTER DATABASE [iom_db] ADD FILEGROUP '[email protected]
PRINT @sql
EXEC(@sql)
--新建文件
SET @fileNamePath='C:\Program Files\Microsoft SQL Server\MSSQL10_50.MSSQLSERVER\MSSQL\DATA'
+ @fileGroupName +'.NDF'
SET @fileName=N'File'+convert(varchar(4),(YEAR(GETDATE())+1))
SET @sql='ALTER DATABASE [iom_db] ADD FILE (NAME='''[email protected]+''',FILENAME=N'''[email protected]+''') TO FILEGROUP'+' '[email protected]
PRINT @sql
PRINT 1
EXEC(@sql)
PRINT 2
--修改分区方案,用一个新的文件组用于存放下一新增的数据
SET @sql='ALTER PARTITION SCHEME [RangeSchema_CreateTime] NEXT USED'+' '[email protected]
EXEC(@sql)
PRINT 3
ALTER PARTITION FUNCTION RangeTime() --分区函数
SPLIT RANGE (convert(datetime, convert(varchar(4),(YEAR(GETDATE())+1))+'-12-31 00:00:00.000'))
end
oracle:
首先明确分区表和表分区的区别:表分区是一种思想,分区表示一种技术实现。当表的大小过G的时候可以考虑进行表分区,提高查询效率,均衡IO。oracle分区表是oracle数据库提供的一种表分区的实现形式。表进行分区后,逻辑上仍然是一张表,原来的查询SQL同样生效,同时可以采用使用分区查询来优化SQL查询效率,不至于每次都扫描整个表
一、分区表基本操作
1、按时间分区表创建:
create table t_test (
pk_id number(30) not null,
add_date_time DATE,
constraintPK_T_TEST primary key (pk_id)
)
PARTITION BY RANGE (add_date_time)
(
PARTITIONt_test_2013_less VALUES LESS THAN (TO_DATE('2013-01-01 00:00:00','yyyy-mm-ddhh24:mi:ss')) TABLESPACE TS_MISPS,
PARTITIONt_test_2013 VALUES LESS THAN (TO_DATE('2014-01-01 00:00:00','yyyy-mm-ddhh24:mi:ss')) TABLESPACE TS_MISPS,
PARTITION t_test_2014VALUES LESS THAN (TO_DATE('2015-01-01 00:00:00','yyyy-mm-dd hh24:mi:ss'))TABLESPACE TS_MISPS
)
其中add_date_time为分区字段,每一年一个分区。
插入100W数据
declare
i int := 1;
yearVARCHAR2(20);
begin
loop
year := CASEmod(i, 3)
WHEN 0 THEN
'2012-01-14 12:00:00'
WHEN 1 THEN
'2013-01-14 12:00:00'
ELSE
'2014-01-14 12:00:00'
END;
insert into t_test values(i, to_date(year, 'yyyy-mm-dd hh24:mi:ss'));
exit when i= 1000000;
i := i + 1;
end loop;
end;
查看分区表的分区的详细信息
Select table_name,partition_name,high_value fromdba_tab_partitions where table_name='T_TEST';
2、分区表修改
2.1增加一个分区
分两种情况:1.没有maxvalue分区。2.有maxvalue分区。我们创建的分区就是没有maxValue的分区
1.没有maxvalue分区添加新分区:
alter table t_test add partition t_test_2015 VALUESLESS THAN (TO_DATE('2015-01-01 00:00:00','yyyy-mm-dd hh24:mi:ss')) TABLESPACETS_MISPS ;
2、有maxvalue分区添加新分区:
有了maxvalue,就不能直接add partition,而是需要max分区split。例如我们将创建的分区的语句修改下:
create table t_test (
pk_id number(30) not null,
add_date_time DATE,
constraintPK_T_TEST primary key (pk_id)
)
PARTITION BY RANGE (add_date_time)
(
PARTITIONt_test_2013_less VALUES LESS THAN (TO_DATE('2013-01-01 00:00:00','yyyy-mm-ddhh24:mi:ss')) TABLESPACE TS_MISPS,
PARTITIONt_test_2013 VALUES LESS THAN (TO_DATE('2014-01-01 00:00:00','yyyy-mm-ddhh24:mi:ss')) TABLESPACE TS_MISPS,
PARTITIONt_test_2014 VALUES LESS THAN (TO_DATE('2015-01-01 00:00:00','yyyy-mm-ddhh24:mi:ss')) TABLESPACE TS_MISPS,
PARTITION t_test_maxVALUES LESS THAN (MAXVALUE)
)
增加一个2016年的分区语句为:
alter table t_test split partition t_test_max at(TO_DATE('2016-01-01 00:00:00','yyyy-mm-dd hh24:mi:ss')) into (partitiont_test_2015,partition t_test_max);
2.2删除一个分区
alter table t_test drop partition t_test_2014
注:droppartition时,该分区内存储的数据也将同时删除,你的本意是希望删除掉指定的分区但保留数据,你应该使用merge partition,执行该语句会导致glocal索引的失效需要重建全局索引
2.3合并分区
相邻的分区可以merge为一个分区,新分区的下边界为原来边界值较低的分区,上边界为原来边界值较高的分区,原先的局部索引相应也会合并,全局索引会失效,需要rebuild。
Alter table t_test merge partitions t_test_2013 ,t_Test_2014 into partition t_Test_2013_to_2014
二、对分区表进行查询
2.1查询
不使用分区查询:默认查询所有分区数据
select * from t_test
使用分区查询:只查询该分区数据
select * from t_testpartition(t_test_2014) where add_date_time >=TO_DATE('2014-01-01 00:00:00','yyyy-mm-dd hh24:mi:ss');
2.1插入
insert into t_test values(i, to_date(year,'yyyy-mm-dd hh24:mi:ss'));
2.1删除
使用分区删除
更新的时候指定了分区,而根据查询的记录不在该分区中时,将不会删除数据
delete t_test partition(t_test_2013) where id=1;
不使用分区删除
delete t_test whereid=1;
2.1修改
使用分区更新
更新的时候指定了分区,而根据查询的记录不在该分区中时,将不会更新数据
delete t_test where id=1;
update t_test partition(t_test) set id=1 where id=2;
不使用分区
delete t_test where id=1;
update t_test set id=1 where id=2;
三、普通表和分区表互转
普通表—>分区表
1、新建一个字段一样的中间的分区表(T_NEW)
2、将T数据导入到T_NEW中
INSERT INTO T SELECT field1,filed2, …from T
将老表重命名
RENAME T TO T_OLD;
将新表重命名
RENAME T_NEW TO T;
这种适合静态操作,不保证数据一致性。如果在生产环境切换,利用利用在线重定义功能
mysql:
CREATE TABLE p1
-> ( a INT,
-> b INT,
-> c INT )
-> PARTITION BY RANGE COLUMNS (a)
-> ( PARTITION p01 VALUES LESS THAN (10),
-> PARTITION p02 VALUES LESS THAN (20),
-> PARTITION p03 VALUES LESS THAN (30),
-> PARTITION p04 VALUES LESS THAN (MAXVALUE) );
---------------------
Mysql的安装方法可以参考:
http://blog.****.net/jhq0113/article/details/43812895
Mysql分区表的介绍可以参考:
http://blog.****.net/jhq0113/article/details/44592865
1.检查你的Mysql是否支持分区
mysql> SHOW VARIABLES LIKE '%partition%';
若结果如下,表示你的Mysql支持表分区:
+-----------------------+-------+
| Variable_name | Value |
+-----------------------+-------+
| have_partition_engine | YES |
+-----------------------+-------+
1 row in set (0.00 sec)
RANGE分区表创建方式:
DROP TABLE IF EXISTS `my_orders`;
CREATE TABLE `my_orders` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT '表主键',
`pid` int(10) unsigned NOT NULL COMMENT '产品ID',
`price` decimal(15,2) NOT NULL COMMENT '单价',
`num` int(11) NOT NULL COMMENT '购买数量',
`uid` int(10) unsigned NOT NULL COMMENT '客户ID',
`atime` datetime NOT NULL COMMENT '下单时间',
`utime` int(10) unsigned NOT NULL DEFAULT 0 COMMENT '修改时间',
`isdel` tinyint(4) NOT NULL DEFAULT '0' COMMENT '软删除标识',
PRIMARY KEY (`id`,`atime`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
/*********分区信息**************/
PARTITION BY RANGE (YEAR(atime))
(
PARTITION p0 VALUES LESS THAN (2016),
PARTITION p1 VALUES LESS THAN (2017),
PARTITION p2 VALUES LESS THAN MAXVALUE
);
以上是一个简单的订单表,分区字段是atime,根据RANGE分区,这样当你向该表中插入数据的时候,Mysql会根据YEAR(atime)的值进行分区存储。
检查分区是否创建成功,执行查询语句:
EXPLAIN PARTITIONS SELECT * FROM `my_orders`
若成功,结果如下:
性能分析:
1).创建同样表结构,但没有进行分区的表
DROP TABLE IF EXISTS `my_order`;
CREATE TABLE `my_order` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT '表主键',
`pid` int(10) unsigned NOT NULL COMMENT '产品ID',
`price` decimal(15,2) NOT NULL COMMENT '单价',
`num` int(11) NOT NULL COMMENT '购买数量',
`uid` int(10) unsigned NOT NULL COMMENT '客户ID',
`atime` datetime NOT NULL COMMENT '下单时间',
`utime` int(10) unsigned NOT NULL DEFAULT 0 COMMENT '修改时间',
`isdel` tinyint(4) NOT NULL DEFAULT '0' COMMENT '软删除标识',
PRIMARY KEY (`id`,`atime`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
2).向两张表中插入相同的数据
/**************************向分区表插入数据****************************/
INSERT INTO my_orders(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89757,CURRENT_TIMESTAMP());
INSERT INTO my_orders(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89757,'2016-05-01 00:00:00');
INSERT INTO my_orders(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89757,'2017-05-01 00:00:00');
INSERT INTO my_orders(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89757,'2018-05-01 00:00:00');
INSERT INTO my_orders(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89756,'2015-05-01 00:00:00');
INSERT INTO my_orders(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89756,'2016-05-01 00:00:00');
INSERT INTO my_orders(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89756,'2017-05-01 00:00:00');
INSERT INTO my_orders(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89756,'2018-05-01 00:00:00');
/**************************向未分区表插入数据****************************/
INSERT INTO my_order(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89757,CURRENT_TIMESTAMP());
INSERT INTO my_order(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89757,'2016-05-01 00:00:00');
INSERT INTO my_order(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89757,'2017-05-01 00:00:00');
INSERT INTO my_order(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89757,'2018-05-01 00:00:00');
INSERT INTO my_order(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89756,'2015-05-01 00:00:00');
INSERT INTO my_order(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89756,'2016-05-01 00:00:00');
INSERT INTO my_order(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89756,'2017-05-01 00:00:00');
INSERT INTO my_order(`pid`,`price`,`num`,`uid`,`atime`) VALUES(1,12.23,1,89756,'2018-05-01 00:00:00');
3).主从复制,大约20万条左右(主从复制的数据和真实环境有差距,但是能体现出表分区查询的性能优劣)
/**********************************主从复制大量数据******************************/
INSERT INTO `my_orders`(`pid`,`price`,`num`,`uid`,`atime`) SELECT `pid`,`price`,`num`,`uid`,`atime` FROM `my_orders`;
INSERT INTO `my_order`(`pid`,`price`,`num`,`uid`,`atime`) SELECT `pid`,`price`,`num`,`uid`,`atime` FROM `my_order`;
4).查询测试
/***************************查询性能分析**************************************/
SELECT * FROM `my_orders` WHERE `uid`=89757 AND `atime`< CURRENT_TIMESTAMP();
/****用时0.084s****/
SELECT * FROM `my_order` WHERE `uid`=89757 AND `atime`< CURRENT_TIMESTAMP();
/****用时0.284s****/
通过以上查询可以明显看出进行表分区的查询性能更好,查询所花费的时间更短。
分析查询过程:
EXPLAIN PARTITIONS SELECT * FROM `my_orders` WHERE `uid`=89757 AND `atime`< CURRENT_TIMESTAMP();
EXPLAIN PARTITIONS SELECT * FROM `my_order` WHERE `uid`=89757 AND `atime`< CURRENT_TIMESTAMP();
通过以上结果可以看出,my_orders表查询直接经过p0分区,只扫描了49386行,而my_order表没有进行分区,扫描了196983行,这也是性能得到提升的关键所在。
当然,表的分区并不是分的越多越好,当表的分区太多时找分区又是一个性能的瓶颈了,建议在200个分区以内。
LIST分区表创建方式:
/*****************创建分区表*********************/
CREATE TABLE `products` (
`id` bigint UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '表主键' ,
`name` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '产品名称' ,
`metrial` tinyint UNSIGNED NOT NULL COMMENT '材质' ,
`weight` double UNSIGNED NOT NULL DEFAULT 0 COMMENT '重量' ,
`vol` double UNSIGNED NOT NULL DEFAULT 0 COMMENT '容积' ,
`c_id` tinyint UNSIGNED NOT NULL COMMENT '供货公司ID' ,
PRIMARY KEY (`id`,`c_id`)
)ENGINE=InnoDB DEFAULT CHARSET=utf8
/*********分区信息**************/
PARTITION BY LIST(c_id)
(
PARTITION pA VALUES IN (1,3,11,13),
PARTITION pB VALUES IN (2,4,12,14),
PARTITION pC VALUES IN (5,7,15,17),
PARTITION pD VALUES IN (6,8,16,18),
PARTITION pE VALUES IN (9,10,19,20)
);
可以看出,LIST分区和RANGE分区很类似,这里就不做性能分析了,和RANGE很类似。
HASH分区表的创建方式:
/*****************分区表*****************/
CREATE TABLE `msgs` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT '表主键',
`sender` int(10) unsigned NOT NULL COMMENT '发送者ID',
`reciver` int(10) unsigned NOT NULL COMMENT '接收者ID',
`msg_type` tinyint(3) unsigned NOT NULL COMMENT '消息类型',
`msg` varchar(225) NOT NULL COMMENT '消息内容',
`atime` int(10) unsigned NOT NULL COMMENT '发送时间',
`sub_id` tinyint(3) unsigned NOT NULL COMMENT '部门ID',
PRIMARY KEY (`id`,`sub_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
/*********分区信息**************/
PARTITION BY HASH(sub_id)
PARTITIONS 10;
以上语句代表,msgs表按照sub_id进行HASH分区,一共分了十个区。
Key分区和HASH分区很类似,不再介绍,若想了解可以参考Mysql官方文档进行详细了解。
子分区的创建方式:
CREATE TABLE `msgss` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT '表主键',
`sender` int(10) unsigned NOT NULL COMMENT '发送者ID',
`reciver` int(10) unsigned NOT NULL COMMENT '接收者ID',
`msg_type` tinyint(3) unsigned NOT NULL COMMENT '消息类型',
`msg` varchar(225) NOT NULL COMMENT '消息内容',
`atime` int(10) unsigned NOT NULL COMMENT '发送时间',
`sub_id` tinyint(3) unsigned NOT NULL COMMENT '部门ID',
PRIMARY KEY (`id`,`atime`,`sub_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
/*********分区信息**************/
PARTITION BY RANGE (atime) SUBPARTITION BY HASH (sub_id)
(
PARTITION t0 VALUES LESS THAN(1451577600)
(
SUBPARTITION s0,
SUBPARTITION s1,
SUBPARTITION s2,
SUBPARTITION s3,
SUBPARTITION s4,
SUBPARTITION s5
),
PARTITION t1 VALUES LESS THAN(1483200000)
(
SUBPARTITION s6,
SUBPARTITION s7,
SUBPARTITION s8,
SUBPARTITION s9,
SUBPARTITION s10,
SUBPARTITION s11
),
PARTITION t2 VALUES LESS THAN MAXVALUE
(
SUBPARTITION s12,
SUBPARTITION s13,
SUBPARTITION s14,
SUBPARTITION s15,
SUBPARTITION s16,
SUBPARTITION s17
)
);
检查子分区是否创建成功:
EXPLAIN PARTITIONS SELECT * FROM msgss;