不会根据执行计划进行sql调优的dba是不合格的,sql tuning是基本技能
0. 优化器的基本概念
为sql语句找到最好的,执行成本最低的执行计划
制定执行计划是以sql语句中涉及到的对象的统计信息为基础的。
1. 统计信息的介绍
–表的统计信息(user_tables, user_tab_statistics)
select num_rows, –表中的记录数
blocks, –表中数据所占的数据块数
empty_blocks, –表中的空块数
avg_space, –数据块中平均的使用空间
chain_cnt, –表中行连接和行迁移的数量
avg_row_len, –每条记录的平均长度
last_analyzed — 最近一次搜集统计信息的时间
from user_tables where table_name=’new_sales’;
–搜集表的统计信息
exec dbms_stats.gather_table_stats(‘sh’,’new_sales’);
–再来执行一次
select num_rows, –表中的记录数
blocks, –表中数据所占的数据块数
empty_blocks, –表中的空块数
avg_space, –数据块中平均的使用空间
chain_cnt, –表中行连接和行迁移的数量
avg_row_len, –每条记录的平均长度
last_analyzed — 最近一次搜集统计信息的时间
from user_tables where table_name=’new_sales’;
–列的统计信息 (user_tab_columns, user_tab_col_statistics,user_tab_histograms)
select column_name,
num_distinct, –唯一值的个数
low_value, –列上的最小值
high_value, –列上的最大值
density, –选择率因子(密度) = 1/(ndv),如果不存在柱状图的话
num_nulls, –空值的个数
num_buckets, –直方图的bucket个数
histogram –直方图的类型
from user_tab_columns
where table_name=’new_sales’
–搜集柱状图
exec dbms_stats.gather_table_stats(‘sh’, ‘new_sales’, method_opt => ‘for all columns size 1 for columns size 254 cust_id’);
–再来看看统计信息
select column_name,
num_distinct, –唯一值的个数
low_value, –列上的最小值
high_value, –列上的最大值
density, –选择率因子(密度) = 1/(ndv),如果不存在柱状图的话
num_nulls, –空值的个数
num_buckets, –直方图的bucket个数
histogram –直方图的类型
from user_tab_columns
where table_name=’new_sales’
select
column_name,
endpoint_number,
endpoint_value,
from user_tab_histograms
where table_name=’new_sales’ and column_name=’cust_id’
扩展统计信息 (user_stat_extensions)
select e.extension col_group, t.num_distinct, t.histogram
from user_stat_extensions e, user_tab_col_statistics t
where e.extension_name=t.column_name
and t.table_name=’new_sales’;
–搜集扩展统计信息
declare
cg_name varchar2(30);
begin
cg_name := dbms_stats.create_extended_stats(‘sh’,’new_sales’,'(prod_id,cust_id)’);
end;
select sys.dbms_stats.show_extended_stats_name(‘sh’,’new_sales’, ‘(prod_id,cust_id)’) col_group_name
from dual;
exec dbms_stats.gather_table_stats(‘sh’,’new_sales’, method_opt => –
‘for columns (prod_id,cust_id) size skewonly’);
2.统计信息不准确容易导致的问题
表统计信息不准确
导致了表的访问方式出现了问题(全表扫描和使用索引)
导致了表和表的链接方式出现问题(应该使用hash join,却是用了nest loop)
列统计信息不准确
导致了访问表的方式不同(错误的索引)
导致了表的连接方式不同(应该使用hash join , 但是使用了nest loop)
索引的统计信息不准确
导致了访问表的方式不同(应该使用索引,但是使用了全表扫描)
+++++++++++++++++++++++++++++++++++++++++++
–当天线上表
create table sales_online
(
prod_id number not null ,
cust_id number not null,
time_id date not null,
channel_id number not null,
promo_id number not null,
quantity_sold number(10,2) not null,
amount_sold number(10,2) not null)
–历史归档表
create table sales_part (
prod_id number not null ,
cust_id number not null,
time_id date not null,
channel_id number not null,
promo_id number not null,
quantity_sold number(10,2) not null,
amount_sold number(10,2) not null)
partition by range (time_id)
(
partition part_20171218 values less than (to_date(’19-12-2017′,’dd-mm-yyyy’)),
partition part_20171219 values less than (to_date(’20-12-2017′,’dd-mm-yyyy’))
);
insert into sales_part
select prod_id,cust_id, sysdate-2,channel_id,promo_id,quantity_sold,amount_sold
from new_sales;
insert into sales_part
select prod_id,cust_id, sysdate-1,channel_id,promo_id,quantity_sold,amount_sold
from new_sales;
commit;
create index sales_cust_idx on sales_online(cust_id);
create index sales_part_cust_idx on sales_part(cust_id);
–每天晚上把当天数据归档之后,再删除
declare
v_sql varchar2(3000);
begin
v_sql := ‘alter table sales_part drop partition part_20171219’;
execute immediate v_sql;
v_sql := ‘alter table sales_part add partition part_’||to_char(sysdate+1,’yyyymmdd’)||
‘ values less than (to_date(‘||””||to_char(sysdate+1,’dd-mm-yyyy’)||””||’,’||””||’dd-mm-yyyy’||
””||’))’;
dbms_output.put_line(v_sql);
execute immediate v_sql;
v_sql := ‘alter table sales_part exchange partition part_’||to_char(sysdate+1,’yyyymmdd’)||
‘ with table sales_online’;
execute immediate v_sql;
dbms_output.put_line(v_sql);
v_sql := ‘truncate table sales_online’;
execute immediate v_sql;
dbms_output.put_line(v_sql);
v_sql := ‘alter index sales_part_cust_idx rebuild online’;
execute immediate v_sql;
dbms_output.put_line(v_sql);
v_sql := ‘alter index sales_cust_idx rebuild online’;
execute immediate v_sql;
dbms_output.put_line(v_sql);
dbms_stats.gather_table_stats(‘sh’, ‘sales_part’);
dbms_stats.gather_table_stats(‘sh’, ‘sales_online’);
end;
— 检查统计信息
select column_name,
num_distinct, –唯一值的个数
low_value, –列上的最小值
high_value, –列上的最大值
density, –选择率因子(密度) = 1/(ndv),如果不存在柱状图的话
num_nulls, –空值的个数
num_buckets, –直方图的bucket个数
histogram –直方图的类型
from user_tab_columns
where table_name=’sales_online’
select num_rows, –表中的记录数
blocks, –表中数据所占的数据块数
empty_blocks, –表中的空块数
avg_space, –数据块中平均的使用空间
chain_cnt, –表中行连接和行迁移的数量
avg_row_len, –每条记录的平均长度
last_analyzed — 最近一次搜集统计信息的时间
from user_tables where table_name=’sales_online’;
====实例1
–进行查询
select c.cust_city, sum(amount_sold) from sales_part s, new_customers c
where s.cust_id = c.cust_id
and s.cust_id > 100
and time_id between to_date(‘2017-12-18 00:00:00′ ,’yyyy-mm-dd hh24:mi:ss’) and
to_date(‘2017-12-18 01:00:00’, ‘yyyy-mm-dd hh24:mi:ss’)
group by c.cust_city
———————————————————————————————————-
| 0 | select statement | | 1 | 48 | 1779 (1)| 00:00:22 | | |
| 1 | hash group by | | 1 | 48 | 1779 (1)| 00:00:22 | | |
|* 2 | hash join | | 1 | 48 | 1778 (1)| 00:00:22 | | |
| 3 | partition range single| | 1 | 18 | 1373 (1)| 00:00:17 | 2 | 2 |
|* 4 | table access full | sales_part | 1 | 18 | 1373 (1)| 00:00:17 | 2 | 2 |
|* 5 | table access full | new_customers | 54144 | 1586k| 405 (1)| 00:00:05 | | |
——————————————————————————————-
select c.cust_city, sum(amount_sold) from sales_online s, new_customers c
where s.cust_id = c.cust_id
and s.cust_id > 100
and time_id between to_date(‘2017-12-20 00:00:00′ ,’yyyy-mm-dd hh24:mi:ss’) and
to_date(‘2017-12-20 01:00:00’, ‘yyyy-mm-dd hh24:mi:ss’)
group by c.cust_city
————————————————————————————————
| id | operation | name | rows | bytes | cost (%cpu)| time |
————————————————————————————————
| 0 | select statement | | 1 | 65 | 406 (1)| 00:00:05 |
| 1 | hash group by | | 1 | 65 | 406 (1)| 00:00:05 |
|* 2 | hash join | | 1 | 65 | 405 (1)| 00:00:05 |
|* 3 | table access by index rowid| sales_online | 1 | 35 | 0 (0)| 00:00:01 |
|* 4 | index range scan | sales_cust_idx | 1 | | 0 (0)| 00:00:01 |
|* 5 | table access full | new_customers | 54144 | 1586k| 405 (1)| 00:00:05 |
————————————————————————————————
–向表sales_online 中插入一些数据
insert into sales_online
select prod_id,cust_id, sysdate,channel_id,promo_id,quantity_sold,amount_sold
from new_sales;
commit;
–再次查询数据
select c.cust_city, sum(amount_sold) from sales_online s, new_customers c
where s.cust_id = c.cust_id
and s.cust_id > 100
and time_id between to_date(‘2017-12-20 00:00:00′ ,’yyyy-mm-dd hh24:mi:ss’) and
to_date(‘2017-12-20 01:00:00’, ‘yyyy-mm-dd hh24:mi:ss’)
group by c.cust_city
————————————————————————————————
| 0 | select statement | | 1 | 65 | 406 (1)| 00:00:05 |
| 1 | hash group by | | 1 | 65 | 406 (1)| 00:00:05 |
|* 2 | hash join | | 1 | 65 | 405 (1)| 00:00:05 |
|* 3 | table access by index rowid| sales_online | 1 | 35 | 0 (0)| 00:00:01 |
|* 4 | index range scan | sales_cust_idx | 1 | | 0 (0)| 00:00:01 |
|* 5 | table access full | new_customers | 54144 | 1586k| 405 (1)| 00:00:05 |
————————————————————————————————
–手动搜集统计信息或许是一个办法
exec dbms_stats.gather_table_stats(‘sh’, ‘sales_online’, cascade => true);
————————————————————————————-
| id | operation | name | rows | bytes | cost (%cpu)| time |
————————————————————————————-
| 0 | select statement | | 1 | 48 | 1641 (1)| 00:00:20 |
| 1 | hash group by | | 1 | 48 | 1641 (1)| 00:00:20 |
|* 2 | hash join | | 1 | 48 | 1640 (1)| 00:00:20 |
|* 3 | table access full| sales_online | 1 | 18 | 1235 (1)| 00:00:15 |
|* 4 | table access full| new_customers | 54144 | 1586k| 405 (1)| 00:00:05 |
————————————————————————————-
执行计划变了过来。但是这不是一个好的办法,因为在生产时间搜集统计信息比较危险。
—-可以这样做
declare
v_sql varchar2(3000);
begin
v_sql := ‘alter table sales_part drop partition part_20171219’;
execute immediate v_sql;
–导出统计信息
dbms_stats.export_table_stats(ownname =>’sh’,tabname=>’sales_online’,stattab=>’sales_online_st’,statid => ‘a2’);
v_sql := ‘alter table sales_part add partition part_’||to_char(sysdate+1,’yyyymmdd’)||
‘ values less than (to_date(‘||””||to_char(sysdate+1,’dd-mm-yyyy’)||””||’,’||””||’dd-mm-yyyy’||
””||’))’;
dbms_output.put_line(v_sql);
execute immediate v_sql;
v_sql := ‘alter table sales_part exchange partition part_’||to_char(sysdate+1,’yyyymmdd’)||
‘ with table sales_online’;
execute immediate v_sql;
dbms_output.put_line(v_sql);
v_sql := ‘truncate table sales_online’;
execute immediate v_sql;
dbms_output.put_line(v_sql);
v_sql := ‘alter index sales_part_cust_idx rebuild online’;
execute immediate v_sql;
dbms_output.put_line(v_sql);
v_sql := ‘alter index sales_cust_idx rebuild online’;
execute immediate v_sql;
dbms_output.put_line(v_sql);
dbms_stats.gather_table_stats(‘sh’, ‘sales_part’);
–dbms_stats.gather_table_stats(‘sh’, ‘sales_online’);
–导入统计信息
dbms_stats.import_table_stats(ownname => ‘sh’, tabname => ‘sales_online’, stattab => ‘sales_online_st’, statid => ‘a2’, no_invalidate => true);
end;