测试说明:
merge是oracle提供的一种特殊的sql语法,非常适用于数据同步场景,即: (把a表数据插到b表,如果b表存在相同主键的记录则使用a表数据对b表进行更新) 数据同步的常规做法是先尝试插入,插入失败再进行更新,merge比这种常规做法效率高很多。 (特别是a与b表基本一致,同步时主键冲突比较多的情况,效率能相差10倍以上)
为了验证merge效率,我建了两张表,tab_test_c(初始化生成50000条记录)和tab_test_q(初始化从tab_test_c生成40000条记录), 写了两个plsql脚本,分别将tab_test_c的数据同步到tab_test_q,看它们效率区别。
第一个脚本使用merge语法,第二个脚本使用常规先插入,出现主键冲突的操作。
测试结果:
使用merge语法的脚本同步数据耗时0.04秒,使用常规操作耗时14.77秒,效率差369倍
测试脚本:
set serveroutput on -- 启动计时 以便观察脚本执行时间 set timing on set time on
-- 数据初始化 drop table tab_test_c; create table tab_test_c ( c1 varchar2(512), c2 varchar2(512), c3 varchar2(512), c4 varchar2(512), c5 varchar2(512), c6 varchar2(512), c7 varchar2(512), c8 varchar2(512), c9 varchar2(512), c10 varchar2(512) ); declare v_total number; begin v_total := 0; loop exit when v_total >= 50000; for cur in (select owner, object_name, subobject_name, object_id, data_object_id, object_type, created, last_ddl_time, timestamp from all_objects where rownum < 101) loop insert into tab_test_c values (cur.owner, cur.object_name, cur.subobject_name, cur.object_id, cur.data_object_id, cur.object_type, cur.created, cur.last_ddl_time, cur.timestamp, v_total); v_total := v_total + 1; end loop; end loop; commit; end; / -- 建唯一索引 select count(1) from tab_test_c; create unique index uid_test_c_1 on tab_test_c(c10);
--初始化tab_test_q表数据,先从tab_test_c生成同步40000条数据,剩下10000条数据使用脚本同步过来 drop table tab_test_q; create table tab_test_q as select * from tab_test_c where rownum < 40001; create unique index uid_test_q_1 on tab_test_q(c10); -- 验证数据未同步成功 此时记录数差1000 select count(*) from tab_test_q;
-- 使用merge语法同步tab_test_c的数据到tab_test_q declare cursor cur is select * from tab_test_c; type mergearray_t is table of tab_test_c % rowtype index by binary_integer; mergearray mergearray_t; begin open cur; loop exit when cur % notfound; fetch cur bulk collect into mergearray limit 16; -- 每次限十几条记录,不要占用太多内存 这个数字调大点效率会更高 begin forall rw in 1 .. mergearray.count merge into tab_test_q a using (select mergearray(rw).c1 c1, mergearray(rw).c2 c2, mergearray(rw).c3 c3, mergearray(rw).c4 c4, mergearray(rw).c5 c5, mergearray(rw).c6 c6, mergearray(rw).c7 c7, mergearray(rw).c8 c8, mergearray(rw).c9 c9, mergearray(rw).c10 c10 from dual) b on (a.c10 = b.c10) when matched then update set a.c1 = mergearray(rw).c1, a.c2 = mergearray(rw).c2, a.c3 = mergearray(rw).c3, a.c4 = mergearray(rw).c4, a.c5 = mergearray(rw).c5, a.c6 = mergearray(rw).c6, a.c7 = mergearray(rw).c7, a.c8 = mergearray(rw).c8, a.c9 = mergearray(rw).c9 when not matched then insert (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10) values(mergearray(rw).c1, mergearray(rw).c2, mergearray(rw).c3, mergearray(rw).c4, mergearray(rw).c5, mergearray(rw).c6, mergearray(rw).c7, mergearray(rw).c8, mergearray(rw).c9, mergearray(rw).c10); -- dbms_output.put_line(mergearray.count); exception when others then dbms_output.put_line('error1'); end; end loop; close cur; commit; end; /
--耗时0.04秒 -- 验证数据同步成功 select count(*) from tab_test_q;
--初始化tab_test_q表数据,先从tab_test_c生成同步40000条数据,剩下10000条数据使用脚本同步过来 drop table tab_test_q; create table tab_test_q as select * from tab_test_c where rownum < 40001; create unique index uid_test_q_1 on tab_test_q(c10); -- 验证数据未同步成功 此时记录数差1000 select count(*) from tab_test_q;
-- 使用常规语法同步tab_test_c的数据到tab_test_q begin for cur in (select * from tab_test_c) loop begin insert into tab_test_q(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10) values(cur.c1, cur.c2, cur.c3, cur.c4, cur.c5, cur.c6, cur.c7, cur.c8, cur.c9, cur.c10); exception when dup_val_on_index then --唯一索引冲突时更新 update tab_test_q set c1 = cur.c1, c2 = cur.c2, c3 = cur.c3, c4 = cur.c4, c5 = cur.c5, c6 = cur.c6, c7 = cur.c7, c8 = cur.c8, c9 = cur.c9 where c10 = cur.c10; when others then dbms_output.put_line('error1'); end; end loop; commit; end; /
--耗时14.77秒 -- 验证数据同步成功 select count(*) from tab_test_q;