Session重叠问题教学
发布时间:2022-04-06 11:13:54 所属栏目:MySql教程 来源:互联网
导读:周五晚上终于把这个算法初步实现了. 连续加班忙碌了一个星期,终于有点曙光了. 从这个问题的缘起,到目前应该已经优化了快100倍了 但是周末的时候,想想还是不对. 小花狸Session合并算法(对,以后这个算法就叫这个名称了)实现的合并速度应该是非常快的.代价
周五晚上终于把这个算法初步实现了. 连续加班忙碌了一个星期,终于有点曙光了. 从这个问题的缘起,到目前应该已经优化了快100倍了 但是周末的时候,想想还是不对. 小花狸Session合并算法(对,以后这个算法就叫这个名称了)实现的合并速度应该是非常快的.代价仅仅是扫描一遍记录. 这1.6秒到底用在哪里了? 后来经过反复调试.发现还有两块可以优化改进的地方. 改进后的过程如下: drop procedure p; DELIMITER $$ CREATE DEFINER=`root`@`localhost` PROCEDURE `p`() BEGIN declare done int default 0; declare v_roomid bigint; declare v_time timestamp(6); declare v_cur_type smallint; declare v_before_roomid bigint default -1; declare v_before_type smallint default -1; declare v_before_time timestamp(6) ; declare v_num bigint default 0; declare cur_test CURSOR for select roomid,type,timepoint from tmp_time_point order by roomid,timepoint,type ; DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1; drop table if exists t1; drop table if exists t2; drop table if exists tmp_time_point; drop table if exists tmp_result; drop table if exists tmp_min_range; drop table if exists tmp_s; CREATE temporary TABLE `t1` ( `roomid` int(11) NOT NULL DEFAULT '0', `userid` bigint(20) NOT NULL DEFAULT '0', `s` timestamp(6), `e` timestamp(6), primary key(roomid,userid,s,e) ) ENGINE=memory; CREATE temporary TABLE `t2` ( `roomid` int(11) NOT NULL DEFAULT '0', `s` timestamp(6), `e` timestamp(6) ) ENGINE=memory; CREATE temporary TABLE `tmp_min_range` ( `roomid` int(11) NOT NULL DEFAULT '0', `s` timestamp(6), `e` timestamp(6), primary key(roomid,s,e), key(roomid,e) ) ENGINE=memory; create temporary table tmp_time_point( roomid bigint, timepoint timestamp(6), type smallint, key(roomid,timepoint) ) engine=memory; create temporary table tmp_result( roomid bigint, timepoint timestamp(6), c int ) engine=memory; create temporary table tmp_s( roomid bigint, userid bigint, s timestamp, e timestamp, i int ) engine=memory; SET @A=0; SET @B=0; insert into tmp_s SELECT x.roomid,x.userid,s,e,datediff(e,s)+1 i FROM ( ( SELECT @B:=@B+1 AS id,roomid,userid,s FROM ( SELECT DISTINCT roomid, userid, roomstart AS s FROM u_room_log a WHERE NOT EXISTS (SELECT * FROM u_room_log b WHERE a.roomid = b.roomid AND a.userid = b.userid AND a.roomstart > b.roomstart AND a.roomstart <= b.roomend) ) AS p ) AS x, ( SELECT @A:=@A+1 AS id,roomid,userid,e FROM ( SELECT DISTINCT roomid, userid, roomend AS e FROM u_room_log a WHERE NOT EXISTS (SELECT * FROM u_room_log b WHERE a.roomid = b.roomid AND a.userid = b.userid AND a.roomend >= b.roomstart AND a.roomend < b.roomend) ) AS o ) AS y ) WHERE x.id = y.id AND x.roomid = y.roomid AND x.userid = y.userid ; select max(i) into @c from tmp_s; insert ignore into t1(roomid,userid,s,e) select roomid, userid, if(date(s)!=date(e) and id>1,date(s+interval id-1 date(s+interval id-1 date(e) ,e,date_format(s+interval id-1 '%Y-%m-%d 23:59:59')) e from tmp_s t1 STRAIGHT_JOIN nums on(nums.id<=t1.i) where nums.id<=@c ; insert into t2 (roomid,s,e) select roomid, s+interval startnum/1000000 second s, e-interval endnum/1000000 second e from ( select roomid, s,e, startnum, when @eflag=eflag then @rn:=@rn+1 when @eflag:=eflag then @rn else @rn end endnum from ( select * from ( select when @sflag=sflag then @rn:=@rn+1 when @sflag:=sflag then @rn else @rn end startnum,roomid,s,e,sflag,eflag from ( select * from ( select t1.*,concat('[',roomid,'],',s) sflag,concat('[',roomid,'],',e) eflag from t1 order by roomid ,sflag )a,(select @sflag:='',@rn:=0,@eflag:='') vars ) b ) bb order by roomid,eflag ) c ) d ; insert into tmp_time_point(roomid,timepoint,type) select roomid,s,1 from t2; insert into tmp_time_point(roomid,timepoint,type) select roomid,e,0 from t2; insert ignore into tmp_min_range(roomid,s,e) select roomid,starttime starttime, endtime endtime from ( select if(@roomid=roomid,@d,'') as starttime,@d:=str_to_date(timepoint,'%Y-%m-%d %H:%i:%s.%f'),@roomid:=roomid,p.roomid,str_to_date(timepoint,'%Y-%m-%d %H:%i:%s.%f') endtime from tmp_time_point p,(select @d:='',@roomid:=-1) vars order by roomid,timepoint ) v4 where starttime!='' and date(starttime)=date(endtime); open cur_test; repeat fetch cur_test into v_roomid,v_cur_type,v_time; if done !=1 then -- 第一行或者每个房间的第一行 if v_before_roomid=-1 or v_roomid!=v_before_roomid then set v_before_roomid:=v_roomid; set v_before_type:=1; set v_before_time:='0000-00-00 00:00:00'; set v_num:=0; end if; if v_before_type=1 then set v_num:=v_num+1; insert into tmp_result(roomid,timepoint,c) values(v_roomid,v_time,v_num); end if; if v_before_type=0 then set v_num:=v_num-1; insert into tmp_result(roomid,timepoint,c) values(v_roomid,v_time,v_num); end if; set v_before_roomid:=v_roomid; set v_before_type:=v_cur_type; set v_before_time:=v_time; end if; until done end repeat; close cur_test; select roomid,date(s) dt,round(second,date_format(s,'%Y-%m-%d %H:%i:%s'),date_format(e,'%Y-%m-%d %H:%i:%s')))/60) ts,max(c)-1 c from ( select a.roomid,a.s,a.e,r.c,r.timepoint from tmp_result r inner join tmp_min_range a on( r.timepoint=a.e and r.roomid=a.roomid) where c>2 ) a group by roomid,date(s); END 第一处改进 原来同一房间同一用户重叠时间合并,然后再拆分跨天数据,用的是一条SQL 现在改进如下 create temporary table tmp_s( roomid bigint, userid bigint, s timestamp, e timestamp, i int ) engine=memory; SET @A=0; SET @B=0; insert into tmp_s SELECT x.roomid,x.userid,s,e,datediff(e,s)+1 i FROM ( ( SELECT @B:=@B+1 AS id,roomid,userid,s FROM ( SELECT DISTINCT roomid, userid, roomstart AS s FROM u_room_log a WHERE NOT EXISTS (SELECT * FROM u_room_log b WHERE a.roomid = b.roomid AND a.userid = b.userid AND a.roomstart > b.roomstart AND a.roomstart <= b.roomend) ) AS p ) AS x, ( SELECT @A:=@A+1 AS id,roomid,userid,e FROM ( SELECT DISTINCT roomid, userid, roomend AS e FROM u_room_log a WHERE NOT EXISTS (SELECT * FROM u_room_log b WHERE a.roomid = b.roomid AND a.userid = b.userid AND a.roomend >= b.roomstart AND a.roomend < b.roomend) ) AS o ) AS y ) WHERE x.id = y.id AND x.roomid = y.roomid AND x.userid = y.userid ; select max(i) into @c from tmp_s; insert ignore into t1(roomid,userid,s,e) select roomid, userid, if(date(s)!=date(e) and id>1,date(s+interval id-1 date(s+interval id-1 date(e) ,e,date_format(s+interval id-1 '%Y-%m-%d 23:59:59')) e from tmp_s t1 STRAIGHT_JOIN nums on(nums.id<=t1.i) where nums.id<=@c ; 先把同一房间同一用户的重叠部分合并,然后暂存临时表 记录最大的间隔时间,然后再拆分数据 拆分数据的时候 使用STRAIGHT_JOIN 强制连接顺序. 这样避免因为数字辅助表过大,而导致性能陡然变差. 第二处改进 原来使用distinct的查询, 都改为在临时表上增加主键. 然后使用insert ignore into 代替 insert into 这样大概优化了300毫秒 经过反复优化之后,执行时间大致稳定在1250毫秒 至 1300 毫秒 各个部分耗时分析如下 填充tmp_s,合并同一房间同一用户的重叠部分,耗时655毫秒 填充t1,拆分跨天的用户数据,耗时62毫秒 填充t2,用户时间段首尾相交或者首尾全部重合的数据拆分,耗时140毫秒 填充tmp_min_range,计算最小间隔范围,耗时156毫秒 小花狸Session合并算法,耗时219毫秒 结果统计展示,耗时47毫秒 ![]() (编辑:海南站长网) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |
站长推荐