From 0b03e79889099ff8830c5b6f4dbd3c25be04688c Mon Sep 17 00:00:00 2001 From: azuregc7 Date: Sat, 30 Jan 2021 20:24:57 -0800 Subject: [PATCH] [home] Remove duplicate reposts from timeline feed, improve query performance, and increase batch size --- .../api/v1/timelines/home_controller.rb | 2 +- app/models/home_feed.rb | 48 ++++++++++++------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/app/controllers/api/v1/timelines/home_controller.rb b/app/controllers/api/v1/timelines/home_controller.rb index 93ad337b..a5ac5e79 100644 --- a/app/controllers/api/v1/timelines/home_controller.rb +++ b/app/controllers/api/v1/timelines/home_controller.rb @@ -24,7 +24,7 @@ class Api::V1::Timelines::HomeController < Api::BaseController end def home_statuses - theLimit = params[:max_id].nil? ? 10 : limit_param(DEFAULT_STATUSES_LIMIT) + theLimit = params[:max_id].nil? ? 20 : limit_param(DEFAULT_STATUSES_LIMIT) account_home_feed.get( theLimit, params[:max_id], diff --git a/app/models/home_feed.rb b/app/models/home_feed.rb index 4d102c67..b210f58b 100644 --- a/app/models/home_feed.rb +++ b/app/models/home_feed.rb @@ -19,24 +19,38 @@ class HomeFeed < Feed pagination_max = "and s.id < #{max_id}" unless max_id.nil? pagination_min = "and s.id > #{min_id}" unless min_id.nil? Status.find_by_sql " - select s.* - from statuses s - left join statuses r - on s.reblog_of_id = r.id + with cte as + ( + select + row_number() over (partition by sid.reblog_of_id order by sid.id desc) as rn_dupe, + sid.* + FROM + (select + s.id, + s.reblog_of_id + from statuses s + left join statuses r + on s.reblog_of_id = r.id + where + s.created_at > NOW() - INTERVAL '7 days' + and s.reply is false + and (exists(select ff.target_account_id from follows ff where ff.account_id = #{@id} and ff.target_account_id = s.account_id) + or s.account_id = #{@id}) + and not exists(select mm.target_account_id from mutes mm where mm.account_id = #{@id} and mm.target_account_id in (s.account_id, r.account_id)) + and not exists(select bb.target_account_id from blocks bb where bb.account_id = #{@id} and bb.target_account_id in (s.account_id, r.account_id)) + #{pagination_max} + #{pagination_min} + order by s.created_at desc + limit #{limit} + ) sid + inner join statuses s on sid.id = s.id + ) + select + s.* + from cte + inner join statuses s on cte.id = s.id where - s.created_at > NOW() - INTERVAL '7 days' - and s.reply is false - and ( - exists(select ff.target_account_id from follows ff - where ff.account_id = #{@id} and ff.target_account_id = s.account_id) - or s.account_id = #{@id}) - and not exists(select mm.target_account_id from mutes mm - where mm.account_id = #{@id} and mm.target_account_id in (s.account_id, r.account_id)) - and not exists(select bb.target_account_id from blocks bb - where bb.account_id = #{@id} and bb.target_account_id in (s.account_id, r.account_id)) - #{pagination_max} - #{pagination_min} - order by s.created_at desc limit #{limit} + (cte.rn_dupe = 1 or cte.reblog_of_id is null) " end end