forked from cybrespace/mastodon
Batched remove status service (#3735)
* Make Pubsubhubbub::DistributionWorker handle both single stream entry arguments, as well as arrays of stream entries * Add BatchedRemoveStatusService, make SuspendAccountService use it * Improve method names * Add test * Add more tests * Use PuSH payloads of 100 to have a clear mapping of 1000 input statuses -> 10 PuSH payloads It was nice while it lasted
This commit is contained in:
parent
4a618908e8
commit
e17c2e5da5
|
@ -0,0 +1,114 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class BatchedRemoveStatusService < BaseService
|
||||||
|
include StreamEntryRenderer
|
||||||
|
|
||||||
|
# Delete given statuses and reblogs of them
|
||||||
|
# Dispatch PuSH updates of the deleted statuses, but only local ones
|
||||||
|
# Dispatch Salmon deletes, unique per domain, of the deleted statuses, but only local ones
|
||||||
|
# Remove statuses from home feeds
|
||||||
|
# Push delete events to streaming API for home feeds and public feeds
|
||||||
|
# @param [Status] statuses A preferably batched array of statuses
|
||||||
|
def call(statuses)
|
||||||
|
statuses = Status.where(id: statuses.map(&:id)).includes(:account, :stream_entry).flat_map { |status| [status] + status.reblogs.includes(:account, :stream_entry).to_a }
|
||||||
|
|
||||||
|
@mentions = statuses.map { |s| [s.id, s.mentions.includes(:account).to_a] }.to_h
|
||||||
|
@tags = statuses.map { |s| [s.id, s.tags.pluck(:name)] }.to_h
|
||||||
|
|
||||||
|
@stream_entry_batches = []
|
||||||
|
@salmon_batches = []
|
||||||
|
@json_payloads = statuses.map { |s| [s.id, Oj.dump(event: :delete, payload: s.id)] }.to_h
|
||||||
|
|
||||||
|
# Ensure that rendered XML reflects destroyed state
|
||||||
|
Status.where(id: statuses.map(&:id)).in_batches.destroy_all
|
||||||
|
|
||||||
|
# Batch by source account
|
||||||
|
statuses.group_by(&:account_id).each do |_, account_statuses|
|
||||||
|
account = account_statuses.first.account
|
||||||
|
|
||||||
|
unpush_from_home_timelines(account_statuses)
|
||||||
|
batch_stream_entries(account_statuses) if account.local?
|
||||||
|
end
|
||||||
|
|
||||||
|
# Cannot be batched
|
||||||
|
statuses.each do |status|
|
||||||
|
unpush_from_public_timelines(status)
|
||||||
|
batch_salmon_slaps(status) if status.local?
|
||||||
|
end
|
||||||
|
|
||||||
|
Pubsubhubbub::DistributionWorker.push_bulk(@stream_entry_batches) { |batch| batch }
|
||||||
|
NotificationWorker.push_bulk(@salmon_batches) { |batch| batch }
|
||||||
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
def batch_stream_entries(statuses)
|
||||||
|
stream_entry_ids = statuses.map { |s| s.stream_entry.id }
|
||||||
|
|
||||||
|
stream_entry_ids.each_slice(100) do |batch_of_stream_entry_ids|
|
||||||
|
@stream_entry_batches << [batch_of_stream_entry_ids]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def unpush_from_home_timelines(statuses)
|
||||||
|
account = statuses.first.account
|
||||||
|
recipients = account.followers.local.pluck(:id)
|
||||||
|
|
||||||
|
recipients << account.id if account.local?
|
||||||
|
|
||||||
|
recipients.each do |follower_id|
|
||||||
|
unpush(follower_id, statuses)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def unpush_from_public_timelines(status)
|
||||||
|
payload = @json_payloads[status.id]
|
||||||
|
|
||||||
|
redis.pipelined do
|
||||||
|
redis.publish('timeline:public', payload)
|
||||||
|
redis.publish('timeline:public:local', payload) if status.local?
|
||||||
|
|
||||||
|
@tags[status.id].each do |hashtag|
|
||||||
|
redis.publish("timeline:hashtag:#{hashtag}", payload)
|
||||||
|
redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def batch_salmon_slaps(status)
|
||||||
|
return if @mentions[status.id].empty?
|
||||||
|
|
||||||
|
payload = stream_entry_to_xml(status.stream_entry.reload)
|
||||||
|
recipients = @mentions[status.id].map(&:account).reject(&:local?).uniq(&:domain).map(&:id)
|
||||||
|
|
||||||
|
recipients.each do |recipient_id|
|
||||||
|
@salmon_batches << [payload, status.account_id, recipient_id]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def unpush(follower_id, statuses)
|
||||||
|
key = FeedManager.instance.key(:home, follower_id)
|
||||||
|
|
||||||
|
originals = statuses.reject(&:reblog?)
|
||||||
|
reblogs = statuses.reject { |s| !s.reblog? }
|
||||||
|
|
||||||
|
# Quickly remove all originals
|
||||||
|
redis.pipelined do
|
||||||
|
originals.each do |status|
|
||||||
|
redis.zremrangebyscore(key, status.id, status.id)
|
||||||
|
redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# For reblogs, re-add original status to feed, unless the reblog
|
||||||
|
# was not in the feed in the first place
|
||||||
|
reblogs.each do |status|
|
||||||
|
redis.zadd(key, status.reblog_of_id, status.reblog_of_id) unless redis.zscore(key, status.reblog_of_id).nil?
|
||||||
|
redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def redis
|
||||||
|
Redis.current
|
||||||
|
end
|
||||||
|
end
|
|
@ -17,9 +17,8 @@ class SuspendAccountService < BaseService
|
||||||
end
|
end
|
||||||
|
|
||||||
def purge_content
|
def purge_content
|
||||||
@account.statuses.reorder(nil).find_each do |status|
|
@account.statuses.reorder(nil).find_in_batches do |statuses|
|
||||||
# This federates out deletes to previous followers
|
BatchedRemoveStatusService.new.call(statuses)
|
||||||
RemoveStatusService.new.call(status)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
[
|
[
|
||||||
|
|
|
@ -5,25 +5,45 @@ class Pubsubhubbub::DistributionWorker
|
||||||
|
|
||||||
sidekiq_options queue: 'push'
|
sidekiq_options queue: 'push'
|
||||||
|
|
||||||
def perform(stream_entry_id)
|
def perform(stream_entry_ids)
|
||||||
stream_entry = StreamEntry.find(stream_entry_id)
|
stream_entries = StreamEntry.where(id: stream_entry_ids).includes(:status).reject { |e| e.status&.direct_visibility? }
|
||||||
|
|
||||||
return if stream_entry.status&.direct_visibility?
|
return if stream_entries.empty?
|
||||||
|
|
||||||
@account = stream_entry.account
|
@account = stream_entries.first.account
|
||||||
@payload = AtomSerializer.render(AtomSerializer.new.feed(@account, [stream_entry]))
|
@subscriptions = active_subscriptions.to_a
|
||||||
@domains = @account.followers_domains
|
|
||||||
|
|
||||||
Subscription.where(account: @account).active.select('id, callback_url').find_each do |subscription|
|
distribute_public!(stream_entries.reject(&:hidden?))
|
||||||
next if stream_entry.hidden? && !allowed_to_receive?(subscription.callback_url)
|
distribute_hidden!(stream_entries.reject { |s| !s.hidden? })
|
||||||
Pubsubhubbub::DeliveryWorker.perform_async(subscription.id, @payload)
|
|
||||||
end
|
|
||||||
rescue ActiveRecord::RecordNotFound
|
|
||||||
true
|
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
def distribute_public!(stream_entries)
|
||||||
|
return if stream_entries.empty?
|
||||||
|
|
||||||
|
@payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
|
||||||
|
|
||||||
|
Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions) do |subscription|
|
||||||
|
[subscription.id, @payload]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def distribute_hidden!(stream_entries)
|
||||||
|
return if stream_entries.empty?
|
||||||
|
|
||||||
|
@payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
|
||||||
|
@domains = @account.followers_domains
|
||||||
|
|
||||||
|
Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions.reject { |s| !allowed_to_receive?(s.callback_url) }) do |subscription|
|
||||||
|
[subscription.id, @payload]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def active_subscriptions
|
||||||
|
Subscription.where(account: @account).active.select('id, callback_url')
|
||||||
|
end
|
||||||
|
|
||||||
def allowed_to_receive?(callback_url)
|
def allowed_to_receive?(callback_url)
|
||||||
@domains.include?(Addressable::URI.parse(callback_url).host)
|
@domains.include?(Addressable::URI.parse(callback_url).host)
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
require 'rails_helper'
|
||||||
|
|
||||||
|
RSpec.describe BatchedRemoveStatusService do
|
||||||
|
subject { BatchedRemoveStatusService.new }
|
||||||
|
|
||||||
|
let!(:alice) { Fabricate(:account) }
|
||||||
|
let!(:bob) { Fabricate(:account, username: 'bob', domain: 'example.com', salmon_url: 'http://example.com/salmon') }
|
||||||
|
let!(:jeff) { Fabricate(:account) }
|
||||||
|
|
||||||
|
let(:status1) { PostStatusService.new.call(alice, 'Hello @bob@example.com') }
|
||||||
|
let(:status2) { PostStatusService.new.call(alice, 'Another status') }
|
||||||
|
|
||||||
|
before do
|
||||||
|
allow(Redis.current).to receive_messages(publish: nil)
|
||||||
|
|
||||||
|
stub_request(:post, 'http://example.com/push').to_return(status: 200, body: '', headers: {})
|
||||||
|
stub_request(:post, 'http://example.com/salmon').to_return(status: 200, body: '', headers: {})
|
||||||
|
|
||||||
|
Fabricate(:subscription, account: alice, callback_url: 'http://example.com/push', confirmed: true, expires_at: 30.days.from_now)
|
||||||
|
jeff.follow!(alice)
|
||||||
|
|
||||||
|
status1
|
||||||
|
status2
|
||||||
|
|
||||||
|
subject.call([status1, status2])
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'removes statuses from author\'s home feed' do
|
||||||
|
expect(Feed.new(:home, alice).get(10)).to_not include([status1.id, status2.id])
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'removes statuses from local follower\'s home feed' do
|
||||||
|
expect(Feed.new(:home, jeff).get(10)).to_not include([status1.id, status2.id])
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'notifies streaming API of followers' do
|
||||||
|
expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'notifies streaming API of author' do
|
||||||
|
expect(Redis.current).to have_received(:publish).with("timeline:#{alice.id}", any_args).at_least(:once)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'notifies streaming API of public timeline' do
|
||||||
|
expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'sends PuSH update to PuSH subscribers with two payloads united' do
|
||||||
|
expect(a_request(:post, 'http://example.com/push').with { |req|
|
||||||
|
matches = req.body.scan(TagManager::VERBS[:delete])
|
||||||
|
matches.size == 2
|
||||||
|
}).to have_been_made
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'sends Salmon slap to previously mentioned users' do
|
||||||
|
expect(a_request(:post, "http://example.com/salmon").with { |req|
|
||||||
|
xml = OStatus2::Salmon.new.unpack(req.body)
|
||||||
|
xml.match(TagManager::VERBS[:delete])
|
||||||
|
}).to have_been_made.once
|
||||||
|
end
|
||||||
|
end
|
|
@ -16,10 +16,9 @@ describe Pubsubhubbub::DistributionWorker do
|
||||||
let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :public) }
|
let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :public) }
|
||||||
|
|
||||||
it 'delivers payload to all subscriptions' do
|
it 'delivers payload to all subscriptions' do
|
||||||
allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
|
allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
|
||||||
subject.perform(status.stream_entry.id)
|
subject.perform(status.stream_entry.id)
|
||||||
expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/)
|
expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([anonymous_subscription, subscription_with_follower])
|
||||||
expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(anonymous_subscription.id, /.*/)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -27,10 +26,10 @@ describe Pubsubhubbub::DistributionWorker do
|
||||||
let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :private) }
|
let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :private) }
|
||||||
|
|
||||||
it 'delivers payload only to subscriptions with followers' do
|
it 'delivers payload only to subscriptions with followers' do
|
||||||
allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
|
allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
|
||||||
subject.perform(status.stream_entry.id)
|
subject.perform(status.stream_entry.id)
|
||||||
expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/)
|
expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([subscription_with_follower])
|
||||||
expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async).with(anonymous_subscription.id, /.*/)
|
expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk).with([anonymous_subscription])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -38,9 +37,9 @@ describe Pubsubhubbub::DistributionWorker do
|
||||||
let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :direct) }
|
let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :direct) }
|
||||||
|
|
||||||
it 'does not deliver payload' do
|
it 'does not deliver payload' do
|
||||||
allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
|
allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
|
||||||
subject.perform(status.stream_entry.id)
|
subject.perform(status.stream_entry.id)
|
||||||
expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async)
|
expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue