forked from cybrespace/mastodon
Improved remote thread fetching (#10106)
* Fetch up to 5 replies when discovering a new remote status This is used for resolving threads downwards. The originating server must add a “replies” attributes with such replies for it to be useful. * Add some tests for ActivityPub::FetchRepliesWorker * Add specs for ActivityPub::FetchRepliesService * Serialize up to 5 public self-replies for ActivityPub notes * Add specs for ActivityPub::NoteSerializer * Move exponential backoff logic to a worker concern * Fetch first page of paginated collections when fetching thread replies * Add specs for paginated collections in replies * Move Note replies serialization to a first CollectionPage The collection isn't actually paginable yet as it has no id nor a `next` field. This may come in another PR. * Use pluck(:uri) instead of map(&:uri) to improve performances * Fix fetching replies when they are in a CollectionPage
This commit is contained in:
parent
6e8743d17a
commit
9d3c6f1849
|
@ -40,6 +40,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
|
||||||
end
|
end
|
||||||
|
|
||||||
resolve_thread(@status)
|
resolve_thread(@status)
|
||||||
|
fetch_replies(@status)
|
||||||
distribute(@status)
|
distribute(@status)
|
||||||
forward_for_reply if @status.public_visibility? || @status.unlisted_visibility?
|
forward_for_reply if @status.public_visibility? || @status.unlisted_visibility?
|
||||||
end
|
end
|
||||||
|
@ -213,6 +214,15 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
|
||||||
ThreadResolveWorker.perform_async(status.id, in_reply_to_uri)
|
ThreadResolveWorker.perform_async(status.id, in_reply_to_uri)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def fetch_replies(status)
|
||||||
|
collection = @object['replies']
|
||||||
|
return if collection.nil?
|
||||||
|
replies = ActivityPub::FetchRepliesService.new.call(status, collection, false)
|
||||||
|
return if replies.present?
|
||||||
|
uri = value_or_id(collection)
|
||||||
|
ActivityPub::FetchRepliesWorker.perform_async(status.id, uri) unless uri.nil?
|
||||||
|
end
|
||||||
|
|
||||||
def conversation_from_uri(uri)
|
def conversation_from_uri(uri)
|
||||||
return nil if uri.nil?
|
return nil if uri.nil?
|
||||||
return Conversation.find_by(id: OStatus::TagManager.instance.unique_tag_to_local_id(uri, 'Conversation')) if OStatus::TagManager.instance.local_id?(uri)
|
return Conversation.find_by(id: OStatus::TagManager.instance.unique_tag_to_local_id(uri, 'Conversation')) if OStatus::TagManager.instance.local_id?(uri)
|
||||||
|
|
|
@ -11,6 +11,10 @@ module StatusThreadingConcern
|
||||||
find_statuses_from_tree_path(descendant_ids(limit, max_child_id, since_child_id, depth), account, promote: true)
|
find_statuses_from_tree_path(descendant_ids(limit, max_child_id, since_child_id, depth), account, promote: true)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self_replies(limit)
|
||||||
|
account.statuses.where(in_reply_to_id: id, visibility: [:public, :unlisted]).reorder(id: :asc).limit(limit)
|
||||||
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def ancestor_ids(limit)
|
def ancestor_ids(limit)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class ActivityPub::CollectionPresenter < ActiveModelSerializers::Model
|
class ActivityPub::CollectionPresenter < ActiveModelSerializers::Model
|
||||||
attributes :id, :type, :size, :items, :part_of, :first, :last, :next, :prev
|
attributes :id, :type, :size, :items, :page, :part_of, :first, :last, :next, :prev
|
||||||
end
|
end
|
||||||
|
|
|
@ -7,7 +7,8 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer
|
||||||
super
|
super
|
||||||
end
|
end
|
||||||
|
|
||||||
attributes :id, :type
|
attribute :id, if: -> { object.id.present? }
|
||||||
|
attribute :type
|
||||||
attribute :total_items, if: -> { object.size.present? }
|
attribute :total_items, if: -> { object.size.present? }
|
||||||
attribute :next, if: -> { object.next.present? }
|
attribute :next, if: -> { object.next.present? }
|
||||||
attribute :prev, if: -> { object.prev.present? }
|
attribute :prev, if: -> { object.prev.present? }
|
||||||
|
@ -37,6 +38,6 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer
|
||||||
end
|
end
|
||||||
|
|
||||||
def page?
|
def page?
|
||||||
object.part_of.present?
|
object.part_of.present? || object.page.present?
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -13,6 +13,8 @@ class ActivityPub::NoteSerializer < ActiveModel::Serializer
|
||||||
has_many :media_attachments, key: :attachment
|
has_many :media_attachments, key: :attachment
|
||||||
has_many :virtual_tags, key: :tag
|
has_many :virtual_tags, key: :tag
|
||||||
|
|
||||||
|
has_one :replies, serializer: ActivityPub::CollectionSerializer
|
||||||
|
|
||||||
def id
|
def id
|
||||||
ActivityPub::TagManager.instance.uri_for(object)
|
ActivityPub::TagManager.instance.uri_for(object)
|
||||||
end
|
end
|
||||||
|
@ -33,6 +35,17 @@ class ActivityPub::NoteSerializer < ActiveModel::Serializer
|
||||||
{ object.language => Formatter.instance.format(object) }
|
{ object.language => Formatter.instance.format(object) }
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def replies
|
||||||
|
ActivityPub::CollectionPresenter.new(
|
||||||
|
type: :unordered,
|
||||||
|
first: ActivityPub::CollectionPresenter.new(
|
||||||
|
type: :unordered,
|
||||||
|
page: true,
|
||||||
|
items: object.self_replies(5).pluck(:uri)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
def language?
|
def language?
|
||||||
object.language.present?
|
object.language.present?
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class ActivityPub::FetchRepliesService < BaseService
|
||||||
|
include JsonLdHelper
|
||||||
|
|
||||||
|
def call(parent_status, collection_or_uri, allow_synchronous_requests = true)
|
||||||
|
@account = parent_status.account
|
||||||
|
@allow_synchronous_requests = allow_synchronous_requests
|
||||||
|
|
||||||
|
@items = collection_items(collection_or_uri)
|
||||||
|
return if @items.nil?
|
||||||
|
|
||||||
|
FetchReplyWorker.push_bulk(filtered_replies)
|
||||||
|
|
||||||
|
@items
|
||||||
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
def collection_items(collection_or_uri)
|
||||||
|
collection = fetch_collection(collection_or_uri)
|
||||||
|
return unless collection.is_a?(Hash)
|
||||||
|
|
||||||
|
collection = fetch_collection(collection['first']) if collection['first'].present?
|
||||||
|
return unless collection.is_a?(Hash)
|
||||||
|
|
||||||
|
case collection['type']
|
||||||
|
when 'Collection', 'CollectionPage'
|
||||||
|
collection['items']
|
||||||
|
when 'OrderedCollection', 'OrderedCollectionPage'
|
||||||
|
collection['orderedItems']
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def fetch_collection(collection_or_uri)
|
||||||
|
return collection_or_uri if collection_or_uri.is_a?(Hash)
|
||||||
|
return unless @allow_synchronous_requests
|
||||||
|
return if invalid_origin?(collection_or_uri)
|
||||||
|
collection = fetch_resource_without_id_validation(collection_or_uri)
|
||||||
|
raise Mastodon::UnexpectedResponseError if collection.nil?
|
||||||
|
collection
|
||||||
|
end
|
||||||
|
|
||||||
|
def filtered_replies
|
||||||
|
# Only fetch replies to the same server as the original status to avoid
|
||||||
|
# amplification attacks.
|
||||||
|
|
||||||
|
# Also limit to 5 fetched replies to limit potential for DoS.
|
||||||
|
@items.map { |item| value_or_id(item) }.reject { |uri| invalid_origin?(uri) }.take(5)
|
||||||
|
end
|
||||||
|
|
||||||
|
def invalid_origin?(url)
|
||||||
|
return true if unsupported_uri_scheme?(url)
|
||||||
|
|
||||||
|
needle = Addressable::URI.parse(url).host
|
||||||
|
haystack = Addressable::URI.parse(@account.uri).host
|
||||||
|
|
||||||
|
!haystack.casecmp(needle).zero?
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,12 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class ActivityPub::FetchRepliesWorker
|
||||||
|
include Sidekiq::Worker
|
||||||
|
include ExponentialBackoff
|
||||||
|
|
||||||
|
sidekiq_options queue: 'pull', retry: 3
|
||||||
|
|
||||||
|
def perform(parent_status_id, replies_uri)
|
||||||
|
ActivityPub::FetchRepliesService.new.call(Status.find(parent_status_id), replies_uri)
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,11 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module ExponentialBackoff
|
||||||
|
extend ActiveSupport::Concern
|
||||||
|
|
||||||
|
included do
|
||||||
|
sidekiq_retry_in do |count|
|
||||||
|
15 + 10 * (count**4) + rand(10 * (count**4))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,12 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class FetchReplyWorker
|
||||||
|
include Sidekiq::Worker
|
||||||
|
include ExponentialBackoff
|
||||||
|
|
||||||
|
sidekiq_options queue: 'pull', retry: 3
|
||||||
|
|
||||||
|
def perform(child_url)
|
||||||
|
FetchRemoteStatusService.new.call(child_url)
|
||||||
|
end
|
||||||
|
end
|
|
@ -2,13 +2,10 @@
|
||||||
|
|
||||||
class ThreadResolveWorker
|
class ThreadResolveWorker
|
||||||
include Sidekiq::Worker
|
include Sidekiq::Worker
|
||||||
|
include ExponentialBackoff
|
||||||
|
|
||||||
sidekiq_options queue: 'pull', retry: 3
|
sidekiq_options queue: 'pull', retry: 3
|
||||||
|
|
||||||
sidekiq_retry_in do |count|
|
|
||||||
15 + 10 * (count**4) + rand(10 * (count**4))
|
|
||||||
end
|
|
||||||
|
|
||||||
def perform(child_status_id, parent_url)
|
def perform(child_status_id, parent_url)
|
||||||
child_status = Status.find(child_status_id)
|
child_status = Status.find(child_status_id)
|
||||||
parent_status = FetchRemoteStatusService.new.call(parent_url)
|
parent_status = FetchRemoteStatusService.new.call(parent_url)
|
||||||
|
|
|
@ -0,0 +1,44 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require 'rails_helper'
|
||||||
|
|
||||||
|
describe ActivityPub::NoteSerializer do
|
||||||
|
let!(:account) { Fabricate(:account) }
|
||||||
|
let!(:other) { Fabricate(:account) }
|
||||||
|
let!(:parent) { Fabricate(:status, account: account, visibility: :public) }
|
||||||
|
let!(:reply1) { Fabricate(:status, account: account, thread: parent, visibility: :public) }
|
||||||
|
let!(:reply2) { Fabricate(:status, account: account, thread: parent, visibility: :public) }
|
||||||
|
let!(:reply3) { Fabricate(:status, account: other, thread: parent, visibility: :public) }
|
||||||
|
let!(:reply4) { Fabricate(:status, account: account, thread: parent, visibility: :public) }
|
||||||
|
let!(:reply5) { Fabricate(:status, account: account, thread: parent, visibility: :direct) }
|
||||||
|
|
||||||
|
before(:each) do
|
||||||
|
@serialization = ActiveModelSerializers::SerializableResource.new(parent, serializer: ActivityPub::NoteSerializer, adapter: ActivityPub::Adapter)
|
||||||
|
end
|
||||||
|
|
||||||
|
subject { JSON.parse(@serialization.to_json) }
|
||||||
|
|
||||||
|
it 'has a Note type' do
|
||||||
|
expect(subject['type']).to eql('Note')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'has a replies collection' do
|
||||||
|
expect(subject['replies']['type']).to eql('Collection')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'has a replies collection with a first Page' do
|
||||||
|
expect(subject['replies']['first']['type']).to eql('CollectionPage')
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'includes public self-replies in its replies collection' do
|
||||||
|
expect(subject['replies']['first']['items']).to include(reply1.uri, reply2.uri, reply4.uri)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'does not include replies from others in its replies collection' do
|
||||||
|
expect(subject['replies']['first']['items']).to_not include(reply3.uri)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'does not include replies with direct visibility in its replies collection' do
|
||||||
|
expect(subject['replies']['first']['items']).to_not include(reply5.uri)
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,122 @@
|
||||||
|
require 'rails_helper'
|
||||||
|
|
||||||
|
RSpec.describe ActivityPub::FetchRepliesService, type: :service do
|
||||||
|
let(:actor) { Fabricate(:account, domain: 'example.com', uri: 'http://example.com/account') }
|
||||||
|
let(:status) { Fabricate(:status, account: actor) }
|
||||||
|
let(:collection_uri) { 'http://example.com/replies/1' }
|
||||||
|
|
||||||
|
let(:items) do
|
||||||
|
[
|
||||||
|
'http://example.com/self-reply-1',
|
||||||
|
'http://example.com/self-reply-2',
|
||||||
|
'http://example.com/self-reply-3',
|
||||||
|
'http://other.com/other-reply-1',
|
||||||
|
'http://other.com/other-reply-2',
|
||||||
|
'http://other.com/other-reply-3',
|
||||||
|
'http://example.com/self-reply-4',
|
||||||
|
'http://example.com/self-reply-5',
|
||||||
|
'http://example.com/self-reply-6',
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
let(:payload) do
|
||||||
|
{
|
||||||
|
'@context': 'https://www.w3.org/ns/activitystreams',
|
||||||
|
type: 'Collection',
|
||||||
|
id: collection_uri,
|
||||||
|
items: items,
|
||||||
|
}.with_indifferent_access
|
||||||
|
end
|
||||||
|
|
||||||
|
subject { described_class.new }
|
||||||
|
|
||||||
|
describe '#call' do
|
||||||
|
context 'when the payload is a Collection with inlined replies' do
|
||||||
|
context 'when passing the collection itself' do
|
||||||
|
it 'spawns workers for up to 5 replies on the same server' do
|
||||||
|
allow(FetchReplyWorker).to receive(:push_bulk)
|
||||||
|
subject.call(status, payload)
|
||||||
|
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when passing the URL to the collection' do
|
||||||
|
before do
|
||||||
|
stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload))
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'spawns workers for up to 5 replies on the same server' do
|
||||||
|
allow(FetchReplyWorker).to receive(:push_bulk)
|
||||||
|
subject.call(status, collection_uri)
|
||||||
|
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when the payload is an OrderedCollection with inlined replies' do
|
||||||
|
let(:payload) do
|
||||||
|
{
|
||||||
|
'@context': 'https://www.w3.org/ns/activitystreams',
|
||||||
|
type: 'OrderedCollection',
|
||||||
|
id: collection_uri,
|
||||||
|
orderedItems: items,
|
||||||
|
}.with_indifferent_access
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when passing the collection itself' do
|
||||||
|
it 'spawns workers for up to 5 replies on the same server' do
|
||||||
|
allow(FetchReplyWorker).to receive(:push_bulk)
|
||||||
|
subject.call(status, payload)
|
||||||
|
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when passing the URL to the collection' do
|
||||||
|
before do
|
||||||
|
stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload))
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'spawns workers for up to 5 replies on the same server' do
|
||||||
|
allow(FetchReplyWorker).to receive(:push_bulk)
|
||||||
|
subject.call(status, collection_uri)
|
||||||
|
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when the payload is a paginated Collection with inlined replies' do
|
||||||
|
let(:payload) do
|
||||||
|
{
|
||||||
|
'@context': 'https://www.w3.org/ns/activitystreams',
|
||||||
|
type: 'Collection',
|
||||||
|
id: collection_uri,
|
||||||
|
first: {
|
||||||
|
type: 'CollectionPage',
|
||||||
|
partOf: collection_uri,
|
||||||
|
items: items,
|
||||||
|
}
|
||||||
|
}.with_indifferent_access
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when passing the collection itself' do
|
||||||
|
it 'spawns workers for up to 5 replies on the same server' do
|
||||||
|
allow(FetchReplyWorker).to receive(:push_bulk)
|
||||||
|
subject.call(status, payload)
|
||||||
|
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'when passing the URL to the collection' do
|
||||||
|
before do
|
||||||
|
stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload))
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'spawns workers for up to 5 replies on the same server' do
|
||||||
|
allow(FetchReplyWorker).to receive(:push_bulk)
|
||||||
|
subject.call(status, collection_uri)
|
||||||
|
expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5'])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,40 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require 'rails_helper'
|
||||||
|
|
||||||
|
describe ActivityPub::FetchRepliesWorker do
|
||||||
|
subject { described_class.new }
|
||||||
|
|
||||||
|
let(:account) { Fabricate(:account, uri: 'https://example.com/user/1') }
|
||||||
|
let(:status) { Fabricate(:status, account: account) }
|
||||||
|
|
||||||
|
let(:payload) do
|
||||||
|
{
|
||||||
|
'@context': 'https://www.w3.org/ns/activitystreams',
|
||||||
|
id: 'https://example.com/statuses_replies/1',
|
||||||
|
type: 'Collection',
|
||||||
|
items: [],
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
let(:json) { Oj.dump(payload) }
|
||||||
|
|
||||||
|
describe 'perform' do
|
||||||
|
it 'performs a request if the collection URI is from the same host' do
|
||||||
|
stub_request(:get, 'https://example.com/statuses_replies/1').to_return(status: 200, body: json)
|
||||||
|
subject.perform(status.id, 'https://example.com/statuses_replies/1')
|
||||||
|
expect(a_request(:get, 'https://example.com/statuses_replies/1')).to have_been_made.once
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'does not perform a request if the collection URI is from a different host' do
|
||||||
|
stub_request(:get, 'https://other.com/statuses_replies/1').to_return(status: 200)
|
||||||
|
subject.perform(status.id, 'https://other.com/statuses_replies/1')
|
||||||
|
expect(a_request(:get, 'https://other.com/statuses_replies/1')).to_not have_been_made
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'raises when request fails' do
|
||||||
|
stub_request(:get, 'https://example.com/statuses_replies/1').to_return(status: 500)
|
||||||
|
expect { subject.perform(status.id, 'https://example.com/statuses_replies/1') }.to raise_error Mastodon::UnexpectedResponseError
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
Loading…
Reference in New Issue