Fix some link previews being incorrectly generated from other prior links (#16885)

* Add tests

* Fix some link previews being incorrectly generated from different prior links

PR #12403 added a cache to avoid redundant queries when the OEmbed endpoint can
be guessed from the URL. This caching mechanism is not perfectly correct as
there is no guarantee that all pages from a given domain share the same
OEmbed provider endpoint.

This PR prevents the FetchOEmbedService from caching OEmbed endpoint that
cannot be generalized by replacing a fully-qualified URL from the endpoint's
parameters, greatly reducing the number of incorrect cached generalizations.
This commit is contained in:
Claire 2021-10-21 20:39:35 +02:00 committed by Eugen Rochko
parent e63370db19
commit 123a88b6b5
3 changed files with 52 additions and 1 deletions

View File

@ -2,6 +2,7 @@
class FetchOEmbedService class FetchOEmbedService
ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze
URL_REGEX = /(=(http[s]?(%3A|:)(\/\/|%2F%2F)))([^&]*)/i.freeze
attr_reader :url, :options, :format, :endpoint_url attr_reader :url, :options, :format, :endpoint_url
@ -65,10 +66,12 @@ class FetchOEmbedService
end end
def cache_endpoint! def cache_endpoint!
return unless URL_REGEX.match?(@endpoint_url)
url_domain = Addressable::URI.parse(@url).normalized_host url_domain = Addressable::URI.parse(@url).normalized_host
endpoint_hash = { endpoint_hash = {
endpoint: @endpoint_url.gsub(/(=(http[s]?(%3A|:)(\/\/|%2F%2F)))([^&]*)/i, '={url}'), endpoint: @endpoint_url.gsub(URL_REGEX, '={url}'),
format: @format, format: @format,
} }

View File

@ -0,0 +1,7 @@
<!DOCTYPE html>
<html>
<head>
<link rel="alternate" type="application/json+oembed" href="https://www.youtube.com/oembed?format=json&amp;url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE" title="What is Mastodon?">
</head>
<body></body>
</html>

View File

@ -13,6 +13,32 @@ describe FetchOEmbedService, type: :service do
describe 'discover_provider' do describe 'discover_provider' do
context 'when status code is 200 and MIME type is text/html' do context 'when status code is 200 and MIME type is text/html' do
context 'when OEmbed endpoint contains URL as parameter' do
before do
stub_request(:get, 'https://www.youtube.com/watch?v=IPSbNdBmWKE').to_return(
status: 200,
headers: { 'Content-Type': 'text/html' },
body: request_fixture('oembed_youtube.html'),
)
stub_request(:get, 'https://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE').to_return(
status: 200,
headers: { 'Content-Type': 'text/html' },
body: request_fixture('oembed_json_empty.html')
)
end
it 'returns new OEmbed::Provider for JSON provider' do
subject.call('https://www.youtube.com/watch?v=IPSbNdBmWKE')
expect(subject.endpoint_url).to eq 'https://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DIPSbNdBmWKE'
expect(subject.format).to eq :json
end
it 'stores URL template' do
subject.call('https://www.youtube.com/watch?v=IPSbNdBmWKE')
expect(Rails.cache.read('oembed_endpoint:www.youtube.com')[:endpoint]).to eq 'https://www.youtube.com/oembed?format=json&url={url}'
end
end
context 'Both of JSON and XML provider are discoverable' do context 'Both of JSON and XML provider are discoverable' do
before do before do
stub_request(:get, 'https://host.test/oembed.html').to_return( stub_request(:get, 'https://host.test/oembed.html').to_return(
@ -33,6 +59,11 @@ describe FetchOEmbedService, type: :service do
expect(subject.endpoint_url).to eq 'https://host.test/provider.xml' expect(subject.endpoint_url).to eq 'https://host.test/provider.xml'
expect(subject.format).to eq :xml expect(subject.format).to eq :xml
end end
it 'does not cache OEmbed endpoint' do
subject.call('https://host.test/oembed.html', format: :xml)
expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false
end
end end
context 'JSON provider is discoverable while XML provider is not' do context 'JSON provider is discoverable while XML provider is not' do
@ -49,6 +80,11 @@ describe FetchOEmbedService, type: :service do
expect(subject.endpoint_url).to eq 'https://host.test/provider.json' expect(subject.endpoint_url).to eq 'https://host.test/provider.json'
expect(subject.format).to eq :json expect(subject.format).to eq :json
end end
it 'does not cache OEmbed endpoint' do
subject.call('https://host.test/oembed.html')
expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false
end
end end
context 'XML provider is discoverable while JSON provider is not' do context 'XML provider is discoverable while JSON provider is not' do
@ -65,6 +101,11 @@ describe FetchOEmbedService, type: :service do
expect(subject.endpoint_url).to eq 'https://host.test/provider.xml' expect(subject.endpoint_url).to eq 'https://host.test/provider.xml'
expect(subject.format).to eq :xml expect(subject.format).to eq :xml
end end
it 'does not cache OEmbed endpoint' do
subject.call('https://host.test/oembed.html')
expect(Rails.cache.exist?('oembed_endpoint:host.test')).to eq false
end
end end
context 'Invalid XML provider is discoverable while JSON provider is not' do context 'Invalid XML provider is discoverable while JSON provider is not' do