From 11932bbce21fc2f8340d35f04ca992a4c0966e5b Mon Sep 17 00:00:00 2001 From: Maksym Pavlenko Date: Wed, 29 May 2019 15:10:39 -0700 Subject: [PATCH] Rework playlist updates --- cmd/updater/main.py | 29 ++++++++++++++++++++++++----- cmd/updater/updater.py | 23 +++++++++++++++++------ pkg/feeds/feeds.go | 22 ++++++++-------------- pkg/queue/sender.go | 17 ++++++++++------- 4 files changed, 59 insertions(+), 32 deletions(-) diff --git a/cmd/updater/main.py b/cmd/updater/main.py index ebb490c..c909704 100644 --- a/cmd/updater/main.py +++ b/cmd/updater/main.py @@ -16,6 +16,18 @@ print('Using DynamoDB table: {}'.format(feeds_table_name)) feeds_table = dynamodb.Table(feeds_table_name) +def _unique(episodes): + unique = set() + output = [] + for item in episodes: + video_id = item['ID'] + if video_id in unique: + continue + unique.add(video_id) + output.append(item) + return output + + def _update(item): # Unpack fields @@ -24,20 +36,21 @@ def _update(item): last_id = item['last_id'] start = int(item['start']) count = int(item['count']) + link_type = item.get('link_type') fmt = item.get('format', 'video') quality = item.get('quality', 'high') ytdl_fmt = updater.get_format(fmt, quality) # Invoke youtube-dl and pull updates - print('Updating feed {} (last id: {}, start: {}, count: {}, fmt: {})'.format( - feed_id, last_id, start, count, ytdl_fmt)) - _, new_episodes, new_last_id = updater.get_updates(start, count, url, ytdl_fmt, last_id) + print('Updating feed {} (last id: {}, start: {}, count: {}, fmt: {}, type: {})'.format( + feed_id, last_id, start, count, ytdl_fmt, link_type)) + _, new_episodes, new_last_id = updater.get_updates(start, count, url, ytdl_fmt, last_id, link_type) if new_last_id is None: # Sometimes youtube-dl fails to pull updates print('! New last id is None, retrying...') - _, new_episodes, new_last_id = updater.get_updates(start, count, url, ytdl_fmt, last_id) + _, new_episodes, new_last_id = updater.get_updates(start, count, url, ytdl_fmt, last_id, link_type) if new_last_id == last_id: print('No updates found for {}'.format(feed_id)) @@ -53,6 +66,7 @@ def _update(item): ExpressionAttributeNames={'#D': 'EpisodesData'} ) + is_playlist = link_type == 'playlist' old_episodes = [] resp_item = resp['Item'] raw = resp_item.get('EpisodesData') @@ -61,7 +75,12 @@ def _update(item): old_content = gzip.decompress(raw.value).decode('utf-8') # Decompress from gzip old_episodes = json.loads(old_content) # Deserialize from string to json - episodes = new_episodes + old_episodes # Prepand new episodes to the list + if is_playlist: + episodes = old_episodes + new_episodes # Playlist items are added to the end of list + episodes = _unique(episodes) + else: + episodes = new_episodes + old_episodes # Otherwise prepand the new episodes + if len(episodes) > count: del episodes[count:] # Truncate list diff --git a/cmd/updater/updater.py b/cmd/updater/updater.py index 308a34e..a276c63 100644 --- a/cmd/updater/updater.py +++ b/cmd/updater/updater.py @@ -20,7 +20,7 @@ def get_format(fmt, quality): return 'worstaudio' -def get_updates(start, count, url, fmt, last_id=None): +def get_updates(start, count, url, fmt, last_id=None, link_type=None): if start < 1: raise ValueError('Invalid start value') @@ -55,15 +55,23 @@ def get_updates(start, count, url, fmt, last_id=None): videos = [] new_last_id = None - + is_playlist = link_type == 'playlist' entries = feed_info['entries'] + + if not len(entries): + # No episodes + return feed, videos, new_last_id + + if is_playlist: + # Playlist items are added to the end, so compare 'last_id' by the last episode instead of the first one + entries.reverse() + + # Remember new last id + new_last_id = entries[0]['id'] + for idx, entry in enumerate(entries): video_id = entry['id'] - # Remember new last id - if idx == 0: - new_last_id = video_id - # If already seen this video previously, stop pulling updates if last_id and video_id == last_id: break @@ -90,6 +98,9 @@ def get_updates(start, count, url, fmt, last_id=None): 'Size': size, }) + if is_playlist: + videos.reverse() + return feed, videos, new_last_id diff --git a/pkg/feeds/feeds.go b/pkg/feeds/feeds.go index 478ac05..a0c7bd8 100644 --- a/pkg/feeds/feeds.go +++ b/pkg/feeds/feeds.go @@ -153,20 +153,14 @@ func (s *Service) BuildFeed(hashID string) ([]byte, error) { // Submit to SQS for background update item := &queue.Item{ - ID: feed.HashID, - URL: feed.ItemURL, - Start: 1, - Count: feed.PageSize, - LastID: feed.LastID, - Format: string(feed.Format), - Quality: string(feed.Quality), - } - - if feed.LinkType == api.LinkTypePlaylist { - // Playlist is a special case. Last ID tracks a latest episode on a channel, - // it appears as the first one in the list. New playlist items are added to - // the end of list, so sync with last seen ID doesn't work here as expected. - item.LastID = "" + ID: feed.HashID, + URL: feed.ItemURL, + Start: 1, + Count: feed.PageSize, + LastID: feed.LastID, + LinkType: feed.LinkType, + Format: string(feed.Format), + Quality: string(feed.Quality), } s.sender.Add(item) diff --git a/pkg/queue/sender.go b/pkg/queue/sender.go index 3ca5bcf..6c916e4 100644 --- a/pkg/queue/sender.go +++ b/pkg/queue/sender.go @@ -10,6 +10,8 @@ import ( "github.com/aws/aws-sdk-go/service/sqs" "github.com/pkg/errors" log "github.com/sirupsen/logrus" + + "github.com/mxpv/podsync/pkg/api" ) var ( @@ -23,13 +25,14 @@ const ( ) type Item struct { - ID string `json:"id"` - URL string `json:"url"` - Start int `json:"start"` - Count int `json:"count"` - LastID string `json:"last_id"` - Format string `json:"format"` - Quality string `json:"quality"` + ID string `json:"id"` + URL string `json:"url"` + Start int `json:"start"` + Count int `json:"count"` + LastID string `json:"last_id"` + LinkType api.LinkType `json:"link_type"` + Format string `json:"format"` + Quality string `json:"quality"` } type Sender struct {