import os
import time
from datetime import datetime, timezone

import boto3
import youtube_dl

BEST_FORMAT = "bestvideo+bestaudio/best"
DEFAULT_PAGE_SIZE = 50

dynamodb = boto3.resource('dynamodb')

# Table name is configurable via the environment and defaults to 'Feeds'
feeds_table = dynamodb.Table(os.getenv('UPDATER_DYNAMO_FEEDS_TABLE', 'Feeds'))


def handler(event, context):
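    """Lambda entry point: pull playlist updates for the given resource URL.

    Event shape (inferred from the fields read below; only "url" is required):

        {
            "url": "https://...",     # resource to update
            "start": 1,               # 1-based playlist offset
            "count": 50,              # page size, defaults to DEFAULT_PAGE_SIZE
            "kind": "video_high",     # format hint passed through to youtube-dl
            "last_id": "<video id>"   # last seen video ID, if any
        }
    """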
    url = event.get('url', None)
    if not url:
        raise ValueError('Invalid resource URL %s' % url)

    start = event.get('start', 1)
    count = event.get('count', DEFAULT_PAGE_SIZE)

    kind = event.get('kind', 'video_high')
    last_id = event.get('last_id', None)

    print('Getting updates for %s (start=%d, count=%d, kind: %s, last id: %s)' % (url, start, count, kind, last_id))
    return _get_updates(start, count, url, kind, last_id)


def _update_feed(hash_id):
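    """Refresh a single feed record in DynamoDB.

    Fetches items newer than the last seen video ID and prepends them to the
    stored episode list, truncated to the feed's page size.
    """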
    print('Updating feed {}'.format(hash_id))
    feed = _query_feed(hash_id)

    page_size = int(feed.get('PageSize', DEFAULT_PAGE_SIZE))
    last_id = feed.get('LastID', None)
    episodes = feed.get('Episodes', [])
    item_url = feed['ItemURL']

    # No last seen ID yet, so rebuild the episode list from scratch
    if not last_id:
        episodes = []

    start = time.time()
    _, items, new_last_id = _get_updates(1, page_size, item_url, _get_format(feed), last_id)
    end = time.time()

    print('Got feed update: new {}, current {}. Update took: {}'.format(len(items), len(episodes), end - start))

    # Update the feed and write it back to DynamoDB.
    # Use an aware UTC datetime so the timestamp doesn't depend on the local timezone.
    unix_time = int(datetime.now(timezone.utc).timestamp())
    feed['UpdatedAt'] = unix_time

    if len(items) > 0:
        episodes = items + episodes  # Prepend new episodes
        del episodes[page_size:]  # Truncate the list to the page size
        feed['Episodes'] = episodes

        # Update the last seen video ID
        feed['LastID'] = new_last_id

        _update_feed_episodes(hash_id, feed)
    else:
        # Nothing new, so update the last access field only
        _update_feed_updated_at(hash_id, unix_time)


def _query_feed(hash_id):
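    """Load a feed record from the Feeds table by its hash ID.

    Projected attribute names are aliased via ExpressionAttributeNames to
    avoid clashes with DynamoDB reserved words.
    """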
    response = feeds_table.get_item(
        Key={'HashID': hash_id},
        ProjectionExpression='#prov,#type,#size,#fmt,#quality,#level,#id,#last_id,#episodes,#updated_at,#item_url',
        ExpressionAttributeNames={
            '#prov': 'Provider',
            '#type': 'LinkType',
            '#size': 'PageSize',
            '#fmt': 'Format',
            '#quality': 'Quality',
            '#level': 'FeatureLevel',
            '#id': 'ItemID',
            '#last_id': 'LastID',
            '#episodes': 'Episodes',
            '#updated_at': 'UpdatedAt',
            '#item_url': 'ItemURL',
        },
    )

    return response['Item']


def _update_feed_episodes(hash_id, feed):
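    """Write the refreshed episode list, last seen video ID, and timestamp back to DynamoDB."""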
    feeds_table.update_item(
        Key={
            'HashID': hash_id,
        },
        UpdateExpression='SET #updated_at = :updated_at, #episodes = :episodes, #last_id = :last_id',
        ExpressionAttributeNames={
            '#updated_at': 'UpdatedAt',
            '#episodes': 'Episodes',
            '#last_id': 'LastID',
        },
        ExpressionAttributeValues={
            ':updated_at': feed['UpdatedAt'],
            ':episodes': feed['Episodes'],
            ':last_id': feed['LastID'],
        },
        ReturnValues='NONE',
    )


def _update_feed_updated_at(hash_id, updated_at):
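    """Update only the feed's UpdatedAt timestamp, leaving episodes untouched."""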
    feeds_table.update_item(
        Key={
            'HashID': hash_id,
        },
        UpdateExpression='SET #updated_at = :updated_at',
        ExpressionAttributeNames={
            '#updated_at': 'UpdatedAt',
        },
        ExpressionAttributeValues={
            ':updated_at': updated_at,
        },
        ReturnValues='NONE',
    )


def _get_format(feed):
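    """Translate the feed's Format/Quality attributes into a youtube-dl format selector."""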
    fmt = feed.get('Format', 'video')
    quality = feed.get('Quality', 'high')

    if fmt == 'video':
        # Video
        if quality == 'high':
            return 'best[ext=mp4]'
        else:
            return 'worst[ext=mp4]'
    else:
        # Audio
        if quality == 'high':
            return 'bestaudio'
        else:
            return 'worstaudio'


def _get_updates(start, count, url, fmt, last_id=None):
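    """Extract up to `count` playlist entries from `url`, starting at `start` (1-based).

    Returns a (feed metadata, new video items, newest video ID) tuple.
    Iteration stops as soon as `last_id` is encountered, so only entries
    newer than the last seen video are returned.
    """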
    if start < 1:
        raise ValueError('Invalid start value')

    if count < 1 or count > 600:
        raise ValueError('Invalid count value')

    end = start + count - 1

    opts = {
        'playliststart': start,
        'playlistend': end,
        'extract_flat': 'in_playlist',
        'quiet': True,
        'no_warnings': True,
        'simulate': True,
        'skip_download': True,
    }

    with youtube_dl.YoutubeDL(opts) as ytdl:
        selector = ytdl.build_format_selector(fmt)
        feed_info = ytdl.extract_info(url, download=False)

        # Record basic feed metadata
        feed = {
            'ID': feed_info.get('id'),
            'Title': feed_info.get('uploader'),
            'PageURL': feed_info.get('webpage_url'),
        }

        videos = []
        new_last_id = None

        entries = feed_info['entries']
        for idx, entry in enumerate(entries):
            video_id = entry['id']

            # If we have already seen this video, stop pulling updates
            if last_id and video_id == last_id:
                break

            # Remember the newest video ID as the new last seen ID
            if idx == 0:
                new_last_id = video_id

            # Query full video metadata from YouTube
            result = ytdl.process_ie_result(entry, download=False)

            # Convert an upload date like '20190101' to unix time
            date_str = result.get('upload_date')
            date = datetime.strptime(date_str, '%Y%m%d')

            videos.append({
                'ID': video_id,
                'Title': result.get('title'),
                'Description': result.get('description'),
                'Thumbnail': result.get('thumbnail'),
                'Duration': int(result.get('duration')),
                'VideoURL': result.get('webpage_url'),
                'PubDate': int(date.timestamp()),
                'Size': _get_size(result, selector),
            })

    return feed, videos, new_last_id


def _get_size(video, selector):
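    """Best-effort file size estimate for a video under the given format selector.

    Falls back to the top-level video metadata if the selector lookup fails
    with a KeyError, and to 0 when no size information is available.
    """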
    try:
        selected = next(selector(video))
    except KeyError:
        selected = video

    # Merged video+audio selections report per-format sizes
    if 'requested_formats' in selected:
        return sum(int(f['filesize']) for f in selected['requested_formats'])

    if selected.get('filesize') is not None:
        return int(selected['filesize'])

    return 0
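

# A minimal local smoke test, kept out of the Lambda deployment path. This is
# only a sketch: the playlist URL is a placeholder to fill in before running,
# and importing this module requires an AWS region configured for boto3.
if __name__ == '__main__':
    event = {
        'url': 'https://www.youtube.com/playlist?list=...',  # placeholder, fill in a real playlist
        'count': 5,
        'kind': 'best',  # any valid youtube-dl format selector works here
    }
    feed, videos, last_id = handler(event, None)
    print('Feed: {}, new videos: {}, last id: {}'.format(feed, len(videos), last_id))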