Re-use imdb page parser
This commit is contained in:
@@ -28,6 +28,34 @@ class IMDBBase(Automation, RSS):
|
||||
def getInfo(self, imdb_id):
    """Look up movie info for an IMDB identifier.

    Fires the 'movie.info' event with ``extended = False`` and
    ``merge = True`` and returns whatever the event call yields.
    """
    movie_info = fireEvent('movie.info', identifier = imdb_id, extended = False, merge = True)
    return movie_info
|
||||
|
||||
def getFromURL(self, url):
    """Fetch an IMDB page and return the IMDB ids found in its list section.

    The page is narrowed down to the list markup before ids are extracted:
    first by plain string splitting on the 'list compact' / 'pages' divs,
    and, if that fails, by parsing everything after the 'main' div with
    BeautifulSoup and taking the first matching list container.

    :param url: address of the IMDB page to scan.
    :return: result of ``getImdb(..., multiple = True)`` on the narrowed
        markup, or an empty list when nothing could be fetched.
    """
    log.debug('Getting IMDBs from: %s', url)
    html = self.getHTMLData(url)

    # Nothing fetched: skip parsing rather than falling through both
    # parse attempts and stringifying an empty result further down.
    if not html:
        return []

    try:
        # Fast path: cut the list section out with plain string splits.
        split = splitString(html, split_on = "<div class=\"list compact\">")[1]
        html = splitString(split, split_on = "<div class=\"pages\">")[0]
    except Exception:
        try:
            # Fallback: parse everything after the main div and look for one
            # of the known list containers.
            split = splitString(html, split_on = "<div id=\"main\">")
            soup = BeautifulSoup(split[1])

            # When no container matches, the whole parsed fragment is scanned.
            html = soup

            for class_name in ['list compact', 'lister', 'list detail sub-list']:
                container = soup.find('div', attrs = {
                    'class': class_name
                })

                if container:
                    html = ''.join([str(node) for node in container.contents])
                    break
        except Exception:
            log.error('Failed parsing IMDB page "%s": %s', (url, traceback.format_exc()))

    html = str(html)
    imdbs = getImdb(html, multiple = True) if html else []

    return imdbs
|
||||
|
||||
|
||||
class IMDBWatchlist(IMDBBase):
|
||||
|
||||
@@ -65,22 +93,7 @@ class IMDBWatchlist(IMDBBase):
|
||||
try:
|
||||
|
||||
w_url = '%s&start=%s' % (watchlist_url, start)
|
||||
log.debug('Started IMDB watchlists: %s', w_url)
|
||||
html = self.getHTMLData(w_url)
|
||||
|
||||
try:
|
||||
split = splitString(html, split_on="<div id=\"main\">")
|
||||
html2 = BeautifulSoup(split[1])
|
||||
html = html2.find('div', attrs = {'class': 'list compact'}).contents
|
||||
html = ''.join([str(x) for x in html])
|
||||
except:
|
||||
try:
|
||||
split = splitString(html, split_on="<div class=\"list compact\">")[1]
|
||||
html = splitString(split, split_on="<div class=\"pages\">")[0]
|
||||
except:
|
||||
pass
|
||||
|
||||
imdbs = getImdb(html, multiple = True) if html else []
|
||||
imdbs = self.getFromURL(w_url)
|
||||
|
||||
for imdb in imdbs:
|
||||
if imdb not in movies:
|
||||
@@ -115,12 +128,12 @@ class IMDBAutomation(IMDBBase):
|
||||
'boxoffice': {
|
||||
'order': 2,
|
||||
'name': 'IMDB - Box Office',
|
||||
'url': 'http://www.imdb.com/chart/',
|
||||
'url': 'http://www.imdb.com/boxoffice/',
|
||||
},
|
||||
'rentals': {
|
||||
'order': 3,
|
||||
'name': 'IMDB - Top DVD rentals',
|
||||
'url': 'http://m.imdb.com/boxoffice_json',
|
||||
'url': 'http://www.imdb.com/boxoffice/rentals',
|
||||
'type': 'json',
|
||||
},
|
||||
'top250': {
|
||||
@@ -130,8 +143,6 @@ class IMDBAutomation(IMDBBase):
|
||||
},
|
||||
}
|
||||
|
||||
first_table = ['boxoffice']
|
||||
|
||||
def getIMDBids(self):
|
||||
|
||||
movies = []
|
||||
@@ -141,36 +152,19 @@ class IMDBAutomation(IMDBBase):
|
||||
url = chart.get('url')
|
||||
|
||||
if self.conf('automation_charts_%s' % name):
|
||||
data = self.getHTMLData(url)
|
||||
imdb_ids = self.getFromURL(url)
|
||||
|
||||
if data:
|
||||
try:
|
||||
html = BeautifulSoup(data)
|
||||
try:
|
||||
for imdb_id in imdb_ids:
|
||||
info = self.getInfo(imdb_id)
|
||||
if info and self.isMinimalMovie(info):
|
||||
movies.append(imdb_id)
|
||||
|
||||
if chart.get('type', 'html') == 'html':
|
||||
result_div = html.find('div', attrs = {'id': 'main'})
|
||||
if self.shuttingDown():
|
||||
break
|
||||
|
||||
try:
|
||||
if url in self.first_table:
|
||||
table = result_div.find('table')
|
||||
result_div = table if table else result_div
|
||||
except:
|
||||
pass
|
||||
|
||||
imdb_ids = getImdb(str(result_div), multiple = True)
|
||||
else:
|
||||
imdb_ids = getImdb(str(data), multiple = True)
|
||||
|
||||
for imdb_id in imdb_ids:
|
||||
info = self.getInfo(imdb_id)
|
||||
if info and self.isMinimalMovie(info):
|
||||
movies.append(imdb_id)
|
||||
|
||||
if self.shuttingDown():
|
||||
break
|
||||
|
||||
except:
|
||||
log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc()))
|
||||
except:
|
||||
log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc()))
|
||||
|
||||
return movies
|
||||
|
||||
@@ -188,42 +182,25 @@ class IMDBAutomation(IMDBBase):
|
||||
|
||||
chart['list'] = []
|
||||
|
||||
data = self.getHTMLData(url)
|
||||
if data:
|
||||
html = BeautifulSoup(data)
|
||||
imdb_ids = self.getFromURL(url)
|
||||
|
||||
try:
|
||||
try:
|
||||
for imdb_id in imdb_ids[0:max_items]:
|
||||
|
||||
if chart.get('type', 'html') == 'html':
|
||||
result_div = html.find('div', attrs = {'id': 'main'})
|
||||
is_movie = fireEvent('movie.is_movie', identifier = imdb_id, single = True)
|
||||
if not is_movie:
|
||||
continue
|
||||
|
||||
try:
|
||||
if url in self.first_table:
|
||||
table = result_div.find('table')
|
||||
result_div = table if table else result_div
|
||||
except:
|
||||
pass
|
||||
info = self.getInfo(imdb_id)
|
||||
chart['list'].append(info)
|
||||
|
||||
imdb_ids = getImdb(str(result_div), multiple = True)
|
||||
else:
|
||||
imdb_ids = getImdb(str(data), multiple = True)
|
||||
if self.shuttingDown():
|
||||
break
|
||||
except:
|
||||
log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc()))
|
||||
|
||||
for imdb_id in imdb_ids[0:max_items]:
|
||||
|
||||
is_movie = fireEvent('movie.is_movie', identifier = imdb_id, single = True)
|
||||
if not is_movie:
|
||||
continue
|
||||
|
||||
info = self.getInfo(imdb_id)
|
||||
chart['list'].append(info)
|
||||
|
||||
if self.shuttingDown():
|
||||
break
|
||||
except:
|
||||
log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc()))
|
||||
|
||||
if chart['list']:
|
||||
movie_lists.append(chart)
|
||||
if chart['list']:
|
||||
movie_lists.append(chart)
|
||||
|
||||
|
||||
return movie_lists
|
||||
|
||||
Reference in New Issue
Block a user