")
+ html = BeautifulSoup(split[1])
+ for x in ['list compact', 'lister', 'list detail sub-list']:
+ html2 = html.find('div', attrs = {
+ 'class': x
+ })
+
+ if html2:
+ html = html2.contents
+ html = ''.join([str(x) for x in html])
+ break
+ except:
+ log.error('Failed parsing IMDB page "%s": %s', (url, traceback.format_exc()))
+
+ html = str(html)
+ imdbs = getImdb(html, multiple = True) if html else []
+
+ return imdbs
+
class IMDBWatchlist(IMDBBase):
@@ -65,22 +93,7 @@ class IMDBWatchlist(IMDBBase):
try:
w_url = '%s&start=%s' % (watchlist_url, start)
- log.debug('Started IMDB watchlists: %s', w_url)
- html = self.getHTMLData(w_url)
-
- try:
- split = splitString(html, split_on="
")
- html2 = BeautifulSoup(split[1])
- html = html2.find('div', attrs = {'class': 'list compact'}).contents
- html = ''.join([str(x) for x in html])
- except:
- try:
- split = splitString(html, split_on="
")[1]
- html = splitString(split, split_on="
")[0]
- except:
- pass
-
- imdbs = getImdb(html, multiple = True) if html else []
+ imdbs = self.getFromURL(w_url)
for imdb in imdbs:
if imdb not in movies:
@@ -115,12 +128,12 @@ class IMDBAutomation(IMDBBase):
'boxoffice': {
'order': 2,
'name': 'IMDB - Box Office',
- 'url': 'http://www.imdb.com/chart/',
+ 'url': 'http://www.imdb.com/boxoffice/',
},
'rentals': {
'order': 3,
'name': 'IMDB - Top DVD rentals',
- 'url': 'http://m.imdb.com/boxoffice_json',
+ 'url': 'http://www.imdb.com/boxoffice/rentals',
'type': 'json',
},
'top250': {
@@ -130,8 +143,6 @@ class IMDBAutomation(IMDBBase):
},
}
- first_table = ['boxoffice']
-
def getIMDBids(self):
movies = []
@@ -141,36 +152,19 @@ class IMDBAutomation(IMDBBase):
url = chart.get('url')
if self.conf('automation_charts_%s' % name):
- data = self.getHTMLData(url)
+ imdb_ids = self.getFromURL(url)
- if data:
- try:
- html = BeautifulSoup(data)
+ try:
+ for imdb_id in imdb_ids:
+ info = self.getInfo(imdb_id)
+ if info and self.isMinimalMovie(info):
+ movies.append(imdb_id)
- if chart.get('type', 'html') == 'html':
- result_div = html.find('div', attrs = {'id': 'main'})
+ if self.shuttingDown():
+ break
- try:
- if url in self.first_table:
- table = result_div.find('table')
- result_div = table if table else result_div
- except:
- pass
-
- imdb_ids = getImdb(str(result_div), multiple = True)
- else:
- imdb_ids = getImdb(str(data), multiple = True)
-
- for imdb_id in imdb_ids:
- info = self.getInfo(imdb_id)
- if info and self.isMinimalMovie(info):
- movies.append(imdb_id)
-
- if self.shuttingDown():
- break
-
- except:
- log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc()))
+ except:
+ log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc()))
return movies
@@ -188,42 +182,25 @@ class IMDBAutomation(IMDBBase):
chart['list'] = []
- data = self.getHTMLData(url)
- if data:
- html = BeautifulSoup(data)
+ imdb_ids = self.getFromURL(url)
- try:
+ try:
+ for imdb_id in imdb_ids[0:max_items]:
- if chart.get('type', 'html') == 'html':
- result_div = html.find('div', attrs = {'id': 'main'})
+ is_movie = fireEvent('movie.is_movie', identifier = imdb_id, single = True)
+ if not is_movie:
+ continue
- try:
- if url in self.first_table:
- table = result_div.find('table')
- result_div = table if table else result_div
- except:
- pass
+ info = self.getInfo(imdb_id)
+ chart['list'].append(info)
- imdb_ids = getImdb(str(result_div), multiple = True)
- else:
- imdb_ids = getImdb(str(data), multiple = True)
+ if self.shuttingDown():
+ break
+ except:
+ log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc()))
- for imdb_id in imdb_ids[0:max_items]:
-
- is_movie = fireEvent('movie.is_movie', identifier = imdb_id, single = True)
- if not is_movie:
- continue
-
- info = self.getInfo(imdb_id)
- chart['list'].append(info)
-
- if self.shuttingDown():
- break
- except:
- log.error('Failed loading IMDB chart results from %s: %s', (url, traceback.format_exc()))
-
- if chart['list']:
- movie_lists.append(chart)
+ if chart['list']:
+ movie_lists.append(chart)
return movie_lists