From dfd4c2eacf6183f195892c92e1dce3f7b90ad4b9 Mon Sep 17 00:00:00 2001
From: Ruud
Date: Sun, 5 Feb 2012 15:01:05 +0100
Subject: [PATCH] imdb get info
---
couchpotato/core/providers/movie/imdb/main.py | 90 +-
libs/imdb/Character.py | 197 --
libs/imdb/Company.py | 195 --
libs/imdb/Movie.py | 398 ----
libs/imdb/Person.py | 275 ---
libs/imdb/__init__.py | 907 --------
libs/imdb/_compat.py | 72 -
libs/imdb/_exceptions.py | 46 -
libs/imdb/_logging.py | 63 -
libs/imdb/articles.py | 142 --
libs/imdb/helpers.py | 548 -----
libs/imdb/locale/__init__.py | 29 -
libs/imdb/locale/generatepot.py | 78 -
libs/imdb/locale/imdbpy-en.po | 1257 -----------
libs/imdb/locale/imdbpy-it.po | 1300 -----------
libs/imdb/locale/imdbpy-tr.po | 1300 -----------
libs/imdb/locale/imdbpy.pot | 1301 -----------
libs/imdb/locale/msgfmt.py | 204 --
libs/imdb/locale/rebuildmo.py | 49 -
libs/imdb/parser/__init__.py | 28 -
libs/imdb/parser/http/__init__.py | 775 -------
libs/imdb/parser/http/bsouplxml/__init__.py | 0
libs/imdb/parser/http/bsouplxml/_bsoup.py | 1970 -----------------
libs/imdb/parser/http/bsouplxml/bsoupxpath.py | 394 ----
libs/imdb/parser/http/bsouplxml/etree.py | 75 -
libs/imdb/parser/http/bsouplxml/html.py | 31 -
libs/imdb/parser/http/characterParser.py | 203 --
libs/imdb/parser/http/companyParser.py | 91 -
libs/imdb/parser/http/movieParser.py | 1965 ----------------
libs/imdb/parser/http/personParser.py | 559 -----
.../imdb/parser/http/searchCharacterParser.py | 69 -
libs/imdb/parser/http/searchCompanyParser.py | 71 -
libs/imdb/parser/http/searchKeywordParser.py | 111 -
libs/imdb/parser/http/searchMovieParser.py | 178 --
libs/imdb/parser/http/searchPersonParser.py | 92 -
libs/imdb/parser/http/topBottomParser.py | 106 -
libs/imdb/parser/http/utils.py | 855 -------
libs/imdb/parser/mobile/__init__.py | 833 -------
libs/imdb/parser/sql/__init__.py | 1589 -------------
libs/imdb/parser/sql/alchemyadapter.py | 508 -----
libs/imdb/parser/sql/cutils.c | 269 ---
libs/imdb/parser/sql/dbschema.py | 461 ----
libs/imdb/parser/sql/objectadapter.py | 203 --
libs/imdb/utils.py | 1545 -------------
44 files changed, 33 insertions(+), 21399 deletions(-)
delete mode 100644 libs/imdb/Character.py
delete mode 100644 libs/imdb/Company.py
delete mode 100644 libs/imdb/Movie.py
delete mode 100644 libs/imdb/Person.py
delete mode 100644 libs/imdb/__init__.py
delete mode 100644 libs/imdb/_compat.py
delete mode 100644 libs/imdb/_exceptions.py
delete mode 100644 libs/imdb/_logging.py
delete mode 100644 libs/imdb/articles.py
delete mode 100644 libs/imdb/helpers.py
delete mode 100644 libs/imdb/locale/__init__.py
delete mode 100644 libs/imdb/locale/generatepot.py
delete mode 100644 libs/imdb/locale/imdbpy-en.po
delete mode 100644 libs/imdb/locale/imdbpy-it.po
delete mode 100644 libs/imdb/locale/imdbpy-tr.po
delete mode 100644 libs/imdb/locale/imdbpy.pot
delete mode 100644 libs/imdb/locale/msgfmt.py
delete mode 100644 libs/imdb/locale/rebuildmo.py
delete mode 100644 libs/imdb/parser/__init__.py
delete mode 100644 libs/imdb/parser/http/__init__.py
delete mode 100644 libs/imdb/parser/http/bsouplxml/__init__.py
delete mode 100644 libs/imdb/parser/http/bsouplxml/_bsoup.py
delete mode 100644 libs/imdb/parser/http/bsouplxml/bsoupxpath.py
delete mode 100644 libs/imdb/parser/http/bsouplxml/etree.py
delete mode 100644 libs/imdb/parser/http/bsouplxml/html.py
delete mode 100644 libs/imdb/parser/http/characterParser.py
delete mode 100644 libs/imdb/parser/http/companyParser.py
delete mode 100644 libs/imdb/parser/http/movieParser.py
delete mode 100644 libs/imdb/parser/http/personParser.py
delete mode 100644 libs/imdb/parser/http/searchCharacterParser.py
delete mode 100644 libs/imdb/parser/http/searchCompanyParser.py
delete mode 100644 libs/imdb/parser/http/searchKeywordParser.py
delete mode 100644 libs/imdb/parser/http/searchMovieParser.py
delete mode 100644 libs/imdb/parser/http/searchPersonParser.py
delete mode 100644 libs/imdb/parser/http/topBottomParser.py
delete mode 100644 libs/imdb/parser/http/utils.py
delete mode 100644 libs/imdb/parser/mobile/__init__.py
delete mode 100644 libs/imdb/parser/sql/__init__.py
delete mode 100644 libs/imdb/parser/sql/alchemyadapter.py
delete mode 100644 libs/imdb/parser/sql/cutils.c
delete mode 100644 libs/imdb/parser/sql/dbschema.py
delete mode 100644 libs/imdb/parser/sql/objectadapter.py
delete mode 100644 libs/imdb/utils.py
diff --git a/couchpotato/core/providers/movie/imdb/main.py b/couchpotato/core/providers/movie/imdb/main.py
index a7ba2e0c..c9fd046f 100644
--- a/couchpotato/core/providers/movie/imdb/main.py
+++ b/couchpotato/core/providers/movie/imdb/main.py
@@ -1,81 +1,57 @@
from couchpotato.core.event import addEvent
from couchpotato.core.logger import CPLog
from couchpotato.core.providers.movie.base import MovieProvider
-from imdb import IMDb
+from imdb import IMDb, helpers
+from imdb._logging import setLevel
+import time
log = CPLog(__name__)
class IMDB(MovieProvider):
+ info_list = ('main', 'plot', 'release dates', 'taglines', 'synopsis')
+
def __init__(self):
#addEvent('movie.search', self.search)
+ #addEvent('movie.info', self.getInfo)
self.p = IMDb('http')
+ setLevel('warn')
- def search(self):
- print 'search'
-
- def conf(self, option):
- return self.config.get('IMDB', option)
-
- def find(self, q, limit = 8, alternative = True):
- ''' Find movie by name '''
-
- log.info('IMDB - Searching for movie: %s' % q)
+ def search(self, q, limit = 12): # stub: queries IMDb but does not convert or return the results yet
r = self.p.search_movie(q)
-
- return self.toResults(r, limit)
-
- def toResults(self, r, limit = 8, one = False):
- results = []
-
- if one:
- new = self.feedItem()
- new.imdb = 'tt' + r.movieID
- new.name = self.toSaveString(r['title'])
- try:
- new.year = r['year']
- except:
- new.year = ''
-
- return new
- else :
- nr = 0
- for movie in r:
- results.append(self.toResults(movie, one = True))
- nr += 1
- if nr == limit:
- break
-
- return results
-
- def findById(self, id):
- ''' Find movie by TheMovieDB ID '''
+ log.info('IMDB - Searched for movie: %s' % q) # use the module logger instead of a debug print to stdout
return []
+ def getInfo(self, identifier = None): # build the movie-info dict for an IMDB id such as 'tt0123456'
- def findByImdbId(self, id, details = False):
- ''' Find movie by IMDB ID '''
+ m = self.p.get_movie(identifier.replace('tt', ''), info = self.info_list)
- log.info('IMDB - Searching for movie: %s' % str(id))
+ poster = m.get('cover url') # .get(): a title without a cover must not raise; empty case handled below
+ poster_original = helpers.fullSizeCoverURL(m)
- r = self.p.get_movie(id.replace('tt', ''))
+ movie_data = {
+ 'id': identifier,
+ 'titles': [m['title']],
+ 'original_title': m['title'],
+ 'rating': {
+ 'imdb': (m.get('rating'), m.get('votes')),
+ },
+ 'images': {
+ 'poster': [poster] if poster else [],
+ 'poster_original': [poster_original] if poster_original else [],
+ },
+ 'imdb': identifier,
+ 'runtime': (m.get('runtime') or [''])[0].split('::')[0].split(':')[-1], # accepts '120', 'USA:120' and 'USA:120::(note)'; '' when missing
+ 'released': ((m.get('release dates') or ['::'])[0].split('::') + [''])[1], # second '::' field of the first entry; '' when missing or malformed
+ 'year': m.get('year', ''), # same '' fallback the previous implementation used for a missing year
+ 'plot': m.get('synopsis', ''),
+ 'tagline': (m.get('taglines') or [''])[0], # first tagline; '' when the title has none
+ 'genres': m.get('genres', []),
+ }
- if not details:
- return self.toResults(r, one = True)
- else:
- self.p.update(r)
- self.p.update(r, info = 'release dates')
- self.p.update(r, info = 'taglines')
- return r
-
- def get_IMDb_instance(self):
- return IMDb('http')
-
-
- def findReleaseDate(self, movie):
- pass
+ return movie_data
diff --git a/libs/imdb/Character.py b/libs/imdb/Character.py
deleted file mode 100644
index 21264948..00000000
--- a/libs/imdb/Character.py
+++ /dev/null
@@ -1,197 +0,0 @@
-"""
-Character module (imdb package).
-
-This module provides the Character class, used to store information about
-a given character.
-
-Copyright 2007-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-from copy import deepcopy
-
-from imdb.utils import analyze_name, build_name, flatten, _Container, cmpPeople
-
-
-class Character(_Container):
- """A Character.
-
- Every information about a character can be accessed as:
- characterObject['information']
- to get a list of the kind of information stored in a
- Character object, use the keys() method; some useful aliases
- are defined (as "also known as" for the "akas" key);
- see the keys_alias dictionary.
- """
- # The default sets of information retrieved.
- default_info = ('main', 'filmography', 'biography')
-
- # Aliases for some not-so-intuitive keys.
- keys_alias = {'mini biography': 'biography',
- 'bio': 'biography',
- 'character biography': 'biography',
- 'character biographies': 'biography',
- 'biographies': 'biography',
- 'character bio': 'biography',
- 'aka': 'akas',
- 'also known as': 'akas',
- 'alternate names': 'akas',
- 'personal quotes': 'quotes',
- 'keys': 'keywords',
- 'keyword': 'keywords'}
-
- keys_tomodify_list = ('biography', 'quotes')
-
- cmpFunct = cmpPeople
-
- def _init(self, **kwds):
- """Initialize a Character object.
-
- *characterID* -- the unique identifier for the character.
- *name* -- the name of the Character, if not in the data dictionary.
- *myName* -- the nickname you use for this character.
- *myID* -- your personal id for this character.
- *data* -- a dictionary used to initialize the object.
- *notes* -- notes about the given character.
- *accessSystem* -- a string representing the data access system used.
- *titlesRefs* -- a dictionary with references to movies.
- *namesRefs* -- a dictionary with references to persons.
- *charactersRefs* -- a dictionary with references to characters.
- *modFunct* -- function called returning text fields.
- """
- name = kwds.get('name')
- if name and not self.data.has_key('name'):
- self.set_name(name)
- self.characterID = kwds.get('characterID', None)
- self.myName = kwds.get('myName', u'')
-
- def _reset(self):
- """Reset the Character object."""
- self.characterID = None
- self.myName = u''
-
- def set_name(self, name):
- """Set the name of the character."""
- # XXX: convert name to unicode, if it's a plain string?
- d = analyze_name(name, canonical=0)
- self.data.update(d)
-
- def _additional_keys(self):
- """Valid keys to append to the data.keys() list."""
- addkeys = []
- if self.data.has_key('name'):
- addkeys += ['long imdb name']
- if self.data.has_key('headshot'):
- addkeys += ['full-size headshot']
- return addkeys
-
- def _getitem(self, key):
- """Handle special keys."""
- ## XXX: can a character have an imdbIndex?
- if self.data.has_key('name'):
- if key == 'long imdb name':
- return build_name(self.data)
- if key == 'full-size headshot' and self.data.has_key('headshot'):
- return self._re_fullsizeURL.sub('', self.data.get('headshot', ''))
- return None
-
- def getID(self):
- """Return the characterID."""
- return self.characterID
-
- def __nonzero__(self):
- """The Character is "false" if the self.data does not contain a name."""
- # XXX: check the name and the characterID?
- if self.data.get('name'): return 1
- return 0
-
- def __contains__(self, item):
- """Return true if this Character was portrayed in the given Movie
- or it was impersonated by the given Person."""
- from Movie import Movie
- from Person import Person
- if isinstance(item, Person):
- for m in flatten(self.data, yieldDictKeys=1, scalar=Movie):
- if item.isSame(m.currentRole):
- return 1
- elif isinstance(item, Movie):
- for m in flatten(self.data, yieldDictKeys=1, scalar=Movie):
- if item.isSame(m):
- return 1
- return 0
-
- def isSameName(self, other):
- """Return true if two character have the same name
- and/or characterID."""
- if not isinstance(other, self.__class__):
- return 0
- if self.data.has_key('name') and \
- other.data.has_key('name') and \
- build_name(self.data, canonical=0) == \
- build_name(other.data, canonical=0):
- return 1
- if self.accessSystem == other.accessSystem and \
- self.characterID is not None and \
- self.characterID == other.characterID:
- return 1
- return 0
- isSameCharacter = isSameName
-
- def __deepcopy__(self, memo):
- """Return a deep copy of a Character instance."""
- c = Character(name=u'', characterID=self.characterID,
- myName=self.myName, myID=self.myID,
- data=deepcopy(self.data, memo),
- notes=self.notes, accessSystem=self.accessSystem,
- titlesRefs=deepcopy(self.titlesRefs, memo),
- namesRefs=deepcopy(self.namesRefs, memo),
- charactersRefs=deepcopy(self.charactersRefs, memo))
- c.current_info = list(self.current_info)
- c.set_mod_funct(self.modFunct)
- return c
-
- def __repr__(self):
- """String representation of a Character object."""
- r = '' % (self.characterID,
- self.accessSystem,
- self.get('name'))
- if isinstance(r, unicode): r = r.encode('utf_8', 'replace')
- return r
-
- def __str__(self):
- """Simply print the short name."""
- return self.get('name', u'').encode('utf_8', 'replace')
-
- def __unicode__(self):
- """Simply print the short title."""
- return self.get('name', u'')
-
- def summary(self):
- """Return a string with a pretty-printed summary for the character."""
- if not self: return u''
- s = u'Character\n=====\nName: %s\n' % \
- self.get('name', u'')
- bio = self.get('biography')
- if bio:
- s += u'Biography: %s\n' % bio[0]
- filmo = self.get('filmography')
- if filmo:
- a_list = [x.get('long imdb canonical title', u'')
- for x in filmo[:5]]
- s += u'Last movies with this character: %s.\n' % u'; '.join(a_list)
- return s
-
-
diff --git a/libs/imdb/Company.py b/libs/imdb/Company.py
deleted file mode 100644
index 5e05c840..00000000
--- a/libs/imdb/Company.py
+++ /dev/null
@@ -1,195 +0,0 @@
-"""
-company module (imdb package).
-
-This module provides the company class, used to store information about
-a given company.
-
-Copyright 2008-2009 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-from copy import deepcopy
-
-from imdb.utils import analyze_company_name, build_company_name, \
- flatten, _Container, cmpCompanies
-
-
-class Company(_Container):
- """A company.
-
- Every information about a company can be accessed as:
- companyObject['information']
- to get a list of the kind of information stored in a
- company object, use the keys() method; some useful aliases
- are defined (as "also known as" for the "akas" key);
- see the keys_alias dictionary.
- """
- # The default sets of information retrieved.
- default_info = ('main',)
-
- # Aliases for some not-so-intuitive keys.
- keys_alias = {
- 'distributor': 'distributors',
- 'special effects company': 'special effects companies',
- 'other company': 'miscellaneous companies',
- 'miscellaneous company': 'miscellaneous companies',
- 'other companies': 'miscellaneous companies',
- 'misc companies': 'miscellaneous companies',
- 'misc company': 'miscellaneous companies',
- 'production company': 'production companies'}
-
- keys_tomodify_list = ()
-
- cmpFunct = cmpCompanies
-
- def _init(self, **kwds):
- """Initialize a company object.
-
- *companyID* -- the unique identifier for the company.
- *name* -- the name of the company, if not in the data dictionary.
- *myName* -- the nickname you use for this company.
- *myID* -- your personal id for this company.
- *data* -- a dictionary used to initialize the object.
- *notes* -- notes about the given company.
- *accessSystem* -- a string representing the data access system used.
- *titlesRefs* -- a dictionary with references to movies.
- *namesRefs* -- a dictionary with references to persons.
- *charactersRefs* -- a dictionary with references to companies.
- *modFunct* -- function called returning text fields.
- """
- name = kwds.get('name')
- if name and not self.data.has_key('name'):
- self.set_name(name)
- self.companyID = kwds.get('companyID', None)
- self.myName = kwds.get('myName', u'')
-
- def _reset(self):
- """Reset the company object."""
- self.companyID = None
- self.myName = u''
-
- def set_name(self, name):
- """Set the name of the company."""
- # XXX: convert name to unicode, if it's a plain string?
- # Company diverges a bit from other classes, being able
- # to directly handle its "notes". AND THAT'S PROBABLY A BAD IDEA!
- oname = name = name.strip()
- notes = u''
- if name.endswith(')'):
- fparidx = name.find('(')
- if fparidx != -1:
- notes = name[fparidx:]
- name = name[:fparidx].rstrip()
- if self.notes:
- name = oname
- d = analyze_company_name(name)
- self.data.update(d)
- if notes and not self.notes:
- self.notes = notes
-
- def _additional_keys(self):
- """Valid keys to append to the data.keys() list."""
- if self.data.has_key('name'):
- return ['long imdb name']
- return []
-
- def _getitem(self, key):
- """Handle special keys."""
- ## XXX: can a company have an imdbIndex?
- if self.data.has_key('name'):
- if key == 'long imdb name':
- return build_company_name(self.data)
- return None
-
- def getID(self):
- """Return the companyID."""
- return self.companyID
-
- def __nonzero__(self):
- """The company is "false" if the self.data does not contain a name."""
- # XXX: check the name and the companyID?
- if self.data.get('name'): return 1
- return 0
-
- def __contains__(self, item):
- """Return true if this company and the given Movie are related."""
- from Movie import Movie
- if isinstance(item, Movie):
- for m in flatten(self.data, yieldDictKeys=1, scalar=Movie):
- if item.isSame(m):
- return 1
- return 0
-
- def isSameName(self, other):
- """Return true if two company have the same name
- and/or companyID."""
- if not isinstance(other, self.__class__):
- return 0
- if self.data.has_key('name') and \
- other.data.has_key('name') and \
- build_company_name(self.data) == \
- build_company_name(other.data):
- return 1
- if self.accessSystem == other.accessSystem and \
- self.companyID is not None and \
- self.companyID == other.companyID:
- return 1
- return 0
- isSameCompany = isSameName
-
- def __deepcopy__(self, memo):
- """Return a deep copy of a company instance."""
- c = Company(name=u'', companyID=self.companyID,
- myName=self.myName, myID=self.myID,
- data=deepcopy(self.data, memo),
- notes=self.notes, accessSystem=self.accessSystem,
- titlesRefs=deepcopy(self.titlesRefs, memo),
- namesRefs=deepcopy(self.namesRefs, memo),
- charactersRefs=deepcopy(self.charactersRefs, memo))
- c.current_info = list(self.current_info)
- c.set_mod_funct(self.modFunct)
- return c
-
- def __repr__(self):
- """String representation of a Company object."""
- r = '' % (self.companyID,
- self.accessSystem,
- self.get('long imdb name'))
- if isinstance(r, unicode): r = r.encode('utf_8', 'replace')
- return r
-
- def __str__(self):
- """Simply print the short name."""
- return self.get('name', u'').encode('utf_8', 'replace')
-
- def __unicode__(self):
- """Simply print the short title."""
- return self.get('name', u'')
-
- def summary(self):
- """Return a string with a pretty-printed summary for the company."""
- if not self: return u''
- s = u'Company\n=======\nName: %s\n' % \
- self.get('name', u'')
- for k in ('distributor', 'production company', 'miscellaneous company',
- 'special effects company'):
- d = self.get(k, [])[:5]
- if not d: continue
- s += u'Last movies from this company (%s): %s.\n' % \
- (k, u'; '.join([x.get('long imdb title', u'') for x in d]))
- return s
-
-
diff --git a/libs/imdb/Movie.py b/libs/imdb/Movie.py
deleted file mode 100644
index 37ae49e6..00000000
--- a/libs/imdb/Movie.py
+++ /dev/null
@@ -1,398 +0,0 @@
-"""
-Movie module (imdb package).
-
-This module provides the Movie class, used to store information about
-a given movie.
-
-Copyright 2004-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-from copy import deepcopy
-
-from imdb import articles
-from imdb.utils import analyze_title, build_title, canonicalTitle, \
- flatten, _Container, cmpMovies
-
-
-class Movie(_Container):
- """A Movie.
-
- Every information about a movie can be accessed as:
- movieObject['information']
- to get a list of the kind of information stored in a
- Movie object, use the keys() method; some useful aliases
- are defined (as "casting" for the "casting director" key); see
- the keys_alias dictionary.
- """
- # The default sets of information retrieved.
- default_info = ('main', 'plot')
-
- # Aliases for some not-so-intuitive keys.
- keys_alias = {
- 'tv schedule': 'airing',
- 'user rating': 'rating',
- 'plot summary': 'plot',
- 'plot summaries': 'plot',
- 'directed by': 'director',
- 'created by': 'creator',
- 'writing credits': 'writer',
- 'produced by': 'producer',
- 'original music by': 'original music',
- 'non-original music by': 'non-original music',
- 'music': 'original music',
- 'cinematography by': 'cinematographer',
- 'cinematography': 'cinematographer',
- 'film editing by': 'editor',
- 'film editing': 'editor',
- 'editing': 'editor',
- 'actors': 'cast',
- 'actresses': 'cast',
- 'casting by': 'casting director',
- 'casting': 'casting director',
- 'art direction by': 'art direction',
- 'set decoration by': 'set decoration',
- 'costume design by': 'costume designer',
- 'costume design': 'costume designer',
- 'makeup department': 'make up',
- 'makeup': 'make up',
- 'make-up': 'make up',
- 'production management': 'production manager',
- 'production company': 'production companies',
- 'second unit director or assistant director':
- 'assistant director',
- 'second unit director': 'assistant director',
- 'sound department': 'sound crew',
- 'costume and wardrobe department': 'costume department',
- 'special effects by': 'special effects',
- 'visual effects by': 'visual effects',
- 'special effects company': 'special effects companies',
- 'stunts': 'stunt performer',
- 'other crew': 'miscellaneous crew',
- 'misc crew': 'miscellaneous crew',
- 'miscellaneouscrew': 'miscellaneous crew',
- 'crewmembers': 'miscellaneous crew',
- 'crew members': 'miscellaneous crew',
- 'other companies': 'miscellaneous companies',
- 'misc companies': 'miscellaneous companies',
- 'miscellaneous company': 'miscellaneous companies',
- 'misc company': 'miscellaneous companies',
- 'other company': 'miscellaneous companies',
- 'aka': 'akas',
- 'also known as': 'akas',
- 'country': 'countries',
- 'production country': 'countries',
- 'production countries': 'countries',
- 'genre': 'genres',
- 'runtime': 'runtimes',
- 'lang': 'languages',
- 'color': 'color info',
- 'cover': 'cover url',
- 'full-size cover': 'full-size cover url',
- 'seasons': 'number of seasons',
- 'language': 'languages',
- 'certificate': 'certificates',
- 'certifications': 'certificates',
- 'certification': 'certificates',
- 'miscellaneous links': 'misc links',
- 'miscellaneous': 'misc links',
- 'soundclips': 'sound clips',
- 'videoclips': 'video clips',
- 'photographs': 'photo sites',
- 'distributor': 'distributors',
- 'distribution': 'distributors',
- 'distribution companies': 'distributors',
- 'distribution company': 'distributors',
- 'guest': 'guests',
- 'guest appearances': 'guests',
- 'tv guests': 'guests',
- 'notable tv guest appearances': 'guests',
- 'episodes cast': 'guests',
- 'episodes number': 'number of episodes',
- 'amazon review': 'amazon reviews',
- 'merchandising': 'merchandising links',
- 'merchandise': 'merchandising links',
- 'sales': 'merchandising links',
- 'faq': 'faqs',
- 'parental guide': 'parents guide',
- 'frequently asked questions': 'faqs'}
-
- keys_tomodify_list = ('plot', 'trivia', 'alternate versions', 'goofs',
- 'quotes', 'dvd', 'laserdisc', 'news', 'soundtrack',
- 'crazy credits', 'business', 'supplements',
- 'video review', 'faqs')
-
- cmpFunct = cmpMovies
-
- def _init(self, **kwds):
- """Initialize a Movie object.
-
- *movieID* -- the unique identifier for the movie.
- *title* -- the title of the Movie, if not in the data dictionary.
- *myTitle* -- your personal title for the movie.
- *myID* -- your personal identifier for the movie.
- *data* -- a dictionary used to initialize the object.
- *currentRole* -- a Character instance representing the current role
- or duty of a person in this movie, or a Person
- object representing the actor/actress who played
- a given character in a Movie. If a string is
- passed, an object is automatically build.
- *roleID* -- if available, the characterID/personID of the currentRole
- object.
- *roleIsPerson* -- when False (default) the currentRole is assumed
- to be a Character object, otherwise a Person.
- *notes* -- notes for the person referred in the currentRole
- attribute; e.g.: '(voice)'.
- *accessSystem* -- a string representing the data access system used.
- *titlesRefs* -- a dictionary with references to movies.
- *namesRefs* -- a dictionary with references to persons.
- *charactersRefs* -- a dictionary with references to characters.
- *modFunct* -- function called returning text fields.
- """
- title = kwds.get('title')
- if title and not self.data.has_key('title'):
- self.set_title(title)
- self.movieID = kwds.get('movieID', None)
- self.myTitle = kwds.get('myTitle', u'')
-
- def _reset(self):
- """Reset the Movie object."""
- self.movieID = None
- self.myTitle = u''
-
- def set_title(self, title):
- """Set the title of the movie."""
- # XXX: convert title to unicode, if it's a plain string?
- d_title = analyze_title(title)
- self.data.update(d_title)
-
- def _additional_keys(self):
- """Valid keys to append to the data.keys() list."""
- addkeys = []
- if self.data.has_key('title'):
- addkeys += ['canonical title', 'long imdb title',
- 'long imdb canonical title',
- 'smart canonical title',
- 'smart long imdb canonical title']
- if self.data.has_key('episode of'):
- addkeys += ['long imdb episode title', 'series title',
- 'canonical series title', 'episode title',
- 'canonical episode title',
- 'smart canonical series title',
- 'smart canonical episode title']
- if self.data.has_key('cover url'):
- addkeys += ['full-size cover url']
- return addkeys
-
- def guessLanguage(self):
- """Guess the language of the title of this movie; returns None
- if there are no hints."""
- lang = self.get('languages')
- if lang:
- lang = lang[0]
- else:
- country = self.get('countries')
- if country:
- lang = articles.COUNTRY_LANG.get(country[0])
- return lang
-
- def smartCanonicalTitle(self, title=None, lang=None):
- """Return the canonical title, guessing its language.
- The title can be forces with the 'title' argument (internally
- used) and the language can be forced with the 'lang' argument,
- otherwise it's auto-detected."""
- if title is None:
- title = self.data.get('title', u'')
- if lang is None:
- lang = self.guessLanguage()
- return canonicalTitle(title, lang=lang)
-
- def _getitem(self, key):
- """Handle special keys."""
- if self.data.has_key('episode of'):
- if key == 'long imdb episode title':
- return build_title(self.data)
- elif key == 'series title':
- return self.data['episode of']['title']
- elif key == 'canonical series title':
- ser_title = self.data['episode of']['title']
- return canonicalTitle(ser_title)
- elif key == 'smart canonical series title':
- ser_title = self.data['episode of']['title']
- return self.smartCanonicalTitle(ser_title)
- elif key == 'episode title':
- return self.data.get('title', u'')
- elif key == 'canonical episode title':
- return canonicalTitle(self.data.get('title', u''))
- elif key == 'smart canonical episode title':
- return self.smartCanonicalTitle(self.data.get('title', u''))
- if self.data.has_key('title'):
- if key == 'title':
- return self.data['title']
- elif key == 'long imdb title':
- return build_title(self.data)
- elif key == 'canonical title':
- return canonicalTitle(self.data['title'])
- elif key == 'smart canonical title':
- return self.smartCanonicalTitle(self.data['title'])
- elif key == 'long imdb canonical title':
- return build_title(self.data, canonical=1)
- elif key == 'smart long imdb canonical title':
- return build_title(self.data, canonical=1,
- lang=self.guessLanguage())
- if key == 'full-size cover url' and self.data.has_key('cover url'):
- return self._re_fullsizeURL.sub('', self.data.get('cover url', ''))
- return None
-
- def getID(self):
- """Return the movieID."""
- return self.movieID
-
- def __nonzero__(self):
- """The Movie is "false" if the self.data does not contain a title."""
- # XXX: check the title and the movieID?
- if self.data.has_key('title'): return 1
- return 0
-
- def isSameTitle(self, other):
- """Return true if this and the compared object have the same
- long imdb title and/or movieID.
- """
- # XXX: obsolete?
- if not isinstance(other, self.__class__): return 0
- if self.data.has_key('title') and \
- other.data.has_key('title') and \
- build_title(self.data, canonical=0) == \
- build_title(other.data, canonical=0):
- return 1
- if self.accessSystem == other.accessSystem and \
- self.movieID is not None and self.movieID == other.movieID:
- return 1
- return 0
- isSameMovie = isSameTitle # XXX: just for backward compatiblity.
-
- def __contains__(self, item):
- """Return true if the given Person object is listed in this Movie,
- or if the the given Character is represented in this Movie."""
- from Person import Person
- from Character import Character
- from Company import Company
- if isinstance(item, Person):
- for p in flatten(self.data, yieldDictKeys=1, scalar=Person,
- toDescend=(list, dict, tuple, Movie)):
- if item.isSame(p):
- return 1
- elif isinstance(item, Character):
- for p in flatten(self.data, yieldDictKeys=1, scalar=Person,
- toDescend=(list, dict, tuple, Movie)):
- if item.isSame(p.currentRole):
- return 1
- elif isinstance(item, Company):
- for c in flatten(self.data, yieldDictKeys=1, scalar=Company,
- toDescend=(list, dict, tuple, Movie)):
- if item.isSame(c):
- return 1
- return 0
-
- def __deepcopy__(self, memo):
- """Return a deep copy of a Movie instance."""
- m = Movie(title=u'', movieID=self.movieID, myTitle=self.myTitle,
- myID=self.myID, data=deepcopy(self.data, memo),
- currentRole=deepcopy(self.currentRole, memo),
- roleIsPerson=self._roleIsPerson,
- notes=self.notes, accessSystem=self.accessSystem,
- titlesRefs=deepcopy(self.titlesRefs, memo),
- namesRefs=deepcopy(self.namesRefs, memo),
- charactersRefs=deepcopy(self.charactersRefs, memo))
- m.current_info = list(self.current_info)
- m.set_mod_funct(self.modFunct)
- return m
-
- def __repr__(self):
- """String representation of a Movie object."""
- # XXX: add also currentRole and notes, if present?
- if self.has_key('long imdb episode title'):
- title = self.get('long imdb episode title')
- else:
- title = self.get('long imdb title')
- r = '' % (self.movieID, self.accessSystem,
- title)
- if isinstance(r, unicode): r = r.encode('utf_8', 'replace')
- return r
-
- def __str__(self):
- """Simply print the short title."""
- return self.get('title', u'').encode('utf_8', 'replace')
-
- def __unicode__(self):
- """Simply print the short title."""
- return self.get('title', u'')
-
- def summary(self):
- """Return a string with a pretty-printed summary for the movie."""
- if not self: return u''
- def _nameAndRole(personList, joiner=u', '):
- """Build a pretty string with name and role."""
- nl = []
- for person in personList:
- n = person.get('name', u'')
- if person.currentRole: n += u' (%s)' % person.currentRole
- nl.append(n)
- return joiner.join(nl)
- s = u'Movie\n=====\nTitle: %s\n' % \
- self.get('long imdb canonical title', u'')
- genres = self.get('genres')
- if genres: s += u'Genres: %s.\n' % u', '.join(genres)
- director = self.get('director')
- if director:
- s += u'Director: %s.\n' % _nameAndRole(director)
- writer = self.get('writer')
- if writer:
- s += u'Writer: %s.\n' % _nameAndRole(writer)
- cast = self.get('cast')
- if cast:
- cast = cast[:5]
- s += u'Cast: %s.\n' % _nameAndRole(cast)
- runtime = self.get('runtimes')
- if runtime:
- s += u'Runtime: %s.\n' % u', '.join(runtime)
- countries = self.get('countries')
- if countries:
- s += u'Country: %s.\n' % u', '.join(countries)
- lang = self.get('languages')
- if lang:
- s += u'Language: %s.\n' % u', '.join(lang)
- rating = self.get('rating')
- if rating:
- s += u'Rating: %s' % rating
- nr_votes = self.get('votes')
- if nr_votes:
- s += u' (%s votes)' % nr_votes
- s += u'.\n'
- plot = self.get('plot')
- if not plot:
- plot = self.get('plot summary')
- if plot:
- plot = [plot]
- if plot:
- plot = plot[0]
- i = plot.find('::')
- if i != -1:
- plot = plot[:i]
- s += u'Plot: %s' % plot
- return s
-
-
diff --git a/libs/imdb/Person.py b/libs/imdb/Person.py
deleted file mode 100644
index 6e3e4623..00000000
--- a/libs/imdb/Person.py
+++ /dev/null
@@ -1,275 +0,0 @@
-"""
-Person module (imdb package).
-
-This module provides the Person class, used to store information about
-a given person.
-
-Copyright 2004-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-from copy import deepcopy
-
-from imdb.utils import analyze_name, build_name, normalizeName, \
- flatten, _Container, cmpPeople
-
-
-class Person(_Container):
- """A Person.
-
- Every information about a person can be accessed as:
- personObject['information']
- to get a list of the kind of information stored in a
- Person object, use the keys() method; some useful aliases
- are defined (as "biography" for the "mini biography" key);
- see the keys_alias dictionary.
- """
- # The default sets of information retrieved.
- default_info = ('main', 'filmography', 'biography')
-
- # Aliases for some not-so-intuitive keys.
- keys_alias = {'biography': 'mini biography',
- 'bio': 'mini biography',
- 'aka': 'akas',
- 'also known as': 'akas',
- 'nick name': 'nick names',
- 'nicks': 'nick names',
- 'nickname': 'nick names',
- 'miscellaneouscrew': 'miscellaneous crew',
- 'crewmembers': 'miscellaneous crew',
- 'misc': 'miscellaneous crew',
- 'guest': 'notable tv guest appearances',
- 'guests': 'notable tv guest appearances',
- 'tv guest': 'notable tv guest appearances',
- 'guest appearances': 'notable tv guest appearances',
- 'spouses': 'spouse',
- 'salary': 'salary history',
- 'salaries': 'salary history',
- 'otherworks': 'other works',
- "maltin's biography":
- "biography from leonard maltin's movie encyclopedia",
- "leonard maltin's biography":
- "biography from leonard maltin's movie encyclopedia",
- 'real name': 'birth name',
- 'where are they now': 'where now',
- 'personal quotes': 'quotes',
- 'mini-biography author': 'imdb mini-biography by',
- 'biography author': 'imdb mini-biography by',
- 'genre': 'genres',
- 'portrayed': 'portrayed in',
- 'keys': 'keywords',
- 'trademarks': 'trade mark',
- 'trade mark': 'trade mark',
- 'trade marks': 'trade mark',
- 'trademark': 'trade mark',
- 'pictorials': 'pictorial',
- 'magazine covers': 'magazine cover photo',
- 'magazine-covers': 'magazine cover photo',
- 'tv series episodes': 'episodes',
- 'tv-series episodes': 'episodes',
- 'articles': 'article',
- 'keyword': 'keywords'}
-
- # 'nick names'???
- keys_tomodify_list = ('mini biography', 'spouse', 'quotes', 'other works',
- 'salary history', 'trivia', 'trade mark', 'news',
- 'books', 'biographical movies', 'portrayed in',
- 'where now', 'interviews', 'article',
- "biography from leonard maltin's movie encyclopedia")
-
- cmpFunct = cmpPeople
-
- def _init(self, **kwds):
- """Initialize a Person object.
-
- *personID* -- the unique identifier for the person.
- *name* -- the name of the Person, if not in the data dictionary.
- *myName* -- the nickname you use for this person.
- *myID* -- your personal id for this person.
- *data* -- a dictionary used to initialize the object.
- *currentRole* -- a Character instance representing the current role
- or duty of a person in this movie, or a Person
- object representing the actor/actress who played
- a given character in a Movie. If a string is
- passed, an object is automatically build.
- *roleID* -- if available, the characterID/personID of the currentRole
- object.
- *roleIsPerson* -- when False (default) the currentRole is assumed
- to be a Character object, otherwise a Person.
- *notes* -- notes about the given person for a specific movie
- or role (e.g.: the alias used in the movie credits).
- *accessSystem* -- a string representing the data access system used.
- *titlesRefs* -- a dictionary with references to movies.
- *namesRefs* -- a dictionary with references to persons.
- *modFunct* -- function called returning text fields.
- *billingPos* -- position of this person in the credits list.
- """
- name = kwds.get('name')
- if name and not self.data.has_key('name'):
- self.set_name(name)
- self.personID = kwds.get('personID', None)
- self.myName = kwds.get('myName', u'')
- self.billingPos = kwds.get('billingPos', None)
-
- def _reset(self):
- """Reset the Person object."""
- self.personID = None
- self.myName = u''
- self.billingPos = None
-
- def _clear(self):
- """Reset the dictionary."""
- self.billingPos = None
-
- def set_name(self, name):
- """Set the name of the person."""
- # XXX: convert name to unicode, if it's a plain string?
- d = analyze_name(name, canonical=1)
- self.data.update(d)
-
- def _additional_keys(self):
- """Valid keys to append to the data.keys() list."""
- addkeys = []
- if self.data.has_key('name'):
- addkeys += ['canonical name', 'long imdb name',
- 'long imdb canonical name']
- if self.data.has_key('headshot'):
- addkeys += ['full-size headshot']
- return addkeys
-
- def _getitem(self, key):
- """Handle special keys."""
- if self.data.has_key('name'):
- if key == 'name':
- return normalizeName(self.data['name'])
- elif key == 'canonical name':
- return self.data['name']
- elif key == 'long imdb name':
- return build_name(self.data, canonical=0)
- elif key == 'long imdb canonical name':
- return build_name(self.data)
- if key == 'full-size headshot' and self.data.has_key('headshot'):
- return self._re_fullsizeURL.sub('', self.data.get('headshot', ''))
- return None
-
- def getID(self):
- """Return the personID."""
- return self.personID
-
- def __nonzero__(self):
- """The Person is "false" if the self.data does not contain a name."""
- # XXX: check the name and the personID?
- if self.data.has_key('name'): return 1
- return 0
-
- def __contains__(self, item):
- """Return true if this Person has worked in the given Movie,
- or if the fiven Character was played by this Person."""
- from Movie import Movie
- from Character import Character
- if isinstance(item, Movie):
- for m in flatten(self.data, yieldDictKeys=1, scalar=Movie):
- if item.isSame(m):
- return 1
- elif isinstance(item, Character):
- for m in flatten(self.data, yieldDictKeys=1, scalar=Movie):
- if item.isSame(m.currentRole):
- return 1
- return 0
-
- def isSameName(self, other):
- """Return true if two persons have the same name and imdbIndex
- and/or personID.
- """
- if not isinstance(other, self.__class__):
- return 0
- if self.data.has_key('name') and \
- other.data.has_key('name') and \
- build_name(self.data, canonical=1) == \
- build_name(other.data, canonical=1):
- return 1
- if self.accessSystem == other.accessSystem and \
- self.personID and self.personID == other.personID:
- return 1
- return 0
- isSamePerson = isSameName # XXX: just for backward compatiblity.
-
- def __deepcopy__(self, memo):
- """Return a deep copy of a Person instance."""
- p = Person(name=u'', personID=self.personID, myName=self.myName,
- myID=self.myID, data=deepcopy(self.data, memo),
- currentRole=deepcopy(self.currentRole, memo),
- roleIsPerson=self._roleIsPerson,
- notes=self.notes, accessSystem=self.accessSystem,
- titlesRefs=deepcopy(self.titlesRefs, memo),
- namesRefs=deepcopy(self.namesRefs, memo),
- charactersRefs=deepcopy(self.charactersRefs, memo))
- p.current_info = list(self.current_info)
- p.set_mod_funct(self.modFunct)
- p.billingPos = self.billingPos
- return p
-
- def __repr__(self):
- """String representation of a Person object."""
- # XXX: add also currentRole and notes, if present?
- r = '' % (self.personID, self.accessSystem,
- self.get('long imdb canonical name'))
- if isinstance(r, unicode): r = r.encode('utf_8', 'replace')
- return r
-
- def __str__(self):
- """Simply print the short name."""
- return self.get('name', u'').encode('utf_8', 'replace')
-
- def __unicode__(self):
- """Simply print the short title."""
- return self.get('name', u'')
-
- def summary(self):
- """Return a string with a pretty-printed summary for the person."""
- if not self: return u''
- s = u'Person\n=====\nName: %s\n' % \
- self.get('long imdb canonical name', u'')
- bdate = self.get('birth date')
- if bdate:
- s += u'Birth date: %s' % bdate
- bnotes = self.get('birth notes')
- if bnotes:
- s += u' (%s)' % bnotes
- s += u'.\n'
- ddate = self.get('death date')
- if ddate:
- s += u'Death date: %s' % ddate
- dnotes = self.get('death notes')
- if dnotes:
- s += u' (%s)' % dnotes
- s += u'.\n'
- bio = self.get('mini biography')
- if bio:
- s += u'Biography: %s\n' % bio[0]
- director = self.get('director')
- if director:
- d_list = [x.get('long imdb canonical title', u'')
- for x in director[:3]]
- s += u'Last movies directed: %s.\n' % u'; '.join(d_list)
- act = self.get('actor') or self.get('actress')
- if act:
- a_list = [x.get('long imdb canonical title', u'')
- for x in act[:5]]
- s += u'Last movies acted: %s.\n' % u'; '.join(a_list)
- return s
-
-
diff --git a/libs/imdb/__init__.py b/libs/imdb/__init__.py
deleted file mode 100644
index faaa7d38..00000000
--- a/libs/imdb/__init__.py
+++ /dev/null
@@ -1,907 +0,0 @@
-"""
-imdb package.
-
-This package can be used to retrieve information about a movie or
-a person from the IMDb database.
-It can fetch data through different media (e.g.: the IMDb web pages,
-a SQL database, etc.)
-
-Copyright 2004-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-__all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
- 'available_access_systems']
-__version__ = VERSION = '4.8dev20110303'
-
-# Import compatibility module (importing it is enough).
-import _compat
-
-import sys, os, ConfigParser, logging
-from types import MethodType
-
-from imdb import Movie, Person, Character, Company
-import imdb._logging
-from imdb._exceptions import IMDbError, IMDbDataAccessError
-from imdb.utils import build_title, build_name, build_company_name
-
-_aux_logger = logging.getLogger('imdbpy.aux')
-
-
-# URLs of the main pages for movies, persons, characters and queries.
-imdbURL_base = 'http://akas.imdb.com/'
-# http://akas.imdb.com/title/
-imdbURL_movie_base = '%stitle/' % imdbURL_base
-# http://akas.imdb.com/title/tt%s/
-imdbURL_movie_main = imdbURL_movie_base + 'tt%s/'
-# http://akas.imdb.com/name/
-imdbURL_person_base = '%sname/' % imdbURL_base
-# http://akas.imdb.com/name/nm%s/
-imdbURL_person_main = imdbURL_person_base + 'nm%s/'
-# http://akas.imdb.com/character/
-imdbURL_character_base = '%scharacter/' % imdbURL_base
-# http://akas.imdb.com/character/ch%s/
-imdbURL_character_main = imdbURL_character_base + 'ch%s/'
-# http://akas.imdb.com/company/
-imdbURL_company_base = '%scompany/' % imdbURL_base
-# http://akas.imdb.com/company/co%s/
-imdbURL_company_main = imdbURL_company_base + 'co%s/'
-# http://akas.imdb.com/keyword/%s/
-imdbURL_keyword_main = imdbURL_base + 'keyword/%s/'
-# http://akas.imdb.com/chart/top
-imdbURL_top250 = imdbURL_base + 'chart/top'
-# http://akas.imdb.com/chart/bottom
-imdbURL_bottom100 = imdbURL_base + 'chart/bottom'
-# http://akas.imdb.com/find?%s
-imdbURL_find = imdbURL_base + 'find?%s'
-
-# Name of the configuration file.
-confFileName = 'imdbpy.cfg'
-
-class ConfigParserWithCase(ConfigParser.ConfigParser):
- """A case-sensitive parser for configuration files."""
- def __init__(self, defaults=None, confFile=None, *args, **kwds):
- """Initialize the parser.
-
- *defaults* -- defaults values.
- *confFile* -- the file (or list of files) to parse."""
- ConfigParser.ConfigParser.__init__(self, defaults=defaults)
- if confFile is None:
- dotFileName = '.' + confFileName
- # Current and home directory.
- confFile = [os.path.join(os.getcwd(), confFileName),
- os.path.join(os.getcwd(), dotFileName),
- os.path.join(os.path.expanduser('~'), confFileName),
- os.path.join(os.path.expanduser('~'), dotFileName)]
- if os.name == 'posix':
- sep = getattr(os.path, 'sep', '/')
- # /etc/ and /etc/conf.d/
- confFile.append(os.path.join(sep, 'etc', confFileName))
- confFile.append(os.path.join(sep, 'etc', 'conf.d',
- confFileName))
- else:
- # etc subdirectory of sys.prefix, for non-unix systems.
- confFile.append(os.path.join(sys.prefix, 'etc', confFileName))
- for fname in confFile:
- try:
- self.read(fname)
- except (ConfigParser.MissingSectionHeaderError,
- ConfigParser.ParsingError), e:
- _aux_logger.warn('Troubles reading config file: %s' % e)
- # Stop at the first valid file.
- if self.has_section('imdbpy'):
- break
-
- def optionxform(self, optionstr):
- """Option names are case sensitive."""
- return optionstr
-
- def _manageValue(self, value):
- """Custom substitutions for values."""
- if not isinstance(value, (str, unicode)):
- return value
- vlower = value.lower()
- if vlower in self._boolean_states:
- return self._boolean_states[vlower]
- elif vlower == 'none':
- return None
- return value
-
- def get(self, section, option, *args, **kwds):
- """Return the value of an option from a given section."""
- value = ConfigParser.ConfigParser.get(self, section, option,
- *args, **kwds)
- return self._manageValue(value)
-
- def items(self, section, *args, **kwds):
- """Return a list of (key, value) tuples of items of the
- given section."""
- if section != 'DEFAULT' and not self.has_section(section):
- return []
- keys = ConfigParser.ConfigParser.options(self, section)
- return [(k, self.get(section, k, *args, **kwds)) for k in keys]
-
- def getDict(self, section):
- """Return a dictionary of items of the specified section."""
- return dict(self.items(section))
-
-
-def IMDb(accessSystem=None, *arguments, **keywords):
- """Return an instance of the appropriate class.
- The accessSystem parameter is used to specify the kind of
- the preferred access system."""
- if accessSystem is None or accessSystem in ('auto', 'config'):
- try:
- cfg_file = ConfigParserWithCase(*arguments, **keywords)
- # Parameters set by the code take precedence.
- kwds = cfg_file.getDict('imdbpy')
- if 'accessSystem' in kwds:
- accessSystem = kwds['accessSystem']
- del kwds['accessSystem']
- else:
- accessSystem = 'http'
- kwds.update(keywords)
- keywords = kwds
- except Exception, e:
- import logging
- logging.getLogger('imdbpy').warn('Unable to read configuration' \
- ' file; complete error: %s' % e)
- # It just LOOKS LIKE a bad habit: we tried to read config
- # options from some files, but something is gone horribly
- # wrong: ignore everything and pretend we were called with
- # the 'http' accessSystem.
- accessSystem = 'http'
- if 'loggingLevel' in keywords:
- imdb._logging.setLevel(keywords['loggingLevel'])
- del keywords['loggingLevel']
- if 'loggingConfig' in keywords:
- logCfg = keywords['loggingConfig']
- del keywords['loggingConfig']
- try:
- import logging.config
- logging.config.fileConfig(os.path.expanduser(logCfg))
- except Exception, e:
- logging.getLogger('imdbpy').warn('unable to read logger ' \
- 'config: %s' % e)
- if accessSystem in ('http', 'web', 'html'):
- from parser.http import IMDbHTTPAccessSystem
- return IMDbHTTPAccessSystem(*arguments, **keywords)
- elif accessSystem in ('httpThin', 'webThin', 'htmlThin'):
- import logging
- logging.warn('httpThin is badly broken and' \
- ' will not be fixed; please switch' \
- ' to "http" or "mobile"')
- from parser.http import IMDbHTTPAccessSystem
- return IMDbHTTPAccessSystem(isThin=1, *arguments, **keywords)
- elif accessSystem in ('mobile',):
- from parser.mobile import IMDbMobileAccessSystem
- return IMDbMobileAccessSystem(*arguments, **keywords)
- elif accessSystem in ('local', 'files'):
- # The local access system was removed since IMDbPY 4.2.
- raise IMDbError, 'the local access system was removed since IMDbPY 4.2'
- elif accessSystem in ('sql', 'db', 'database'):
- try:
- from parser.sql import IMDbSqlAccessSystem
- except ImportError:
- raise IMDbError, 'the sql access system is not installed'
- return IMDbSqlAccessSystem(*arguments, **keywords)
- else:
- raise IMDbError, 'unknown kind of data access system: "%s"' \
- % accessSystem
-
-
-def available_access_systems():
- """Return the list of available data access systems."""
- asList = []
- # XXX: trying to import modules is a good thing?
- try:
- from parser.http import IMDbHTTPAccessSystem
- asList += ['http', 'httpThin']
- except ImportError:
- pass
- try:
- from parser.mobile import IMDbMobileAccessSystem
- asList.append('mobile')
- except ImportError:
- pass
- try:
- from parser.sql import IMDbSqlAccessSystem
- asList.append('sql')
- except ImportError:
- pass
- return asList
-
-
-# XXX: I'm not sure this is a good guess.
-# I suppose that an argument of the IMDb function can be used to
-# set a default encoding for the output, and then Movie, Person and
-# Character objects can use this default encoding, returning strings.
-# Anyway, passing unicode strings to search_movie(), search_person()
-# and search_character() methods is always safer.
-encoding = getattr(sys.stdin, 'encoding', '') or sys.getdefaultencoding()
-
-class IMDbBase:
- """The base class used to search for a movie/person/character and
- to get a Movie/Person/Character object.
-
- This class cannot directly fetch data of any kind and so you
- have to search the "real" code into a subclass."""
-
- # The name of the preferred access system (MUST be overridden
- # in the subclasses).
- accessSystem = 'UNKNOWN'
-
- # Top-level logger for IMDbPY.
- _imdb_logger = logging.getLogger('imdbpy')
-
- def __init__(self, defaultModFunct=None, results=20, keywordsResults=100,
- *arguments, **keywords):
- """Initialize the access system.
- If specified, defaultModFunct is the function used by
- default by the Person, Movie and Character objects, when
- accessing their text fields.
- """
- # The function used to output the strings that need modification (the
- # ones containing references to movie titles and person names).
- self._defModFunct = defaultModFunct
- # Number of results to get.
- try:
- results = int(results)
- except (TypeError, ValueError):
- results = 20
- if results < 1:
- results = 20
- self._results = results
- try:
- keywordsResults = int(keywordsResults)
- except (TypeError, ValueError):
- keywordsResults = 100
- if keywordsResults < 1:
- keywordsResults = 100
- self._keywordsResults = keywordsResults
-
- def _normalize_movieID(self, movieID):
- """Normalize the given movieID."""
- # By default, do nothing.
- return movieID
-
- def _normalize_personID(self, personID):
- """Normalize the given personID."""
- # By default, do nothing.
- return personID
-
- def _normalize_characterID(self, characterID):
- """Normalize the given characterID."""
- # By default, do nothing.
- return characterID
-
- def _normalize_companyID(self, companyID):
- """Normalize the given companyID."""
- # By default, do nothing.
- return companyID
-
- def _get_real_movieID(self, movieID):
- """Handle title aliases."""
- # By default, do nothing.
- return movieID
-
- def _get_real_personID(self, personID):
- """Handle name aliases."""
- # By default, do nothing.
- return personID
-
- def _get_real_characterID(self, characterID):
- """Handle character name aliases."""
- # By default, do nothing.
- return characterID
-
- def _get_real_companyID(self, companyID):
- """Handle company name aliases."""
- # By default, do nothing.
- return companyID
-
- def _get_infoset(self, prefname):
- """Return methods with the name starting with prefname."""
- infoset = []
- excludes = ('%sinfoset' % prefname,)
- preflen = len(prefname)
- for name in dir(self.__class__):
- if name.startswith(prefname) and name not in excludes:
- member = getattr(self.__class__, name)
- if isinstance(member, MethodType):
- infoset.append(name[preflen:].replace('_', ' '))
- return infoset
-
- def get_movie_infoset(self):
- """Return the list of info set available for movies."""
- return self._get_infoset('get_movie_')
-
- def get_person_infoset(self):
- """Return the list of info set available for persons."""
- return self._get_infoset('get_person_')
-
- def get_character_infoset(self):
- """Return the list of info set available for characters."""
- return self._get_infoset('get_character_')
-
- def get_company_infoset(self):
- """Return the list of info set available for companies."""
- return self._get_infoset('get_company_')
-
- def get_movie(self, movieID, info=Movie.Movie.default_info, modFunct=None):
- """Return a Movie object for the given movieID.
-
- The movieID is something used to univocally identify a movie;
- it can be the imdbID used by the IMDb web server, a file
- pointer, a line number in a file, an ID in a database, etc.
-
- info is the list of sets of information to retrieve.
-
- If specified, modFunct will be the function used by the Movie
- object when accessing its text fields (like 'plot')."""
- movieID = self._normalize_movieID(movieID)
- movieID = self._get_real_movieID(movieID)
- movie = Movie.Movie(movieID=movieID, accessSystem=self.accessSystem)
- modFunct = modFunct or self._defModFunct
- if modFunct is not None:
- movie.set_mod_funct(modFunct)
- self.update(movie, info)
- return movie
-
- get_episode = get_movie
-
- def _search_movie(self, title, results):
- """Return a list of tuples (movieID, {movieData})"""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def search_movie(self, title, results=None, _episodes=False):
- """Return a list of Movie objects for a query for the given title.
- The results argument is the maximum number of results to return."""
- if results is None:
- results = self._results
- try:
- results = int(results)
- except (ValueError, OverflowError):
- results = 20
- # XXX: I suppose it will be much safer if the user provides
- # an unicode string... this is just a guess.
- if not isinstance(title, unicode):
- title = unicode(title, encoding, 'replace')
- if not _episodes:
- res = self._search_movie(title, results)
- else:
- res = self._search_episode(title, results)
- return [Movie.Movie(movieID=self._get_real_movieID(mi),
- data=md, modFunct=self._defModFunct,
- accessSystem=self.accessSystem) for mi, md in res][:results]
-
- def _search_episode(self, title, results):
- """Return a list of tuples (movieID, {movieData})"""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def search_episode(self, title, results=None):
- """Return a list of Movie objects for a query for the given title.
- The results argument is the maximum number of results to return;
- this method searches only for titles of tv (mini) series' episodes."""
- return self.search_movie(title, results=results, _episodes=True)
-
- def get_person(self, personID, info=Person.Person.default_info,
- modFunct=None):
- """Return a Person object for the given personID.
-
- The personID is something used to univocally identify a person;
- it can be the imdbID used by the IMDb web server, a file
- pointer, a line number in a file, an ID in a database, etc.
-
- info is the list of sets of information to retrieve.
-
- If specified, modFunct will be the function used by the Person
- object when accessing its text fields (like 'mini biography')."""
- personID = self._normalize_personID(personID)
- personID = self._get_real_personID(personID)
- person = Person.Person(personID=personID,
- accessSystem=self.accessSystem)
- modFunct = modFunct or self._defModFunct
- if modFunct is not None:
- person.set_mod_funct(modFunct)
- self.update(person, info)
- return person
-
- def _search_person(self, name, results):
- """Return a list of tuples (personID, {personData})"""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def search_person(self, name, results=None):
- """Return a list of Person objects for a query for the given name.
-
- The results argument is the maximum number of results to return."""
- if results is None:
- results = self._results
- try:
- results = int(results)
- except (ValueError, OverflowError):
- results = 20
- if not isinstance(name, unicode):
- name = unicode(name, encoding, 'replace')
- res = self._search_person(name, results)
- return [Person.Person(personID=self._get_real_personID(pi),
- data=pd, modFunct=self._defModFunct,
- accessSystem=self.accessSystem) for pi, pd in res][:results]
-
- def get_character(self, characterID, info=Character.Character.default_info,
- modFunct=None):
- """Return a Character object for the given characterID.
-
- The characterID is something used to univocally identify a character;
- it can be the imdbID used by the IMDb web server, a file
- pointer, a line number in a file, an ID in a database, etc.
-
- info is the list of sets of information to retrieve.
-
- If specified, modFunct will be the function used by the Character
- object when accessing its text fields (like 'biography')."""
- characterID = self._normalize_characterID(characterID)
- characterID = self._get_real_characterID(characterID)
- character = Character.Character(characterID=characterID,
- accessSystem=self.accessSystem)
- modFunct = modFunct or self._defModFunct
- if modFunct is not None:
- character.set_mod_funct(modFunct)
- self.update(character, info)
- return character
-
- def _search_character(self, name, results):
- """Return a list of tuples (characterID, {characterData})"""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def search_character(self, name, results=None):
- """Return a list of Character objects for a query for the given name.
-
- The results argument is the maximum number of results to return."""
- if results is None:
- results = self._results
- try:
- results = int(results)
- except (ValueError, OverflowError):
- results = 20
- if not isinstance(name, unicode):
- name = unicode(name, encoding, 'replace')
- res = self._search_character(name, results)
- return [Character.Character(characterID=self._get_real_characterID(pi),
- data=pd, modFunct=self._defModFunct,
- accessSystem=self.accessSystem) for pi, pd in res][:results]
-
- def get_company(self, companyID, info=Company.Company.default_info,
- modFunct=None):
- """Return a Company object for the given companyID.
-
- The companyID is something used to univocally identify a company;
- it can be the imdbID used by the IMDb web server, a file
- pointer, a line number in a file, an ID in a database, etc.
-
- info is the list of sets of information to retrieve.
-
- If specified, modFunct will be the function used by the Company
- object when accessing its text fields (none, so far)."""
- companyID = self._normalize_companyID(companyID)
- companyID = self._get_real_companyID(companyID)
- company = Company.Company(companyID=companyID,
- accessSystem=self.accessSystem)
- modFunct = modFunct or self._defModFunct
- if modFunct is not None:
- company.set_mod_funct(modFunct)
- self.update(company, info)
- return company
-
- def _search_company(self, name, results):
- """Return a list of tuples (companyID, {companyData})"""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def search_company(self, name, results=None):
- """Return a list of Company objects for a query for the given name.
-
- The results argument is the maximum number of results to return."""
- if results is None:
- results = self._results
- try:
- results = int(results)
- except (ValueError, OverflowError):
- results = 20
- if not isinstance(name, unicode):
- name = unicode(name, encoding, 'replace')
- res = self._search_company(name, results)
- return [Company.Company(companyID=self._get_real_companyID(pi),
- data=pd, modFunct=self._defModFunct,
- accessSystem=self.accessSystem) for pi, pd in res][:results]
-
- def _search_keyword(self, keyword, results):
- """Return a list of 'keyword' strings."""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def search_keyword(self, keyword, results=None):
- """Search for existing keywords, similar to the given one."""
- if results is None:
- results = self._keywordsResults
- try:
- results = int(results)
- except (ValueError, OverflowError):
- results = 100
- if not isinstance(keyword, unicode):
- keyword = unicode(keyword, encoding, 'replace')
- return self._search_keyword(keyword, results)
-
- def _get_keyword(self, keyword, results):
- """Return a list of tuples (movieID, {movieData})"""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def get_keyword(self, keyword, results=None):
- """Return a list of movies for the given keyword."""
- if results is None:
- results = self._keywordsResults
- try:
- results = int(results)
- except (ValueError, OverflowError):
- results = 100
- # XXX: I suppose it will be much safer if the user provides
- # an unicode string... this is just a guess.
- if not isinstance(keyword, unicode):
- keyword = unicode(keyword, encoding, 'replace')
- res = self._get_keyword(keyword, results)
- return [Movie.Movie(movieID=self._get_real_movieID(mi),
- data=md, modFunct=self._defModFunct,
- accessSystem=self.accessSystem) for mi, md in res][:results]
-
- def _get_top_bottom_movies(self, kind):
- """Return the list of the top 250 or bottom 100 movies."""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- # This method must return a list of (movieID, {movieDict})
- # tuples. The kind parameter can be 'top' or 'bottom'.
- raise NotImplementedError, 'override this method'
-
- def get_top250_movies(self):
- """Return the list of the top 250 movies."""
- res = self._get_top_bottom_movies('top')
- return [Movie.Movie(movieID=self._get_real_movieID(mi),
- data=md, modFunct=self._defModFunct,
- accessSystem=self.accessSystem) for mi, md in res]
-
- def get_bottom100_movies(self):
- """Return the list of the bottom 100 movies."""
- res = self._get_top_bottom_movies('bottom')
- return [Movie.Movie(movieID=self._get_real_movieID(mi),
- data=md, modFunct=self._defModFunct,
- accessSystem=self.accessSystem) for mi, md in res]
-
- def new_movie(self, *arguments, **keywords):
- """Return a Movie object."""
- # XXX: not really useful...
- if 'title' in keywords:
- if not isinstance(keywords['title'], unicode):
- keywords['title'] = unicode(keywords['title'],
- encoding, 'replace')
- elif len(arguments) > 1:
- if not isinstance(arguments[1], unicode):
- arguments[1] = unicode(arguments[1], encoding, 'replace')
- return Movie.Movie(accessSystem=self.accessSystem,
- *arguments, **keywords)
-
- def new_person(self, *arguments, **keywords):
- """Return a Person object."""
- # XXX: not really useful...
- if 'name' in keywords:
- if not isinstance(keywords['name'], unicode):
- keywords['name'] = unicode(keywords['name'],
- encoding, 'replace')
- elif len(arguments) > 1:
- if not isinstance(arguments[1], unicode):
- arguments[1] = unicode(arguments[1], encoding, 'replace')
- return Person.Person(accessSystem=self.accessSystem,
- *arguments, **keywords)
-
- def new_character(self, *arguments, **keywords):
- """Return a Character object."""
- # XXX: not really useful...
- if 'name' in keywords:
- if not isinstance(keywords['name'], unicode):
- keywords['name'] = unicode(keywords['name'],
- encoding, 'replace')
- elif len(arguments) > 1:
- if not isinstance(arguments[1], unicode):
- arguments[1] = unicode(arguments[1], encoding, 'replace')
- return Character.Character(accessSystem=self.accessSystem,
- *arguments, **keywords)
-
- def new_company(self, *arguments, **keywords):
- """Return a Company object."""
- # XXX: not really useful...
- if 'name' in keywords:
- if not isinstance(keywords['name'], unicode):
- keywords['name'] = unicode(keywords['name'],
- encoding, 'replace')
- elif len(arguments) > 1:
- if not isinstance(arguments[1], unicode):
- arguments[1] = unicode(arguments[1], encoding, 'replace')
- return Company.Company(accessSystem=self.accessSystem,
- *arguments, **keywords)
-
- def update(self, mop, info=None, override=0):
- """Given a Movie, Person, Character or Company object with only
- partial information, retrieve the required set of information.
-
- info is the list of sets of information to retrieve.
-
- If override is set, the information are retrieved and updated
- even if they're already in the object."""
- # XXX: should this be a method of the Movie/Person/Character/Company
- # classes? NO! What for instances created by external functions?
- mopID = None
- prefix = ''
- if isinstance(mop, Movie.Movie):
- mopID = mop.movieID
- prefix = 'movie'
- elif isinstance(mop, Person.Person):
- mopID = mop.personID
- prefix = 'person'
- elif isinstance(mop, Character.Character):
- mopID = mop.characterID
- prefix = 'character'
- elif isinstance(mop, Company.Company):
- mopID = mop.companyID
- prefix = 'company'
- else:
- raise IMDbError, 'object ' + repr(mop) + \
- ' is not a Movie, Person, Character or Company instance'
- if mopID is None:
- # XXX: enough? It's obvious that there are Characters
- # objects without characterID, so I think they should
- # just do nothing, when an i.update(character) is tried.
- if prefix == 'character':
- return
- raise IMDbDataAccessError, \
- 'the supplied object has null movieID, personID or companyID'
- if mop.accessSystem == self.accessSystem:
- aSystem = self
- else:
- aSystem = IMDb(mop.accessSystem)
- if info is None:
- info = mop.default_info
- elif info == 'all':
- if isinstance(mop, Movie.Movie):
- info = self.get_movie_infoset()
- elif isinstance(mop, Person.Person):
- info = self.get_person_infoset()
- elif isinstance(mop, Character.Character):
- info = self.get_character_infoset()
- else:
- info = self.get_company_infoset()
- if not isinstance(info, (tuple, list)):
- info = (info,)
- res = {}
- for i in info:
- if i in mop.current_info and not override:
- continue
- if not i:
- continue
- self._imdb_logger.debug('retrieving "%s" info set', i)
- try:
- method = getattr(aSystem, 'get_%s_%s' %
- (prefix, i.replace(' ', '_')))
- except AttributeError:
- self._imdb_logger.error('unknown information set "%s"', i)
- # Keeps going.
- method = lambda *x: {}
- try:
- ret = method(mopID)
- except Exception, e:
- self._imdb_logger.critical('caught an exception retrieving ' \
- 'or parsing "%s" info set for mopID ' \
- '"%s" (accessSystem: %s)',
- i, mopID, mop.accessSystem, exc_info=True)
- ret = {}
- keys = None
- if 'data' in ret:
- res.update(ret['data'])
- if isinstance(ret['data'], dict):
- keys = ret['data'].keys()
- if 'info sets' in ret:
- for ri in ret['info sets']:
- mop.add_to_current_info(ri, keys, mainInfoset=i)
- else:
- mop.add_to_current_info(i, keys)
- if 'titlesRefs' in ret:
- mop.update_titlesRefs(ret['titlesRefs'])
- if 'namesRefs' in ret:
- mop.update_namesRefs(ret['namesRefs'])
- if 'charactersRefs' in ret:
- mop.update_charactersRefs(ret['charactersRefs'])
- mop.set_data(res, override=0)
-
- def get_imdbMovieID(self, movieID):
- """Translate a movieID in an imdbID (the ID used by the IMDb
- web server); must be overridden by the subclass."""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def get_imdbPersonID(self, personID):
- """Translate a personID in a imdbID (the ID used by the IMDb
- web server); must be overridden by the subclass."""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def get_imdbCharacterID(self, characterID):
- """Translate a characterID in a imdbID (the ID used by the IMDb
- web server); must be overridden by the subclass."""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def get_imdbCompanyID(self, companyID):
- """Translate a companyID in a imdbID (the ID used by the IMDb
- web server); must be overridden by the subclass."""
- # XXX: for the real implementation, see the method of the
- # subclass, somewhere under the imdb.parser package.
- raise NotImplementedError, 'override this method'
-
- def _searchIMDb(self, kind, ton):
- """Search the IMDb akas server for the given title or name."""
- # The Exact Primary search system has gone AWOL, so we resort
- # to the mobile search. :-/
- if not ton:
- return None
- aSystem = IMDb('mobile')
- if kind == 'tt':
- searchFunct = aSystem.search_movie
- check = 'long imdb canonical title'
- elif kind == 'nm':
- searchFunct = aSystem.search_person
- check = 'long imdb canonical name'
- elif kind == 'char':
- searchFunct = aSystem.search_character
- check = 'long imdb canonical name'
- elif kind == 'co':
- # XXX: are [COUNTRY] codes included in the results?
- searchFunct = aSystem.search_company
- check = 'long imdb name'
- try:
- searchRes = searchFunct(ton)
- except IMDbError:
- return None
- # When only one result is returned, assume it was from an
- # exact match.
- if len(searchRes) == 1:
- return searchRes[0].getID()
- for item in searchRes:
- # Return the first perfect match.
- if item[check] == ton:
- return item.getID()
- return None
-
- def title2imdbID(self, title):
- """Translate a movie title (in the plain text data files format)
- to an imdbID.
- Try an Exact Primary Title search on IMDb;
- return None if it's unable to get the imdbID."""
- return self._searchIMDb('tt', title)
-
- def name2imdbID(self, name):
- """Translate a person name in an imdbID.
- Try an Exact Primary Name search on IMDb;
- return None if it's unable to get the imdbID."""
- return self._searchIMDb('tt', name)
-
- def character2imdbID(self, name):
- """Translate a character name in an imdbID.
- Try an Exact Primary Name search on IMDb;
- return None if it's unable to get the imdbID."""
- return self._searchIMDb('char', name)
-
- def company2imdbID(self, name):
- """Translate a company name in an imdbID.
- Try an Exact Primary Name search on IMDb;
- return None if it's unable to get the imdbID."""
- return self._searchIMDb('co', name)
-
- def get_imdbID(self, mop):
- """Return the imdbID for the given Movie, Person, Character or Company
- object."""
- imdbID = None
- if mop.accessSystem == self.accessSystem:
- aSystem = self
- else:
- aSystem = IMDb(mop.accessSystem)
- if isinstance(mop, Movie.Movie):
- if mop.movieID is not None:
- imdbID = aSystem.get_imdbMovieID(mop.movieID)
- else:
- imdbID = aSystem.title2imdbID(build_title(mop, canonical=0,
- ptdf=1))
- elif isinstance(mop, Person.Person):
- if mop.personID is not None:
- imdbID = aSystem.get_imdbPersonID(mop.personID)
- else:
- imdbID = aSystem.name2imdbID(build_name(mop, canonical=1))
- elif isinstance(mop, Character.Character):
- if mop.characterID is not None:
- imdbID = aSystem.get_imdbCharacterID(mop.characterID)
- else:
- # canonical=0 ?
- imdbID = aSystem.character2imdbID(build_name(mop, canonical=1))
- elif isinstance(mop, Company.Company):
- if mop.companyID is not None:
- imdbID = aSystem.get_imdbCompanyID(mop.companyID)
- else:
- imdbID = aSystem.company2imdbID(build_company_name(mop))
- else:
- raise IMDbError, 'object ' + repr(mop) + \
- ' is not a Movie, Person or Character instance'
- return imdbID
-
- def get_imdbURL(self, mop):
- """Return the main IMDb URL for the given Movie, Person,
- Character or Company object, or None if unable to get it."""
- imdbID = self.get_imdbID(mop)
- if imdbID is None:
- return None
- if isinstance(mop, Movie.Movie):
- url_firstPart = imdbURL_movie_main
- elif isinstance(mop, Person.Person):
- url_firstPart = imdbURL_person_main
- elif isinstance(mop, Character.Character):
- url_firstPart = imdbURL_character_main
- elif isinstance(mop, Company.Company):
- url_firstPart = imdbURL_company_main
- else:
- raise IMDbError, 'object ' + repr(mop) + \
- ' is not a Movie, Person, Character or Company instance'
- return url_firstPart % imdbID
-
- def get_special_methods(self):
- """Return the special methods defined by the subclass."""
- sm_dict = {}
- base_methods = []
- for name in dir(IMDbBase):
- member = getattr(IMDbBase, name)
- if isinstance(member, MethodType):
- base_methods.append(name)
- for name in dir(self.__class__):
- if name.startswith('_') or name in base_methods or \
- name.startswith('get_movie_') or \
- name.startswith('get_person_') or \
- name.startswith('get_company_') or \
- name.startswith('get_character_'):
- continue
- member = getattr(self.__class__, name)
- if isinstance(member, MethodType):
- sm_dict.update({name: member.__doc__})
- return sm_dict
-
diff --git a/libs/imdb/_compat.py b/libs/imdb/_compat.py
deleted file mode 100644
index 73a4dd1b..00000000
--- a/libs/imdb/_compat.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""
-_compat module (imdb package).
-
-This module provides compatibility functions used by the imdb package
-to deal with unusual environments.
-
-Copyright 2008-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-# TODO: now we're heavily using the 'logging' module, which was not
-# present in Python 2.2. To work in a Symbian environment, we
-# need to create a fake 'logging' module (its functions may call
-# the 'warnings' module, or do nothing at all).
-
-
-import os
-# If true, we're working on a Symbian device.
-if os.name == 'e32':
- # Replace os.path.expandvars and os.path.expanduser, if needed.
- def _noact(x):
- """Ad-hoc replacement for IMDbPY."""
- return x
- try:
- os.path.expandvars
- except AttributeError:
- os.path.expandvars = _noact
- try:
- os.path.expanduser
- except AttributeError:
- os.path.expanduser = _noact
-
- # time.strptime is missing, on Symbian devices.
- import time
- try:
- time.strptime
- except AttributeError:
- import re
- _re_web_time = re.compile(r'Episode dated (\d+) (\w+) (\d+)')
- _re_ptdf_time = re.compile(r'\((\d+)-(\d+)-(\d+)\)')
- _month2digit = {'January': '1', 'February': '2', 'March': '3',
- 'April': '4', 'May': '5', 'June': '6', 'July': '7',
- 'August': '8', 'September': '9', 'October': '10',
- 'November': '11', 'December': '12'}
- def strptime(s, format):
- """Ad-hoc strptime replacement for IMDbPY."""
- try:
- if format.startswith('Episode'):
- res = _re_web_time.findall(s)[0]
- return (int(res[2]), int(_month2digit[res[1]]), int(res[0]),
- 0, 0, 0, 0, 1, 0)
- else:
- res = _re_ptdf_time.findall(s)[0]
- return (int(res[0]), int(res[1]), int(res[2]),
- 0, 0, 0, 0, 1, 0)
- except:
- raise ValueError, u'error in IMDbPY\'s ad-hoc strptime!'
- time.strptime = strptime
-
diff --git a/libs/imdb/_exceptions.py b/libs/imdb/_exceptions.py
deleted file mode 100644
index 436d01a7..00000000
--- a/libs/imdb/_exceptions.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""
-_exceptions module (imdb package).
-
-This module provides the exception hierarchy used by the imdb package.
-
-Copyright 2004-2009 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import logging
-
-
-class IMDbError(Exception, object):
- """Base class for every exception raised by the imdb package."""
- _logger = logging.getLogger('imdbpy')
-
- def __init__(self, *args, **kwargs):
- """Initialize the exception and pass the message to the log system."""
- # Every raised exception also dispatch a critical log.
- self._logger.critical('%s exception raised; args: %s; kwds: %s',
- self.__class__.__name__, args, kwargs,
- exc_info=True)
- super(IMDbError, self).__init__(*args, **kwargs)
-
-class IMDbDataAccessError(IMDbError):
- """Exception raised when is not possible to access needed data."""
- pass
-
-class IMDbParserError(IMDbError):
- """Exception raised when an error occurred parsing the data."""
- pass
-
-
diff --git a/libs/imdb/_logging.py b/libs/imdb/_logging.py
deleted file mode 100644
index 2b8a286a..00000000
--- a/libs/imdb/_logging.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-_logging module (imdb package).
-
-This module provides the logging facilities used by the imdb package.
-
-Copyright 2009-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import logging
-
-LEVELS = {'debug': logging.DEBUG,
- 'info': logging.INFO,
- 'warn': logging.WARNING,
- 'warning': logging.WARNING,
- 'error': logging.ERROR,
- 'critical': logging.CRITICAL}
-
-
-imdbpyLogger = logging.getLogger('imdbpy')
-imdbpyStreamHandler = logging.StreamHandler()
-imdbpyFormatter = logging.Formatter('%(asctime)s %(levelname)s [%(name)s]' \
- ' %(pathname)s:%(lineno)d: %(message)s')
-imdbpyStreamHandler.setFormatter(imdbpyFormatter)
-imdbpyLogger.addHandler(imdbpyStreamHandler)
-
-def setLevel(level):
- """Set logging level for the main logger."""
- level = level.lower().strip()
- imdbpyLogger.setLevel(LEVELS.get(level, logging.NOTSET))
- imdbpyLogger.log(imdbpyLogger.level, 'set logging threshold to "%s"',
- logging.getLevelName(imdbpyLogger.level))
-
-
-#imdbpyLogger.setLevel(logging.DEBUG)
-
-
-# It can be an idea to have a single function to log and warn:
-#import warnings
-#def log_and_warn(msg, args=None, logger=None, level=None):
-# """Log the message and issue a warning."""
-# if logger is None:
-# logger = imdbpyLogger
-# if level is None:
-# level = logging.WARNING
-# if args is None:
-# args = ()
-# #warnings.warn(msg % args, stacklevel=0)
-# logger.log(level, msg % args)
-
diff --git a/libs/imdb/articles.py b/libs/imdb/articles.py
deleted file mode 100644
index 73ac6901..00000000
--- a/libs/imdb/articles.py
+++ /dev/null
@@ -1,142 +0,0 @@
-"""
-articles module (imdb package).
-
-This module provides functions and data to handle in a smart way
-articles (in various languages) at the beginning of movie titles.
-
-Copyright 2009 Davide Alberani
- 2009 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-# List of generic articles used when the language of the title is unknown (or
-# we don't have information about articles in that language).
-# XXX: Managing titles in a lot of different languages, a function to recognize
-# an initial article can't be perfect; sometimes we'll stumble upon a short
-# word that is an article in some language, but it's not in another; in these
-# situations we have to choose if we want to interpret this little word
-# as an article or not (remember that we don't know what the original language
-# of the title was).
-# Example: 'en' is (I suppose) an article in Some Language. Unfortunately it
-# seems also to be a preposition in other languages (French?).
-# Running a script over the whole list of titles (and aliases), I've found
-# that 'en' is used as an article only 376 times, and as another thing 594
-# times, so I've decided to _always_ consider 'en' as a non article.
-#
-# Here is a list of words that are _never_ considered as articles, complete
-# with the cound of times they are used in a way or another:
-# 'en' (376 vs 594), 'to' (399 vs 727), 'as' (198 vs 276), 'et' (79 vs 99),
-# 'des' (75 vs 150), 'al' (78 vs 304), 'ye' (14 vs 70),
-# 'da' (23 vs 298), "'n" (8 vs 12)
-#
-# I've left in the list 'i' (1939 vs 2151) and 'uno' (52 vs 56)
-# I'm not sure what '-al' is, and so I've left it out...
-#
-# Generic list of articles in utf-8 encoding:
-GENERIC_ARTICLES = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
- "l'", 'il', 'das', 'les', 'i', 'o', 'ein', 'un', 'de', 'los',
- 'an', 'una', 'las', 'eine', 'den', 'het', 'gli', 'lo', 'os',
- 'ang', 'oi', 'az', 'een', 'ha-', 'det', 'ta', 'al-',
- 'mga', "un'", 'uno', 'ett', 'dem', 'egy', 'els', 'eines',
- '\xc3\x8f', '\xc3\x87', '\xc3\x94\xc3\xaf', '\xc3\x8f\xc3\xa9')
-
-
-# Lists of articles separated by language. If possible, the list should
-# be sorted by frequency (not very important, but...)
-# If you want to add a list of articles for another language, mail it
-# it at imdbpy-devel@lists.sourceforge.net; non-ascii articles must be utf-8
-# encoded.
-LANG_ARTICLES = {
- 'English': ('the', 'a', 'an'),
- 'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'",
- 'uno'),
- 'Spanish': ('la', 'le', 'el', 'les', 'un', 'los', 'una', 'uno', 'unos',
- 'unas'),
- 'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'),
- 'Turkish': (), # Some languages doesn't have articles.
-}
-LANG_ARTICLESget = LANG_ARTICLES.get
-
-
-# Maps a language to countries where it is the main language.
-# If you want to add an entry for another language or country, mail it at
-# imdbpy-devel@lists.sourceforge.net .
-_LANG_COUNTRIES = {
- 'English': ('USA', 'UK', 'Canada', 'Ireland', 'Australia'),
- 'Italian': ('Italy',),
- 'Spanish': ('Spain', 'Mexico'),
- 'Portuguese': ('Portugal', 'Brazil'),
- 'Turkish': ('Turkey',),
- #'German': ('Germany', 'East Germany', 'West Germany'),
- #'French': ('France'),
-}
-
-# Maps countries to their main language.
-COUNTRY_LANG = {}
-for lang in _LANG_COUNTRIES:
- for country in _LANG_COUNTRIES[lang]:
- COUNTRY_LANG[country] = lang
-
-
-def toUnicode(articles):
- """Convert a list of articles utf-8 encoded to unicode strings."""
- return tuple([art.decode('utf_8') for art in articles])
-
-
-def toDicts(articles):
- """Given a list of utf-8 encoded articles, build two dictionary (one
- utf-8 encoded and another one with unicode keys) for faster matches."""
- uArticles = toUnicode(articles)
- return dict([(x, x) for x in articles]), dict([(x, x) for x in uArticles])
-
-
-def addTrailingSpace(articles):
- """From the given list of utf-8 encoded articles, return two
- lists (one utf-8 encoded and another one in unicode) where a space
- is added at the end - if the last char is not ' or -."""
- _spArticles = []
- _spUnicodeArticles = []
- for article in articles:
- if article[-1] not in ("'", '-'):
- article += ' '
- _spArticles.append(article)
- _spUnicodeArticles.append(article.decode('utf_8'))
- return _spArticles, _spUnicodeArticles
-
-
-# Caches.
-_ART_CACHE = {}
-_SP_ART_CACHE = {}
-
-def articlesDictsForLang(lang):
- """Return dictionaries of articles specific for the given language, or the
- default one if the language is not known."""
- if lang in _ART_CACHE:
- return _ART_CACHE[lang]
- artDicts = toDicts(LANG_ARTICLESget(lang, GENERIC_ARTICLES))
- _ART_CACHE[lang] = artDicts
- return artDicts
-
-
-def spArticlesForLang(lang):
- """Return lists of articles (plus optional spaces) specific for the
- given language, or the default one if the language is not known."""
- if lang in _SP_ART_CACHE:
- return _SP_ART_CACHE[lang]
- spArticles = addTrailingSpace(LANG_ARTICLESget(lang, GENERIC_ARTICLES))
- _SP_ART_CACHE[lang] = spArticles
- return spArticles
-
diff --git a/libs/imdb/helpers.py b/libs/imdb/helpers.py
deleted file mode 100644
index 2ca53068..00000000
--- a/libs/imdb/helpers.py
+++ /dev/null
@@ -1,548 +0,0 @@
-"""
-helpers module (imdb package).
-
-This module provides functions not used directly by the imdb package,
-but useful for IMDbPY-based programs.
-
-Copyright 2006-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-# XXX: find better names for the functions in this modules.
-
-import re
-from cgi import escape
-import gettext
-from gettext import gettext as _
-gettext.textdomain('imdbpy')
-
-# The modClearRefs can be used to strip names and titles references from
-# the strings in Movie and Person objects.
-from imdb.utils import modClearRefs, re_titleRef, re_nameRef, \
- re_characterRef, _tagAttr, _Container, TAGS_TO_MODIFY
-from imdb import IMDb, imdbURL_movie_base, imdbURL_person_base, \
- imdbURL_character_base
-import imdb.locale
-from imdb.Movie import Movie
-from imdb.Person import Person
-from imdb.Character import Character
-from imdb.Company import Company
-from imdb.parser.http.utils import re_entcharrefssub, entcharrefs, \
- subXMLRefs, subSGMLRefs
-from imdb.parser.http.bsouplxml.etree import BeautifulSoup
-
-
-# An URL, more or less.
-_re_href = re.compile(r'(http://.+?)(?=\s|$)', re.I)
-_re_hrefsub = _re_href.sub
-
-
-def makeCgiPrintEncoding(encoding):
- """Make a function to pretty-print strings for the web."""
- def cgiPrint(s):
- """Encode the given string using the %s encoding, and replace
- chars outside the given charset with XML char references.""" % encoding
- s = escape(s, quote=1)
- if isinstance(s, unicode):
- s = s.encode(encoding, 'xmlcharrefreplace')
- return s
- return cgiPrint
-
-# cgiPrint uses the latin_1 encoding.
-cgiPrint = makeCgiPrintEncoding('latin_1')
-
-# Regular expression for %(varname)s substitutions.
-re_subst = re.compile(r'%\((.+?)\)s')
-# Regular expression for .... clauses.
-re_conditional = re.compile(r'(.+?)')
-
-
-def makeTextNotes(replaceTxtNotes):
- """Create a function useful to handle text[::optional_note] values.
- replaceTxtNotes is a format string, which can include the following
- values: %(text)s and %(notes)s.
- Portions of the text can be conditionally excluded, if one of the
- values is absent. E.g.: [%(notes)s] will be replaced
- with '[notes]' if notes exists, or by an empty string otherwise.
- The returned function is suitable be passed as applyToValues argument
- of the makeObject2Txt function."""
- def _replacer(s):
- outS = replaceTxtNotes
- if not isinstance(s, (unicode, str)):
- return s
- ssplit = s.split('::', 1)
- text = ssplit[0]
- # Used to keep track of text and note existence.
- keysDict = {}
- if text:
- keysDict['text'] = True
- outS = outS.replace('%(text)s', text)
- if len(ssplit) == 2:
- keysDict['notes'] = True
- outS = outS.replace('%(notes)s', ssplit[1])
- else:
- outS = outS.replace('%(notes)s', u'')
- def _excludeFalseConditionals(matchobj):
- # Return an empty string if the conditional is false/empty.
- if matchobj.group(1) in keysDict:
- return matchobj.group(2)
- return u''
- while re_conditional.search(outS):
- outS = re_conditional.sub(_excludeFalseConditionals, outS)
- return outS
- return _replacer
-
-
-def makeObject2Txt(movieTxt=None, personTxt=None, characterTxt=None,
- companyTxt=None, joiner=' / ',
- applyToValues=lambda x: x, _recurse=True):
- """"Return a function useful to pretty-print Movie, Person,
- Character and Company instances.
-
- *movieTxt* -- how to format a Movie object.
- *personTxt* -- how to format a Person object.
- *characterTxt* -- how to format a Character object.
- *companyTxt* -- how to format a Company object.
- *joiner* -- string used to join a list of objects.
- *applyToValues* -- function to apply to values.
- *_recurse* -- if True (default) manage only the given object.
- """
- # Some useful defaults.
- if movieTxt is None:
- movieTxt = '%(long imdb title)s'
- if personTxt is None:
- personTxt = '%(long imdb name)s'
- if characterTxt is None:
- characterTxt = '%(long imdb name)s'
- if companyTxt is None:
- companyTxt = '%(long imdb name)s'
- def object2txt(obj, _limitRecursion=None):
- """Pretty-print objects."""
- # Prevent unlimited recursion.
- if _limitRecursion is None:
- _limitRecursion = 0
- elif _limitRecursion > 5:
- return u''
- _limitRecursion += 1
- if isinstance(obj, (list, tuple)):
- return joiner.join([object2txt(o, _limitRecursion=_limitRecursion)
- for o in obj])
- elif isinstance(obj, dict):
- # XXX: not exactly nice, neither useful, I fear.
- return joiner.join([u'%s::%s' %
- (object2txt(k, _limitRecursion=_limitRecursion),
- object2txt(v, _limitRecursion=_limitRecursion))
- for k, v in obj.items()])
- objData = {}
- if isinstance(obj, Movie):
- objData['movieID'] = obj.movieID
- outs = movieTxt
- elif isinstance(obj, Person):
- objData['personID'] = obj.personID
- outs = personTxt
- elif isinstance(obj, Character):
- objData['characterID'] = obj.characterID
- outs = characterTxt
- elif isinstance(obj, Company):
- objData['companyID'] = obj.companyID
- outs = companyTxt
- else:
- return obj
- def _excludeFalseConditionals(matchobj):
- # Return an empty string if the conditional is false/empty.
- condition = matchobj.group(1)
- proceed = obj.get(condition) or getattr(obj, condition, None)
- if proceed:
- return matchobj.group(2)
- else:
- return u''
- return matchobj.group(2)
- while re_conditional.search(outs):
- outs = re_conditional.sub(_excludeFalseConditionals, outs)
- for key in re_subst.findall(outs):
- value = obj.get(key) or getattr(obj, key, None)
- if not isinstance(value, (unicode, str)):
- if not _recurse:
- if value:
- value = unicode(value)
- if value:
- value = object2txt(value, _limitRecursion=_limitRecursion)
- elif value:
- value = applyToValues(unicode(value))
- if not value:
- value = u''
- elif not isinstance(value, (unicode, str)):
- value = unicode(value)
- outs = outs.replace(u'%(' + key + u')s', value)
- return outs
- return object2txt
-
-
-def makeModCGILinks(movieTxt, personTxt, characterTxt=None,
- encoding='latin_1'):
- """Make a function used to pretty-print movies and persons refereces;
- movieTxt and personTxt are the strings used for the substitutions.
- movieTxt must contains %(movieID)s and %(title)s, while personTxt
- must contains %(personID)s and %(name)s and characterTxt %(characterID)s
- and %(name)s; characterTxt is optional, for backward compatibility."""
- _cgiPrint = makeCgiPrintEncoding(encoding)
- def modCGILinks(s, titlesRefs, namesRefs, characterRefs=None):
- """Substitute movies and persons references."""
- if characterRefs is None: characterRefs = {}
- # XXX: look ma'... more nested scopes!
- def _replaceMovie(match):
- to_replace = match.group(1)
- item = titlesRefs.get(to_replace)
- if item:
- movieID = item.movieID
- to_replace = movieTxt % {'movieID': movieID,
- 'title': unicode(_cgiPrint(to_replace),
- encoding,
- 'xmlcharrefreplace')}
- return to_replace
- def _replacePerson(match):
- to_replace = match.group(1)
- item = namesRefs.get(to_replace)
- if item:
- personID = item.personID
- to_replace = personTxt % {'personID': personID,
- 'name': unicode(_cgiPrint(to_replace),
- encoding,
- 'xmlcharrefreplace')}
- return to_replace
- def _replaceCharacter(match):
- to_replace = match.group(1)
- if characterTxt is None:
- return to_replace
- item = characterRefs.get(to_replace)
- if item:
- characterID = item.characterID
- if characterID is None:
- return to_replace
- to_replace = characterTxt % {'characterID': characterID,
- 'name': unicode(_cgiPrint(to_replace),
- encoding,
- 'xmlcharrefreplace')}
- return to_replace
- s = s.replace('<', '<').replace('>', '>')
- s = _re_hrefsub(r'\1', s)
- s = re_titleRef.sub(_replaceMovie, s)
- s = re_nameRef.sub(_replacePerson, s)
- s = re_characterRef.sub(_replaceCharacter, s)
- return s
- modCGILinks.movieTxt = movieTxt
- modCGILinks.personTxt = personTxt
- modCGILinks.characterTxt = characterTxt
- return modCGILinks
-
-# links to the imdb.com web site.
-_movieTxt = '%(title)s'
-_personTxt = '%(name)s'
-_characterTxt = '%(name)s'
-modHtmlLinks = makeModCGILinks(movieTxt=_movieTxt, personTxt=_personTxt,
- characterTxt=_characterTxt)
-modHtmlLinksASCII = makeModCGILinks(movieTxt=_movieTxt, personTxt=_personTxt,
- characterTxt=_characterTxt,
- encoding='ascii')
-
-
-everyentcharrefs = entcharrefs.copy()
-for k, v in {'lt':u'<','gt':u'>','amp':u'&','quot':u'"','apos':u'\''}.items():
- everyentcharrefs[k] = v
- everyentcharrefs['#%s' % ord(v)] = v
-everyentcharrefsget = everyentcharrefs.get
-re_everyentcharrefs = re.compile('&(%s|\#160|\#\d{1,5});' %
- '|'.join(map(re.escape, everyentcharrefs)))
-re_everyentcharrefssub = re_everyentcharrefs.sub
-
-def _replAllXMLRef(match):
- """Replace the matched XML reference."""
- ref = match.group(1)
- value = everyentcharrefsget(ref)
- if value is None:
- if ref[0] == '#':
- return unichr(int(ref[1:]))
- else:
- return ref
- return value
-
-def subXMLHTMLSGMLRefs(s):
- """Return the given string with XML/HTML/SGML entity and char references
- replaced."""
- return re_everyentcharrefssub(_replAllXMLRef, s)
-
-
-def sortedSeasons(m):
- """Return a sorted list of seasons of the given series."""
- seasons = m.get('episodes', {}).keys()
- seasons.sort()
- return seasons
-
-
-def sortedEpisodes(m, season=None):
- """Return a sorted list of episodes of the given series,
- considering only the specified season(s) (every season, if None)."""
- episodes = []
- seasons = season
- if season is None:
- seasons = sortedSeasons(m)
- else:
- if not isinstance(season, (tuple, list)):
- seasons = [season]
- for s in seasons:
- eps_indx = m.get('episodes', {}).get(s, {}).keys()
- eps_indx.sort()
- for e in eps_indx:
- episodes.append(m['episodes'][s][e])
- return episodes
-
-
-# Idea and portions of the code courtesy of none none (dclist at gmail.com)
-_re_imdbIDurl = re.compile(r'\b(nm|tt|ch|co)([0-9]{7})\b')
-def get_byURL(url, info=None, args=None, kwds=None):
- """Return a Movie, Person, Character or Company object for the given URL;
- info is the info set to retrieve, args and kwds are respectively a list
- and a dictionary or arguments to initialize the data access system.
- Returns None if unable to correctly parse the url; can raise
- exceptions if unable to retrieve the data."""
- if args is None: args = []
- if kwds is None: kwds = {}
- ia = IMDb(*args, **kwds)
- match = _re_imdbIDurl.search(url)
- if not match:
- return None
- imdbtype = match.group(1)
- imdbID = match.group(2)
- if imdbtype == 'tt':
- return ia.get_movie(imdbID, info=info)
- elif imdbtype == 'nm':
- return ia.get_person(imdbID, info=info)
- elif imdbtype == 'ch':
- return ia.get_character(imdbID, info=info)
- elif imdbtype == 'co':
- return ia.get_company(imdbID, info=info)
- return None
-
-
-# Idea and portions of code courtesy of Basil Shubin.
-# Beware that these information are now available directly by
-# the Movie/Person/Character instances.
-def fullSizeCoverURL(obj):
- """Given an URL string or a Movie, Person or Character instance,
- returns an URL to the full-size version of the cover/headshot,
- or None otherwise. This function is obsolete: the same information
- are available as keys: 'full-size cover url' and 'full-size headshot',
- respectively for movies and persons/characters."""
- if isinstance(obj, Movie):
- coverUrl = obj.get('cover url')
- elif isinstance(obj, (Person, Character)):
- coverUrl = obj.get('headshot')
- else:
- coverUrl = obj
- if not coverUrl:
- return None
- return _Container._re_fullsizeURL.sub('', coverUrl)
-
-
-def keyToXML(key):
- """Return a key (the ones used to access information in Movie and
- other classes instances) converted to the style of the XML output."""
- return _tagAttr(key, '')[0]
-
-
-def translateKey(key):
- """Translate a given key."""
- return _(keyToXML(key))
-
-
-# Maps tags to classes.
-_MAP_TOP_OBJ = {
- 'person': Person,
- 'movie': Movie,
- 'character': Character,
- 'company': Company
-}
-
-# Tags to be converted to lists.
-_TAGS_TO_LIST = dict([(x[0], None) for x in TAGS_TO_MODIFY.values()])
-_TAGS_TO_LIST.update(_MAP_TOP_OBJ)
-
-def tagToKey(tag):
- """Return the name of the tag, taking it from the 'key' attribute,
- if present."""
- keyAttr = tag.get('key')
- if keyAttr:
- if tag.get('keytype') == 'int':
- keyAttr = int(keyAttr)
- return keyAttr
- return tag.name
-
-
-def _valueWithType(tag, tagValue):
- """Return tagValue, handling some type conversions."""
- tagType = tag.get('type')
- if tagType == 'int':
- tagValue = int(tagValue)
- elif tagType == 'float':
- tagValue = float(tagValue)
- return tagValue
-
-
-# Extra tags to get (if values were not already read from title/name).
-_titleTags = ('imdbindex', 'kind', 'year')
-_nameTags = ('imdbindex')
-_companyTags = ('imdbindex', 'country')
-
-def parseTags(tag, _topLevel=True, _as=None, _infoset2keys=None,
- _key2infoset=None):
- """Recursively parse a tree of tags."""
- # The returned object (usually a _Container subclass, but it can
- # be a string, an int, a float, a list or a dictionary).
- item = None
- if _infoset2keys is None:
- _infoset2keys = {}
- if _key2infoset is None:
- _key2infoset = {}
- name = tagToKey(tag)
- firstChild = tag.find(recursive=False)
- tagStr = (tag.string or u'').strip()
- if not tagStr and name == 'item':
- # Handles 'item' tags containing text and a 'notes' sub-tag.
- tagContent = tag.contents[0]
- if isinstance(tagContent, BeautifulSoup.NavigableString):
- tagStr = (unicode(tagContent) or u'').strip()
- tagType = tag.get('type')
- infoset = tag.get('infoset')
- if infoset:
- _key2infoset[name] = infoset
- _infoset2keys.setdefault(infoset, []).append(name)
- # Here we use tag.name to avoid tags like -
- if tag.name in _MAP_TOP_OBJ:
- # One of the subclasses of _Container.
- item = _MAP_TOP_OBJ[name]()
- itemAs = tag.get('access-system')
- if itemAs:
- if not _as:
- _as = itemAs
- else:
- itemAs = _as
- item.accessSystem = itemAs
- tagsToGet = []
- theID = tag.get('id')
- if name == 'movie':
- item.movieID = theID
- tagsToGet = _titleTags
- theTitle = tag.find('title', recursive=False)
- if tag.title:
- item.set_title(tag.title.string)
- tag.title.extract()
- else:
- if name == 'person':
- item.personID = theID
- tagsToGet = _nameTags
- theName = tag.find('long imdb canonical name', recursive=False)
- if not theName:
- theName = tag.find('name', recursive=False)
- elif name == 'character':
- item.characterID = theID
- tagsToGet = _nameTags
- theName = tag.find('name', recursive=False)
- elif name == 'company':
- item.companyID = theID
- tagsToGet = _companyTags
- theName = tag.find('name', recursive=False)
- if theName:
- item.set_name(theName.string)
- if theName:
- theName.extract()
- for t in tagsToGet:
- if t in item.data:
- continue
- dataTag = tag.find(t, recursive=False)
- if dataTag:
- item.data[tagToKey(dataTag)] = _valueWithType(dataTag,
- dataTag.string)
- if tag.notes:
- item.notes = tag.notes.string
- tag.notes.extract()
- episodeOf = tag.find('episode-of', recursive=False)
- if episodeOf:
- item.data['episode of'] = parseTags(episodeOf, _topLevel=False,
- _as=_as, _infoset2keys=_infoset2keys,
- _key2infoset=_key2infoset)
- episodeOf.extract()
- cRole = tag.find('current-role', recursive=False)
- if cRole:
- cr = parseTags(cRole, _topLevel=False, _as=_as,
- _infoset2keys=_infoset2keys, _key2infoset=_key2infoset)
- item.currentRole = cr
- cRole.extract()
- # XXX: big assumption, here. What about Movie instances used
- # as keys in dictionaries? What about other keys (season and
- # episode number, for example?)
- if not _topLevel:
- #tag.extract()
- return item
- _adder = lambda key, value: item.data.update({key: value})
- elif tagStr:
- if tag.notes:
- notes = (tag.notes.string or u'').strip()
- if notes:
- tagStr += u'::%s' % notes
- else:
- tagStr = _valueWithType(tag, tagStr)
- return tagStr
- elif firstChild:
- firstChildName = tagToKey(firstChild)
- if firstChildName in _TAGS_TO_LIST:
- item = []
- _adder = lambda key, value: item.append(value)
- else:
- item = {}
- _adder = lambda key, value: item.update({key: value})
- else:
- item = {}
- _adder = lambda key, value: item.update({name: value})
- for subTag in tag(recursive=False):
- subTagKey = tagToKey(subTag)
- # Exclude dinamically generated keys.
- if tag.name in _MAP_TOP_OBJ and subTagKey in item._additional_keys():
- continue
- subItem = parseTags(subTag, _topLevel=False, _as=_as,
- _infoset2keys=_infoset2keys, _key2infoset=_key2infoset)
- if subItem:
- _adder(subTagKey, subItem)
- if _topLevel and name in _MAP_TOP_OBJ:
- # Add information about 'info sets', but only to the top-level object.
- item.infoset2keys = _infoset2keys
- item.key2infoset = _key2infoset
- item.current_info = _infoset2keys.keys()
- return item
-
-
-def parseXML(xml):
- """Parse a XML string, returning an appropriate object (usually an
- instance of a subclass of _Container."""
- xmlObj = BeautifulSoup.BeautifulStoneSoup(xml,
- convertEntities=BeautifulSoup.BeautifulStoneSoup.XHTML_ENTITIES)
- if xmlObj:
- mainTag = xmlObj.find()
- if mainTag:
- return parseTags(mainTag)
- return None
-
-
diff --git a/libs/imdb/locale/__init__.py b/libs/imdb/locale/__init__.py
deleted file mode 100644
index 9bc2e466..00000000
--- a/libs/imdb/locale/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-locale package (imdb package).
-
-This package provides scripts and files for internationalization
-of IMDbPY.
-
-Copyright 2009 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import gettext
-import os
-
-LOCALE_DIR = os.path.dirname(__file__)
-
-gettext.bindtextdomain('imdbpy', LOCALE_DIR)
diff --git a/libs/imdb/locale/generatepot.py b/libs/imdb/locale/generatepot.py
deleted file mode 100644
index 282f7d41..00000000
--- a/libs/imdb/locale/generatepot.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-"""
-generatepot.py script.
-
-This script generates the imdbpy.pot file, from the DTD.
-
-Copyright 2009 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import re
-import sys
-
-from datetime import datetime as dt
-
-DEFAULT_MESSAGES = { }
-
-ELEMENT_PATTERN = r"""\n"
-"Language-Team: TEAM NAME \n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=UTF-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-"Plural-Forms: nplurals=1; plural=0;\n"
-"Language-Code: en\n"
-"Language-Name: English\n"
-"Preferred-Encodings: utf-8\n"
-"Domain: imdbpy\n"
-"""
-
-if len(sys.argv) != 2:
- print "Usage: %s dtd_file" % sys.argv[0]
- sys.exit()
-
-dtdfilename = sys.argv[1]
-dtd = open(dtdfilename).read()
-elements = re_element.findall(dtd)
-uniq = set(elements)
-elements = list(uniq)
-
-print POT_HEADER_TEMPLATE % {
- 'now': dt.strftime(dt.now(), "%Y-%m-%d %H:%M+0000")
-}
-for element in sorted(elements):
- if element in DEFAULT_MESSAGES:
- print '# Default: %s' % DEFAULT_MESSAGES[element]
- else:
- print '# Default: %s' % element.replace('-', ' ').capitalize()
- print 'msgid "%s"' % element
- print 'msgstr ""'
- # use this part instead of the line above to generate the po file for English
- #if element in DEFAULT_MESSAGES:
- # print 'msgstr "%s"' % DEFAULT_MESSAGES[element]
- #else:
- # print 'msgstr "%s"' % element.replace('-', ' ').capitalize()
- print
-
diff --git a/libs/imdb/locale/imdbpy-en.po b/libs/imdb/locale/imdbpy-en.po
deleted file mode 100644
index 3b3013c3..00000000
--- a/libs/imdb/locale/imdbpy-en.po
+++ /dev/null
@@ -1,1257 +0,0 @@
-# Gettext message file for imdbpy
-msgid ""
-msgstr ""
-"Project-Id-Version: imdbpy\n"
-"POT-Creation-Date: 2009-04-16 14:27+0000\n"
-"PO-Revision-Date: YYYY-MM-DD HH:MM+0000\n"
-"Last-Translator: YOUR NAME \n"
-"Language-Team: TEAM NAME \n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=UTF-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-"Plural-Forms: nplurals=1; plural=0;\n"
-"Language-Code: en\n"
-"Language-Name: English\n"
-"Preferred-Encodings: utf-8\n"
-"Domain: imdbpy\n"
-
-# Default: Actor
-msgid "actor"
-msgstr "Actor"
-
-# Default: Actress
-msgid "actress"
-msgstr "Actress"
-
-# Default: Adaption
-msgid "adaption"
-msgstr "Adaption"
-
-# Default: Additional information
-msgid "additional-information"
-msgstr "Additional information"
-
-# Default: Admissions
-msgid "admissions"
-msgstr "Admissions"
-
-# Default: Agent address
-msgid "agent-address"
-msgstr "Agent address"
-
-# Default: Airing
-msgid "airing"
-msgstr "Airing"
-
-# Default: Akas
-msgid "akas"
-msgstr "Akas"
-
-# Default: All products
-msgid "all-products"
-msgstr "All products"
-
-# Default: Alternate language version of
-msgid "alternate-language-version-of"
-msgstr "Alternate language version of"
-
-# Default: Alternate versions
-msgid "alternate-versions"
-msgstr "Alternate versions"
-
-# Default: Amazon reviews
-msgid "amazon-reviews"
-msgstr "Amazon reviews"
-
-# Default: Analog left
-msgid "analog-left"
-msgstr "Analog left"
-
-# Default: Analog right
-msgid "analog-right"
-msgstr "Analog right"
-
-# Default: Animation department
-msgid "animation-department"
-msgstr "Animation department"
-
-# Default: Archive footage
-msgid "archive-footage"
-msgstr "Archive footage"
-
-# Default: Arithmetic mean
-msgid "arithmetic-mean"
-msgstr "Arithmetic mean"
-
-# Default: Art department
-msgid "art-department"
-msgstr "Art department"
-
-# Default: Art direction
-msgid "art-direction"
-msgstr "Art direction"
-
-# Default: Art director
-msgid "art-director"
-msgstr "Art director"
-
-# Default: Article
-msgid "article"
-msgstr "Article"
-
-# Default: Asin
-msgid "asin"
-msgstr "Asin"
-
-# Default: Aspect ratio
-msgid "aspect-ratio"
-msgstr "Aspect ratio"
-
-# Default: Assigner
-msgid "assigner"
-msgstr "Assigner"
-
-# Default: Assistant director
-msgid "assistant-director"
-msgstr "Assistant director"
-
-# Default: Auctions
-msgid "auctions"
-msgstr "Auctions"
-
-# Default: Audio noise
-msgid "audio-noise"
-msgstr "Audio noise"
-
-# Default: Audio quality
-msgid "audio-quality"
-msgstr "Audio quality"
-
-# Default: Award
-msgid "award"
-msgstr "Award"
-
-# Default: Awards
-msgid "awards"
-msgstr "Awards"
-
-# Default: Biographical movies
-msgid "biographical-movies"
-msgstr "Biographical movies"
-
-# Default: Biography
-msgid "biography"
-msgstr "Biography"
-
-# Default: Biography print
-msgid "biography-print"
-msgstr "Biography print"
-
-# Default: Birth date
-msgid "birth-date"
-msgstr "Birth date"
-
-# Default: Birth name
-msgid "birth-name"
-msgstr "Birth name"
-
-# Default: Birth notes
-msgid "birth-notes"
-msgstr "Birth notes"
-
-# Default: Body
-msgid "body"
-msgstr "Body"
-
-# Default: Book
-msgid "book"
-msgstr "Book"
-
-# Default: Books
-msgid "books"
-msgstr "Books"
-
-# Default: Bottom 100 rank
-msgid "bottom-100-rank"
-msgstr "Bottom 100 rank"
-
-# Default: Budget
-msgid "budget"
-msgstr "Budget"
-
-# Default: Business
-msgid "business"
-msgstr "Business"
-
-# Default: By arrangement with
-msgid "by-arrangement-with"
-msgstr "By arrangement with"
-
-# Default: Camera
-msgid "camera"
-msgstr "Camera"
-
-# Default: Camera and electrical department
-msgid "camera-and-electrical-department"
-msgstr "Camera and electrical department"
-
-# Default: Canonical episode title
-msgid "canonical-episode-title"
-msgstr "Canonical episode title"
-
-# Default: Canonical name
-msgid "canonical-name"
-msgstr "Canonical name"
-
-# Default: Canonical series title
-msgid "canonical-series-title"
-msgstr "Canonical series title"
-
-# Default: Canonical title
-msgid "canonical-title"
-msgstr "Canonical title"
-
-# Default: Cast
-msgid "cast"
-msgstr "Cast"
-
-# Default: Casting department
-msgid "casting-department"
-msgstr "Casting department"
-
-# Default: Casting director
-msgid "casting-director"
-msgstr "Casting director"
-
-# Default: Catalog number
-msgid "catalog-number"
-msgstr "Catalog number"
-
-# Default: Category
-msgid "category"
-msgstr "Category"
-
-# Default: Certificate
-msgid "certificate"
-msgstr "Certificate"
-
-# Default: Certificates
-msgid "certificates"
-msgstr "Certificates"
-
-# Default: Certification
-msgid "certification"
-msgstr "Certification"
-
-# Default: Channel
-msgid "channel"
-msgstr "Channel"
-
-# Default: Character
-msgid "character"
-msgstr "Character"
-
-# Default: Cinematographer
-msgid "cinematographer"
-msgstr "Cinematographer"
-
-# Default: Cinematographic process
-msgid "cinematographic-process"
-msgstr "Cinematographic process"
-
-# Default: Close captions teletext ld g
-msgid "close-captions-teletext-ld-g"
-msgstr "Close captions teletext ld g"
-
-# Default: Color info
-msgid "color-info"
-msgstr "Color info"
-
-# Default: Color information
-msgid "color-information"
-msgstr "Color information"
-
-# Default: Color rendition
-msgid "color-rendition"
-msgstr "Color rendition"
-
-# Default: Company
-msgid "company"
-msgstr "Company"
-
-# Default: Complete cast
-msgid "complete-cast"
-msgstr "Complete cast"
-
-# Default: Complete crew
-msgid "complete-crew"
-msgstr "Complete crew"
-
-# Default: Composer
-msgid "composer"
-msgstr "Composer"
-
-# Default: Connections
-msgid "connections"
-msgstr "Connections"
-
-# Default: Contrast
-msgid "contrast"
-msgstr "Contrast"
-
-# Default: Copyright holder
-msgid "copyright-holder"
-msgstr "Copyright holder"
-
-# Default: Costume department
-msgid "costume-department"
-msgstr "Costume department"
-
-# Default: Costume designer
-msgid "costume-designer"
-msgstr "Costume designer"
-
-# Default: Countries
-msgid "countries"
-msgstr "Countries"
-
-# Default: Country
-msgid "country"
-msgstr "Country"
-
-# Default: Courtesy of
-msgid "courtesy-of"
-msgstr "Courtesy of"
-
-# Default: Cover
-msgid "cover"
-msgstr "Cover"
-
-# Default: Cover url
-msgid "cover-url"
-msgstr "Cover url"
-
-# Default: Crazy credits
-msgid "crazy-credits"
-msgstr "Crazy credits"
-
-# Default: Creator
-msgid "creator"
-msgstr "Creator"
-
-# Default: Current role
-msgid "current-role"
-msgstr "Current role"
-
-# Default: Database
-msgid "database"
-msgstr "Database"
-
-# Default: Date
-msgid "date"
-msgstr "Date"
-
-# Default: Death date
-msgid "death-date"
-msgstr "Death date"
-
-# Default: Death notes
-msgid "death-notes"
-msgstr "Death notes"
-
-# Default: Demographic
-msgid "demographic"
-msgstr "Demographic"
-
-# Default: Description
-msgid "description"
-msgstr "Description"
-
-# Default: Dialogue intellegibility
-msgid "dialogue-intellegibility"
-msgstr "Dialogue intellegibility"
-
-# Default: Digital sound
-msgid "digital-sound"
-msgstr "Digital sound"
-
-# Default: Director
-msgid "director"
-msgstr "Director"
-
-# Default: Disc format
-msgid "disc-format"
-msgstr "Disc format"
-
-# Default: Disc size
-msgid "disc-size"
-msgstr "Disc size"
-
-# Default: Distributors
-msgid "distributors"
-msgstr "Distributors"
-
-# Default: Dvd
-msgid "dvd"
-msgstr "Dvd"
-
-# Default: Dvd features
-msgid "dvd-features"
-msgstr "Dvd features"
-
-# Default: Dvd format
-msgid "dvd-format"
-msgstr "Dvd format"
-
-# Default: Dvds
-msgid "dvds"
-msgstr "Dvds"
-
-# Default: Dynamic range
-msgid "dynamic-range"
-msgstr "Dynamic range"
-
-# Default: Edited from
-msgid "edited-from"
-msgstr "Edited from"
-
-# Default: Edited into
-msgid "edited-into"
-msgstr "Edited into"
-
-# Default: Editor
-msgid "editor"
-msgstr "Editor"
-
-# Default: Editorial department
-msgid "editorial-department"
-msgstr "Editorial department"
-
-# Default: Episode
-msgid "episode"
-msgstr "Episode"
-
-# Default: Episode of
-msgid "episode-of"
-msgstr "Episode of"
-
-# Default: Episode title
-msgid "episode-title"
-msgstr "Episode title"
-
-# Default: Episodes
-msgid "episodes"
-msgstr "Episodes"
-
-# Default: Episodes rating
-msgid "episodes-rating"
-msgstr "Episodes rating"
-
-# Default: Essays
-msgid "essays"
-msgstr "Essays"
-
-# Default: External reviews
-msgid "external-reviews"
-msgstr "External reviews"
-
-# Default: Faqs
-msgid "faqs"
-msgstr "Faqs"
-
-# Default: Featured in
-msgid "featured-in"
-msgstr "Featured in"
-
-# Default: Features
-msgid "features"
-msgstr "Features"
-
-# Default: Film negative format
-msgid "film-negative-format"
-msgstr "Film negative format"
-
-# Default: Filming dates
-msgid "filming-dates"
-msgstr "Filming dates"
-
-# Default: Filmography
-msgid "filmography"
-msgstr "Filmography"
-
-# Default: Followed by
-msgid "followed-by"
-msgstr "Followed by"
-
-# Default: Follows
-msgid "follows"
-msgstr "Follows"
-
-# Default: For
-msgid "for"
-msgstr "For"
-
-# Default: Frequency response
-msgid "frequency-response"
-msgstr "Frequency response"
-
-# Default: From
-msgid "from"
-msgstr "From"
-
-# Default: Full article link
-msgid "full-article-link"
-msgstr "Full article link"
-
-# Default: Genres
-msgid "genres"
-msgstr "Genres"
-
-# Default: Goofs
-msgid "goofs"
-msgstr "Goofs"
-
-# Default: Gross
-msgid "gross"
-msgstr "Gross"
-
-# Default: Group genre
-msgid "group-genre"
-msgstr "Group genre"
-
-# Default: Headshot
-msgid "headshot"
-msgstr "Headshot"
-
-# Default: Height
-msgid "height"
-msgstr "Height"
-
-# Default: Imdbindex
-msgid "imdbindex"
-msgstr "Imdbindex"
-
-# Default: Interview
-msgid "interview"
-msgstr "Interview"
-
-# Default: Interviews
-msgid "interviews"
-msgstr "Interviews"
-
-# Default: Introduction
-msgid "introduction"
-msgstr "Introduction"
-
-# Default: Item
-msgid "item"
-msgstr "Item"
-
-# Default: Keywords
-msgid "keywords"
-msgstr "Keywords"
-
-# Default: Kind
-msgid "kind"
-msgstr "Kind"
-
-# Default: Label
-msgid "label"
-msgstr "Label"
-
-# Default: Laboratory
-msgid "laboratory"
-msgstr "Laboratory"
-
-# Default: Language
-msgid "language"
-msgstr "Language"
-
-# Default: Languages
-msgid "languages"
-msgstr "Languages"
-
-# Default: Laserdisc
-msgid "laserdisc"
-msgstr "Laserdisc"
-
-# Default: Laserdisc title
-msgid "laserdisc-title"
-msgstr "Laserdisc title"
-
-# Default: Length
-msgid "length"
-msgstr "Length"
-
-# Default: Line
-msgid "line"
-msgstr "Line"
-
-# Default: Link
-msgid "link"
-msgstr "Link"
-
-# Default: Link text
-msgid "link-text"
-msgstr "Link text"
-
-# Default: Literature
-msgid "literature"
-msgstr "Literature"
-
-# Default: Locations
-msgid "locations"
-msgstr "Locations"
-
-# Default: Long imdb canonical name
-msgid "long-imdb-canonical-name"
-msgstr "Long imdb canonical name"
-
-# Default: Long imdb canonical title
-msgid "long-imdb-canonical-title"
-msgstr "Long imdb canonical title"
-
-# Default: Long imdb episode title
-msgid "long-imdb-episode-title"
-msgstr "Long imdb episode title"
-
-# Default: Long imdb name
-msgid "long-imdb-name"
-msgstr "Long imdb name"
-
-# Default: Long imdb title
-msgid "long-imdb-title"
-msgstr "Long imdb title"
-
-# Default: Magazine cover photo
-msgid "magazine-cover-photo"
-msgstr "Magazine cover photo"
-
-# Default: Make up
-msgid "make-up"
-msgstr "Make up"
-
-# Default: Master format
-msgid "master-format"
-msgstr "Master format"
-
-# Default: Median
-msgid "median"
-msgstr "Median"
-
-# Default: Merchandising links
-msgid "merchandising-links"
-msgstr "Merchandising links"
-
-# Default: Mini biography
-msgid "mini-biography"
-msgstr "Mini biography"
-
-# Default: Misc links
-msgid "misc-links"
-msgstr "Misc links"
-
-# Default: Miscellaneous companies
-msgid "miscellaneous-companies"
-msgstr "Miscellaneous companies"
-
-# Default: Miscellaneous crew
-msgid "miscellaneous-crew"
-msgstr "Miscellaneous crew"
-
-# Default: Movie
-msgid "movie"
-msgstr "Movie"
-
-# Default: Mpaa
-msgid "mpaa"
-msgstr "Mpaa"
-
-# Default: Music department
-msgid "music-department"
-msgstr "Music department"
-
-# Default: Name
-msgid "name"
-msgstr "Name"
-
-# Default: News
-msgid "news"
-msgstr "News"
-
-# Default: Newsgroup reviews
-msgid "newsgroup-reviews"
-msgstr "Newsgroup reviews"
-
-# Default: Nick names
-msgid "nick-names"
-msgstr "Nick names"
-
-# Default: Notes
-msgid "notes"
-msgstr "Notes"
-
-# Default: Novel
-msgid "novel"
-msgstr "Novel"
-
-# Default: Number
-msgid "number"
-msgstr "Number"
-
-# Default: Number of chapter stops
-msgid "number-of-chapter-stops"
-msgstr "Number of chapter stops"
-
-# Default: Number of episodes
-msgid "number-of-episodes"
-msgstr "Number of episodes"
-
-# Default: Number of seasons
-msgid "number-of-seasons"
-msgstr "Number of seasons"
-
-# Default: Number of sides
-msgid "number-of-sides"
-msgstr "Number of sides"
-
-# Default: Number of votes
-msgid "number-of-votes"
-msgstr "Number of votes"
-
-# Default: Official retail price
-msgid "official-retail-price"
-msgstr "Official retail price"
-
-# Default: Official sites
-msgid "official-sites"
-msgstr "Official sites"
-
-# Default: Opening weekend
-msgid "opening-weekend"
-msgstr "Opening weekend"
-
-# Default: Original air date
-msgid "original-air-date"
-msgstr "Original air date"
-
-# Default: Original music
-msgid "original-music"
-msgstr "Original music"
-
-# Default: Original title
-msgid "original-title"
-msgstr "Original title"
-
-# Default: Other literature
-msgid "other-literature"
-msgstr "Other literature"
-
-# Default: Other works
-msgid "other-works"
-msgstr "Other works"
-
-# Default: Parents guide
-msgid "parents-guide"
-msgstr "Parents guide"
-
-# Default: Performed by
-msgid "performed-by"
-msgstr "Performed by"
-
-# Default: Person
-msgid "person"
-msgstr "Person"
-
-# Default: Photo sites
-msgid "photo-sites"
-msgstr "Photo sites"
-
-# Default: Pictorial
-msgid "pictorial"
-msgstr "Pictorial"
-
-# Default: Picture format
-msgid "picture-format"
-msgstr "Picture format"
-
-# Default: Plot
-msgid "plot"
-msgstr "Plot"
-
-# Default: Plot outline
-msgid "plot-outline"
-msgstr "Plot outline"
-
-# Default: Portrayed in
-msgid "portrayed-in"
-msgstr "Portrayed in"
-
-# Default: Pressing plant
-msgid "pressing-plant"
-msgstr "Pressing plant"
-
-# Default: Printed film format
-msgid "printed-film-format"
-msgstr "Printed film format"
-
-# Default: Printed media reviews
-msgid "printed-media-reviews"
-msgstr "Printed media reviews"
-
-# Default: Producer
-msgid "producer"
-msgstr "Producer"
-
-# Default: Production companies
-msgid "production-companies"
-msgstr "Production companies"
-
-# Default: Production country
-msgid "production-country"
-msgstr "Production country"
-
-# Default: Production dates
-msgid "production-dates"
-msgstr "Production dates"
-
-# Default: Production design
-msgid "production-design"
-msgstr "Production design"
-
-# Default: Production designer
-msgid "production-designer"
-msgstr "Production designer"
-
-# Default: Production manager
-msgid "production-manager"
-msgstr "Production manager"
-
-# Default: Production process protocol
-msgid "production-process-protocol"
-msgstr "Production process protocol"
-
-# Default: Quality of source
-msgid "quality-of-source"
-msgstr "Quality of source"
-
-# Default: Quality program
-msgid "quality-program"
-msgstr "Quality program"
-
-# Default: Quote
-msgid "quote"
-msgstr "Quote"
-
-# Default: Quotes
-msgid "quotes"
-msgstr "Quotes"
-
-# Default: Rating
-msgid "rating"
-msgstr "Rating"
-
-# Default: Recommendations
-msgid "recommendations"
-msgstr "Recommendations"
-
-# Default: Referenced in
-msgid "referenced-in"
-msgstr "Referenced in"
-
-# Default: References
-msgid "references"
-msgstr "References"
-
-# Default: Region
-msgid "region"
-msgstr "Region"
-
-# Default: Release country
-msgid "release-country"
-msgstr "Release country"
-
-# Default: Release date
-msgid "release-date"
-msgstr "Release date"
-
-# Default: Release dates
-msgid "release-dates"
-msgstr "Release dates"
-
-# Default: Remade as
-msgid "remade-as"
-msgstr "Remade as"
-
-# Default: Remake of
-msgid "remake-of"
-msgstr "Remake of"
-
-# Default: Rentals
-msgid "rentals"
-msgstr "Rentals"
-
-# Default: Result
-msgid "result"
-msgstr "Result"
-
-# Default: Review
-msgid "review"
-msgstr "Review"
-
-# Default: Review author
-msgid "review-author"
-msgstr "Review author"
-
-# Default: Review kind
-msgid "review-kind"
-msgstr "Review kind"
-
-# Default: Runtime
-msgid "runtime"
-msgstr "Runtime"
-
-# Default: Runtimes
-msgid "runtimes"
-msgstr "Runtimes"
-
-# Default: Salary history
-msgid "salary-history"
-msgstr "Salary history"
-
-# Default: Screenplay teleplay
-msgid "screenplay-teleplay"
-msgstr "Screenplay teleplay"
-
-# Default: Season
-msgid "season"
-msgstr "Season"
-
-# Default: Second unit director or assistant director
-msgid "second-unit-director-or-assistant-director"
-msgstr "Second unit director or assistant director"
-
-# Default: Self
-msgid "self"
-msgstr "Self"
-
-# Default: Series animation department
-msgid "series-animation-department"
-msgstr "Series animation department"
-
-# Default: Series art department
-msgid "series-art-department"
-msgstr "Series art department"
-
-# Default: Series assistant directors
-msgid "series-assistant-directors"
-msgstr "Series assistant directors"
-
-# Default: Series camera department
-msgid "series-camera-department"
-msgstr "Series camera department"
-
-# Default: Series casting department
-msgid "series-casting-department"
-msgstr "Series casting department"
-
-# Default: Series cinematographers
-msgid "series-cinematographers"
-msgstr "Series cinematographers"
-
-# Default: Series costume department
-msgid "series-costume-department"
-msgstr "Series costume department"
-
-# Default: Series editorial department
-msgid "series-editorial-department"
-msgstr "Series editorial department"
-
-# Default: Series editors
-msgid "series-editors"
-msgstr "Series editors"
-
-# Default: Series make up department
-msgid "series-make-up-department"
-msgstr "Series make up department"
-
-# Default: Series miscellaneous
-msgid "series-miscellaneous"
-msgstr "Series miscellaneous"
-
-# Default: Series music department
-msgid "series-music-department"
-msgstr "Series music department"
-
-# Default: Series producers
-msgid "series-producers"
-msgstr "Series producers"
-
-# Default: Series production designers
-msgid "series-production-designers"
-msgstr "Series production designers"
-
-# Default: Series production managers
-msgid "series-production-managers"
-msgstr "Series production managers"
-
-# Default: Series sound department
-msgid "series-sound-department"
-msgstr "Series sound department"
-
-# Default: Series special effects department
-msgid "series-special-effects-department"
-msgstr "Series special effects department"
-
-# Default: Series stunts
-msgid "series-stunts"
-msgstr "Series stunts"
-
-# Default: Series title
-msgid "series-title"
-msgstr "Series title"
-
-# Default: Series transportation department
-msgid "series-transportation-department"
-msgstr "Series transportation department"
-
-# Default: Series visual effects department
-msgid "series-visual-effects-department"
-msgstr "Series visual effects department"
-
-# Default: Series writers
-msgid "series-writers"
-msgstr "Series writers"
-
-# Default: Series years
-msgid "series-years"
-msgstr "Series years"
-
-# Default: Set decoration
-msgid "set-decoration"
-msgstr "Set decoration"
-
-# Default: Sharpness
-msgid "sharpness"
-msgstr "Sharpness"
-
-# Default: Similar to
-msgid "similar-to"
-msgstr "Similar to"
-
-# Default: Sound clips
-msgid "sound-clips"
-msgstr "Sound clips"
-
-# Default: Sound crew
-msgid "sound-crew"
-msgstr "Sound crew"
-
-# Default: Sound encoding
-msgid "sound-encoding"
-msgstr "Sound encoding"
-
-# Default: Sound mix
-msgid "sound-mix"
-msgstr "Sound mix"
-
-# Default: Soundtrack
-msgid "soundtrack"
-msgstr "Soundtrack"
-
-# Default: Spaciality
-msgid "spaciality"
-msgstr "Spaciality"
-
-# Default: Special effects
-msgid "special-effects"
-msgstr "Special effects"
-
-# Default: Special effects companies
-msgid "special-effects-companies"
-msgstr "Special effects companies"
-
-# Default: Special effects department
-msgid "special-effects-department"
-msgstr "Special effects department"
-
-# Default: Spin off
-msgid "spin-off"
-msgstr "Spin off"
-
-# Default: Spin off from
-msgid "spin-off-from"
-msgstr "Spin off from"
-
-# Default: Spoofed in
-msgid "spoofed-in"
-msgstr "Spoofed in"
-
-# Default: Spoofs
-msgid "spoofs"
-msgstr "Spoofs"
-
-# Default: Spouse
-msgid "spouse"
-msgstr "Spouse"
-
-# Default: Status of availablility
-msgid "status-of-availablility"
-msgstr "Status of availablility"
-
-# Default: Studio
-msgid "studio"
-msgstr "Studio"
-
-# Default: Studios
-msgid "studios"
-msgstr "Studios"
-
-# Default: Stunt performer
-msgid "stunt-performer"
-msgstr "Stunt performer"
-
-# Default: Stunts
-msgid "stunts"
-msgstr "Stunts"
-
-# Default: Subtitles
-msgid "subtitles"
-msgstr "Subtitles"
-
-# Default: Supplement
-msgid "supplement"
-msgstr "Supplement"
-
-# Default: Supplements
-msgid "supplements"
-msgstr "Supplements"
-
-# Default: Synopsis
-msgid "synopsis"
-msgstr "Synopsis"
-
-# Default: Taglines
-msgid "taglines"
-msgstr "Taglines"
-
-# Default: Tech info
-msgid "tech-info"
-msgstr "Tech info"
-
-# Default: Thanks
-msgid "thanks"
-msgstr "Thanks"
-
-# Default: Time
-msgid "time"
-msgstr "Time"
-
-# Default: Title
-msgid "title"
-msgstr "Title"
-
-# Default: Titles in this product
-msgid "titles-in-this-product"
-msgstr "Titles in this product"
-
-# Default: To
-msgid "to"
-msgstr "To"
-
-# Default: Top 250 rank
-msgid "top-250-rank"
-msgstr "Top 250 rank"
-
-# Default: Trade mark
-msgid "trade-mark"
-msgstr "Trade mark"
-
-# Default: Transportation department
-msgid "transportation-department"
-msgstr "Transportation department"
-
-# Default: Trivia
-msgid "trivia"
-msgstr "Trivia"
-
-# Default: Under license from
-msgid "under-license-from"
-msgstr "Under license from"
-
-# Default: Unknown link
-msgid "unknown-link"
-msgstr "Unknown link"
-
-# Default: Upc
-msgid "upc"
-msgstr "Upc"
-
-# Default: Version of
-msgid "version-of"
-msgstr "Version of"
-
-# Default: Vhs
-msgid "vhs"
-msgstr "Vhs"
-
-# Default: Video artifacts
-msgid "video-artifacts"
-msgstr "Video artifacts"
-
-# Default: Video clips
-msgid "video-clips"
-msgstr "Video clips"
-
-# Default: Video noise
-msgid "video-noise"
-msgstr "Video noise"
-
-# Default: Video quality
-msgid "video-quality"
-msgstr "Video quality"
-
-# Default: Video standard
-msgid "video-standard"
-msgstr "Video standard"
-
-# Default: Visual effects
-msgid "visual-effects"
-msgstr "Visual effects"
-
-# Default: Votes
-msgid "votes"
-msgstr "Votes"
-
-# Default: Votes distribution
-msgid "votes-distribution"
-msgstr "Votes distribution"
-
-# Default: Weekend gross
-msgid "weekend-gross"
-msgstr "Weekend gross"
-
-# Default: Where now
-msgid "where-now"
-msgstr "Where now"
-
-# Default: With
-msgid "with"
-msgstr "With"
-
-# Default: Writer
-msgid "writer"
-msgstr "Writer"
-
-# Default: Written by
-msgid "written-by"
-msgstr "Written by"
-
-# Default: Year
-msgid "year"
-msgstr "Year"
-
-# Default: Zshops
-msgid "zshops"
-msgstr "Zshops"
-
diff --git a/libs/imdb/locale/imdbpy-it.po b/libs/imdb/locale/imdbpy-it.po
deleted file mode 100644
index 17cfce46..00000000
--- a/libs/imdb/locale/imdbpy-it.po
+++ /dev/null
@@ -1,1300 +0,0 @@
-# Gettext message file for imdbpy
-msgid ""
-msgstr ""
-"Project-Id-Version: imdbpy\n"
-"POT-Creation-Date: 2010-03-18 14:35+0000\n"
-"PO-Revision-Date: 2009-07-03 13:00+0000\n"
-"Last-Translator: Davide Alberani \n"
-"Language-Team: Davide Alberani \n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=UTF-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-"Plural-Forms: nplurals=2; plural=(n != 1);\n"
-"Language-Code: it\n"
-"Language-Name: Italian\n"
-"Preferred-Encodings: utf-8\n"
-"Domain: imdbpy\n"
-
-# Default: Actor
-msgid "actor"
-msgstr "Attore"
-
-# Default: Actress
-msgid "actress"
-msgstr "Attrice"
-
-# Default: Adaption
-msgid "adaption"
-msgstr "Adattamento"
-
-# Default: Additional information
-msgid "additional-information"
-msgstr "Ulteriori informazioni"
-
-# Default: Admissions
-msgid "admissions"
-msgstr "Biglietti venduti"
-
-# Default: Agent address
-msgid "agent-address"
-msgstr "Indirizzo dell'agente"
-
-# Default: Airing
-msgid "airing"
-msgstr "In onda"
-
-# Default: Akas
-msgid "akas"
-msgstr "Alias"
-
-# Default: Akas from release info
-msgid "akas-from-release-info"
-msgstr "Alias dalle informazioni di rilascio"
-
-# Default: All products
-msgid "all-products"
-msgstr "Tutti i prodotti"
-
-# Default: Alternate language version of
-msgid "alternate-language-version-of"
-msgstr "Versione in altra lingua di"
-
-# Default: Alternate versions
-msgid "alternate-versions"
-msgstr "Versioni alternative"
-
-# Default: Amazon reviews
-msgid "amazon-reviews"
-msgstr "Recensione di Amazon"
-
-# Default: Analog left
-msgid "analog-left"
-msgstr "Analogico sinistro"
-
-# Default: Analog right
-msgid "analog-right"
-msgstr "Analogico destro"
-
-# Default: Animation department
-msgid "animation-department"
-msgstr "Dipartimento animazione"
-
-# Default: Archive footage
-msgid "archive-footage"
-msgstr "Materiale d'archivio"
-
-# Default: Arithmetic mean
-msgid "arithmetic-mean"
-msgstr "Media aritmetica"
-
-# Default: Art department
-msgid "art-department"
-msgstr "Dipartimento artistico"
-
-# Default: Art direction
-msgid "art-direction"
-msgstr "Direzione artistica"
-
-# Default: Art director
-msgid "art-director"
-msgstr "Direttore artistico"
-
-# Default: Article
-msgid "article"
-msgstr "Articolo"
-
-# Default: Asin
-msgid "asin"
-msgstr "Asin"
-
-# Default: Aspect ratio
-msgid "aspect-ratio"
-msgstr "Rapporto d'aspetto"
-
-# Default: Assigner
-msgid "assigner"
-msgstr "Assegnatario"
-
-# Default: Assistant director
-msgid "assistant-director"
-msgstr "Assistente regista"
-
-# Default: Auctions
-msgid "auctions"
-msgstr "Aste"
-
-# Default: Audio noise
-msgid "audio-noise"
-msgstr "Rumore audio"
-
-# Default: Audio quality
-msgid "audio-quality"
-msgstr "Qualità audio"
-
-# Default: Award
-msgid "award"
-msgstr "Premio"
-
-# Default: Awards
-msgid "awards"
-msgstr "Premi"
-
-# Default: Biographical movies
-msgid "biographical-movies"
-msgstr "Film biografici"
-
-# Default: Biography
-msgid "biography"
-msgstr "Biografia"
-
-# Default: Biography print
-msgid "biography-print"
-msgstr "Biografia"
-
-# Default: Birth date
-msgid "birth-date"
-msgstr "Data di nascita"
-
-# Default: Birth name
-msgid "birth-name"
-msgstr "Nome di nascita"
-
-# Default: Birth notes
-msgid "birth-notes"
-msgstr "Note di nascita"
-
-# Default: Body
-msgid "body"
-msgstr "Corpo"
-
-# Default: Book
-msgid "book"
-msgstr "Libro"
-
-# Default: Books
-msgid "books"
-msgstr "Libri"
-
-# Default: Bottom 100 rank
-msgid "bottom-100-rank"
-msgstr "Posizione nella bottom 100"
-
-# Default: Budget
-msgid "budget"
-msgstr "Bilancio"
-
-# Default: Business
-msgid "business"
-msgstr "Affari"
-
-# Default: By arrangement with
-msgid "by-arrangement-with"
-msgstr "Arrangiamento con"
-
-# Default: Camera
-msgid "camera"
-msgstr "Cinepresa"
-
-# Default: Camera and electrical department
-msgid "camera-and-electrical-department"
-msgstr "Cinepresa e dipartimento elettrico"
-
-# Default: Canonical episode title
-msgid "canonical-episode-title"
-msgstr "Titolo dell'episodio in forma canonica"
-
-# Default: Canonical name
-msgid "canonical-name"
-msgstr "Nome in forma canonica"
-
-# Default: Canonical series title
-msgid "canonical-series-title"
-msgstr "Titolo della serie in forma canonica"
-
-# Default: Canonical title
-msgid "canonical-title"
-msgstr "Titolo in forma canonica"
-
-# Default: Cast
-msgid "cast"
-msgstr "Cast"
-
-# Default: Casting department
-msgid "casting-department"
-msgstr "Casting"
-
-# Default: Casting director
-msgid "casting-director"
-msgstr "Direttore del casting"
-
-# Default: Catalog number
-msgid "catalog-number"
-msgstr "Numero di catalogo"
-
-# Default: Category
-msgid "category"
-msgstr "Categoria"
-
-# Default: Certificate
-msgid "certificate"
-msgstr "Certificazione"
-
-# Default: Certificates
-msgid "certificates"
-msgstr "Certificazioni"
-
-# Default: Certification
-msgid "certification"
-msgstr "Certificazioni"
-
-# Default: Channel
-msgid "channel"
-msgstr "Canale"
-
-# Default: Character
-msgid "character"
-msgstr "Personaggio"
-
-# Default: Cinematographer
-msgid "cinematographer"
-msgstr "Fotografia"
-
-# Default: Cinematographic process
-msgid "cinematographic-process"
-msgstr "Processo cinematografico"
-
-# Default: Close captions teletext ld g
-msgid "close-captions-teletext-ld-g"
-msgstr ""
-
-# Default: Color info
-msgid "color-info"
-msgstr "Colore"
-
-# Default: Color information
-msgid "color-information"
-msgstr "Informazioni sul colore"
-
-# Default: Color rendition
-msgid "color-rendition"
-msgstr "Resa dei colori"
-
-# Default: Company
-msgid "company"
-msgstr "Compagnia"
-
-# Default: Complete cast
-msgid "complete-cast"
-msgstr "Cast completo"
-
-# Default: Complete crew
-msgid "complete-crew"
-msgstr "Troupe completa"
-
-# Default: Composer
-msgid "composer"
-msgstr "Compositore"
-
-# Default: Connections
-msgid "connections"
-msgstr "Collegamenti"
-
-# Default: Contrast
-msgid "contrast"
-msgstr "Contrasto"
-
-# Default: Copyright holder
-msgid "copyright-holder"
-msgstr "Detentore dei diritti d'autore"
-
-# Default: Costume department
-msgid "costume-department"
-msgstr "Dipartimento costumi"
-
-# Default: Costume designer
-msgid "costume-designer"
-msgstr "Costumista"
-
-# Default: Countries
-msgid "countries"
-msgstr "Paesi"
-
-# Default: Country
-msgid "country"
-msgstr "Paese"
-
-# Default: Courtesy of
-msgid "courtesy-of"
-msgstr "Cortesia di"
-
-# Default: Cover
-msgid "cover"
-msgstr "Copertina"
-
-# Default: Cover url
-msgid "cover-url"
-msgstr "Locandina"
-
-# Default: Crazy credits
-msgid "crazy-credits"
-msgstr "Titoli pazzi"
-
-# Default: Creator
-msgid "creator"
-msgstr "Creatore"
-
-# Default: Current role
-msgid "current-role"
-msgstr "Ruolo"
-
-# Default: Database
-msgid "database"
-msgstr "Database"
-
-# Default: Date
-msgid "date"
-msgstr "Data"
-
-# Default: Death date
-msgid "death-date"
-msgstr "Data di morte"
-
-# Default: Death notes
-msgid "death-notes"
-msgstr "Note di morte"
-
-# Default: Demographic
-msgid "demographic"
-msgstr "Spaccato demografico"
-
-# Default: Description
-msgid "description"
-msgstr "Descrizione"
-
-# Default: Dialogue intellegibility
-msgid "dialogue-intellegibility"
-msgstr "Comprensibilità dei dialoghi"
-
-# Default: Digital sound
-msgid "digital-sound"
-msgstr "Suono digitale"
-
-# Default: Director
-msgid "director"
-msgstr "Regista"
-
-# Default: Disc format
-msgid "disc-format"
-msgstr "Formato del disco"
-
-# Default: Disc size
-msgid "disc-size"
-msgstr "Dimensione del disco"
-
-# Default: Distributors
-msgid "distributors"
-msgstr "Distributori"
-
-# Default: Dvd
-msgid "dvd"
-msgstr "Dvd"
-
-# Default: Dvd features
-msgid "dvd-features"
-msgstr "Caratteristiche del DVD"
-
-# Default: Dvd format
-msgid "dvd-format"
-msgstr "Formato del DVD"
-
-# Default: Dvds
-msgid "dvds"
-msgstr "Dvd"
-
-# Default: Dynamic range
-msgid "dynamic-range"
-msgstr "Intervallo dinamico"
-
-# Default: Edited from
-msgid "edited-from"
-msgstr "Tratto da"
-
-# Default: Edited into
-msgid "edited-into"
-msgstr "Montato in"
-
-# Default: Editor
-msgid "editor"
-msgstr "Editore"
-
-# Default: Editorial department
-msgid "editorial-department"
-msgstr "Dipartimento editoriale"
-
-# Default: Episode
-msgid "episode"
-msgstr "Episodio"
-
-# Default: Episode of
-msgid "episode-of"
-msgstr "Episodio di"
-
-# Default: Episode title
-msgid "episode-title"
-msgstr "Titolo dell'episodio"
-
-# Default: Episodes
-msgid "episodes"
-msgstr "Episodi"
-
-# Default: Episodes rating
-msgid "episodes-rating"
-msgstr "Voto degli episodi"
-
-# Default: Essays
-msgid "essays"
-msgstr "Saggi"
-
-# Default: External reviews
-msgid "external-reviews"
-msgstr "Recensioni esterne"
-
-# Default: Faqs
-msgid "faqs"
-msgstr "Domande ricorrenti"
-
-# Default: Feature
-msgid "feature"
-msgstr "Caratteristica"
-
-# Default: Featured in
-msgid "featured-in"
-msgstr "Ripreso in"
-
-# Default: Features
-msgid "features"
-msgstr "Caratteristiche"
-
-# Default: Film negative format
-msgid "film-negative-format"
-msgstr "Formato del negativo"
-
-# Default: Filming dates
-msgid "filming-dates"
-msgstr "Data delle riprese"
-
-# Default: Filmography
-msgid "filmography"
-msgstr "Filmografia"
-
-# Default: Followed by
-msgid "followed-by"
-msgstr "Seguito da"
-
-# Default: Follows
-msgid "follows"
-msgstr "Segue"
-
-# Default: For
-msgid "for"
-msgstr "Per"
-
-# Default: Frequency response
-msgid "frequency-response"
-msgstr "Frequenze di risposta"
-
-# Default: From
-msgid "from"
-msgstr "Da"
-
-# Default: Full article link
-msgid "full-article-link"
-msgstr "Collegamento all'articolo completo"
-
-# Default: Full size cover url
-msgid "full-size-cover-url"
-msgstr "URL della copertina nelle dimensioni originali"
-
-# Default: Full size headshot
-msgid "full-size-headshot"
-msgstr "Ritratto nelle dimensioni originali"
-
-# Default: Genres
-msgid "genres"
-msgstr "Generi"
-
-# Default: Goofs
-msgid "goofs"
-msgstr "Errori"
-
-# Default: Gross
-msgid "gross"
-msgstr "Lordo"
-
-# Default: Group genre
-msgid "group-genre"
-msgstr ""
-
-# Default: Headshot
-msgid "headshot"
-msgstr "Foto"
-
-# Default: Height
-msgid "height"
-msgstr "Altezza"
-
-# Default: Imdbindex
-msgid "imdbindex"
-msgstr ""
-
-# Default: In development
-msgid "in-development"
-msgstr "In sviluppo"
-
-# Default: Interview
-msgid "interview"
-msgstr "Intervista"
-
-# Default: Interviews
-msgid "interviews"
-msgstr "Interviste"
-
-# Default: Introduction
-msgid "introduction"
-msgstr "Introduzione"
-
-# Default: Item
-msgid "item"
-msgstr "Elemento"
-
-# Default: Keywords
-msgid "keywords"
-msgstr "Parole chiave"
-
-# Default: Kind
-msgid "kind"
-msgstr "Tipo"
-
-# Default: Label
-msgid "label"
-msgstr "Etichetta"
-
-# Default: Laboratory
-msgid "laboratory"
-msgstr "Laboratorio"
-
-# Default: Language
-msgid "language"
-msgstr "Lingua"
-
-# Default: Languages
-msgid "languages"
-msgstr "Lingue"
-
-# Default: Laserdisc
-msgid "laserdisc"
-msgstr "Laserdisc"
-
-# Default: Laserdisc title
-msgid "laserdisc-title"
-msgstr "Titolo del laserdisc"
-
-# Default: Length
-msgid "length"
-msgstr "Durata"
-
-# Default: Line
-msgid "line"
-msgstr "Battuta"
-
-# Default: Link
-msgid "link"
-msgstr "Collegamento"
-
-# Default: Link text
-msgid "link-text"
-msgstr "Testo del link"
-
-# Default: Literature
-msgid "literature"
-msgstr "Letteratura"
-
-# Default: Locations
-msgid "locations"
-msgstr "Luoghi"
-
-# Default: Long imdb canonical name
-msgid "long-imdb-canonical-name"
-msgstr "Nome canonico IMDb lungo"
-
-# Default: Long imdb canonical title
-msgid "long-imdb-canonical-title"
-msgstr "Titolo canonico IMDb lungo"
-
-# Default: Long imdb episode title
-msgid "long-imdb-episode-title"
-msgstr "Titolo dell'episodio canonico IMDb lungo"
-
-# Default: Long imdb name
-msgid "long-imdb-name"
-msgstr "Nome IMDb lungo"
-
-# Default: Long imdb title
-msgid "long-imdb-title"
-msgstr "Titolo IMDb lungo"
-
-# Default: Magazine cover photo
-msgid "magazine-cover-photo"
-msgstr "Foto di copertina"
-
-# Default: Make up
-msgid "make-up"
-msgstr "Trucco"
-
-# Default: Master format
-msgid "master-format"
-msgstr "Formato del master"
-
-# Default: Median
-msgid "median"
-msgstr "Mediana"
-
-# Default: Merchandising links
-msgid "merchandising-links"
-msgstr "Collegamenti al merchandising"
-
-# Default: Mini biography
-msgid "mini-biography"
-msgstr "Biografia"
-
-# Default: Misc links
-msgid "misc-links"
-msgstr "Altri collegamenti"
-
-# Default: Miscellaneous companies
-msgid "miscellaneous-companies"
-msgstr "Altre compagnie"
-
-# Default: Miscellaneous crew
-msgid "miscellaneous-crew"
-msgstr "Altra troupe"
-
-# Default: Movie
-msgid "movie"
-msgstr "Film"
-
-# Default: Mpaa
-msgid "mpaa"
-msgstr "Visto MPAA"
-
-# Default: Music department
-msgid "music-department"
-msgstr "Dipartimento musicale"
-
-# Default: Name
-msgid "name"
-msgstr "Nome"
-
-# Default: News
-msgid "news"
-msgstr "Notizie"
-
-# Default: Newsgroup reviews
-msgid "newsgroup-reviews"
-msgstr "Recensioni dai gruppi di discussione"
-
-# Default: Nick names
-msgid "nick-names"
-msgstr "Soprannomi"
-
-# Default: Notes
-msgid "notes"
-msgstr "Note"
-
-# Default: Novel
-msgid "novel"
-msgstr "Novella"
-
-# Default: Number
-msgid "number"
-msgstr "Numero"
-
-# Default: Number of chapter stops
-msgid "number-of-chapter-stops"
-msgstr "Numero di interruzioni di capitolo"
-
-# Default: Number of episodes
-msgid "number-of-episodes"
-msgstr "Numero di episodi"
-
-# Default: Number of seasons
-msgid "number-of-seasons"
-msgstr "Numero di stagioni"
-
-# Default: Number of sides
-msgid "number-of-sides"
-msgstr "Numero di lati"
-
-# Default: Number of votes
-msgid "number-of-votes"
-msgstr "Numero di voti"
-
-# Default: Official retail price
-msgid "official-retail-price"
-msgstr "Prezzo ufficiale al pubblico"
-
-# Default: Official sites
-msgid "official-sites"
-msgstr "Siti ufficiali"
-
-# Default: Opening weekend
-msgid "opening-weekend"
-msgstr "Weekend d'apertura"
-
-# Default: Original air date
-msgid "original-air-date"
-msgstr "Data della prima messa in onda"
-
-# Default: Original music
-msgid "original-music"
-msgstr "Musica originale"
-
-# Default: Original title
-msgid "original-title"
-msgstr "Titolo originale"
-
-# Default: Other literature
-msgid "other-literature"
-msgstr "Altre opere letterarie"
-
-# Default: Other works
-msgid "other-works"
-msgstr "Altri lavori"
-
-# Default: Parents guide
-msgid "parents-guide"
-msgstr "Guida per i genitori"
-
-# Default: Performed by
-msgid "performed-by"
-msgstr "Eseguito da"
-
-# Default: Person
-msgid "person"
-msgstr "Persona"
-
-# Default: Photo sites
-msgid "photo-sites"
-msgstr "Siti con fotografie"
-
-# Default: Pictorial
-msgid "pictorial"
-msgstr "Ritratto"
-
-# Default: Picture format
-msgid "picture-format"
-msgstr "Formato dell'immagine"
-
-# Default: Plot
-msgid "plot"
-msgstr "Trama"
-
-# Default: Plot outline
-msgid "plot-outline"
-msgstr "Trama in breve"
-
-# Default: Portrayed in
-msgid "portrayed-in"
-msgstr "Rappresentato in"
-
-# Default: Pressing plant
-msgid "pressing-plant"
-msgstr "Impianto di stampa"
-
-# Default: Printed film format
-msgid "printed-film-format"
-msgstr "Formato della pellicola"
-
-# Default: Printed media reviews
-msgid "printed-media-reviews"
-msgstr "Recensioni su carta stampata"
-
-# Default: Producer
-msgid "producer"
-msgstr "Produttore"
-
-# Default: Production companies
-msgid "production-companies"
-msgstr "Compagnie di produzione"
-
-# Default: Production country
-msgid "production-country"
-msgstr "Paese di produzione"
-
-# Default: Production dates
-msgid "production-dates"
-msgstr "Date di produzione"
-
-# Default: Production design
-msgid "production-design"
-msgstr "Design di produzione"
-
-# Default: Production designer
-msgid "production-designer"
-msgstr "Designer di produzione"
-
-# Default: Production manager
-msgid "production-manager"
-msgstr "Manager di produzione"
-
-# Default: Production process protocol
-msgid "production-process-protocol"
-msgstr "Controllo del processo di produzione"
-
-# Default: Quality of source
-msgid "quality-of-source"
-msgstr "Qualità dell'originale"
-
-# Default: Quality program
-msgid "quality-program"
-msgstr "Programma di Qualità"
-
-# Default: Quote
-msgid "quote"
-msgstr "Citazione"
-
-# Default: Quotes
-msgid "quotes"
-msgstr "Citazioni"
-
-# Default: Rating
-msgid "rating"
-msgstr "Voto"
-
-# Default: Recommendations
-msgid "recommendations"
-msgstr "Raccomandazioni"
-
-# Default: Referenced in
-msgid "referenced-in"
-msgstr "Citato in"
-
-# Default: References
-msgid "references"
-msgstr "Cita"
-
-# Default: Region
-msgid "region"
-msgstr "Regione"
-
-# Default: Release country
-msgid "release-country"
-msgstr "Paese d'uscita"
-
-# Default: Release date
-msgid "release-date"
-msgstr "Data d'uscita"
-
-# Default: Release dates
-msgid "release-dates"
-msgstr "Date d'uscita"
-
-# Default: Remade as
-msgid "remade-as"
-msgstr "Rifatto come"
-
-# Default: Remake of
-msgid "remake-of"
-msgstr "Rifacimento di"
-
-# Default: Rentals
-msgid "rentals"
-msgstr "Noleggi"
-
-# Default: Result
-msgid "result"
-msgstr "Risultato"
-
-# Default: Review
-msgid "review"
-msgstr "Recensione"
-
-# Default: Review author
-msgid "review-author"
-msgstr "Autore della recensione"
-
-# Default: Review kind
-msgid "review-kind"
-msgstr "Tipo di recensione"
-
-# Default: Runtime
-msgid "runtime"
-msgstr "Durata"
-
-# Default: Runtimes
-msgid "runtimes"
-msgstr "Durate"
-
-# Default: Salary history
-msgid "salary-history"
-msgstr "Stipendi"
-
-# Default: Screenplay teleplay
-msgid "screenplay-teleplay"
-msgstr ""
-
-# Default: Season
-msgid "season"
-msgstr "Stagione"
-
-# Default: Second unit director or assistant director
-msgid "second-unit-director-or-assistant-director"
-msgstr "Regista della seconda unità o aiuto regista"
-
-# Default: Self
-msgid "self"
-msgstr "Se stesso"
-
-# Default: Series animation department
-msgid "series-animation-department"
-msgstr "Dipartimento animazione della serie"
-
-# Default: Series art department
-msgid "series-art-department"
-msgstr "Dipartimento artistico della serie"
-
-# Default: Series assistant directors
-msgid "series-assistant-directors"
-msgstr "Assistenti registi della serie"
-
-# Default: Series camera department
-msgid "series-camera-department"
-msgstr ""
-
-# Default: Series casting department
-msgid "series-casting-department"
-msgstr ""
-
-# Default: Series cinematographers
-msgid "series-cinematographers"
-msgstr ""
-
-# Default: Series costume department
-msgid "series-costume-department"
-msgstr ""
-
-# Default: Series editorial department
-msgid "series-editorial-department"
-msgstr ""
-
-# Default: Series editors
-msgid "series-editors"
-msgstr ""
-
-# Default: Series make up department
-msgid "series-make-up-department"
-msgstr ""
-
-# Default: Series miscellaneous
-msgid "series-miscellaneous"
-msgstr ""
-
-# Default: Series music department
-msgid "series-music-department"
-msgstr ""
-
-# Default: Series producers
-msgid "series-producers"
-msgstr ""
-
-# Default: Series production designers
-msgid "series-production-designers"
-msgstr ""
-
-# Default: Series production managers
-msgid "series-production-managers"
-msgstr ""
-
-# Default: Series sound department
-msgid "series-sound-department"
-msgstr "Dipartimento sonoro della serie"
-
-# Default: Series special effects department
-msgid "series-special-effects-department"
-msgstr "Dipartimento effetti speciali della serie"
-
-# Default: Series stunts
-msgid "series-stunts"
-msgstr "Controfigure della serie"
-
-# Default: Series title
-msgid "series-title"
-msgstr "Titolo della serie"
-
-# Default: Series transportation department
-msgid "series-transportation-department"
-msgstr ""
-
-# Default: Series visual effects department
-msgid "series-visual-effects-department"
-msgstr ""
-
-# Default: Series writers
-msgid "series-writers"
-msgstr "Scrittori della serie"
-
-# Default: Series years
-msgid "series-years"
-msgstr "Anni della serie"
-
-# Default: Set decoration
-msgid "set-decoration"
-msgstr "Decorazione del set"
-
-# Default: Sharpness
-msgid "sharpness"
-msgstr ""
-
-# Default: Similar to
-msgid "similar-to"
-msgstr "Simile a"
-
-# Default: Smart canonical episode title
-msgid "smart-canonical-episode-title"
-msgstr "Titolo canonico intelligente dell'episodio"
-
-# Default: Smart canonical series title
-msgid "smart-canonical-series-title"
-msgstr "Titolo canonico intelligente della serie"
-
-# Default: Smart canonical title
-msgid "smart-canonical-title"
-msgstr "Titolo canonico intelligente"
-
-# Default: Smart long imdb canonical title
-msgid "smart-long-imdb-canonical-title"
-msgstr "Titolo canonico lungo intelligente"
-
-# Default: Sound clips
-msgid "sound-clips"
-msgstr ""
-
-# Default: Sound crew
-msgid "sound-crew"
-msgstr ""
-
-# Default: Sound encoding
-msgid "sound-encoding"
-msgstr "Codifica sonora"
-
-# Default: Sound mix
-msgid "sound-mix"
-msgstr "Mix audio"
-
-# Default: Soundtrack
-msgid "soundtrack"
-msgstr "Colonna sonora"
-
-# Default: Spaciality
-msgid "spaciality"
-msgstr "Specialità"
-
-# Default: Special effects
-msgid "special-effects"
-msgstr "Effetti speciali"
-
-# Default: Special effects companies
-msgid "special-effects-companies"
-msgstr "Compagnie di effetti speciali"
-
-# Default: Special effects department
-msgid "special-effects-department"
-msgstr "Dipartimento effetti speciali"
-
-# Default: Spin off
-msgid "spin-off"
-msgstr "Derivati"
-
-# Default: Spin off from
-msgid "spin-off-from"
-msgstr "Deriva da"
-
-# Default: Spoofed in
-msgid "spoofed-in"
-msgstr "Preso in giro in"
-
-# Default: Spoofs
-msgid "spoofs"
-msgstr "Prende in giro"
-
-# Default: Spouse
-msgid "spouse"
-msgstr "Coniuge"
-
-# Default: Status of availablility
-msgid "status-of-availablility"
-msgstr "Disponibilità"
-
-# Default: Studio
-msgid "studio"
-msgstr "Studio"
-
-# Default: Studios
-msgid "studios"
-msgstr "Studi"
-
-# Default: Stunt performer
-msgid "stunt-performer"
-msgstr ""
-
-# Default: Stunts
-msgid "stunts"
-msgstr "Stuntman"
-
-# Default: Subtitles
-msgid "subtitles"
-msgstr "Sottotitoli"
-
-# Default: Supplement
-msgid "supplement"
-msgstr "Extra"
-
-# Default: Supplements
-msgid "supplements"
-msgstr "Extra"
-
-# Default: Synopsis
-msgid "synopsis"
-msgstr "Compendio della trama"
-
-# Default: Taglines
-msgid "taglines"
-msgstr "Slogan"
-
-# Default: Tech info
-msgid "tech-info"
-msgstr "Informazioni tecniche"
-
-# Default: Thanks
-msgid "thanks"
-msgstr "Ringraziamenti"
-
-# Default: Time
-msgid "time"
-msgstr "Tempo"
-
-# Default: Title
-msgid "title"
-msgstr "Titolo"
-
-# Default: Titles in this product
-msgid "titles-in-this-product"
-msgstr "Titoli in questo prodotto"
-
-# Default: To
-msgid "to"
-msgstr "A"
-
-# Default: Top 250 rank
-msgid "top-250-rank"
-msgstr "Posizione nella top 250"
-
-# Default: Trade mark
-msgid "trade-mark"
-msgstr "Marchio registrato"
-
-# Default: Transportation department
-msgid "transportation-department"
-msgstr "Dipartimento trasporti"
-
-# Default: Trivia
-msgid "trivia"
-msgstr "Frivolezze"
-
-# Default: Tv
-msgid "tv"
-msgstr "Tv"
-
-# Default: Under license from
-msgid "under-license-from"
-msgstr "Sotto licenza da"
-
-# Default: Unknown link
-msgid "unknown-link"
-msgstr "Collegamento sconosciuto"
-
-# Default: Upc
-msgid "upc"
-msgstr ""
-
-# Default: Version of
-msgid "version-of"
-msgstr "Versione di"
-
-# Default: Vhs
-msgid "vhs"
-msgstr "VHS"
-
-# Default: Video
-msgid "video"
-msgstr "Video"
-
-# Default: Video artifacts
-msgid "video-artifacts"
-msgstr "Imperfezioni video"
-
-# Default: Video clips
-msgid "video-clips"
-msgstr "Video clips"
-
-# Default: Video noise
-msgid "video-noise"
-msgstr "Rumore video"
-
-# Default: Video quality
-msgid "video-quality"
-msgstr "Qualità video"
-
-# Default: Video standard
-msgid "video-standard"
-msgstr "Standard video"
-
-# Default: Visual effects
-msgid "visual-effects"
-msgstr "Effetti visivi"
-
-# Default: Votes
-msgid "votes"
-msgstr "Voti"
-
-# Default: Votes distribution
-msgid "votes-distribution"
-msgstr "Distribuzione dei voti"
-
-# Default: Weekend gross
-msgid "weekend-gross"
-msgstr "Lordo del primo fine settimana"
-
-# Default: Where now
-msgid "where-now"
-msgstr "Cosa sta facendo ora"
-
-# Default: With
-msgid "with"
-msgstr "Con"
-
-# Default: Writer
-msgid "writer"
-msgstr "Scrittore"
-
-# Default: Written by
-msgid "written-by"
-msgstr "Scritto da"
-
-# Default: Year
-msgid "year"
-msgstr "Anno"
-
-# Default: Zshops
-msgid "zshops"
-msgstr ""
diff --git a/libs/imdb/locale/imdbpy-tr.po b/libs/imdb/locale/imdbpy-tr.po
deleted file mode 100644
index a44452ae..00000000
--- a/libs/imdb/locale/imdbpy-tr.po
+++ /dev/null
@@ -1,1300 +0,0 @@
-# Gettext message file for imdbpy
-msgid ""
-msgstr ""
-"Project-Id-Version: imdbpy\n"
-"POT-Creation-Date: 2010-03-18 14:35+0000\n"
-"PO-Revision-Date: 2009-04-21 19:04+0200\n"
-"Last-Translator: H. Turgut Uyar \n"
-"Language-Team: IMDbPY Türkçe \n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=UTF-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-"Plural-Forms: nplurals=1; plural=0;\n"
-"Language-Code: tr\n"
-"Language-Name: Türkçe\n"
-"Preferred-Encodings: utf-8\n"
-"Domain: imdbpy\n"
-
-# Default: Actor
-msgid "actor"
-msgstr "Oyuncu"
-
-# Default: Actress
-msgid "actress"
-msgstr "Oyuncu"
-
-# Default: Adaption
-msgid "adaption"
-msgstr ""
-
-# Default: Additional information
-msgid "additional-information"
-msgstr "Ek bilgi"
-
-# Default: Admissions
-msgid "admissions"
-msgstr ""
-
-# Default: Agent address
-msgid "agent-address"
-msgstr ""
-
-# Default: Airing
-msgid "airing"
-msgstr "Yayımlanma"
-
-# Default: Akas
-msgid "akas"
-msgstr "Diğer başlıklar"
-
-# Default: Akas from release info
-msgid "akas-from-release-info"
-msgstr ""
-
-# Default: All products
-msgid "all-products"
-msgstr "Bütün ürünler"
-
-# Default: Alternate language version of
-msgid "alternate-language-version-of"
-msgstr ""
-
-# Default: Alternate versions
-msgid "alternate-versions"
-msgstr ""
-
-# Default: Amazon reviews
-msgid "amazon-reviews"
-msgstr "Amazon eleştirileri"
-
-# Default: Analog left
-msgid "analog-left"
-msgstr "Analog sol"
-
-# Default: Analog right
-msgid "analog-right"
-msgstr "Analog sağ"
-
-# Default: Animation department
-msgid "animation-department"
-msgstr "Animasyon departmanı"
-
-# Default: Archive footage
-msgid "archive-footage"
-msgstr "Arşiv çekimleri"
-
-# Default: Arithmetic mean
-msgid "arithmetic-mean"
-msgstr "Aritmetik ortalama"
-
-# Default: Art department
-msgid "art-department"
-msgstr "Sanat departmanı"
-
-# Default: Art direction
-msgid "art-direction"
-msgstr "Sanat yönetmenliği"
-
-# Default: Art director
-msgid "art-director"
-msgstr "Sanat yönetmeni"
-
-# Default: Article
-msgid "article"
-msgstr ""
-
-# Default: Asin
-msgid "asin"
-msgstr "ASIN"
-
-# Default: Aspect ratio
-msgid "aspect-ratio"
-msgstr "En-boy oranı"
-
-# Default: Assigner
-msgid "assigner"
-msgstr "Veren"
-
-# Default: Assistant director
-msgid "assistant-director"
-msgstr "Yardımcı yönetmen"
-
-# Default: Auctions
-msgid "auctions"
-msgstr "Açık artırmalar"
-
-# Default: Audio noise
-msgid "audio-noise"
-msgstr "Ses gürültüsü"
-
-# Default: Audio quality
-msgid "audio-quality"
-msgstr "Ses kalitesi"
-
-# Default: Award
-msgid "award"
-msgstr "Ödül"
-
-# Default: Awards
-msgid "awards"
-msgstr "Ödüller"
-
-# Default: Biographical movies
-msgid "biographical-movies"
-msgstr "Biyografik filmler"
-
-# Default: Biography
-msgid "biography"
-msgstr "Biyografi"
-
-# Default: Biography print
-msgid "biography-print"
-msgstr "Basılı biyografi"
-
-# Default: Birth date
-msgid "birth-date"
-msgstr "Doğum tarihi"
-
-# Default: Birth name
-msgid "birth-name"
-msgstr "Asıl ismi"
-
-# Default: Birth notes
-msgid "birth-notes"
-msgstr "Doğum notları"
-
-# Default: Body
-msgid "body"
-msgstr "Metin"
-
-# Default: Book
-msgid "book"
-msgstr "Kitap"
-
-# Default: Books
-msgid "books"
-msgstr "Kitaplar"
-
-# Default: Bottom 100 rank
-msgid "bottom-100-rank"
-msgstr "En kötü 100 içindeki sırası"
-
-# Default: Budget
-msgid "budget"
-msgstr "Bütçe"
-
-# Default: Business
-msgid "business"
-msgstr "Gişe"
-
-# Default: By arrangement with
-msgid "by-arrangement-with"
-msgstr ""
-
-# Default: Camera
-msgid "camera"
-msgstr "Kamera"
-
-# Default: Camera and electrical department
-msgid "camera-and-electrical-department"
-msgstr "Kamera ve elektrik departmanı"
-
-# Default: Canonical episode title
-msgid "canonical-episode-title"
-msgstr ""
-
-# Default: Canonical name
-msgid "canonical-name"
-msgstr ""
-
-# Default: Canonical series title
-msgid "canonical-series-title"
-msgstr ""
-
-# Default: Canonical title
-msgid "canonical-title"
-msgstr ""
-
-# Default: Cast
-msgid "cast"
-msgstr "Oynayanlar"
-
-# Default: Casting department
-msgid "casting-department"
-msgstr "Oyuncu seçme departmanı"
-
-# Default: Casting director
-msgid "casting-director"
-msgstr "Oyuncu seçme yönetmeni"
-
-# Default: Catalog number
-msgid "catalog-number"
-msgstr "Katalog numarası"
-
-# Default: Category
-msgid "category"
-msgstr "Kategori"
-
-# Default: Certificate
-msgid "certificate"
-msgstr "Sertifika"
-
-# Default: Certificates
-msgid "certificates"
-msgstr "Sertifikalar"
-
-# Default: Certification
-msgid "certification"
-msgstr ""
-
-# Default: Channel
-msgid "channel"
-msgstr "Kanal"
-
-# Default: Character
-msgid "character"
-msgstr "Karakter"
-
-# Default: Cinematographer
-msgid "cinematographer"
-msgstr "Kameraman"
-
-# Default: Cinematographic process
-msgid "cinematographic-process"
-msgstr ""
-
-# Default: Close captions teletext ld g
-msgid "close-captions-teletext-ld-g"
-msgstr ""
-
-# Default: Color info
-msgid "color-info"
-msgstr "Renk bilgisi"
-
-# Default: Color information
-msgid "color-information"
-msgstr "Renk bilgisi"
-
-# Default: Color rendition
-msgid "color-rendition"
-msgstr ""
-
-# Default: Company
-msgid "company"
-msgstr "Şirket"
-
-# Default: Complete cast
-msgid "complete-cast"
-msgstr "Bütün oynayanlar"
-
-# Default: Complete crew
-msgid "complete-crew"
-msgstr "Bütün çalışanlar"
-
-# Default: Composer
-msgid "composer"
-msgstr "Besteci"
-
-# Default: Connections
-msgid "connections"
-msgstr "Bağlantılar"
-
-# Default: Contrast
-msgid "contrast"
-msgstr "Kontrast"
-
-# Default: Copyright holder
-msgid "copyright-holder"
-msgstr "Telif sahibi"
-
-# Default: Costume department
-msgid "costume-department"
-msgstr "Kostüm departmanı"
-
-# Default: Costume designer
-msgid "costume-designer"
-msgstr "Kostüm tasarımcısı"
-
-# Default: Countries
-msgid "countries"
-msgstr "Ülkeler"
-
-# Default: Country
-msgid "country"
-msgstr "Ülke"
-
-# Default: Courtesy of
-msgid "courtesy-of"
-msgstr ""
-
-# Default: Cover
-msgid "cover"
-msgstr "Poster"
-
-# Default: Cover url
-msgid "cover-url"
-msgstr "Poster adresi"
-
-# Default: Crazy credits
-msgid "crazy-credits"
-msgstr ""
-
-# Default: Creator
-msgid "creator"
-msgstr "Yaratıcı"
-
-# Default: Current role
-msgid "current-role"
-msgstr "Şimdiki rol"
-
-# Default: Database
-msgid "database"
-msgstr "Veritabanı"
-
-# Default: Date
-msgid "date"
-msgstr "Tarih"
-
-# Default: Death date
-msgid "death-date"
-msgstr "Ölüm tarihi"
-
-# Default: Death notes
-msgid "death-notes"
-msgstr "Ölüm notları"
-
-# Default: Demographic
-msgid "demographic"
-msgstr "Demografi"
-
-# Default: Description
-msgid "description"
-msgstr "Tarif"
-
-# Default: Dialogue intellegibility
-msgid "dialogue-intellegibility"
-msgstr ""
-
-# Default: Digital sound
-msgid "digital-sound"
-msgstr "Dijital ses"
-
-# Default: Director
-msgid "director"
-msgstr "Yönetmen"
-
-# Default: Disc format
-msgid "disc-format"
-msgstr "Disk formatı"
-
-# Default: Disc size
-msgid "disc-size"
-msgstr "Disk boyu"
-
-# Default: Distributors
-msgid "distributors"
-msgstr "Dağıtıcılar"
-
-# Default: Dvd
-msgid "dvd"
-msgstr "DVD"
-
-# Default: Dvd features
-msgid "dvd-features"
-msgstr "DVD özellikleri"
-
-# Default: Dvd format
-msgid "dvd-format"
-msgstr "DVD formatı"
-
-# Default: Dvds
-msgid "dvds"
-msgstr "DVD'ler"
-
-# Default: Dynamic range
-msgid "dynamic-range"
-msgstr ""
-
-# Default: Edited from
-msgid "edited-from"
-msgstr ""
-
-# Default: Edited into
-msgid "edited-into"
-msgstr ""
-
-# Default: Editor
-msgid "editor"
-msgstr "Montajcı"
-
-# Default: Editorial department
-msgid "editorial-department"
-msgstr "Montaj departmanı"
-
-# Default: Episode
-msgid "episode"
-msgstr "Bölüm"
-
-# Default: Episode of
-msgid "episode-of"
-msgstr "Dizi"
-
-# Default: Episode title
-msgid "episode-title"
-msgstr "Bölüm başlığı"
-
-# Default: Episodes
-msgid "episodes"
-msgstr "Bölümler"
-
-# Default: Episodes rating
-msgid "episodes-rating"
-msgstr "Bölüm puanı"
-
-# Default: Essays
-msgid "essays"
-msgstr "Denemeler"
-
-# Default: External reviews
-msgid "external-reviews"
-msgstr "Harici eleştiriler"
-
-# Default: Faqs
-msgid "faqs"
-msgstr "SSS"
-
-# Default: Feature
-msgid "feature"
-msgstr ""
-
-# Default: Featured in
-msgid "featured-in"
-msgstr ""
-
-# Default: Features
-msgid "features"
-msgstr ""
-
-# Default: Film negative format
-msgid "film-negative-format"
-msgstr "Film negatif formatı"
-
-# Default: Filming dates
-msgid "filming-dates"
-msgstr "Çekim tarihleri"
-
-# Default: Filmography
-msgid "filmography"
-msgstr "Filmografi"
-
-# Default: Followed by
-msgid "followed-by"
-msgstr "Peşinden gelen film"
-
-# Default: Follows
-msgid "follows"
-msgstr "Peşinden geldiği film"
-
-# Default: For
-msgid "for"
-msgstr "Film"
-
-# Default: Frequency response
-msgid "frequency-response"
-msgstr ""
-
-# Default: From
-msgid "from"
-msgstr ""
-
-# Default: Full article link
-msgid "full-article-link"
-msgstr ""
-
-# Default: Full size cover url
-msgid "full-size-cover-url"
-msgstr ""
-
-# Default: Full size headshot
-msgid "full-size-headshot"
-msgstr ""
-
-# Default: Genres
-msgid "genres"
-msgstr "Türler"
-
-# Default: Goofs
-msgid "goofs"
-msgstr "Hatalar"
-
-# Default: Gross
-msgid "gross"
-msgstr "Hasılat"
-
-# Default: Group genre
-msgid "group-genre"
-msgstr ""
-
-# Default: Headshot
-msgid "headshot"
-msgstr "Resim"
-
-# Default: Height
-msgid "height"
-msgstr "Boy"
-
-# Default: Imdbindex
-msgid "imdbindex"
-msgstr ""
-
-# Default: In development
-msgid "in-development"
-msgstr ""
-
-# Default: Interview
-msgid "interview"
-msgstr "Söyleşi"
-
-# Default: Interviews
-msgid "interviews"
-msgstr "Söyleşiler"
-
-# Default: Introduction
-msgid "introduction"
-msgstr "İlk filmi"
-
-# Default: Item
-msgid "item"
-msgstr ""
-
-# Default: Keywords
-msgid "keywords"
-msgstr "Anahtar sözcükler"
-
-# Default: Kind
-msgid "kind"
-msgstr "Tip"
-
-# Default: Label
-msgid "label"
-msgstr ""
-
-# Default: Laboratory
-msgid "laboratory"
-msgstr "Laboratuar"
-
-# Default: Language
-msgid "language"
-msgstr "Dil"
-
-# Default: Languages
-msgid "languages"
-msgstr "Diller"
-
-# Default: Laserdisc
-msgid "laserdisc"
-msgstr "Lazer Disk"
-
-# Default: Laserdisc title
-msgid "laserdisc-title"
-msgstr ""
-
-# Default: Length
-msgid "length"
-msgstr "Süre"
-
-# Default: Line
-msgid "line"
-msgstr "Replik"
-
-# Default: Link
-msgid "link"
-msgstr "Bağlantı"
-
-# Default: Link text
-msgid "link-text"
-msgstr "Bağlantı metni"
-
-# Default: Literature
-msgid "literature"
-msgstr "Edebiyat"
-
-# Default: Locations
-msgid "locations"
-msgstr "Çekim yerleri"
-
-# Default: Long imdb canonical name
-msgid "long-imdb-canonical-name"
-msgstr ""
-
-# Default: Long imdb canonical title
-msgid "long-imdb-canonical-title"
-msgstr ""
-
-# Default: Long imdb episode title
-msgid "long-imdb-episode-title"
-msgstr "IMDb uzun bölüm başlığı"
-
-# Default: Long imdb name
-msgid "long-imdb-name"
-msgstr "IMDb uzun ismi"
-
-# Default: Long imdb title
-msgid "long-imdb-title"
-msgstr "IMDb uzun başlığı"
-
-# Default: Magazine cover photo
-msgid "magazine-cover-photo"
-msgstr "Dergi kapağı resmi"
-
-# Default: Make up
-msgid "make-up"
-msgstr "Makyaj"
-
-# Default: Master format
-msgid "master-format"
-msgstr "Master format"
-
-# Default: Median
-msgid "median"
-msgstr "Orta değer"
-
-# Default: Merchandising links
-msgid "merchandising-links"
-msgstr ""
-
-# Default: Mini biography
-msgid "mini-biography"
-msgstr "Mini biyografi"
-
-# Default: Misc links
-msgid "misc-links"
-msgstr ""
-
-# Default: Miscellaneous companies
-msgid "miscellaneous-companies"
-msgstr ""
-
-# Default: Miscellaneous crew
-msgid "miscellaneous-crew"
-msgstr ""
-
-# Default: Movie
-msgid "movie"
-msgstr "Film"
-
-# Default: Mpaa
-msgid "mpaa"
-msgstr "MPAA"
-
-# Default: Music department
-msgid "music-department"
-msgstr "Müzik departmanı"
-
-# Default: Name
-msgid "name"
-msgstr "İsim"
-
-# Default: News
-msgid "news"
-msgstr "Haberler"
-
-# Default: Newsgroup reviews
-msgid "newsgroup-reviews"
-msgstr "Haber grubu eleştirileri"
-
-# Default: Nick names
-msgid "nick-names"
-msgstr "Takma isimler"
-
-# Default: Notes
-msgid "notes"
-msgstr "Notlar"
-
-# Default: Novel
-msgid "novel"
-msgstr "Roman"
-
-# Default: Number
-msgid "number"
-msgstr "Sayı"
-
-# Default: Number of chapter stops
-msgid "number-of-chapter-stops"
-msgstr ""
-
-# Default: Number of episodes
-msgid "number-of-episodes"
-msgstr "Bölüm sayısı"
-
-# Default: Number of seasons
-msgid "number-of-seasons"
-msgstr "Sezon sayısı"
-
-# Default: Number of sides
-msgid "number-of-sides"
-msgstr ""
-
-# Default: Number of votes
-msgid "number-of-votes"
-msgstr "Oy sayısı"
-
-# Default: Official retail price
-msgid "official-retail-price"
-msgstr "Resmi perakende satış fiyatı"
-
-# Default: Official sites
-msgid "official-sites"
-msgstr "Resmi siteler"
-
-# Default: Opening weekend
-msgid "opening-weekend"
-msgstr "Açılış haftasonu"
-
-# Default: Original air date
-msgid "original-air-date"
-msgstr "İlk yayımlanma tarihi"
-
-# Default: Original music
-msgid "original-music"
-msgstr "Orijinal müzik"
-
-# Default: Original title
-msgid "original-title"
-msgstr ""
-
-# Default: Other literature
-msgid "other-literature"
-msgstr ""
-
-# Default: Other works
-msgid "other-works"
-msgstr "Diğer çalışmalar"
-
-# Default: Parents guide
-msgid "parents-guide"
-msgstr "Ana-baba kılavuzu"
-
-# Default: Performed by
-msgid "performed-by"
-msgstr "İcra eden"
-
-# Default: Person
-msgid "person"
-msgstr "Kişi"
-
-# Default: Photo sites
-msgid "photo-sites"
-msgstr "Fotoğraf siteleri"
-
-# Default: Pictorial
-msgid "pictorial"
-msgstr ""
-
-# Default: Picture format
-msgid "picture-format"
-msgstr "Resim formatı"
-
-# Default: Plot
-msgid "plot"
-msgstr "Konu"
-
-# Default: Plot outline
-msgid "plot-outline"
-msgstr "Konu kısa özeti"
-
-# Default: Portrayed in
-msgid "portrayed-in"
-msgstr ""
-
-# Default: Pressing plant
-msgid "pressing-plant"
-msgstr ""
-
-# Default: Printed film format
-msgid "printed-film-format"
-msgstr "Basılı film formatı"
-
-# Default: Printed media reviews
-msgid "printed-media-reviews"
-msgstr "Basın eleştirileri"
-
-# Default: Producer
-msgid "producer"
-msgstr "Yapımcı"
-
-# Default: Production companies
-msgid "production-companies"
-msgstr "Yapım şirketleri"
-
-# Default: Production country
-msgid "production-country"
-msgstr "Yapımcı ülke"
-
-# Default: Production dates
-msgid "production-dates"
-msgstr "Yapım tarihleri"
-
-# Default: Production design
-msgid "production-design"
-msgstr "Yapım tasarımı"
-
-# Default: Production designer
-msgid "production-designer"
-msgstr "Yapım tasarımcısı"
-
-# Default: Production manager
-msgid "production-manager"
-msgstr "Yapım yöneticisi"
-
-# Default: Production process protocol
-msgid "production-process-protocol"
-msgstr ""
-
-# Default: Quality of source
-msgid "quality-of-source"
-msgstr ""
-
-# Default: Quality program
-msgid "quality-program"
-msgstr ""
-
-# Default: Quote
-msgid "quote"
-msgstr "Alıntı"
-
-# Default: Quotes
-msgid "quotes"
-msgstr "Alıntılar"
-
-# Default: Rating
-msgid "rating"
-msgstr "Puan"
-
-# Default: Recommendations
-msgid "recommendations"
-msgstr "Tavsiyeler"
-
-# Default: Referenced in
-msgid "referenced-in"
-msgstr "Gönderme yapılan filmler"
-
-# Default: References
-msgid "references"
-msgstr "Gönderme yaptığı filmler"
-
-# Default: Region
-msgid "region"
-msgstr "Bölge"
-
-# Default: Release country
-msgid "release-country"
-msgstr ""
-
-# Default: Release date
-msgid "release-date"
-msgstr ""
-
-# Default: Release dates
-msgid "release-dates"
-msgstr ""
-
-# Default: Remade as
-msgid "remade-as"
-msgstr "Yeniden çekilişi"
-
-# Default: Remake of
-msgid "remake-of"
-msgstr "Yeniden çekimi olduğu film"
-
-# Default: Rentals
-msgid "rentals"
-msgstr "Kiralamalar"
-
-# Default: Result
-msgid "result"
-msgstr "Sonuç"
-
-# Default: Review
-msgid "review"
-msgstr "Eleştiri"
-
-# Default: Review author
-msgid "review-author"
-msgstr "Eleştiri yazarı"
-
-# Default: Review kind
-msgid "review-kind"
-msgstr "Eleştiri tipi"
-
-# Default: Runtime
-msgid "runtime"
-msgstr "Süre"
-
-# Default: Runtimes
-msgid "runtimes"
-msgstr "Süreler"
-
-# Default: Salary history
-msgid "salary-history"
-msgstr "Üçret tarihçesi"
-
-# Default: Screenplay teleplay
-msgid "screenplay-teleplay"
-msgstr "Senaryo"
-
-# Default: Season
-msgid "season"
-msgstr "Sezon"
-
-# Default: Second unit director or assistant director
-msgid "second-unit-director-or-assistant-director"
-msgstr "İkinci birim yönetmeni ya da yardımcı yönetmen"
-
-# Default: Self
-msgid "self"
-msgstr "Kendisi"
-
-# Default: Series animation department
-msgid "series-animation-department"
-msgstr "Dizinin animasyon departmanı"
-
-# Default: Series art department
-msgid "series-art-department"
-msgstr "Dizinin sanat departmanı"
-
-# Default: Series assistant directors
-msgid "series-assistant-directors"
-msgstr "Dizinin yardımcı yönetmenleri"
-
-# Default: Series camera department
-msgid "series-camera-department"
-msgstr "Dizinin kamera departmanı"
-
-# Default: Series casting department
-msgid "series-casting-department"
-msgstr "Dizinin oyuncu seçimi departmanı"
-
-# Default: Series cinematographers
-msgid "series-cinematographers"
-msgstr "Dizinin kameramanları"
-
-# Default: Series costume department
-msgid "series-costume-department"
-msgstr "Dizinin kostüm departmanı"
-
-# Default: Series editorial department
-msgid "series-editorial-department"
-msgstr "Dizinin montaj departmanı"
-
-# Default: Series editors
-msgid "series-editors"
-msgstr "Dizinin montajcıları"
-
-# Default: Series make up department
-msgid "series-make-up-department"
-msgstr "Dizinin makyaj departmanı"
-
-# Default: Series miscellaneous
-msgid "series-miscellaneous"
-msgstr ""
-
-# Default: Series music department
-msgid "series-music-department"
-msgstr "Dizinin müzik departmanı"
-
-# Default: Series producers
-msgid "series-producers"
-msgstr "Dizinin yapımcıları"
-
-# Default: Series production designers
-msgid "series-production-designers"
-msgstr "Dizinin yapım tasarımcıları"
-
-# Default: Series production managers
-msgid "series-production-managers"
-msgstr "Dizinin yapım yöneticileri"
-
-# Default: Series sound department
-msgid "series-sound-department"
-msgstr "Dizinin ses departmanı"
-
-# Default: Series special effects department
-msgid "series-special-effects-department"
-msgstr "Dizinin özel efekt departmanı"
-
-# Default: Series stunts
-msgid "series-stunts"
-msgstr "Dizinin dublörleri"
-
-# Default: Series title
-msgid "series-title"
-msgstr "Dizinin başlığı"
-
-# Default: Series transportation department
-msgid "series-transportation-department"
-msgstr "Dizinin ulaşım departmanı"
-
-# Default: Series visual effects department
-msgid "series-visual-effects-department"
-msgstr "Dizinin görsel efekt departmanı"
-
-# Default: Series writers
-msgid "series-writers"
-msgstr "Dizinin yazarları"
-
-# Default: Series years
-msgid "series-years"
-msgstr "Dizinin yılları"
-
-# Default: Set decoration
-msgid "set-decoration"
-msgstr "Set dekorasyonu"
-
-# Default: Sharpness
-msgid "sharpness"
-msgstr "Keskinlik"
-
-# Default: Similar to
-msgid "similar-to"
-msgstr "Benzer"
-
-# Default: Smart canonical episode title
-msgid "smart-canonical-episode-title"
-msgstr ""
-
-# Default: Smart canonical series title
-msgid "smart-canonical-series-title"
-msgstr ""
-
-# Default: Smart canonical title
-msgid "smart-canonical-title"
-msgstr ""
-
-# Default: Smart long imdb canonical title
-msgid "smart-long-imdb-canonical-title"
-msgstr ""
-
-# Default: Sound clips
-msgid "sound-clips"
-msgstr "Ses klipleri"
-
-# Default: Sound crew
-msgid "sound-crew"
-msgstr "Ses ekibi"
-
-# Default: Sound encoding
-msgid "sound-encoding"
-msgstr "Ses kodlaması"
-
-# Default: Sound mix
-msgid "sound-mix"
-msgstr ""
-
-# Default: Soundtrack
-msgid "soundtrack"
-msgstr "Film müzikleri"
-
-# Default: Spaciality
-msgid "spaciality"
-msgstr ""
-
-# Default: Special effects
-msgid "special-effects"
-msgstr "Özel efektler"
-
-# Default: Special effects companies
-msgid "special-effects-companies"
-msgstr "Özel efekt şirketleri"
-
-# Default: Special effects department
-msgid "special-effects-department"
-msgstr "Özel efekt departmanı"
-
-# Default: Spin off
-msgid "spin-off"
-msgstr ""
-
-# Default: Spin off from
-msgid "spin-off-from"
-msgstr ""
-
-# Default: Spoofed in
-msgid "spoofed-in"
-msgstr "Dalga geçildiği filmler"
-
-# Default: Spoofs
-msgid "spoofs"
-msgstr "Dalga geçtiği filmler"
-
-# Default: Spouse
-msgid "spouse"
-msgstr "Eşi"
-
-# Default: Status of availablility
-msgid "status-of-availablility"
-msgstr ""
-
-# Default: Studio
-msgid "studio"
-msgstr "Stüdyo"
-
-# Default: Studios
-msgid "studios"
-msgstr "Stüdyolar"
-
-# Default: Stunt performer
-msgid "stunt-performer"
-msgstr ""
-
-# Default: Stunts
-msgid "stunts"
-msgstr "Dublörler"
-
-# Default: Subtitles
-msgid "subtitles"
-msgstr "Altyazılar"
-
-# Default: Supplement
-msgid "supplement"
-msgstr ""
-
-# Default: Supplements
-msgid "supplements"
-msgstr ""
-
-# Default: Synopsis
-msgid "synopsis"
-msgstr "Sinopsis"
-
-# Default: Taglines
-msgid "taglines"
-msgstr "Spotlar"
-
-# Default: Tech info
-msgid "tech-info"
-msgstr "Teknik bilgi"
-
-# Default: Thanks
-msgid "thanks"
-msgstr "Teşekkürler"
-
-# Default: Time
-msgid "time"
-msgstr "Zaman"
-
-# Default: Title
-msgid "title"
-msgstr "Başlık"
-
-# Default: Titles in this product
-msgid "titles-in-this-product"
-msgstr "Bu üründeki başlıklar"
-
-# Default: To
-msgid "to"
-msgstr "Alan"
-
-# Default: Top 250 rank
-msgid "top-250-rank"
-msgstr "En iyi 250 içindeki sırası"
-
-# Default: Trade mark
-msgid "trade-mark"
-msgstr "Kendine has özelliği"
-
-# Default: Transportation department
-msgid "transportation-department"
-msgstr "Ulaşım departmanı"
-
-# Default: Trivia
-msgid "trivia"
-msgstr "İlginç notlar"
-
-# Default: Tv
-msgid "tv"
-msgstr ""
-
-# Default: Under license from
-msgid "under-license-from"
-msgstr ""
-
-# Default: Unknown link
-msgid "unknown-link"
-msgstr ""
-
-# Default: Upc
-msgid "upc"
-msgstr ""
-
-# Default: Version of
-msgid "version-of"
-msgstr ""
-
-# Default: Vhs
-msgid "vhs"
-msgstr "VHS"
-
-# Default: Video
-msgid "video"
-msgstr ""
-
-# Default: Video artifacts
-msgid "video-artifacts"
-msgstr ""
-
-# Default: Video clips
-msgid "video-clips"
-msgstr "Video klipleri"
-
-# Default: Video noise
-msgid "video-noise"
-msgstr "Video gürültüsü"
-
-# Default: Video quality
-msgid "video-quality"
-msgstr "Video kalitesi"
-
-# Default: Video standard
-msgid "video-standard"
-msgstr "Video standardı"
-
-# Default: Visual effects
-msgid "visual-effects"
-msgstr "Görsel efektler"
-
-# Default: Votes
-msgid "votes"
-msgstr "Oylar"
-
-# Default: Votes distribution
-msgid "votes-distribution"
-msgstr "Oyların dağılımı"
-
-# Default: Weekend gross
-msgid "weekend-gross"
-msgstr "Haftasonu hasılatı"
-
-# Default: Where now
-msgid "where-now"
-msgstr "Şu anda nerede"
-
-# Default: With
-msgid "with"
-msgstr ""
-
-# Default: Writer
-msgid "writer"
-msgstr "Yazar"
-
-# Default: Written by
-msgid "written-by"
-msgstr "Yazan"
-
-# Default: Year
-msgid "year"
-msgstr "Yıl"
-
-# Default: Zshops
-msgid "zshops"
-msgstr "ZShops"
diff --git a/libs/imdb/locale/imdbpy.pot b/libs/imdb/locale/imdbpy.pot
deleted file mode 100644
index 14ac1669..00000000
--- a/libs/imdb/locale/imdbpy.pot
+++ /dev/null
@@ -1,1301 +0,0 @@
-# Gettext message file for imdbpy
-msgid ""
-msgstr ""
-"Project-Id-Version: imdbpy\n"
-"POT-Creation-Date: 2010-03-18 14:35+0000\n"
-"PO-Revision-Date: YYYY-MM-DD HH:MM+0000\n"
-"Last-Translator: YOUR NAME \n"
-"Language-Team: TEAM NAME \n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=UTF-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-"Plural-Forms: nplurals=1; plural=0;\n"
-"Language-Code: en\n"
-"Language-Name: English\n"
-"Preferred-Encodings: utf-8\n"
-"Domain: imdbpy\n"
-
-# Default: Actor
-msgid "actor"
-msgstr ""
-
-# Default: Actress
-msgid "actress"
-msgstr ""
-
-# Default: Adaption
-msgid "adaption"
-msgstr ""
-
-# Default: Additional information
-msgid "additional-information"
-msgstr ""
-
-# Default: Admissions
-msgid "admissions"
-msgstr ""
-
-# Default: Agent address
-msgid "agent-address"
-msgstr ""
-
-# Default: Airing
-msgid "airing"
-msgstr ""
-
-# Default: Akas
-msgid "akas"
-msgstr ""
-
-# Default: Akas from release info
-msgid "akas-from-release-info"
-msgstr ""
-
-# Default: All products
-msgid "all-products"
-msgstr ""
-
-# Default: Alternate language version of
-msgid "alternate-language-version-of"
-msgstr ""
-
-# Default: Alternate versions
-msgid "alternate-versions"
-msgstr ""
-
-# Default: Amazon reviews
-msgid "amazon-reviews"
-msgstr ""
-
-# Default: Analog left
-msgid "analog-left"
-msgstr ""
-
-# Default: Analog right
-msgid "analog-right"
-msgstr ""
-
-# Default: Animation department
-msgid "animation-department"
-msgstr ""
-
-# Default: Archive footage
-msgid "archive-footage"
-msgstr ""
-
-# Default: Arithmetic mean
-msgid "arithmetic-mean"
-msgstr ""
-
-# Default: Art department
-msgid "art-department"
-msgstr ""
-
-# Default: Art direction
-msgid "art-direction"
-msgstr ""
-
-# Default: Art director
-msgid "art-director"
-msgstr ""
-
-# Default: Article
-msgid "article"
-msgstr ""
-
-# Default: Asin
-msgid "asin"
-msgstr ""
-
-# Default: Aspect ratio
-msgid "aspect-ratio"
-msgstr ""
-
-# Default: Assigner
-msgid "assigner"
-msgstr ""
-
-# Default: Assistant director
-msgid "assistant-director"
-msgstr ""
-
-# Default: Auctions
-msgid "auctions"
-msgstr ""
-
-# Default: Audio noise
-msgid "audio-noise"
-msgstr ""
-
-# Default: Audio quality
-msgid "audio-quality"
-msgstr ""
-
-# Default: Award
-msgid "award"
-msgstr ""
-
-# Default: Awards
-msgid "awards"
-msgstr ""
-
-# Default: Biographical movies
-msgid "biographical-movies"
-msgstr ""
-
-# Default: Biography
-msgid "biography"
-msgstr ""
-
-# Default: Biography print
-msgid "biography-print"
-msgstr ""
-
-# Default: Birth date
-msgid "birth-date"
-msgstr ""
-
-# Default: Birth name
-msgid "birth-name"
-msgstr ""
-
-# Default: Birth notes
-msgid "birth-notes"
-msgstr ""
-
-# Default: Body
-msgid "body"
-msgstr ""
-
-# Default: Book
-msgid "book"
-msgstr ""
-
-# Default: Books
-msgid "books"
-msgstr ""
-
-# Default: Bottom 100 rank
-msgid "bottom-100-rank"
-msgstr ""
-
-# Default: Budget
-msgid "budget"
-msgstr ""
-
-# Default: Business
-msgid "business"
-msgstr ""
-
-# Default: By arrangement with
-msgid "by-arrangement-with"
-msgstr ""
-
-# Default: Camera
-msgid "camera"
-msgstr ""
-
-# Default: Camera and electrical department
-msgid "camera-and-electrical-department"
-msgstr ""
-
-# Default: Canonical episode title
-msgid "canonical-episode-title"
-msgstr ""
-
-# Default: Canonical name
-msgid "canonical-name"
-msgstr ""
-
-# Default: Canonical series title
-msgid "canonical-series-title"
-msgstr ""
-
-# Default: Canonical title
-msgid "canonical-title"
-msgstr ""
-
-# Default: Cast
-msgid "cast"
-msgstr ""
-
-# Default: Casting department
-msgid "casting-department"
-msgstr ""
-
-# Default: Casting director
-msgid "casting-director"
-msgstr ""
-
-# Default: Catalog number
-msgid "catalog-number"
-msgstr ""
-
-# Default: Category
-msgid "category"
-msgstr ""
-
-# Default: Certificate
-msgid "certificate"
-msgstr ""
-
-# Default: Certificates
-msgid "certificates"
-msgstr ""
-
-# Default: Certification
-msgid "certification"
-msgstr ""
-
-# Default: Channel
-msgid "channel"
-msgstr ""
-
-# Default: Character
-msgid "character"
-msgstr ""
-
-# Default: Cinematographer
-msgid "cinematographer"
-msgstr ""
-
-# Default: Cinematographic process
-msgid "cinematographic-process"
-msgstr ""
-
-# Default: Close captions teletext ld g
-msgid "close-captions-teletext-ld-g"
-msgstr ""
-
-# Default: Color info
-msgid "color-info"
-msgstr ""
-
-# Default: Color information
-msgid "color-information"
-msgstr ""
-
-# Default: Color rendition
-msgid "color-rendition"
-msgstr ""
-
-# Default: Company
-msgid "company"
-msgstr ""
-
-# Default: Complete cast
-msgid "complete-cast"
-msgstr ""
-
-# Default: Complete crew
-msgid "complete-crew"
-msgstr ""
-
-# Default: Composer
-msgid "composer"
-msgstr ""
-
-# Default: Connections
-msgid "connections"
-msgstr ""
-
-# Default: Contrast
-msgid "contrast"
-msgstr ""
-
-# Default: Copyright holder
-msgid "copyright-holder"
-msgstr ""
-
-# Default: Costume department
-msgid "costume-department"
-msgstr ""
-
-# Default: Costume designer
-msgid "costume-designer"
-msgstr ""
-
-# Default: Countries
-msgid "countries"
-msgstr ""
-
-# Default: Country
-msgid "country"
-msgstr ""
-
-# Default: Courtesy of
-msgid "courtesy-of"
-msgstr ""
-
-# Default: Cover
-msgid "cover"
-msgstr ""
-
-# Default: Cover url
-msgid "cover-url"
-msgstr ""
-
-# Default: Crazy credits
-msgid "crazy-credits"
-msgstr ""
-
-# Default: Creator
-msgid "creator"
-msgstr ""
-
-# Default: Current role
-msgid "current-role"
-msgstr ""
-
-# Default: Database
-msgid "database"
-msgstr ""
-
-# Default: Date
-msgid "date"
-msgstr ""
-
-# Default: Death date
-msgid "death-date"
-msgstr ""
-
-# Default: Death notes
-msgid "death-notes"
-msgstr ""
-
-# Default: Demographic
-msgid "demographic"
-msgstr ""
-
-# Default: Description
-msgid "description"
-msgstr ""
-
-# Default: Dialogue intellegibility
-msgid "dialogue-intellegibility"
-msgstr ""
-
-# Default: Digital sound
-msgid "digital-sound"
-msgstr ""
-
-# Default: Director
-msgid "director"
-msgstr ""
-
-# Default: Disc format
-msgid "disc-format"
-msgstr ""
-
-# Default: Disc size
-msgid "disc-size"
-msgstr ""
-
-# Default: Distributors
-msgid "distributors"
-msgstr ""
-
-# Default: Dvd
-msgid "dvd"
-msgstr ""
-
-# Default: Dvd features
-msgid "dvd-features"
-msgstr ""
-
-# Default: Dvd format
-msgid "dvd-format"
-msgstr ""
-
-# Default: Dvds
-msgid "dvds"
-msgstr ""
-
-# Default: Dynamic range
-msgid "dynamic-range"
-msgstr ""
-
-# Default: Edited from
-msgid "edited-from"
-msgstr ""
-
-# Default: Edited into
-msgid "edited-into"
-msgstr ""
-
-# Default: Editor
-msgid "editor"
-msgstr ""
-
-# Default: Editorial department
-msgid "editorial-department"
-msgstr ""
-
-# Default: Episode
-msgid "episode"
-msgstr ""
-
-# Default: Episode of
-msgid "episode-of"
-msgstr ""
-
-# Default: Episode title
-msgid "episode-title"
-msgstr ""
-
-# Default: Episodes
-msgid "episodes"
-msgstr ""
-
-# Default: Episodes rating
-msgid "episodes-rating"
-msgstr ""
-
-# Default: Essays
-msgid "essays"
-msgstr ""
-
-# Default: External reviews
-msgid "external-reviews"
-msgstr ""
-
-# Default: Faqs
-msgid "faqs"
-msgstr ""
-
-# Default: Feature
-msgid "feature"
-msgstr ""
-
-# Default: Featured in
-msgid "featured-in"
-msgstr ""
-
-# Default: Features
-msgid "features"
-msgstr ""
-
-# Default: Film negative format
-msgid "film-negative-format"
-msgstr ""
-
-# Default: Filming dates
-msgid "filming-dates"
-msgstr ""
-
-# Default: Filmography
-msgid "filmography"
-msgstr ""
-
-# Default: Followed by
-msgid "followed-by"
-msgstr ""
-
-# Default: Follows
-msgid "follows"
-msgstr ""
-
-# Default: For
-msgid "for"
-msgstr ""
-
-# Default: Frequency response
-msgid "frequency-response"
-msgstr ""
-
-# Default: From
-msgid "from"
-msgstr ""
-
-# Default: Full article link
-msgid "full-article-link"
-msgstr ""
-
-# Default: Full size cover url
-msgid "full-size-cover-url"
-msgstr ""
-
-# Default: Full size headshot
-msgid "full-size-headshot"
-msgstr ""
-
-# Default: Genres
-msgid "genres"
-msgstr ""
-
-# Default: Goofs
-msgid "goofs"
-msgstr ""
-
-# Default: Gross
-msgid "gross"
-msgstr ""
-
-# Default: Group genre
-msgid "group-genre"
-msgstr ""
-
-# Default: Headshot
-msgid "headshot"
-msgstr ""
-
-# Default: Height
-msgid "height"
-msgstr ""
-
-# Default: Imdbindex
-msgid "imdbindex"
-msgstr ""
-
-# Default: In development
-msgid "in-development"
-msgstr ""
-
-# Default: Interview
-msgid "interview"
-msgstr ""
-
-# Default: Interviews
-msgid "interviews"
-msgstr ""
-
-# Default: Introduction
-msgid "introduction"
-msgstr ""
-
-# Default: Item
-msgid "item"
-msgstr ""
-
-# Default: Keywords
-msgid "keywords"
-msgstr ""
-
-# Default: Kind
-msgid "kind"
-msgstr ""
-
-# Default: Label
-msgid "label"
-msgstr ""
-
-# Default: Laboratory
-msgid "laboratory"
-msgstr ""
-
-# Default: Language
-msgid "language"
-msgstr ""
-
-# Default: Languages
-msgid "languages"
-msgstr ""
-
-# Default: Laserdisc
-msgid "laserdisc"
-msgstr ""
-
-# Default: Laserdisc title
-msgid "laserdisc-title"
-msgstr ""
-
-# Default: Length
-msgid "length"
-msgstr ""
-
-# Default: Line
-msgid "line"
-msgstr ""
-
-# Default: Link
-msgid "link"
-msgstr ""
-
-# Default: Link text
-msgid "link-text"
-msgstr ""
-
-# Default: Literature
-msgid "literature"
-msgstr ""
-
-# Default: Locations
-msgid "locations"
-msgstr ""
-
-# Default: Long imdb canonical name
-msgid "long-imdb-canonical-name"
-msgstr ""
-
-# Default: Long imdb canonical title
-msgid "long-imdb-canonical-title"
-msgstr ""
-
-# Default: Long imdb episode title
-msgid "long-imdb-episode-title"
-msgstr ""
-
-# Default: Long imdb name
-msgid "long-imdb-name"
-msgstr ""
-
-# Default: Long imdb title
-msgid "long-imdb-title"
-msgstr ""
-
-# Default: Magazine cover photo
-msgid "magazine-cover-photo"
-msgstr ""
-
-# Default: Make up
-msgid "make-up"
-msgstr ""
-
-# Default: Master format
-msgid "master-format"
-msgstr ""
-
-# Default: Median
-msgid "median"
-msgstr ""
-
-# Default: Merchandising links
-msgid "merchandising-links"
-msgstr ""
-
-# Default: Mini biography
-msgid "mini-biography"
-msgstr ""
-
-# Default: Misc links
-msgid "misc-links"
-msgstr ""
-
-# Default: Miscellaneous companies
-msgid "miscellaneous-companies"
-msgstr ""
-
-# Default: Miscellaneous crew
-msgid "miscellaneous-crew"
-msgstr ""
-
-# Default: Movie
-msgid "movie"
-msgstr ""
-
-# Default: Mpaa
-msgid "mpaa"
-msgstr ""
-
-# Default: Music department
-msgid "music-department"
-msgstr ""
-
-# Default: Name
-msgid "name"
-msgstr ""
-
-# Default: News
-msgid "news"
-msgstr ""
-
-# Default: Newsgroup reviews
-msgid "newsgroup-reviews"
-msgstr ""
-
-# Default: Nick names
-msgid "nick-names"
-msgstr ""
-
-# Default: Notes
-msgid "notes"
-msgstr ""
-
-# Default: Novel
-msgid "novel"
-msgstr ""
-
-# Default: Number
-msgid "number"
-msgstr ""
-
-# Default: Number of chapter stops
-msgid "number-of-chapter-stops"
-msgstr ""
-
-# Default: Number of episodes
-msgid "number-of-episodes"
-msgstr ""
-
-# Default: Number of seasons
-msgid "number-of-seasons"
-msgstr ""
-
-# Default: Number of sides
-msgid "number-of-sides"
-msgstr ""
-
-# Default: Number of votes
-msgid "number-of-votes"
-msgstr ""
-
-# Default: Official retail price
-msgid "official-retail-price"
-msgstr ""
-
-# Default: Official sites
-msgid "official-sites"
-msgstr ""
-
-# Default: Opening weekend
-msgid "opening-weekend"
-msgstr ""
-
-# Default: Original air date
-msgid "original-air-date"
-msgstr ""
-
-# Default: Original music
-msgid "original-music"
-msgstr ""
-
-# Default: Original title
-msgid "original-title"
-msgstr ""
-
-# Default: Other literature
-msgid "other-literature"
-msgstr ""
-
-# Default: Other works
-msgid "other-works"
-msgstr ""
-
-# Default: Parents guide
-msgid "parents-guide"
-msgstr ""
-
-# Default: Performed by
-msgid "performed-by"
-msgstr ""
-
-# Default: Person
-msgid "person"
-msgstr ""
-
-# Default: Photo sites
-msgid "photo-sites"
-msgstr ""
-
-# Default: Pictorial
-msgid "pictorial"
-msgstr ""
-
-# Default: Picture format
-msgid "picture-format"
-msgstr ""
-
-# Default: Plot
-msgid "plot"
-msgstr ""
-
-# Default: Plot outline
-msgid "plot-outline"
-msgstr ""
-
-# Default: Portrayed in
-msgid "portrayed-in"
-msgstr ""
-
-# Default: Pressing plant
-msgid "pressing-plant"
-msgstr ""
-
-# Default: Printed film format
-msgid "printed-film-format"
-msgstr ""
-
-# Default: Printed media reviews
-msgid "printed-media-reviews"
-msgstr ""
-
-# Default: Producer
-msgid "producer"
-msgstr ""
-
-# Default: Production companies
-msgid "production-companies"
-msgstr ""
-
-# Default: Production country
-msgid "production-country"
-msgstr ""
-
-# Default: Production dates
-msgid "production-dates"
-msgstr ""
-
-# Default: Production design
-msgid "production-design"
-msgstr ""
-
-# Default: Production designer
-msgid "production-designer"
-msgstr ""
-
-# Default: Production manager
-msgid "production-manager"
-msgstr ""
-
-# Default: Production process protocol
-msgid "production-process-protocol"
-msgstr ""
-
-# Default: Quality of source
-msgid "quality-of-source"
-msgstr ""
-
-# Default: Quality program
-msgid "quality-program"
-msgstr ""
-
-# Default: Quote
-msgid "quote"
-msgstr ""
-
-# Default: Quotes
-msgid "quotes"
-msgstr ""
-
-# Default: Rating
-msgid "rating"
-msgstr ""
-
-# Default: Recommendations
-msgid "recommendations"
-msgstr ""
-
-# Default: Referenced in
-msgid "referenced-in"
-msgstr ""
-
-# Default: References
-msgid "references"
-msgstr ""
-
-# Default: Region
-msgid "region"
-msgstr ""
-
-# Default: Release country
-msgid "release-country"
-msgstr ""
-
-# Default: Release date
-msgid "release-date"
-msgstr ""
-
-# Default: Release dates
-msgid "release-dates"
-msgstr ""
-
-# Default: Remade as
-msgid "remade-as"
-msgstr ""
-
-# Default: Remake of
-msgid "remake-of"
-msgstr ""
-
-# Default: Rentals
-msgid "rentals"
-msgstr ""
-
-# Default: Result
-msgid "result"
-msgstr ""
-
-# Default: Review
-msgid "review"
-msgstr ""
-
-# Default: Review author
-msgid "review-author"
-msgstr ""
-
-# Default: Review kind
-msgid "review-kind"
-msgstr ""
-
-# Default: Runtime
-msgid "runtime"
-msgstr ""
-
-# Default: Runtimes
-msgid "runtimes"
-msgstr ""
-
-# Default: Salary history
-msgid "salary-history"
-msgstr ""
-
-# Default: Screenplay teleplay
-msgid "screenplay-teleplay"
-msgstr ""
-
-# Default: Season
-msgid "season"
-msgstr ""
-
-# Default: Second unit director or assistant director
-msgid "second-unit-director-or-assistant-director"
-msgstr ""
-
-# Default: Self
-msgid "self"
-msgstr ""
-
-# Default: Series animation department
-msgid "series-animation-department"
-msgstr ""
-
-# Default: Series art department
-msgid "series-art-department"
-msgstr ""
-
-# Default: Series assistant directors
-msgid "series-assistant-directors"
-msgstr ""
-
-# Default: Series camera department
-msgid "series-camera-department"
-msgstr ""
-
-# Default: Series casting department
-msgid "series-casting-department"
-msgstr ""
-
-# Default: Series cinematographers
-msgid "series-cinematographers"
-msgstr ""
-
-# Default: Series costume department
-msgid "series-costume-department"
-msgstr ""
-
-# Default: Series editorial department
-msgid "series-editorial-department"
-msgstr ""
-
-# Default: Series editors
-msgid "series-editors"
-msgstr ""
-
-# Default: Series make up department
-msgid "series-make-up-department"
-msgstr ""
-
-# Default: Series miscellaneous
-msgid "series-miscellaneous"
-msgstr ""
-
-# Default: Series music department
-msgid "series-music-department"
-msgstr ""
-
-# Default: Series producers
-msgid "series-producers"
-msgstr ""
-
-# Default: Series production designers
-msgid "series-production-designers"
-msgstr ""
-
-# Default: Series production managers
-msgid "series-production-managers"
-msgstr ""
-
-# Default: Series sound department
-msgid "series-sound-department"
-msgstr ""
-
-# Default: Series special effects department
-msgid "series-special-effects-department"
-msgstr ""
-
-# Default: Series stunts
-msgid "series-stunts"
-msgstr ""
-
-# Default: Series title
-msgid "series-title"
-msgstr ""
-
-# Default: Series transportation department
-msgid "series-transportation-department"
-msgstr ""
-
-# Default: Series visual effects department
-msgid "series-visual-effects-department"
-msgstr ""
-
-# Default: Series writers
-msgid "series-writers"
-msgstr ""
-
-# Default: Series years
-msgid "series-years"
-msgstr ""
-
-# Default: Set decoration
-msgid "set-decoration"
-msgstr ""
-
-# Default: Sharpness
-msgid "sharpness"
-msgstr ""
-
-# Default: Similar to
-msgid "similar-to"
-msgstr ""
-
-# Default: Smart canonical episode title
-msgid "smart-canonical-episode-title"
-msgstr ""
-
-# Default: Smart canonical series title
-msgid "smart-canonical-series-title"
-msgstr ""
-
-# Default: Smart canonical title
-msgid "smart-canonical-title"
-msgstr ""
-
-# Default: Smart long imdb canonical title
-msgid "smart-long-imdb-canonical-title"
-msgstr ""
-
-# Default: Sound clips
-msgid "sound-clips"
-msgstr ""
-
-# Default: Sound crew
-msgid "sound-crew"
-msgstr ""
-
-# Default: Sound encoding
-msgid "sound-encoding"
-msgstr ""
-
-# Default: Sound mix
-msgid "sound-mix"
-msgstr ""
-
-# Default: Soundtrack
-msgid "soundtrack"
-msgstr ""
-
-# Default: Spaciality
-msgid "spaciality"
-msgstr ""
-
-# Default: Special effects
-msgid "special-effects"
-msgstr ""
-
-# Default: Special effects companies
-msgid "special-effects-companies"
-msgstr ""
-
-# Default: Special effects department
-msgid "special-effects-department"
-msgstr ""
-
-# Default: Spin off
-msgid "spin-off"
-msgstr ""
-
-# Default: Spin off from
-msgid "spin-off-from"
-msgstr ""
-
-# Default: Spoofed in
-msgid "spoofed-in"
-msgstr ""
-
-# Default: Spoofs
-msgid "spoofs"
-msgstr ""
-
-# Default: Spouse
-msgid "spouse"
-msgstr ""
-
-# Default: Status of availablility
-msgid "status-of-availablility"
-msgstr ""
-
-# Default: Studio
-msgid "studio"
-msgstr ""
-
-# Default: Studios
-msgid "studios"
-msgstr ""
-
-# Default: Stunt performer
-msgid "stunt-performer"
-msgstr ""
-
-# Default: Stunts
-msgid "stunts"
-msgstr ""
-
-# Default: Subtitles
-msgid "subtitles"
-msgstr ""
-
-# Default: Supplement
-msgid "supplement"
-msgstr ""
-
-# Default: Supplements
-msgid "supplements"
-msgstr ""
-
-# Default: Synopsis
-msgid "synopsis"
-msgstr ""
-
-# Default: Taglines
-msgid "taglines"
-msgstr ""
-
-# Default: Tech info
-msgid "tech-info"
-msgstr ""
-
-# Default: Thanks
-msgid "thanks"
-msgstr ""
-
-# Default: Time
-msgid "time"
-msgstr ""
-
-# Default: Title
-msgid "title"
-msgstr ""
-
-# Default: Titles in this product
-msgid "titles-in-this-product"
-msgstr ""
-
-# Default: To
-msgid "to"
-msgstr ""
-
-# Default: Top 250 rank
-msgid "top-250-rank"
-msgstr ""
-
-# Default: Trade mark
-msgid "trade-mark"
-msgstr ""
-
-# Default: Transportation department
-msgid "transportation-department"
-msgstr ""
-
-# Default: Trivia
-msgid "trivia"
-msgstr ""
-
-# Default: Tv
-msgid "tv"
-msgstr ""
-
-# Default: Under license from
-msgid "under-license-from"
-msgstr ""
-
-# Default: Unknown link
-msgid "unknown-link"
-msgstr ""
-
-# Default: Upc
-msgid "upc"
-msgstr ""
-
-# Default: Version of
-msgid "version-of"
-msgstr ""
-
-# Default: Vhs
-msgid "vhs"
-msgstr ""
-
-# Default: Video
-msgid "video"
-msgstr ""
-
-# Default: Video artifacts
-msgid "video-artifacts"
-msgstr ""
-
-# Default: Video clips
-msgid "video-clips"
-msgstr ""
-
-# Default: Video noise
-msgid "video-noise"
-msgstr ""
-
-# Default: Video quality
-msgid "video-quality"
-msgstr ""
-
-# Default: Video standard
-msgid "video-standard"
-msgstr ""
-
-# Default: Visual effects
-msgid "visual-effects"
-msgstr ""
-
-# Default: Votes
-msgid "votes"
-msgstr ""
-
-# Default: Votes distribution
-msgid "votes-distribution"
-msgstr ""
-
-# Default: Weekend gross
-msgid "weekend-gross"
-msgstr ""
-
-# Default: Where now
-msgid "where-now"
-msgstr ""
-
-# Default: With
-msgid "with"
-msgstr ""
-
-# Default: Writer
-msgid "writer"
-msgstr ""
-
-# Default: Written by
-msgid "written-by"
-msgstr ""
-
-# Default: Year
-msgid "year"
-msgstr ""
-
-# Default: Zshops
-msgid "zshops"
-msgstr ""
-
diff --git a/libs/imdb/locale/msgfmt.py b/libs/imdb/locale/msgfmt.py
deleted file mode 100644
index 9e0ab747..00000000
--- a/libs/imdb/locale/msgfmt.py
+++ /dev/null
@@ -1,204 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: iso-8859-1 -*-
-"""Generate binary message catalog from textual translation description.
-
-This program converts a textual Uniforum-style message catalog (.po file) into
-a binary GNU catalog (.mo file). This is essentially the same function as the
-GNU msgfmt program, however, it is a simpler implementation.
-
-Usage: msgfmt.py [OPTIONS] filename.po
-
-Options:
- -o file
- --output-file=file
- Specify the output file to write to. If omitted, output will go to a
- file named filename.mo (based off the input file name).
-
- -h
- --help
- Print this message and exit.
-
- -V
- --version
- Display version information and exit.
-
-Written by Martin v. Löwis ,
-refactored / fixed by Thomas Waldmann .
-"""
-
-import sys, os
-import getopt, struct, array
-
-__version__ = "1.3"
-
-class SyntaxErrorException(Exception):
- """raised when having trouble parsing the po file content"""
- pass
-
-class MsgFmt(object):
- """transform .po -> .mo format"""
- def __init__(self):
- self.messages = {}
-
- def make_filenames(self, filename, outfile=None):
- """Compute .mo name from .po name or language"""
- if filename.endswith('.po'):
- infile = filename
- else:
- infile = filename + '.po'
- if outfile is None:
- outfile = os.path.splitext(infile)[0] + '.mo'
- return infile, outfile
-
- def add(self, id, str, fuzzy):
- """Add a non-fuzzy translation to the dictionary."""
- if not fuzzy and str:
- self.messages[id] = str
-
- def read_po(self, lines):
- ID = 1
- STR = 2
- section = None
- fuzzy = False
- line_no = 0
- msgid = msgstr = ''
- # Parse the catalog
- for line in lines:
- line_no += 1
- # If we get a comment line after a msgstr, this is a new entry
- if line.startswith('#') and section == STR:
- self.add(msgid, msgstr, fuzzy)
- section = None
- fuzzy = False
- # Record a fuzzy mark
- if line.startswith('#,') and 'fuzzy' in line:
- fuzzy = True
- # Skip comments
- if line.startswith('#'):
- continue
- # Now we are in a msgid section, output previous section
- if line.startswith('msgid'):
- if section == STR:
- self.add(msgid, msgstr, fuzzy)
- fuzzy = False
- section = ID
- line = line[5:]
- msgid = msgstr = ''
- # Now we are in a msgstr section
- elif line.startswith('msgstr'):
- section = STR
- line = line[6:]
- # Skip empty lines
- line = line.strip()
- if not line:
- continue
- # XXX: Does this always follow Python escape semantics?
- line = eval(line)
- if section == ID:
- msgid += line
- elif section == STR:
- msgstr += line
- else:
- raise SyntaxErrorException('Syntax error on line %d, before:\n%s' % (line_no, line))
- # Add last entry
- if section == STR:
- self.add(msgid, msgstr, fuzzy)
-
- def generate_mo(self):
- """Return the generated output."""
- keys = self.messages.keys()
- # the keys are sorted in the .mo file
- keys.sort()
- offsets = []
- ids = ''
- strs = ''
- for id in keys:
- # For each string, we need size and file offset. Each string is NUL
- # terminated; the NUL does not count into the size.
- offsets.append((len(ids), len(id), len(strs), len(self.messages[id])))
- ids += id + '\0'
- strs += self.messages[id] + '\0'
- output = []
- # The header is 7 32-bit unsigned integers. We don't use hash tables, so
- # the keys start right after the index tables.
- # translated string.
- keystart = 7*4 + 16*len(keys)
- # and the values start after the keys
- valuestart = keystart + len(ids)
- koffsets = []
- voffsets = []
- # The string table first has the list of keys, then the list of values.
- # Each entry has first the size of the string, then the file offset.
- for o1, l1, o2, l2 in offsets:
- koffsets += [l1, o1 + keystart]
- voffsets += [l2, o2 + valuestart]
- offsets = koffsets + voffsets
- output.append(struct.pack("Iiiiiii",
- 0x950412deL, # Magic
- 0, # Version
- len(keys), # # of entries
- 7*4, # start of key index
- 7*4 + len(keys)*8, # start of value index
- 0, 0)) # size and offset of hash table
- output.append(array.array("i", offsets).tostring())
- output.append(ids)
- output.append(strs)
- return ''.join(output)
-
-
-def make(filename, outfile):
- mf = MsgFmt()
- infile, outfile = mf.make_filenames(filename, outfile)
- try:
- lines = file(infile).readlines()
- except IOError, msg:
- print >> sys.stderr, msg
- sys.exit(1)
- try:
- mf.read_po(lines)
- output = mf.generate_mo()
- except SyntaxErrorException, msg:
- print >> sys.stderr, msg
-
- try:
- open(outfile, "wb").write(output)
- except IOError, msg:
- print >> sys.stderr, msg
-
-
-def usage(code, msg=''):
- print >> sys.stderr, __doc__
- if msg:
- print >> sys.stderr, msg
- sys.exit(code)
-
-
-def main():
- try:
- opts, args = getopt.getopt(sys.argv[1:], 'hVo:', ['help', 'version', 'output-file='])
- except getopt.error, msg:
- usage(1, msg)
-
- outfile = None
- # parse options
- for opt, arg in opts:
- if opt in ('-h', '--help'):
- usage(0)
- elif opt in ('-V', '--version'):
- print >> sys.stderr, "msgfmt.py", __version__
- sys.exit(0)
- elif opt in ('-o', '--output-file'):
- outfile = arg
- # do it
- if not args:
- print >> sys.stderr, 'No input file given'
- print >> sys.stderr, "Try `msgfmt --help' for more information."
- return
-
- for filename in args:
- make(filename, outfile)
-
-
-if __name__ == '__main__':
- main()
-
diff --git a/libs/imdb/locale/rebuildmo.py b/libs/imdb/locale/rebuildmo.py
deleted file mode 100644
index b72a74c3..00000000
--- a/libs/imdb/locale/rebuildmo.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-"""
-rebuildmo.py script.
-
-This script builds the .mo files, from the .po files.
-
-Copyright 2009 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import glob
-import msgfmt
-import os
-
-#LOCALE_DIR = os.path.dirname(__file__)
-
-def rebuildmo():
- lang_glob = 'imdbpy-*.po'
- created = []
- for input_file in glob.glob(lang_glob):
- lang = input_file[7:-3]
- if not os.path.exists(lang):
- os.mkdir(lang)
- mo_dir = os.path.join(lang, 'LC_MESSAGES')
- if not os.path.exists(mo_dir):
- os.mkdir(mo_dir)
- output_file = os.path.join(mo_dir, 'imdbpy.mo')
- msgfmt.make(input_file, output_file)
- created.append(lang)
- return created
-
-
-if __name__ == '__main__':
- languages = rebuildmo()
- print 'Created locale for: %s.' % ' '.join(languages)
-
diff --git a/libs/imdb/parser/__init__.py b/libs/imdb/parser/__init__.py
deleted file mode 100644
index 4c3c90a8..00000000
--- a/libs/imdb/parser/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""
-parser package (imdb package).
-
-This package provides various parsers to access IMDb data (e.g.: a
-parser for the web/http interface, a parser for the SQL database
-interface, etc.).
-So far, the http/httpThin, mobile and sql parsers are implemented.
-
-Copyright 2004-2009 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-__all__ = ['http', 'mobile', 'sql']
-
-
diff --git a/libs/imdb/parser/http/__init__.py b/libs/imdb/parser/http/__init__.py
deleted file mode 100644
index e05d9afa..00000000
--- a/libs/imdb/parser/http/__init__.py
+++ /dev/null
@@ -1,775 +0,0 @@
-"""
-parser.http package (imdb package).
-
-This package provides the IMDbHTTPAccessSystem class used to access
-IMDb's data through the web interface.
-the imdb.IMDb function will return an instance of this class when
-called with the 'accessSystem' argument set to "http" or "web"
-or "html" (this is the default).
-
-Copyright 2004-2010 Davide Alberani
- 2008 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import sys
-import logging
-from urllib import FancyURLopener, quote_plus
-from codecs import lookup
-
-from imdb import IMDbBase, imdbURL_movie_main, imdbURL_person_main, \
- imdbURL_character_main, imdbURL_company_main, \
- imdbURL_keyword_main, imdbURL_find, imdbURL_top250, \
- imdbURL_bottom100
-from imdb.utils import analyze_title
-from imdb._exceptions import IMDbDataAccessError, IMDbParserError
-
-import searchMovieParser
-import searchPersonParser
-import searchCharacterParser
-import searchCompanyParser
-import searchKeywordParser
-import movieParser
-import personParser
-import characterParser
-import companyParser
-import topBottomParser
-
-# Logger for miscellaneous functions.
-_aux_logger = logging.getLogger('imdbpy.parser.http.aux')
-
-IN_GAE = False
-try:
- import google.appengine
- IN_GAE = True
- _aux_logger.info('IMDbPY is running in the Google App Engine environment')
-except ImportError:
- pass
-
-
-class _ModuleProxy:
- """A proxy to instantiate and access parsers."""
- def __init__(self, module, defaultKeys=None, oldParsers=False,
- useModule=None, fallBackToNew=False):
- """Initialize a proxy for the given module; defaultKeys, if set,
- muste be a dictionary of values to set for instanced objects."""
- if oldParsers or fallBackToNew:
- _aux_logger.warn('The old set of parsers was removed; falling ' \
- 'back to the new parsers.')
- self.useModule = useModule
- if defaultKeys is None:
- defaultKeys = {}
- self._defaultKeys = defaultKeys
- self._module = module
-
- def __getattr__(self, name):
- """Called only when no look-up is found."""
- _sm = self._module
- # Read the _OBJECTS dictionary to build the asked parser.
- if name in _sm._OBJECTS:
- _entry = _sm._OBJECTS[name]
- # Initialize the parser.
- kwds = {}
- if self.useModule:
- kwds = {'useModule': self.useModule}
- parserClass = _entry[0][0]
- obj = parserClass(**kwds)
- attrsToSet = self._defaultKeys.copy()
- attrsToSet.update(_entry[1] or {})
- # Set attribute to the object.
- for key in attrsToSet:
- setattr(obj, key, attrsToSet[key])
- setattr(self, name, obj)
- return obj
- return getattr(_sm, name)
-
-
-PY_VERSION = sys.version_info[:2]
-
-
-# The cookies for the "adult" search.
-# Please don't mess with these account.
-# Old 'IMDbPY' account.
-_old_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
-_old_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
-# New 'IMDbPYweb' account.
-_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
-_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
-
-# imdbpy2010 account.
-#_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
-#_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
-
-
-class _FakeURLOpener(object):
- """Fake URLOpener object, used to return empty strings instead of
- errors.
- """
- def __init__(self, url, headers):
- self.url = url
- self.headers = headers
- def read(self, *args, **kwds): return ''
- def close(self, *args, **kwds): pass
- def info(self, *args, **kwds): return self.headers
-
-
-class IMDbURLopener(FancyURLopener):
- """Fetch web pages and handle errors."""
- _logger = logging.getLogger('imdbpy.parser.http.urlopener')
-
- def __init__(self, *args, **kwargs):
- self._last_url = u''
- FancyURLopener.__init__(self, *args, **kwargs)
- # Headers to add to every request.
- # XXX: IMDb's web server doesn't like urllib-based programs,
- # so lets fake to be Mozilla.
- # Wow! I'm shocked by my total lack of ethic!
- for header in ('User-Agent', 'User-agent', 'user-agent'):
- self.del_header(header)
- self.set_header('User-Agent', 'Mozilla/5.0')
- # XXX: This class is used also to perform "Exact Primary
- # [Title|Name]" searches, and so by default the cookie is set.
- c_header = 'id=%s; uu=%s' % (_cookie_id, _cookie_uu)
- self.set_header('Cookie', c_header)
-
- def get_proxy(self):
- """Return the used proxy, or an empty string."""
- return self.proxies.get('http', '')
-
- def set_proxy(self, proxy):
- """Set the proxy."""
- if not proxy:
- if self.proxies.has_key('http'):
- del self.proxies['http']
- else:
- if not proxy.lower().startswith('http://'):
- proxy = 'http://%s' % proxy
- self.proxies['http'] = proxy
-
- def set_header(self, header, value, _overwrite=True):
- """Set a default header."""
- if _overwrite:
- self.del_header(header)
- self.addheaders.append((header, value))
-
- def del_header(self, header):
- """Remove a default header."""
- for index in xrange(len(self.addheaders)):
- if self.addheaders[index][0] == header:
- del self.addheaders[index]
- break
-
- def retrieve_unicode(self, url, size=-1):
- """Retrieves the given URL, and returns a unicode string,
- trying to guess the encoding of the data (assuming latin_1
- by default)"""
- encode = None
- try:
- if size != -1:
- self.set_header('Range', 'bytes=0-%d' % size)
- uopener = self.open(url)
- kwds = {}
- if PY_VERSION > (2, 3) and not IN_GAE:
- kwds['size'] = size
- content = uopener.read(**kwds)
- self._last_url = uopener.url
- # Maybe the server is so nice to tell us the charset...
- server_encode = uopener.info().getparam('charset')
- # Otherwise, look at the content-type HTML meta tag.
- if server_encode is None and content:
- first_bytes = content[:512]
- begin_h = first_bytes.find('text/html; charset=')
- if begin_h != -1:
- end_h = first_bytes[19+begin_h:].find('"')
- if end_h != -1:
- server_encode = first_bytes[19+begin_h:19+begin_h+end_h]
- if server_encode:
- try:
- if lookup(server_encode):
- encode = server_encode
- except (LookupError, ValueError, TypeError):
- pass
- uopener.close()
- if size != -1:
- self.del_header('Range')
- self.close()
- except IOError, e:
- if size != -1:
- # Ensure that the Range header is removed.
- self.del_header('Range')
- raise IMDbDataAccessError, {'errcode': e.errno,
- 'errmsg': str(e.strerror),
- 'url': url,
- 'proxy': self.get_proxy(),
- 'exception type': 'IOError',
- 'original exception': e}
- if encode is None:
- encode = 'latin_1'
- # The detection of the encoding is error prone...
- self._logger.warn('Unable to detect the encoding of the retrieved '
- 'page [%s]; falling back to default latin1.', encode)
- ##print unicode(content, encode, 'replace').encode('utf8')
- return unicode(content, encode, 'replace')
-
- def http_error_default(self, url, fp, errcode, errmsg, headers):
- if errcode == 404:
- self._logger.warn('404 code returned for %s: %s (headers: %s)',
- url, errmsg, headers)
- return _FakeURLOpener(url, headers)
- raise IMDbDataAccessError, {'url': 'http:%s' % url,
- 'errcode': errcode,
- 'errmsg': errmsg,
- 'headers': headers,
- 'error type': 'http_error_default',
- 'proxy': self.get_proxy()}
-
- def open_unknown(self, fullurl, data=None):
- raise IMDbDataAccessError, {'fullurl': fullurl,
- 'data': str(data),
- 'error type': 'open_unknown',
- 'proxy': self.get_proxy()}
-
- def open_unknown_proxy(self, proxy, fullurl, data=None):
- raise IMDbDataAccessError, {'proxy': str(proxy),
- 'fullurl': fullurl,
- 'error type': 'open_unknown_proxy',
- 'data': str(data)}
-
-
-class IMDbHTTPAccessSystem(IMDbBase):
- """The class used to access IMDb's data through the web."""
-
- accessSystem = 'http'
- _http_logger = logging.getLogger('imdbpy.parser.http')
-
- def __init__(self, isThin=0, adultSearch=1, proxy=-1, oldParsers=False,
- fallBackToNew=False, useModule=None, cookie_id=-1,
- cookie_uu=None, *arguments, **keywords):
- """Initialize the access system."""
- IMDbBase.__init__(self, *arguments, **keywords)
- self.urlOpener = IMDbURLopener()
- # When isThin is set, we're parsing the "maindetails" page
- # of a movie (instead of the "combined" page) and movie/person
- # references are not collected if no defaultModFunct is provided.
- self.isThin = isThin
- self._getRefs = True
- self._mdparse = False
- if isThin:
- if self.accessSystem == 'http':
- self.accessSystem = 'httpThin'
- self._mdparse = True
- if self._defModFunct is None:
- self._getRefs = False
- from imdb.utils import modNull
- self._defModFunct = modNull
- self.do_adult_search(adultSearch)
- if cookie_id != -1:
- if cookie_id is None:
- self.del_cookies()
- elif cookie_uu is not None:
- self.set_cookies(cookie_id, cookie_uu)
- if proxy != -1:
- self.set_proxy(proxy)
- if useModule is not None:
- if not isinstance(useModule, (list, tuple)) and ',' in useModule:
- useModule = useModule.split(',')
- _def = {'_modFunct': self._defModFunct, '_as': self.accessSystem}
- # Proxy objects.
- self.smProxy = _ModuleProxy(searchMovieParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
- self.spProxy = _ModuleProxy(searchPersonParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
- self.scProxy = _ModuleProxy(searchCharacterParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
- self.scompProxy = _ModuleProxy(searchCompanyParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
- self.skProxy = _ModuleProxy(searchKeywordParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
- self.mProxy = _ModuleProxy(movieParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
- self.pProxy = _ModuleProxy(personParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
- self.cProxy = _ModuleProxy(characterParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
- self.compProxy = _ModuleProxy(companyParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
- self.topBottomProxy = _ModuleProxy(topBottomParser, defaultKeys=_def,
- oldParsers=oldParsers, useModule=useModule,
- fallBackToNew=fallBackToNew)
-
- def _normalize_movieID(self, movieID):
- """Normalize the given movieID."""
- try:
- return '%07d' % int(movieID)
- except ValueError, e:
- raise IMDbParserError, 'invalid movieID "%s": %s' % (movieID, e)
-
- def _normalize_personID(self, personID):
- """Normalize the given personID."""
- try:
- return '%07d' % int(personID)
- except ValueError, e:
- raise IMDbParserError, 'invalid personID "%s": %s' % (personID, e)
-
- def _normalize_characterID(self, characterID):
- """Normalize the given characterID."""
- try:
- return '%07d' % int(characterID)
- except ValueError, e:
- raise IMDbParserError, 'invalid characterID "%s": %s' % \
- (characterID, e)
-
- def _normalize_companyID(self, companyID):
- """Normalize the given companyID."""
- try:
- return '%07d' % int(companyID)
- except ValueError, e:
- raise IMDbParserError, 'invalid companyID "%s": %s' % \
- (companyID, e)
-
- def get_imdbMovieID(self, movieID):
- """Translate a movieID in an imdbID; in this implementation
- the movieID _is_ the imdbID.
- """
- return movieID
-
- def get_imdbPersonID(self, personID):
- """Translate a personID in an imdbID; in this implementation
- the personID _is_ the imdbID.
- """
- return personID
-
- def get_imdbCharacterID(self, characterID):
- """Translate a characterID in an imdbID; in this implementation
- the characterID _is_ the imdbID.
- """
- return characterID
-
- def get_imdbCompanyID(self, companyID):
- """Translate a companyID in an imdbID; in this implementation
- the companyID _is_ the imdbID.
- """
- return companyID
-
- def get_proxy(self):
- """Return the used proxy or an empty string."""
- return self.urlOpener.get_proxy()
-
- def set_proxy(self, proxy):
- """Set the web proxy to use.
-
- It should be a string like 'http://localhost:8080/'; if the
- string is empty, no proxy will be used.
- If set, the value of the environment variable HTTP_PROXY is
- automatically used.
- """
- self.urlOpener.set_proxy(proxy)
-
- def set_cookies(self, cookie_id, cookie_uu):
- """Set a cookie to access an IMDb's account."""
- c_header = 'id=%s; uu=%s' % (cookie_id, cookie_uu)
- self.urlOpener.set_header('Cookie', c_header)
-
- def del_cookies(self):
- """Remove the used cookie."""
- self.urlOpener.del_header('Cookie')
-
- def do_adult_search(self, doAdult,
- cookie_id=_cookie_id, cookie_uu=_cookie_uu):
- """If doAdult is true, 'adult' movies are included in the
- search results; cookie_id and cookie_uu are optional
- parameters to select a specific account (see your cookie
- or cookies.txt file."""
- if doAdult:
- self.set_cookies(cookie_id, cookie_uu)
- #c_header = 'id=%s; uu=%s' % (cookie_id, cookie_uu)
- #self.urlOpener.set_header('Cookie', c_header)
- else:
- self.urlOpener.del_header('Cookie')
-
- def _retrieve(self, url, size=-1):
- """Retrieve the given URL."""
- ##print url
- self._http_logger.debug('fetching url %s (size: %d)', url, size)
- return self.urlOpener.retrieve_unicode(url, size=size)
-
- def _get_search_content(self, kind, ton, results):
- """Retrieve the web page for a given search.
- kind can be 'tt' (for titles), 'nm' (for names),
- 'char' (for characters) or 'co' (for companies).
- ton is the title or the name to search.
- results is the maximum number of results to be retrieved."""
- if isinstance(ton, unicode):
- ton = ton.encode('utf-8')
- ##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results))
- params = 's=%s;mx=%s;q=%s' % (kind, str(results), quote_plus(ton))
- if kind == 'ep':
- params = params.replace('s=ep;', 's=tt;ttype=ep;', 1)
- cont = self._retrieve(imdbURL_find % params)
- #print 'URL:', imdbURL_find % params
- if cont.find('Your search returned more than') == -1 or \
- cont.find("displayed the exact matches") == -1:
- return cont
- # The retrieved page contains no results, because too many
- # titles or names contain the string we're looking for.
- params = 's=%s;q=%s;lm=0' % (kind, quote_plus(ton))
- size = 22528 + results * 512
- return self._retrieve(imdbURL_find % params, size=size)
-
- def _search_movie(self, title, results):
- # The URL of the query.
- # XXX: To retrieve the complete results list:
- # params = urllib.urlencode({'more': 'tt', 'q': title})
- ##params = urllib.urlencode({'tt': 'on','mx': str(results),'q': title})
- ##params = 'q=%s&tt=on&mx=%s' % (quote_plus(title), str(results))
- ##cont = self._retrieve(imdbURL_find % params)
- cont = self._get_search_content('tt', title, results)
- return self.smProxy.search_movie_parser.parse(cont, results=results)['data']
-
- def _search_episode(self, title, results):
- t_dict = analyze_title(title)
- if t_dict['kind'] == 'episode':
- title = t_dict['title']
- cont = self._get_search_content('ep', title, results)
- return self.smProxy.search_movie_parser.parse(cont, results=results)['data']
-
- def get_movie_main(self, movieID):
- if not self.isThin:
- cont = self._retrieve(imdbURL_movie_main % movieID + 'combined')
- else:
- cont = self._retrieve(imdbURL_movie_main % movieID + 'maindetails')
- return self.mProxy.movie_parser.parse(cont, mdparse=self._mdparse)
-
- def get_movie_full_credits(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'fullcredits')
- return self.mProxy.movie_parser.parse(cont)
-
- def get_movie_plot(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'plotsummary')
- return self.mProxy.plot_parser.parse(cont, getRefs=self._getRefs)
-
- def get_movie_awards(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'awards')
- return self.mProxy.movie_awards_parser.parse(cont)
-
- def get_movie_taglines(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'taglines')
- return self.mProxy.taglines_parser.parse(cont)
-
- def get_movie_keywords(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'keywords')
- return self.mProxy.keywords_parser.parse(cont)
-
- def get_movie_alternate_versions(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'alternateversions')
- return self.mProxy.alternateversions_parser.parse(cont,
- getRefs=self._getRefs)
-
- def get_movie_crazy_credits(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'crazycredits')
- return self.mProxy.crazycredits_parser.parse(cont,
- getRefs=self._getRefs)
-
- def get_movie_goofs(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'goofs')
- return self.mProxy.goofs_parser.parse(cont, getRefs=self._getRefs)
-
- def get_movie_quotes(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'quotes')
- return self.mProxy.quotes_parser.parse(cont, getRefs=self._getRefs)
-
- def get_movie_release_dates(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'releaseinfo')
- ret = self.mProxy.releasedates_parser.parse(cont)
- ret['info sets'] = ('release dates', 'akas')
- return ret
- get_movie_akas = get_movie_release_dates
-
- def get_movie_vote_details(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'ratings')
- return self.mProxy.ratings_parser.parse(cont)
-
- def get_movie_official_sites(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'officialsites')
- return self.mProxy.officialsites_parser.parse(cont)
-
- def get_movie_trivia(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'trivia')
- return self.mProxy.trivia_parser.parse(cont, getRefs=self._getRefs)
-
- def get_movie_connections(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'movieconnections')
- return self.mProxy.connections_parser.parse(cont)
-
- def get_movie_technical(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'technical')
- return self.mProxy.tech_parser.parse(cont)
-
- def get_movie_business(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'business')
- return self.mProxy.business_parser.parse(cont, getRefs=self._getRefs)
-
- def get_movie_literature(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'literature')
- return self.mProxy.literature_parser.parse(cont)
-
- def get_movie_locations(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'locations')
- return self.mProxy.locations_parser.parse(cont)
-
- def get_movie_soundtrack(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'soundtrack')
- return self.mProxy.soundtrack_parser.parse(cont)
-
- def get_movie_dvd(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'dvd')
- return self.mProxy.dvd_parser.parse(cont, getRefs=self._getRefs)
-
- def get_movie_recommendations(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'recommendations')
- return self.mProxy.rec_parser.parse(cont)
-
- def get_movie_external_reviews(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'externalreviews')
- return self.mProxy.externalrev_parser.parse(cont)
-
- def get_movie_newsgroup_reviews(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'newsgroupreviews')
- return self.mProxy.newsgrouprev_parser.parse(cont)
-
- def get_movie_misc_sites(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'miscsites')
- return self.mProxy.misclinks_parser.parse(cont)
-
- def get_movie_sound_clips(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'soundsites')
- return self.mProxy.soundclips_parser.parse(cont)
-
- def get_movie_video_clips(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'videosites')
- return self.mProxy.videoclips_parser.parse(cont)
-
- def get_movie_photo_sites(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'photosites')
- return self.mProxy.photosites_parser.parse(cont)
-
- def get_movie_news(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'news')
- return self.mProxy.news_parser.parse(cont, getRefs=self._getRefs)
-
- def get_movie_amazon_reviews(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'amazon')
- return self.mProxy.amazonrev_parser.parse(cont)
-
- def get_movie_guests(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'epcast')
- return self.mProxy.episodes_cast_parser.parse(cont)
- get_movie_episodes_cast = get_movie_guests
-
- def get_movie_merchandising_links(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'sales')
- return self.mProxy.sales_parser.parse(cont)
-
- def get_movie_episodes(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'episodes')
- data_d = self.mProxy.episodes_parser.parse(cont)
- # set movie['episode of'].movieID for every episode of the series.
- if data_d.get('data', {}).has_key('episodes'):
- nr_eps = 0
- for season in data_d['data']['episodes'].values():
- for episode in season.values():
- episode['episode of'].movieID = movieID
- nr_eps += 1
- # Number of episodes.
- if nr_eps:
- data_d['data']['number of episodes'] = nr_eps
- return data_d
-
- def get_movie_episodes_rating(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'epdate')
- data_d = self.mProxy.eprating_parser.parse(cont)
- # set movie['episode of'].movieID for every episode.
- if data_d.get('data', {}).has_key('episodes rating'):
- for item in data_d['data']['episodes rating']:
- episode = item['episode']
- episode['episode of'].movieID = movieID
- return data_d
-
- def get_movie_faqs(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'faq')
- return self.mProxy.movie_faqs_parser.parse(cont, getRefs=self._getRefs)
-
- def get_movie_airing(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'tvschedule')
- return self.mProxy.airing_parser.parse(cont)
-
- get_movie_tv_schedule = get_movie_airing
-
- def get_movie_synopsis(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'synopsis')
- return self.mProxy.synopsis_parser.parse(cont)
-
- def get_movie_parents_guide(self, movieID):
- cont = self._retrieve(imdbURL_movie_main % movieID + 'parentalguide')
- return self.mProxy.parentsguide_parser.parse(cont)
-
- def _search_person(self, name, results):
- # The URL of the query.
- # XXX: To retrieve the complete results list:
- # params = urllib.urlencode({'more': 'nm', 'q': name})
- ##params = urllib.urlencode({'nm': 'on', 'mx': str(results), 'q': name})
- #params = 'q=%s&nm=on&mx=%s' % (quote_plus(name), str(results))
- #cont = self._retrieve(imdbURL_find % params)
- cont = self._get_search_content('nm', name, results)
- return self.spProxy.search_person_parser.parse(cont, results=results)['data']
-
- def get_person_main(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'maindetails')
- ret = self.pProxy.maindetails_parser.parse(cont)
- ret['info sets'] = ('main', 'filmography')
- return ret
-
- def get_person_filmography(self, personID):
- return self.get_person_main(personID)
-
- def get_person_biography(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'bio')
- return self.pProxy.bio_parser.parse(cont, getRefs=self._getRefs)
-
- def get_person_awards(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'awards')
- return self.pProxy.person_awards_parser.parse(cont)
-
- def get_person_other_works(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'otherworks')
- return self.pProxy.otherworks_parser.parse(cont, getRefs=self._getRefs)
-
- #def get_person_agent(self, personID):
- # cont = self._retrieve(imdbURL_person_main % personID + 'agent')
- # return self.pProxy.agent_parser.parse(cont)
-
- def get_person_publicity(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'publicity')
- return self.pProxy.publicity_parser.parse(cont)
-
- def get_person_official_sites(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'officialsites')
- return self.pProxy.person_officialsites_parser.parse(cont)
-
- def get_person_news(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'news')
- return self.pProxy.news_parser.parse(cont)
-
- def get_person_episodes(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'filmoseries')
- return self.pProxy.person_series_parser.parse(cont)
-
- def get_person_merchandising_links(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'forsale')
- return self.pProxy.sales_parser.parse(cont)
-
- def get_person_genres_links(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'filmogenre')
- return self.pProxy.person_genres_parser.parse(cont)
-
- def get_person_keywords_links(self, personID):
- cont = self._retrieve(imdbURL_person_main % personID + 'filmokey')
- return self.pProxy.person_keywords_parser.parse(cont)
-
- def _search_character(self, name, results):
- cont = self._get_search_content('char', name, results)
- return self.scProxy.search_character_parser.parse(cont, results=results)['data']
-
- def get_character_main(self, characterID):
- cont = self._retrieve(imdbURL_character_main % characterID)
- ret = self.cProxy.character_main_parser.parse(cont)
- ret['info sets'] = ('main', 'filmography')
- return ret
-
- get_character_filmography = get_character_main
-
- def get_character_biography(self, characterID):
- cont = self._retrieve(imdbURL_character_main % characterID + 'bio')
- return self.cProxy.character_bio_parser.parse(cont,
- getRefs=self._getRefs)
-
- def get_character_episodes(self, characterID):
- cont = self._retrieve(imdbURL_character_main % characterID +
- 'filmoseries')
- return self.cProxy.character_series_parser.parse(cont)
-
- def get_character_quotes(self, characterID):
- cont = self._retrieve(imdbURL_character_main % characterID + 'quotes')
- return self.cProxy.character_quotes_parser.parse(cont,
- getRefs=self._getRefs)
-
- def _search_company(self, name, results):
- cont = self._get_search_content('co', name, results)
- url = self.urlOpener._last_url
- return self.scompProxy.search_company_parser.parse(cont, url=url,
- results=results)['data']
-
- def get_company_main(self, companyID):
- cont = self._retrieve(imdbURL_company_main % companyID)
- ret = self.compProxy.company_main_parser.parse(cont)
- return ret
-
- def _search_keyword(self, keyword, results):
- # XXX: the IMDb web server seems to have some serious problem with
- # non-ascii keyword.
- # E.g.: http://akas.imdb.com/keyword/fianc%E9/
- # will return a 500 Internal Server Error: Redirect Recursion.
- keyword = keyword.encode('utf8', 'ignore')
- try:
- cont = self._get_search_content('kw', keyword, results)
- except IMDbDataAccessError:
- self._http_logger.warn('unable to search for keyword %s', keyword,
- exc_info=True)
- return []
- return self.skProxy.search_keyword_parser.parse(cont, results=results)['data']
-
- def _get_keyword(self, keyword, results):
- keyword = keyword.encode('utf8', 'ignore')
- try:
- cont = self._retrieve(imdbURL_keyword_main % keyword)
- except IMDbDataAccessError:
- self._http_logger.warn('unable to get keyword %s', keyword,
- exc_info=True)
- return []
- return self.skProxy.search_moviekeyword_parser.parse(cont, results=results)['data']
-
- def _get_top_bottom_movies(self, kind):
- if kind == 'top':
- parser = self.topBottomProxy.top250_parser
- url = imdbURL_top250
- elif kind == 'bottom':
- parser = self.topBottomProxy.bottom100_parser
- url = imdbURL_bottom100
- else:
- return []
- cont = self._retrieve(url)
- return parser.parse(cont)['data']
-
-
diff --git a/libs/imdb/parser/http/bsouplxml/__init__.py b/libs/imdb/parser/http/bsouplxml/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/libs/imdb/parser/http/bsouplxml/_bsoup.py b/libs/imdb/parser/http/bsouplxml/_bsoup.py
deleted file mode 100644
index afab5da9..00000000
--- a/libs/imdb/parser/http/bsouplxml/_bsoup.py
+++ /dev/null
@@ -1,1970 +0,0 @@
-"""
-imdb.parser.http._bsoup module (imdb.parser.http package).
-This is the BeautifulSoup.py module, not modified; it's included here
-so that it's not an external dependency.
-
-Beautiful Soup
-Elixir and Tonic
-"The Screen-Scraper's Friend"
-http://www.crummy.com/software/BeautifulSoup/
-
-Beautiful Soup parses a (possibly invalid) XML or HTML document into a
-tree representation. It provides methods and Pythonic idioms that make
-it easy to navigate, search, and modify the tree.
-
-A well-formed XML/HTML document yields a well-formed data
-structure. An ill-formed XML/HTML document yields a correspondingly
-ill-formed data structure. If your document is only locally
-well-formed, you can use this library to find and process the
-well-formed part of it.
-
-Beautiful Soup works with Python 2.2 and up. It has no external
-dependencies, but you'll have more success at converting data to UTF-8
-if you also install these three packages:
-
-* chardet, for auto-detecting character encodings
- http://chardet.feedparser.org/
-* cjkcodecs and iconv_codec, which add more encodings to the ones supported
- by stock Python.
- http://cjkpython.i18n.org/
-
-Beautiful Soup defines classes for two main parsing strategies:
-
- * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
- language that kind of looks like XML.
-
- * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
- or invalid. This class has web browser-like heuristics for
- obtaining a sensible parse tree in the face of common HTML errors.
-
-Beautiful Soup also defines a class (UnicodeDammit) for autodetecting
-the encoding of an HTML or XML document, and converting it to
-Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser.
-
-For more than you ever wanted to know about Beautiful Soup, see the
-documentation:
-http://www.crummy.com/software/BeautifulSoup/documentation.html
-
-Here, have some legalese:
-
-Copyright (c) 2004-2008, Leonard Richardson
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following
- disclaimer in the documentation and/or other materials provided
- with the distribution.
-
- * Neither the name of the the Beautiful Soup Consortium and All
- Night Kosher Bakery nor the names of its contributors may be
- used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
-
-"""
-from __future__ import generators
-
-__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "3.0.7a"
-__copyright__ = "Copyright (c) 2004-2008 Leonard Richardson"
-__license__ = "New-style BSD"
-
-from sgmllib import SGMLParser, SGMLParseError
-import codecs
-import markupbase
-import types
-import re
-import sgmllib
-try:
- from htmlentitydefs import name2codepoint
-except ImportError:
- name2codepoint = {}
-try:
- set
-except NameError:
- from sets import Set as set
-
-#These hacks make Beautiful Soup able to parse XML with namespaces
-sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
-markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
-
-DEFAULT_OUTPUT_ENCODING = "utf-8"
-
-# First, the classes that represent markup elements.
-
-class PageElement:
- """Contains the navigational information for some part of the page
- (either a tag or a piece of text)"""
-
- def setup(self, parent=None, previous=None):
- """Sets up the initial relations between this element and
- other elements."""
- self.parent = parent
- self.previous = previous
- self.next = None
- self.previousSibling = None
- self.nextSibling = None
- if self.parent and self.parent.contents:
- self.previousSibling = self.parent.contents[-1]
- self.previousSibling.nextSibling = self
-
- def replaceWith(self, replaceWith):
- oldParent = self.parent
- myIndex = self.parent.contents.index(self)
- if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent:
- # We're replacing this element with one of its siblings.
- index = self.parent.contents.index(replaceWith)
- if index and index < myIndex:
- # Furthermore, it comes before this element. That
- # means that when we extract it, the index of this
- # element will change.
- myIndex = myIndex - 1
- self.extract()
- oldParent.insert(myIndex, replaceWith)
-
- def extract(self):
- """Destructively rips this element out of the tree."""
- if self.parent:
- try:
- self.parent.contents.remove(self)
- except ValueError:
- pass
-
- #Find the two elements that would be next to each other if
- #this element (and any children) hadn't been parsed. Connect
- #the two.
- lastChild = self._lastRecursiveChild()
- nextElement = lastChild.next
-
- if self.previous:
- self.previous.next = nextElement
- if nextElement:
- nextElement.previous = self.previous
- self.previous = None
- lastChild.next = None
-
- self.parent = None
- if self.previousSibling:
- self.previousSibling.nextSibling = self.nextSibling
- if self.nextSibling:
- self.nextSibling.previousSibling = self.previousSibling
- self.previousSibling = self.nextSibling = None
- return self
-
- def _lastRecursiveChild(self):
- "Finds the last element beneath this object to be parsed."
- lastChild = self
- while hasattr(lastChild, 'contents') and lastChild.contents:
- lastChild = lastChild.contents[-1]
- return lastChild
-
- def insert(self, position, newChild):
- if (isinstance(newChild, basestring)
- or isinstance(newChild, unicode)) \
- and not isinstance(newChild, NavigableString):
- newChild = NavigableString(newChild)
-
- position = min(position, len(self.contents))
- if hasattr(newChild, 'parent') and newChild.parent != None:
- # We're 'inserting' an element that's already one
- # of this object's children.
- if newChild.parent == self:
- index = self.find(newChild)
- if index and index < position:
- # Furthermore we're moving it further down the
- # list of this object's children. That means that
- # when we extract this element, our target index
- # will jump down one.
- position = position - 1
- newChild.extract()
-
- newChild.parent = self
- previousChild = None
- if position == 0:
- newChild.previousSibling = None
- newChild.previous = self
- else:
- previousChild = self.contents[position-1]
- newChild.previousSibling = previousChild
- newChild.previousSibling.nextSibling = newChild
- newChild.previous = previousChild._lastRecursiveChild()
- if newChild.previous:
- newChild.previous.next = newChild
-
- newChildsLastElement = newChild._lastRecursiveChild()
-
- if position >= len(self.contents):
- newChild.nextSibling = None
-
- parent = self
- parentsNextSibling = None
- while not parentsNextSibling:
- parentsNextSibling = parent.nextSibling
- parent = parent.parent
- if not parent: # This is the last element in the document.
- break
- if parentsNextSibling:
- newChildsLastElement.next = parentsNextSibling
- else:
- newChildsLastElement.next = None
- else:
- nextChild = self.contents[position]
- newChild.nextSibling = nextChild
- if newChild.nextSibling:
- newChild.nextSibling.previousSibling = newChild
- newChildsLastElement.next = nextChild
-
- if newChildsLastElement.next:
- newChildsLastElement.next.previous = newChildsLastElement
- self.contents.insert(position, newChild)
-
- def append(self, tag):
- """Appends the given tag to the contents of this tag."""
- self.insert(len(self.contents), tag)
-
- def findNext(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the first item that matches the given criteria and
- appears after this Tag in the document."""
- return self._findOne(self.findAllNext, name, attrs, text, **kwargs)
-
- def findAllNext(self, name=None, attrs={}, text=None, limit=None,
- **kwargs):
- """Returns all items that match the given criteria and appear
- after this Tag in the document."""
- return self._findAll(name, attrs, text, limit, self.nextGenerator,
- **kwargs)
-
- def findNextSibling(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the closest sibling to this Tag that matches the
- given criteria and appears after this Tag in the document."""
- return self._findOne(self.findNextSiblings, name, attrs, text,
- **kwargs)
-
- def findNextSiblings(self, name=None, attrs={}, text=None, limit=None,
- **kwargs):
- """Returns the siblings of this Tag that match the given
- criteria and appear after this Tag in the document."""
- return self._findAll(name, attrs, text, limit,
- self.nextSiblingGenerator, **kwargs)
- fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
-
- def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the first item that matches the given criteria and
- appears before this Tag in the document."""
- return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs)
-
- def findAllPrevious(self, name=None, attrs={}, text=None, limit=None,
- **kwargs):
- """Returns all items that match the given criteria and appear
- before this Tag in the document."""
- return self._findAll(name, attrs, text, limit, self.previousGenerator,
- **kwargs)
- fetchPrevious = findAllPrevious # Compatibility with pre-3.x
-
- def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the closest sibling to this Tag that matches the
- given criteria and appears before this Tag in the document."""
- return self._findOne(self.findPreviousSiblings, name, attrs, text,
- **kwargs)
-
- def findPreviousSiblings(self, name=None, attrs={}, text=None,
- limit=None, **kwargs):
- """Returns the siblings of this Tag that match the given
- criteria and appear before this Tag in the document."""
- return self._findAll(name, attrs, text, limit,
- self.previousSiblingGenerator, **kwargs)
- fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
-
- def findParent(self, name=None, attrs={}, **kwargs):
- """Returns the closest parent of this Tag that matches the given
- criteria."""
- # NOTE: We can't use _findOne because findParents takes a different
- # set of arguments.
- r = None
- l = self.findParents(name, attrs, 1)
- if l:
- r = l[0]
- return r
-
- def findParents(self, name=None, attrs={}, limit=None, **kwargs):
- """Returns the parents of this Tag that match the given
- criteria."""
-
- return self._findAll(name, attrs, None, limit, self.parentGenerator,
- **kwargs)
- fetchParents = findParents # Compatibility with pre-3.x
-
- #These methods do the real heavy lifting.
-
- def _findOne(self, method, name, attrs, text, **kwargs):
- r = None
- l = method(name, attrs, text, 1, **kwargs)
- if l:
- r = l[0]
- return r
-
- def _findAll(self, name, attrs, text, limit, generator, **kwargs):
- "Iterates over a generator looking for things that match."
-
- if isinstance(name, SoupStrainer):
- strainer = name
- else:
- # Build a SoupStrainer
- strainer = SoupStrainer(name, attrs, text, **kwargs)
- results = ResultSet(strainer)
- g = generator()
- while True:
- try:
- i = g.next()
- except StopIteration:
- break
- if i:
- found = strainer.search(i)
- if found:
- results.append(found)
- if limit and len(results) >= limit:
- break
- return results
-
- #These Generators can be used to navigate starting from both
- #NavigableStrings and Tags.
- def nextGenerator(self):
- i = self
- while i:
- i = i.next
- yield i
-
- def nextSiblingGenerator(self):
- i = self
- while i:
- i = i.nextSibling
- yield i
-
- def previousGenerator(self):
- i = self
- while i:
- i = i.previous
- yield i
-
- def previousSiblingGenerator(self):
- i = self
- while i:
- i = i.previousSibling
- yield i
-
- def parentGenerator(self):
- i = self
- while i:
- i = i.parent
- yield i
-
- # Utility methods
- def substituteEncoding(self, str, encoding=None):
- encoding = encoding or "utf-8"
- return str.replace("%SOUP-ENCODING%", encoding)
-
- def toEncoding(self, s, encoding=None):
- """Encodes an object to a string in some encoding, or to Unicode.
- ."""
- if isinstance(s, unicode):
- if encoding:
- s = s.encode(encoding)
- elif isinstance(s, str):
- if encoding:
- s = s.encode(encoding)
- else:
- s = unicode(s)
- else:
- if encoding:
- s = self.toEncoding(str(s), encoding)
- else:
- s = unicode(s)
- return s
-
-class NavigableString(unicode, PageElement):
-
- def __new__(cls, value):
- """Create a new NavigableString.
-
- When unpickling a NavigableString, this method is called with
- the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
- passed in to the superclass's __new__ or the superclass won't know
- how to handle non-ASCII characters.
- """
- if isinstance(value, unicode):
- return unicode.__new__(cls, value)
- return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
-
- def __getnewargs__(self):
- return (NavigableString.__str__(self),)
-
- def __getattr__(self, attr):
- """text.string gives you text. This is for backwards
- compatibility for Navigable*String, but for CData* it lets you
- get the string without the CData wrapper."""
- if attr == 'string':
- return self
- else:
- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
-
- def __unicode__(self):
- return str(self).decode(DEFAULT_OUTPUT_ENCODING)
-
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- if encoding:
- return self.encode(encoding)
- else:
- return self
-
-class CData(NavigableString):
-
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return "" % NavigableString.__str__(self, encoding)
-
-class ProcessingInstruction(NavigableString):
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- output = self
- if "%SOUP-ENCODING%" in output:
- output = self.substituteEncoding(output, encoding)
- return "%s?>" % self.toEncoding(output, encoding)
-
-class Comment(NavigableString):
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return "" % NavigableString.__str__(self, encoding)
-
-class Declaration(NavigableString):
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return "" % NavigableString.__str__(self, encoding)
-
-class Tag(PageElement):
-
- """Represents a found HTML tag with its attributes and contents."""
-
- def _invert(h):
- "Cheap function to invert a hash."
- i = {}
- for k,v in h.items():
- i[v] = k
- return i
-
- XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
- "quot" : '"',
- "amp" : "&",
- "lt" : "<",
- "gt" : ">" }
-
- XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
-
- def _convertEntities(self, match):
- """Used in a call to re.sub to replace HTML, XML, and numeric
- entities with the appropriate Unicode characters. If HTML
- entities are being converted, any unrecognized entities are
- escaped."""
- x = match.group(1)
- if self.convertHTMLEntities and x in name2codepoint:
- return unichr(name2codepoint[x])
- elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
- if self.convertXMLEntities:
- return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
- else:
- return u'&%s;' % x
- elif len(x) > 0 and x[0] == '#':
- # Handle numeric entities
- if len(x) > 1 and x[1] == 'x':
- return unichr(int(x[2:], 16))
- else:
- return unichr(int(x[1:]))
-
- elif self.escapeUnrecognizedEntities:
- return u'&%s;' % x
- else:
- return u'&%s;' % x
-
- def __init__(self, parser, name, attrs=None, parent=None,
- previous=None):
- "Basic constructor."
-
- # We don't actually store the parser object: that lets extracted
- # chunks be garbage-collected
- self.parserClass = parser.__class__
- self.isSelfClosing = parser.isSelfClosingTag(name)
- self.name = name
- if attrs == None:
- attrs = []
- self.attrs = attrs
- self.contents = []
- self.setup(parent, previous)
- self.hidden = False
- self.containsSubstitutions = False
- self.convertHTMLEntities = parser.convertHTMLEntities
- self.convertXMLEntities = parser.convertXMLEntities
- self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities
-
- # Convert any HTML, XML, or numeric entities in the attribute values.
- convert = lambda(k, val): (k,
- re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",
- self._convertEntities,
- val))
- self.attrs = map(convert, self.attrs)
-
- def get(self, key, default=None):
- """Returns the value of the 'key' attribute for the tag, or
- the value given for 'default' if it doesn't have that
- attribute."""
- return self._getAttrMap().get(key, default)
-
- def has_key(self, key):
- return self._getAttrMap().has_key(key)
-
- def __getitem__(self, key):
- """tag[key] returns the value of the 'key' attribute for the tag,
- and throws an exception if it's not there."""
- return self._getAttrMap()[key]
-
- def __iter__(self):
- "Iterating over a tag iterates over its contents."
- return iter(self.contents)
-
- def __len__(self):
- "The length of a tag is the length of its list of contents."
- return len(self.contents)
-
- def __contains__(self, x):
- return x in self.contents
-
- def __nonzero__(self):
- "A tag is non-None even if it has no contents."
- return True
-
- def __setitem__(self, key, value):
- """Setting tag[key] sets the value of the 'key' attribute for the
- tag."""
- self._getAttrMap()
- self.attrMap[key] = value
- found = False
- for i in range(0, len(self.attrs)):
- if self.attrs[i][0] == key:
- self.attrs[i] = (key, value)
- found = True
- if not found:
- self.attrs.append((key, value))
- self._getAttrMap()[key] = value
-
- def __delitem__(self, key):
- "Deleting tag[key] deletes all 'key' attributes for the tag."
- for item in self.attrs:
- if item[0] == key:
- self.attrs.remove(item)
- #We don't break because bad HTML can define the same
- #attribute multiple times.
- self._getAttrMap()
- if self.attrMap.has_key(key):
- del self.attrMap[key]
-
- def __call__(self, *args, **kwargs):
- """Calling a tag like a function is the same as calling its
- findAll() method. Eg. tag('a') returns a list of all the A tags
- found within this tag."""
- return apply(self.findAll, args, kwargs)
-
- def __getattr__(self, tag):
- #print "Getattr %s.%s" % (self.__class__, tag)
- if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
- return self.find(tag[:-3])
- elif tag.find('__') != 0:
- return self.find(tag)
- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
-
- def __eq__(self, other):
- """Returns true iff this tag has the same name, the same attributes,
- and the same contents (recursively) as the given tag.
-
- NOTE: right now this will return false if two tags have the
- same attributes in a different order. Should this be fixed?"""
- if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
- return False
- for i in range(0, len(self.contents)):
- if self.contents[i] != other.contents[i]:
- return False
- return True
-
- def __ne__(self, other):
- """Returns true iff this tag is not identical to the other tag,
- as defined in __eq__."""
- return not self == other
-
- def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- """Renders this tag as a string."""
- return self.__str__(encoding)
-
- def __unicode__(self):
- return self.__str__(None)
-
- BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
- + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
- + ")")
-
- def _sub_entity(self, x):
- """Used with a regular expression to substitute the
- appropriate XML entity for an XML special character."""
- return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
-
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
- prettyPrint=False, indentLevel=0):
- """Returns a string or Unicode representation of this tag and
- its contents. To get Unicode, pass None for encoding.
-
- NOTE: since Python's HTML parser consumes whitespace, this
- method is not certain to reproduce the whitespace present in
- the original string."""
-
- encodedName = self.toEncoding(self.name, encoding)
-
- attrs = []
- if self.attrs:
- for key, val in self.attrs:
- fmt = '%s="%s"'
- if isString(val):
- if self.containsSubstitutions and '%SOUP-ENCODING%' in val:
- val = self.substituteEncoding(val, encoding)
-
- # The attribute value either:
- #
- # * Contains no embedded double quotes or single quotes.
- # No problem: we enclose it in double quotes.
- # * Contains embedded single quotes. No problem:
- # double quotes work here too.
- # * Contains embedded double quotes. No problem:
- # we enclose it in single quotes.
- # * Embeds both single _and_ double quotes. This
- # can't happen naturally, but it can happen if
- # you modify an attribute value after parsing
- # the document. Now we have a bit of a
- # problem. We solve it by enclosing the
- # attribute in single quotes, and escaping any
- # embedded single quotes to XML entities.
- if '"' in val:
- fmt = "%s='%s'"
- if "'" in val:
- # TODO: replace with apos when
- # appropriate.
- val = val.replace("'", "&squot;")
-
- # Now we're okay w/r/t quotes. But the attribute
- # value might also contain angle brackets, or
- # ampersands that aren't part of entities. We need
- # to escape those to XML entities too.
- val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
-
- attrs.append(fmt % (self.toEncoding(key, encoding),
- self.toEncoding(val, encoding)))
- close = ''
- closeTag = ''
- if self.isSelfClosing:
- close = ' /'
- else:
- closeTag = '%s>' % encodedName
-
- indentTag, indentContents = 0, 0
- if prettyPrint:
- indentTag = indentLevel
- space = (' ' * (indentTag-1))
- indentContents = indentTag + 1
- contents = self.renderContents(encoding, prettyPrint, indentContents)
- if self.hidden:
- s = contents
- else:
- s = []
- attributeString = ''
- if attrs:
- attributeString = ' ' + ' '.join(attrs)
- if prettyPrint:
- s.append(space)
- s.append('<%s%s%s>' % (encodedName, attributeString, close))
- if prettyPrint:
- s.append("\n")
- s.append(contents)
- if prettyPrint and contents and contents[-1] != "\n":
- s.append("\n")
- if prettyPrint and closeTag:
- s.append(space)
- s.append(closeTag)
- if prettyPrint and closeTag and self.nextSibling:
- s.append("\n")
- s = ''.join(s)
- return s
-
- def decompose(self):
- """Recursively destroys the contents of this tree."""
- contents = [i for i in self.contents]
- for i in contents:
- if isinstance(i, Tag):
- i.decompose()
- else:
- i.extract()
- self.extract()
-
- def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return self.__str__(encoding, True)
-
- def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
- prettyPrint=False, indentLevel=0):
- """Renders the contents of this tag as a string in the given
- encoding. If encoding is None, returns a Unicode string.."""
- s=[]
- for c in self:
- text = None
- if isinstance(c, NavigableString):
- text = c.__str__(encoding)
- elif isinstance(c, Tag):
- s.append(c.__str__(encoding, prettyPrint, indentLevel))
- if text and prettyPrint:
- text = text.strip()
- if text:
- if prettyPrint:
- s.append(" " * (indentLevel-1))
- s.append(text)
- if prettyPrint:
- s.append("\n")
- return ''.join(s)
-
- #Soup methods
-
- def find(self, name=None, attrs={}, recursive=True, text=None,
- **kwargs):
- """Return only the first child of this Tag matching the given
- criteria."""
- r = None
- l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
- if l:
- r = l[0]
- return r
- findChild = find
-
- def findAll(self, name=None, attrs={}, recursive=True, text=None,
- limit=None, **kwargs):
- """Extracts a list of Tag objects that match the given
- criteria. You can specify the name of the Tag and any
- attributes you want the Tag to have.
-
- The value of a key-value pair in the 'attrs' map can be a
- string, a list of strings, a regular expression object, or a
- callable that takes a string and returns whether or not the
- string matches for some custom definition of 'matches'. The
- same is true of the tag name."""
- generator = self.recursiveChildGenerator
- if not recursive:
- generator = self.childGenerator
- return self._findAll(name, attrs, text, limit, generator, **kwargs)
- findChildren = findAll
-
- # Pre-3.x compatibility methods
- first = find
- fetch = findAll
-
- def fetchText(self, text=None, recursive=True, limit=None):
- return self.findAll(text=text, recursive=recursive, limit=limit)
-
- def firstText(self, text=None, recursive=True):
- return self.find(text=text, recursive=recursive)
-
- #Private methods
-
- def _getAttrMap(self):
- """Initializes a map representation of this tag's attributes,
- if not already initialized."""
- if not getattr(self, 'attrMap'):
- self.attrMap = {}
- for (key, value) in self.attrs:
- self.attrMap[key] = value
- return self.attrMap
-
- #Generator methods
- def childGenerator(self):
- for i in range(0, len(self.contents)):
- yield self.contents[i]
- raise StopIteration
-
- def recursiveChildGenerator(self):
- stack = [(self, 0)]
- while stack:
- tag, start = stack.pop()
- if isinstance(tag, Tag):
- for i in range(start, len(tag.contents)):
- a = tag.contents[i]
- yield a
- if isinstance(a, Tag) and tag.contents:
- if i < len(tag.contents) - 1:
- stack.append((tag, i+1))
- stack.append((a, 0))
- break
- raise StopIteration
-
-# Next, a couple classes to represent queries and their results.
-class SoupStrainer:
- """Encapsulates a number of ways of matching a markup element (tag or
- text)."""
-
- def __init__(self, name=None, attrs={}, text=None, **kwargs):
- self.name = name
- if isString(attrs):
- kwargs['class'] = attrs
- attrs = None
- if kwargs:
- if attrs:
- attrs = attrs.copy()
- attrs.update(kwargs)
- else:
- attrs = kwargs
- self.attrs = attrs
- self.text = text
-
- def __str__(self):
- if self.text:
- return self.text
- else:
- return "%s|%s" % (self.name, self.attrs)
-
- def searchTag(self, markupName=None, markupAttrs={}):
- found = None
- markup = None
- if isinstance(markupName, Tag):
- markup = markupName
- markupAttrs = markup
- callFunctionWithTagData = callable(self.name) \
- and not isinstance(markupName, Tag)
-
- if (not self.name) \
- or callFunctionWithTagData \
- or (markup and self._matches(markup, self.name)) \
- or (not markup and self._matches(markupName, self.name)):
- if callFunctionWithTagData:
- match = self.name(markupName, markupAttrs)
- else:
- match = True
- markupAttrMap = None
- for attr, matchAgainst in self.attrs.items():
- if not markupAttrMap:
- if hasattr(markupAttrs, 'get'):
- markupAttrMap = markupAttrs
- else:
- markupAttrMap = {}
- for k,v in markupAttrs:
- markupAttrMap[k] = v
- attrValue = markupAttrMap.get(attr)
- if not self._matches(attrValue, matchAgainst):
- match = False
- break
- if match:
- if markup:
- found = markup
- else:
- found = markupName
- return found
-
- def search(self, markup):
- #print 'looking for %s in %s' % (self, markup)
- found = None
- # If given a list of items, scan it for a text element that
- # matches.
- if isList(markup) and not isinstance(markup, Tag):
- for element in markup:
- if isinstance(element, NavigableString) \
- and self.search(element):
- found = element
- break
- # If it's a Tag, make sure its name or attributes match.
- # Don't bother with Tags if we're searching for text.
- elif isinstance(markup, Tag):
- if not self.text:
- found = self.searchTag(markup)
- # If it's text, make sure the text matches.
- elif isinstance(markup, NavigableString) or \
- isString(markup):
- if self._matches(markup, self.text):
- found = markup
- else:
- raise Exception, "I don't know how to match against a %s" \
- % markup.__class__
- return found
-
- def _matches(self, markup, matchAgainst):
- #print "Matching %s against %s" % (markup, matchAgainst)
- result = False
- if matchAgainst == True and type(matchAgainst) == types.BooleanType:
- result = markup != None
- elif callable(matchAgainst):
- result = matchAgainst(markup)
- else:
- #Custom match methods take the tag as an argument, but all
- #other ways of matching match the tag name as a string.
- if isinstance(markup, Tag):
- markup = markup.name
- if markup and not isString(markup):
- markup = unicode(markup)
- #Now we know that chunk is either a string, or None.
- if hasattr(matchAgainst, 'match'):
- # It's a regexp object.
- result = markup and matchAgainst.search(markup)
- elif isList(matchAgainst):
- result = markup in matchAgainst
- elif hasattr(matchAgainst, 'items'):
- result = markup.has_key(matchAgainst)
- elif matchAgainst and isString(markup):
- if isinstance(markup, unicode):
- matchAgainst = unicode(matchAgainst)
- else:
- matchAgainst = str(matchAgainst)
-
- if not result:
- result = matchAgainst == markup
- return result
-
-class ResultSet(list):
- """A ResultSet is just a list that keeps track of the SoupStrainer
- that created it."""
- def __init__(self, source):
- list.__init__([])
- self.source = source
-
-# Now, some helper functions.
-
-def isList(l):
- """Convenience method that works with all 2.x versions of Python
- to determine whether or not something is listlike."""
- return hasattr(l, '__iter__') \
- or (type(l) in (types.ListType, types.TupleType))
-
-def isString(s):
- """Convenience method that works with all 2.x versions of Python
- to determine whether or not something is stringlike."""
- try:
- return isinstance(s, unicode) or isinstance(s, basestring)
- except NameError:
- return isinstance(s, str)
-
-def buildTagMap(default, *args):
- """Turns a list of maps, lists, or scalars into a single map.
- Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
- NESTING_RESET_TAGS maps out of lists and partial maps."""
- built = {}
- for portion in args:
- if hasattr(portion, 'items'):
- #It's a map. Merge it.
- for k,v in portion.items():
- built[k] = v
- elif isList(portion):
- #It's a list. Map each item to the default.
- for k in portion:
- built[k] = default
- else:
- #It's a scalar. Map it to the default.
- built[portion] = default
- return built
-
-# Now, the parser classes.
-
-class BeautifulStoneSoup(Tag, SGMLParser):
-
- """This class contains the basic parser and search code. It defines
- a parser that knows nothing about tag behavior except for the
- following:
-
- You can't close a tag without closing all the tags it encloses.
- That is, "" actually means
- "".
-
- [Another possible explanation is "", but since
- this class defines no SELF_CLOSING_TAGS, it will never use that
- explanation.]
-
- This class is useful for parsing XML or made-up markup languages,
- or when BeautifulSoup makes an assumption counter to what you were
- expecting."""
-
- SELF_CLOSING_TAGS = {}
- NESTABLE_TAGS = {}
- RESET_NESTING_TAGS = {}
- QUOTE_TAGS = {}
- PRESERVE_WHITESPACE_TAGS = []
-
- MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
- lambda x: x.group(1) + ' />'),
- (re.compile(']*)>'),
- lambda x: '')
- ]
-
- ROOT_TAG_NAME = u'[document]'
-
- HTML_ENTITIES = "html"
- XML_ENTITIES = "xml"
- XHTML_ENTITIES = "xhtml"
- # TODO: This only exists for backwards-compatibility
- ALL_ENTITIES = XHTML_ENTITIES
-
- # Used when determining whether a text node is all whitespace and
- # can be replaced with a single space. A text node that contains
- # fancy Unicode spaces (usually non-breaking) should be left
- # alone.
- STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
-
- def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
- markupMassage=True, smartQuotesTo=XML_ENTITIES,
- convertEntities=None, selfClosingTags=None, isHTML=False):
- """The Soup object is initialized as the 'root tag', and the
- provided markup (which can be a string or a file-like object)
- is fed into the underlying parser.
-
- sgmllib will process most bad HTML, and the BeautifulSoup
- class has some tricks for dealing with some HTML that kills
- sgmllib, but Beautiful Soup can nonetheless choke or lose data
- if your data uses self-closing tags or declarations
- incorrectly.
-
- By default, Beautiful Soup uses regexes to sanitize input,
- avoiding the vast majority of these problems. If the problems
- don't apply to you, pass in False for markupMassage, and
- you'll get better performance.
-
- The default parser massage techniques fix the two most common
- instances of invalid HTML that choke sgmllib:
-
-
(No space between name of closing tag and tag close)
- (Extraneous whitespace in declaration)
-
- You can pass in a custom list of (RE object, replace method)
- tuples to get Beautiful Soup to scrub your input the way you
- want."""
-
- self.parseOnlyThese = parseOnlyThese
- self.fromEncoding = fromEncoding
- self.smartQuotesTo = smartQuotesTo
- self.convertEntities = convertEntities
- # Set the rules for how we'll deal with the entities we
- # encounter
- if self.convertEntities:
- # It doesn't make sense to convert encoded characters to
- # entities even while you're converting entities to Unicode.
- # Just convert it all to Unicode.
- self.smartQuotesTo = None
- if convertEntities == self.HTML_ENTITIES:
- self.convertXMLEntities = False
- self.convertHTMLEntities = True
- self.escapeUnrecognizedEntities = True
- elif convertEntities == self.XHTML_ENTITIES:
- self.convertXMLEntities = True
- self.convertHTMLEntities = True
- self.escapeUnrecognizedEntities = False
- elif convertEntities == self.XML_ENTITIES:
- self.convertXMLEntities = True
- self.convertHTMLEntities = False
- self.escapeUnrecognizedEntities = False
- else:
- self.convertXMLEntities = False
- self.convertHTMLEntities = False
- self.escapeUnrecognizedEntities = False
-
- self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
- SGMLParser.__init__(self)
-
- if hasattr(markup, 'read'): # It's a file-type object.
- markup = markup.read()
- self.markup = markup
- self.markupMassage = markupMassage
- try:
- self._feed(isHTML=isHTML)
- except StopParsing:
- pass
- self.markup = None # The markup can now be GCed
-
- def convert_charref(self, name):
- """This method fixes a bug in Python's SGMLParser."""
- try:
- n = int(name)
- except ValueError:
- return
- if not 0 <= n <= 127 : # ASCII ends at 127, not 255
- return
- return self.convert_codepoint(n)
-
- def _feed(self, inDocumentEncoding=None, isHTML=False):
- # Convert the document to Unicode.
- markup = self.markup
- if isinstance(markup, unicode):
- if not hasattr(self, 'originalEncoding'):
- self.originalEncoding = None
- else:
- dammit = UnicodeDammit\
- (markup, [self.fromEncoding, inDocumentEncoding],
- smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
- markup = dammit.unicode
- self.originalEncoding = dammit.originalEncoding
- self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
- if markup:
- if self.markupMassage:
- if not isList(self.markupMassage):
- self.markupMassage = self.MARKUP_MASSAGE
- for fix, m in self.markupMassage:
- markup = fix.sub(m, markup)
- # TODO: We get rid of markupMassage so that the
- # soup object can be deepcopied later on. Some
- # Python installations can't copy regexes. If anyone
- # was relying on the existence of markupMassage, this
- # might cause problems.
- del(self.markupMassage)
- self.reset()
-
- SGMLParser.feed(self, markup)
- # Close out any unfinished strings and close all the open tags.
- self.endData()
- while self.currentTag.name != self.ROOT_TAG_NAME:
- self.popTag()
-
- def __getattr__(self, methodName):
- """This method routes method call requests to either the SGMLParser
- superclass or the Tag superclass, depending on the method name."""
- #print "__getattr__ called on %s.%s" % (self.__class__, methodName)
-
- if methodName.find('start_') == 0 or methodName.find('end_') == 0 \
- or methodName.find('do_') == 0:
- return SGMLParser.__getattr__(self, methodName)
- elif methodName.find('__') != 0:
- return Tag.__getattr__(self, methodName)
- else:
- raise AttributeError
-
- def isSelfClosingTag(self, name):
- """Returns true iff the given string is the name of a
- self-closing tag according to this parser."""
- return self.SELF_CLOSING_TAGS.has_key(name) \
- or self.instanceSelfClosingTags.has_key(name)
-
- def reset(self):
- Tag.__init__(self, self, self.ROOT_TAG_NAME)
- self.hidden = 1
- SGMLParser.reset(self)
- self.currentData = []
- self.currentTag = None
- self.tagStack = []
- self.quoteStack = []
- self.pushTag(self)
-
- def popTag(self):
- tag = self.tagStack.pop()
- # Tags with just one string-owning child get the child as a
- # 'string' property, so that soup.tag.string is shorthand for
- # soup.tag.contents[0]
- if len(self.currentTag.contents) == 1 and \
- isinstance(self.currentTag.contents[0], NavigableString):
- self.currentTag.string = self.currentTag.contents[0]
-
- #print "Pop", tag.name
- if self.tagStack:
- self.currentTag = self.tagStack[-1]
- return self.currentTag
-
- def pushTag(self, tag):
- #print "Push", tag.name
- if self.currentTag:
- self.currentTag.contents.append(tag)
- self.tagStack.append(tag)
- self.currentTag = self.tagStack[-1]
-
- def endData(self, containerClass=NavigableString):
- if self.currentData:
- currentData = u''.join(self.currentData)
- if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
- not set([tag.name for tag in self.tagStack]).intersection(
- self.PRESERVE_WHITESPACE_TAGS)):
- if '\n' in currentData:
- currentData = '\n'
- else:
- currentData = ' '
- self.currentData = []
- if self.parseOnlyThese and len(self.tagStack) <= 1 and \
- (not self.parseOnlyThese.text or \
- not self.parseOnlyThese.search(currentData)):
- return
- o = containerClass(currentData)
- o.setup(self.currentTag, self.previous)
- if self.previous:
- self.previous.next = o
- self.previous = o
- self.currentTag.contents.append(o)
-
-
- def _popToTag(self, name, inclusivePop=True):
- """Pops the tag stack up to and including the most recent
- instance of the given tag. If inclusivePop is false, pops the tag
- stack up to but *not* including the most recent instqance of
- the given tag."""
- #print "Popping to %s" % name
- if name == self.ROOT_TAG_NAME:
- return
-
- numPops = 0
- mostRecentTag = None
- for i in range(len(self.tagStack)-1, 0, -1):
- if name == self.tagStack[i].name:
- numPops = len(self.tagStack)-i
- break
- if not inclusivePop:
- numPops = numPops - 1
-
- for i in range(0, numPops):
- mostRecentTag = self.popTag()
- return mostRecentTag
-
- def _smartPop(self, name):
-
- """We need to pop up to the previous tag of this type, unless
- one of this tag's nesting reset triggers comes between this
- tag and the previous tag of this type, OR unless this tag is a
- generic nesting trigger and another generic nesting trigger
- comes between this tag and the previous tag of this type.
-
- Examples:
- FooBar *
* should pop to 'p', not 'b'.
-
Foo
Bar ** should pop to 'table', not 'p'.
-
Foo
Bar ** should pop to 'tr', not 'p'.
-
-
- *
- * should pop to 'ul', not the first 'li'.
-
*
* should pop to 'table', not the first 'tr'
- |
| * | * should pop to 'tr', not the first 'td'
- """
-
- nestingResetTriggers = self.NESTABLE_TAGS.get(name)
- isNestable = nestingResetTriggers != None
- isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
- popTo = None
- inclusive = True
- for i in range(len(self.tagStack)-1, 0, -1):
- p = self.tagStack[i]
- if (not p or p.name == name) and not isNestable:
- #Non-nestable tags get popped to the top or to their
- #last occurance.
- popTo = name
- break
- if (nestingResetTriggers != None
- and p.name in nestingResetTriggers) \
- or (nestingResetTriggers == None and isResetNesting
- and self.RESET_NESTING_TAGS.has_key(p.name)):
-
- #If we encounter one of the nesting reset triggers
- #peculiar to this tag, or we encounter another tag
- #that causes nesting to reset, pop up to but not
- #including that tag.
- popTo = p.name
- inclusive = False
- break
- p = p.parent
- if popTo:
- self._popToTag(popTo, inclusive)
-
- def unknown_starttag(self, name, attrs, selfClosing=0):
- #print "Start tag %s: %s" % (name, attrs)
- if self.quoteStack:
- #This is not a real tag.
- #print "<%s> is not real!" % name
- attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
- self.handle_data('<%s%s>' % (name, attrs))
- return
- self.endData()
-
- if not self.isSelfClosingTag(name) and not selfClosing:
- self._smartPop(name)
-
- if self.parseOnlyThese and len(self.tagStack) <= 1 \
- and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
- return
-
- tag = Tag(self, name, attrs, self.currentTag, self.previous)
- if self.previous:
- self.previous.next = tag
- self.previous = tag
- self.pushTag(tag)
- if selfClosing or self.isSelfClosingTag(name):
- self.popTag()
- if name in self.QUOTE_TAGS:
- #print "Beginning quote (%s)" % name
- self.quoteStack.append(name)
- self.literal = 1
- return tag
-
- def unknown_endtag(self, name):
- #print "End tag %s" % name
- if self.quoteStack and self.quoteStack[-1] != name:
- #This is not a real end tag.
- #print "%s> is not real!" % name
- self.handle_data('%s>' % name)
- return
- self.endData()
- self._popToTag(name)
- if self.quoteStack and self.quoteStack[-1] == name:
- self.quoteStack.pop()
- self.literal = (len(self.quoteStack) > 0)
-
- def handle_data(self, data):
- self.currentData.append(data)
-
- def _toStringSubclass(self, text, subclass):
- """Adds a certain piece of text to the tree as a NavigableString
- subclass."""
- self.endData()
- self.handle_data(text)
- self.endData(subclass)
-
- def handle_pi(self, text):
- """Handle a processing instruction as a ProcessingInstruction
- object, possibly one with a %SOUP-ENCODING% slot into which an
- encoding will be plugged later."""
- if text[:3] == "xml":
- text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
- self._toStringSubclass(text, ProcessingInstruction)
-
- def handle_comment(self, text):
- "Handle comments as Comment objects."
- self._toStringSubclass(text, Comment)
-
- def handle_charref(self, ref):
- "Handle character references as data."
- if self.convertEntities:
- data = unichr(int(ref))
- else:
- data = '%s;' % ref
- self.handle_data(data)
-
- def handle_entityref(self, ref):
- """Handle entity references as data, possibly converting known
- HTML and/or XML entity references to the corresponding Unicode
- characters."""
- data = None
- if self.convertHTMLEntities:
- try:
- data = unichr(name2codepoint[ref])
- except KeyError:
- pass
-
- if not data and self.convertXMLEntities:
- data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
-
- if not data and self.convertHTMLEntities and \
- not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
- # TODO: We've got a problem here. We're told this is
- # an entity reference, but it's not an XML entity
- # reference or an HTML entity reference. Nonetheless,
- # the logical thing to do is to pass it through as an
- # unrecognized entity reference.
- #
- # Except: when the input is "&carol;" this function
- # will be called with input "carol". When the input is
- # "AT&T", this function will be called with input
- # "T". We have no way of knowing whether a semicolon
- # was present originally, so we don't know whether
- # this is an unknown entity or just a misplaced
- # ampersand.
- #
- # The more common case is a misplaced ampersand, so I
- # escape the ampersand and omit the trailing semicolon.
- data = "&%s" % ref
- if not data:
- # This case is different from the one above, because we
- # haven't already gone through a supposedly comprehensive
- # mapping of entities to Unicode characters. We might not
- # have gone through any mapping at all. So the chances are
- # very high that this is a real entity, and not a
- # misplaced ampersand.
- data = "&%s;" % ref
- self.handle_data(data)
-
- def handle_decl(self, data):
- "Handle DOCTYPEs and the like as Declaration objects."
- self._toStringSubclass(data, Declaration)
-
- def parse_declaration(self, i):
- """Treat a bogus SGML declaration as raw data. Treat a CDATA
- declaration as a CData object."""
- j = None
- if self.rawdata[i:i+9] == '', i)
- if k == -1:
- k = len(self.rawdata)
- data = self.rawdata[i+9:k]
- j = k+3
- self._toStringSubclass(data, CData)
- else:
- try:
- j = SGMLParser.parse_declaration(self, i)
- except SGMLParseError:
- toHandle = self.rawdata[i:]
- self.handle_data(toHandle)
- j = i + len(toHandle)
- return j
-
-class BeautifulSoup(BeautifulStoneSoup):
-
- """This parser knows the following facts about HTML:
-
- * Some tags have no closing tag and should be interpreted as being
- closed as soon as they are encountered.
-
- * The text inside some tags (ie. 'script') may contain tags which
- are not really part of the document and which should be parsed
- as text, not tags. If you want to parse the text as tags, you can
- always fetch it and parse it explicitly.
-
- * Tag nesting rules:
-
- Most tags can't be nested at all. For instance, the occurance of
- a tag should implicitly close the previous tag.
-
- Para1 Para2
- should be transformed into:
- Para1 Para2
-
- Some tags can be nested arbitrarily. For instance, the occurance
- of a tag should _not_ implicitly close the previous
- tag.
-
- Alice said: Bob said: Blah
- should NOT be transformed into:
- Alice said: Bob said: Blah
-
- Some tags can be nested, but the nesting is reset by the
- interposition of other tags. For instance, a tag should
- implicitly close the previous tag within the same ,
- but not close a tag in another table.
-
- Blah Blah
- should be transformed into:
- Blah Blah
- but,
- BlahBlah
- should NOT be transformed into
- BlahBlah
-
- Differing assumptions about tag nesting rules are a major source
- of problems with the BeautifulSoup class. If BeautifulSoup is not
- treating as nestable a tag your page author treats as nestable,
- try ICantBelieveItsBeautifulSoup, MinimalSoup, or
- BeautifulStoneSoup before writing your own subclass."""
-
- def __init__(self, *args, **kwargs):
- if not kwargs.has_key('smartQuotesTo'):
- kwargs['smartQuotesTo'] = self.HTML_ENTITIES
- kwargs['isHTML'] = True
- BeautifulStoneSoup.__init__(self, *args, **kwargs)
-
- SELF_CLOSING_TAGS = buildTagMap(None,
- ['br' , 'hr', 'input', 'img', 'meta',
- 'spacer', 'link', 'frame', 'base'])
-
- PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
-
- QUOTE_TAGS = {'script' : None, 'textarea' : None}
-
- #According to the HTML standard, each of these inline tags can
- #contain another tag of the same type. Furthermore, it's common
- #to actually use these tags this way.
- NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
- 'center']
-
- #According to the HTML standard, these block tags can contain
- #another tag of the same type. Furthermore, it's common
- #to actually use these tags this way.
- NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del']
-
- #Lists can contain other lists, but there are restrictions.
- NESTABLE_LIST_TAGS = { 'ol' : [],
- 'ul' : [],
- 'li' : ['ul', 'ol'],
- 'dl' : [],
- 'dd' : ['dl'],
- 'dt' : ['dl'] }
-
- #Tables can contain other tables, but there are restrictions.
- NESTABLE_TABLE_TAGS = {'table' : [],
- 'tr' : ['table', 'tbody', 'tfoot', 'thead'],
- 'td' : ['tr'],
- 'th' : ['tr'],
- 'thead' : ['table'],
- 'tbody' : ['table'],
- 'tfoot' : ['table'],
- }
-
- NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre']
-
- #If one of these tags is encountered, all tags up to the next tag of
- #this type are popped.
- RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
- NON_NESTABLE_BLOCK_TAGS,
- NESTABLE_LIST_TAGS,
- NESTABLE_TABLE_TAGS)
-
- NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
- NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)
-
- # Used to detect the charset in a META tag; see start_meta
- CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
-
- def start_meta(self, attrs):
- """Beautiful Soup can detect a charset included in a META tag,
- try to convert the document to that charset, and re-parse the
- document from the beginning."""
- httpEquiv = None
- contentType = None
- contentTypeIndex = None
- tagNeedsEncodingSubstitution = False
-
- for i in range(0, len(attrs)):
- key, value = attrs[i]
- key = key.lower()
- if key == 'http-equiv':
- httpEquiv = value
- elif key == 'content':
- contentType = value
- contentTypeIndex = i
-
- if httpEquiv and contentType: # It's an interesting meta tag.
- match = self.CHARSET_RE.search(contentType)
- if match:
- if (self.declaredHTMLEncoding is not None or
- self.originalEncoding == self.fromEncoding):
- # An HTML encoding was sniffed while converting
- # the document to Unicode, or an HTML encoding was
- # sniffed during a previous pass through the
- # document, or an encoding was specified
- # explicitly and it worked. Rewrite the meta tag.
- def rewrite(match):
- return match.group(1) + "%SOUP-ENCODING%"
- newAttr = self.CHARSET_RE.sub(rewrite, contentType)
- attrs[contentTypeIndex] = (attrs[contentTypeIndex][0],
- newAttr)
- tagNeedsEncodingSubstitution = True
- else:
- # This is our first pass through the document.
- # Go through it again with the encoding information.
- newCharset = match.group(3)
- if newCharset and newCharset != self.originalEncoding:
- self.declaredHTMLEncoding = newCharset
- self._feed(self.declaredHTMLEncoding)
- raise StopParsing
- pass
- tag = self.unknown_starttag("meta", attrs)
- if tag and tagNeedsEncodingSubstitution:
- tag.containsSubstitutions = True
-
-class StopParsing(Exception):
- pass
-
-class ICantBelieveItsBeautifulSoup(BeautifulSoup):
-
- """The BeautifulSoup class is oriented towards skipping over
- common HTML errors like unclosed tags. However, sometimes it makes
- errors of its own. For instance, consider this fragment:
-
- FooBar
-
- This is perfectly valid (if bizarre) HTML. However, the
- BeautifulSoup class will implicitly close the first b tag when it
- encounters the second 'b'. It will think the author wrote
- "FooBar", and didn't close the first 'b' tag, because
- there's no real-world reason to bold something that's already
- bold. When it encounters '' it will close two more 'b'
- tags, for a grand total of three tags closed instead of two. This
- can throw off the rest of your document structure. The same is
- true of a number of other tags, listed below.
-
- It's much more common for someone to forget to close a 'b' tag
- than to actually use nested 'b' tags, and the BeautifulSoup class
- handles the common case. This class handles the not-co-common
- case: where you can't believe someone wrote what they did, but
- it's valid HTML and BeautifulSoup screwed up by assuming it
- wouldn't be."""
-
- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
- ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
- 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
- 'big']
-
- I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']
-
- NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
- I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
-
-class MinimalSoup(BeautifulSoup):
- """The MinimalSoup class is for parsing HTML that contains
- pathologically bad markup. It makes no assumptions about tag
- nesting, but it does know which tags are self-closing, that
- ', re.I|re.S), ''),
- # For BeautifulSoup.
- (re.compile('', re.I), '')
- ]
-
- def preprocess_dom(self, dom):
- # Remove "link this quote" links.
- for qLink in self.xpath(dom, "//p[@class='linksoda']"):
- qLink.drop_tree()
- return dom
-
- def postprocess_data(self, data):
- if 'quotes' not in data:
- return {}
- for idx, quote in enumerate(data['quotes']):
- data['quotes'][idx] = quote.split('::')
- return data
-
-
-class DOMHTMLReleaseinfoParser(DOMParserBase):
- """Parser for the "release dates" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- rdparser = DOMHTMLReleaseinfoParser()
- result = rdparser.parse(releaseinfo_html_string)
- """
- extractors = [Extractor(label='release dates',
- path="//th[@class='xxxx']/../../tr",
- attrs=Attribute(key='release dates', multi=True,
- path={'country': ".//td[1]//text()",
- 'date': ".//td[2]//text()",
- 'notes': ".//td[3]//text()"})),
- Extractor(label='akas',
- path="//div[@class='_imdbpy_akas']/table/tr",
- attrs=Attribute(key='akas', multi=True,
- path={'title': "./td[1]/text()",
- 'countries': "./td[2]/text()"}))]
-
- preprocessors = [
- (re.compile('( ))\n? (?!'),
- # the ones below are for the publicity parser
- (re.compile('(.*?) ', re.I), r'\1 '),
- (re.compile('()', re.I), r'\1::'),
- (re.compile('( | )', re.I), r'\n\1'),
- # this is for splitting individual entries
- (re.compile(' ', re.I), r'\n'),
- ]
-
- def postprocess_data(self, data):
- for key in data:
- data[key] = filter(None, data[key])
- if self.kind in ('literature', 'business', 'contacts') and data:
- if 'screenplay/teleplay' in data:
- data['screenplay-teleplay'] = data['screenplay/teleplay']
- del data['screenplay/teleplay']
- data = {self.kind: data}
- else:
- if self.kind == 'publicity':
- if 'biography (print)' in data:
- data['biography-print'] = data['biography (print)']
- del data['biography (print)']
- # Tech info.
- for key in data.keys():
- if key.startswith('film negative format'):
- data['film negative format'] = data[key]
- del data[key]
- elif key.startswith('film length'):
- data['film length'] = data[key]
- del data[key]
- return data
-
-
-class DOMHTMLDvdParser(DOMParserBase):
- """Parser for the "dvd" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- dparser = DOMHTMLDvdParser()
- result = dparser.parse(dvd_html_string)
- """
- _defGetRefs = True
- extractors = [Extractor(label='dvd',
- path="//div[@class='base_layer']",
- attrs=[Attribute(key=None,
- multi=True,
- path={
- 'title': "../table[1]//h3/text()",
- 'cover': "../table[1]//img/@src",
- 'region': ".//p[b='Region:']/text()",
- 'asin': ".//p[b='ASIN:']/text()",
- 'upc': ".//p[b='UPC:']/text()",
- 'rating': ".//p/b[starts-with(text(), 'Rating:')]/../img/@alt",
- 'certificate': ".//p[b='Certificate:']/text()",
- 'runtime': ".//p[b='Runtime:']/text()",
- 'label': ".//p[b='Label:']/text()",
- 'studio': ".//p[b='Studio:']/text()",
- 'release date': ".//p[b='Release Date:']/text()",
- 'dvd format': ".//p[b='DVD Format:']/text()",
- 'dvd features': ".//p[b='DVD Features: ']//text()",
- 'supplements': "..//div[span='Supplements']" \
- "/following-sibling::div[1]//text()",
- 'review': "..//div[span='Review']/following-sibling::div[1]//text()",
- 'titles': "..//div[starts-with(text(), 'Titles in this Product')]" \
- "/..//text()",
- },
- postprocess=lambda x: {
- 'title': (x.get('title') or u'').strip(),
- 'cover': (x.get('cover') or u'').strip(),
- 'region': (x.get('region') or u'').strip(),
- 'asin': (x.get('asin') or u'').strip(),
- 'upc': (x.get('upc') or u'').strip(),
- 'rating': (x.get('rating') or u'Not Rated').strip().replace('Rating: ', ''),
- 'certificate': (x.get('certificate') or u'').strip(),
- 'runtime': (x.get('runtime') or u'').strip(),
- 'label': (x.get('label') or u'').strip(),
- 'studio': (x.get('studio') or u'').strip(),
- 'release date': (x.get('release date') or u'').strip(),
- 'dvd format': (x.get('dvd format') or u'').strip(),
- 'dvd features': (x.get('dvd features') or u'').strip().replace('DVD Features: ', ''),
- 'supplements': (x.get('supplements') or u'').strip(),
- 'review': (x.get('review') or u'').strip(),
- 'titles in this product': (x.get('titles') or u'').strip().replace('Titles in this Product::', ''),
- }
- )])]
-
- preprocessors = [
- (re.compile('( \s* ', re.I),
- r'\1 '),
- (re.compile('( \s* ( ', re.I), r'::')
- ]
-
- def postprocess_data(self, data):
- if not data:
- return data
- dvds = data['dvd']
- for dvd in dvds:
- if dvd['cover'].find('noposter') != -1:
- del dvd['cover']
- for key in dvd.keys():
- if not dvd[key]:
- del dvd[key]
- if 'supplements' in dvd:
- dvd['supplements'] = dvd['supplements'].split('::')
- return data
-
-
-class DOMHTMLRecParser(DOMParserBase):
- """Parser for the "recommendations" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- rparser = HTMLRecParser()
- result = rparser.parse(recommendations_html_string)
- """
- _containsObjects = True
-
- extractors = [Extractor(label='recommendations',
- path="//td[@valign='middle'][1]",
- attrs=Attribute(key='../../tr/td[1]//text()',
- multi=True,
- path={'title': ".//text()",
- 'movieID': ".//a/@href"}))]
- def postprocess_data(self, data):
- for key in data.keys():
- n_key = key
- n_keyl = n_key.lower()
- if n_keyl == 'suggested by the database':
- n_key = 'database'
- elif n_keyl == 'imdb users recommend':
- n_key = 'users'
- data[n_key] = [Movie(title=x['title'],
- movieID=analyze_imdbid(x['movieID']),
- accessSystem=self._as, modFunct=self._modFunct)
- for x in data[key]]
- del data[key]
- if data: return {'recommendations': data}
- return data
-
-
-class DOMHTMLNewsParser(DOMParserBase):
- """Parser for the "news" page of a given movie or person.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- nwparser = DOMHTMLNewsParser()
- result = nwparser.parse(news_html_string)
- """
- _defGetRefs = True
-
- extractors = [
- Extractor(label='news',
- path="//h2",
- attrs=Attribute(key='news',
- multi=True,
- path={
- 'title': "./text()",
- 'fromdate': "../following-sibling::p[1]/small//text()",
- # FIXME: sometimes (see The Matrix (1999)) is found
- # inside news text.
- 'body': "../following-sibling::p[2]//text()",
- 'link': "../..//a[text()='Permalink']/@href",
- 'fulllink': "../..//a[starts-with(text(), " \
- "'See full article at')]/@href"
- },
- postprocess=lambda x: {
- 'title': x.get('title').strip(),
- 'date': x.get('fromdate').split('|')[0].strip(),
- 'from': x.get('fromdate').split('|')[1].replace('From ',
- '').strip(),
- 'body': (x.get('body') or u'').strip(),
- 'link': _normalize_href(x.get('link')),
- 'full article link': _normalize_href(x.get('fulllink'))
- }))
- ]
-
- preprocessors = [
- (re.compile('(]+>)', re.I), r'\1'),
- (re.compile('( )', re.I), r' \1'),
- (re.compile('', re.I), r'')
- ]
-
- def postprocess_data(self, data):
- if not data.has_key('news'):
- return {}
- for news in data['news']:
- if news.has_key('full article link'):
- if news['full article link'] is None:
- del news['full article link']
- return data
-
-
-def _parse_review(x):
- result = {}
- title = x.get('title').strip()
- if title[-1] == ':': title = title[:-1]
- result['title'] = title
- result['link'] = _normalize_href(x.get('link'))
- kind = x.get('kind').strip()
- if kind[-1] == ':': kind = kind[:-1]
- result['review kind'] = kind
- text = x.get('review').replace('\n\n', '||').replace('\n', ' ').split('||')
- review = '\n'.join(text)
- if x.get('author') is not None:
- author = x.get('author').strip()
- review = review.split(author)[0].strip()
- result['review author'] = author[2:]
- if x.get('item') is not None:
- item = x.get('item').strip()
- review = review[len(item):].strip()
- review = "%s: %s" % (item, review)
- result['review'] = review
- return result
-
-
-class DOMHTMLAmazonReviewsParser(DOMParserBase):
- """Parser for the "amazon reviews" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- arparser = DOMHTMLAmazonReviewsParser()
- result = arparser.parse(amazonreviews_html_string)
- """
- extractors = [
- Extractor(label='amazon reviews',
- group="//h3",
- group_key="./a/text()",
- group_key_normalize=lambda x: x[:-1],
- path="./following-sibling::p[1]/span[@class='_review']",
- attrs=Attribute(key=None,
- multi=True,
- path={
- 'title': "../preceding-sibling::h3[1]/a[1]/text()",
- 'link': "../preceding-sibling::h3[1]/a[1]/@href",
- 'kind': "./preceding-sibling::b[1]/text()",
- 'item': "./i/b/text()",
- 'review': ".//text()",
- 'author': "./i[starts-with(text(), '--')]/text()"
- },
- postprocess=_parse_review))
- ]
-
- preprocessors = [
- (re.compile('\n(?!)', re.I), r'\n'),
- (re.compile('(\n\n)', re.I), r'\1'),
- (re.compile('( \n\n)', re.I), r'\1'),
- (re.compile('(\s\n)()', re.I), r'\1\2')
- ]
-
- def postprocess_data(self, data):
- if len(data) == 0:
- return {}
- nd = []
- for item in data.keys():
- nd = nd + data[item]
- return {'amazon reviews': nd}
-
-
-def _parse_merchandising_link(x):
- result = {}
- link = x.get('link')
- result['link'] = _normalize_href(link)
- text = x.get('text')
- if text is not None:
- result['link-text'] = text.strip()
- cover = x.get('cover')
- if cover is not None:
- result['cover'] = cover
- description = x.get('description')
- if description is not None:
- shop = x.get('shop')
- if shop is not None:
- result['description'] = u'%s::%s' % (shop, description.strip())
- else:
- result['description'] = description.strip()
- return result
-
-
-class DOMHTMLSalesParser(DOMParserBase):
- """Parser for the "merchandising links" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- sparser = DOMHTMLSalesParser()
- result = sparser.parse(sales_html_string)
- """
- extractors = [
- Extractor(label='shops',
- group="//h5/a[@name]/..",
- group_key="./a[1]/text()",
- group_key_normalize=lambda x: x.lower(),
- path=".//following-sibling::table[1]/" \
- "/td[@class='w_rowtable_colshop']//tr[1]",
- attrs=Attribute(key=None,
- multi=True,
- path={
- 'link': "./td[2]/a[1]/@href",
- 'text': "./td[1]/img[1]/@alt",
- 'cover': "./ancestor::td[1]/../td[1]"\
- "/a[1]/img[1]/@src",
- },
- postprocess=_parse_merchandising_link)),
- Extractor(label='others',
- group="//span[@class='_info']/..",
- group_key="./h5/a[1]/text()",
- group_key_normalize=lambda x: x.lower(),
- path="./span[@class='_info']",
- attrs=Attribute(key=None,
- multi=True,
- path={
- 'link': "./preceding-sibling::a[1]/@href",
- 'shop': "./preceding-sibling::a[1]/text()",
- 'description': ".//text()",
- },
- postprocess=_parse_merchandising_link))
- ]
-
- preprocessors = [
- (re.compile('(\1'),
- (re.compile('(\n \n) ', re.I), r'\1'),
- (re.compile('(
\n)(\n)', re.I), r'\1
\2'),
- (re.compile('(\n)(Search.*?)()(\n)', re.I), r'\3\1\2\4'),
- (re.compile('(\n)(Search.*?)(\n)', re.I),
- r'\1 \2\3')
- ]
-
- def postprocess_data(self, data):
- if len(data) == 0:
- return {}
- return {'merchandising links': data}
-
-
-def _build_episode(x):
- """Create a Movie object for a given series' episode."""
- episode_id = analyze_imdbid(x.get('link'))
- episode_title = x.get('title')
- e = Movie(movieID=episode_id, title=episode_title)
- e['kind'] = u'episode'
- oad = x.get('oad')
- if oad:
- e['original air date'] = oad.strip()
- year = x.get('year')
- if year is not None:
- year = year[5:]
- if year == 'unknown': year = u'????'
- if year and year.isdigit():
- year = int(year)
- e['year'] = year
- else:
- if oad and oad[-4:].isdigit():
- e['year'] = int(oad[-4:])
- epinfo = x.get('episode')
- if epinfo is not None:
- season, episode = epinfo.split(':')[0].split(',')
- e['season'] = int(season[7:])
- e['episode'] = int(episode[8:])
- else:
- e['season'] = 'unknown'
- e['episode'] = 'unknown'
- plot = x.get('plot')
- if plot:
- e['plot'] = plot.strip()
- return e
-
-
-class DOMHTMLEpisodesParser(DOMParserBase):
- """Parser for the "episode list" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- eparser = DOMHTMLEpisodesParser()
- result = eparser.parse(episodes_html_string)
- """
- _containsObjects = True
-
- kind = 'episodes list'
- _episodes_path = "..//h4"
- _oad_path = "./following-sibling::span/strong[1]/text()"
-
- def _init(self):
- self.extractors = [
- Extractor(label='series',
- path="//html",
- attrs=[Attribute(key='series title',
- path=".//title/text()"),
- Attribute(key='series movieID',
- path=".//h1/a[@class='main']/@href",
- postprocess=analyze_imdbid)
- ]),
- Extractor(label='episodes',
- group="//div[@class='_imdbpy']/h3",
- group_key="./a/@name",
- path=self._episodes_path,
- attrs=Attribute(key=None,
- multi=True,
- path={
- 'link': "./a/@href",
- 'title': "./a/text()",
- 'year': "./preceding-sibling::a[1]/@name",
- 'episode': "./text()[1]",
- 'oad': self._oad_path,
- 'plot': "./following-sibling::text()[1]"
- },
- postprocess=_build_episode))]
- if self.kind == 'episodes cast':
- self.extractors += [
- Extractor(label='cast',
- group="//h4",
- group_key="./text()[1]",
- group_key_normalize=lambda x: x.strip(),
- path="./following-sibling::table[1]//td[@class='nm']",
- attrs=Attribute(key=None,
- multi=True,
- path={'person': "..//text()",
- 'link': "./a/@href",
- 'roleID': \
- "../td[4]/div[@class='_imdbpyrole']/@roleid"},
- postprocess=lambda x: \
- build_person(x.get('person') or u'',
- personID=analyze_imdbid(x.get('link')),
- roleID=(x.get('roleID') or u'').split('/'),
- accessSystem=self._as,
- modFunct=self._modFunct)))
- ]
-
- preprocessors = [
- (re.compile('(
\n)( )', re.I),
- r'\1 \2'),
- (re.compile('(\n\n) ', re.I), r'\1'),
- (re.compile(' (.*?)', re.I), r' \1'),
- (_reRolesMovie, _manageRoles),
- (re.compile('( \n)(
)', re.I), r'\1 \2')
- ]
-
- def postprocess_data(self, data):
- # A bit extreme?
- if not 'series title' in data: return {}
- if not 'series movieID' in data: return {}
- stitle = data['series title'].replace('- Episode list', '')
- stitle = stitle.replace('- Episodes list', '')
- stitle = stitle.replace('- Episode cast', '')
- stitle = stitle.replace('- Episodes cast', '')
- stitle = stitle.strip()
- if not stitle: return {}
- seriesID = data['series movieID']
- if seriesID is None: return {}
- series = Movie(title=stitle, movieID=str(seriesID),
- accessSystem=self._as, modFunct=self._modFunct)
- nd = {}
- for key in data.keys():
- if key.startswith('season-'):
- season_key = key[7:]
- try: season_key = int(season_key)
- except: pass
- nd[season_key] = {}
- ep_counter = 1
- for episode in data[key]:
- if not episode: continue
- episode_key = episode.get('episode')
- if episode_key is None: continue
- if not isinstance(episode_key, int):
- episode_key = ep_counter
- ep_counter += 1
- cast_key = 'Season %s, Episode %s:' % (season_key,
- episode_key)
- if data.has_key(cast_key):
- cast = data[cast_key]
- for i in xrange(len(cast)):
- cast[i].billingPos = i + 1
- episode['cast'] = cast
- episode['episode of'] = series
- nd[season_key][episode_key] = episode
- if len(nd) == 0:
- return {}
- return {'episodes': nd}
-
-
-class DOMHTMLEpisodesCastParser(DOMHTMLEpisodesParser):
- """Parser for the "episodes cast" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- eparser = DOMHTMLEpisodesParser()
- result = eparser.parse(episodes_html_string)
- """
- kind = 'episodes cast'
- _episodes_path = "..//h4"
- _oad_path = "./following-sibling::b[1]/text()"
-
-
-class DOMHTMLFaqsParser(DOMParserBase):
- """Parser for the "FAQ" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- fparser = DOMHTMLFaqsParser()
- result = fparser.parse(faqs_html_string)
- """
- _defGetRefs = True
-
- # XXX: bsoup and lxml don't match (looks like a minor issue, anyway).
-
- extractors = [
- Extractor(label='faqs',
- path="//div[@class='section']",
- attrs=Attribute(key='faqs',
- multi=True,
- path={
- 'question': "./h3/a/span/text()",
- 'answer': "../following-sibling::div[1]//text()"
- },
- postprocess=lambda x: u'%s::%s' % (x.get('question').strip(),
- '\n\n'.join(x.get('answer').replace(
- '\n\n', '\n').strip().split('||')))))
- ]
-
- preprocessors = [
- (re.compile('
', re.I), r'||'),
- (re.compile('(.*?)\n', re.I), r'||\1--'),
- (re.compile('(.*?)', re.I),
- r'[spoiler]\1[/spoiler]')
- ]
-
-
-class DOMHTMLAiringParser(DOMParserBase):
- """Parser for the "airing" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- aparser = DOMHTMLAiringParser()
- result = aparser.parse(airing_html_string)
- """
- _containsObjects = True
-
- extractors = [
- Extractor(label='series title',
- path="//title",
- attrs=Attribute(key='series title', path="./text()",
- postprocess=lambda x: \
- x.replace(' - TV schedule', u''))),
- Extractor(label='series id',
- path="//h1/a[@href]",
- attrs=Attribute(key='series id', path="./@href")),
-
- Extractor(label='tv airings',
- path="//tr[@class]",
- attrs=Attribute(key='airing',
- multi=True,
- path={
- 'date': "./td[1]//text()",
- 'time': "./td[2]//text()",
- 'channel': "./td[3]//text()",
- 'link': "./td[4]/a[1]/@href",
- 'title': "./td[4]//text()",
- 'season': "./td[5]//text()",
- },
- postprocess=lambda x: {
- 'date': x.get('date'),
- 'time': x.get('time'),
- 'channel': x.get('channel').strip(),
- 'link': x.get('link'),
- 'title': x.get('title'),
- 'season': (x.get('season') or '').strip()
- }
- ))
- ]
-
- def postprocess_data(self, data):
- if len(data) == 0:
- return {}
- seriesTitle = data['series title']
- seriesID = analyze_imdbid(data['series id'])
- if data.has_key('airing'):
- for airing in data['airing']:
- title = airing.get('title', '').strip()
- if not title:
- epsTitle = seriesTitle
- if seriesID is None:
- continue
- epsID = seriesID
- else:
- epsTitle = '%s {%s}' % (data['series title'],
- airing['title'])
- epsID = analyze_imdbid(airing['link'])
- e = Movie(title=epsTitle, movieID=epsID)
- airing['episode'] = e
- del airing['link']
- del airing['title']
- if not airing['season']:
- del airing['season']
- if 'series title' in data:
- del data['series title']
- if 'series id' in data:
- del data['series id']
- if 'airing' in data:
- data['airing'] = filter(None, data['airing'])
- if 'airing' not in data or not data['airing']:
- return {}
- return data
-
-
-class DOMHTMLSynopsisParser(DOMParserBase):
- """Parser for the "synopsis" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- sparser = HTMLSynopsisParser()
- result = sparser.parse(synopsis_html_string)
- """
- extractors = [
- Extractor(label='synopsis',
- path="//div[@class='display'][not(@style)]",
- attrs=Attribute(key='synopsis',
- path=".//text()",
- postprocess=lambda x: '\n\n'.join(x.strip().split('||'))))
- ]
-
- preprocessors = [
- (re.compile('
', re.I), r'||')
- ]
-
-
-class DOMHTMLParentsGuideParser(DOMParserBase):
- """Parser for the "parents guide" page of a given movie.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- pgparser = HTMLParentsGuideParser()
- result = pgparser.parse(parentsguide_html_string)
- """
- extractors = [
- Extractor(label='parents guide',
- group="//div[@class='section']",
- group_key="./h3/a/span/text()",
- group_key_normalize=lambda x: x.lower(),
- path="../following-sibling::div[1]/p",
- attrs=Attribute(key=None,
- path=".//text()",
- postprocess=lambda x: [t.strip().replace('\n', ' ')
- for t in x.split('||') if t.strip()]))
- ]
-
- preprocessors = [
- (re.compile('
', re.I), r'||')
- ]
-
- def postprocess_data(self, data):
- data2 = {}
- for key in data:
- if data[key]:
- data2[key] = data[key]
- if not data2:
- return {}
- return {'parents guide': data2}
-
-
-_OBJECTS = {
- 'movie_parser': ((DOMHTMLMovieParser,), None),
- 'plot_parser': ((DOMHTMLPlotParser,), None),
- 'movie_awards_parser': ((DOMHTMLAwardsParser,), None),
- 'taglines_parser': ((DOMHTMLTaglinesParser,), None),
- 'keywords_parser': ((DOMHTMLKeywordsParser,), None),
- 'crazycredits_parser': ((DOMHTMLCrazyCreditsParser,), None),
- 'goofs_parser': ((DOMHTMLGoofsParser,), None),
- 'alternateversions_parser': ((DOMHTMLAlternateVersionsParser,), None),
- 'trivia_parser': ((DOMHTMLTriviaParser,), None),
- 'soundtrack_parser': ((DOMHTMLSoundtrackParser,), {'kind': 'soundtrack'}),
- 'quotes_parser': ((DOMHTMLQuotesParser,), None),
- 'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None),
- 'ratings_parser': ((DOMHTMLRatingsParser,), None),
- 'officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
- 'externalrev_parser': ((DOMHTMLOfficialsitesParser,),
- {'kind': 'external reviews'}),
- 'newsgrouprev_parser': ((DOMHTMLOfficialsitesParser,),
- {'kind': 'newsgroup reviews'}),
- 'misclinks_parser': ((DOMHTMLOfficialsitesParser,),
- {'kind': 'misc links'}),
- 'soundclips_parser': ((DOMHTMLOfficialsitesParser,),
- {'kind': 'sound clips'}),
- 'videoclips_parser': ((DOMHTMLOfficialsitesParser,),
- {'kind': 'video clips'}),
- 'photosites_parser': ((DOMHTMLOfficialsitesParser,),
- {'kind': 'photo sites'}),
- 'connections_parser': ((DOMHTMLConnectionParser,), None),
- 'tech_parser': ((DOMHTMLTechParser,), None),
- 'business_parser': ((DOMHTMLTechParser,),
- {'kind': 'business', '_defGetRefs': 1}),
- 'literature_parser': ((DOMHTMLTechParser,), {'kind': 'literature'}),
- 'locations_parser': ((DOMHTMLLocationsParser,), None),
- 'dvd_parser': ((DOMHTMLDvdParser,), None),
- 'rec_parser': ((DOMHTMLRecParser,), None),
- 'news_parser': ((DOMHTMLNewsParser,), None),
- 'amazonrev_parser': ((DOMHTMLAmazonReviewsParser,), None),
- 'sales_parser': ((DOMHTMLSalesParser,), None),
- 'episodes_parser': ((DOMHTMLEpisodesParser,), None),
- 'episodes_cast_parser': ((DOMHTMLEpisodesCastParser,), None),
- 'eprating_parser': ((DOMHTMLEpisodesRatings,), None),
- 'movie_faqs_parser': ((DOMHTMLFaqsParser,), None),
- 'airing_parser': ((DOMHTMLAiringParser,), None),
- 'synopsis_parser': ((DOMHTMLSynopsisParser,), None),
- 'parentsguide_parser': ((DOMHTMLParentsGuideParser,), None)
-}
-
diff --git a/libs/imdb/parser/http/personParser.py b/libs/imdb/parser/http/personParser.py
deleted file mode 100644
index a5fbc081..00000000
--- a/libs/imdb/parser/http/personParser.py
+++ /dev/null
@@ -1,559 +0,0 @@
-"""
-parser.http.personParser module (imdb package).
-
-This module provides the classes (and the instances), used to parse
-the IMDb pages on the akas.imdb.com server about a person.
-E.g., for "Mel Gibson" the referred pages would be:
- categorized: http://akas.imdb.com/name/nm0000154/maindetails
- biography: http://akas.imdb.com/name/nm0000154/bio
- ...and so on...
-
-Copyright 2004-2010 Davide Alberani
- 2008 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import re
-from imdb.Movie import Movie
-from imdb.utils import analyze_name, canonicalName, normalizeName, \
- analyze_title, date_and_notes
-from utils import build_movie, DOMParserBase, Attribute, Extractor, \
- analyze_imdbid
-
-
-from movieParser import _manageRoles
-_reRoles = re.compile(r'(- .*? \.\.\.\. )(.*?)(
| )',
- re.I | re.M | re.S)
-
-def build_date(date):
- day = date.get('day')
- year = date.get('year')
- if day and year:
- return "%s %s" % (day, year)
- if day:
- return day
- if year:
- return year
- return ""
-
-class DOMHTMLMaindetailsParser(DOMParserBase):
- """Parser for the "categorized" (maindetails) page of a given person.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- cparser = DOMHTMLMaindetailsParser()
- result = cparser.parse(categorized_html_string)
- """
- _containsObjects = True
-
- _birth_attrs = [Attribute(key='birth date',
- path={
- 'day': ".//a[starts-with(@href, " \
- "'/date/')]/text()",
- 'year': ".//a[starts-with(@href, " \
- "'/search/name?birth_year=')]/text()"
- },
- postprocess=build_date),
- Attribute(key='birth place',
- path=".//a[starts-with(@href, " \
- "'/search/name?birth_place=')]/text()")]
- _death_attrs = [Attribute(key='death date',
- path={
- 'day': ".//a[starts-with(@href, " \
- "'/date/')]/text()",
- 'year': ".//a[starts-with(@href, " \
- "'/search/name?death_year=')]/text()"
- },
- postprocess=build_date),
- Attribute(key='death place',
- path=".//a[starts-with(@href, " \
- "'/search/name?death_place=')]/text()")]
- _film_attrs = [Attribute(key=None,
- multi=True,
- path={
- 'link': "./b/a[1]/@href",
- 'title': "./b/a[1]/text()",
- 'notes': "./b/following-sibling::text()",
- 'year': "./span[@class='year_column']/text()",
- 'status': "./a[@class='in_production']/text()",
- 'rolesNoChar': './/br/following-sibling::text()',
- 'chrRoles': "./a[@imdbpyname]/@imdbpyname",
- 'roleID': "./a[starts-with(@href, '/character/')]/@href"
- },
- postprocess=lambda x:
- build_movie(x.get('title') or u'',
- year=x.get('year'),
- movieID=analyze_imdbid(x.get('link') or u''),
- rolesNoChar=(x.get('rolesNoChar') or u'').strip(),
- chrRoles=(x.get('chrRoles') or u'').strip(),
- additionalNotes=x.get('notes'),
- roleID=(x.get('roleID') or u''),
- status=x.get('status') or None))]
-
- extractors = [
- Extractor(label='name',
- path="//h1[@class='header']",
- attrs=Attribute(key='name',
- path=".//text()",
- postprocess=lambda x: analyze_name(x,
- canonical=1))),
-
- Extractor(label='birth info',
- path="//div[h4='Born:']",
- attrs=_birth_attrs),
-
- Extractor(label='death info',
- path="//div[h4='Died:']",
- attrs=_death_attrs),
-
- Extractor(label='headshot',
- path="//td[@id='img_primary']/a",
- attrs=Attribute(key='headshot',
- path="./img/@src")),
-
- Extractor(label='akas',
- path="//div[h4='Alternate Names:']",
- attrs=Attribute(key='akas',
- path="./text()",
- postprocess=lambda x: x.strip().split(' '))),
-
- Extractor(label='filmography',
- group="//div[starts-with(@id, 'filmo-head-')]",
- group_key="./a[@name]/text()",
- group_key_normalize=lambda x: x.lower().replace(': ', ' '),
- path="./following-sibling::div[1]" \
- "/div[starts-with(@class, 'filmo-row')]",
- attrs=_film_attrs),
-
- Extractor(label='indevelopment',
- path="//div[starts-with(@class,'devitem')]",
- attrs=Attribute(key='in development',
- multi=True,
- path={
- 'link': './a/@href',
- 'title': './a/text()'
- },
- postprocess=lambda x:
- build_movie(x.get('title') or u'',
- movieID=analyze_imdbid(x.get('link') or u''),
- roleID=(x.get('roleID') or u'').split('/'),
- status=x.get('status') or None)))
- ]
-
- preprocessors = [(' ', ''),
- (' ', ' '),
- (re.compile(r'((.*?)'),
- r'\1 imdbpyname="\2@@">\2')]
-
- def postprocess_data(self, data):
- for what in 'birth date', 'death date':
- if what in data and not data[what]:
- del data[what]
- # XXX: the code below is for backwards compatibility
- # probably could be removed
- for key in data.keys():
- if key.startswith('actor '):
- if not data.has_key('actor'):
- data['actor'] = []
- data['actor'].extend(data[key])
- del data[key]
- if key.startswith('actress '):
- if not data.has_key('actress'):
- data['actress'] = []
- data['actress'].extend(data[key])
- del data[key]
- if key.startswith('self '):
- if not data.has_key('self'):
- data['self'] = []
- data['self'].extend(data[key])
- del data[key]
- if key == 'birth place':
- data['birth notes'] = data[key]
- del data[key]
- if key == 'death place':
- data['death notes'] = data[key]
- del data[key]
- return data
-
-
-class DOMHTMLBioParser(DOMParserBase):
- """Parser for the "biography" page of a given person.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- bioparser = DOMHTMLBioParser()
- result = bioparser.parse(biography_html_string)
- """
- _defGetRefs = True
-
- _birth_attrs = [Attribute(key='birth date',
- path={
- 'day': "./a[starts-with(@href, " \
- "'/date/')]/text()",
- 'year': "./a[starts-with(@href, " \
- "'/search/name?birth_year=')]/text()"
- },
- postprocess=build_date),
- Attribute(key='birth notes',
- path="./a[starts-with(@href, " \
- "'/search/name?birth_place=')]/text()")]
- _death_attrs = [Attribute(key='death date',
- path={
- 'day': "./a[starts-with(@href, " \
- "'/date/')]/text()",
- 'year': "./a[starts-with(@href, " \
- "'/search/name?death_date=')]/text()"
- },
- postprocess=build_date),
- Attribute(key='death notes',
- path="./text()",
- # TODO: check if this slicing is always correct
- postprocess=lambda x: u''.join(x).strip()[2:])]
- extractors = [
- Extractor(label='headshot',
- path="//a[@name='headshot']",
- attrs=Attribute(key='headshot',
- path="./img/@src")),
- Extractor(label='birth info',
- path="//div[h5='Date of Birth']",
- attrs=_birth_attrs),
- Extractor(label='death info',
- path="//div[h5='Date of Death']",
- attrs=_death_attrs),
- Extractor(label='nick names',
- path="//div[h5='Nickname']",
- attrs=Attribute(key='nick names',
- path="./text()",
- joiner='|',
- postprocess=lambda x: [n.strip().replace(' (',
- '::(', 1) for n in x.split('|')
- if n.strip()])),
- Extractor(label='birth name',
- path="//div[h5='Birth Name']",
- attrs=Attribute(key='birth name',
- path="./text()",
- postprocess=lambda x: canonicalName(x.strip()))),
- Extractor(label='height',
- path="//div[h5='Height']",
- attrs=Attribute(key='height',
- path="./text()",
- postprocess=lambda x: x.strip())),
- Extractor(label='mini biography',
- path="//div[h5='Mini Biography']",
- attrs=Attribute(key='mini biography',
- multi=True,
- path={
- 'bio': "./p//text()",
- 'by': "./b/following-sibling::a/text()"
- },
- postprocess=lambda x: "%s::%s" % \
- (x.get('bio').strip(),
- (x.get('by') or u'').strip() or u'Anonymous'))),
- Extractor(label='spouse',
- path="//div[h5='Spouse']/table/tr",
- attrs=Attribute(key='spouse',
- multi=True,
- path={
- 'name': "./td[1]//text()",
- 'info': "./td[2]//text()"
- },
- postprocess=lambda x: ("%s::%s" % \
- (x.get('name').strip(),
- (x.get('info') or u'').strip())).strip(':'))),
- Extractor(label='trade mark',
- path="//div[h5='Trade Mark']/p",
- attrs=Attribute(key='trade mark',
- multi=True,
- path=".//text()",
- postprocess=lambda x: x.strip())),
- Extractor(label='trivia',
- path="//div[h5='Trivia']/p",
- attrs=Attribute(key='trivia',
- multi=True,
- path=".//text()",
- postprocess=lambda x: x.strip())),
- Extractor(label='quotes',
- path="//div[h5='Personal Quotes']/p",
- attrs=Attribute(key='quotes',
- multi=True,
- path=".//text()",
- postprocess=lambda x: x.strip())),
- Extractor(label='salary',
- path="//div[h5='Salary']/table/tr",
- attrs=Attribute(key='salary history',
- multi=True,
- path={
- 'title': "./td[1]//text()",
- 'info': "./td[2]/text()",
- },
- postprocess=lambda x: "%s::%s" % \
- (x.get('title').strip(),
- x.get('info').strip()))),
- Extractor(label='where now',
- path="//div[h5='Where Are They Now']/p",
- attrs=Attribute(key='where now',
- multi=True,
- path=".//text()",
- postprocess=lambda x: x.strip())),
- ]
-
- preprocessors = [
- (re.compile('()', re.I), r'\1'),
- (re.compile('( \n\s+)', re.I + re.DOTALL), r'\1'),
- (re.compile('()'), r' \1'),
- (re.compile('\.
([^\s])', re.I), r'. \1')
- ]
-
- def postprocess_data(self, data):
- for what in 'birth date', 'death date':
- if what in data and not data[what]:
- del data[what]
- return data
-
-
-class DOMHTMLOtherWorksParser(DOMParserBase):
- """Parser for the "other works" and "agent" pages of a given person.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- owparser = DOMHTMLOtherWorksParser()
- result = owparser.parse(otherworks_html_string)
- """
- _defGetRefs = True
- kind = 'other works'
-
- # XXX: looks like the 'agent' page is no more public.
- extractors = [
- Extractor(label='other works',
- path="//h5[text()='Other works']/" \
- "following-sibling::div[1]",
- attrs=Attribute(key='self.kind',
- path=".//text()",
- postprocess=lambda x: x.strip().split('\n\n')))
- ]
-
- preprocessors = [
- (re.compile('([^<]+)', re.I),
- r'\1'),
- (re.compile('( \n\s+)', re.I), r'\1'),
- (re.compile('()'), r' \1'),
- (re.compile('
', re.I), r'\n\n')
- ]
-
-
-def _build_episode(link, title, minfo, role, roleA, roleAID):
- """Build an Movie object for a given episode of a series."""
- episode_id = analyze_imdbid(link)
- notes = u''
- minidx = minfo.find(' -')
- # Sometimes, for some unknown reason, the role is left in minfo.
- if minidx != -1:
- slfRole = minfo[minidx+3:].lstrip()
- minfo = minfo[:minidx].rstrip()
- if slfRole.endswith(')'):
- commidx = slfRole.rfind('(')
- if commidx != -1:
- notes = slfRole[commidx:]
- slfRole = slfRole[:commidx]
- if slfRole and role is None and roleA is None:
- role = slfRole
- eps_data = analyze_title(title)
- eps_data['kind'] = u'episode'
- # FIXME: it's wrong for multiple characters (very rare on tv series?).
- if role is None:
- role = roleA # At worse, it's None.
- if role is None:
- roleAID = None
- if roleAID is not None:
- roleAID = analyze_imdbid(roleAID)
- e = Movie(movieID=episode_id, data=eps_data, currentRole=role,
- roleID=roleAID, notes=notes)
- # XXX: are we missing some notes?
- # XXX: does it parse things as "Episode dated 12 May 2005 (12 May 2005)"?
- if minfo.startswith('('):
- pe = minfo.find(')')
- if pe != -1:
- date = minfo[1:pe]
- if date != '????':
- e['original air date'] = date
- if eps_data.get('year', '????') == '????':
- syear = date.split()[-1]
- if syear.isdigit():
- e['year'] = int(syear)
- return e
-
-
-class DOMHTMLSeriesParser(DOMParserBase):
- """Parser for the "by TV series" page of a given person.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- sparser = DOMHTMLSeriesParser()
- result = sparser.parse(filmoseries_html_string)
- """
- _containsObjects = True
-
- extractors = [
- Extractor(label='series',
- group="//div[@class='filmo']/span[1]",
- group_key="./a[1]",
- path="./following-sibling::ol[1]/li/a[1]",
- attrs=Attribute(key=None,
- multi=True,
- path={
- 'link': "./@href",
- 'title': "./text()",
- 'info': "./following-sibling::text()",
- 'role': "./following-sibling::i[1]/text()",
- 'roleA': "./following-sibling::a[1]/text()",
- 'roleAID': "./following-sibling::a[1]/@href"
- },
- postprocess=lambda x: _build_episode(x.get('link'),
- x.get('title'),
- (x.get('info') or u'').strip(),
- x.get('role'),
- x.get('roleA'),
- x.get('roleAID'))))
- ]
-
- def postprocess_data(self, data):
- if len(data) == 0:
- return {}
- nd = {}
- for key in data.keys():
- dom = self.get_dom(key)
- link = self.xpath(dom, "//a/@href")[0]
- title = self.xpath(dom, "//a/text()")[0][1:-1]
- series = Movie(movieID=analyze_imdbid(link),
- data=analyze_title(title),
- accessSystem=self._as, modFunct=self._modFunct)
- nd[series] = []
- for episode in data[key]:
- # XXX: should we create a copy of 'series', to avoid
- # circular references?
- episode['episode of'] = series
- nd[series].append(episode)
- return {'episodes': nd}
-
-
-class DOMHTMLPersonGenresParser(DOMParserBase):
- """Parser for the "by genre" and "by keywords" pages of a given person.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- gparser = DOMHTMLPersonGenresParser()
- result = gparser.parse(bygenre_html_string)
- """
- kind = 'genres'
- _containsObjects = True
-
- extractors = [
- Extractor(label='genres',
- group="//b/a[@name]/following-sibling::a[1]",
- group_key="./text()",
- group_key_normalize=lambda x: x.lower(),
- path="../../following-sibling::ol[1]/li//a[1]",
- attrs=Attribute(key=None,
- multi=True,
- path={
- 'link': "./@href",
- 'title': "./text()",
- 'info': "./following-sibling::text()"
- },
- postprocess=lambda x: \
- build_movie(x.get('title') + \
- x.get('info').split('[')[0],
- analyze_imdbid(x.get('link')))))
- ]
-
- def postprocess_data(self, data):
- if len(data) == 0:
- return {}
- return {self.kind: data}
-
-
-from movieParser import _parse_merchandising_link
-
-class DOMHTMLPersonSalesParser(DOMParserBase):
- """Parser for the "merchandising links" page of a given person.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- sparser = DOMHTMLPersonSalesParser()
- result = sparser.parse(sales_html_string)
- """
- extractors = [
- Extractor(label='merchandising links',
- group="//span[@class='merch_title']",
- group_key=".//text()",
- path="./following-sibling::table[1]/" \
- "/td[@class='w_rowtable_colshop']//tr[1]",
- attrs=Attribute(key=None,
- multi=True,
- path={
- 'link': "./td[2]/a[1]/@href",
- 'text': "./td[1]/img[1]/@alt",
- 'cover': "./ancestor::td[1]/../" \
- "td[1]/a[1]/img[1]/@src",
- },
- postprocess=_parse_merchandising_link)),
- ]
-
- preprocessors = [
- (re.compile('(', re.I), r'\1>')
- ]
-
- def postprocess_data(self, data):
- if len(data) == 0:
- return {}
- return {'merchandising links': data}
-
-
-from movieParser import DOMHTMLTechParser
-from movieParser import DOMHTMLOfficialsitesParser
-from movieParser import DOMHTMLAwardsParser
-from movieParser import DOMHTMLNewsParser
-
-
-_OBJECTS = {
- 'maindetails_parser': ((DOMHTMLMaindetailsParser,), None),
- 'bio_parser': ((DOMHTMLBioParser,), None),
- 'otherworks_parser': ((DOMHTMLOtherWorksParser,), None),
- #'agent_parser': ((DOMHTMLOtherWorksParser,), {'kind': 'agent'}),
- 'person_officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
- 'person_awards_parser': ((DOMHTMLAwardsParser,), {'subject': 'name'}),
- 'publicity_parser': ((DOMHTMLTechParser,), {'kind': 'publicity'}),
- 'person_series_parser': ((DOMHTMLSeriesParser,), None),
- 'person_contacts_parser': ((DOMHTMLTechParser,), {'kind': 'contacts'}),
- 'person_genres_parser': ((DOMHTMLPersonGenresParser,), None),
- 'person_keywords_parser': ((DOMHTMLPersonGenresParser,),
- {'kind': 'keywords'}),
- 'news_parser': ((DOMHTMLNewsParser,), None),
- 'sales_parser': ((DOMHTMLPersonSalesParser,), None)
-}
-
diff --git a/libs/imdb/parser/http/searchCharacterParser.py b/libs/imdb/parser/http/searchCharacterParser.py
deleted file mode 100644
index c81ca7e4..00000000
--- a/libs/imdb/parser/http/searchCharacterParser.py
+++ /dev/null
@@ -1,69 +0,0 @@
-"""
-parser.http.searchCharacterParser module (imdb package).
-
-This module provides the HTMLSearchCharacterParser class (and the
-search_character_parser instance), used to parse the results of a search
-for a given character.
-E.g., when searching for the name "Jesse James", the parsed page would be:
- http://akas.imdb.com/find?s=Characters;mx=20;q=Jesse+James
-
-Copyright 2007-2009 Davide Alberani
- 2008 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-from imdb.utils import analyze_name, build_name
-from utils import Extractor, Attribute, analyze_imdbid
-
-from searchMovieParser import DOMHTMLSearchMovieParser, DOMBasicMovieParser
-
-
-class DOMBasicCharacterParser(DOMBasicMovieParser):
- """Simply get the name of a character and the imdbID.
-
- It's used by the DOMHTMLSearchCharacterParser class to return a result
- for a direct match (when a search on IMDb results in a single
- character, the web server sends directly the movie page."""
- _titleFunct = lambda self, x: analyze_name(x or u'', canonical=False)
-
-
-class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
- _BaseParser = DOMBasicCharacterParser
- _notDirectHitTitle = 'imdb search'
- _titleBuilder = lambda self, x: build_name(x, canonical=False)
- _linkPrefix = '/character/ch'
-
- _attrs = [Attribute(key='data',
- multi=True,
- path={
- 'link': "./a[1]/@href",
- 'name': "./a[1]/text()"
- },
- postprocess=lambda x: (
- analyze_imdbid(x.get('link') or u''),
- {'name': x.get('name')}
- ))]
- extractors = [Extractor(label='search',
- path="//td[3]/a[starts-with(@href, " \
- "'/character/ch')]/..",
- attrs=_attrs)]
-
-
-_OBJECTS = {
- 'search_character_parser': ((DOMHTMLSearchCharacterParser,),
- {'kind': 'character', '_basic_parser': DOMBasicCharacterParser})
-}
-
diff --git a/libs/imdb/parser/http/searchCompanyParser.py b/libs/imdb/parser/http/searchCompanyParser.py
deleted file mode 100644
index ab666fbc..00000000
--- a/libs/imdb/parser/http/searchCompanyParser.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-parser.http.searchCompanyParser module (imdb package).
-
-This module provides the HTMLSearchCompanyParser class (and the
-search_company_parser instance), used to parse the results of a search
-for a given company.
-E.g., when searching for the name "Columbia Pictures", the parsed page would be:
- http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
-
-Copyright 2008-2009 Davide Alberani
- 2008 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-from imdb.utils import analyze_company_name, build_company_name
-from utils import Extractor, Attribute, analyze_imdbid
-
-from searchMovieParser import DOMHTMLSearchMovieParser, DOMBasicMovieParser
-
-class DOMBasicCompanyParser(DOMBasicMovieParser):
- """Simply get the name of a company and the imdbID.
-
- It's used by the DOMHTMLSearchCompanyParser class to return a result
- for a direct match (when a search on IMDb results in a single
- company, the web server sends directly the company page.
- """
- _titleFunct = lambda self, x: analyze_company_name(x or u'')
-
-
-class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
- _BaseParser = DOMBasicCompanyParser
- _notDirectHitTitle = 'imdb company'
- _titleBuilder = lambda self, x: build_company_name(x)
- _linkPrefix = '/company/co'
-
- _attrs = [Attribute(key='data',
- multi=True,
- path={
- 'link': "./a[1]/@href",
- 'name': "./a[1]/text()",
- 'notes': "./text()[1]"
- },
- postprocess=lambda x: (
- analyze_imdbid(x.get('link')),
- analyze_company_name(x.get('name')+(x.get('notes')
- or u''), stripNotes=True)
- ))]
- extractors = [Extractor(label='search',
- path="//td[3]/a[starts-with(@href, " \
- "'/company/co')]/..",
- attrs=_attrs)]
-
-
-_OBJECTS = {
- 'search_company_parser': ((DOMHTMLSearchCompanyParser,),
- {'kind': 'company', '_basic_parser': DOMBasicCompanyParser})
-}
-
diff --git a/libs/imdb/parser/http/searchKeywordParser.py b/libs/imdb/parser/http/searchKeywordParser.py
deleted file mode 100644
index ed72906c..00000000
--- a/libs/imdb/parser/http/searchKeywordParser.py
+++ /dev/null
@@ -1,111 +0,0 @@
-"""
-parser.http.searchKeywordParser module (imdb package).
-
-This module provides the HTMLSearchKeywordParser class (and the
-search_company_parser instance), used to parse the results of a search
-for a given keyword.
-E.g., when searching for the keyword "alabama", the parsed page would be:
- http://akas.imdb.com/find?s=kw;mx=20;q=alabama
-
-Copyright 2009 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-from utils import Extractor, Attribute, analyze_imdbid
-from imdb.utils import analyze_title, analyze_company_name
-
-from searchMovieParser import DOMHTMLSearchMovieParser, DOMBasicMovieParser
-
-class DOMBasicKeywordParser(DOMBasicMovieParser):
- """Simply get the name of a keyword.
-
- It's used by the DOMHTMLSearchKeywordParser class to return a result
- for a direct match (when a search on IMDb results in a single
- keyword, the web server sends directly the keyword page.
- """
- # XXX: it's still to be tested!
- # I'm not even sure there can be a direct hit, searching for keywords.
- _titleFunct = lambda self, x: analyze_company_name(x or u'')
-
-
-class DOMHTMLSearchKeywordParser(DOMHTMLSearchMovieParser):
- """Parse the html page that the IMDb web server shows when the
- "new search system" is used, searching for keywords similar to
- the one given."""
-
- _BaseParser = DOMBasicKeywordParser
- _notDirectHitTitle = 'imdb keyword'
- _titleBuilder = lambda self, x: x
- _linkPrefix = '/keyword/'
-
- _attrs = [Attribute(key='data',
- multi=True,
- path="./a[1]/text()"
- )]
- extractors = [Extractor(label='search',
- path="//td[3]/a[starts-with(@href, " \
- "'/keyword/')]/..",
- attrs=_attrs)]
-
-
-def custom_analyze_title4kwd(title, yearNote, outline):
- """Return a dictionary with the needed info."""
- title = title.strip()
- if not title:
- return {}
- if yearNote:
- yearNote = '%s)' % yearNote.split(' ')[0]
- title = title + ' ' + yearNote
- retDict = analyze_title(title)
- if outline:
- retDict['plot outline'] = outline
- return retDict
-
-
-class DOMHTMLSearchMovieKeywordParser(DOMHTMLSearchMovieParser):
- """Parse the html page that the IMDb web server shows when the
- "new search system" is used, searching for movies with the given
- keyword."""
-
- _notDirectHitTitle = 'best'
-
- _attrs = [Attribute(key='data',
- multi=True,
- path={
- 'link': "./a[1]/@href",
- 'info': "./a[1]//text()",
- 'ynote': "./span[@class='desc']/text()",
- 'outline': "./span[@class='outline']//text()"
- },
- postprocess=lambda x: (
- analyze_imdbid(x.get('link') or u''),
- custom_analyze_title4kwd(x.get('info') or u'',
- x.get('ynote') or u'',
- x.get('outline') or u'')
- ))]
-
- extractors = [Extractor(label='search',
- path="//td[3]/a[starts-with(@href, " \
- "'/title/tt')]/..",
- attrs=_attrs)]
-
-
-_OBJECTS = {
- 'search_keyword_parser': ((DOMHTMLSearchKeywordParser,),
- {'kind': 'keyword', '_basic_parser': DOMBasicKeywordParser}),
- 'search_moviekeyword_parser': ((DOMHTMLSearchMovieKeywordParser,), None)
-}
-
diff --git a/libs/imdb/parser/http/searchMovieParser.py b/libs/imdb/parser/http/searchMovieParser.py
deleted file mode 100644
index 2e7ace9a..00000000
--- a/libs/imdb/parser/http/searchMovieParser.py
+++ /dev/null
@@ -1,178 +0,0 @@
-"""
-parser.http.searchMovieParser module (imdb package).
-
-This module provides the HTMLSearchMovieParser class (and the
-search_movie_parser instance), used to parse the results of a search
-for a given title.
-E.g., for when searching for the title "the passion", the parsed
-page would be:
- http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
-
-Copyright 2004-2010 Davide Alberani
- 2008 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import re
-from imdb.utils import analyze_title, build_title
-from utils import DOMParserBase, Attribute, Extractor, analyze_imdbid
-
-
-class DOMBasicMovieParser(DOMParserBase):
- """Simply get the title of a movie and the imdbID.
-
- It's used by the DOMHTMLSearchMovieParser class to return a result
- for a direct match (when a search on IMDb results in a single
- movie, the web server sends directly the movie page."""
- # Stay generic enough to be used also for other DOMBasic*Parser classes.
- _titleAttrPath = ".//text()"
- _linkPath = "//link[@rel='canonical']"
- _titleFunct = lambda self, x: analyze_title(x or u'')
-
- def _init(self):
- self.preprocessors += [('',
- '')]
- self.extractors = [Extractor(label='title',
- path="//h1",
- attrs=Attribute(key='title',
- path=self._titleAttrPath,
- postprocess=self._titleFunct)),
- Extractor(label='link',
- path=self._linkPath,
- attrs=Attribute(key='link', path="./@href",
- postprocess=lambda x: \
- analyze_imdbid((x or u'').replace(
- 'http://pro.imdb.com', ''))
- ))]
-
- # Remove 'More at IMDb Pro' links.
- preprocessors = [(re.compile(r''), ''),
- (re.compile(r'< a href="')]
-
- def postprocess_data(self, data):
- if not 'link' in data:
- data = []
- else:
- link = data.pop('link')
- if (link and data):
- data = [(link, data)]
- else:
- data = []
- return data
-
-
-def custom_analyze_title(title):
- """Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
- # XXX: very crappy. :-(
- nt = title.split(' ')[0]
- if nt:
- title = nt
- if not title:
- return {}
- return analyze_title(title)
-
-# Manage AKAs.
-_reAKAStitles = re.compile(r'(?:aka) "(.*?)( |<\/td>)', re.I | re.M)
-
-class DOMHTMLSearchMovieParser(DOMParserBase):
- """Parse the html page that the IMDb web server shows when the
- "new search system" is used, for movies."""
-
- _BaseParser = DOMBasicMovieParser
- _notDirectHitTitle = 'imdb title'
- _titleBuilder = lambda self, x: build_title(x)
- _linkPrefix = '/title/tt'
-
- _attrs = [Attribute(key='data',
- multi=True,
- path={
- 'link': "./a[1]/@href",
- 'info': ".//text()",
- #'akas': ".//div[@class='_imdbpyAKA']//text()"
- 'akas': ".//p[@class='find-aka']//text()"
- },
- postprocess=lambda x: (
- analyze_imdbid(x.get('link') or u''),
- custom_analyze_title(x.get('info') or u''),
- x.get('akas')
- ))]
- extractors = [Extractor(label='search',
- path="//td[3]/a[starts-with(@href, '/title/tt')]/..",
- attrs=_attrs)]
- def _init(self):
- self.url = u''
-
- def _reset(self):
- self.url = u''
-
- def preprocess_string(self, html_string):
- if self._notDirectHitTitle in html_string[:1024].lower():
- if self._linkPrefix == '/title/tt':
- # Only for movies.
- html_string = html_string.replace('(TV mini-series)', '(mini)')
- html_string = html_string.replace('',
- ' ::')
- #html_string = _reAKAStitles.sub(
- # r' \1:: \2', html_string)
- return html_string
- # Direct hit!
- dbme = self._BaseParser(useModule=self._useModule)
- res = dbme.parse(html_string, url=self.url)
- if not res: return u''
- res = res['data']
- if not (res and res[0]): return u''
- link = '%s%s' % (self._linkPrefix, res[0][0])
- # # Tries to cope with companies for which links to pro.imdb.com
- # # are missing.
- # link = self.url.replace(imdbURL_base[:-1], '')
- title = self._titleBuilder(res[0][1])
- if not (link and title): return u''
- link = link.replace('http://pro.imdb.com', '')
- new_html = ' | | %s | ' % (link,
- title)
- return new_html
-
- def postprocess_data(self, data):
- if not data.has_key('data'):
- data['data'] = []
- results = getattr(self, 'results', None)
- if results is not None:
- data['data'][:] = data['data'][:results]
- # Horrible hack to support AKAs.
- if data and data['data'] and len(data['data'][0]) == 3 and \
- isinstance(data['data'][0], tuple):
- for idx, datum in enumerate(data['data']):
- if not isinstance(datum, tuple):
- continue
- if datum[2] is not None:
- akas = filter(None, datum[2].split('::'))
- if self._linkPrefix == '/title/tt':
- akas = [a.replace('" - ', '::').rstrip() for a in akas]
- akas = [a.replace('aka "', '', 1).lstrip() for a in akas]
- datum[1]['akas'] = akas
- data['data'][idx] = (datum[0], datum[1])
- else:
- data['data'][idx] = (datum[0], datum[1])
- return data
-
- def add_refs(self, data):
- return data
-
-
-_OBJECTS = {
- 'search_movie_parser': ((DOMHTMLSearchMovieParser,), None)
-}
-
diff --git a/libs/imdb/parser/http/searchPersonParser.py b/libs/imdb/parser/http/searchPersonParser.py
deleted file mode 100644
index 1756efc5..00000000
--- a/libs/imdb/parser/http/searchPersonParser.py
+++ /dev/null
@@ -1,92 +0,0 @@
-"""
-parser.http.searchPersonParser module (imdb package).
-
-This module provides the HTMLSearchPersonParser class (and the
-search_person_parser instance), used to parse the results of a search
-for a given person.
-E.g., when searching for the name "Mel Gibson", the parsed page would be:
- http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
-
-Copyright 2004-2010 Davide Alberani
- 2008 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import re
-from imdb.utils import analyze_name, build_name
-from utils import Extractor, Attribute, analyze_imdbid
-
-from searchMovieParser import DOMHTMLSearchMovieParser, DOMBasicMovieParser
-
-
-def _cleanName(n):
- """Clean the name in a title tag."""
- if not n:
- return u''
- n = n.replace('Filmography by type for', '') # FIXME: temporary.
- return n
-
-class DOMBasicPersonParser(DOMBasicMovieParser):
- """Simply get the name of a person and the imdbID.
-
- It's used by the DOMHTMLSearchPersonParser class to return a result
- for a direct match (when a search on IMDb results in a single
- person, the web server sends directly the movie page."""
- _titleFunct = lambda self, x: analyze_name(_cleanName(x), canonical=1)
-
-
-_reAKASp = re.compile(r'(?:aka|birth name) (")(.*?)"( |<\/em>|<\/td>)',
- re.I | re.M)
-
-class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
- """Parse the html page that the IMDb web server shows when the
- "new search system" is used, for persons."""
- _BaseParser = DOMBasicPersonParser
- _notDirectHitTitle = 'imdb name'
- _titleBuilder = lambda self, x: build_name(x, canonical=True)
- _linkPrefix = '/name/nm'
-
- _attrs = [Attribute(key='data',
- multi=True,
- path={
- 'link': "./a[1]/@href",
- 'name': "./a[1]/text()",
- 'index': "./text()[1]",
- 'akas': ".//div[@class='_imdbpyAKA']/text()"
- },
- postprocess=lambda x: (
- analyze_imdbid(x.get('link') or u''),
- analyze_name((x.get('name') or u'') + \
- (x.get('index') or u''),
- canonical=1), x.get('akas')
- ))]
- extractors = [Extractor(label='search',
- path="//td[3]/a[starts-with(@href, '/name/nm')]/..",
- attrs=_attrs)]
-
- def preprocess_string(self, html_string):
- if self._notDirectHitTitle in html_string[:1024].lower():
- html_string = _reAKASp.sub(
- r'\1\2:: \3',
- html_string)
- return DOMHTMLSearchMovieParser.preprocess_string(self, html_string)
-
-
-_OBJECTS = {
- 'search_person_parser': ((DOMHTMLSearchPersonParser,),
- {'kind': 'person', '_basic_parser': DOMBasicPersonParser})
-}
-
diff --git a/libs/imdb/parser/http/topBottomParser.py b/libs/imdb/parser/http/topBottomParser.py
deleted file mode 100644
index f0f29509..00000000
--- a/libs/imdb/parser/http/topBottomParser.py
+++ /dev/null
@@ -1,106 +0,0 @@
-"""
-parser.http.topBottomParser module (imdb package).
-
-This module provides the classes (and the instances), used to parse the
-lists of top 250 and bottom 100 movies.
-E.g.:
- http://akas.imdb.com/chart/top
- http://akas.imdb.com/chart/bottom
-
-Copyright 2009 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-from imdb.utils import analyze_title
-from utils import DOMParserBase, Attribute, Extractor, analyze_imdbid
-
-
-class DOMHTMLTop250Parser(DOMParserBase):
- """Parser for the "top 250" page.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- tparser = DOMHTMLTop250Parser()
- result = tparser.parse(top250_html_string)
- """
- label = 'top 250'
- ranktext = 'top 250 rank'
-
- def _init(self):
- self.extractors = [Extractor(label=self.label,
- path="//div[@id='main']//table//tr",
- attrs=Attribute(key=None,
- multi=True,
- path={self.ranktext: "./td[1]//text()",
- 'rating': "./td[2]//text()",
- 'title': "./td[3]//text()",
- 'movieID': "./td[3]//a/@href",
- 'votes': "./td[4]//text()"
- }))]
-
- def postprocess_data(self, data):
- if not data or self.label not in data:
- return []
- mlist = []
- data = data[self.label]
- # Avoid duplicates. A real fix, using XPath, is auspicabile.
- # XXX: probably this is no more needed.
- seenIDs = []
- for d in data:
- if 'movieID' not in d: continue
- if self.ranktext not in d: continue
- if 'title' not in d: continue
- theID = analyze_imdbid(d['movieID'])
- if theID is None:
- continue
- theID = str(theID)
- if theID in seenIDs:
- continue
- seenIDs.append(theID)
- minfo = analyze_title(d['title'])
- try: minfo[self.ranktext] = int(d[self.ranktext].replace('.', ''))
- except: pass
- if 'votes' in d:
- try: minfo['votes'] = int(d['votes'].replace(',', ''))
- except: pass
- if 'rating' in d:
- try: minfo['rating'] = float(d['rating'])
- except: pass
- mlist.append((theID, minfo))
- return mlist
-
-
-class DOMHTMLBottom100Parser(DOMHTMLTop250Parser):
- """Parser for the "bottom 100" page.
- The page should be provided as a string, as taken from
- the akas.imdb.com server. The final result will be a
- dictionary, with a key for every relevant section.
-
- Example:
- tparser = DOMHTMLBottom100Parser()
- result = tparser.parse(bottom100_html_string)
- """
- label = 'bottom 100'
- ranktext = 'bottom 100 rank'
-
-
-_OBJECTS = {
- 'top250_parser': ((DOMHTMLTop250Parser,), None),
- 'bottom100_parser': ((DOMHTMLBottom100Parser,), None)
-}
-
diff --git a/libs/imdb/parser/http/utils.py b/libs/imdb/parser/http/utils.py
deleted file mode 100644
index eeca3b07..00000000
--- a/libs/imdb/parser/http/utils.py
+++ /dev/null
@@ -1,855 +0,0 @@
-"""
-parser.http.utils module (imdb package).
-
-This module provides miscellaneous utilities used by
-the imdb.parser.http classes.
-
-Copyright 2004-2010 Davide Alberani
- 2008 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import re
-import logging
-
-from imdb._exceptions import IMDbError
-
-from imdb.utils import flatten, _Container
-from imdb.Movie import Movie
-from imdb.Person import Person
-from imdb.Character import Character
-
-
-# Year, imdbIndex and kind.
-re_yearKind_index = re.compile(r'(\([0-9\?]{4}(?:/[IVXLCDM]+)?\)(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)')
-
-# Match imdb ids in href tags
-re_imdbid = re.compile(r'(title/tt|name/nm|character/ch|company/co)([0-9]+)')
-
-def analyze_imdbid(href):
- """Return an imdbID from an URL."""
- if not href:
- return None
- match = re_imdbid.search(href)
- if not match:
- return None
- return str(match.group(2))
-
-
-_modify_keys = list(Movie.keys_tomodify_list) + list(Person.keys_tomodify_list)
-def _putRefs(d, re_titles, re_names, re_characters, lastKey=None):
- """Iterate over the strings inside list items or dictionary values,
- substitutes movie titles and person names with the (qv) references."""
- if isinstance(d, list):
- for i in xrange(len(d)):
- if isinstance(d[i], (unicode, str)):
- if lastKey in _modify_keys:
- if re_names:
- d[i] = re_names.sub(ur"'\1' (qv)", d[i])
- if re_titles:
- d[i] = re_titles.sub(ur'_\1_ (qv)', d[i])
- if re_characters:
- d[i] = re_characters.sub(ur'#\1# (qv)', d[i])
- elif isinstance(d[i], (list, dict)):
- _putRefs(d[i], re_titles, re_names, re_characters,
- lastKey=lastKey)
- elif isinstance(d, dict):
- for k, v in d.items():
- lastKey = k
- if isinstance(v, (unicode, str)):
- if lastKey in _modify_keys:
- if re_names:
- d[k] = re_names.sub(ur"'\1' (qv)", v)
- if re_titles:
- d[k] = re_titles.sub(ur'_\1_ (qv)', v)
- if re_characters:
- d[k] = re_characters.sub(ur'#\1# (qv)', v)
- elif isinstance(v, (list, dict)):
- _putRefs(d[k], re_titles, re_names, re_characters,
- lastKey=lastKey)
-
-
-# Handle HTML/XML/SGML entities.
-from htmlentitydefs import entitydefs
-entitydefs = entitydefs.copy()
-entitydefsget = entitydefs.get
-entitydefs['nbsp'] = ' '
-
-sgmlentity = {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
-sgmlentityget = sgmlentity.get
-_sgmlentkeys = sgmlentity.keys()
-
-entcharrefs = {}
-entcharrefsget = entcharrefs.get
-for _k, _v in entitydefs.items():
- if _k in _sgmlentkeys: continue
- if _v[0:2] == '':
- dec_code = _v[1:-1]
- _v = unichr(int(_v[2:-1]))
- entcharrefs[dec_code] = _v
- else:
- dec_code = '#' + str(ord(_v))
- _v = unicode(_v, 'latin_1', 'replace')
- entcharrefs[dec_code] = _v
- entcharrefs[_k] = _v
-del _sgmlentkeys, _k, _v
-entcharrefs['#160'] = u' '
-entcharrefs['#xA0'] = u' '
-entcharrefs['#xa0'] = u' '
-entcharrefs['#XA0'] = u' '
-entcharrefs['#x22'] = u'"'
-entcharrefs['#X22'] = u'"'
-# convert &x26; to &, to make BeautifulSoup happy; beware that this
-# leaves lone '&' in the html broken, but I assume this is better than
-# the contrary...
-entcharrefs['#38'] = u'&'
-entcharrefs['#x26'] = u'&'
-entcharrefs['#x26'] = u'&'
-
-re_entcharrefs = re.compile('&(%s|\#160|\#\d{1,5}|\#x[0-9a-f]{1,4});' %
- '|'.join(map(re.escape, entcharrefs)), re.I)
-re_entcharrefssub = re_entcharrefs.sub
-
-sgmlentity.update(dict([('#34', u'"'), ('#38', u'&'),
- ('#60', u'<'), ('#62', u'>'), ('#39', u"'")]))
-re_sgmlref = re.compile('&(%s);' % '|'.join(map(re.escape, sgmlentity)))
-re_sgmlrefsub = re_sgmlref.sub
-
-# Matches XML-only single tags, like ; they are invalid in HTML,
-# but widely used by IMDb web site. :-/
-re_xmltags = re.compile('<([a-zA-Z]+)/>')
-
-
-def _replXMLRef(match):
- """Replace the matched XML/HTML entities and references;
- replace everything except sgml entities like <, >, ..."""
- ref = match.group(1)
- value = entcharrefsget(ref)
- if value is None:
- if ref[0] == '#':
- ref_code = ref[1:]
- if ref_code in ('34', '38', '60', '62', '39'):
- return match.group(0)
- elif ref_code[0].lower() == 'x':
- #if ref[2:] == '26':
- # # Don't convert &x26; to &, to make BeautifulSoup happy.
- # return '&'
- return unichr(int(ref[2:], 16))
- else:
- return unichr(int(ref[1:]))
- else:
- return ref
- return value
-
-def subXMLRefs(s):
- """Return the given html string with entity and char references
- replaced."""
- return re_entcharrefssub(_replXMLRef, s)
-
-# XXX: no more used here; move it to mobile (they are imported by helpers, too)?
-def _replSGMLRefs(match):
- """Replace the matched SGML entity."""
- ref = match.group(1)
- return sgmlentityget(ref, ref)
-
-def subSGMLRefs(s):
- """Return the given html string with sgml entity and char references
- replaced."""
- return re_sgmlrefsub(_replSGMLRefs, s)
-
-
-_b_p_logger = logging.getLogger('imdbpy.parser.http.build_person')
-def build_person(txt, personID=None, billingPos=None,
- roleID=None, accessSystem='http', modFunct=None):
- """Return a Person instance from the tipical ... strings
- found in the IMDb's web site."""
- #if personID is None
- # _b_p_logger.debug('empty name or personID for "%s"', txt)
- notes = u''
- role = u''
- # Search the (optional) separator between name and role/notes.
- if txt.find('....') != -1:
- sep = '....'
- elif txt.find('...') != -1:
- sep = '...'
- else:
- sep = '...'
- # Replace the first parenthesis, assuming there are only
- # notes, after.
- # Rationale: no imdbIndex is (ever?) showed on the web site.
- txt = txt.replace('(', '...(', 1)
- txt_split = txt.split(sep, 1)
- name = txt_split[0].strip()
- if len(txt_split) == 2:
- role_comment = txt_split[1].strip()
- # Strip common endings.
- if role_comment[-4:] == ' and':
- role_comment = role_comment[:-4].rstrip()
- elif role_comment[-2:] == ' &':
- role_comment = role_comment[:-2].rstrip()
- elif role_comment[-6:] == '& ....':
- role_comment = role_comment[:-6].rstrip()
- # Get the notes.
- if roleID is not None:
- if not isinstance(roleID, list):
- cmt_idx = role_comment.find('(')
- if cmt_idx != -1:
- role = role_comment[:cmt_idx].rstrip()
- notes = role_comment[cmt_idx:]
- else:
- # Just a role, without notes.
- role = role_comment
- else:
- role = role_comment
- else:
- # We're managing something that doesn't have a 'role', so
- # everything are notes.
- notes = role_comment
- if role == '....': role = u''
- roleNotes = []
- # Manages multiple roleIDs.
- if isinstance(roleID, list):
- rolesplit = role.split('/')
- role = []
- for r in rolesplit:
- nidx = r.find('(')
- if nidx != -1:
- role.append(r[:nidx].rstrip())
- roleNotes.append(r[nidx:])
- else:
- role.append(r)
- roleNotes.append(None)
- lr = len(role)
- lrid = len(roleID)
- if lr > lrid:
- roleID += [None] * (lrid - lr)
- elif lr < lrid:
- roleID = roleID[:lr]
- for i, rid in enumerate(roleID):
- if rid is not None:
- roleID[i] = str(rid)
- if lr == 1:
- role = role[0]
- roleID = roleID[0]
- elif roleID is not None:
- roleID = str(roleID)
- if personID is not None:
- personID = str(personID)
- if (not name) or (personID is None):
- # Set to 'debug', since build_person is expected to receive some crap.
- _b_p_logger.debug('empty name or personID for "%s"', txt)
- # XXX: return None if something strange is detected?
- person = Person(name=name, personID=personID, currentRole=role,
- roleID=roleID, notes=notes, billingPos=billingPos,
- modFunct=modFunct, accessSystem=accessSystem)
- if roleNotes and len(roleNotes) == len(roleID):
- for idx, role in enumerate(person.currentRole):
- if roleNotes[idx]:
- role.notes = roleNotes[idx]
- return person
-
-
-_re_chrIDs = re.compile('[0-9]{7}')
-
-_b_m_logger = logging.getLogger('imdbpy.parser.http.build_movie')
-# To shrink spaces.
-re_spaces = re.compile(r'\s+')
-def build_movie(txt, movieID=None, roleID=None, status=None,
- accessSystem='http', modFunct=None, _parsingCharacter=False,
- _parsingCompany=False, year=None, chrRoles=None,
- rolesNoChar=None, additionalNotes=None):
- """Given a string as normally seen on the "categorized" page of
- a person on the IMDb's web site, returns a Movie instance."""
- # FIXME: Oook, lets face it: build_movie and build_person are now
- # two horrible sets of patches to support the new IMDb design. They
- # must be rewritten from scratch.
- if _parsingCharacter:
- _defSep = ' Played by '
- elif _parsingCompany:
- _defSep = ' ... '
- else:
- _defSep = ' .... '
- title = re_spaces.sub(' ', txt).strip()
- # Split the role/notes from the movie title.
- tsplit = title.split(_defSep, 1)
- role = u''
- notes = u''
- roleNotes = []
- if len(tsplit) == 2:
- title = tsplit[0].rstrip()
- role = tsplit[1].lstrip()
- if title[-9:] == 'TV Series':
- title = title[:-9].rstrip()
- elif title[-14:] == 'TV mini-series':
- title = title[:-14] + ' (mini)'
- # Try to understand where the movie title ends.
- while True:
- if year:
- break
- if title[-1:] != ')':
- # Ignore the silly "TV Series" notice.
- if title[-9:] == 'TV Series':
- title = title[:-9].rstrip()
- continue
- else:
- # Just a title: stop here.
- break
- # Try to match paired parentheses; yes: sometimes there are
- # parentheses inside comments...
- nidx = title.rfind('(')
- while (nidx != -1 and \
- title[nidx:].count('(') != title[nidx:].count(')')):
- nidx = title[:nidx].rfind('(')
- # Unbalanced parentheses: stop here.
- if nidx == -1: break
- # The last item in parentheses seems to be a year: stop here.
- first4 = title[nidx+1:nidx+5]
- if (first4.isdigit() or first4 == '????') and \
- title[nidx+5:nidx+6] in (')', '/'): break
- # The last item in parentheses is a known kind: stop here.
- if title[nidx+1:-1] in ('TV', 'V', 'mini', 'VG'): break
- # Else, in parentheses there are some notes.
- # XXX: should the notes in the role half be kept separated
- # from the notes in the movie title half?
- if notes: notes = '%s %s' % (title[nidx:], notes)
- else: notes = title[nidx:]
- title = title[:nidx].rstrip()
- if year:
- year = year.strip()
- if title[-1] == ')':
- fpIdx = title.rfind('(')
- if fpIdx != -1:
- if notes: notes = '%s %s' % (title[fpIdx:], notes)
- else: notes = title[fpIdx:]
- title = title[:fpIdx].rstrip()
- title = u'%s (%s)' % (title, year)
- if _parsingCharacter and roleID and not role:
- roleID = None
- if not roleID:
- roleID = None
- elif len(roleID) == 1:
- roleID = roleID[0]
- if not role and chrRoles and isinstance(roleID, (str, unicode)):
- roleID = _re_chrIDs.findall(roleID)
- role = ' / '.join(filter(None, chrRoles.split('@@')))
- # Manages multiple roleIDs.
- if isinstance(roleID, list):
- tmprole = role.split('/')
- role = []
- for r in tmprole:
- nidx = r.find('(')
- if nidx != -1:
- role.append(r[:nidx].rstrip())
- roleNotes.append(r[nidx:])
- else:
- role.append(r)
- roleNotes.append(None)
- lr = len(role)
- lrid = len(roleID)
- if lr > lrid:
- roleID += [None] * (lrid - lr)
- elif lr < lrid:
- roleID = roleID[:lr]
- for i, rid in enumerate(roleID):
- if rid is not None:
- roleID[i] = str(rid)
- if lr == 1:
- role = role[0]
- roleID = roleID[0]
- elif roleID is not None:
- roleID = str(roleID)
- if movieID is not None:
- movieID = str(movieID)
- if (not title) or (movieID is None):
- _b_m_logger.error('empty title or movieID for "%s"', txt)
- if rolesNoChar:
- rolesNoChar = filter(None, [x.strip() for x in rolesNoChar.split('/')])
- if not role:
- role = []
- elif not isinstance(role, list):
- role = [role]
- role += rolesNoChar
- notes = notes.strip()
- if additionalNotes:
- additionalNotes = re_spaces.sub(' ', additionalNotes).strip()
- if notes:
- notes += u' '
- notes += additionalNotes
- m = Movie(title=title, movieID=movieID, notes=notes, currentRole=role,
- roleID=roleID, roleIsPerson=_parsingCharacter,
- modFunct=modFunct, accessSystem=accessSystem)
- if roleNotes and len(roleNotes) == len(roleID):
- for idx, role in enumerate(m.currentRole):
- try:
- if roleNotes[idx]:
- role.notes = roleNotes[idx]
- except IndexError:
- break
- # Status can't be checked here, and must be detected by the parser.
- if status:
- m['status'] = status
- return m
-
-
-class DOMParserBase(object):
- """Base parser to handle HTML data from the IMDb's web server."""
- _defGetRefs = False
- _containsObjects = False
-
- preprocessors = []
- extractors = []
- usingModule = None
-
- _logger = logging.getLogger('imdbpy.parser.http.domparser')
-
- def __init__(self, useModule=None):
- """Initialize the parser. useModule can be used to force it
- to use 'BeautifulSoup' or 'lxml'; by default, it's auto-detected,
- using 'lxml' if available and falling back to 'BeautifulSoup'
- otherwise."""
- # Module to use.
- if useModule is None:
- useModule = ('lxml', 'BeautifulSoup')
- if not isinstance(useModule, (tuple, list)):
- useModule = [useModule]
- self._useModule = useModule
- nrMods = len(useModule)
- _gotError = False
- for idx, mod in enumerate(useModule):
- mod = mod.strip().lower()
- try:
- if mod == 'lxml':
- from lxml.html import fromstring
- from lxml.etree import tostring
- self._is_xml_unicode = False
- self.usingModule = 'lxml'
- elif mod == 'beautifulsoup':
- from bsouplxml.html import fromstring
- from bsouplxml.etree import tostring
- self._is_xml_unicode = True
- self.usingModule = 'beautifulsoup'
- else:
- self._logger.warn('unknown module "%s"' % mod)
- continue
- self.fromstring = fromstring
- self._tostring = tostring
- if _gotError:
- self._logger.warn('falling back to "%s"' % mod)
- break
- except ImportError, e:
- if idx+1 >= nrMods:
- # Raise the exception, if we don't have any more
- # options to try.
- raise IMDbError, 'unable to use any parser in %s: %s' % \
- (str(useModule), str(e))
- else:
- self._logger.warn('unable to use "%s": %s' % (mod, str(e)))
- _gotError = True
- continue
- else:
- raise IMDbError, 'unable to use parsers in %s' % str(useModule)
- # Fall-back defaults.
- self._modFunct = None
- self._as = 'http'
- self._cname = self.__class__.__name__
- self._init()
- self.reset()
-
- def reset(self):
- """Reset the parser."""
- # Names and titles references.
- self._namesRefs = {}
- self._titlesRefs = {}
- self._charactersRefs = {}
- self._reset()
-
- def _init(self):
- """Subclasses can override this method, if needed."""
- pass
-
- def _reset(self):
- """Subclasses can override this method, if needed."""
- pass
-
- def parse(self, html_string, getRefs=None, **kwds):
- """Return the dictionary generated from the given html string;
- getRefs can be used to force the gathering of movies/persons/characters
- references."""
- self.reset()
- if getRefs is not None:
- self.getRefs = getRefs
- else:
- self.getRefs = self._defGetRefs
- # Useful only for the testsuite.
- if not isinstance(html_string, unicode):
- html_string = unicode(html_string, 'latin_1', 'replace')
- html_string = subXMLRefs(html_string)
- # Temporary fix: self.parse_dom must work even for empty strings.
- html_string = self.preprocess_string(html_string)
- html_string = html_string.strip()
- # tag attributes like title=""Family Guy"" will be
- # converted to title=""Family Guy"" and this confuses BeautifulSoup.
- if self.usingModule == 'beautifulsoup':
- html_string = html_string.replace('""', '"')
- #print html_string.encode('utf8')
- if html_string:
- dom = self.get_dom(html_string)
- #print self.tostring(dom).encode('utf8')
- try:
- dom = self.preprocess_dom(dom)
- except Exception, e:
- self._logger.error('%s: caught exception preprocessing DOM',
- self._cname, exc_info=True)
- if self.getRefs:
- try:
- self.gather_refs(dom)
- except Exception, e:
- self._logger.warn('%s: unable to gather refs: %s',
- self._cname, exc_info=True)
- data = self.parse_dom(dom)
- else:
- data = {}
- try:
- data = self.postprocess_data(data)
- except Exception, e:
- self._logger.error('%s: caught exception postprocessing data',
- self._cname, exc_info=True)
- if self._containsObjects:
- self.set_objects_params(data)
- data = self.add_refs(data)
- return data
-
- def _build_empty_dom(self):
- from bsouplxml import _bsoup
- return _bsoup.BeautifulSoup('')
-
- def get_dom(self, html_string):
- """Return a dom object, from the given string."""
- try:
- dom = self.fromstring(html_string)
- if dom is None:
- dom = self._build_empty_dom()
- self._logger.error('%s: using a fake empty DOM', self._cname)
- return dom
- except Exception, e:
- self._logger.error('%s: caught exception parsing DOM',
- self._cname, exc_info=True)
- return self._build_empty_dom()
-
- def xpath(self, element, path):
- """Return elements matching the given XPath."""
- try:
- xpath_result = element.xpath(path)
- if self._is_xml_unicode:
- return xpath_result
- result = []
- for item in xpath_result:
- if isinstance(item, str):
- item = unicode(item)
- result.append(item)
- return result
- except Exception, e:
- self._logger.error('%s: caught exception extracting XPath "%s"',
- self._cname, path, exc_info=True)
- return []
-
- def tostring(self, element):
- """Convert the element to a string."""
- if isinstance(element, (unicode, str)):
- return unicode(element)
- else:
- try:
- return self._tostring(element, encoding=unicode)
- except Exception, e:
- self._logger.error('%s: unable to convert to string',
- self._cname, exc_info=True)
- return u''
-
- def clone(self, element):
- """Clone an element."""
- return self.fromstring(self.tostring(element))
-
- def preprocess_string(self, html_string):
- """Here we can modify the text, before it's parsed."""
- if not html_string:
- return html_string
- # Remove silly » chars.
- html_string = html_string.replace(u' \xbb', u'')
- try:
- preprocessors = self.preprocessors
- except AttributeError:
- return html_string
- for src, sub in preprocessors:
- # re._pattern_type is present only since Python 2.5.
- if callable(getattr(src, 'sub', None)):
- html_string = src.sub(sub, html_string)
- elif isinstance(src, str):
- html_string = html_string.replace(src, sub)
- elif callable(src):
- try:
- html_string = src(html_string)
- except Exception, e:
- _msg = '%s: caught exception preprocessing html'
- self._logger.error(_msg, self._cname, exc_info=True)
- continue
- ##print html_string.encode('utf8')
- return html_string
-
- def gather_refs(self, dom):
- """Collect references."""
- grParser = GatherRefs(useModule=self._useModule)
- grParser._as = self._as
- grParser._modFunct = self._modFunct
- refs = grParser.parse_dom(dom)
- refs = grParser.postprocess_data(refs)
- self._namesRefs = refs['names refs']
- self._titlesRefs = refs['titles refs']
- self._charactersRefs = refs['characters refs']
-
- def preprocess_dom(self, dom):
- """Last chance to modify the dom, before the rules in self.extractors
- are applied by the parse_dom method."""
- return dom
-
- def parse_dom(self, dom):
- """Parse the given dom according to the rules specified
- in self.extractors."""
- result = {}
- for extractor in self.extractors:
- ##print extractor.label
- if extractor.group is None:
- elements = [(extractor.label, element)
- for element in self.xpath(dom, extractor.path)]
- else:
- groups = self.xpath(dom, extractor.group)
- elements = []
- for group in groups:
- group_key = self.xpath(group, extractor.group_key)
- if not group_key: continue
- group_key = group_key[0]
- # XXX: always tries the conversion to unicode:
- # BeautifulSoup.NavigableString is a subclass
- # of unicode, and so it's never converted.
- group_key = self.tostring(group_key)
- normalizer = extractor.group_key_normalize
- if normalizer is not None:
- if callable(normalizer):
- try:
- group_key = normalizer(group_key)
- except Exception, e:
- _m = '%s: unable to apply group_key normalizer'
- self._logger.error(_m, self._cname,
- exc_info=True)
- group_elements = self.xpath(group, extractor.path)
- elements.extend([(group_key, element)
- for element in group_elements])
- for group_key, element in elements:
- for attr in extractor.attrs:
- if isinstance(attr.path, dict):
- data = {}
- for field in attr.path.keys():
- path = attr.path[field]
- value = self.xpath(element, path)
- if not value:
- data[field] = None
- else:
- # XXX: use u'' , to join?
- data[field] = ''.join(value)
- else:
- data = self.xpath(element, attr.path)
- if not data:
- data = None
- else:
- data = attr.joiner.join(data)
- if not data:
- continue
- attr_postprocess = attr.postprocess
- if callable(attr_postprocess):
- try:
- data = attr_postprocess(data)
- except Exception, e:
- _m = '%s: unable to apply attr postprocess'
- self._logger.error(_m, self._cname, exc_info=True)
- key = attr.key
- if key is None:
- key = group_key
- elif key.startswith('.'):
- # assuming this is an xpath
- try:
- key = self.xpath(element, key)[0]
- except IndexError:
- self._logger.error('%s: XPath returned no items',
- self._cname, exc_info=True)
- elif key.startswith('self.'):
- key = getattr(self, key[5:])
- if attr.multi:
- if key not in result:
- result[key] = []
- result[key].append(data)
- else:
- if isinstance(data, dict):
- result.update(data)
- else:
- result[key] = data
- return result
-
- def postprocess_data(self, data):
- """Here we can modify the data."""
- return data
-
- def set_objects_params(self, data):
- """Set parameters of Movie/Person/... instances, since they are
- not always set in the parser's code."""
- for obj in flatten(data, yieldDictKeys=True, scalar=_Container):
- obj.accessSystem = self._as
- obj.modFunct = self._modFunct
-
- def add_refs(self, data):
- """Modify data according to the expected output."""
- if self.getRefs:
- titl_re = ur'(%s)' % '|'.join([re.escape(x) for x
- in self._titlesRefs.keys()])
- if titl_re != ur'()': re_titles = re.compile(titl_re, re.U)
- else: re_titles = None
- nam_re = ur'(%s)' % '|'.join([re.escape(x) for x
- in self._namesRefs.keys()])
- if nam_re != ur'()': re_names = re.compile(nam_re, re.U)
- else: re_names = None
- chr_re = ur'(%s)' % '|'.join([re.escape(x) for x
- in self._charactersRefs.keys()])
- if chr_re != ur'()': re_characters = re.compile(chr_re, re.U)
- else: re_characters = None
- _putRefs(data, re_titles, re_names, re_characters)
- return {'data': data, 'titlesRefs': self._titlesRefs,
- 'namesRefs': self._namesRefs,
- 'charactersRefs': self._charactersRefs}
-
-
-class Extractor(object):
- """Instruct the DOM parser about how to parse a document."""
- def __init__(self, label, path, attrs, group=None, group_key=None,
- group_key_normalize=None):
- """Initialize an Extractor object, used to instruct the DOM parser
- about how to parse a document."""
- # rarely (never?) used, mostly for debugging purposes.
- self.label = label
- self.group = group
- if group_key is None:
- self.group_key = ".//text()"
- else:
- self.group_key = group_key
- self.group_key_normalize = group_key_normalize
- self.path = path
- # A list of attributes to fetch.
- if isinstance(attrs, Attribute):
- attrs = [attrs]
- self.attrs = attrs
-
- def __repr__(self):
- """String representation of an Extractor object."""
- r = '' % (id(self),
- self.label, self.path, repr(self.attrs), self.group,
- self.group_key, self.group_key_normalize)
- return r
-
-
-class Attribute(object):
- """The attribute to consider, for a given node."""
- def __init__(self, key, multi=False, path=None, joiner=None,
- postprocess=None):
- """Initialize an Attribute object, used to specify the
- attribute to consider, for a given node."""
- # The key under which information will be saved; can be a string or an
- # XPath. If None, the label of the containing extractor will be used.
- self.key = key
- self.multi = multi
- self.path = path
- if joiner is None:
- joiner = ''
- self.joiner = joiner
- # Post-process this set of information.
- self.postprocess = postprocess
-
- def __repr__(self):
- """String representation of an Attribute object."""
- r = '' % (id(self), self.key,
- self.multi, repr(self.path),
- self.joiner, repr(self.postprocess))
- return r
-
-
-def _parse_ref(text, link, info):
- """Manage links to references."""
- if link.find('/title/tt') != -1:
- yearK = re_yearKind_index.match(info)
- if yearK and yearK.start() == 0:
- text += ' %s' % info[:yearK.end()]
- return (text.replace('\n', ' '), link)
-
-
-class GatherRefs(DOMParserBase):
- """Parser used to gather references to movies, persons and characters."""
- _attrs = [Attribute(key=None, multi=True,
- path={
- 'text': './text()',
- 'link': './@href',
- 'info': './following::text()[1]'
- },
- postprocess=lambda x: _parse_ref(x.get('text'), x.get('link'),
- (x.get('info') or u'').strip()))]
- extractors = [
- Extractor(label='names refs',
- path="//a[starts-with(@href, '/name/nm')][string-length(@href)=16]",
- attrs=_attrs),
-
- Extractor(label='titles refs',
- path="//a[starts-with(@href, '/title/tt')]" \
- "[string-length(@href)=17]",
- attrs=_attrs),
-
- Extractor(label='characters refs',
- path="//a[starts-with(@href, '/character/ch')]" \
- "[string-length(@href)=21]",
- attrs=_attrs),
- ]
-
- def postprocess_data(self, data):
- result = {}
- for item in ('names refs', 'titles refs', 'characters refs'):
- result[item] = {}
- for k, v in data.get(item, []):
- if not v.endswith('/'): continue
- imdbID = analyze_imdbid(v)
- if item == 'names refs':
- obj = Person(personID=imdbID, name=k,
- accessSystem=self._as, modFunct=self._modFunct)
- elif item == 'titles refs':
- obj = Movie(movieID=imdbID, title=k,
- accessSystem=self._as, modFunct=self._modFunct)
- else:
- obj = Character(characterID=imdbID, name=k,
- accessSystem=self._as, modFunct=self._modFunct)
- # XXX: companies aren't handled: are they ever found in text,
- # as links to their page?
- result[item][k] = obj
- return result
-
- def add_refs(self, data):
- return data
-
-
diff --git a/libs/imdb/parser/mobile/__init__.py b/libs/imdb/parser/mobile/__init__.py
deleted file mode 100644
index ce623ec8..00000000
--- a/libs/imdb/parser/mobile/__init__.py
+++ /dev/null
@@ -1,833 +0,0 @@
-"""
-parser.mobile package (imdb package).
-
-This package provides the IMDbMobileAccessSystem class used to access
-IMDb's data for mobile systems.
-the imdb.IMDb function will return an instance of this class when
-called with the 'accessSystem' argument set to "mobile".
-
-Copyright 2005-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import re
-import logging
-from urllib import unquote
-
-from imdb import imdbURL_movie_main, imdbURL_person_main, imdbURL_character_main
-from imdb.Movie import Movie
-from imdb.utils import analyze_title, analyze_name, canonicalName, \
- date_and_notes
-from imdb._exceptions import IMDbDataAccessError
-from imdb.parser.http import IMDbHTTPAccessSystem
-from imdb.parser.http.utils import subXMLRefs, subSGMLRefs, build_person, \
- build_movie, re_spaces
-
-# XXX NOTE: the first version of this module was heavily based on
-# regular expressions. This new version replace regexps with
-# find() strings' method calls; despite being less flexible, it
-# seems to be at least as fast and, hopefully, much more
-# lightweight. Yes: the regexp-based version was too heavyweight
-# for systems with very limited CPU power and memory footprint.
-re_spacessub = re_spaces.sub
-# Strip html.
-re_unhtml = re.compile(r'<.+?>')
-re_unhtmlsub = re_unhtml.sub
-# imdb person or movie ids.
-re_imdbID = re.compile(r'(?<=nm|tt|ch)([0-9]{7})\b')
-
-# movie AKAs.
-re_makas = re.compile('(.*? )')
-
-# Remove episode numbers.
-re_filmo_episodes = re.compile('.*? ',
- re.M | re.I)
-
-
-def _unHtml(s):
- """Return a string without tags and no multiple spaces."""
- return subSGMLRefs(re_spacessub(' ', re_unhtmlsub('', s)).strip())
-
-
-_inttype = type(0)
-
-def _getTagsWith(s, cont, toClosure=False, maxRes=None):
- """Return the html tags in the 's' string containing the 'cont'
- string; if toClosure is True, everything between the opening
- tag and the closing tag is returned."""
- lres = []
- bi = s.find(cont)
- if bi != -1:
- btag = s[:bi].rfind('<')
- if btag != -1:
- if not toClosure:
- etag = s[bi+1:].find('>')
- if etag != -1:
- endidx = bi+2+etag
- lres.append(s[btag:endidx])
- if maxRes is not None and len(lres) >= maxRes: return lres
- lres += _getTagsWith(s[endidx:], cont,
- toClosure=toClosure)
- else:
- spaceidx = s[btag:].find(' ')
- if spaceidx != -1:
- ctag = '%s>' % s[btag+1:btag+spaceidx]
- closeidx = s[bi:].find(ctag)
- if closeidx != -1:
- endidx = bi+closeidx+len(ctag)
- lres.append(s[btag:endidx])
- if maxRes is not None and len(lres) >= maxRes:
- return lres
- lres += _getTagsWith(s[endidx:], cont,
- toClosure=toClosure)
- return lres
-
-
-def _findBetween(s, begins, ends, beginindx=0, maxRes=None, lres=None):
- """Return the list of strings from the 's' string which are included
- between the 'begins' and 'ends' strings."""
- if lres is None:
- lres = []
- bi = s.find(begins, beginindx)
- if bi != -1:
- lbegins = len(begins)
- if isinstance(ends, (list, tuple)):
- eset = [s.find(end, bi+lbegins) for end in ends]
- eset[:] = [x for x in eset if x != -1]
- if not eset: ei = -1
- else: ei = min(eset)
- else:
- ei = s.find(ends, bi+lbegins)
- if ei != -1:
- match = s[bi+lbegins:ei]
- lres.append(match)
- if maxRes is not None and len(lres) >= maxRes: return lres
- _findBetween(s, begins, ends, beginindx=ei, maxRes=maxRes,
- lres=lres)
- return lres
-
-
-class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
- """The class used to access IMDb's data through the web for
- mobile terminals."""
-
- accessSystem = 'mobile'
- _mobile_logger = logging.getLogger('imdbpy.parser.mobile')
-
- def __init__(self, isThin=1, *arguments, **keywords):
- self.accessSystem = 'mobile'
- IMDbHTTPAccessSystem.__init__(self, isThin, *arguments, **keywords)
-
- def _clean_html(self, html):
- """Normalize the retrieve html."""
- html = re_spaces.sub(' ', html)
- # Remove silly » chars.
- html = html.replace(' »', '')
- return subXMLRefs(html)
-
- def _mretrieve(self, url, size=-1):
- """Retrieve an html page and normalize it."""
- cont = self._retrieve(url, size=size)
- return self._clean_html(cont)
-
- def _getPersons(self, s, sep=' '):
- """Return a list of Person objects, from the string s; items
- are assumed to be separated by the sep string."""
- names = s.split(sep)
- pl = []
- plappend = pl.append
- counter = 1
- for name in names:
- pid = re_imdbID.findall(name)
- if not pid: continue
- characters = _getTagsWith(name, 'class="char"',
- toClosure=True, maxRes=1)
- chpids = []
- if characters:
- for ch in characters[0].split(' / '):
- chid = re_imdbID.findall(ch)
- if not chid:
- chpids.append(None)
- else:
- chpids.append(chid[-1])
- if not chpids:
- chpids = None
- elif len(chpids) == 1:
- chpids = chpids[0]
- name = _unHtml(name)
- # Catch unclosed tags.
- gt_indx = name.find('>')
- if gt_indx != -1:
- name = name[gt_indx+1:].lstrip()
- if not name: continue
- if name.endswith('...'):
- name = name[:-3]
- p = build_person(name, personID=str(pid[0]), billingPos=counter,
- modFunct=self._defModFunct, roleID=chpids,
- accessSystem=self.accessSystem)
- plappend(p)
- counter += 1
- return pl
-
- def _search_movie(self, title, results):
- ##params = urllib.urlencode({'tt': 'on','mx': str(results),'q': title})
- ##params = 'q=%s&tt=on&mx=%s' % (urllib.quote_plus(title), str(results))
- ##cont = self._mretrieve(imdbURL_search % params)
- cont = subXMLRefs(self._get_search_content('tt', title, results))
- title = _findBetween(cont, '', '', maxRes=1)
- res = []
- if not title:
- self._mobile_logger.error('no title tag searching for movie %s',
- title)
- return res
- tl = title[0].lower()
- if not tl.startswith('imdb title'):
- # a direct hit!
- title = _unHtml(title[0])
- mid = None
- midtag = _getTagsWith(cont, 'rel="canonical"', maxRes=1)
- if midtag:
- mid = _findBetween(midtag[0], '/title/tt', '/', maxRes=1)
- if not (mid and title):
- self._mobile_logger.error('no direct hit title/movieID for' \
- ' title %s', title)
- return res
- if cont.find('') != -1:
- title += ' (mini)'
- res[:] = [(str(mid[0]), analyze_title(title))]
- else:
- # XXX: this results*3 prevents some recursion errors, but...
- # it's not exactly understandable (i.e.: why 'results' is
- # not enough to get all the results?)
- lis = _findBetween(cont, 'td valign="top">', '',
- maxRes=results*3)
- for li in lis:
- akas = re_makas.findall(li)
- for idx, aka in enumerate(akas):
- aka = aka.replace('" - ', '::', 1)
- aka = _unHtml(aka)
- if aka.startswith('aka "'):
- aka = aka[5:].strip()
- if aka[-1] == '"':
- aka = aka[:-1]
- akas[idx] = aka
- imdbid = re_imdbID.findall(li)
- li = re_makas.sub('', li)
- mtitle = _unHtml(li)
- if not (imdbid and mtitle):
- self._mobile_logger.debug('no title/movieID parsing' \
- ' %s searching for title %s', li,
- title)
- continue
- mtitle = mtitle.replace('(TV mini-series)', '(mini)')
- resd = analyze_title(mtitle)
- if akas:
- resd['akas'] = akas
- res.append((str(imdbid[0]), resd))
- return res
-
- def get_movie_main(self, movieID):
- cont = self._mretrieve(imdbURL_movie_main % movieID + 'maindetails')
- title = _findBetween(cont, '', '', maxRes=1)
- if not title:
- raise IMDbDataAccessError, 'unable to get movieID "%s"' % movieID
- title = _unHtml(title[0])
- if cont.find('') != -1:
- title += ' (mini)'
- d = analyze_title(title)
- kind = d.get('kind')
- tv_series = _findBetween(cont, 'TV Series:', '', maxRes=1)
- if tv_series: mid = re_imdbID.findall(tv_series[0])
- else: mid = None
- if tv_series and mid:
- s_title = _unHtml(tv_series[0])
- s_data = analyze_title(s_title)
- m = Movie(movieID=str(mid[0]), data=s_data,
- accessSystem=self.accessSystem,
- modFunct=self._defModFunct)
- d['kind'] = kind = u'episode'
- d['episode of'] = m
- if kind in ('tv series', 'tv mini series'):
- years = _findBetween(cont, '', '', maxRes=1)
- if years:
- years[:] = _findBetween(years[0], 'TV series', '',
- maxRes=1)
- if years:
- d['series years'] = years[0].strip()
- air_date = _findBetween(cont, 'Original Air Date:', '',
- maxRes=1)
- if air_date:
- air_date = air_date[0]
- vi = air_date.find('(')
- if vi != -1:
- date = _unHtml(air_date[:vi]).strip()
- if date != '????':
- d['original air date'] = date
- air_date = air_date[vi:]
- season = _findBetween(air_date, 'Season', ',', maxRes=1)
- if season:
- season = season[0].strip()
- try: season = int(season)
- except: pass
- if season or type(season) is _inttype:
- d['season'] = season
- episode = _findBetween(air_date, 'Episode', ')', maxRes=1)
- if episode:
- episode = episode[0].strip()
- try: episode = int(episode)
- except: pass
- if episode or type(season) is _inttype:
- d['episode'] = episode
- direct = _findBetween(cont, 'Director', ('', ' '),
- maxRes=1)
- if direct:
- direct = direct[0]
- h5idx = direct.find('/h5>')
- if h5idx != -1:
- direct = direct[h5idx+4:]
- direct = self._getPersons(direct)
- if direct: d['director'] = direct
- if kind in ('tv series', 'tv mini series', 'episode'):
- if kind != 'episode':
- seasons = _findBetween(cont, 'Seasons:', '',
- maxRes=1)
- if seasons:
- d['number of seasons'] = seasons[0].count('|') + 1
- creator = _findBetween(cont, 'Created by', ('class="tn15more"',
- '',
- ' '),
- maxRes=1)
- if not creator:
- # They change 'Created by' to 'Creator' and viceversa
- # from time to time...
- # XXX: is 'Creators' also used?
- creator = _findBetween(cont, 'Creator:',
- ('class="tn15more"', '',
- ' '), maxRes=1)
- if creator:
- creator = creator[0]
- if creator.find('tn15more'): creator = '%s>' % creator
- creator = self._getPersons(creator)
- if creator: d['creator'] = creator
- writers = _findBetween(cont, 'Writer', ('', ' '),
- maxRes=1)
- if writers:
- writers = writers[0]
- h5idx = writers.find('/h5>')
- if h5idx != -1:
- writers = writers[h5idx+4:]
- writers = self._getPersons(writers)
- if writers: d['writer'] = writers
- cvurl = _getTagsWith(cont, 'name="poster"', toClosure=True, maxRes=1)
- if cvurl:
- cvurl = _findBetween(cvurl[0], 'src="', '"', maxRes=1)
- if cvurl: d['cover url'] = cvurl[0]
- genres = _findBetween(cont, 'href="/Sections/Genres/', '/')
- if genres:
- d['genres'] = list(set(genres))
- ur = _findBetween(cont, '', ' ',
- maxRes=1)
- if ur:
- rat = _findBetween(ur[0], '', '', maxRes=1)
- if rat:
- teni = rat[0].find('/10')
- if teni != -1:
- rat = rat[0][:teni]
- try:
- rat = float(rat.strip())
- d['rating'] = rat
- except ValueError:
- self._mobile_logger.warn('wrong rating: %s', rat)
- vi = ur[0].rfind('tn15more">')
- if vi != -1 and ur[0][vi+10:].find('await') == -1:
- try:
- votes = _unHtml(ur[0][vi+10:]).replace('votes', '').strip()
- votes = int(votes.replace(',', ''))
- d['votes'] = votes
- except ValueError:
- self._mobile_logger.warn('wrong votes: %s', ur)
- top250 = _findBetween(cont, 'href="/chart/top?', '', maxRes=1)
- if top250:
- fn = top250[0].rfind('#')
- if fn != -1:
- try:
- td = int(top250[0][fn+1:])
- d['top 250 rank'] = td
- except ValueError:
- self._mobile_logger.warn('wrong top250: %s', top250)
- castdata = _findBetween(cont, 'Cast overview', ' ', maxRes=1)
- if not castdata:
- castdata = _findBetween(cont, 'Credited cast', ' ', maxRes=1)
- if not castdata:
- castdata = _findBetween(cont, 'Complete credited cast', ' ',
- maxRes=1)
- if not castdata:
- castdata = _findBetween(cont, 'Series Cast Summary', ' |
',
- maxRes=1)
- if not castdata:
- castdata = _findBetween(cont, 'Episode Credited cast', '
',
- maxRes=1)
- if castdata:
- castdata = castdata[0]
- # Reintegrate the fist tag.
- fl = castdata.find('href=')
- if fl != -1: castdata = '')
- if smib != -1:
- smie = castdata.rfind(' | ')
- if smie != -1:
- castdata = castdata[:smib].strip() + \
- castdata[smie+18:].strip()
- castdata = castdata.replace('/tr> |
', '', maxRes=1)
- if akas:
- # For some reason, here
is still used in place of
.
- akas[:] = [x for x in akas[0].split('
') if x.strip()]
- akas = [_unHtml(x).replace('" - ','::', 1).lstrip('"').strip()
- for x in akas]
- if 'See more' in akas: akas.remove('See more')
- akas[:] = [x for x in akas if x]
- if akas:
- d['akas'] = akas
- mpaa = _findBetween(cont, 'MPAA:', '', maxRes=1)
- if mpaa: d['mpaa'] = _unHtml(mpaa[0])
- runtimes = _findBetween(cont, 'Runtime:', '', maxRes=1)
- if runtimes:
- runtimes = runtimes[0]
- runtimes = [x.strip().replace(' min', '').replace(' (', '::(', 1)
- for x in runtimes.split('|')]
- d['runtimes'] = [_unHtml(x).strip() for x in runtimes]
- if kind == 'episode':
- # number of episodes.
- epsn = _findBetween(cont, 'title="Full Episode List">', '',
- maxRes=1)
- if epsn:
- epsn = epsn[0].replace(' Episodes', '').strip()
- if epsn:
- try:
- epsn = int(epsn)
- except:
- self._mobile_logger.warn('wrong episodes #: %s', epsn)
- d['number of episodes'] = epsn
- country = _findBetween(cont, 'Country:', '', maxRes=1)
- if country:
- country[:] = country[0].split(' | ')
- country[:] = ['', '::')) for x in country]
- if country: d['countries'] = country
- lang = _findBetween(cont, 'Language:', '', maxRes=1)
- if lang:
- lang[:] = lang[0].split(' | ')
- lang[:] = ['', '::')) for x in lang]
- if lang: d['languages'] = lang
- col = _findBetween(cont, '"/search/title?colors=', '')
- if col:
- col[:] = col[0].split(' | ')
- col[:] = ['', '::')) for x in col]
- if col: d['color info'] = col
- sm = _findBetween(cont, '/search/title?sound_mixes=', '',
- maxRes=1)
- if sm:
- sm[:] = sm[0].split(' | ')
- sm[:] = ['', '::')) for x in sm]
- if sm: d['sound mix'] = sm
- cert = _findBetween(cont, 'Certification:', '', maxRes=1)
- if cert:
- cert[:] = cert[0].split(' | ')
- cert[:] = [_unHtml(x.replace(' ', '::')) for x in cert]
- if cert: d['certificates'] = cert
- plotoutline = _findBetween(cont, 'Plot:', [''],
- maxRes=1)
- if plotoutline:
- plotoutline = plotoutline[0].strip()
- plotoutline = plotoutline.rstrip('|').rstrip()
- if plotoutline: d['plot outline'] = _unHtml(plotoutline)
- aratio = _findBetween(cont, 'Aspect Ratio:', [''],
- maxRes=1)
- if aratio:
- aratio = aratio[0].strip().replace(' (', '::(', 1)
- if aratio:
- d['aspect ratio'] = _unHtml(aratio)
- return {'data': d}
-
- def get_movie_plot(self, movieID):
- cont = self._mretrieve(imdbURL_movie_main % movieID + 'plotsummary')
- plot = _findBetween(cont, '', '
')
- plot[:] = [_unHtml(x) for x in plot]
- for i in xrange(len(plot)):
- p = plot[i]
- wbyidx = p.rfind(' Written by ')
- if wbyidx != -1:
- plot[i] = '%s::%s' % \
- (p[:wbyidx].rstrip(),
- p[wbyidx+12:].rstrip().replace('{','<').replace('}','>'))
- if plot: return {'data': {'plot': plot}}
- return {'data': {}}
-
- def _search_person(self, name, results):
- ##params = urllib.urlencode({'nm': 'on', 'mx': str(results), 'q': name})
- ##params = 'q=%s&nm=on&mx=%s' % (urllib.quote_plus(name), str(results))
- ##cont = self._mretrieve(imdbURL_search % params)
- cont = subXMLRefs(self._get_search_content('nm', name, results))
- name = _findBetween(cont, '', '', maxRes=1)
- res = []
- if not name:
- self._mobile_logger.warn('no title tag searching for name %s', name)
- return res
- nl = name[0].lower()
- if not nl.startswith('imdb name'):
- # a direct hit!
- name = _unHtml(name[0])
- name = name.replace('- Filmography by type' , '').strip()
- pid = None
- pidtag = _getTagsWith(cont, 'rel="canonical"', maxRes=1)
- if pidtag:
- pid = _findBetween(pidtag[0], '/name/nm', '/', maxRes=1)
- if not (pid and name):
- self._mobile_logger.error('no direct hit name/personID for' \
- ' name %s', name)
- return res
- res[:] = [(str(pid[0]), analyze_name(name, canonical=1))]
- else:
- lis = _findBetween(cont, 'td valign="top">', '',
- maxRes=results*3)
- for li in lis:
- akas = _findBetween(li, '"', '"')
- for sep in [' aka', '
birth name']:
- sepIdx = li.find(sep)
- if sepIdx != -1:
- li = li[:sepIdx]
- pid = re_imdbID.findall(li)
- pname = _unHtml(li)
- if not (pid and pname):
- self._mobile_logger.debug('no name/personID parsing' \
- ' %s searching for name %s', li,
- name)
- continue
- resd = analyze_name(pname, canonical=1)
- if akas:
- resd['akas'] = akas
- res.append((str(pid[0]), resd))
- return res
-
- def get_person_main(self, personID, _parseChr=False):
- if not _parseChr:
- url = imdbURL_person_main % personID + 'maindetails'
- else:
- url = imdbURL_character_main % personID
- s = self._mretrieve(url)
- r = {}
- name = _findBetween(s, '', '', maxRes=1)
- if not name:
- if _parseChr: w = 'characterID'
- else: w = 'personID'
- raise IMDbDataAccessError, 'unable to get %s "%s"' % (w, personID)
- name = _unHtml(name[0].replace(' - IMDb', ''))
- if _parseChr:
- name = name.replace('(Character)', '').strip()
- name = name.replace('- Filmography by type', '').strip()
- else:
- name = name.replace('- Filmography by', '').strip()
- r = analyze_name(name, canonical=not _parseChr)
- for dKind in ('Born', 'Died'):
- date = _findBetween(s, '%s:' % dKind.capitalize(),
- ('', '
'), maxRes=1)
- if date:
- date = _unHtml(date[0])
- if date:
- #date, notes = date_and_notes(date)
- # TODO: fix to handle real names.
- date_notes = date.split(' in ', 1)
- notes = u''
- date = date_notes[0]
- if len(date_notes) == 2:
- notes = date_notes[1]
- dtitle = 'birth'
- if dKind == 'Died':
- dtitle = 'death'
- if date:
- r['%s date' % dtitle] = date
- if notes:
- r['%s notes' % dtitle] = notes
- akas = _findBetween(s, 'Alternate Names:', ('
',
- '
'), maxRes=1)
- if akas:
- akas = akas[0]
- if akas.find(' | ') != -1:
- akas = _unHtml(akas).split(' | ')
- else:
- akas = _unHtml(akas).split(' / ')
- if akas: r['akas'] = akas
- hs = _findBetween(s, 'name="headshot"', '', maxRes=1)
- if hs:
- hs[:] = _findBetween(hs[0], 'src="', '"', maxRes=1)
- if hs: r['headshot'] = hs[0]
- # Build a list of tuples such [('hrefLink', 'section name')]
- workkind = _findBetween(s, 'id="jumpto_', '')
- ws = []
- for work in workkind:
- sep = '" >'
- if '">' in work:
- sep = '">'
- wsplit = work.split(sep, 1)
- if len(wsplit) == 2:
- sect = wsplit[0]
- if '"' in sect:
- sect = sect[:sect.find('"')]
- ws.append((sect, wsplit[1].lower()))
- # XXX: I think "guest appearances" are gone.
- if s.find(' tag.
- if _parseChr and sect == 'filmography':
- inisect = s.find('')
- else:
- inisect = s.find('
',))
- for m in mlist:
- fCB = m.find('>')
- if fCB != -1:
- m = m[fCB+1:].lstrip()
- m = re_filmo_episodes.sub('', m)
- # For every movie in the current section.
- movieID = re_imdbID.findall(m)
- if not movieID:
- self._mobile_logger.debug('no movieID in %s', m)
- continue
- m = m.replace('
', ' .... ', 1)
- if not _parseChr:
- chrIndx = m.find(' .... ')
- else:
- chrIndx = m.find(' Played by ')
- chids = []
- if chrIndx != -1:
- chrtxt = m[chrIndx+6:]
- if _parseChr:
- chrtxt = chrtxt[5:]
- for ch in chrtxt.split(' / '):
- chid = re_imdbID.findall(ch)
- if not chid:
- chids.append(None)
- else:
- chids.append(chid[-1])
- if not chids:
- chids = None
- elif len(chids) == 1:
- chids = chids[0]
- movieID = str(movieID[0])
- # Search the status.
- stidx = m.find('
')
- status = u''
- if stidx != -1:
- stendidx = m.rfind('')
- if stendidx != -1:
- status = _unHtml(m[stidx+3:stendidx])
- m = m.replace(m[stidx+3:stendidx], '')
- year = _findBetween(m, 'year_column">', '', maxRes=1)
- if year:
- year = year[0]
- m = m.replace('
%s' % year,
- '')
- else:
- year = None
- m = _unHtml(m)
- if not m:
- self._mobile_logger.warn('no title for movieID %s', movieID)
- continue
- movie = build_movie(m, movieID=movieID, status=status,
- roleID=chids, modFunct=self._defModFunct,
- accessSystem=self.accessSystem,
- _parsingCharacter=_parseChr, year=year)
- sectName = sectName.split(':')[0]
- r.setdefault(sectName, []).append(movie)
- # If available, take the always correct name from a form.
- itag = _getTagsWith(s, 'NAME="primary"', maxRes=1)
- if not itag:
- itag = _getTagsWith(s, 'name="primary"', maxRes=1)
- if itag:
- vtag = _findBetween(itag[0], 'VALUE="', ('"', '>'), maxRes=1)
- if not vtag:
- vtag = _findBetween(itag[0], 'value="', ('"', '>'), maxRes=1)
- if vtag:
- try:
- vtag = unquote(str(vtag[0]))
- vtag = unicode(vtag, 'latin_1')
- r.update(analyze_name(vtag))
- except UnicodeEncodeError:
- pass
- return {'data': r, 'info sets': ('main', 'filmography')}
-
- def get_person_biography(self, personID):
- cont = self._mretrieve(imdbURL_person_main % personID + 'bio')
- d = {}
- spouses = _findBetween(cont, 'Spouse', ('
', ''),
- maxRes=1)
- if spouses:
- sl = []
- for spouse in spouses[0].split(''):
- if spouse.count('') > 1:
- spouse = spouse.replace('', '::', 1)
- spouse = _unHtml(spouse)
- spouse = spouse.replace(':: ', '::').strip()
- if spouse: sl.append(spouse)
- if sl: d['spouse'] = sl
- nnames = _findBetween(cont, 'Nickname
', ('
',''),
- maxRes=1)
- if nnames:
- nnames = nnames[0]
- if nnames:
- nnames = [x.strip().replace(' (', '::(', 1)
- for x in nnames.split('
')]
- if nnames:
- d['nick names'] = nnames
- misc_sects = _findBetween(cont, '', '
')
- misc_sects[:] = [x.split('
') for x in misc_sects]
- misc_sects[:] = [x for x in misc_sects if len(x) == 2]
- for sect, data in misc_sects:
- sect = sect.lower().replace(':', '').strip()
- if d.has_key(sect) and sect != 'mini biography': continue
- elif sect in ('spouse', 'nickname'): continue
- if sect == 'salary': sect = 'salary history'
- elif sect == 'where are they now': sect = 'where now'
- elif sect == 'personal quotes': sect = 'quotes'
- data = data.replace('
', '::')
- data = data.replace('
', ' ') # for multi-paragraphs 'bio'
- data = data.replace('
', '@@@@')
- data = data.replace(' | ', '::')
- data = _unHtml(data)
- data = [x.strip() for x in data.split('::')]
- data[:] = [x.replace('@@@@', '::') for x in data if x]
- if sect == 'height' and data: data = data[0]
- elif sect == 'birth name': data = canonicalName(data[0])
- elif sect == 'date of birth':
- date, notes = date_and_notes(data[0])
- if date:
- d['birth date'] = date
- if notes:
- d['birth notes'] = notes
- continue
- elif sect == 'date of death':
- date, notes = date_and_notes(data[0])
- if date:
- d['death date'] = date
- if notes:
- d['death notes'] = notes
- continue
- elif sect == 'mini biography':
- ndata = []
- for bio in data:
- byidx = bio.rfind('IMDb Mini Biography By')
- if byidx != -1:
- bioAuth = bio[:byidx].rstrip()
- else:
- bioAuth = 'Anonymous'
- bio = u'%s::%s' % (bioAuth, bio[byidx+23:].lstrip())
- ndata.append(bio)
- data[:] = ndata
- if 'mini biography' in d:
- d['mini biography'].append(ndata[0])
- continue
- d[sect] = data
- return {'data': d}
-
- def _search_character(self, name, results):
- cont = subXMLRefs(self._get_search_content('char', name, results))
- name = _findBetween(cont, '', '', maxRes=1)
- res = []
- if not name:
- self._mobile_logger.error('no title tag searching character %s',
- name)
- return res
- nl = name[0].lower()
- if not (nl.startswith('imdb search') or nl.startswith('imdb search') \
- or nl.startswith('imdb character')):
- # a direct hit!
- name = _unHtml(name[0]).replace('(Character)', '').strip()
- pid = None
- pidtag = _getTagsWith(cont, 'rel="canonical"', maxRes=1)
- if pidtag:
- pid = _findBetween(pidtag[0], '/character/ch', '/', maxRes=1)
- if not (pid and name):
- self._mobile_logger.error('no direct hit name/characterID for' \
- ' character %s', name)
- return res
- res[:] = [(str(pid[0]), analyze_name(name))]
- else:
- sects = _findBetween(cont, 'Popular Characters', '',
- maxRes=results*3)
- sects += _findBetween(cont, 'Characters', '',
- maxRes=results*3)
- for sect in sects:
- lis = _findBetween(sect, '',
- ('', ''), maxRes=1)
- if intro:
- intro = _unHtml(intro[0]).strip()
- if intro:
- d['introduction'] = intro
- bios = _findBetween(cont, '',
- '
')
- if bios:
- bios = _findBetween(bios[0], '
', ('', '
'))
- if bios:
- for bio in bios:
- bio = bio.replace('', '::')
- bio = bio.replace('\n', ' ')
- bio = bio.replace('
', '\n')
- bio = bio.replace('
', '\n')
- bio = subSGMLRefs(re_unhtmlsub('', bio).strip())
- bio = bio.replace(' ::', '::').replace(':: ', '::')
- bio = bio.replace('::', ': ', 1)
- if bio:
- d.setdefault('biography', []).append(bio)
- return {'data': d}
-
-
diff --git a/libs/imdb/parser/sql/__init__.py b/libs/imdb/parser/sql/__init__.py
deleted file mode 100644
index 22510e6c..00000000
--- a/libs/imdb/parser/sql/__init__.py
+++ /dev/null
@@ -1,1589 +0,0 @@
-"""
-parser.sql package (imdb package).
-
-This package provides the IMDbSqlAccessSystem class used to access
-IMDb's data through a SQL database. Every database supported by
-the SQLObject _AND_ SQLAlchemy Object Relational Managers is available.
-the imdb.IMDb function will return an instance of this class when
-called with the 'accessSystem' argument set to "sql", "database" or "db".
-
-Copyright 2005-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-# FIXME: this whole module was written in a veeery short amount of time.
-# The code should be commented, rewritten and cleaned. :-)
-
-import re
-import logging
-from difflib import SequenceMatcher
-from codecs import lookup
-
-from imdb import IMDbBase
-from imdb.utils import normalizeName, normalizeTitle, build_title, \
- build_name, analyze_name, analyze_title, \
- canonicalTitle, canonicalName, re_titleRef, \
- build_company_name, re_episodes, _unicodeArticles, \
- analyze_company_name, re_year_index, re_nameRef
-from imdb.Person import Person
-from imdb.Movie import Movie
-from imdb.Company import Company
-from imdb._exceptions import IMDbDataAccessError, IMDbError
-
-
-# Logger for miscellaneous functions.
-_aux_logger = logging.getLogger('imdbpy.parser.sql.aux')
-
-# =============================
-# Things that once upon a time were in imdb.parser.common.locsql.
-
-def titleVariations(title, fromPtdf=0):
- """Build title variations useful for searches; if fromPtdf is true,
- the input is assumed to be in the plain text data files format."""
- if fromPtdf: title1 = u''
- else: title1 = title
- title2 = title3 = u''
- if fromPtdf or re_year_index.search(title):
- # If it appears to have a (year[/imdbIndex]) indication,
- # assume that a long imdb canonical name was provided.
- titldict = analyze_title(title, canonical=1)
- # title1: the canonical name.
- title1 = titldict['title']
- if titldict['kind'] != 'episode':
- # title3: the long imdb canonical name.
- if fromPtdf: title3 = title
- else: title3 = build_title(titldict, canonical=1, ptdf=1)
- else:
- title1 = normalizeTitle(title1)
- title3 = build_title(titldict, canonical=1, ptdf=1)
- else:
- # Just a title.
- # title1: the canonical title.
- title1 = canonicalTitle(title)
- title3 = u''
- # title2 is title1 without the article, or title1 unchanged.
- if title1:
- title2 = title1
- t2s = title2.split(u', ')
- if t2s[-1].lower() in _unicodeArticles:
- title2 = u', '.join(t2s[:-1])
- _aux_logger.debug('title variations: 1:[%s] 2:[%s] 3:[%s]',
- title1, title2, title3)
- return title1, title2, title3
-
-
-re_nameIndex = re.compile(r'\(([IVXLCDM]+)\)')
-
-def nameVariations(name, fromPtdf=0):
- """Build name variations useful for searches; if fromPtdf is true,
- the input is assumed to be in the plain text data files format."""
- name1 = name2 = name3 = u''
- if fromPtdf or re_nameIndex.search(name):
- # We've a name with an (imdbIndex)
- namedict = analyze_name(name, canonical=1)
- # name1 is the name in the canonical format.
- name1 = namedict['name']
- # name3 is the canonical name with the imdbIndex.
- if fromPtdf:
- if namedict.has_key('imdbIndex'):
- name3 = name
- else:
- name3 = build_name(namedict, canonical=1)
- else:
- # name1 is the name in the canonical format.
- name1 = canonicalName(name)
- name3 = u''
- # name2 is the name in the normal format, if it differs from name1.
- name2 = normalizeName(name1)
- if name1 == name2: name2 = u''
- _aux_logger.debug('name variations: 1:[%s] 2:[%s] 3:[%s]',
- name1, name2, name3)
- return name1, name2, name3
-
-
-try:
- from cutils import ratcliff as _ratcliff
- def ratcliff(s1, s2, sm):
- """Return the Ratcliff-Obershelp value between the two strings,
- using the C implementation."""
- return _ratcliff(s1.encode('latin_1', 'replace'),
- s2.encode('latin_1', 'replace'))
-except ImportError:
- _aux_logger.warn('Unable to import the cutils.ratcliff function.'
- ' Searching names and titles using the "sql"'
- ' data access system will be slower.')
-
- def ratcliff(s1, s2, sm):
- """Ratcliff-Obershelp similarity."""
- STRING_MAXLENDIFFER = 0.7
- s1len = len(s1)
- s2len = len(s2)
- if s1len < s2len:
- threshold = float(s1len) / s2len
- else:
- threshold = float(s2len) / s1len
- if threshold < STRING_MAXLENDIFFER:
- return 0.0
- sm.set_seq2(s2.lower())
- return sm.ratio()
-
-
-def merge_roles(mop):
- """Merge multiple roles."""
- new_list = []
- for m in mop:
- if m in new_list:
- keep_this = new_list[new_list.index(m)]
- if not isinstance(keep_this.currentRole, list):
- keep_this.currentRole = [keep_this.currentRole]
- keep_this.currentRole.append(m.currentRole)
- else:
- new_list.append(m)
- return new_list
-
-
-def scan_names(name_list, name1, name2, name3, results=0, ro_thresold=None,
- _scan_character=False):
- """Scan a list of names, searching for best matches against
- the given variations."""
- if ro_thresold is not None: RO_THRESHOLD = ro_thresold
- else: RO_THRESHOLD = 0.6
- sm1 = SequenceMatcher()
- sm2 = SequenceMatcher()
- sm3 = SequenceMatcher()
- sm1.set_seq1(name1.lower())
- if name2: sm2.set_seq1(name2.lower())
- if name3: sm3.set_seq1(name3.lower())
- resd = {}
- for i, n_data in name_list:
- nil = n_data['name']
- # XXX: on Symbian, here we get a str; not sure this is the
- # right place to fix it.
- if isinstance(nil, str):
- nil = unicode(nil, 'latin1', 'ignore')
- # Distance with the canonical name.
- ratios = [ratcliff(name1, nil, sm1) + 0.05]
- namesurname = u''
- if not _scan_character:
- nils = nil.split(', ', 1)
- surname = nils[0]
- if len(nils) == 2: namesurname = '%s %s' % (nils[1], surname)
- else:
- nils = nil.split(' ', 1)
- surname = nils[-1]
- namesurname = nil
- if surname != nil:
- # Distance with the "Surname" in the database.
- ratios.append(ratcliff(name1, surname, sm1))
- if not _scan_character:
- ratios.append(ratcliff(name1, namesurname, sm1))
- if name2:
- ratios.append(ratcliff(name2, surname, sm2))
- # Distance with the "Name Surname" in the database.
- if namesurname:
- ratios.append(ratcliff(name2, namesurname, sm2))
- if name3:
- # Distance with the long imdb canonical name.
- ratios.append(ratcliff(name3,
- build_name(n_data, canonical=1), sm3) + 0.1)
- ratio = max(ratios)
- if ratio >= RO_THRESHOLD:
- if resd.has_key(i):
- if ratio > resd[i][0]: resd[i] = (ratio, (i, n_data))
- else: resd[i] = (ratio, (i, n_data))
- res = resd.values()
- res.sort()
- res.reverse()
- if results > 0: res[:] = res[:results]
- return res
-
-
-def scan_titles(titles_list, title1, title2, title3, results=0,
- searchingEpisode=0, onlyEpisodes=0, ro_thresold=None):
- """Scan a list of titles, searching for best matches against
- the given variations."""
- if ro_thresold is not None: RO_THRESHOLD = ro_thresold
- else: RO_THRESHOLD = 0.6
- sm1 = SequenceMatcher()
- sm2 = SequenceMatcher()
- sm3 = SequenceMatcher()
- sm1.set_seq1(title1.lower())
- sm2.set_seq2(title2.lower())
- if title3:
- sm3.set_seq1(title3.lower())
- if title3[-1] == '}': searchingEpisode = 1
- hasArt = 0
- if title2 != title1: hasArt = 1
- resd = {}
- for i, t_data in titles_list:
- if onlyEpisodes:
- if t_data.get('kind') != 'episode':
- continue
- til = t_data['title']
- if til[-1] == ')':
- dateIdx = til.rfind('(')
- if dateIdx != -1:
- til = til[:dateIdx].rstrip()
- if not til:
- continue
- ratio = ratcliff(title1, til, sm1)
- if ratio >= RO_THRESHOLD:
- resd[i] = (ratio, (i, t_data))
- continue
- if searchingEpisode:
- if t_data.get('kind') != 'episode': continue
- elif t_data.get('kind') == 'episode': continue
- til = t_data['title']
- # XXX: on Symbian, here we get a str; not sure this is the
- # right place to fix it.
- if isinstance(til, str):
- til = unicode(til, 'latin1', 'ignore')
- # Distance with the canonical title (with or without article).
- # titleS -> titleR
- # titleS, the -> titleR, the
- if not searchingEpisode:
- til = canonicalTitle(til)
- ratios = [ratcliff(title1, til, sm1) + 0.05]
- # til2 is til without the article, if present.
- til2 = til
- tils = til2.split(', ')
- matchHasArt = 0
- if tils[-1].lower() in _unicodeArticles:
- til2 = ', '.join(tils[:-1])
- matchHasArt = 1
- if hasArt and not matchHasArt:
- # titleS[, the] -> titleR
- ratios.append(ratcliff(title2, til, sm2))
- elif matchHasArt and not hasArt:
- # titleS -> titleR[, the]
- ratios.append(ratcliff(title1, til2, sm1))
- else:
- ratios = [0.0]
- if title3:
- # Distance with the long imdb canonical title.
- ratios.append(ratcliff(title3,
- build_title(t_data, canonical=1, ptdf=1), sm3) + 0.1)
- ratio = max(ratios)
- if ratio >= RO_THRESHOLD:
- if resd.has_key(i):
- if ratio > resd[i][0]:
- resd[i] = (ratio, (i, t_data))
- else: resd[i] = (ratio, (i, t_data))
- res = resd.values()
- res.sort()
- res.reverse()
- if results > 0: res[:] = res[:results]
- return res
-
-
-def scan_company_names(name_list, name1, results=0, ro_thresold=None):
- """Scan a list of company names, searching for best matches against
- the given name. Notice that this function takes a list of
- strings, and not a list of dictionaries."""
- if ro_thresold is not None: RO_THRESHOLD = ro_thresold
- else: RO_THRESHOLD = 0.6
- sm1 = SequenceMatcher()
- sm1.set_seq1(name1.lower())
- resd = {}
- withoutCountry = not name1.endswith(']')
- for i, n in name_list:
- # XXX: on Symbian, here we get a str; not sure this is the
- # right place to fix it.
- if isinstance(n, str):
- n = unicode(n, 'latin1', 'ignore')
- o_name = n
- var = 0.0
- if withoutCountry and n.endswith(']'):
- cidx = n.rfind('[')
- if cidx != -1:
- n = n[:cidx].rstrip()
- var = -0.05
- # Distance with the company name.
- ratio = ratcliff(name1, n, sm1) + var
- if ratio >= RO_THRESHOLD:
- if resd.has_key(i):
- if ratio > resd[i][0]: resd[i] = (ratio,
- (i, analyze_company_name(o_name)))
- else:
- resd[i] = (ratio, (i, analyze_company_name(o_name)))
- res = resd.values()
- res.sort()
- res.reverse()
- if results > 0: res[:] = res[:results]
- return res
-
-
-try:
- from cutils import soundex
-except ImportError:
- _aux_logger.warn('Unable to import the cutils.soundex function.'
- ' Searches of movie titles and person names will be'
- ' a bit slower.')
-
- _translate = dict(B='1', C='2', D='3', F='1', G='2', J='2', K='2', L='4',
- M='5', N='5', P='1', Q='2', R='6', S='2', T='3', V='1',
- X='2', Z='2')
- _translateget = _translate.get
- _re_non_ascii = re.compile(r'^[^a-z]*', re.I)
- SOUNDEX_LEN = 5
-
- def soundex(s):
- """Return the soundex code for the given string."""
- # Maximum length of the soundex code.
- s = _re_non_ascii.sub('', s)
- if not s: return None
- s = s.upper()
- soundCode = s[0]
- for c in s[1:]:
- cw = _translateget(c, '0')
- if cw != '0' and soundCode[-1] != cw:
- soundCode += cw
- return soundCode[:SOUNDEX_LEN] or None
-
-
-def _sortKeywords(keyword, kwds):
- """Sort a list of keywords, based on the searched one."""
- sm = SequenceMatcher()
- sm.set_seq1(keyword.lower())
- ratios = [(ratcliff(keyword, k, sm), k) for k in kwds]
- checkContained = False
- if len(keyword) > 4:
- checkContained = True
- for idx, data in enumerate(ratios):
- ratio, key = data
- if key.startswith(keyword):
- ratios[idx] = (ratio+0.5, key)
- elif checkContained and keyword in key:
- ratios[idx] = (ratio+0.3, key)
- ratios.sort()
- ratios.reverse()
- return [r[1] for r in ratios]
-
-
-def filterSimilarKeywords(keyword, kwdsIterator):
- """Return a sorted list of keywords similar to the one given."""
- seenDict = {}
- kwdSndx = soundex(keyword.encode('ascii', 'ignore'))
- matches = []
- matchesappend = matches.append
- checkContained = False
- if len(keyword) > 4:
- checkContained = True
- for movieID, key in kwdsIterator:
- if key in seenDict:
- continue
- seenDict[key] = None
- if checkContained and keyword in key:
- matchesappend(key)
- continue
- if kwdSndx == soundex(key.encode('ascii', 'ignore')):
- matchesappend(key)
- return _sortKeywords(keyword, matches)
-
-
-
-# =============================
-
-_litlist = ['screenplay/teleplay', 'novel', 'adaption', 'book',
- 'production process protocol', 'interviews',
- 'printed media reviews', 'essays', 'other literature']
-_litd = dict([(x, ('literature', x)) for x in _litlist])
-
-_buslist = ['budget', 'weekend gross', 'gross', 'opening weekend', 'rentals',
- 'admissions', 'filming dates', 'production dates', 'studios',
- 'copyright holder']
-_busd = dict([(x, ('business', x)) for x in _buslist])
-
-
-def _reGroupDict(d, newgr):
- """Regroup keys in the d dictionary in subdictionaries, based on
- the scheme in the newgr dictionary.
- E.g.: in the newgr, an entry 'LD label': ('laserdisc', 'label')
- tells the _reGroupDict() function to take the entry with
- label 'LD label' (as received from the sql database)
- and put it in the subsection (another dictionary) named
- 'laserdisc', using the key 'label'."""
- r = {}
- newgrks = newgr.keys()
- for k, v in d.items():
- if k in newgrks:
- r.setdefault(newgr[k][0], {})[newgr[k][1]] = v
- # A not-so-clearer version:
- ##r.setdefault(newgr[k][0], {})
- ##r[newgr[k][0]][newgr[k][1]] = v
- else: r[k] = v
- return r
-
-
-def _groupListBy(l, index):
- """Regroup items in a list in a list of lists, grouped by
- the value at the given index."""
- tmpd = {}
- for item in l:
- tmpd.setdefault(item[index], []).append(item)
- res = tmpd.values()
- return res
-
-
-def sub_dict(d, keys):
- """Return the subdictionary of 'd', with just the keys listed in 'keys'."""
- return dict([(k, d[k]) for k in keys if k in d])
-
-
-def get_movie_data(movieID, kindDict, fromAka=0, _table=None):
- """Return a dictionary containing data about the given movieID;
- if fromAka is true, the AkaTitle table is searched; _table is
- reserved for the imdbpy2sql.py script."""
- if _table is not None:
- Table = _table
- else:
- if not fromAka: Table = Title
- else: Table = AkaTitle
- m = Table.get(movieID)
- mdict = {'title': m.title, 'kind': kindDict[m.kindID],
- 'year': m.productionYear, 'imdbIndex': m.imdbIndex,
- 'season': m.seasonNr, 'episode': m.episodeNr}
- if not fromAka:
- if m.seriesYears is not None:
- mdict['series years'] = unicode(m.seriesYears)
- if mdict['imdbIndex'] is None: del mdict['imdbIndex']
- if mdict['year'] is None: del mdict['year']
- else:
- try:
- mdict['year'] = int(mdict['year'])
- except (TypeError, ValueError):
- del mdict['year']
- if mdict['season'] is None: del mdict['season']
- else:
- try: mdict['season'] = int(mdict['season'])
- except: pass
- if mdict['episode'] is None: del mdict['episode']
- else:
- try: mdict['episode'] = int(mdict['episode'])
- except: pass
- episodeOfID = m.episodeOfID
- if episodeOfID is not None:
- ser_dict = get_movie_data(episodeOfID, kindDict, fromAka)
- mdict['episode of'] = Movie(data=ser_dict, movieID=episodeOfID,
- accessSystem='sql')
- if fromAka:
- ser_note = AkaTitle.get(episodeOfID).note
- if ser_note:
- mdict['episode of'].notes = ser_note
- return mdict
-
-
-def _iterKeywords(results):
- """Iterate over (key.id, key.keyword) columns of a selection of
- the Keyword table."""
- for key in results:
- yield key.id, key.keyword
-
-
-def getSingleInfo(table, movieID, infoType, notAList=False):
- """Return a dictionary in the form {infoType: infoListOrString},
- retrieving a single set of information about a given movie, from
- the specified table."""
- infoTypeID = InfoType.select(InfoType.q.info == infoType)
- if infoTypeID.count() == 0:
- return {}
- res = table.select(AND(table.q.movieID == movieID,
- table.q.infoTypeID == infoTypeID[0].id))
- retList = []
- for r in res:
- info = r.info
- note = r.note
- if note:
- info += u'::%s' % note
- retList.append(info)
- if not retList:
- return {}
- if not notAList: return {infoType: retList}
- else: return {infoType: retList[0]}
-
-
-def _cmpTop(a, b, what='top 250 rank'):
- """Compare function used to sort top 250/bottom 10 rank."""
- av = int(a[1].get(what))
- bv = int(b[1].get(what))
- if av == bv:
- return 0
- return (-1, 1)[av > bv]
-
-def _cmpBottom(a, b):
- """Compare function used to sort top 250/bottom 10 rank."""
- return _cmpTop(a, b, what='bottom 10 rank')
-
-
-class IMDbSqlAccessSystem(IMDbBase):
- """The class used to access IMDb's data through a SQL database."""
-
- accessSystem = 'sql'
- _sql_logger = logging.getLogger('imdbpy.parser.sql')
-
- def __init__(self, uri, adultSearch=1, useORM=None, *arguments, **keywords):
- """Initialize the access system."""
- IMDbBase.__init__(self, *arguments, **keywords)
- if useORM is None:
- useORM = ('sqlobject', 'sqlalchemy')
- if not isinstance(useORM, (tuple, list)):
- if ',' in useORM:
- useORM = useORM.split(',')
- else:
- useORM = [useORM]
- self.useORM = useORM
- nrMods = len(useORM)
- _gotError = False
- DB_TABLES = []
- for idx, mod in enumerate(useORM):
- mod = mod.strip().lower()
- try:
- if mod == 'sqlalchemy':
- from alchemyadapter import getDBTables, NotFoundError, \
- setConnection, AND, OR, IN, \
- ISNULL, CONTAINSSTRING, toUTF8
- elif mod == 'sqlobject':
- from objectadapter import getDBTables, NotFoundError, \
- setConnection, AND, OR, IN, \
- ISNULL, CONTAINSSTRING, toUTF8
- else:
- self._sql_logger.warn('unknown module "%s"' % mod)
- continue
- self._sql_logger.info('using %s ORM', mod)
- # XXX: look ma'... black magic! It's used to make
- # TableClasses and some functions accessible
- # through the whole module.
- for k, v in [('NotFoundError', NotFoundError),
- ('AND', AND), ('OR', OR), ('IN', IN),
- ('ISNULL', ISNULL),
- ('CONTAINSSTRING', CONTAINSSTRING)]:
- globals()[k] = v
- self.toUTF8 = toUTF8
- DB_TABLES = getDBTables(uri)
- for t in DB_TABLES:
- globals()[t._imdbpyName] = t
- if _gotError:
- self._sql_logger.warn('falling back to "%s"' % mod)
- break
- except ImportError, e:
- if idx+1 >= nrMods:
- raise IMDbError, 'unable to use any ORM in %s: %s' % (
- str(useORM), str(e))
- else:
- self._sql_logger.warn('unable to use "%s": %s' % (mod,
- str(e)))
- _gotError = True
- continue
- else:
- raise IMDbError, 'unable to use any ORM in %s' % str(useORM)
- # Set the connection to the database.
- self._sql_logger.debug('connecting to %s', uri)
- try:
- self._connection = setConnection(uri, DB_TABLES)
- except AssertionError, e:
- raise IMDbDataAccessError, \
- 'unable to connect to the database server; ' + \
- 'complete message: "%s"' % str(e)
- self.Error = self._connection.module.Error
- # Maps some IDs to the corresponding strings.
- self._kind = {}
- self._kindRev = {}
- self._sql_logger.debug('reading constants from the database')
- try:
- for kt in KindType.select():
- self._kind[kt.id] = kt.kind
- self._kindRev[str(kt.kind)] = kt.id
- except self.Error:
- # NOTE: you can also get the error, but - at least with
- # MySQL - it also contains the password, and I don't
- # like the idea to print it out.
- raise IMDbDataAccessError, \
- 'unable to connect to the database server'
- self._role = {}
- for rl in RoleType.select():
- self._role[rl.id] = str(rl.role)
- self._info = {}
- self._infoRev = {}
- for inf in InfoType.select():
- self._info[inf.id] = str(inf.info)
- self._infoRev[str(inf.info)] = inf.id
- self._compType = {}
- for cType in CompanyType.select():
- self._compType[cType.id] = cType.kind
- info = [(it.id, it.info) for it in InfoType.select()]
- self._compcast = {}
- for cc in CompCastType.select():
- self._compcast[cc.id] = str(cc.kind)
- self._link = {}
- for lt in LinkType.select():
- self._link[lt.id] = str(lt.link)
- self._moviesubs = {}
- # Build self._moviesubs, a dictionary used to rearrange
- # the data structure for a movie object.
- for vid, vinfo in info:
- if not vinfo.startswith('LD '): continue
- self._moviesubs[vinfo] = ('laserdisc', vinfo[3:])
- self._moviesubs.update(_litd)
- self._moviesubs.update(_busd)
- self.do_adult_search(adultSearch)
-
- def _findRefs(self, o, trefs, nrefs):
- """Find titles or names references in strings."""
- if isinstance(o, (unicode, str)):
- for title in re_titleRef.findall(o):
- a_title = analyze_title(title, canonical=0)
- rtitle = build_title(a_title, ptdf=1)
- if trefs.has_key(rtitle): continue
- movieID = self._getTitleID(rtitle)
- if movieID is None:
- movieID = self._getTitleID(title)
- if movieID is None:
- continue
- m = Movie(title=rtitle, movieID=movieID,
- accessSystem=self.accessSystem)
- trefs[rtitle] = m
- rtitle2 = canonicalTitle(a_title.get('title', u''))
- if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
- trefs[rtitle2] = m
- if title != rtitle:
- trefs[title] = m
- for name in re_nameRef.findall(o):
- a_name = analyze_name(name, canonical=1)
- rname = build_name(a_name, canonical=1)
- if nrefs.has_key(rname): continue
- personID = self._getNameID(rname)
- if personID is None:
- personID = self._getNameID(name)
- if personID is None: continue
- p = Person(name=rname, personID=personID,
- accessSystem=self.accessSystem)
- nrefs[rname] = p
- rname2 = normalizeName(a_name.get('name', u''))
- if rname2 and rname2 != rname:
- nrefs[rname2] = p
- if name != rname and name != rname2:
- nrefs[name] = p
- elif isinstance(o, (list, tuple)):
- for item in o:
- self._findRefs(item, trefs, nrefs)
- elif isinstance(o, dict):
- for value in o.values():
- self._findRefs(value, trefs, nrefs)
- return (trefs, nrefs)
-
- def _extractRefs(self, o):
- """Scan for titles or names references in strings."""
- trefs = {}
- nrefs = {}
- try:
- return self._findRefs(o, trefs, nrefs)
- except RuntimeError, e:
- # Symbian/python 2.2 has a poor regexp implementation.
- import warnings
- warnings.warn('RuntimeError in '
- "imdb.parser.sql.IMDbSqlAccessSystem; "
- "if it's not a recursion limit exceeded and we're not "
- "running in a Symbian environment, it's a bug:\n%s" % e)
- return (trefs, nrefs)
-
- def _changeAKAencoding(self, akanotes, akatitle):
- """Return akatitle in the correct charset, as specified in
- the akanotes field; if akatitle doesn't need to be modified,
- return None."""
- oti = akanotes.find('(original ')
- if oti == -1: return None
- ote = akanotes[oti+10:].find(' title)')
- if ote != -1:
- cs_info = akanotes[oti+10:oti+10+ote].lower().split()
- for e in cs_info:
- # excludes some strings that clearly are not encoding.
- if e in ('script', '', 'cyrillic', 'greek'): continue
- if e.startswith('iso-') and e.find('latin') != -1:
- e = e[4:].replace('-', '')
- try:
- lookup(e)
- lat1 = akatitle.encode('latin_1', 'replace')
- return unicode(lat1, e, 'replace')
- except (LookupError, ValueError, TypeError):
- continue
- return None
-
- def _buildNULLCondition(self, col, val):
- """Build a comparison for columns where values can be NULL."""
- if val is None:
- return ISNULL(col)
- else:
- if isinstance(val, (int, long)):
- return col == val
- else:
- return col == self.toUTF8(val)
-
- def _getTitleID(self, title):
- """Given a long imdb canonical title, returns a movieID or
- None if not found."""
- td = analyze_title(title)
- condition = None
- if td['kind'] == 'episode':
- epof = td['episode of']
- seriesID = [s.id for s in Title.select(
- AND(Title.q.title == self.toUTF8(epof['title']),
- self._buildNULLCondition(Title.q.imdbIndex,
- epof.get('imdbIndex')),
- Title.q.kindID == self._kindRev[epof['kind']],
- self._buildNULLCondition(Title.q.productionYear,
- epof.get('year'))))]
- if seriesID:
- condition = AND(IN(Title.q.episodeOfID, seriesID),
- Title.q.title == self.toUTF8(td['title']),
- self._buildNULLCondition(Title.q.imdbIndex,
- td.get('imdbIndex')),
- Title.q.kindID == self._kindRev[td['kind']],
- self._buildNULLCondition(Title.q.productionYear,
- td.get('year')))
- if condition is None:
- condition = AND(Title.q.title == self.toUTF8(td['title']),
- self._buildNULLCondition(Title.q.imdbIndex,
- td.get('imdbIndex')),
- Title.q.kindID == self._kindRev[td['kind']],
- self._buildNULLCondition(Title.q.productionYear,
- td.get('year')))
- res = Title.select(condition)
- try:
- if res.count() != 1:
- return None
- except (UnicodeDecodeError, TypeError):
- return None
- return res[0].id
-
- def _getNameID(self, name):
- """Given a long imdb canonical name, returns a personID or
- None if not found."""
- nd = analyze_name(name)
- res = Name.select(AND(Name.q.name == self.toUTF8(nd['name']),
- self._buildNULLCondition(Name.q.imdbIndex,
- nd.get('imdbIndex'))))
- try:
- c = res.count()
- if res.count() != 1:
- return None
- except (UnicodeDecodeError, TypeError):
- return None
- return res[0].id
-
- def _normalize_movieID(self, movieID):
- """Normalize the given movieID."""
- try:
- return int(movieID)
- except (ValueError, OverflowError):
- raise IMDbError, 'movieID "%s" can\'t be converted to integer' % \
- movieID
-
- def _normalize_personID(self, personID):
- """Normalize the given personID."""
- try:
- return int(personID)
- except (ValueError, OverflowError):
- raise IMDbError, 'personID "%s" can\'t be converted to integer' % \
- personID
-
- def _normalize_characterID(self, characterID):
- """Normalize the given characterID."""
- try:
- return int(characterID)
- except (ValueError, OverflowError):
- raise IMDbError, 'characterID "%s" can\'t be converted to integer' \
- % characterID
-
- def _normalize_companyID(self, companyID):
- """Normalize the given companyID."""
- try:
- return int(companyID)
- except (ValueError, OverflowError):
- raise IMDbError, 'companyID "%s" can\'t be converted to integer' \
- % companyID
-
- def get_imdbMovieID(self, movieID):
- """Translate a movieID in an imdbID.
- If not in the database, try an Exact Primary Title search on IMDb;
- return None if it's unable to get the imdbID.
- """
- try: movie = Title.get(movieID)
- except NotFoundError: return None
- imdbID = movie.imdbID
- if imdbID is not None: return '%07d' % imdbID
- m_dict = get_movie_data(movie.id, self._kind)
- titline = build_title(m_dict, ptdf=1)
- imdbID = self.title2imdbID(titline)
- # If the imdbID was retrieved from the web and was not in the
- # database, update the database (ignoring errors, because it's
- # possibile that the current user has not update privileges).
- # There're times when I think I'm a genius; this one of
- # those times...
- if imdbID is not None:
- try: movie.imdbID = int(imdbID)
- except: pass
- return imdbID
-
- def get_imdbPersonID(self, personID):
- """Translate a personID in an imdbID.
- If not in the database, try an Exact Primary Name search on IMDb;
- return None if it's unable to get the imdbID.
- """
- try: person = Name.get(personID)
- except NotFoundError: return None
- imdbID = person.imdbID
- if imdbID is not None: return '%07d' % imdbID
- n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
- namline = build_name(n_dict, canonical=1)
- imdbID = self.name2imdbID(namline)
- if imdbID is not None:
- try: person.imdbID = int(imdbID)
- except: pass
- return imdbID
-
- def get_imdbCharacterID(self, characterID):
- """Translate a characterID in an imdbID.
- If not in the database, try an Exact Primary Name search on IMDb;
- return None if it's unable to get the imdbID.
- """
- try: character = CharName.get(characterID)
- except NotFoundError: return None
- imdbID = character.imdbID
- if imdbID is not None: return '%07d' % imdbID
- n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
- namline = build_name(n_dict, canonical=1)
- imdbID = self.character2imdbID(namline)
- if imdbID is not None:
- try: character.imdbID = int(imdbID)
- except: pass
- return imdbID
-
- def get_imdbCompanyID(self, companyID):
- """Translate a companyID in an imdbID.
- If not in the database, try an Exact Primary Name search on IMDb;
- return None if it's unable to get the imdbID.
- """
- try: company = CompanyName.get(companyID)
- except NotFoundError: return None
- imdbID = company.imdbID
- if imdbID is not None: return '%07d' % imdbID
- n_dict = {'name': company.name, 'country': company.countryCode}
- namline = build_company_name(n_dict)
- imdbID = self.company2imdbID(namline)
- if imdbID is not None:
- try: company.imdbID = int(imdbID)
- except: pass
- return imdbID
-
- def do_adult_search(self, doAdult):
- """If set to 0 or False, movies in the Adult category are not
- episodeOf = title_dict.get('episode of')
- shown in the results of a search."""
- self.doAdult = doAdult
-
- def _search_movie(self, title, results, _episodes=False):
- title = title.strip()
- if not title: return []
- title_dict = analyze_title(title, canonical=1)
- s_title = title_dict['title']
- if not s_title: return []
- episodeOf = title_dict.get('episode of')
- if episodeOf:
- _episodes = False
- s_title_split = s_title.split(', ')
- if len(s_title_split) > 1 and \
- s_title_split[-1].lower() in _unicodeArticles:
- s_title_rebuilt = ', '.join(s_title_split[:-1])
- if s_title_rebuilt:
- s_title = s_title_rebuilt
- #if not episodeOf:
- # if not _episodes:
- # s_title_split = s_title.split(', ')
- # if len(s_title_split) > 1 and \
- # s_title_split[-1].lower() in _articles:
- # s_title_rebuilt = ', '.join(s_title_split[:-1])
- # if s_title_rebuilt:
- # s_title = s_title_rebuilt
- #else:
- # _episodes = False
- if isinstance(s_title, unicode):
- s_title = s_title.encode('ascii', 'ignore')
-
- soundexCode = soundex(s_title)
-
- # XXX: improve the search restricting the kindID if the
- # "kind" of the input differs from "movie"?
- condition = conditionAka = None
- if _episodes:
- condition = AND(Title.q.phoneticCode == soundexCode,
- Title.q.kindID == self._kindRev['episode'])
- conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
- AkaTitle.q.kindID == self._kindRev['episode'])
- elif title_dict['kind'] == 'episode' and episodeOf is not None:
- # set canonical=0 ? Should not make much difference.
- series_title = build_title(episodeOf, canonical=1)
- # XXX: is it safe to get "results" results?
- # Too many? Too few?
- serRes = results
- if serRes < 3 or serRes > 10:
- serRes = 10
- searchSeries = self._search_movie(series_title, serRes)
- seriesIDs = [result[0] for result in searchSeries]
- if seriesIDs:
- condition = AND(Title.q.phoneticCode == soundexCode,
- IN(Title.q.episodeOfID, seriesIDs),
- Title.q.kindID == self._kindRev['episode'])
- conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
- IN(AkaTitle.q.episodeOfID, seriesIDs),
- AkaTitle.q.kindID == self._kindRev['episode'])
- else:
- # XXX: bad situation: we have found no matching series;
- # try searching everything (both episodes and
- # non-episodes) for the title.
- condition = AND(Title.q.phoneticCode == soundexCode,
- IN(Title.q.episodeOfID, seriesIDs))
- conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
- IN(AkaTitle.q.episodeOfID, seriesIDs))
- if condition is None:
- # XXX: excludes episodes?
- condition = AND(Title.q.kindID != self._kindRev['episode'],
- Title.q.phoneticCode == soundexCode)
- conditionAka = AND(AkaTitle.q.kindID != self._kindRev['episode'],
- AkaTitle.q.phoneticCode == soundexCode)
-
- # Up to 3 variations of the title are searched, plus the
- # long imdb canonical title, if provided.
- if not _episodes:
- title1, title2, title3 = titleVariations(title)
- else:
- title1 = title
- title2 = ''
- title3 = ''
- try:
- qr = [(q.id, get_movie_data(q.id, self._kind))
- for q in Title.select(condition)]
- q2 = [(q.movieID, get_movie_data(q.id, self._kind, fromAka=1))
- for q in AkaTitle.select(conditionAka)]
- qr += q2
- except NotFoundError, e:
- raise IMDbDataAccessError, \
- 'unable to search the database: "%s"' % str(e)
-
- resultsST = results * 3
- res = scan_titles(qr, title1, title2, title3, resultsST,
- searchingEpisode=episodeOf is not None,
- onlyEpisodes=_episodes,
- ro_thresold=0.0)
- res[:] = [x[1] for x in res]
-
- if res and not self.doAdult:
- mids = [x[0] for x in res]
- genreID = self._infoRev['genres']
- adultlist = [al.movieID for al
- in MovieInfo.select(
- AND(MovieInfo.q.infoTypeID == genreID,
- MovieInfo.q.info == 'Adult',
- IN(MovieInfo.q.movieID, mids)))]
- res[:] = [x for x in res if x[0] not in adultlist]
-
- new_res = []
- # XXX: can there be duplicates?
- for r in res:
- if r not in q2:
- new_res.append(r)
- continue
- mdict = r[1]
- aka_title = build_title(mdict, ptdf=1)
- orig_dict = get_movie_data(r[0], self._kind)
- orig_title = build_title(orig_dict, ptdf=1)
- if aka_title == orig_title:
- new_res.append(r)
- continue
- orig_dict['akas'] = [aka_title]
- new_res.append((r[0], orig_dict))
- if results > 0: new_res[:] = new_res[:results]
- return new_res
-
- def _search_episode(self, title, results):
- return self._search_movie(title, results, _episodes=True)
-
- def get_movie_main(self, movieID):
- # Every movie information is retrieved from here.
- infosets = self.get_movie_infoset()
- try:
- res = get_movie_data(movieID, self._kind)
- except NotFoundError, e:
- raise IMDbDataAccessError, \
- 'unable to get movieID "%s": "%s"' % (movieID, str(e))
- if not res:
- raise IMDbDataAccessError, 'unable to get movieID "%s"' % movieID
- # Collect cast information.
- castdata = [[cd.personID, cd.personRoleID, cd.note, cd.nrOrder,
- self._role[cd.roleID]]
- for cd in CastInfo.select(CastInfo.q.movieID == movieID)]
- for p in castdata:
- person = Name.get(p[0])
- p += [person.name, person.imdbIndex]
- if p[4] in ('actor', 'actress'):
- p[4] = 'cast'
- # Regroup by role/duty (cast, writer, director, ...)
- castdata[:] = _groupListBy(castdata, 4)
- for group in castdata:
- duty = group[0][4]
- for pdata in group:
- curRole = pdata[1]
- curRoleID = None
- if curRole is not None:
- robj = CharName.get(curRole)
- curRole = robj.name
- curRoleID = robj.id
- p = Person(personID=pdata[0], name=pdata[5],
- currentRole=curRole or u'',
- roleID=curRoleID,
- notes=pdata[2] or u'',
- accessSystem='sql')
- if pdata[6]: p['imdbIndex'] = pdata[6]
- p.billingPos = pdata[3]
- res.setdefault(duty, []).append(p)
- if duty == 'cast':
- res[duty] = merge_roles(res[duty])
- res[duty].sort()
- # Info about the movie.
- minfo = [(self._info[m.infoTypeID], m.info, m.note)
- for m in MovieInfo.select(MovieInfo.q.movieID == movieID)]
- minfo += [(self._info[m.infoTypeID], m.info, m.note)
- for m in MovieInfoIdx.select(MovieInfoIdx.q.movieID == movieID)]
- minfo += [('keywords', Keyword.get(m.keywordID).keyword, None)
- for m in MovieKeyword.select(MovieKeyword.q.movieID == movieID)]
- minfo = _groupListBy(minfo, 0)
- for group in minfo:
- sect = group[0][0]
- for mdata in group:
- data = mdata[1]
- if mdata[2]: data += '::%s' % mdata[2]
- res.setdefault(sect, []).append(data)
- # Companies info about a movie.
- cinfo = [(self._compType[m.companyTypeID], m.companyID, m.note) for m
- in MovieCompanies.select(MovieCompanies.q.movieID == movieID)]
- cinfo = _groupListBy(cinfo, 0)
- for group in cinfo:
- sect = group[0][0]
- for mdata in group:
- cDb = CompanyName.get(mdata[1])
- cDbTxt = cDb.name
- if cDb.countryCode:
- cDbTxt += ' %s' % cDb.countryCode
- company = Company(name=cDbTxt,
- companyID=mdata[1],
- notes=mdata[2] or u'',
- accessSystem=self.accessSystem)
- res.setdefault(sect, []).append(company)
- # AKA titles.
- akat = [(get_movie_data(at.id, self._kind, fromAka=1), at.note)
- for at in AkaTitle.select(AkaTitle.q.movieID == movieID)]
- if akat:
- res['akas'] = []
- for td, note in akat:
- nt = build_title(td, ptdf=1)
- if note:
- net = self._changeAKAencoding(note, nt)
- if net is not None: nt = net
- nt += '::%s' % note
- if nt not in res['akas']: res['akas'].append(nt)
- # Complete cast/crew.
- compcast = [(self._compcast[cc.subjectID], self._compcast[cc.statusID])
- for cc in CompleteCast.select(CompleteCast.q.movieID == movieID)]
- if compcast:
- for entry in compcast:
- val = unicode(entry[1])
- res[u'complete %s' % entry[0]] = val
- # Movie connections.
- mlinks = [[ml.linkedMovieID, self._link[ml.linkTypeID]]
- for ml in MovieLink.select(MovieLink.q.movieID == movieID)]
- if mlinks:
- for ml in mlinks:
- lmovieData = get_movie_data(ml[0], self._kind)
- m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
- ml[0] = m
- res['connections'] = {}
- mlinks[:] = _groupListBy(mlinks, 1)
- for group in mlinks:
- lt = group[0][1]
- res['connections'][lt] = [i[0] for i in group]
- # Episodes.
- episodes = {}
- eps_list = list(Title.select(Title.q.episodeOfID == movieID))
- eps_list.sort()
- if eps_list:
- ps_data = {'title': res['title'], 'kind': res['kind'],
- 'year': res.get('year'),
- 'imdbIndex': res.get('imdbIndex')}
- parentSeries = Movie(movieID=movieID, data=ps_data,
- accessSystem='sql')
- for episode in eps_list:
- episodeID = episode.id
- episode_data = get_movie_data(episodeID, self._kind)
- m = Movie(movieID=episodeID, data=episode_data,
- accessSystem='sql')
- m['episode of'] = parentSeries
- season = episode_data.get('season', 'UNKNOWN')
- if season not in episodes: episodes[season] = {}
- ep_number = episode_data.get('episode')
- if ep_number is None:
- ep_number = max((episodes[season].keys() or [0])) + 1
- episodes[season][ep_number] = m
- res['episodes'] = episodes
- res['number of episodes'] = sum([len(x) for x in episodes.values()])
- res['number of seasons'] = len(episodes.keys())
- # Regroup laserdisc information.
- res = _reGroupDict(res, self._moviesubs)
- # Do some transformation to preserve consistency with other
- # data access systems.
- if 'quotes' in res:
- for idx, quote in enumerate(res['quotes']):
- res['quotes'][idx] = quote.split('::')
- if 'runtimes' in res and len(res['runtimes']) > 0:
- rt = res['runtimes'][0]
- episodes = re_episodes.findall(rt)
- if episodes:
- res['runtimes'][0] = re_episodes.sub('', rt)
- if res['runtimes'][0][-2:] == '::':
- res['runtimes'][0] = res['runtimes'][0][:-2]
- if 'votes' in res:
- res['votes'] = int(res['votes'][0])
- if 'rating' in res:
- res['rating'] = float(res['rating'][0])
- if 'votes distribution' in res:
- res['votes distribution'] = res['votes distribution'][0]
- if 'mpaa' in res:
- res['mpaa'] = res['mpaa'][0]
- if 'top 250 rank' in res:
- try: res['top 250 rank'] = int(res['top 250 rank'])
- except: pass
- if 'bottom 10 rank' in res:
- try: res['bottom 100 rank'] = int(res['bottom 10 rank'])
- except: pass
- del res['bottom 10 rank']
- for old, new in [('guest', 'guests'), ('trademarks', 'trade-mark'),
- ('articles', 'article'), ('pictorials', 'pictorial'),
- ('magazine-covers', 'magazine-cover-photo')]:
- if old in res:
- res[new] = res[old]
- del res[old]
- trefs,nrefs = {}, {}
- trefs,nrefs = self._extractRefs(sub_dict(res,Movie.keys_tomodify_list))
- return {'data': res, 'titlesRefs': trefs, 'namesRefs': nrefs,
- 'info sets': infosets}
-
- # Just to know what kind of information are available.
- get_movie_alternate_versions = get_movie_main
- get_movie_business = get_movie_main
- get_movie_connections = get_movie_main
- get_movie_crazy_credits = get_movie_main
- get_movie_goofs = get_movie_main
- get_movie_keywords = get_movie_main
- get_movie_literature = get_movie_main
- get_movie_locations = get_movie_main
- get_movie_plot = get_movie_main
- get_movie_quotes = get_movie_main
- get_movie_release_dates = get_movie_main
- get_movie_soundtrack = get_movie_main
- get_movie_taglines = get_movie_main
- get_movie_technical = get_movie_main
- get_movie_trivia = get_movie_main
- get_movie_vote_details = get_movie_main
- get_movie_episodes = get_movie_main
-
- def _search_person(self, name, results):
- name = name.strip()
- if not name: return []
- s_name = analyze_name(name)['name']
- if not s_name: return []
- if isinstance(s_name, unicode):
- s_name = s_name.encode('ascii', 'ignore')
- soundexCode = soundex(s_name)
- name1, name2, name3 = nameVariations(name)
-
- # If the soundex is None, compare only with the first
- # phoneticCode column.
- if soundexCode is not None:
- condition = IN(soundexCode, [Name.q.namePcodeCf,
- Name.q.namePcodeNf,
- Name.q.surnamePcode])
- conditionAka = IN(soundexCode, [AkaName.q.namePcodeCf,
- AkaName.q.namePcodeNf,
- AkaName.q.surnamePcode])
- else:
- condition = ISNULL(Name.q.namePcodeCf)
- conditionAka = ISNULL(AkaName.q.namePcodeCf)
-
- try:
- qr = [(q.id, {'name': q.name, 'imdbIndex': q.imdbIndex})
- for q in Name.select(condition)]
-
- q2 = [(q.personID, {'name': q.name, 'imdbIndex': q.imdbIndex})
- for q in AkaName.select(conditionAka)]
- qr += q2
- except NotFoundError, e:
- raise IMDbDataAccessError, \
- 'unable to search the database: "%s"' % str(e)
-
- res = scan_names(qr, name1, name2, name3, results)
- res[:] = [x[1] for x in res]
- # Purge empty imdbIndex.
- returnl = []
- for x in res:
- tmpd = x[1]
- if tmpd['imdbIndex'] is None:
- del tmpd['imdbIndex']
- returnl.append((x[0], tmpd))
-
- new_res = []
- # XXX: can there be duplicates?
- for r in returnl:
- if r not in q2:
- new_res.append(r)
- continue
- pdict = r[1]
- aka_name = build_name(pdict, canonical=1)
- p = Name.get(r[0])
- orig_dict = {'name': p.name, 'imdbIndex': p.imdbIndex}
- if orig_dict['imdbIndex'] is None:
- del orig_dict['imdbIndex']
- orig_name = build_name(orig_dict, canonical=1)
- if aka_name == orig_name:
- new_res.append(r)
- continue
- orig_dict['akas'] = [aka_name]
- new_res.append((r[0], orig_dict))
- if results > 0: new_res[:] = new_res[:results]
-
- return new_res
-
- def get_person_main(self, personID):
- # Every person information is retrieved from here.
- infosets = self.get_person_infoset()
- try:
- p = Name.get(personID)
- except NotFoundError, e:
- raise IMDbDataAccessError, \
- 'unable to get personID "%s": "%s"' % (personID, str(e))
- res = {'name': p.name, 'imdbIndex': p.imdbIndex}
- if res['imdbIndex'] is None: del res['imdbIndex']
- if not res:
- raise IMDbDataAccessError, 'unable to get personID "%s"' % personID
- # Collect cast information.
- castdata = [(cd.movieID, cd.personRoleID, cd.note,
- self._role[cd.roleID],
- get_movie_data(cd.movieID, self._kind))
- for cd in CastInfo.select(CastInfo.q.personID == personID)]
- # Regroup by role/duty (cast, writer, director, ...)
- castdata[:] = _groupListBy(castdata, 3)
- episodes = {}
- seenDuties = []
- for group in castdata:
- for mdata in group:
- duty = orig_duty = group[0][3]
- if duty not in seenDuties: seenDuties.append(orig_duty)
- note = mdata[2] or u''
- if 'episode of' in mdata[4]:
- duty = 'episodes'
- if orig_duty not in ('actor', 'actress'):
- if note: note = ' %s' % note
- note = '[%s]%s' % (orig_duty, note)
- curRole = mdata[1]
- curRoleID = None
- if curRole is not None:
- robj = CharName.get(curRole)
- curRole = robj.name
- curRoleID = robj.id
- m = Movie(movieID=mdata[0], data=mdata[4],
- currentRole=curRole or u'',
- roleID=curRoleID,
- notes=note, accessSystem='sql')
- if duty != 'episodes':
- res.setdefault(duty, []).append(m)
- else:
- episodes.setdefault(m['episode of'], []).append(m)
- if episodes:
- for k in episodes:
- episodes[k].sort()
- episodes[k].reverse()
- res['episodes'] = episodes
- for duty in seenDuties:
- if duty in res:
- if duty in ('actor', 'actress', 'himself', 'herself',
- 'themselves'):
- res[duty] = merge_roles(res[duty])
- res[duty].sort()
- # Info about the person.
- pinfo = [(self._info[pi.infoTypeID], pi.info, pi.note)
- for pi in PersonInfo.select(PersonInfo.q.personID == personID)]
- # Regroup by duty.
- pinfo = _groupListBy(pinfo, 0)
- for group in pinfo:
- sect = group[0][0]
- for pdata in group:
- data = pdata[1]
- if pdata[2]: data += '::%s' % pdata[2]
- res.setdefault(sect, []).append(data)
- # AKA names.
- akan = [(an.name, an.imdbIndex)
- for an in AkaName.select(AkaName.q.personID == personID)]
- if akan:
- res['akas'] = []
- for n in akan:
- nd = {'name': n[0]}
- if n[1]: nd['imdbIndex'] = n[1]
- nt = build_name(nd, canonical=1)
- res['akas'].append(nt)
- # Do some transformation to preserve consistency with other
- # data access systems.
- for key in ('birth date', 'birth notes', 'death date', 'death notes',
- 'birth name', 'height'):
- if key in res:
- res[key] = res[key][0]
- if 'guest' in res:
- res['notable tv guest appearances'] = res['guest']
- del res['guest']
- miscnames = res.get('nick names', [])
- if 'birth name' in res: miscnames.append(res['birth name'])
- if 'akas' in res:
- for mname in miscnames:
- if mname in res['akas']: res['akas'].remove(mname)
- if not res['akas']: del res['akas']
- trefs,nrefs = self._extractRefs(sub_dict(res,Person.keys_tomodify_list))
- return {'data': res, 'titlesRefs': trefs, 'namesRefs': nrefs,
- 'info sets': infosets}
-
- # Just to know what kind of information are available.
- get_person_filmography = get_person_main
- get_person_biography = get_person_main
- get_person_other_works = get_person_main
- get_person_episodes = get_person_main
-
- def _search_character(self, name, results):
- name = name.strip()
- if not name: return []
- s_name = analyze_name(name)['name']
- if not s_name: return []
- if isinstance(s_name, unicode):
- s_name = s_name.encode('ascii', 'ignore')
- s_name = normalizeName(s_name)
- soundexCode = soundex(s_name)
- surname = s_name.split(' ')[-1]
- surnameSoundex = soundex(surname)
- name2 = ''
- soundexName2 = None
- nsplit = s_name.split()
- if len(nsplit) > 1:
- name2 = '%s %s' % (nsplit[-1], ' '.join(nsplit[:-1]))
- if s_name == name2:
- name2 = ''
- else:
- soundexName2 = soundex(name2)
- # If the soundex is None, compare only with the first
- # phoneticCode column.
- if soundexCode is not None:
- if soundexName2 is not None:
- condition = OR(surnameSoundex == CharName.q.surnamePcode,
- IN(CharName.q.namePcodeNf, [soundexCode,
- soundexName2]),
- IN(CharName.q.surnamePcode, [soundexCode,
- soundexName2]))
- else:
- condition = OR(surnameSoundex == CharName.q.surnamePcode,
- IN(soundexCode, [CharName.q.namePcodeNf,
- CharName.q.surnamePcode]))
- else:
- condition = ISNULL(Name.q.namePcodeNf)
- try:
- qr = [(q.id, {'name': q.name, 'imdbIndex': q.imdbIndex})
- for q in CharName.select(condition)]
- except NotFoundError, e:
- raise IMDbDataAccessError, \
- 'unable to search the database: "%s"' % str(e)
- res = scan_names(qr, s_name, name2, '', results,
- _scan_character=True)
- res[:] = [x[1] for x in res]
- # Purge empty imdbIndex.
- returnl = []
- for x in res:
- tmpd = x[1]
- if tmpd['imdbIndex'] is None:
- del tmpd['imdbIndex']
- returnl.append((x[0], tmpd))
- return returnl
-
- def get_character_main(self, characterID, results=1000):
- # Every character information is retrieved from here.
- infosets = self.get_character_infoset()
- try:
- c = CharName.get(characterID)
- except NotFoundError, e:
- raise IMDbDataAccessError, \
- 'unable to get characterID "%s": "%s"' % (characterID, e)
- res = {'name': c.name, 'imdbIndex': c.imdbIndex}
- if res['imdbIndex'] is None: del res['imdbIndex']
- if not res:
- raise IMDbDataAccessError, 'unable to get characterID "%s"' % \
- characterID
- # Collect filmography information.
- items = CastInfo.select(CastInfo.q.personRoleID == characterID)
- if results > 0:
- items = items[:results]
- filmodata = [(cd.movieID, cd.personID, cd.note,
- get_movie_data(cd.movieID, self._kind)) for cd in items
- if self._role[cd.roleID] in ('actor', 'actress')]
- fdata = []
- for f in filmodata:
- curRole = None
- curRoleID = f[1]
- note = f[2] or u''
- if curRoleID is not None:
- robj = Name.get(curRoleID)
- curRole = robj.name
- m = Movie(movieID=f[0], data=f[3],
- currentRole=curRole or u'',
- roleID=curRoleID, roleIsPerson=True,
- notes=note, accessSystem='sql')
- fdata.append(m)
- fdata = merge_roles(fdata)
- fdata.sort()
- if fdata:
- res['filmography'] = fdata
- return {'data': res, 'info sets': infosets}
-
- get_character_filmography = get_character_main
- get_character_biography = get_character_main
-
- def _search_company(self, name, results):
- name = name.strip()
- if not name: return []
- if isinstance(name, unicode):
- name = name.encode('ascii', 'ignore')
- soundexCode = soundex(name)
- # If the soundex is None, compare only with the first
- # phoneticCode column.
- if soundexCode is None:
- condition = ISNULL(CompanyName.q.namePcodeNf)
- else:
- if name.endswith(']'):
- condition = CompanyName.q.namePcodeSf == soundexCode
- else:
- condition = CompanyName.q.namePcodeNf == soundexCode
- try:
- qr = [(q.id, {'name': q.name, 'country': q.countryCode})
- for q in CompanyName.select(condition)]
- except NotFoundError, e:
- raise IMDbDataAccessError, \
- 'unable to search the database: "%s"' % str(e)
- qr[:] = [(x[0], build_company_name(x[1])) for x in qr]
- res = scan_company_names(qr, name, results)
- res[:] = [x[1] for x in res]
- # Purge empty country keys.
- returnl = []
- for x in res:
- tmpd = x[1]
- country = tmpd.get('country')
- if country is None and 'country' in tmpd:
- del tmpd['country']
- returnl.append((x[0], tmpd))
- return returnl
-
- def get_company_main(self, companyID, results=0):
- # Every company information is retrieved from here.
- infosets = self.get_company_infoset()
- try:
- c = CompanyName.get(companyID)
- except NotFoundError, e:
- raise IMDbDataAccessError, \
- 'unable to get companyID "%s": "%s"' % (companyID, e)
- res = {'name': c.name, 'country': c.countryCode}
- if res['country'] is None: del res['country']
- if not res:
- raise IMDbDataAccessError, 'unable to get companyID "%s"' % \
- companyID
- # Collect filmography information.
- items = MovieCompanies.select(MovieCompanies.q.companyID == companyID)
- if results > 0:
- items = items[:results]
- filmodata = [(cd.movieID, cd.companyID,
- self._compType[cd.companyTypeID], cd.note,
- get_movie_data(cd.movieID, self._kind)) for cd in items]
- filmodata = _groupListBy(filmodata, 2)
- for group in filmodata:
- ctype = group[0][2]
- for movieID, companyID, ctype, note, movieData in group:
- movie = Movie(data=movieData, movieID=movieID,
- notes=note or u'', accessSystem=self.accessSystem)
- res.setdefault(ctype, []).append(movie)
- res.get(ctype, []).sort()
- return {'data': res, 'info sets': infosets}
-
- def _search_keyword(self, keyword, results):
- constr = OR(Keyword.q.phoneticCode ==
- soundex(keyword.encode('ascii', 'ignore')),
- CONTAINSSTRING(Keyword.q.keyword, self.toUTF8(keyword)))
- return filterSimilarKeywords(keyword,
- _iterKeywords(Keyword.select(constr)))[:results]
-
- def _get_keyword(self, keyword, results):
- keyID = Keyword.select(Keyword.q.keyword == keyword)
- if keyID.count() == 0:
- return []
- keyID = keyID[0].id
- movies = MovieKeyword.select(MovieKeyword.q.keywordID ==
- keyID)[:results]
- return [(m.movieID, get_movie_data(m.movieID, self._kind))
- for m in movies]
-
- def _get_top_bottom_movies(self, kind):
- if kind == 'top':
- kind = 'top 250 rank'
- elif kind == 'bottom':
- # Not a refuse: the plain text data files contains only
- # the bottom 10 movies.
- kind = 'bottom 10 rank'
- else:
- return []
- infoID = InfoType.select(InfoType.q.info == kind)
- if infoID.count() == 0:
- return []
- infoID = infoID[0].id
- movies = MovieInfoIdx.select(MovieInfoIdx.q.infoTypeID == infoID)
- ml = []
- for m in movies:
- minfo = get_movie_data(m.movieID, self._kind)
- for k in kind, 'votes', 'rating', 'votes distribution':
- valueDict = getSingleInfo(MovieInfoIdx, m.movieID,
- k, notAList=True)
- if k in (kind, 'votes') and k in valueDict:
- valueDict[k] = int(valueDict[k])
- elif k == 'rating' and k in valueDict:
- valueDict[k] = float(valueDict[k])
- minfo.update(valueDict)
- ml.append((m.movieID, minfo))
- sorter = (_cmpBottom, _cmpTop)[kind == 'top 250 rank']
- ml.sort(sorter)
- return ml
-
- def __del__(self):
- """Ensure that the connection is closed."""
- if not hasattr(self, '_connection'): return
- self._sql_logger.debug('closing connection to the database')
- self._connection.close()
-
diff --git a/libs/imdb/parser/sql/alchemyadapter.py b/libs/imdb/parser/sql/alchemyadapter.py
deleted file mode 100644
index 12cc494a..00000000
--- a/libs/imdb/parser/sql/alchemyadapter.py
+++ /dev/null
@@ -1,508 +0,0 @@
-"""
-parser.sql.alchemyadapter module (imdb.parser.sql package).
-
-This module adapts the SQLAlchemy ORM to the internal mechanism.
-
-Copyright 2008-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import re
-import sys
-import logging
-from sqlalchemy import *
-from sqlalchemy import schema
-try: from sqlalchemy import exc # 0.5
-except ImportError: from sqlalchemy import exceptions as exc # 0.4
-
-_alchemy_logger = logging.getLogger('imdbpy.parser.sql.alchemy')
-
-try:
- import migrate.changeset
- HAS_MC = True
-except ImportError:
- HAS_MC = False
- _alchemy_logger.warn('Unable to import migrate.changeset: Foreign ' \
- 'Keys will not be created.')
-
-from imdb._exceptions import IMDbDataAccessError
-from dbschema import *
-
-# Used to convert table and column names.
-re_upper = re.compile(r'([A-Z])')
-
-# XXX: I'm not sure at all that this is the best method to connect
-# to the database and bind that connection to every table.
-metadata = MetaData()
-
-# Maps our placeholders to SQLAlchemy's column types.
-MAP_COLS = {
- INTCOL: Integer,
- UNICODECOL: UnicodeText,
- STRINGCOL: String
-}
-
-
-class NotFoundError(IMDbDataAccessError):
- """Exception raised when Table.get(id) returns no value."""
- pass
-
-
-def _renameTable(tname):
- """Build the name of a table, as done by SQLObject."""
- tname = re_upper.sub(r'_\1', tname)
- if tname.startswith('_'):
- tname = tname[1:]
- return tname.lower()
-
-def _renameColumn(cname):
- """Build the name of a column, as done by SQLObject."""
- cname = cname.replace('ID', 'Id')
- return _renameTable(cname)
-
-
-class DNNameObj(object):
- """Used to access table.sqlmeta.columns[column].dbName (a string)."""
- def __init__(self, dbName):
- self.dbName = dbName
-
- def __repr__(self):
- return '' % (self.dbName, id(self))
-
-
-class DNNameDict(object):
- """Used to access table.sqlmeta.columns (a dictionary)."""
- def __init__(self, colMap):
- self.colMap = colMap
-
- def __getitem__(self, key):
- return DNNameObj(self.colMap[key])
-
- def __repr__(self):
- return '' % (self.colMap, id(self))
-
-
-class SQLMetaAdapter(object):
- """Used to access table.sqlmeta (an object with .table, .columns and
- .idName attributes)."""
- def __init__(self, table, colMap=None):
- self.table = table
- if colMap is None:
- colMap = {}
- self.colMap = colMap
-
- def __getattr__(self, name):
- if name == 'table':
- return getattr(self.table, name)
- if name == 'columns':
- return DNNameDict(self.colMap)
- if name == 'idName':
- return self.colMap.get('id', 'id')
- return None
-
- def __repr__(self):
- return '' % \
- (repr(self.table), repr(self.colMap), id(self))
-
-
-class QAdapter(object):
- """Used to access table.q attribute (remapped to SQLAlchemy table.c)."""
- def __init__(self, table, colMap=None):
- self.table = table
- if colMap is None:
- colMap = {}
- self.colMap = colMap
-
- def __getattr__(self, name):
- try: return getattr(self.table.c, self.colMap[name])
- except KeyError, e: raise AttributeError, "unable to get '%s'" % name
-
- def __repr__(self):
- return '' % \
- (repr(self.table), repr(self.colMap), id(self))
-
-
-class RowAdapter(object):
- """Adapter for a SQLAlchemy RowProxy object."""
- def __init__(self, row, table, colMap=None):
- self.row = row
- # FIXME: it's OBSCENE that 'table' should be passed from
- # TableAdapter through ResultAdapter only to land here,
- # where it's used to directly update a row item.
- self.table = table
- if colMap is None:
- colMap = {}
- self.colMap = colMap
- self.colMapKeys = colMap.keys()
-
- def __getattr__(self, name):
- try: return getattr(self.row, self.colMap[name])
- except KeyError, e: raise AttributeError, "unable to get '%s'" % name
-
- def __setattr__(self, name, value):
- # FIXME: I can't even think about how much performances suffer,
- # for this horrible hack (and it's used so rarely...)
- # For sure something like a "property" to map column names
- # to getter/setter functions would be much better, but it's
- # not possible (or at least not easy) to build them for a
- # single instance.
- if name in self.__dict__.get('colMapKeys', ()):
- # Trying to update a value in the database.
- row = self.__dict__['row']
- table = self.__dict__['table']
- colMap = self.__dict__['colMap']
- params = {colMap[name]: value}
- table.update(table.c.id==row.id).execute(**params)
- # XXX: minor bug: after a value is assigned with the
- # 'rowAdapterInstance.colName = value' syntax, for some
- # reason rowAdapterInstance.colName still returns the
- # previous value (even if the database is updated).
- # Fix it? I'm not even sure it's ever used.
- return
- # For every other attribute.
- object.__setattr__(self, name, value)
-
- def __repr__(self):
- return '' % \
- (repr(self.row), repr(self.table), repr(self.colMap), id(self))
-
-
-class ResultAdapter(object):
- """Adapter for a SQLAlchemy ResultProxy object."""
- def __init__(self, result, table, colMap=None):
- self.result = result
- self.table = table
- if colMap is None:
- colMap = {}
- self.colMap = colMap
-
- def count(self):
- return len(self)
-
- def __len__(self):
- # FIXME: why sqlite returns -1? (that's wrooong!)
- if self.result.rowcount == -1:
- return 0
- return self.result.rowcount
-
- def __getitem__(self, key):
- res = list(self.result)[key]
- if not isinstance(key, slice):
- # A single item.
- return RowAdapter(res, self.table, colMap=self.colMap)
- else:
- # A (possible empty) list of items.
- return [RowAdapter(x, self.table, colMap=self.colMap)
- for x in res]
-
- def __iter__(self):
- for item in self.result:
- yield RowAdapter(item, self.table, colMap=self.colMap)
-
- def __repr__(self):
- return '' % \
- (repr(self.result), repr(self.table),
- repr(self.colMap), id(self))
-
-
-class TableAdapter(object):
- """Adapter for a SQLAlchemy Table object, to mimic a SQLObject class."""
- def __init__(self, table, uri=None):
- """Initialize a TableAdapter object."""
- self._imdbpySchema = table
- self._imdbpyName = table.name
- self.connectionURI = uri
- self.colMap = {}
- columns = []
- for col in table.cols:
- # Column's paramters.
- params = {'nullable': True}
- params.update(col.params)
- if col.name == 'id':
- params['primary_key'] = True
- if 'notNone' in params:
- params['nullable'] = not params['notNone']
- del params['notNone']
- cname = _renameColumn(col.name)
- self.colMap[col.name] = cname
- colClass = MAP_COLS[col.kind]
- colKindParams = {}
- if 'length' in params:
- colKindParams['length'] = params['length']
- del params['length']
- elif colClass is UnicodeText and col.index:
- # XXX: limit length for UNICODECOLs that will have an index.
- # this can result in name.name and title.title truncations!
- colClass = Unicode
- # Should work for most of the database servers.
- length = 511
- if self.connectionURI:
- if self.connectionURI.startswith('mysql'):
- # To stay compatible with MySQL 4.x.
- length = 255
- colKindParams['length'] = length
- elif self._imdbpyName == 'PersonInfo' and col.name == 'info':
- if self.connectionURI:
- if self.connectionURI.startswith('ibm'):
- # There are some entries longer than 32KB.
- colClass = CLOB
- # I really do hope that this space isn't wasted
- # for each other shorter entry...
- colKindParams['length'] = 68*1024
- colKind = colClass(**colKindParams)
- if 'alternateID' in params:
- # There's no need to handle them here.
- del params['alternateID']
- # Create a column.
- colObj = Column(cname, colKind, **params)
- columns.append(colObj)
- self.tableName = _renameTable(table.name)
- # Create the table.
- self.table = Table(self.tableName, metadata, *columns)
- self._ta_insert = self.table.insert()
- self._ta_select = self.table.select
- # Adapters for special attributes.
- self.q = QAdapter(self.table, colMap=self.colMap)
- self.sqlmeta = SQLMetaAdapter(self.table, colMap=self.colMap)
-
- def select(self, conditions=None):
- """Return a list of results."""
- result = self._ta_select(conditions).execute()
- return ResultAdapter(result, self.table, colMap=self.colMap)
-
- def get(self, theID):
- """Get an object given its ID."""
- result = self.select(self.table.c.id == theID)
- #if not result:
- # raise NotFoundError, 'no data for ID %s' % theID
- # FIXME: isn't this a bit risky? We can't check len(result),
- # because sqlite returns -1...
- # What about converting it to a list and getting the first item?
- try:
- return result[0]
- except KeyError:
- raise NotFoundError, 'no data for ID %s' % theID
-
- def dropTable(self, checkfirst=True):
- """Drop the table."""
- dropParams = {'checkfirst': checkfirst}
- # Guess what? Another work-around for a ibm_db bug.
- if self.table.bind.engine.url.drivername.startswith('ibm_db'):
- del dropParams['checkfirst']
- try:
- self.table.drop(**dropParams)
- except exc.ProgrammingError:
- # As above: re-raise the exception, but only if it's not ibm_db.
- if not self.table.bind.engine.url.drivername.startswith('ibm_db'):
- raise
-
- def createTable(self, checkfirst=True):
- """Create the table."""
- self.table.create(checkfirst=checkfirst)
- # Create indexes for alternateID columns (other indexes will be
- # created later, at explicit request for performances reasons).
- for col in self._imdbpySchema.cols:
- if col.name == 'id':
- continue
- if col.params.get('alternateID', False):
- self._createIndex(col, checkfirst=checkfirst)
-
- def _createIndex(self, col, checkfirst=True):
- """Create an index for a given (schema) column."""
- # XXX: indexLen is ignored in SQLAlchemy, and that means that
- # indexes will be over the whole 255 chars strings...
- # NOTE: don't use a dot as a separator, or DB2 will do
- # nasty things.
- idx_name = '%s_%s' % (self.table.name, col.index or col.name)
- if checkfirst:
- for index in self.table.indexes:
- if index.name == idx_name:
- return
- idx = Index(idx_name, getattr(self.table.c, self.colMap[col.name]))
- # XXX: beware that exc.OperationalError can be raised, is some
- # strange circumstances; that's why the index name doesn't
- # follow the SQLObject convention, but includes the table name:
- # sqlite, for example, expects index names to be unique at
- # db-level.
- try:
- idx.create()
- except exc.OperationalError, e:
- _alchemy_logger.warn('Skipping creation of the %s.%s index: %s' %
- (self.sqlmeta.table, col.name, e))
-
- def addIndexes(self, ifNotExists=True):
- """Create all required indexes."""
- for col in self._imdbpySchema.cols:
- if col.index:
- self._createIndex(col, checkfirst=ifNotExists)
-
- def addForeignKeys(self, mapTables, ifNotExists=True):
- """Create all required foreign keys."""
- if not HAS_MC:
- return
- # It seems that there's no reason to prevent the creation of
- # indexes for columns with FK constrains: if there's already
- # an index, the FK index is not created.
- countCols = 0
- for col in self._imdbpySchema.cols:
- countCols += 1
- if not col.foreignKey:
- continue
- fks = col.foreignKey.split('.', 1)
- foreignTableName = fks[0]
- if len(fks) == 2:
- foreignColName = fks[1]
- else:
- foreignColName = 'id'
- foreignColName = mapTables[foreignTableName].colMap.get(
- foreignColName, foreignColName)
- thisColName = self.colMap.get(col.name, col.name)
- thisCol = self.table.columns[thisColName]
- foreignTable = mapTables[foreignTableName].table
- foreignCol = getattr(foreignTable.c, foreignColName)
- # Need to explicitly set an unique name, otherwise it will
- # explode, if two cols points to the same table.
- fkName = 'fk_%s_%s_%d' % (foreignTable.name, foreignColName,
- countCols)
- constrain = migrate.changeset.ForeignKeyConstraint([thisCol],
- [foreignCol],
- name=fkName)
- try:
- constrain.create()
- except exc.OperationalError:
- continue
-
- def __call__(self, *args, **kwds):
- """To insert a new row with the syntax: TableClass(key=value, ...)"""
- taArgs = {}
- for key, value in kwds.items():
- taArgs[self.colMap.get(key, key)] = value
- self._ta_insert.execute(*args, **taArgs)
-
- def __repr__(self):
- return '' % (repr(self.table), id(self))
-
-
-# Module-level "cache" for SQLObject classes, to prevent
-# "Table 'tableName' is already defined for this MetaData instance" errors,
-# when two or more connections to the database are made.
-# XXX: is this the best way to act?
-TABLES_REPOSITORY = {}
-
-def getDBTables(uri=None):
- """Return a list of TableAdapter objects to be used to access the
- database through the SQLAlchemy ORM. The connection uri is optional, and
- can be used to tailor the db schema to specific needs."""
- DB_TABLES = []
- for table in DB_SCHEMA:
- if table.name in TABLES_REPOSITORY:
- DB_TABLES.append(TABLES_REPOSITORY[table.name])
- continue
- tableAdapter = TableAdapter(table, uri)
- DB_TABLES.append(tableAdapter)
- TABLES_REPOSITORY[table.name] = tableAdapter
- return DB_TABLES
-
-
-# Functions used to emulate SQLObject's logical operators.
-def AND(*params):
- """Emulate SQLObject's AND."""
- return and_(*params)
-
-def OR(*params):
- """Emulate SQLObject's OR."""
- return or_(*params)
-
-def IN(item, inList):
- """Emulate SQLObject's IN."""
- if not isinstance(item, schema.Column):
- return OR(*[x == item for x in inList])
- else:
- return item.in_(inList)
-
-def ISNULL(x):
- """Emulate SQLObject's ISNULL."""
- # XXX: Should we use null()? Can null() be a global instance?
- # XXX: Is it safe to test None with the == operator, in this case?
- return x == None
-
-def ISNOTNULL(x):
- """Emulate SQLObject's ISNOTNULL."""
- return x != None
-
-def CONTAINSSTRING(expr, pattern):
- """Emulate SQLObject's CONTAINSSTRING."""
- return expr.like('%%%s%%' % pattern)
-
-
-def toUTF8(s):
- """For some strange reason, sometimes SQLObject wants utf8 strings
- instead of unicode; with SQLAlchemy we just return the unicode text."""
- return s
-
-
-class _AlchemyConnection(object):
- """A proxy for the connection object, required since _ConnectionFairy
- uses __slots__."""
- def __init__(self, conn):
- self.conn = conn
-
- def __getattr__(self, name):
- return getattr(self.conn, name)
-
-
-def setConnection(uri, tables, encoding='utf8', debug=False):
- """Set connection for every table."""
- # FIXME: why on earth MySQL requires an additional parameter,
- # is well beyond my understanding...
- if uri.startswith('mysql'):
- if '?' in uri:
- uri += '&'
- else:
- uri += '?'
- uri += 'charset=%s' % encoding
- params = {'encoding': encoding}
- if debug:
- params['echo'] = True
- if uri.startswith('ibm_db'):
- # Try to work-around a possible bug of the ibm_db DB2 driver.
- params['convert_unicode'] = True
- # XXX: is this the best way to connect?
- engine = create_engine(uri, **params)
- metadata.bind = engine
- eng_conn = engine.connect()
- if uri.startswith('sqlite'):
- major = sys.version_info[0]
- minor = sys.version_info[1]
- if major > 2 or (major == 2 and minor > 5):
- eng_conn.connection.connection.text_factory = str
- # XXX: OH MY, THAT'S A MESS!
- # We need to return a "connection" object, with the .dbName
- # attribute set to the db engine name (e.g. "mysql"), .paramstyle
- # set to the style of the paramters for query() calls, and the
- # .module attribute set to a module (?) with .OperationalError and
- # .IntegrityError attributes.
- # Another attribute of "connection" is the getConnection() function,
- # used to return an object with a .cursor() method.
- connection = _AlchemyConnection(eng_conn.connection)
- paramstyle = eng_conn.dialect.paramstyle
- connection.module = eng_conn.dialect.dbapi
- connection.paramstyle = paramstyle
- connection.getConnection = lambda: connection.connection
- connection.dbName = engine.url.drivername
- return connection
-
-
diff --git a/libs/imdb/parser/sql/cutils.c b/libs/imdb/parser/sql/cutils.c
deleted file mode 100644
index 677c1b1e..00000000
--- a/libs/imdb/parser/sql/cutils.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * cutils.c module.
- *
- * Miscellaneous functions to speed up the IMDbPY package.
- *
- * Contents:
- * - pyratcliff():
- * Function that implements the Ratcliff-Obershelp comparison
- * amongst Python strings.
- *
- * - pysoundex():
- * Return a soundex code string, for the given string.
- *
- * Copyright 2004-2009 Davide Alberani
- * Released under the GPL license.
- *
- * NOTE: The Ratcliff-Obershelp part was heavily based on code from the
- * "simil" Python module.
- * The "simil" module is copyright of Luca Montecchiani
- * and can be found here: http://spazioinwind.libero.it/montecchiani/
- * It was released under the GPL license; original comments are leaved
- * below.
- *
- */
-
-
-/*========== Ratcliff-Obershelp ==========*/
-/*****************************************************************************
- *
- * Stolen code from :
- *
- * [Python-Dev] Why is soundex marked obsolete?
- * by Eric S. Raymond [4]esr@thyrsus.com
- * on Sun, 14 Jan 2001 14:09:01 -0500
- *
- *****************************************************************************/
-
-/*****************************************************************************
- *
- * Ratcliff-Obershelp common-subpattern similarity.
- *
- * This code first appeared in a letter to the editor in Doctor
- * Dobbs's Journal, 11/1988. The original article on the algorithm,
- * "Pattern Matching by Gestalt" by John Ratcliff, had appeared in the
- * July 1988 issue (#181) but the algorithm was presented in assembly.
- * The main drawback of the Ratcliff-Obershelp algorithm is the cost
- * of the pairwise comparisons. It is significantly more expensive
- * than stemming, Hamming distance, soundex, and the like.
- *
- * Running time quadratic in the data size, memory usage constant.
- *
- *****************************************************************************/
-
-#include
-
-#define DONTCOMPARE_NULL 0.0
-#define DONTCOMPARE_SAME 1.0
-#define COMPARE 2.0
-#define STRING_MAXLENDIFFER 0.7
-
-/* As of 05 Mar 2008, the longest title is ~600 chars. */
-#define MXLINELEN 1023
-
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
-
-
-//*****************************************
-// preliminary check....
-//*****************************************
-static float
-strings_check(char const *s, char const *t)
-{
- float threshold; // lenght difference
- int s_len = strlen(s); // length of s
- int t_len = strlen(t); // length of t
-
- // NULL strings ?
- if ((t_len * s_len) == 0)
- return (DONTCOMPARE_NULL);
-
- // the same ?
- if (strcmp(s, t) == 0)
- return (DONTCOMPARE_SAME);
-
- // string lenght difference threshold
- // we don't want to compare too different lenght strings ;)
- if (s_len < t_len)
- threshold = (float) s_len / (float) t_len;
- else
- threshold = (float) t_len / (float) s_len;
- if (threshold < STRING_MAXLENDIFFER)
- return (DONTCOMPARE_NULL);
-
- // proceed
- return (COMPARE);
-}
-
-
-static int
-RatcliffObershelp(char *st1, char *end1, char *st2, char *end2)
-{
- register char *a1, *a2;
- char *b1, *b2;
- char *s1 = st1, *s2 = st2; /* initializations are just to pacify GCC */
- short max, i;
-
- if (end1 <= st1 || end2 <= st2)
- return (0);
- if (end1 == st1 + 1 && end2 == st2 + 1)
- return (0);
-
- max = 0;
- b1 = end1;
- b2 = end2;
-
- for (a1 = st1; a1 < b1; a1++) {
- for (a2 = st2; a2 < b2; a2++) {
- if (*a1 == *a2) {
- /* determine length of common substring */
- for (i = 1; a1[i] && (a1[i] == a2[i]); i++)
- continue;
- if (i > max) {
- max = i;
- s1 = a1;
- s2 = a2;
- b1 = end1 - max;
- b2 = end2 - max;
- }
- }
- }
- }
- if (!max)
- return (0);
- max += RatcliffObershelp(s1 + max, end1, s2 + max, end2); /* rhs */
- max += RatcliffObershelp(st1, s1, st2, s2); /* lhs */
- return max;
-}
-
-
-static float
-ratcliff(char *s1, char *s2)
-/* compute Ratcliff-Obershelp similarity of two strings */
-{
- int l1, l2;
- float res;
-
- // preliminary tests
- res = strings_check(s1, s2);
- if (res != COMPARE)
- return(res);
-
- l1 = strlen(s1);
- l2 = strlen(s2);
-
- return 2.0 * RatcliffObershelp(s1, s1 + l1, s2, s2 + l2) / (l1 + l2);
-}
-
-
-/* Change a string to lowercase. */
-static void
-strtolower(char *s1)
-{
- int i;
- for (i=0; i < strlen(s1); i++) s1[i] = tolower(s1[i]);
-}
-
-
-/* Ratcliff-Obershelp for two python strings; returns a python float. */
-static PyObject*
-pyratcliff(PyObject *self, PyObject *pArgs)
-{
- char *s1 = NULL;
- char *s2 = NULL;
- PyObject *discard = NULL;
- char s1copy[MXLINELEN+1];
- char s2copy[MXLINELEN+1];
-
- /* The optional PyObject parameter is here to be compatible
- * with the pure python implementation, which uses a
- * difflib.SequenceMatcher object. */
- if (!PyArg_ParseTuple(pArgs, "ss|O", &s1, &s2, &discard))
- return NULL;
-
- strncpy(s1copy, s1, MXLINELEN);
- strncpy(s2copy, s2, MXLINELEN);
- /* Work on copies. */
- strtolower(s1copy);
- strtolower(s2copy);
-
- return Py_BuildValue("f", ratcliff(s1copy, s2copy));
-}
-
-
-/*========== soundex ==========*/
-/* Max length of the soundex code to output (an uppercase char and
- * _at most_ 4 digits). */
-#define SOUNDEX_LEN 5
-
-/* Group Number Lookup Table */
-static char soundTable[26] =
-{ 0 /* A */, '1' /* B */, '2' /* C */, '3' /* D */, 0 /* E */, '1' /* F */,
- '2' /* G */, 0 /* H */, 0 /* I */, '2' /* J */, '2' /* K */, '4' /* L */,
- '5' /* M */, '5' /* N */, 0 /* O */, '1' /* P */, '2' /* Q */, '6' /* R */,
- '2' /* S */, '3' /* T */, 0 /* U */, '1' /* V */, 0 /* W */, '2' /* X */,
- 0 /* Y */, '2' /* Z */};
-
-static PyObject*
-pysoundex(PyObject *self, PyObject *pArgs)
-{
- int i, j, n;
- char *s = NULL;
- char word[MXLINELEN+1];
- char soundCode[SOUNDEX_LEN+1];
- char c;
-
- if (!PyArg_ParseTuple(pArgs, "s", &s))
- return NULL;
-
- j = 0;
- n = strlen(s);
-
- /* Convert to uppercase and exclude non-ascii chars. */
- for (i = 0; i < n; i++) {
- c = toupper(s[i]);
- if (c < 91 && c > 64) {
- word[j] = c;
- j++;
- }
- }
- word[j] = '\0';
-
- n = strlen(word);
- if (n == 0) {
- /* If the string is empty, returns None. */
- return Py_BuildValue("");
- }
- soundCode[0] = word[0];
-
- /* Build the soundCode string. */
- j = 1;
- for (i = 1; j < SOUNDEX_LEN && i < n; i++) {
- c = soundTable[(word[i]-65)];
- /* Compact zeroes and equal consecutive digits ("12234112"->"123412") */
- if (c != 0 && c != soundCode[j-1]) {
- soundCode[j++] = c;
- }
- }
- soundCode[j] = '\0';
-
- return Py_BuildValue("s", soundCode);
-}
-
-
-static PyMethodDef cutils_methods[] = {
- {"ratcliff", pyratcliff,
- METH_VARARGS, "Ratcliff-Obershelp similarity."},
- {"soundex", pysoundex,
- METH_VARARGS, "Soundex code for strings."},
- {NULL}
-};
-
-
-void
-initcutils(void)
-{
- Py_InitModule("cutils", cutils_methods);
-}
-
-
diff --git a/libs/imdb/parser/sql/dbschema.py b/libs/imdb/parser/sql/dbschema.py
deleted file mode 100644
index 358dfdd5..00000000
--- a/libs/imdb/parser/sql/dbschema.py
+++ /dev/null
@@ -1,461 +0,0 @@
-#-*- encoding: utf-8 -*-
-"""
-parser.sql.dbschema module (imdb.parser.sql package).
-
-This module provides the schema used to describe the layout of the
-database used by the imdb.parser.sql package; functions to create/drop
-tables and indexes are also provided.
-
-Copyright 2005-2010 Davide Alberani
- 2006 Giuseppe "Cowo" Corbelli lugbs.linux.it>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import logging
-
-_dbschema_logger = logging.getLogger('imdbpy.parser.sql.dbschema')
-
-
-# Placeholders for column types.
-INTCOL = 1
-UNICODECOL = 2
-STRINGCOL = 3
-_strMap = {1: 'INTCOL', 2: 'UNICODECOL', 3: 'STRINGCOL'}
-
-class DBCol(object):
- """Define column objects."""
- def __init__(self, name, kind, **params):
- self.name = name
- self.kind = kind
- self.index = None
- self.indexLen = None
- # If not None, two notations are accepted: 'TableName'
- # and 'TableName.ColName'; in the first case, 'id' is assumed
- # as the name of the pointed column.
- self.foreignKey = None
- if 'index' in params:
- self.index = params['index']
- del params['index']
- if 'indexLen' in params:
- self.indexLen = params['indexLen']
- del params['indexLen']
- if 'foreignKey' in params:
- self.foreignKey = params['foreignKey']
- del params['foreignKey']
- self.params = params
-
- def __str__(self):
- """Class representation."""
- s = ''
- return s
-
- def __repr__(self):
- """Class representation."""
- s = ''
- return s
-
-
-class DBTable(object):
- """Define table objects."""
- def __init__(self, name, *cols, **kwds):
- self.name = name
- self.cols = cols
- # Default values.
- self.values = kwds.get('values', {})
-
- def __str__(self):
- """Class representation."""
- return '' % (self.name,
- len(self.cols), sum([len(v) for v in self.values.values()]))
-
- def __repr__(self):
- """Class representation."""
- s = '').lstrip('<')
- for col in self.cols])
- if col_s:
- s += ', %s' % col_s
- if self.values:
- s += ', values=%s' % self.values
- s += ')>'
- return s
-
-
-# Default values to insert in some tables: {'column': (list, of, values, ...)}
-kindTypeDefs = {'kind': ('movie', 'tv series', 'tv movie', 'video movie',
- 'tv mini series', 'video game', 'episode')}
-companyTypeDefs = {'kind': ('distributors', 'production companies',
- 'special effects companies', 'miscellaneous companies')}
-infoTypeDefs = {'info': ('runtimes', 'color info', 'genres', 'languages',
- 'certificates', 'sound mix', 'tech info', 'countries', 'taglines',
- 'keywords', 'alternate versions', 'crazy credits', 'goofs',
- 'soundtrack', 'quotes', 'release dates', 'trivia', 'locations',
- 'mini biography', 'birth notes', 'birth date', 'height',
- 'death date', 'spouse', 'other works', 'birth name',
- 'salary history', 'nick names', 'books', 'agent address',
- 'biographical movies', 'portrayed in', 'where now', 'trade mark',
- 'interviews', 'article', 'magazine cover photo', 'pictorial',
- 'death notes', 'LD disc format', 'LD year', 'LD digital sound',
- 'LD official retail price', 'LD frequency response', 'LD pressing plant',
- 'LD length', 'LD language', 'LD review', 'LD spaciality', 'LD release date',
- 'LD production country', 'LD contrast', 'LD color rendition',
- 'LD picture format', 'LD video noise', 'LD video artifacts',
- 'LD release country', 'LD sharpness', 'LD dynamic range',
- 'LD audio noise', 'LD color information', 'LD group genre',
- 'LD quality program', 'LD close captions-teletext-ld-g',
- 'LD category', 'LD analog left', 'LD certification',
- 'LD audio quality', 'LD video quality', 'LD aspect ratio',
- 'LD analog right', 'LD additional information',
- 'LD number of chapter stops', 'LD dialogue intellegibility',
- 'LD disc size', 'LD master format', 'LD subtitles',
- 'LD status of availablility', 'LD quality of source',
- 'LD number of sides', 'LD video standard', 'LD supplement',
- 'LD original title', 'LD sound encoding', 'LD number', 'LD label',
- 'LD catalog number', 'LD laserdisc title', 'screenplay-teleplay',
- 'novel', 'adaption', 'book', 'production process protocol',
- 'printed media reviews', 'essays', 'other literature', 'mpaa',
- 'plot', 'votes distribution', 'votes', 'rating',
- 'production dates', 'copyright holder', 'filming dates', 'budget',
- 'weekend gross', 'gross', 'opening weekend', 'rentals',
- 'admissions', 'studios', 'top 250 rank', 'bottom 10 rank')}
-compCastTypeDefs = {'kind': ('cast', 'crew', 'complete', 'complete+verified')}
-linkTypeDefs = {'link': ('follows', 'followed by', 'remake of', 'remade as',
- 'references', 'referenced in', 'spoofs', 'spoofed in',
- 'features', 'featured in', 'spin off from', 'spin off',
- 'version of', 'similar to', 'edited into',
- 'edited from', 'alternate language version of',
- 'unknown link')}
-roleTypeDefs = {'role': ('actor', 'actress', 'producer', 'writer',
- 'cinematographer', 'composer', 'costume designer',
- 'director', 'editor', 'miscellaneous crew',
- 'production designer', 'guest')}
-
-# Schema of tables in our database.
-# XXX: Foreign keys can be used to create constrains between tables,
-# but they create indexes in the database, and this
-# means poor performances at insert-time.
-DB_SCHEMA = [
- DBTable('Name',
- # namePcodeCf is the soundex of the name in the canonical format.
- # namePcodeNf is the soundex of the name in the normal format, if
- # different from namePcodeCf.
- # surnamePcode is the soundex of the surname, if different from the
- # other two values.
-
- # The 'id' column is simply skipped by SQLObject (it's a default);
- # the alternateID attribute here will be ignored by SQLAlchemy.
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6),
- DBCol('imdbIndex', UNICODECOL, length=12, default=None),
- DBCol('imdbID', INTCOL, default=None),
- DBCol('namePcodeCf', STRINGCOL, length=5, default=None,
- index='idx_pcodecf'),
- DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
- index='idx_pcodenf'),
- DBCol('surnamePcode', STRINGCOL, length=5, default=None,
- index='idx_pcode'),
- DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5')
- ),
-
- DBTable('CharName',
- # namePcodeNf is the soundex of the name in the normal format.
- # surnamePcode is the soundex of the surname, if different
- # from namePcodeNf.
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6),
- DBCol('imdbIndex', UNICODECOL, length=12, default=None),
- DBCol('imdbID', INTCOL, default=None),
- DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
- index='idx_pcodenf'),
- DBCol('surnamePcode', STRINGCOL, length=5, default=None,
- index='idx_pcode'),
- DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5')
- ),
-
- DBTable('CompanyName',
- # namePcodeNf is the soundex of the name in the normal format.
- # namePcodeSf is the soundex of the name plus the country code.
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6),
- DBCol('countryCode', UNICODECOL, length=255, default=None),
- DBCol('imdbID', INTCOL, default=None),
- DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
- index='idx_pcodenf'),
- DBCol('namePcodeSf', STRINGCOL, length=5, default=None,
- index='idx_pcodesf'),
- DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5')
- ),
-
- DBTable('KindType',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('kind', STRINGCOL, length=15, default=None, alternateID=True),
- values=kindTypeDefs
- ),
-
- DBTable('Title',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('title', UNICODECOL, notNone=True,
- index='idx_title', indexLen=10),
- DBCol('imdbIndex', UNICODECOL, length=12, default=None),
- DBCol('kindID', INTCOL, notNone=True, foreignKey='KindType'),
- DBCol('productionYear', INTCOL, default=None),
- DBCol('imdbID', INTCOL, default=None),
- DBCol('phoneticCode', STRINGCOL, length=5, default=None,
- index='idx_pcode'),
- DBCol('episodeOfID', INTCOL, default=None, index='idx_epof',
- foreignKey='Title'),
- DBCol('seasonNr', INTCOL, default=None),
- DBCol('episodeNr', INTCOL, default=None),
- # Maximum observed length is 44; 49 can store 5 comma-separated
- # year-year pairs.
- DBCol('seriesYears', STRINGCOL, length=49, default=None),
- DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5')
- ),
-
- DBTable('CompanyType',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('kind', STRINGCOL, length=32, default=None, alternateID=True),
- values=companyTypeDefs
- ),
-
- DBTable('AkaName',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('personID', INTCOL, notNone=True, index='idx_person',
- foreignKey='Name'),
- DBCol('name', UNICODECOL, notNone=True),
- DBCol('imdbIndex', UNICODECOL, length=12, default=None),
- DBCol('namePcodeCf', STRINGCOL, length=5, default=None,
- index='idx_pcodecf'),
- DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
- index='idx_pcodenf'),
- DBCol('surnamePcode', STRINGCOL, length=5, default=None,
- index='idx_pcode'),
- DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5')
- ),
-
- DBTable('AkaTitle',
- # XXX: It's safer to set notNone to False, here.
- # alias for akas are stored completely in the AkaTitle table;
- # this means that episodes will set also a "tv series" alias name.
- # Reading the aka-title.list file it looks like there are
- # episode titles with aliases to different titles for both
- # the episode and the series title, while for just the series
- # there are no aliases.
- # E.g.:
- # aka title original title
- # "Series, The" (2005) {The Episode} "Other Title" (2005) {Other Title}
- # But there is no:
- # "Series, The" (2005) "Other Title" (2005)
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('movieID', INTCOL, notNone=True, index='idx_movieid',
- foreignKey='Title'),
- DBCol('title', UNICODECOL, notNone=True),
- DBCol('imdbIndex', UNICODECOL, length=12, default=None),
- DBCol('kindID', INTCOL, notNone=True, foreignKey='KindType'),
- DBCol('productionYear', INTCOL, default=None),
- DBCol('phoneticCode', STRINGCOL, length=5, default=None,
- index='idx_pcode'),
- DBCol('episodeOfID', INTCOL, default=None, index='idx_epof',
- foreignKey='AkaTitle'),
- DBCol('seasonNr', INTCOL, default=None),
- DBCol('episodeNr', INTCOL, default=None),
- DBCol('note', UNICODECOL, default=None),
- DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5')
- ),
-
- DBTable('RoleType',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('role', STRINGCOL, length=32, notNone=True, alternateID=True),
- values=roleTypeDefs
- ),
-
- DBTable('CastInfo',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('personID', INTCOL, notNone=True, index='idx_pid',
- foreignKey='Name'),
- DBCol('movieID', INTCOL, notNone=True, index='idx_mid',
- foreignKey='Title'),
- DBCol('personRoleID', INTCOL, default=None, index='idx_cid',
- foreignKey='CharName'),
- DBCol('note', UNICODECOL, default=None),
- DBCol('nrOrder', INTCOL, default=None),
- DBCol('roleID', INTCOL, notNone=True, foreignKey='RoleType')
- ),
-
- DBTable('CompCastType',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('kind', STRINGCOL, length=32, notNone=True, alternateID=True),
- values=compCastTypeDefs
- ),
-
- DBTable('CompleteCast',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('movieID', INTCOL, index='idx_mid', foreignKey='Title'),
- DBCol('subjectID', INTCOL, notNone=True, foreignKey='CompCastType'),
- DBCol('statusID', INTCOL, notNone=True, foreignKey='CompCastType')
- ),
-
- DBTable('InfoType',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('info', STRINGCOL, length=32, notNone=True, alternateID=True),
- values=infoTypeDefs
- ),
-
- DBTable('LinkType',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('link', STRINGCOL, length=32, notNone=True, alternateID=True),
- values=linkTypeDefs
- ),
-
- DBTable('Keyword',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- # XXX: can't use alternateID=True, because it would create
- # a UNIQUE index; unfortunately (at least with a common
- # collation like utf8_unicode_ci) MySQL will consider
- # some different keywords identical - like
- # "fiancée" and "fiancee".
- DBCol('keyword', UNICODECOL, length=255, notNone=True,
- index='idx_keyword', indexLen=5),
- DBCol('phoneticCode', STRINGCOL, length=5, default=None,
- index='idx_pcode')
- ),
-
- DBTable('MovieKeyword',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('movieID', INTCOL, notNone=True, index='idx_mid',
- foreignKey='Title'),
- DBCol('keywordID', INTCOL, notNone=True, index='idx_keywordid',
- foreignKey='Keyword')
- ),
-
- DBTable('MovieLink',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('movieID', INTCOL, notNone=True, index='idx_mid',
- foreignKey='Title'),
- DBCol('linkedMovieID', INTCOL, notNone=True, foreignKey='Title'),
- DBCol('linkTypeID', INTCOL, notNone=True, foreignKey='LinkType')
- ),
-
- DBTable('MovieInfo',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('movieID', INTCOL, notNone=True, index='idx_mid',
- foreignKey='Title'),
- DBCol('infoTypeID', INTCOL, notNone=True, foreignKey='InfoType'),
- DBCol('info', UNICODECOL, notNone=True),
- DBCol('note', UNICODECOL, default=None)
- ),
-
- # This table is identical to MovieInfo, except that both 'infoTypeID'
- # and 'info' are indexed.
- DBTable('MovieInfoIdx',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('movieID', INTCOL, notNone=True, index='idx_mid',
- foreignKey='Title'),
- DBCol('infoTypeID', INTCOL, notNone=True, index='idx_infotypeid',
- foreignKey='InfoType'),
- DBCol('info', UNICODECOL, notNone=True, index='idx_info', indexLen=10),
- DBCol('note', UNICODECOL, default=None)
- ),
-
- DBTable('MovieCompanies',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('movieID', INTCOL, notNone=True, index='idx_mid',
- foreignKey='Title'),
- DBCol('companyID', INTCOL, notNone=True, index='idx_cid',
- foreignKey='CompanyName'),
- DBCol('companyTypeID', INTCOL, notNone=True, foreignKey='CompanyType'),
- DBCol('note', UNICODECOL, default=None)
- ),
-
- DBTable('PersonInfo',
- DBCol('id', INTCOL, notNone=True, alternateID=True),
- DBCol('personID', INTCOL, notNone=True, index='idx_pid',
- foreignKey='Name'),
- DBCol('infoTypeID', INTCOL, notNone=True, foreignKey='InfoType'),
- DBCol('info', UNICODECOL, notNone=True),
- DBCol('note', UNICODECOL, default=None)
- )
-]
-
-
-# Functions to manage tables.
-def dropTables(tables, ifExists=True):
- """Drop the tables."""
- # In reverse order (useful to avoid errors about foreign keys).
- DB_TABLES_DROP = list(tables)
- DB_TABLES_DROP.reverse()
- for table in DB_TABLES_DROP:
- _dbschema_logger.info('dropping table %s', table._imdbpyName)
- table.dropTable(ifExists)
-
-def createTables(tables, ifNotExists=True):
- """Create the tables and insert default values."""
- for table in tables:
- # Create the table.
- _dbschema_logger.info('creating table %s', table._imdbpyName)
- table.createTable(ifNotExists)
- # Insert default values, if any.
- if table._imdbpySchema.values:
- _dbschema_logger.info('inserting values into table %s',
- table._imdbpyName)
- for key in table._imdbpySchema.values:
- for value in table._imdbpySchema.values[key]:
- table(**{key: unicode(value)})
-
-def createIndexes(tables, ifNotExists=True):
- """Create the indexes in the database."""
- for table in tables:
- _dbschema_logger.info('creating indexes for table %s',
- table._imdbpyName)
- table.addIndexes(ifNotExists)
-
-def createForeignKeys(tables, ifNotExists=True):
- """Create Foreign Keys."""
- mapTables = {}
- for table in tables:
- mapTables[table._imdbpyName] = table
- for table in tables:
- _dbschema_logger.info('creating foreign keys for table %s',
- table._imdbpyName)
- table.addForeignKeys(mapTables, ifNotExists)
-
diff --git a/libs/imdb/parser/sql/objectadapter.py b/libs/imdb/parser/sql/objectadapter.py
deleted file mode 100644
index b7ca9851..00000000
--- a/libs/imdb/parser/sql/objectadapter.py
+++ /dev/null
@@ -1,203 +0,0 @@
-"""
-parser.sql.objectadapter module (imdb.parser.sql package).
-
-This module adapts the SQLObject ORM to the internal mechanism.
-
-Copyright 2008-2010 Davide Alberani
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-import sys
-import logging
-
-from sqlobject import *
-from sqlobject.sqlbuilder import ISNULL, ISNOTNULL, AND, OR, IN, CONTAINSSTRING
-
-from dbschema import *
-
-_object_logger = logging.getLogger('imdbpy.parser.sql.object')
-
-
-# Maps our placeholders to SQLAlchemy's column types.
-MAP_COLS = {
- INTCOL: IntCol,
- UNICODECOL: UnicodeCol,
- STRINGCOL: StringCol
-}
-
-
-# Exception raised when Table.get(id) returns no value.
-NotFoundError = SQLObjectNotFound
-
-
-# class method to be added to the SQLObject class.
-def addIndexes(cls, ifNotExists=True):
- """Create all required indexes."""
- for col in cls._imdbpySchema.cols:
- if col.index:
- idxName = col.index
- colToIdx = col.name
- if col.indexLen:
- colToIdx = {'column': col.name, 'length': col.indexLen}
- if idxName in [i.name for i in cls.sqlmeta.indexes]:
- # Check if the index is already present.
- continue
- idx = DatabaseIndex(colToIdx, name=idxName)
- cls.sqlmeta.addIndex(idx)
- try:
- cls.createIndexes(ifNotExists)
- except dberrors.OperationalError, e:
- _object_logger.warn('Skipping creation of the %s.%s index: %s' %
- (cls.sqlmeta.table, col.name, e))
-addIndexes = classmethod(addIndexes)
-
-
-# Global repository for "fake" tables with Foreign Keys - need to
-# prevent troubles if addForeignKeys is called more than one time.
-FAKE_TABLES_REPOSITORY = {}
-
-def _buildFakeFKTable(cls, fakeTableName):
- """Return a "fake" table, with foreign keys where needed."""
- countCols = 0
- attrs = {}
- for col in cls._imdbpySchema.cols:
- countCols += 1
- if col.name == 'id':
- continue
- if not col.foreignKey:
- # A non-foreign key column - add it as usual.
- attrs[col.name] = MAP_COLS[col.kind](**col.params)
- continue
- # XXX: Foreign Keys pointing to TableName.ColName not yet supported.
- thisColName = col.name
- if thisColName.endswith('ID'):
- thisColName = thisColName[:-2]
-
- fks = col.foreignKey.split('.', 1)
- foreignTableName = fks[0]
- if len(fks) == 2:
- foreignColName = fks[1]
- else:
- foreignColName = 'id'
- # Unused...
- #fkName = 'fk_%s_%s_%d' % (foreignTableName, foreignColName,
- # countCols)
- # Create a Foreign Key column, with the correct references.
- fk = ForeignKey(foreignTableName, name=thisColName, default=None)
- attrs[thisColName] = fk
- # Build a _NEW_ SQLObject subclass, with foreign keys, if needed.
- newcls = type(fakeTableName, (SQLObject,), attrs)
- return newcls
-
-def addForeignKeys(cls, mapTables, ifNotExists=True):
- """Create all required foreign keys."""
- # Do not even try, if there are no FK, in this table.
- if not filter(None, [col.foreignKey for col in cls._imdbpySchema.cols]):
- return
- fakeTableName = 'myfaketable%s' % cls.sqlmeta.table
- if fakeTableName in FAKE_TABLES_REPOSITORY:
- newcls = FAKE_TABLES_REPOSITORY[fakeTableName]
- else:
- newcls = _buildFakeFKTable(cls, fakeTableName)
- FAKE_TABLES_REPOSITORY[fakeTableName] = newcls
- # Connect the class with foreign keys.
- newcls.setConnection(cls._connection)
- for col in cls._imdbpySchema.cols:
- if col.name == 'id':
- continue
- if not col.foreignKey:
- continue
- # Get the SQL that _WOULD BE_ run, if we had to create
- # this "fake" table.
- fkQuery = newcls._connection.createReferenceConstraint(newcls,
- newcls.sqlmeta.columns[col.name])
- if not fkQuery:
- # Probably the db doesn't support foreign keys (SQLite).
- continue
- # Remove "myfaketable" to get references to _real_ tables.
- fkQuery = fkQuery.replace('myfaketable', '')
- # Execute the query.
- newcls._connection.query(fkQuery)
- # Disconnect it.
- newcls._connection.close()
-addForeignKeys = classmethod(addForeignKeys)
-
-
-# Module-level "cache" for SQLObject classes, to prevent
-# "class TheClass is already in the registry" errors, when
-# two or more connections to the database are made.
-# XXX: is this the best way to act?
-TABLES_REPOSITORY = {}
-
-def getDBTables(uri=None):
- """Return a list of classes to be used to access the database
- through the SQLObject ORM. The connection uri is optional, and
- can be used to tailor the db schema to specific needs."""
- DB_TABLES = []
- for table in DB_SCHEMA:
- if table.name in TABLES_REPOSITORY:
- DB_TABLES.append(TABLES_REPOSITORY[table.name])
- continue
- attrs = {'_imdbpyName': table.name, '_imdbpySchema': table,
- 'addIndexes': addIndexes, 'addForeignKeys': addForeignKeys}
- for col in table.cols:
- if col.name == 'id':
- continue
- attrs[col.name] = MAP_COLS[col.kind](**col.params)
- # Create a subclass of SQLObject.
- # XXX: use a metaclass? I can't see any advantage.
- cls = type(table.name, (SQLObject,), attrs)
- DB_TABLES.append(cls)
- TABLES_REPOSITORY[table.name] = cls
- return DB_TABLES
-
-
-def toUTF8(s):
- """For some strange reason, sometimes SQLObject wants utf8 strings
- instead of unicode."""
- return s.encode('utf_8')
-
-
-def setConnection(uri, tables, encoding='utf8', debug=False):
- """Set connection for every table."""
- kw = {}
- # FIXME: it's absolutely unclear what we should do to correctly
- # support unicode in MySQL; with some versions of SQLObject,
- # it seems that setting use_unicode=1 is the _wrong_ thing to do.
- _uriLower = uri.lower()
- if _uriLower.startswith('mysql'):
- kw['use_unicode'] = 1
- #kw['sqlobject_encoding'] = encoding
- kw['charset'] = encoding
- conn = connectionForURI(uri, **kw)
- conn.debug = debug
- if uri.startswith('sqlite'):
- major = sys.version_info[0]
- minor = sys.version_info[1]
- if major > 2 or (major == 2 and minor > 5):
- conn.connection.connection.text_factory = str
- for table in tables:
- table.setConnection(conn)
- #table.sqlmeta.cacheValues = False
- # FIXME: is it safe to set table._cacheValue to False? Looks like
- # we can't retrieve correct values after an update (I think
- # it's never needed, but...) Anyway, these are set to False
- # for performance reason at insert time (see imdbpy2sql.py).
- table._cacheValue = False
- # Required by imdbpy2sql.py.
- conn.paramstyle = conn.module.paramstyle
- return conn
-
diff --git a/libs/imdb/utils.py b/libs/imdb/utils.py
deleted file mode 100644
index e2d9551d..00000000
--- a/libs/imdb/utils.py
+++ /dev/null
@@ -1,1545 +0,0 @@
-"""
-utils module (imdb package).
-
-This module provides basic utilities for the imdb package.
-
-Copyright 2004-2010 Davide Alberani
- 2009 H. Turgut Uyar
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-"""
-
-from __future__ import generators
-import re
-import string
-import logging
-from copy import copy, deepcopy
-from time import strptime, strftime
-
-from imdb import VERSION
-from imdb import articles
-from imdb._exceptions import IMDbParserError
-
-
-# Logger for imdb.utils module.
-_utils_logger = logging.getLogger('imdbpy.utils')
-
-# The regular expression for the "long" year format of IMDb, like
-# "(1998)" and "(1986/II)", where the optional roman number (that I call
-# "imdbIndex" after the slash is used for movies with the same title
-# and year of release.
-# XXX: probably L, C, D and M are far too much! ;-)
-re_year_index = re.compile(r'\(([0-9\?]{4}(/[IVXLCDM]+)?)\)')
-
-# Match only the imdbIndex (for name strings).
-re_index = re.compile(r'^\(([IVXLCDM]+)\)$')
-
-# Match the number of episodes.
-re_episodes = re.compile('\s?\((\d+) episodes\)', re.I)
-re_episode_info = re.compile(r'{\s*(.+?)?\s?(\([0-9\?]{4}-[0-9\?]{1,2}-[0-9\?]{1,2}\))?\s?(\(#[0-9]+\.[0-9]+\))?}')
-
-# Common suffixes in surnames.
-_sname_suffixes = ('de', 'la', 'der', 'den', 'del', 'y', 'da', 'van',
- 'e', 'von', 'the', 'di', 'du', 'el', 'al')
-
-def canonicalName(name):
- """Return the given name in canonical "Surname, Name" format.
- It assumes that name is in the 'Name Surname' format."""
- # XXX: some statistics (as of 17 Apr 2008, over 2288622 names):
- # - just a surname: 69476
- # - single surname, single name: 2209656
- # - composed surname, composed name: 9490
- # - composed surname, single name: 67606
- # (2: 59764, 3: 6862, 4: 728)
- # - single surname, composed name: 242310
- # (2: 229467, 3: 9901, 4: 2041, 5: 630)
- # - Jr.: 8025
- # Don't convert names already in the canonical format.
- if name.find(', ') != -1: return name
- if isinstance(name, unicode):
- joiner = u'%s, %s'
- sur_joiner = u'%s %s'
- sur_space = u' %s'
- space = u' '
- else:
- joiner = '%s, %s'
- sur_joiner = '%s %s'
- sur_space = ' %s'
- space = ' '
- sname = name.split(' ')
- snl = len(sname)
- if snl == 2:
- # Just a name and a surname: how boring...
- name = joiner % (sname[1], sname[0])
- elif snl > 2:
- lsname = [x.lower() for x in sname]
- if snl == 3: _indexes = (0, snl-2)
- else: _indexes = (0, snl-2, snl-3)
- # Check for common surname prefixes at the beginning and near the end.
- for index in _indexes:
- if lsname[index] not in _sname_suffixes: continue
- try:
- # Build the surname.
- surn = sur_joiner % (sname[index], sname[index+1])
- del sname[index]
- del sname[index]
- try:
- # Handle the "Jr." after the name.
- if lsname[index+2].startswith('jr'):
- surn += sur_space % sname[index]
- del sname[index]
- except (IndexError, ValueError):
- pass
- name = joiner % (surn, space.join(sname))
- break
- except ValueError:
- continue
- else:
- name = joiner % (sname[-1], space.join(sname[:-1]))
- return name
-
-def normalizeName(name):
- """Return a name in the normal "Name Surname" format."""
- if isinstance(name, unicode):
- joiner = u'%s %s'
- else:
- joiner = '%s %s'
- sname = name.split(', ')
- if len(sname) == 2:
- name = joiner % (sname[1], sname[0])
- return name
-
-def analyze_name(name, canonical=None):
- """Return a dictionary with the name and the optional imdbIndex
- keys, from the given string.
-
- If canonical is None (default), the name is stored in its own style.
- If canonical is True, the name is converted to canonical style.
- If canonical is False, the name is converted to normal format.
-
- raise an IMDbParserError exception if the name is not valid.
- """
- original_n = name
- name = name.strip()
- res = {}
- imdbIndex = ''
- opi = name.rfind('(')
- if opi != -1:
- cpi = name.rfind(')')
- if cpi > opi and re_index.match(name[opi:cpi+1]):
- imdbIndex = name[opi+1:cpi]
- name = name[:opi].rstrip()
- else:
- # XXX: for the birth and death dates case like " (1926-2004)"
- name = name[:opi-1]
- if not name:
- raise IMDbParserError, 'invalid name: "%s"' % original_n
- if canonical is not None:
- if canonical:
- name = canonicalName(name)
- else:
- name = normalizeName(name)
- res['name'] = name
- if imdbIndex:
- res['imdbIndex'] = imdbIndex
- return res
-
-
-def build_name(name_dict, canonical=None):
- """Given a dictionary that represents a "long" IMDb name,
- return a string.
- If canonical is None (default), the name is returned in the stored style.
- If canonical is True, the name is converted to canonical style.
- If canonical is False, the name is converted to normal format.
- """
- name = name_dict.get('canonical name') or name_dict.get('name', '')
- if not name: return ''
- if canonical is not None:
- if canonical:
- name = canonicalName(name)
- else:
- name = normalizeName(name)
- imdbIndex = name_dict.get('imdbIndex')
- if imdbIndex:
- name += ' (%s)' % imdbIndex
- return name
-
-
-# XXX: here only for backward compatibility. Find and remove any dependency.
-_articles = articles.GENERIC_ARTICLES
-_unicodeArticles = articles.toUnicode(_articles)
-articlesDicts = articles.articlesDictsForLang(None)
-spArticles = articles.spArticlesForLang(None)
-
-def canonicalTitle(title, lang=None):
- """Return the title in the canonic format 'Movie Title, The';
- beware that it doesn't handle long imdb titles, but only the
- title portion, without year[/imdbIndex] or special markup.
- The 'lang' argument can be used to specify the language of the title.
- """
- isUnicode = isinstance(title, unicode)
- articlesDicts = articles.articlesDictsForLang(lang)
- try:
- if title.split(', ')[-1].lower() in articlesDicts[isUnicode]:
- return title
- except IndexError:
- pass
- if isUnicode:
- _format = u'%s, %s'
- else:
- _format = '%s, %s'
- ltitle = title.lower()
- spArticles = articles.spArticlesForLang(lang)
- for article in spArticles[isUnicode]:
- if ltitle.startswith(article):
- lart = len(article)
- title = _format % (title[lart:], title[:lart])
- if article[-1] == ' ':
- title = title[:-1]
- break
- ## XXX: an attempt using a dictionary lookup.
- ##for artSeparator in (' ', "'", '-'):
- ## article = _articlesDict.get(ltitle.split(artSeparator)[0])
- ## if article is not None:
- ## lart = len(article)
- ## # check titles like "una", "I'm Mad" and "L'abbacchio".
- ## if title[lart:] == '' or (artSeparator != ' ' and
- ## title[lart:][1] != artSeparator): continue
- ## title = '%s, %s' % (title[lart:], title[:lart])
- ## if artSeparator == ' ': title = title[1:]
- ## break
- return title
-
-def normalizeTitle(title, lang=None):
- """Return the title in the normal "The Title" format;
- beware that it doesn't handle long imdb titles, but only the
- title portion, without year[/imdbIndex] or special markup.
- The 'lang' argument can be used to specify the language of the title.
- """
- isUnicode = isinstance(title, unicode)
- stitle = title.split(', ')
- articlesDicts = articles.articlesDictsForLang(lang)
- if len(stitle) > 1 and stitle[-1].lower() in articlesDicts[isUnicode]:
- sep = ' '
- if stitle[-1][-1] in ("'", '-'):
- sep = ''
- if isUnicode:
- _format = u'%s%s%s'
- _joiner = u', '
- else:
- _format = '%s%s%s'
- _joiner = ', '
- title = _format % (stitle[-1], sep, _joiner.join(stitle[:-1]))
- return title
-
-
-def _split_series_episode(title):
- """Return the series and the episode titles; if this is not a
- series' episode, the returned series title is empty.
- This function recognize two different styles:
- "The Series" An Episode (2005)
- "The Series" (2004) {An Episode (2005) (#season.episode)}"""
- series_title = ''
- episode_or_year = ''
- if title[-1:] == '}':
- # Title of the episode, as in the plain text data files.
- begin_eps = title.rfind('{')
- if begin_eps == -1: return '', ''
- series_title = title[:begin_eps].rstrip()
- # episode_or_year is returned with the {...}
- episode_or_year = title[begin_eps:].strip()
- if episode_or_year[:12] == '{SUSPENDED}}': return '', ''
- # XXX: works only with tv series; it's still unclear whether
- # IMDb will support episodes for tv mini series and tv movies...
- elif title[0:1] == '"':
- second_quot = title[1:].find('"') + 2
- if second_quot != 1: # a second " was found.
- episode_or_year = title[second_quot:].lstrip()
- first_char = episode_or_year[0:1]
- if not first_char: return '', ''
- if first_char != '(':
- # There is not a (year) but the title of the episode;
- # that means this is an episode title, as returned by
- # the web server.
- series_title = title[:second_quot]
- ##elif episode_or_year[-1:] == '}':
- ## # Title of the episode, as in the plain text data files.
- ## begin_eps = episode_or_year.find('{')
- ## if begin_eps == -1: return series_title, episode_or_year
- ## series_title = title[:second_quot+begin_eps].rstrip()
- ## # episode_or_year is returned with the {...}
- ## episode_or_year = episode_or_year[begin_eps:]
- return series_title, episode_or_year
-
-
-def is_series_episode(title):
- """Return True if 'title' is an series episode."""
- title = title.strip()
- if _split_series_episode(title)[0]: return 1
- return 0
-
-
-def analyze_title(title, canonical=None, canonicalSeries=None,
- canonicalEpisode=None, _emptyString=u''):
- """Analyze the given title and return a dictionary with the
- "stripped" title, the kind of the show ("movie", "tv series", etc.),
- the year of production and the optional imdbIndex (a roman number
- used to distinguish between movies with the same title and year).
-
- If canonical is None (default), the title is stored in its own style.
- If canonical is True, the title is converted to canonical style.
- If canonical is False, the title is converted to normal format.
-
- raise an IMDbParserError exception if the title is not valid.
- """
- # XXX: introduce the 'lang' argument?
- if canonical is not None:
- canonicalSeries = canonicalEpisode = canonical
- original_t = title
- result = {}
- title = title.strip()
- year = _emptyString
- kind = _emptyString
- imdbIndex = _emptyString
- series_title, episode_or_year = _split_series_episode(title)
- if series_title:
- # It's an episode of a series.
- series_d = analyze_title(series_title, canonical=canonicalSeries)
- oad = sen = ep_year = _emptyString
- # Plain text data files format.
- if episode_or_year[0:1] == '{' and episode_or_year[-1:] == '}':
- match = re_episode_info.findall(episode_or_year)
- if match:
- # Episode title, original air date and #season.episode
- episode_or_year, oad, sen = match[0]
- episode_or_year = episode_or_year.strip()
- if not oad:
- # No year, but the title is something like (2005-04-12)
- if episode_or_year and episode_or_year[0] == '(' and \
- episode_or_year[-1:] == ')' and \
- episode_or_year[1:2] != '#':
- oad = episode_or_year
- if oad[1:5] and oad[5:6] == '-':
- try:
- ep_year = int(oad[1:5])
- except (TypeError, ValueError):
- pass
- if not oad and not sen and episode_or_year.startswith('(#'):
- sen = episode_or_year
- elif episode_or_year.startswith('Episode dated'):
- oad = episode_or_year[14:]
- if oad[-4:].isdigit():
- try:
- ep_year = int(oad[-4:])
- except (TypeError, ValueError):
- pass
- episode_d = analyze_title(episode_or_year, canonical=canonicalEpisode)
- episode_d['kind'] = u'episode'
- episode_d['episode of'] = series_d
- if oad:
- episode_d['original air date'] = oad[1:-1]
- if ep_year and episode_d.get('year') is None:
- episode_d['year'] = ep_year
- if sen and sen[2:-1].find('.') != -1:
- seas, epn = sen[2:-1].split('.')
- if seas:
- # Set season and episode.
- try: seas = int(seas)
- except: pass
- try: epn = int(epn)
- except: pass
- episode_d['season'] = seas
- if epn:
- episode_d['episode'] = epn
- return episode_d
- # First of all, search for the kind of show.
- # XXX: Number of entries at 17 Apr 2008:
- # movie: 379,871
- # episode: 483,832
- # tv movie: 61,119
- # tv series: 44,795
- # video movie: 57,915
- # tv mini series: 5,497
- # video game: 5,490
- # More up-to-date statistics: http://us.imdb.com/database_statistics
- if title.endswith('(TV)'):
- kind = u'tv movie'
- title = title[:-4].rstrip()
- elif title.endswith('(V)'):
- kind = u'video movie'
- title = title[:-3].rstrip()
- elif title.endswith('(video)'):
- kind = u'video movie'
- title = title[:-7].rstrip()
- elif title.endswith('(mini)'):
- kind = u'tv mini series'
- title = title[:-6].rstrip()
- elif title.endswith('(VG)'):
- kind = u'video game'
- title = title[:-4].rstrip()
- # Search for the year and the optional imdbIndex (a roman number).
- yi = re_year_index.findall(title)
- if yi:
- last_yi = yi[-1]
- year = last_yi[0]
- if last_yi[1]:
- imdbIndex = last_yi[1][1:]
- year = year[:-len(imdbIndex)-1]
- i = title.rfind('(%s)' % last_yi[0])
- if i != -1:
- title = title[:i-1].rstrip()
- # This is a tv (mini) series: strip the '"' at the begin and at the end.
- # XXX: strip('"') is not used for compatibility with Python 2.0.
- if title and title[0] == title[-1] == '"':
- if not kind:
- kind = u'tv series'
- title = title[1:-1].strip()
- elif title.endswith('(TV series)'):
- kind = u'tv series'
- title = title[:-11].rstrip()
- if not title:
- raise IMDbParserError, 'invalid title: "%s"' % original_t
- if canonical is not None:
- if canonical:
- title = canonicalTitle(title)
- else:
- title = normalizeTitle(title)
- # 'kind' is one in ('movie', 'episode', 'tv series', 'tv mini series',
- # 'tv movie', 'video movie', 'video game')
- result['title'] = title
- result['kind'] = kind or u'movie'
- if year and year != '????':
- try:
- result['year'] = int(year)
- except (TypeError, ValueError):
- pass
- if imdbIndex:
- result['imdbIndex'] = imdbIndex
- if isinstance(_emptyString, str):
- result['kind'] = str(kind or 'movie')
- return result
-
-
-_web_format = '%d %B %Y'
-_ptdf_format = '(%Y-%m-%d)'
-def _convertTime(title, fromPTDFtoWEB=1, _emptyString=u''):
- """Convert a time expressed in the pain text data files, to
- the 'Episode dated ...' format used on the web site; if
- fromPTDFtoWEB is false, the inverted conversion is applied."""
- try:
- if fromPTDFtoWEB:
- from_format = _ptdf_format
- to_format = _web_format
- else:
- from_format = u'Episode dated %s' % _web_format
- to_format = _ptdf_format
- t = strptime(title, from_format)
- title = strftime(to_format, t)
- if fromPTDFtoWEB:
- if title[0] == '0': title = title[1:]
- title = u'Episode dated %s' % title
- except ValueError:
- pass
- if isinstance(_emptyString, str):
- try:
- title = str(title)
- except UnicodeDecodeError:
- pass
- return title
-
-
-def build_title(title_dict, canonical=None, canonicalSeries=None,
- canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
- _emptyString=u''):
- """Given a dictionary that represents a "long" IMDb title,
- return a string.
-
- If canonical is None (default), the title is returned in the stored style.
- If canonical is True, the title is converted to canonical style.
- If canonical is False, the title is converted to normal format.
-
- lang can be used to specify the language of the title.
-
- If ptdf is true, the plain text data files format is used.
- """
- if canonical is not None:
- canonicalSeries = canonical
- pre_title = _emptyString
- kind = title_dict.get('kind')
- episode_of = title_dict.get('episode of')
- if kind == 'episode' and episode_of is not None:
- # Works with both Movie instances and plain dictionaries.
- doYear = 0
- if ptdf:
- doYear = 1
- pre_title = build_title(episode_of, canonical=canonicalSeries,
- ptdf=0, _doYear=doYear,
- _emptyString=_emptyString)
- ep_dict = {'title': title_dict.get('title', ''),
- 'imdbIndex': title_dict.get('imdbIndex')}
- ep_title = ep_dict['title']
- if not ptdf:
- doYear = 1
- ep_dict['year'] = title_dict.get('year', '????')
- if ep_title[0:1] == '(' and ep_title[-1:] == ')' and \
- ep_title[1:5].isdigit():
- ep_dict['title'] = _convertTime(ep_title, fromPTDFtoWEB=1,
- _emptyString=_emptyString)
- else:
- doYear = 0
- if ep_title.startswith('Episode dated'):
- ep_dict['title'] = _convertTime(ep_title, fromPTDFtoWEB=0,
- _emptyString=_emptyString)
- episode_title = build_title(ep_dict,
- canonical=canonicalEpisode, ptdf=ptdf,
- _doYear=doYear, _emptyString=_emptyString)
- if ptdf:
- oad = title_dict.get('original air date', _emptyString)
- if len(oad) == 10 and oad[4] == '-' and oad[7] == '-' and \
- episode_title.find(oad) == -1:
- episode_title += ' (%s)' % oad
- seas = title_dict.get('season')
- if seas is not None:
- episode_title += ' (#%s' % seas
- episode = title_dict.get('episode')
- if episode is not None:
- episode_title += '.%s' % episode
- episode_title += ')'
- episode_title = '{%s}' % episode_title
- return '%s %s' % (pre_title, episode_title)
- title = title_dict.get('title', '')
- if not title: return _emptyString
- if canonical is not None:
- if canonical:
- title = canonicalTitle(title, lang=lang)
- else:
- title = normalizeTitle(title, lang=lang)
- if pre_title:
- title = '%s %s' % (pre_title, title)
- if kind in (u'tv series', u'tv mini series'):
- title = '"%s"' % title
- if _doYear:
- imdbIndex = title_dict.get('imdbIndex')
- year = title_dict.get('year') or u'????'
- if isinstance(_emptyString, str):
- year = str(year)
- title += ' (%s' % year
- if imdbIndex:
- title += '/%s' % imdbIndex
- title += ')'
- if kind:
- if kind == 'tv movie':
- title += ' (TV)'
- elif kind == 'video movie':
- title += ' (V)'
- elif kind == 'tv mini series':
- title += ' (mini)'
- elif kind == 'video game':
- title += ' (VG)'
- return title
-
-
-def split_company_name_notes(name):
- """Return two strings, the first representing the company name,
- and the other representing the (optional) notes."""
- name = name.strip()
- notes = u''
- if name.endswith(')'):
- fpidx = name.find('(')
- if fpidx != -1:
- notes = name[fpidx:]
- name = name[:fpidx].rstrip()
- return name, notes
-
-
-def analyze_company_name(name, stripNotes=False):
- """Return a dictionary with the name and the optional 'country'
- keys, from the given string.
- If stripNotes is true, tries to not consider optional notes.
-
- raise an IMDbParserError exception if the name is not valid.
- """
- if stripNotes:
- name = split_company_name_notes(name)[0]
- o_name = name
- name = name.strip()
- country = None
- if name.endswith(']'):
- idx = name.rfind('[')
- if idx != -1:
- country = name[idx:]
- name = name[:idx].rstrip()
- if not name:
- raise IMDbParserError, 'invalid name: "%s"' % o_name
- result = {'name': name}
- if country:
- result['country'] = country
- return result
-
-
-def build_company_name(name_dict, _emptyString=u''):
- """Given a dictionary that represents a "long" IMDb company name,
- return a string.
- """
- name = name_dict.get('name')
- if not name:
- return _emptyString
- country = name_dict.get('country')
- if country is not None:
- name += ' %s' % country
- return name
-
-
-class _LastC:
- """Size matters."""
- def __cmp__(self, other):
- if isinstance(other, self.__class__): return 0
- return 1
-
-_last = _LastC()
-
-def cmpMovies(m1, m2):
- """Compare two movies by year, in reverse order; the imdbIndex is checked
- for movies with the same year of production and title."""
- # Sort tv series' episodes.
- m1e = m1.get('episode of')
- m2e = m2.get('episode of')
- if m1e is not None and m2e is not None:
- cmp_series = cmpMovies(m1e, m2e)
- if cmp_series != 0:
- return cmp_series
- m1s = m1.get('season')
- m2s = m2.get('season')
- if m1s is not None and m2s is not None:
- if m1s < m2s:
- return 1
- elif m1s > m2s:
- return -1
- m1p = m1.get('episode')
- m2p = m2.get('episode')
- if m1p < m2p:
- return 1
- elif m1p > m2p:
- return -1
- try:
- if m1e is None: m1y = int(m1.get('year', 0))
- else: m1y = int(m1e.get('year', 0))
- except ValueError:
- m1y = 0
- try:
- if m2e is None: m2y = int(m2.get('year', 0))
- else: m2y = int(m2e.get('year', 0))
- except ValueError:
- m2y = 0
- if m1y > m2y: return -1
- if m1y < m2y: return 1
- # Ok, these movies have the same production year...
- #m1t = m1.get('canonical title', _last)
- #m2t = m2.get('canonical title', _last)
- # It should works also with normal dictionaries (returned from searches).
- #if m1t is _last and m2t is _last:
- m1t = m1.get('title', _last)
- m2t = m2.get('title', _last)
- if m1t < m2t: return -1
- if m1t > m2t: return 1
- # Ok, these movies have the same title...
- m1i = m1.get('imdbIndex', _last)
- m2i = m2.get('imdbIndex', _last)
- if m1i > m2i: return -1
- if m1i < m2i: return 1
- m1id = getattr(m1, 'movieID', None)
- # Introduce this check even for other comparisons functions?
- # XXX: is it safe to check without knowning the data access system?
- # probably not a great idea. Check for 'kind', instead?
- if m1id is not None:
- m2id = getattr(m2, 'movieID', None)
- if m1id > m2id: return -1
- elif m1id < m2id: return 1
- return 0
-
-
-def cmpPeople(p1, p2):
- """Compare two people by billingPos, name and imdbIndex."""
- p1b = getattr(p1, 'billingPos', None) or _last
- p2b = getattr(p2, 'billingPos', None) or _last
- if p1b > p2b: return 1
- if p1b < p2b: return -1
- p1n = p1.get('canonical name', _last)
- p2n = p2.get('canonical name', _last)
- if p1n is _last and p2n is _last:
- p1n = p1.get('name', _last)
- p2n = p2.get('name', _last)
- if p1n > p2n: return 1
- if p1n < p2n: return -1
- p1i = p1.get('imdbIndex', _last)
- p2i = p2.get('imdbIndex', _last)
- if p1i > p2i: return 1
- if p1i < p2i: return -1
- return 0
-
-
-def cmpCompanies(p1, p2):
- """Compare two companies."""
- p1n = p1.get('long imdb name', _last)
- p2n = p2.get('long imdb name', _last)
- if p1n is _last and p2n is _last:
- p1n = p1.get('name', _last)
- p2n = p2.get('name', _last)
- if p1n > p2n: return 1
- if p1n < p2n: return -1
- p1i = p1.get('country', _last)
- p2i = p2.get('country', _last)
- if p1i > p2i: return 1
- if p1i < p2i: return -1
- return 0
-
-
-# References to titles, names and characters.
-# XXX: find better regexp!
-re_titleRef = re.compile(r'_(.+?(?: \([0-9\?]{4}(?:/[IVXLCDM]+)?\))?(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)_ \(qv\)')
-# FIXME: doesn't match persons with ' in the name.
-re_nameRef = re.compile(r"'([^']+?)' \(qv\)")
-# XXX: good choice? Are there characters with # in the name?
-re_characterRef = re.compile(r"#([^']+?)# \(qv\)")
-
-# Functions used to filter the text strings.
-def modNull(s, titlesRefs, namesRefs, charactersRefs):
- """Do nothing."""
- return s
-
-def modClearTitleRefs(s, titlesRefs, namesRefs, charactersRefs):
- """Remove titles references."""
- return re_titleRef.sub(r'\1', s)
-
-def modClearNameRefs(s, titlesRefs, namesRefs, charactersRefs):
- """Remove names references."""
- return re_nameRef.sub(r'\1', s)
-
-def modClearCharacterRefs(s, titlesRefs, namesRefs, charactersRefs):
- """Remove characters references"""
- return re_characterRef.sub(r'\1', s)
-
-def modClearRefs(s, titlesRefs, namesRefs, charactersRefs):
- """Remove titles, names and characters references."""
- s = modClearTitleRefs(s, {}, {}, {})
- s = modClearCharacterRefs(s, {}, {}, {})
- return modClearNameRefs(s, {}, {}, {})
-
-
-def modifyStrings(o, modFunct, titlesRefs, namesRefs, charactersRefs):
- """Modify a string (or string values in a dictionary or strings
- in a list), using the provided modFunct function and titlesRefs
- namesRefs and charactersRefs references dictionaries."""
- # Notice that it doesn't go any deeper than the first two levels in a list.
- if isinstance(o, (unicode, str)):
- return modFunct(o, titlesRefs, namesRefs, charactersRefs)
- elif isinstance(o, (list, tuple, dict)):
- _stillorig = 1
- if isinstance(o, (list, tuple)): keys = xrange(len(o))
- else: keys = o.keys()
- for i in keys:
- v = o[i]
- if isinstance(v, (unicode, str)):
- if _stillorig:
- o = copy(o)
- _stillorig = 0
- o[i] = modFunct(v, titlesRefs, namesRefs, charactersRefs)
- elif isinstance(v, (list, tuple)):
- modifyStrings(o[i], modFunct, titlesRefs, namesRefs,
- charactersRefs)
- return o
-
-
-def date_and_notes(s):
- """Parse (birth|death) date and notes; returns a tuple in the
- form (date, notes)."""
- s = s.strip()
- if not s: return (u'', u'')
- notes = u''
- if s[0].isdigit() or s.split()[0].lower() in ('c.', 'january', 'february',
- 'march', 'april', 'may', 'june',
- 'july', 'august', 'september',
- 'october', 'november',
- 'december', 'ca.', 'circa',
- '????,'):
- i = s.find(',')
- if i != -1:
- notes = s[i+1:].strip()
- s = s[:i]
- else:
- notes = s
- s = u''
- if s == '????': s = u''
- return s, notes
-
-
-class RolesList(list):
- """A list of Person or Character instances, used for the currentRole
- property."""
- def __unicode__(self):
- return u' / '.join([unicode(x) for x in self])
-
- def __str__(self):
- # FIXME: does it make sense at all? Return a unicode doesn't
- # seem right, in __str__.
- return u' / '.join([unicode(x).encode('utf8') for x in self])
-
-
-# Replace & with &, but only if it's not already part of a charref.
-#_re_amp = re.compile(r'(&)(?!\w+;)', re.I)
-#_re_amp = re.compile(r'(?<=\W)&(?=[^a-zA-Z0-9_#])')
-_re_amp = re.compile(r'&(?![^a-zA-Z0-9_#]{1,5};)')
-
-def escape4xml(value):
- """Escape some chars that can't be present in a XML value."""
- if isinstance(value, int):
- value = str(value)
- value = _re_amp.sub('&', value)
- value = value.replace('"', '"').replace("'", ''')
- value = value.replace('<', '<').replace('>', '>')
- if isinstance(value, unicode):
- value = value.encode('ascii', 'xmlcharrefreplace')
- return value
-
-
-def _refsToReplace(value, modFunct, titlesRefs, namesRefs, charactersRefs):
- """Return three lists - for movie titles, persons and characters names -
- with two items tuples: the first item is the reference once escaped
- by the user-provided modFunct function, the second is the same
- reference un-escaped."""
- mRefs = []
- for refRe, refTemplate in [(re_titleRef, u'_%s_ (qv)'),
- (re_nameRef, u"'%s' (qv)"),
- (re_characterRef, u'#%s# (qv)')]:
- theseRefs = []
- for theRef in refRe.findall(value):
- # refTemplate % theRef values don't change for a single
- # _Container instance, so this is a good candidate for a
- # cache or something - even if it's so rarely used that...
- # Moreover, it can grow - ia.update(...) - and change if
- # modFunct is modified.
- goodValue = modFunct(refTemplate % theRef, titlesRefs, namesRefs,
- charactersRefs)
- # Prevents problems with crap in plain text data files.
- # We should probably exclude invalid chars and string that
- # are too long in the re_*Ref expressions.
- if '_' in goodValue or len(goodValue) > 128:
- continue
- toReplace = escape4xml(goodValue)
- # Only the 'value' portion is replaced.
- replaceWith = goodValue.replace(theRef, escape4xml(theRef))
- theseRefs.append((toReplace, replaceWith))
- mRefs.append(theseRefs)
- return mRefs
-
-
-def _handleTextNotes(s):
- """Split text::notes strings."""
- ssplit = s.split('::', 1)
- if len(ssplit) == 1:
- return s
- return u'%s%s' % (ssplit[0], ssplit[1])
-
-
-def _normalizeValue(value, withRefs=False, modFunct=None, titlesRefs=None,
- namesRefs=None, charactersRefs=None):
- """Replace some chars that can't be present in a XML text."""
- # XXX: use s.encode(encoding, 'xmlcharrefreplace') ? Probably not
- # a great idea: after all, returning a unicode is safe.
- if isinstance(value, (unicode, str)):
- if not withRefs:
- value = _handleTextNotes(escape4xml(value))
- else:
- # Replace references that were accidentally escaped.
- replaceLists = _refsToReplace(value, modFunct, titlesRefs,
- namesRefs, charactersRefs)
- value = modFunct(value, titlesRefs or {}, namesRefs or {},
- charactersRefs or {})
- value = _handleTextNotes(escape4xml(value))
- for replaceList in replaceLists:
- for toReplace, replaceWith in replaceList:
- value = value.replace(toReplace, replaceWith)
- else:
- value = unicode(value)
- return value
-
-
-def _tag4TON(ton, addAccessSystem=False, _containerOnly=False):
- """Build a tag for the given _Container instance;
- both open and close tags are returned."""
- tag = ton.__class__.__name__.lower()
- what = 'name'
- if tag == 'movie':
- value = ton.get('long imdb title') or ton.get('title', '')
- what = 'title'
- else:
- value = ton.get('long imdb name') or ton.get('name', '')
- value = _normalizeValue(value)
- extras = u''
- crl = ton.currentRole
- if crl:
- if not isinstance(crl, list):
- crl = [crl]
- for cr in crl:
- crTag = cr.__class__.__name__.lower()
- crValue = cr['long imdb name']
- crValue = _normalizeValue(crValue)
- crID = cr.getID()
- if crID is not None:
- extras += u'<%s id="%s">' \
- u'%s%s>' % (crTag, crID,
- crValue, crTag)
- else:
- extras += u'<%s>%s%s>' % \
- (crTag, crValue, crTag)
- if cr.notes:
- extras += u'%s' % _normalizeValue(cr.notes)
- extras += u''
- theID = ton.getID()
- if theID is not None:
- beginTag = u'<%s id="%s"' % (tag, theID)
- if addAccessSystem and ton.accessSystem:
- beginTag += ' access-system="%s"' % ton.accessSystem
- if not _containerOnly:
- beginTag += u'><%s>%s%s>' % (what, value, what)
- else:
- beginTag += u'>'
- else:
- if not _containerOnly:
- beginTag = u'<%s><%s>%s%s>' % (tag, what, value, what)
- else:
- beginTag = u'<%s>' % tag
- beginTag += extras
- if ton.notes:
- beginTag += u'%s' % _normalizeValue(ton.notes)
- return (beginTag, u'%s>' % tag)
-
-
-TAGS_TO_MODIFY = {
- 'movie.parents-guide': ('item', True),
- 'movie.number-of-votes': ('item', True),
- 'movie.soundtrack.item': ('item', True),
- 'movie.quotes': ('quote', False),
- 'movie.quotes.quote': ('line', False),
- 'movie.demographic': ('item', True),
- 'movie.episodes': ('season', True),
- 'movie.episodes.season': ('episode', True),
- 'person.merchandising-links': ('item', True),
- 'person.genres': ('item', True),
- 'person.quotes': ('quote', False),
- 'person.keywords': ('item', True),
- 'character.quotes': ('item', True),
- 'character.quotes.item': ('quote', False),
- 'character.quotes.item.quote': ('line', False)
- }
-
-_allchars = string.maketrans('', '')
-_keepchars = _allchars.translate(_allchars, string.ascii_lowercase + '-' +
- string.digits)
-
-def _tagAttr(key, fullpath):
- """Return a tuple with a tag name and a (possibly empty) attribute,
- applying the conversions specified in TAGS_TO_MODIFY and checking
- that the tag is safe for a XML document."""
- attrs = {}
- _escapedKey = escape4xml(key)
- if fullpath in TAGS_TO_MODIFY:
- tagName, useTitle = TAGS_TO_MODIFY[fullpath]
- if useTitle:
- attrs['key'] = _escapedKey
- elif not isinstance(key, unicode):
- if isinstance(key, str):
- tagName = unicode(key, 'ascii', 'ignore')
- else:
- strType = str(type(key)).replace("", "")
- attrs['keytype'] = strType
- tagName = unicode(key)
- else:
- tagName = key
- if isinstance(key, int):
- attrs['keytype'] = 'int'
- origTagName = tagName
- tagName = tagName.lower().replace(' ', '-')
- tagName = str(tagName).translate(_allchars, _keepchars)
- if origTagName != tagName:
- if 'key' not in attrs:
- attrs['key'] = _escapedKey
- if (not tagName) or tagName[0].isdigit() or tagName[0] == '-':
- # This is a fail-safe: we should never be here, since unpredictable
- # keys must be listed in TAGS_TO_MODIFY.
- # This will proably break the DTD/schema, but at least it will
- # produce a valid XML.
- tagName = 'item'
- _utils_logger.error('invalid tag: %s [%s]' % (_escapedKey, fullpath))
- attrs['key'] = _escapedKey
- return tagName, u' '.join([u'%s="%s"' % i for i in attrs.items()])
-
-
-def _seq2xml(seq, _l=None, withRefs=False, modFunct=None,
- titlesRefs=None, namesRefs=None, charactersRefs=None,
- _topLevel=True, key2infoset=None, fullpath=''):
- """Convert a sequence or a dictionary to a list of XML
- unicode strings."""
- if _l is None:
- _l = []
- if isinstance(seq, dict):
- for key in seq:
- value = seq[key]
- if isinstance(key, _Container):
- # Here we're assuming that a _Container is never a top-level
- # key (otherwise we should handle key2infoset).
- openTag, closeTag = _tag4TON(key)
- # So that fullpath will contains something meaningful.
- tagName = key.__class__.__name__.lower()
- else:
- tagName, attrs = _tagAttr(key, fullpath)
- openTag = u'<%s' % tagName
- if attrs:
- openTag += ' %s' % attrs
- if _topLevel and key2infoset and key in key2infoset:
- openTag += u' infoset="%s"' % key2infoset[key]
- if isinstance(value, int):
- openTag += ' type="int"'
- elif isinstance(value, float):
- openTag += ' type="float"'
- openTag += u'>'
- closeTag = u'%s>' % tagName
- _l.append(openTag)
- _seq2xml(value, _l, withRefs, modFunct, titlesRefs,
- namesRefs, charactersRefs, _topLevel=False,
- fullpath='%s.%s' % (fullpath, tagName))
- _l.append(closeTag)
- elif isinstance(seq, (list, tuple)):
- tagName, attrs = _tagAttr('item', fullpath)
- beginTag = u'<%s' % tagName
- if attrs:
- beginTag += u' %s' % attrs
- #beginTag += u'>'
- closeTag = u'%s>' % tagName
- for item in seq:
- if isinstance(item, _Container):
- _seq2xml(item, _l, withRefs, modFunct, titlesRefs,
- namesRefs, charactersRefs, _topLevel=False,
- fullpath='%s.%s' % (fullpath,
- item.__class__.__name__.lower()))
- else:
- openTag = beginTag
- if isinstance(item, int):
- openTag += ' type="int"'
- elif isinstance(item, float):
- openTag += ' type="float"'
- openTag += u'>'
- _l.append(openTag)
- _seq2xml(item, _l, withRefs, modFunct, titlesRefs,
- namesRefs, charactersRefs, _topLevel=False,
- fullpath='%s.%s' % (fullpath, tagName))
- _l.append(closeTag)
- else:
- if isinstance(seq, _Container):
- _l.extend(_tag4TON(seq))
- else:
- # Text, ints, floats and the like.
- _l.append(_normalizeValue(seq, withRefs=withRefs,
- modFunct=modFunct,
- titlesRefs=titlesRefs,
- namesRefs=namesRefs,
- charactersRefs=charactersRefs))
- return _l
-
-
-_xmlHead = u"""
-
-
-"""
-_xmlHead = _xmlHead.replace('{VERSION}',
- VERSION.replace('.', '').split('dev')[0][:2])
-
-
-class _Container(object):
- """Base class for Movie, Person, Character and Company classes."""
- # The default sets of information retrieved.
- default_info = ()
-
- # Aliases for some not-so-intuitive keys.
- keys_alias = {}
-
- # List of keys to modify.
- keys_tomodify_list = ()
-
- # Function used to compare two instances of this class.
- cmpFunct = None
-
- # Regular expression used to build the 'full-size (headshot|cover url)'.
- _re_fullsizeURL = re.compile(r'\._V1\._SX(\d+)_SY(\d+)_')
-
- def __init__(self, myID=None, data=None, notes=u'',
- currentRole=u'', roleID=None, roleIsPerson=False,
- accessSystem=None, titlesRefs=None, namesRefs=None,
- charactersRefs=None, modFunct=None, *args, **kwds):
- """Initialize a Movie, Person, Character or Company object.
- *myID* -- your personal identifier for this object.
- *data* -- a dictionary used to initialize the object.
- *notes* -- notes for the person referred in the currentRole
- attribute; e.g.: '(voice)' or the alias used in the
- movie credits.
- *accessSystem* -- a string representing the data access system used.
- *currentRole* -- a Character instance representing the current role
- or duty of a person in this movie, or a Person
- object representing the actor/actress who played
- a given character in a Movie. If a string is
- passed, an object is automatically build.
- *roleID* -- if available, the characterID/personID of the currentRole
- object.
- *roleIsPerson* -- when False (default) the currentRole is assumed
- to be a Character object, otherwise a Person.
- *titlesRefs* -- a dictionary with references to movies.
- *namesRefs* -- a dictionary with references to persons.
- *charactersRefs* -- a dictionary with references to characters.
- *modFunct* -- function called returning text fields.
- """
- self.reset()
- self.accessSystem = accessSystem
- self.myID = myID
- if data is None: data = {}
- self.set_data(data, override=1)
- self.notes = notes
- if titlesRefs is None: titlesRefs = {}
- self.update_titlesRefs(titlesRefs)
- if namesRefs is None: namesRefs = {}
- self.update_namesRefs(namesRefs)
- if charactersRefs is None: charactersRefs = {}
- self.update_charactersRefs(charactersRefs)
- self.set_mod_funct(modFunct)
- self.keys_tomodify = {}
- for item in self.keys_tomodify_list:
- self.keys_tomodify[item] = None
- self._roleIsPerson = roleIsPerson
- if not roleIsPerson:
- from imdb.Character import Character
- self._roleClass = Character
- else:
- from imdb.Person import Person
- self._roleClass = Person
- self.currentRole = currentRole
- if roleID:
- self.roleID = roleID
- self._init(*args, **kwds)
-
- def _get_roleID(self):
- """Return the characterID or personID of the currentRole object."""
- if not self.__role:
- return None
- if isinstance(self.__role, list):
- return [x.getID() for x in self.__role]
- return self.currentRole.getID()
-
- def _set_roleID(self, roleID):
- """Set the characterID or personID of the currentRole object."""
- if not self.__role:
- # XXX: needed? Just ignore it? It's probably safer to
- # ignore it, to prevent some bugs in the parsers.
- #raise IMDbError,"Can't set ID of an empty Character/Person object."
- pass
- if not self._roleIsPerson:
- if not isinstance(roleID, (list, tuple)):
- self.currentRole.characterID = roleID
- else:
- for index, item in enumerate(roleID):
- self.__role[index].characterID = item
- else:
- if not isinstance(roleID, (list, tuple)):
- self.currentRole.personID = roleID
- else:
- for index, item in enumerate(roleID):
- self.__role[index].personID = item
-
- roleID = property(_get_roleID, _set_roleID,
- doc="the characterID or personID of the currentRole object.")
-
- def _get_currentRole(self):
- """Return a Character or Person instance."""
- if self.__role:
- return self.__role
- return self._roleClass(name=u'', accessSystem=self.accessSystem,
- modFunct=self.modFunct)
-
- def _set_currentRole(self, role):
- """Set self.currentRole to a Character or Person instance."""
- if isinstance(role, (unicode, str)):
- if not role:
- self.__role = None
- else:
- self.__role = self._roleClass(name=role, modFunct=self.modFunct,
- accessSystem=self.accessSystem)
- elif isinstance(role, (list, tuple)):
- self.__role = RolesList()
- for item in role:
- if isinstance(item, (unicode, str)):
- self.__role.append(self._roleClass(name=item,
- accessSystem=self.accessSystem,
- modFunct=self.modFunct))
- else:
- self.__role.append(item)
- if not self.__role:
- self.__role = None
- else:
- self.__role = role
-
- currentRole = property(_get_currentRole, _set_currentRole,
- doc="The role of a Person in a Movie" + \
- " or the interpreter of a Character in a Movie.")
-
- def _init(self, **kwds): pass
-
- def reset(self):
- """Reset the object."""
- self.data = {}
- self.myID = None
- self.notes = u''
- self.titlesRefs = {}
- self.namesRefs = {}
- self.charactersRefs = {}
- self.modFunct = modClearRefs
- self.current_info = []
- self.infoset2keys = {}
- self.key2infoset = {}
- self.__role = None
- self._reset()
-
- def _reset(self): pass
-
- def clear(self):
- """Reset the dictionary."""
- self.data.clear()
- self.notes = u''
- self.titlesRefs = {}
- self.namesRefs = {}
- self.charactersRefs = {}
- self.current_info = []
- self.infoset2keys = {}
- self.key2infoset = {}
- self.__role = None
- self._clear()
-
- def _clear(self): pass
-
- def get_current_info(self):
- """Return the current set of information retrieved."""
- return self.current_info
-
- def update_infoset_map(self, infoset, keys, mainInfoset):
- """Update the mappings between infoset and keys."""
- if keys is None:
- keys = []
- if mainInfoset is not None:
- theIS = mainInfoset
- else:
- theIS = infoset
- self.infoset2keys[theIS] = keys
- for key in keys:
- self.key2infoset[key] = theIS
-
- def set_current_info(self, ci):
- """Set the current set of information retrieved."""
- # XXX:Remove? It's never used and there's no way to update infoset2keys.
- self.current_info = ci
-
- def add_to_current_info(self, val, keys=None, mainInfoset=None):
- """Add a set of information to the current list."""
- if val not in self.current_info:
- self.current_info.append(val)
- self.update_infoset_map(val, keys, mainInfoset)
-
- def has_current_info(self, val):
- """Return true if the given set of information is in the list."""
- return val in self.current_info
-
- def set_mod_funct(self, modFunct):
- """Set the fuction used to modify the strings."""
- if modFunct is None: modFunct = modClearRefs
- self.modFunct = modFunct
-
- def update_titlesRefs(self, titlesRefs):
- """Update the dictionary with the references to movies."""
- self.titlesRefs.update(titlesRefs)
-
- def get_titlesRefs(self):
- """Return the dictionary with the references to movies."""
- return self.titlesRefs
-
- def update_namesRefs(self, namesRefs):
- """Update the dictionary with the references to names."""
- self.namesRefs.update(namesRefs)
-
- def get_namesRefs(self):
- """Return the dictionary with the references to names."""
- return self.namesRefs
-
- def update_charactersRefs(self, charactersRefs):
- """Update the dictionary with the references to characters."""
- self.charactersRefs.update(charactersRefs)
-
- def get_charactersRefs(self):
- """Return the dictionary with the references to characters."""
- return self.charactersRefs
-
- def set_data(self, data, override=0):
- """Set the movie data to the given dictionary; if 'override' is
- set, the previous data is removed, otherwise the two dictionary
- are merged.
- """
- if not override:
- self.data.update(data)
- else:
- self.data = data
-
- def getID(self):
- """Return movieID, personID, characterID or companyID."""
- raise NotImplementedError, 'override this method'
-
- def __cmp__(self, other):
- """Compare two Movie, Person, Character or Company objects."""
- # XXX: raise an exception?
- if self.cmpFunct is None: return -1
- if not isinstance(other, self.__class__): return -1
- return self.cmpFunct(other)
-
- def __hash__(self):
- """Hash for this object."""
- # XXX: does it always work correctly?
- theID = self.getID()
- if theID is not None and self.accessSystem not in ('UNKNOWN', None):
- # Handle 'http' and 'mobile' as they are the same access system.
- acs = self.accessSystem
- if acs in ('mobile', 'httpThin'):
- acs = 'http'
- # There must be some indication of the kind of the object, too.
- s4h = '%s:%s[%s]' % (self.__class__.__name__, theID, acs)
- else:
- s4h = repr(self)
- return hash(s4h)
-
- def isSame(self, other):
- """Return True if the two represent the same object."""
- if not isinstance(other, self.__class__): return 0
- if hash(self) == hash(other): return 1
- return 0
-
- def __len__(self):
- """Number of items in the data dictionary."""
- return len(self.data)
-
- def getAsXML(self, key, _with_add_keys=True):
- """Return a XML representation of the specified key, or None
- if empty. If _with_add_keys is False, dinamically generated
- keys are excluded."""
- # Prevent modifyStrings in __getitem__ to be called; if needed,
- # it will be called by the _normalizeValue function.
- origModFunct = self.modFunct
- self.modFunct = modNull
- # XXX: not totally sure it's a good idea, but could prevent
- # problems (i.e.: the returned string always contains
- # a DTD valid tag, and not something that can be only in
- # the keys_alias map).
- key = self.keys_alias.get(key, key)
- if (not _with_add_keys) and (key in self._additional_keys()):
- self.modFunct = origModFunct
- return None
- try:
- withRefs = False
- if key in self.keys_tomodify and \
- origModFunct not in (None, modNull):
- withRefs = True
- value = self.get(key)
- if value is None:
- return None
- tag = self.__class__.__name__.lower()
- return u''.join(_seq2xml({key: value}, withRefs=withRefs,
- modFunct=origModFunct,
- titlesRefs=self.titlesRefs,
- namesRefs=self.namesRefs,
- charactersRefs=self.charactersRefs,
- key2infoset=self.key2infoset,
- fullpath=tag))
- finally:
- self.modFunct = origModFunct
-
- def asXML(self, _with_add_keys=True):
- """Return a XML representation of the whole object.
- If _with_add_keys is False, dinamically generated keys are excluded."""
- beginTag, endTag = _tag4TON(self, addAccessSystem=True,
- _containerOnly=True)
- resList = [beginTag]
- for key in self.keys():
- value = self.getAsXML(key, _with_add_keys=_with_add_keys)
- if not value:
- continue
- resList.append(value)
- resList.append(endTag)
- head = _xmlHead % self.__class__.__name__.lower()
- return head + u''.join(resList)
-
- def _getitem(self, key):
- """Handle special keys."""
- return None
-
- def __getitem__(self, key):
- """Return the value for a given key, checking key aliases;
- a KeyError exception is raised if the key is not found.
- """
- value = self._getitem(key)
- if value is not None: return value
- # Handle key aliases.
- key = self.keys_alias.get(key, key)
- rawData = self.data[key]
- if key in self.keys_tomodify and \
- self.modFunct not in (None, modNull):
- try:
- return modifyStrings(rawData, self.modFunct, self.titlesRefs,
- self.namesRefs, self.charactersRefs)
- except RuntimeError, e:
- # Symbian/python 2.2 has a poor regexp implementation.
- import warnings
- warnings.warn('RuntimeError in '
- "imdb.utils._Container.__getitem__; if it's not "
- "a recursion limit exceeded and we're not running "
- "in a Symbian environment, it's a bug:\n%s" % e)
- return rawData
-
- def __setitem__(self, key, item):
- """Directly store the item with the given key."""
- self.data[key] = item
-
- def __delitem__(self, key):
- """Remove the given section or key."""
- # XXX: how to remove an item of a section?
- del self.data[key]
-
- def _additional_keys(self):
- """Valid keys to append to the data.keys() list."""
- return []
-
- def keys(self):
- """Return a list of valid keys."""
- return self.data.keys() + self._additional_keys()
-
- def items(self):
- """Return the items in the dictionary."""
- return [(k, self.get(k)) for k in self.keys()]
-
- # XXX: is this enough?
- def iteritems(self): return self.data.iteritems()
- def iterkeys(self): return self.data.iterkeys()
- def itervalues(self): return self.data.itervalues()
-
- def values(self):
- """Return the values in the dictionary."""
- return [self.get(k) for k in self.keys()]
-
- def has_key(self, key):
- """Return true if a given section is defined."""
- try:
- self.__getitem__(key)
- except KeyError:
- return 0
- return 1
-
- # XXX: really useful???
- # consider also that this will confuse people who meant to
- # call ia.update(movieObject, 'data set') instead.
- def update(self, dict):
- self.data.update(dict)
-
- def get(self, key, failobj=None):
- """Return the given section, or default if it's not found."""
- try:
- return self.__getitem__(key)
- except KeyError:
- return failobj
-
- def setdefault(self, key, failobj=None):
- if not self.has_key(key):
- self[key] = failobj
- return self[key]
-
- def pop(self, key, *args):
- return self.data.pop(key, *args)
-
- def popitem(self):
- return self.data.popitem()
-
- def __repr__(self):
- """String representation of an object."""
- raise NotImplementedError, 'override this method'
-
- def __str__(self):
- """Movie title or person name."""
- raise NotImplementedError, 'override this method'
-
- def __contains__(self, key):
- raise NotImplementedError, 'override this method'
-
- def append_item(self, key, item):
- """The item is appended to the list identified by the given key."""
- self.data.setdefault(key, []).append(item)
-
- def set_item(self, key, item):
- """Directly store the item with the given key."""
- self.data[key] = item
-
- def __nonzero__(self):
- """Return true if self.data contains something."""
- if self.data: return 1
- return 0
-
- def __deepcopy__(self, memo):
- raise NotImplementedError, 'override this method'
-
- def copy(self):
- """Return a deep copy of the object itself."""
- return deepcopy(self)
-
-
-def flatten(seq, toDescend=(list, dict, tuple), yieldDictKeys=0,
- onlyKeysType=(_Container,), scalar=None):
- """Iterate over nested lists and dictionaries; toDescend is a list
- or a tuple of types to be considered non-scalar; if yieldDictKeys is
- true, also dictionaries' keys are yielded; if scalar is not None, only
- items of the given type(s) are yielded."""
- if scalar is None or isinstance(seq, scalar):
- yield seq
- if isinstance(seq, toDescend):
- if isinstance(seq, (dict, _Container)):
- if yieldDictKeys:
- # Yield also the keys of the dictionary.
- for key in seq.iterkeys():
- for k in flatten(key, toDescend=toDescend,
- yieldDictKeys=yieldDictKeys,
- onlyKeysType=onlyKeysType, scalar=scalar):
- if onlyKeysType and isinstance(k, onlyKeysType):
- yield k
- for value in seq.itervalues():
- for v in flatten(value, toDescend=toDescend,
- yieldDictKeys=yieldDictKeys,
- onlyKeysType=onlyKeysType, scalar=scalar):
- yield v
- elif not isinstance(seq, (str, unicode, int, float)):
- for item in seq:
- for i in flatten(item, toDescend=toDescend,
- yieldDictKeys=yieldDictKeys,
- onlyKeysType=onlyKeysType, scalar=scalar):
- yield i
-
-