diff --git a/couchpotato/core/providers/movie/imdb/main.py b/couchpotato/core/providers/movie/imdb/main.py index a7ba2e0c..c9fd046f 100644 --- a/couchpotato/core/providers/movie/imdb/main.py +++ b/couchpotato/core/providers/movie/imdb/main.py @@ -1,81 +1,57 @@ from couchpotato.core.event import addEvent from couchpotato.core.logger import CPLog from couchpotato.core.providers.movie.base import MovieProvider -from imdb import IMDb +from imdb import IMDb, helpers +from imdb._logging import setLevel +import time log = CPLog(__name__) class IMDB(MovieProvider): + info_list = ('main', 'plot', 'release dates', 'taglines', 'synopsis') + def __init__(self): #addEvent('movie.search', self.search) + #addEvent('movie.info', self.getInfo) self.p = IMDb('http') + setLevel('warn') - def search(self): - print 'search' - - def conf(self, option): - return self.config.get('IMDB', option) - - def find(self, q, limit = 8, alternative = True): - ''' Find movie by name ''' - - log.info('IMDB - Searching for movie: %s' % q) + def search(self, q, limit = 12): r = self.p.search_movie(q) - - return self.toResults(r, limit) - - def toResults(self, r, limit = 8, one = False): - results = [] - - if one: - new = self.feedItem() - new.imdb = 'tt' + r.movieID - new.name = self.toSaveString(r['title']) - try: - new.year = r['year'] - except: - new.year = '' - - return new - else : - nr = 0 - for movie in r: - results.append(self.toResults(movie, one = True)) - nr += 1 - if nr == limit: - break - - return results - - def findById(self, id): - ''' Find movie by TheMovieDB ID ''' + print '==' * 80 return [] + def getInfo(self, identifier = None): - def findByImdbId(self, id, details = False): - ''' Find movie by IMDB ID ''' + m = self.p.get_movie(identifier.replace('tt', ''), info = self.info_list) - log.info('IMDB - Searching for movie: %s' % str(id)) + poster = m['cover url'] + poster_original = helpers.fullSizeCoverURL(m) - r = self.p.get_movie(id.replace('tt', '')) + movie_data = { + 'id': 
identifier, + 'titles': [m['title']], + 'original_title': m['title'], + 'rating': { + 'imdb': (m.get('rating'), m.get('votes')), + }, + 'images': { + 'poster': [poster] if poster else [], + 'poster_original': [poster_original] if poster_original else [], + }, + 'imdb': identifier, + 'runtime': m.get('runtime')[0].split(':')[1], + 'released': m.get('release dates')[0].split('::')[1], + 'year': m['year'], + 'plot': m.get('synopsis', ''), + 'tagline': m.get('taglines', '')[0], + 'genres': m.get('genres', []), + } - if not details: - return self.toResults(r, one = True) - else: - self.p.update(r) - self.p.update(r, info = 'release dates') - self.p.update(r, info = 'taglines') - return r - - def get_IMDb_instance(self): - return IMDb('http') - - - def findReleaseDate(self, movie): - pass + return movie_data diff --git a/libs/imdb/Character.py b/libs/imdb/Character.py deleted file mode 100644 index 21264948..00000000 --- a/libs/imdb/Character.py +++ /dev/null @@ -1,197 +0,0 @@ -""" -Character module (imdb package). - -This module provides the Character class, used to store information about -a given character. - -Copyright 2007-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -from copy import deepcopy - -from imdb.utils import analyze_name, build_name, flatten, _Container, cmpPeople - - -class Character(_Container): - """A Character. - - Every information about a character can be accessed as: - characterObject['information'] - to get a list of the kind of information stored in a - Character object, use the keys() method; some useful aliases - are defined (as "also known as" for the "akas" key); - see the keys_alias dictionary. - """ - # The default sets of information retrieved. - default_info = ('main', 'filmography', 'biography') - - # Aliases for some not-so-intuitive keys. - keys_alias = {'mini biography': 'biography', - 'bio': 'biography', - 'character biography': 'biography', - 'character biographies': 'biography', - 'biographies': 'biography', - 'character bio': 'biography', - 'aka': 'akas', - 'also known as': 'akas', - 'alternate names': 'akas', - 'personal quotes': 'quotes', - 'keys': 'keywords', - 'keyword': 'keywords'} - - keys_tomodify_list = ('biography', 'quotes') - - cmpFunct = cmpPeople - - def _init(self, **kwds): - """Initialize a Character object. - - *characterID* -- the unique identifier for the character. - *name* -- the name of the Character, if not in the data dictionary. - *myName* -- the nickname you use for this character. - *myID* -- your personal id for this character. - *data* -- a dictionary used to initialize the object. - *notes* -- notes about the given character. - *accessSystem* -- a string representing the data access system used. - *titlesRefs* -- a dictionary with references to movies. - *namesRefs* -- a dictionary with references to persons. - *charactersRefs* -- a dictionary with references to characters. - *modFunct* -- function called returning text fields. 
- """ - name = kwds.get('name') - if name and not self.data.has_key('name'): - self.set_name(name) - self.characterID = kwds.get('characterID', None) - self.myName = kwds.get('myName', u'') - - def _reset(self): - """Reset the Character object.""" - self.characterID = None - self.myName = u'' - - def set_name(self, name): - """Set the name of the character.""" - # XXX: convert name to unicode, if it's a plain string? - d = analyze_name(name, canonical=0) - self.data.update(d) - - def _additional_keys(self): - """Valid keys to append to the data.keys() list.""" - addkeys = [] - if self.data.has_key('name'): - addkeys += ['long imdb name'] - if self.data.has_key('headshot'): - addkeys += ['full-size headshot'] - return addkeys - - def _getitem(self, key): - """Handle special keys.""" - ## XXX: can a character have an imdbIndex? - if self.data.has_key('name'): - if key == 'long imdb name': - return build_name(self.data) - if key == 'full-size headshot' and self.data.has_key('headshot'): - return self._re_fullsizeURL.sub('', self.data.get('headshot', '')) - return None - - def getID(self): - """Return the characterID.""" - return self.characterID - - def __nonzero__(self): - """The Character is "false" if the self.data does not contain a name.""" - # XXX: check the name and the characterID? 
- if self.data.get('name'): return 1 - return 0 - - def __contains__(self, item): - """Return true if this Character was portrayed in the given Movie - or it was impersonated by the given Person.""" - from Movie import Movie - from Person import Person - if isinstance(item, Person): - for m in flatten(self.data, yieldDictKeys=1, scalar=Movie): - if item.isSame(m.currentRole): - return 1 - elif isinstance(item, Movie): - for m in flatten(self.data, yieldDictKeys=1, scalar=Movie): - if item.isSame(m): - return 1 - return 0 - - def isSameName(self, other): - """Return true if two character have the same name - and/or characterID.""" - if not isinstance(other, self.__class__): - return 0 - if self.data.has_key('name') and \ - other.data.has_key('name') and \ - build_name(self.data, canonical=0) == \ - build_name(other.data, canonical=0): - return 1 - if self.accessSystem == other.accessSystem and \ - self.characterID is not None and \ - self.characterID == other.characterID: - return 1 - return 0 - isSameCharacter = isSameName - - def __deepcopy__(self, memo): - """Return a deep copy of a Character instance.""" - c = Character(name=u'', characterID=self.characterID, - myName=self.myName, myID=self.myID, - data=deepcopy(self.data, memo), - notes=self.notes, accessSystem=self.accessSystem, - titlesRefs=deepcopy(self.titlesRefs, memo), - namesRefs=deepcopy(self.namesRefs, memo), - charactersRefs=deepcopy(self.charactersRefs, memo)) - c.current_info = list(self.current_info) - c.set_mod_funct(self.modFunct) - return c - - def __repr__(self): - """String representation of a Character object.""" - r = '' % (self.characterID, - self.accessSystem, - self.get('name')) - if isinstance(r, unicode): r = r.encode('utf_8', 'replace') - return r - - def __str__(self): - """Simply print the short name.""" - return self.get('name', u'').encode('utf_8', 'replace') - - def __unicode__(self): - """Simply print the short title.""" - return self.get('name', u'') - - def summary(self): - 
"""Return a string with a pretty-printed summary for the character.""" - if not self: return u'' - s = u'Character\n=====\nName: %s\n' % \ - self.get('name', u'') - bio = self.get('biography') - if bio: - s += u'Biography: %s\n' % bio[0] - filmo = self.get('filmography') - if filmo: - a_list = [x.get('long imdb canonical title', u'') - for x in filmo[:5]] - s += u'Last movies with this character: %s.\n' % u'; '.join(a_list) - return s - - diff --git a/libs/imdb/Company.py b/libs/imdb/Company.py deleted file mode 100644 index 5e05c840..00000000 --- a/libs/imdb/Company.py +++ /dev/null @@ -1,195 +0,0 @@ -""" -company module (imdb package). - -This module provides the company class, used to store information about -a given company. - -Copyright 2008-2009 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -from copy import deepcopy - -from imdb.utils import analyze_company_name, build_company_name, \ - flatten, _Container, cmpCompanies - - -class Company(_Container): - """A company. - - Every information about a company can be accessed as: - companyObject['information'] - to get a list of the kind of information stored in a - company object, use the keys() method; some useful aliases - are defined (as "also known as" for the "akas" key); - see the keys_alias dictionary. - """ - # The default sets of information retrieved. 
- default_info = ('main',) - - # Aliases for some not-so-intuitive keys. - keys_alias = { - 'distributor': 'distributors', - 'special effects company': 'special effects companies', - 'other company': 'miscellaneous companies', - 'miscellaneous company': 'miscellaneous companies', - 'other companies': 'miscellaneous companies', - 'misc companies': 'miscellaneous companies', - 'misc company': 'miscellaneous companies', - 'production company': 'production companies'} - - keys_tomodify_list = () - - cmpFunct = cmpCompanies - - def _init(self, **kwds): - """Initialize a company object. - - *companyID* -- the unique identifier for the company. - *name* -- the name of the company, if not in the data dictionary. - *myName* -- the nickname you use for this company. - *myID* -- your personal id for this company. - *data* -- a dictionary used to initialize the object. - *notes* -- notes about the given company. - *accessSystem* -- a string representing the data access system used. - *titlesRefs* -- a dictionary with references to movies. - *namesRefs* -- a dictionary with references to persons. - *charactersRefs* -- a dictionary with references to companies. - *modFunct* -- function called returning text fields. - """ - name = kwds.get('name') - if name and not self.data.has_key('name'): - self.set_name(name) - self.companyID = kwds.get('companyID', None) - self.myName = kwds.get('myName', u'') - - def _reset(self): - """Reset the company object.""" - self.companyID = None - self.myName = u'' - - def set_name(self, name): - """Set the name of the company.""" - # XXX: convert name to unicode, if it's a plain string? - # Company diverges a bit from other classes, being able - # to directly handle its "notes". AND THAT'S PROBABLY A BAD IDEA! 
- oname = name = name.strip() - notes = u'' - if name.endswith(')'): - fparidx = name.find('(') - if fparidx != -1: - notes = name[fparidx:] - name = name[:fparidx].rstrip() - if self.notes: - name = oname - d = analyze_company_name(name) - self.data.update(d) - if notes and not self.notes: - self.notes = notes - - def _additional_keys(self): - """Valid keys to append to the data.keys() list.""" - if self.data.has_key('name'): - return ['long imdb name'] - return [] - - def _getitem(self, key): - """Handle special keys.""" - ## XXX: can a company have an imdbIndex? - if self.data.has_key('name'): - if key == 'long imdb name': - return build_company_name(self.data) - return None - - def getID(self): - """Return the companyID.""" - return self.companyID - - def __nonzero__(self): - """The company is "false" if the self.data does not contain a name.""" - # XXX: check the name and the companyID? - if self.data.get('name'): return 1 - return 0 - - def __contains__(self, item): - """Return true if this company and the given Movie are related.""" - from Movie import Movie - if isinstance(item, Movie): - for m in flatten(self.data, yieldDictKeys=1, scalar=Movie): - if item.isSame(m): - return 1 - return 0 - - def isSameName(self, other): - """Return true if two company have the same name - and/or companyID.""" - if not isinstance(other, self.__class__): - return 0 - if self.data.has_key('name') and \ - other.data.has_key('name') and \ - build_company_name(self.data) == \ - build_company_name(other.data): - return 1 - if self.accessSystem == other.accessSystem and \ - self.companyID is not None and \ - self.companyID == other.companyID: - return 1 - return 0 - isSameCompany = isSameName - - def __deepcopy__(self, memo): - """Return a deep copy of a company instance.""" - c = Company(name=u'', companyID=self.companyID, - myName=self.myName, myID=self.myID, - data=deepcopy(self.data, memo), - notes=self.notes, accessSystem=self.accessSystem, - 
titlesRefs=deepcopy(self.titlesRefs, memo), - namesRefs=deepcopy(self.namesRefs, memo), - charactersRefs=deepcopy(self.charactersRefs, memo)) - c.current_info = list(self.current_info) - c.set_mod_funct(self.modFunct) - return c - - def __repr__(self): - """String representation of a Company object.""" - r = '' % (self.companyID, - self.accessSystem, - self.get('long imdb name')) - if isinstance(r, unicode): r = r.encode('utf_8', 'replace') - return r - - def __str__(self): - """Simply print the short name.""" - return self.get('name', u'').encode('utf_8', 'replace') - - def __unicode__(self): - """Simply print the short title.""" - return self.get('name', u'') - - def summary(self): - """Return a string with a pretty-printed summary for the company.""" - if not self: return u'' - s = u'Company\n=======\nName: %s\n' % \ - self.get('name', u'') - for k in ('distributor', 'production company', 'miscellaneous company', - 'special effects company'): - d = self.get(k, [])[:5] - if not d: continue - s += u'Last movies from this company (%s): %s.\n' % \ - (k, u'; '.join([x.get('long imdb title', u'') for x in d])) - return s - - diff --git a/libs/imdb/Movie.py b/libs/imdb/Movie.py deleted file mode 100644 index 37ae49e6..00000000 --- a/libs/imdb/Movie.py +++ /dev/null @@ -1,398 +0,0 @@ -""" -Movie module (imdb package). - -This module provides the Movie class, used to store information about -a given movie. - -Copyright 2004-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -from copy import deepcopy - -from imdb import articles -from imdb.utils import analyze_title, build_title, canonicalTitle, \ - flatten, _Container, cmpMovies - - -class Movie(_Container): - """A Movie. - - Every information about a movie can be accessed as: - movieObject['information'] - to get a list of the kind of information stored in a - Movie object, use the keys() method; some useful aliases - are defined (as "casting" for the "casting director" key); see - the keys_alias dictionary. - """ - # The default sets of information retrieved. - default_info = ('main', 'plot') - - # Aliases for some not-so-intuitive keys. - keys_alias = { - 'tv schedule': 'airing', - 'user rating': 'rating', - 'plot summary': 'plot', - 'plot summaries': 'plot', - 'directed by': 'director', - 'created by': 'creator', - 'writing credits': 'writer', - 'produced by': 'producer', - 'original music by': 'original music', - 'non-original music by': 'non-original music', - 'music': 'original music', - 'cinematography by': 'cinematographer', - 'cinematography': 'cinematographer', - 'film editing by': 'editor', - 'film editing': 'editor', - 'editing': 'editor', - 'actors': 'cast', - 'actresses': 'cast', - 'casting by': 'casting director', - 'casting': 'casting director', - 'art direction by': 'art direction', - 'set decoration by': 'set decoration', - 'costume design by': 'costume designer', - 'costume design': 'costume designer', - 'makeup department': 'make up', - 'makeup': 'make up', - 'make-up': 'make up', - 'production management': 'production manager', - 'production company': 'production companies', - 'second unit director or assistant director': - 'assistant director', - 'second unit director': 'assistant director', - 'sound department': 'sound crew', - 'costume and wardrobe 
department': 'costume department', - 'special effects by': 'special effects', - 'visual effects by': 'visual effects', - 'special effects company': 'special effects companies', - 'stunts': 'stunt performer', - 'other crew': 'miscellaneous crew', - 'misc crew': 'miscellaneous crew', - 'miscellaneouscrew': 'miscellaneous crew', - 'crewmembers': 'miscellaneous crew', - 'crew members': 'miscellaneous crew', - 'other companies': 'miscellaneous companies', - 'misc companies': 'miscellaneous companies', - 'miscellaneous company': 'miscellaneous companies', - 'misc company': 'miscellaneous companies', - 'other company': 'miscellaneous companies', - 'aka': 'akas', - 'also known as': 'akas', - 'country': 'countries', - 'production country': 'countries', - 'production countries': 'countries', - 'genre': 'genres', - 'runtime': 'runtimes', - 'lang': 'languages', - 'color': 'color info', - 'cover': 'cover url', - 'full-size cover': 'full-size cover url', - 'seasons': 'number of seasons', - 'language': 'languages', - 'certificate': 'certificates', - 'certifications': 'certificates', - 'certification': 'certificates', - 'miscellaneous links': 'misc links', - 'miscellaneous': 'misc links', - 'soundclips': 'sound clips', - 'videoclips': 'video clips', - 'photographs': 'photo sites', - 'distributor': 'distributors', - 'distribution': 'distributors', - 'distribution companies': 'distributors', - 'distribution company': 'distributors', - 'guest': 'guests', - 'guest appearances': 'guests', - 'tv guests': 'guests', - 'notable tv guest appearances': 'guests', - 'episodes cast': 'guests', - 'episodes number': 'number of episodes', - 'amazon review': 'amazon reviews', - 'merchandising': 'merchandising links', - 'merchandise': 'merchandising links', - 'sales': 'merchandising links', - 'faq': 'faqs', - 'parental guide': 'parents guide', - 'frequently asked questions': 'faqs'} - - keys_tomodify_list = ('plot', 'trivia', 'alternate versions', 'goofs', - 'quotes', 'dvd', 'laserdisc', 'news', 
'soundtrack', - 'crazy credits', 'business', 'supplements', - 'video review', 'faqs') - - cmpFunct = cmpMovies - - def _init(self, **kwds): - """Initialize a Movie object. - - *movieID* -- the unique identifier for the movie. - *title* -- the title of the Movie, if not in the data dictionary. - *myTitle* -- your personal title for the movie. - *myID* -- your personal identifier for the movie. - *data* -- a dictionary used to initialize the object. - *currentRole* -- a Character instance representing the current role - or duty of a person in this movie, or a Person - object representing the actor/actress who played - a given character in a Movie. If a string is - passed, an object is automatically build. - *roleID* -- if available, the characterID/personID of the currentRole - object. - *roleIsPerson* -- when False (default) the currentRole is assumed - to be a Character object, otherwise a Person. - *notes* -- notes for the person referred in the currentRole - attribute; e.g.: '(voice)'. - *accessSystem* -- a string representing the data access system used. - *titlesRefs* -- a dictionary with references to movies. - *namesRefs* -- a dictionary with references to persons. - *charactersRefs* -- a dictionary with references to characters. - *modFunct* -- function called returning text fields. - """ - title = kwds.get('title') - if title and not self.data.has_key('title'): - self.set_title(title) - self.movieID = kwds.get('movieID', None) - self.myTitle = kwds.get('myTitle', u'') - - def _reset(self): - """Reset the Movie object.""" - self.movieID = None - self.myTitle = u'' - - def set_title(self, title): - """Set the title of the movie.""" - # XXX: convert title to unicode, if it's a plain string? 
- d_title = analyze_title(title) - self.data.update(d_title) - - def _additional_keys(self): - """Valid keys to append to the data.keys() list.""" - addkeys = [] - if self.data.has_key('title'): - addkeys += ['canonical title', 'long imdb title', - 'long imdb canonical title', - 'smart canonical title', - 'smart long imdb canonical title'] - if self.data.has_key('episode of'): - addkeys += ['long imdb episode title', 'series title', - 'canonical series title', 'episode title', - 'canonical episode title', - 'smart canonical series title', - 'smart canonical episode title'] - if self.data.has_key('cover url'): - addkeys += ['full-size cover url'] - return addkeys - - def guessLanguage(self): - """Guess the language of the title of this movie; returns None - if there are no hints.""" - lang = self.get('languages') - if lang: - lang = lang[0] - else: - country = self.get('countries') - if country: - lang = articles.COUNTRY_LANG.get(country[0]) - return lang - - def smartCanonicalTitle(self, title=None, lang=None): - """Return the canonical title, guessing its language. 
- The title can be forces with the 'title' argument (internally - used) and the language can be forced with the 'lang' argument, - otherwise it's auto-detected.""" - if title is None: - title = self.data.get('title', u'') - if lang is None: - lang = self.guessLanguage() - return canonicalTitle(title, lang=lang) - - def _getitem(self, key): - """Handle special keys.""" - if self.data.has_key('episode of'): - if key == 'long imdb episode title': - return build_title(self.data) - elif key == 'series title': - return self.data['episode of']['title'] - elif key == 'canonical series title': - ser_title = self.data['episode of']['title'] - return canonicalTitle(ser_title) - elif key == 'smart canonical series title': - ser_title = self.data['episode of']['title'] - return self.smartCanonicalTitle(ser_title) - elif key == 'episode title': - return self.data.get('title', u'') - elif key == 'canonical episode title': - return canonicalTitle(self.data.get('title', u'')) - elif key == 'smart canonical episode title': - return self.smartCanonicalTitle(self.data.get('title', u'')) - if self.data.has_key('title'): - if key == 'title': - return self.data['title'] - elif key == 'long imdb title': - return build_title(self.data) - elif key == 'canonical title': - return canonicalTitle(self.data['title']) - elif key == 'smart canonical title': - return self.smartCanonicalTitle(self.data['title']) - elif key == 'long imdb canonical title': - return build_title(self.data, canonical=1) - elif key == 'smart long imdb canonical title': - return build_title(self.data, canonical=1, - lang=self.guessLanguage()) - if key == 'full-size cover url' and self.data.has_key('cover url'): - return self._re_fullsizeURL.sub('', self.data.get('cover url', '')) - return None - - def getID(self): - """Return the movieID.""" - return self.movieID - - def __nonzero__(self): - """The Movie is "false" if the self.data does not contain a title.""" - # XXX: check the title and the movieID? 
- if self.data.has_key('title'): return 1 - return 0 - - def isSameTitle(self, other): - """Return true if this and the compared object have the same - long imdb title and/or movieID. - """ - # XXX: obsolete? - if not isinstance(other, self.__class__): return 0 - if self.data.has_key('title') and \ - other.data.has_key('title') and \ - build_title(self.data, canonical=0) == \ - build_title(other.data, canonical=0): - return 1 - if self.accessSystem == other.accessSystem and \ - self.movieID is not None and self.movieID == other.movieID: - return 1 - return 0 - isSameMovie = isSameTitle # XXX: just for backward compatiblity. - - def __contains__(self, item): - """Return true if the given Person object is listed in this Movie, - or if the the given Character is represented in this Movie.""" - from Person import Person - from Character import Character - from Company import Company - if isinstance(item, Person): - for p in flatten(self.data, yieldDictKeys=1, scalar=Person, - toDescend=(list, dict, tuple, Movie)): - if item.isSame(p): - return 1 - elif isinstance(item, Character): - for p in flatten(self.data, yieldDictKeys=1, scalar=Person, - toDescend=(list, dict, tuple, Movie)): - if item.isSame(p.currentRole): - return 1 - elif isinstance(item, Company): - for c in flatten(self.data, yieldDictKeys=1, scalar=Company, - toDescend=(list, dict, tuple, Movie)): - if item.isSame(c): - return 1 - return 0 - - def __deepcopy__(self, memo): - """Return a deep copy of a Movie instance.""" - m = Movie(title=u'', movieID=self.movieID, myTitle=self.myTitle, - myID=self.myID, data=deepcopy(self.data, memo), - currentRole=deepcopy(self.currentRole, memo), - roleIsPerson=self._roleIsPerson, - notes=self.notes, accessSystem=self.accessSystem, - titlesRefs=deepcopy(self.titlesRefs, memo), - namesRefs=deepcopy(self.namesRefs, memo), - charactersRefs=deepcopy(self.charactersRefs, memo)) - m.current_info = list(self.current_info) - m.set_mod_funct(self.modFunct) - return m - - def 
__repr__(self): - """String representation of a Movie object.""" - # XXX: add also currentRole and notes, if present? - if self.has_key('long imdb episode title'): - title = self.get('long imdb episode title') - else: - title = self.get('long imdb title') - r = '' % (self.movieID, self.accessSystem, - title) - if isinstance(r, unicode): r = r.encode('utf_8', 'replace') - return r - - def __str__(self): - """Simply print the short title.""" - return self.get('title', u'').encode('utf_8', 'replace') - - def __unicode__(self): - """Simply print the short title.""" - return self.get('title', u'') - - def summary(self): - """Return a string with a pretty-printed summary for the movie.""" - if not self: return u'' - def _nameAndRole(personList, joiner=u', '): - """Build a pretty string with name and role.""" - nl = [] - for person in personList: - n = person.get('name', u'') - if person.currentRole: n += u' (%s)' % person.currentRole - nl.append(n) - return joiner.join(nl) - s = u'Movie\n=====\nTitle: %s\n' % \ - self.get('long imdb canonical title', u'') - genres = self.get('genres') - if genres: s += u'Genres: %s.\n' % u', '.join(genres) - director = self.get('director') - if director: - s += u'Director: %s.\n' % _nameAndRole(director) - writer = self.get('writer') - if writer: - s += u'Writer: %s.\n' % _nameAndRole(writer) - cast = self.get('cast') - if cast: - cast = cast[:5] - s += u'Cast: %s.\n' % _nameAndRole(cast) - runtime = self.get('runtimes') - if runtime: - s += u'Runtime: %s.\n' % u', '.join(runtime) - countries = self.get('countries') - if countries: - s += u'Country: %s.\n' % u', '.join(countries) - lang = self.get('languages') - if lang: - s += u'Language: %s.\n' % u', '.join(lang) - rating = self.get('rating') - if rating: - s += u'Rating: %s' % rating - nr_votes = self.get('votes') - if nr_votes: - s += u' (%s votes)' % nr_votes - s += u'.\n' - plot = self.get('plot') - if not plot: - plot = self.get('plot summary') - if plot: - plot = [plot] - if 
plot: - plot = plot[0] - i = plot.find('::') - if i != -1: - plot = plot[:i] - s += u'Plot: %s' % plot - return s - - diff --git a/libs/imdb/Person.py b/libs/imdb/Person.py deleted file mode 100644 index 6e3e4623..00000000 --- a/libs/imdb/Person.py +++ /dev/null @@ -1,275 +0,0 @@ -""" -Person module (imdb package). - -This module provides the Person class, used to store information about -a given person. - -Copyright 2004-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -from copy import deepcopy - -from imdb.utils import analyze_name, build_name, normalizeName, \ - flatten, _Container, cmpPeople - - -class Person(_Container): - """A Person. - - Every information about a person can be accessed as: - personObject['information'] - to get a list of the kind of information stored in a - Person object, use the keys() method; some useful aliases - are defined (as "biography" for the "mini biography" key); - see the keys_alias dictionary. - """ - # The default sets of information retrieved. - default_info = ('main', 'filmography', 'biography') - - # Aliases for some not-so-intuitive keys. 
- keys_alias = {'biography': 'mini biography', - 'bio': 'mini biography', - 'aka': 'akas', - 'also known as': 'akas', - 'nick name': 'nick names', - 'nicks': 'nick names', - 'nickname': 'nick names', - 'miscellaneouscrew': 'miscellaneous crew', - 'crewmembers': 'miscellaneous crew', - 'misc': 'miscellaneous crew', - 'guest': 'notable tv guest appearances', - 'guests': 'notable tv guest appearances', - 'tv guest': 'notable tv guest appearances', - 'guest appearances': 'notable tv guest appearances', - 'spouses': 'spouse', - 'salary': 'salary history', - 'salaries': 'salary history', - 'otherworks': 'other works', - "maltin's biography": - "biography from leonard maltin's movie encyclopedia", - "leonard maltin's biography": - "biography from leonard maltin's movie encyclopedia", - 'real name': 'birth name', - 'where are they now': 'where now', - 'personal quotes': 'quotes', - 'mini-biography author': 'imdb mini-biography by', - 'biography author': 'imdb mini-biography by', - 'genre': 'genres', - 'portrayed': 'portrayed in', - 'keys': 'keywords', - 'trademarks': 'trade mark', - 'trade mark': 'trade mark', - 'trade marks': 'trade mark', - 'trademark': 'trade mark', - 'pictorials': 'pictorial', - 'magazine covers': 'magazine cover photo', - 'magazine-covers': 'magazine cover photo', - 'tv series episodes': 'episodes', - 'tv-series episodes': 'episodes', - 'articles': 'article', - 'keyword': 'keywords'} - - # 'nick names'??? - keys_tomodify_list = ('mini biography', 'spouse', 'quotes', 'other works', - 'salary history', 'trivia', 'trade mark', 'news', - 'books', 'biographical movies', 'portrayed in', - 'where now', 'interviews', 'article', - "biography from leonard maltin's movie encyclopedia") - - cmpFunct = cmpPeople - - def _init(self, **kwds): - """Initialize a Person object. - - *personID* -- the unique identifier for the person. - *name* -- the name of the Person, if not in the data dictionary. - *myName* -- the nickname you use for this person. 
- *myID* -- your personal id for this person. - *data* -- a dictionary used to initialize the object. - *currentRole* -- a Character instance representing the current role - or duty of a person in this movie, or a Person - object representing the actor/actress who played - a given character in a Movie. If a string is - passed, an object is automatically build. - *roleID* -- if available, the characterID/personID of the currentRole - object. - *roleIsPerson* -- when False (default) the currentRole is assumed - to be a Character object, otherwise a Person. - *notes* -- notes about the given person for a specific movie - or role (e.g.: the alias used in the movie credits). - *accessSystem* -- a string representing the data access system used. - *titlesRefs* -- a dictionary with references to movies. - *namesRefs* -- a dictionary with references to persons. - *modFunct* -- function called returning text fields. - *billingPos* -- position of this person in the credits list. - """ - name = kwds.get('name') - if name and not self.data.has_key('name'): - self.set_name(name) - self.personID = kwds.get('personID', None) - self.myName = kwds.get('myName', u'') - self.billingPos = kwds.get('billingPos', None) - - def _reset(self): - """Reset the Person object.""" - self.personID = None - self.myName = u'' - self.billingPos = None - - def _clear(self): - """Reset the dictionary.""" - self.billingPos = None - - def set_name(self, name): - """Set the name of the person.""" - # XXX: convert name to unicode, if it's a plain string? 
- d = analyze_name(name, canonical=1) - self.data.update(d) - - def _additional_keys(self): - """Valid keys to append to the data.keys() list.""" - addkeys = [] - if self.data.has_key('name'): - addkeys += ['canonical name', 'long imdb name', - 'long imdb canonical name'] - if self.data.has_key('headshot'): - addkeys += ['full-size headshot'] - return addkeys - - def _getitem(self, key): - """Handle special keys.""" - if self.data.has_key('name'): - if key == 'name': - return normalizeName(self.data['name']) - elif key == 'canonical name': - return self.data['name'] - elif key == 'long imdb name': - return build_name(self.data, canonical=0) - elif key == 'long imdb canonical name': - return build_name(self.data) - if key == 'full-size headshot' and self.data.has_key('headshot'): - return self._re_fullsizeURL.sub('', self.data.get('headshot', '')) - return None - - def getID(self): - """Return the personID.""" - return self.personID - - def __nonzero__(self): - """The Person is "false" if the self.data does not contain a name.""" - # XXX: check the name and the personID? - if self.data.has_key('name'): return 1 - return 0 - - def __contains__(self, item): - """Return true if this Person has worked in the given Movie, - or if the fiven Character was played by this Person.""" - from Movie import Movie - from Character import Character - if isinstance(item, Movie): - for m in flatten(self.data, yieldDictKeys=1, scalar=Movie): - if item.isSame(m): - return 1 - elif isinstance(item, Character): - for m in flatten(self.data, yieldDictKeys=1, scalar=Movie): - if item.isSame(m.currentRole): - return 1 - return 0 - - def isSameName(self, other): - """Return true if two persons have the same name and imdbIndex - and/or personID. 
- """ - if not isinstance(other, self.__class__): - return 0 - if self.data.has_key('name') and \ - other.data.has_key('name') and \ - build_name(self.data, canonical=1) == \ - build_name(other.data, canonical=1): - return 1 - if self.accessSystem == other.accessSystem and \ - self.personID and self.personID == other.personID: - return 1 - return 0 - isSamePerson = isSameName # XXX: just for backward compatiblity. - - def __deepcopy__(self, memo): - """Return a deep copy of a Person instance.""" - p = Person(name=u'', personID=self.personID, myName=self.myName, - myID=self.myID, data=deepcopy(self.data, memo), - currentRole=deepcopy(self.currentRole, memo), - roleIsPerson=self._roleIsPerson, - notes=self.notes, accessSystem=self.accessSystem, - titlesRefs=deepcopy(self.titlesRefs, memo), - namesRefs=deepcopy(self.namesRefs, memo), - charactersRefs=deepcopy(self.charactersRefs, memo)) - p.current_info = list(self.current_info) - p.set_mod_funct(self.modFunct) - p.billingPos = self.billingPos - return p - - def __repr__(self): - """String representation of a Person object.""" - # XXX: add also currentRole and notes, if present? 
- r = '' % (self.personID, self.accessSystem, - self.get('long imdb canonical name')) - if isinstance(r, unicode): r = r.encode('utf_8', 'replace') - return r - - def __str__(self): - """Simply print the short name.""" - return self.get('name', u'').encode('utf_8', 'replace') - - def __unicode__(self): - """Simply print the short title.""" - return self.get('name', u'') - - def summary(self): - """Return a string with a pretty-printed summary for the person.""" - if not self: return u'' - s = u'Person\n=====\nName: %s\n' % \ - self.get('long imdb canonical name', u'') - bdate = self.get('birth date') - if bdate: - s += u'Birth date: %s' % bdate - bnotes = self.get('birth notes') - if bnotes: - s += u' (%s)' % bnotes - s += u'.\n' - ddate = self.get('death date') - if ddate: - s += u'Death date: %s' % ddate - dnotes = self.get('death notes') - if dnotes: - s += u' (%s)' % dnotes - s += u'.\n' - bio = self.get('mini biography') - if bio: - s += u'Biography: %s\n' % bio[0] - director = self.get('director') - if director: - d_list = [x.get('long imdb canonical title', u'') - for x in director[:3]] - s += u'Last movies directed: %s.\n' % u'; '.join(d_list) - act = self.get('actor') or self.get('actress') - if act: - a_list = [x.get('long imdb canonical title', u'') - for x in act[:5]] - s += u'Last movies acted: %s.\n' % u'; '.join(a_list) - return s - - diff --git a/libs/imdb/__init__.py b/libs/imdb/__init__.py deleted file mode 100644 index faaa7d38..00000000 --- a/libs/imdb/__init__.py +++ /dev/null @@ -1,907 +0,0 @@ -""" -imdb package. - -This package can be used to retrieve information about a movie or -a person from the IMDb database. -It can fetch data through different media (e.g.: the IMDb web pages, -a SQL database, etc.) 
- -Copyright 2004-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -__all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company', - 'available_access_systems'] -__version__ = VERSION = '4.8dev20110303' - -# Import compatibility module (importing it is enough). -import _compat - -import sys, os, ConfigParser, logging -from types import MethodType - -from imdb import Movie, Person, Character, Company -import imdb._logging -from imdb._exceptions import IMDbError, IMDbDataAccessError -from imdb.utils import build_title, build_name, build_company_name - -_aux_logger = logging.getLogger('imdbpy.aux') - - -# URLs of the main pages for movies, persons, characters and queries. 
-imdbURL_base = 'http://akas.imdb.com/' -# http://akas.imdb.com/title/ -imdbURL_movie_base = '%stitle/' % imdbURL_base -# http://akas.imdb.com/title/tt%s/ -imdbURL_movie_main = imdbURL_movie_base + 'tt%s/' -# http://akas.imdb.com/name/ -imdbURL_person_base = '%sname/' % imdbURL_base -# http://akas.imdb.com/name/nm%s/ -imdbURL_person_main = imdbURL_person_base + 'nm%s/' -# http://akas.imdb.com/character/ -imdbURL_character_base = '%scharacter/' % imdbURL_base -# http://akas.imdb.com/character/ch%s/ -imdbURL_character_main = imdbURL_character_base + 'ch%s/' -# http://akas.imdb.com/company/ -imdbURL_company_base = '%scompany/' % imdbURL_base -# http://akas.imdb.com/company/co%s/ -imdbURL_company_main = imdbURL_company_base + 'co%s/' -# http://akas.imdb.com/keyword/%s/ -imdbURL_keyword_main = imdbURL_base + 'keyword/%s/' -# http://akas.imdb.com/chart/top -imdbURL_top250 = imdbURL_base + 'chart/top' -# http://akas.imdb.com/chart/bottom -imdbURL_bottom100 = imdbURL_base + 'chart/bottom' -# http://akas.imdb.com/find?%s -imdbURL_find = imdbURL_base + 'find?%s' - -# Name of the configuration file. -confFileName = 'imdbpy.cfg' - -class ConfigParserWithCase(ConfigParser.ConfigParser): - """A case-sensitive parser for configuration files.""" - def __init__(self, defaults=None, confFile=None, *args, **kwds): - """Initialize the parser. - - *defaults* -- defaults values. - *confFile* -- the file (or list of files) to parse.""" - ConfigParser.ConfigParser.__init__(self, defaults=defaults) - if confFile is None: - dotFileName = '.' + confFileName - # Current and home directory. 
- confFile = [os.path.join(os.getcwd(), confFileName), - os.path.join(os.getcwd(), dotFileName), - os.path.join(os.path.expanduser('~'), confFileName), - os.path.join(os.path.expanduser('~'), dotFileName)] - if os.name == 'posix': - sep = getattr(os.path, 'sep', '/') - # /etc/ and /etc/conf.d/ - confFile.append(os.path.join(sep, 'etc', confFileName)) - confFile.append(os.path.join(sep, 'etc', 'conf.d', - confFileName)) - else: - # etc subdirectory of sys.prefix, for non-unix systems. - confFile.append(os.path.join(sys.prefix, 'etc', confFileName)) - for fname in confFile: - try: - self.read(fname) - except (ConfigParser.MissingSectionHeaderError, - ConfigParser.ParsingError), e: - _aux_logger.warn('Troubles reading config file: %s' % e) - # Stop at the first valid file. - if self.has_section('imdbpy'): - break - - def optionxform(self, optionstr): - """Option names are case sensitive.""" - return optionstr - - def _manageValue(self, value): - """Custom substitutions for values.""" - if not isinstance(value, (str, unicode)): - return value - vlower = value.lower() - if vlower in self._boolean_states: - return self._boolean_states[vlower] - elif vlower == 'none': - return None - return value - - def get(self, section, option, *args, **kwds): - """Return the value of an option from a given section.""" - value = ConfigParser.ConfigParser.get(self, section, option, - *args, **kwds) - return self._manageValue(value) - - def items(self, section, *args, **kwds): - """Return a list of (key, value) tuples of items of the - given section.""" - if section != 'DEFAULT' and not self.has_section(section): - return [] - keys = ConfigParser.ConfigParser.options(self, section) - return [(k, self.get(section, k, *args, **kwds)) for k in keys] - - def getDict(self, section): - """Return a dictionary of items of the specified section.""" - return dict(self.items(section)) - - -def IMDb(accessSystem=None, *arguments, **keywords): - """Return an instance of the appropriate class. 
- The accessSystem parameter is used to specify the kind of - the preferred access system.""" - if accessSystem is None or accessSystem in ('auto', 'config'): - try: - cfg_file = ConfigParserWithCase(*arguments, **keywords) - # Parameters set by the code take precedence. - kwds = cfg_file.getDict('imdbpy') - if 'accessSystem' in kwds: - accessSystem = kwds['accessSystem'] - del kwds['accessSystem'] - else: - accessSystem = 'http' - kwds.update(keywords) - keywords = kwds - except Exception, e: - import logging - logging.getLogger('imdbpy').warn('Unable to read configuration' \ - ' file; complete error: %s' % e) - # It just LOOKS LIKE a bad habit: we tried to read config - # options from some files, but something is gone horribly - # wrong: ignore everything and pretend we were called with - # the 'http' accessSystem. - accessSystem = 'http' - if 'loggingLevel' in keywords: - imdb._logging.setLevel(keywords['loggingLevel']) - del keywords['loggingLevel'] - if 'loggingConfig' in keywords: - logCfg = keywords['loggingConfig'] - del keywords['loggingConfig'] - try: - import logging.config - logging.config.fileConfig(os.path.expanduser(logCfg)) - except Exception, e: - logging.getLogger('imdbpy').warn('unable to read logger ' \ - 'config: %s' % e) - if accessSystem in ('http', 'web', 'html'): - from parser.http import IMDbHTTPAccessSystem - return IMDbHTTPAccessSystem(*arguments, **keywords) - elif accessSystem in ('httpThin', 'webThin', 'htmlThin'): - import logging - logging.warn('httpThin is badly broken and' \ - ' will not be fixed; please switch' \ - ' to "http" or "mobile"') - from parser.http import IMDbHTTPAccessSystem - return IMDbHTTPAccessSystem(isThin=1, *arguments, **keywords) - elif accessSystem in ('mobile',): - from parser.mobile import IMDbMobileAccessSystem - return IMDbMobileAccessSystem(*arguments, **keywords) - elif accessSystem in ('local', 'files'): - # The local access system was removed since IMDbPY 4.2. 
- raise IMDbError, 'the local access system was removed since IMDbPY 4.2' - elif accessSystem in ('sql', 'db', 'database'): - try: - from parser.sql import IMDbSqlAccessSystem - except ImportError: - raise IMDbError, 'the sql access system is not installed' - return IMDbSqlAccessSystem(*arguments, **keywords) - else: - raise IMDbError, 'unknown kind of data access system: "%s"' \ - % accessSystem - - -def available_access_systems(): - """Return the list of available data access systems.""" - asList = [] - # XXX: trying to import modules is a good thing? - try: - from parser.http import IMDbHTTPAccessSystem - asList += ['http', 'httpThin'] - except ImportError: - pass - try: - from parser.mobile import IMDbMobileAccessSystem - asList.append('mobile') - except ImportError: - pass - try: - from parser.sql import IMDbSqlAccessSystem - asList.append('sql') - except ImportError: - pass - return asList - - -# XXX: I'm not sure this is a good guess. -# I suppose that an argument of the IMDb function can be used to -# set a default encoding for the output, and then Movie, Person and -# Character objects can use this default encoding, returning strings. -# Anyway, passing unicode strings to search_movie(), search_person() -# and search_character() methods is always safer. -encoding = getattr(sys.stdin, 'encoding', '') or sys.getdefaultencoding() - -class IMDbBase: - """The base class used to search for a movie/person/character and - to get a Movie/Person/Character object. - - This class cannot directly fetch data of any kind and so you - have to search the "real" code into a subclass.""" - - # The name of the preferred access system (MUST be overridden - # in the subclasses). - accessSystem = 'UNKNOWN' - - # Top-level logger for IMDbPY. - _imdb_logger = logging.getLogger('imdbpy') - - def __init__(self, defaultModFunct=None, results=20, keywordsResults=100, - *arguments, **keywords): - """Initialize the access system. 
- If specified, defaultModFunct is the function used by - default by the Person, Movie and Character objects, when - accessing their text fields. - """ - # The function used to output the strings that need modification (the - # ones containing references to movie titles and person names). - self._defModFunct = defaultModFunct - # Number of results to get. - try: - results = int(results) - except (TypeError, ValueError): - results = 20 - if results < 1: - results = 20 - self._results = results - try: - keywordsResults = int(keywordsResults) - except (TypeError, ValueError): - keywordsResults = 100 - if keywordsResults < 1: - keywordsResults = 100 - self._keywordsResults = keywordsResults - - def _normalize_movieID(self, movieID): - """Normalize the given movieID.""" - # By default, do nothing. - return movieID - - def _normalize_personID(self, personID): - """Normalize the given personID.""" - # By default, do nothing. - return personID - - def _normalize_characterID(self, characterID): - """Normalize the given characterID.""" - # By default, do nothing. - return characterID - - def _normalize_companyID(self, companyID): - """Normalize the given companyID.""" - # By default, do nothing. - return companyID - - def _get_real_movieID(self, movieID): - """Handle title aliases.""" - # By default, do nothing. - return movieID - - def _get_real_personID(self, personID): - """Handle name aliases.""" - # By default, do nothing. - return personID - - def _get_real_characterID(self, characterID): - """Handle character name aliases.""" - # By default, do nothing. - return characterID - - def _get_real_companyID(self, companyID): - """Handle company name aliases.""" - # By default, do nothing. 
- return companyID - - def _get_infoset(self, prefname): - """Return methods with the name starting with prefname.""" - infoset = [] - excludes = ('%sinfoset' % prefname,) - preflen = len(prefname) - for name in dir(self.__class__): - if name.startswith(prefname) and name not in excludes: - member = getattr(self.__class__, name) - if isinstance(member, MethodType): - infoset.append(name[preflen:].replace('_', ' ')) - return infoset - - def get_movie_infoset(self): - """Return the list of info set available for movies.""" - return self._get_infoset('get_movie_') - - def get_person_infoset(self): - """Return the list of info set available for persons.""" - return self._get_infoset('get_person_') - - def get_character_infoset(self): - """Return the list of info set available for characters.""" - return self._get_infoset('get_character_') - - def get_company_infoset(self): - """Return the list of info set available for companies.""" - return self._get_infoset('get_company_') - - def get_movie(self, movieID, info=Movie.Movie.default_info, modFunct=None): - """Return a Movie object for the given movieID. - - The movieID is something used to univocally identify a movie; - it can be the imdbID used by the IMDb web server, a file - pointer, a line number in a file, an ID in a database, etc. - - info is the list of sets of information to retrieve. 
- - If specified, modFunct will be the function used by the Movie - object when accessing its text fields (like 'plot').""" - movieID = self._normalize_movieID(movieID) - movieID = self._get_real_movieID(movieID) - movie = Movie.Movie(movieID=movieID, accessSystem=self.accessSystem) - modFunct = modFunct or self._defModFunct - if modFunct is not None: - movie.set_mod_funct(modFunct) - self.update(movie, info) - return movie - - get_episode = get_movie - - def _search_movie(self, title, results): - """Return a list of tuples (movieID, {movieData})""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def search_movie(self, title, results=None, _episodes=False): - """Return a list of Movie objects for a query for the given title. - The results argument is the maximum number of results to return.""" - if results is None: - results = self._results - try: - results = int(results) - except (ValueError, OverflowError): - results = 20 - # XXX: I suppose it will be much safer if the user provides - # an unicode string... this is just a guess. - if not isinstance(title, unicode): - title = unicode(title, encoding, 'replace') - if not _episodes: - res = self._search_movie(title, results) - else: - res = self._search_episode(title, results) - return [Movie.Movie(movieID=self._get_real_movieID(mi), - data=md, modFunct=self._defModFunct, - accessSystem=self.accessSystem) for mi, md in res][:results] - - def _search_episode(self, title, results): - """Return a list of tuples (movieID, {movieData})""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def search_episode(self, title, results=None): - """Return a list of Movie objects for a query for the given title. 
- The results argument is the maximum number of results to return; - this method searches only for titles of tv (mini) series' episodes.""" - return self.search_movie(title, results=results, _episodes=True) - - def get_person(self, personID, info=Person.Person.default_info, - modFunct=None): - """Return a Person object for the given personID. - - The personID is something used to univocally identify a person; - it can be the imdbID used by the IMDb web server, a file - pointer, a line number in a file, an ID in a database, etc. - - info is the list of sets of information to retrieve. - - If specified, modFunct will be the function used by the Person - object when accessing its text fields (like 'mini biography').""" - personID = self._normalize_personID(personID) - personID = self._get_real_personID(personID) - person = Person.Person(personID=personID, - accessSystem=self.accessSystem) - modFunct = modFunct or self._defModFunct - if modFunct is not None: - person.set_mod_funct(modFunct) - self.update(person, info) - return person - - def _search_person(self, name, results): - """Return a list of tuples (personID, {personData})""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def search_person(self, name, results=None): - """Return a list of Person objects for a query for the given name. 
- - The results argument is the maximum number of results to return.""" - if results is None: - results = self._results - try: - results = int(results) - except (ValueError, OverflowError): - results = 20 - if not isinstance(name, unicode): - name = unicode(name, encoding, 'replace') - res = self._search_person(name, results) - return [Person.Person(personID=self._get_real_personID(pi), - data=pd, modFunct=self._defModFunct, - accessSystem=self.accessSystem) for pi, pd in res][:results] - - def get_character(self, characterID, info=Character.Character.default_info, - modFunct=None): - """Return a Character object for the given characterID. - - The characterID is something used to univocally identify a character; - it can be the imdbID used by the IMDb web server, a file - pointer, a line number in a file, an ID in a database, etc. - - info is the list of sets of information to retrieve. - - If specified, modFunct will be the function used by the Character - object when accessing its text fields (like 'biography').""" - characterID = self._normalize_characterID(characterID) - characterID = self._get_real_characterID(characterID) - character = Character.Character(characterID=characterID, - accessSystem=self.accessSystem) - modFunct = modFunct or self._defModFunct - if modFunct is not None: - character.set_mod_funct(modFunct) - self.update(character, info) - return character - - def _search_character(self, name, results): - """Return a list of tuples (characterID, {characterData})""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def search_character(self, name, results=None): - """Return a list of Character objects for a query for the given name. 
- - The results argument is the maximum number of results to return.""" - if results is None: - results = self._results - try: - results = int(results) - except (ValueError, OverflowError): - results = 20 - if not isinstance(name, unicode): - name = unicode(name, encoding, 'replace') - res = self._search_character(name, results) - return [Character.Character(characterID=self._get_real_characterID(pi), - data=pd, modFunct=self._defModFunct, - accessSystem=self.accessSystem) for pi, pd in res][:results] - - def get_company(self, companyID, info=Company.Company.default_info, - modFunct=None): - """Return a Company object for the given companyID. - - The companyID is something used to univocally identify a company; - it can be the imdbID used by the IMDb web server, a file - pointer, a line number in a file, an ID in a database, etc. - - info is the list of sets of information to retrieve. - - If specified, modFunct will be the function used by the Company - object when accessing its text fields (none, so far).""" - companyID = self._normalize_companyID(companyID) - companyID = self._get_real_companyID(companyID) - company = Company.Company(companyID=companyID, - accessSystem=self.accessSystem) - modFunct = modFunct or self._defModFunct - if modFunct is not None: - company.set_mod_funct(modFunct) - self.update(company, info) - return company - - def _search_company(self, name, results): - """Return a list of tuples (companyID, {companyData})""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def search_company(self, name, results=None): - """Return a list of Company objects for a query for the given name. 
- - The results argument is the maximum number of results to return.""" - if results is None: - results = self._results - try: - results = int(results) - except (ValueError, OverflowError): - results = 20 - if not isinstance(name, unicode): - name = unicode(name, encoding, 'replace') - res = self._search_company(name, results) - return [Company.Company(companyID=self._get_real_companyID(pi), - data=pd, modFunct=self._defModFunct, - accessSystem=self.accessSystem) for pi, pd in res][:results] - - def _search_keyword(self, keyword, results): - """Return a list of 'keyword' strings.""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def search_keyword(self, keyword, results=None): - """Search for existing keywords, similar to the given one.""" - if results is None: - results = self._keywordsResults - try: - results = int(results) - except (ValueError, OverflowError): - results = 100 - if not isinstance(keyword, unicode): - keyword = unicode(keyword, encoding, 'replace') - return self._search_keyword(keyword, results) - - def _get_keyword(self, keyword, results): - """Return a list of tuples (movieID, {movieData})""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def get_keyword(self, keyword, results=None): - """Return a list of movies for the given keyword.""" - if results is None: - results = self._keywordsResults - try: - results = int(results) - except (ValueError, OverflowError): - results = 100 - # XXX: I suppose it will be much safer if the user provides - # an unicode string... this is just a guess. 
- if not isinstance(keyword, unicode): - keyword = unicode(keyword, encoding, 'replace') - res = self._get_keyword(keyword, results) - return [Movie.Movie(movieID=self._get_real_movieID(mi), - data=md, modFunct=self._defModFunct, - accessSystem=self.accessSystem) for mi, md in res][:results] - - def _get_top_bottom_movies(self, kind): - """Return the list of the top 250 or bottom 100 movies.""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - # This method must return a list of (movieID, {movieDict}) - # tuples. The kind parameter can be 'top' or 'bottom'. - raise NotImplementedError, 'override this method' - - def get_top250_movies(self): - """Return the list of the top 250 movies.""" - res = self._get_top_bottom_movies('top') - return [Movie.Movie(movieID=self._get_real_movieID(mi), - data=md, modFunct=self._defModFunct, - accessSystem=self.accessSystem) for mi, md in res] - - def get_bottom100_movies(self): - """Return the list of the bottom 100 movies.""" - res = self._get_top_bottom_movies('bottom') - return [Movie.Movie(movieID=self._get_real_movieID(mi), - data=md, modFunct=self._defModFunct, - accessSystem=self.accessSystem) for mi, md in res] - - def new_movie(self, *arguments, **keywords): - """Return a Movie object.""" - # XXX: not really useful... - if 'title' in keywords: - if not isinstance(keywords['title'], unicode): - keywords['title'] = unicode(keywords['title'], - encoding, 'replace') - elif len(arguments) > 1: - if not isinstance(arguments[1], unicode): - arguments[1] = unicode(arguments[1], encoding, 'replace') - return Movie.Movie(accessSystem=self.accessSystem, - *arguments, **keywords) - - def new_person(self, *arguments, **keywords): - """Return a Person object.""" - # XXX: not really useful... 
- if 'name' in keywords: - if not isinstance(keywords['name'], unicode): - keywords['name'] = unicode(keywords['name'], - encoding, 'replace') - elif len(arguments) > 1: - if not isinstance(arguments[1], unicode): - arguments[1] = unicode(arguments[1], encoding, 'replace') - return Person.Person(accessSystem=self.accessSystem, - *arguments, **keywords) - - def new_character(self, *arguments, **keywords): - """Return a Character object.""" - # XXX: not really useful... - if 'name' in keywords: - if not isinstance(keywords['name'], unicode): - keywords['name'] = unicode(keywords['name'], - encoding, 'replace') - elif len(arguments) > 1: - if not isinstance(arguments[1], unicode): - arguments[1] = unicode(arguments[1], encoding, 'replace') - return Character.Character(accessSystem=self.accessSystem, - *arguments, **keywords) - - def new_company(self, *arguments, **keywords): - """Return a Company object.""" - # XXX: not really useful... - if 'name' in keywords: - if not isinstance(keywords['name'], unicode): - keywords['name'] = unicode(keywords['name'], - encoding, 'replace') - elif len(arguments) > 1: - if not isinstance(arguments[1], unicode): - arguments[1] = unicode(arguments[1], encoding, 'replace') - return Company.Company(accessSystem=self.accessSystem, - *arguments, **keywords) - - def update(self, mop, info=None, override=0): - """Given a Movie, Person, Character or Company object with only - partial information, retrieve the required set of information. - - info is the list of sets of information to retrieve. - - If override is set, the information are retrieved and updated - even if they're already in the object.""" - # XXX: should this be a method of the Movie/Person/Character/Company - # classes? NO! What for instances created by external functions? 
- mopID = None - prefix = '' - if isinstance(mop, Movie.Movie): - mopID = mop.movieID - prefix = 'movie' - elif isinstance(mop, Person.Person): - mopID = mop.personID - prefix = 'person' - elif isinstance(mop, Character.Character): - mopID = mop.characterID - prefix = 'character' - elif isinstance(mop, Company.Company): - mopID = mop.companyID - prefix = 'company' - else: - raise IMDbError, 'object ' + repr(mop) + \ - ' is not a Movie, Person, Character or Company instance' - if mopID is None: - # XXX: enough? It's obvious that there are Characters - # objects without characterID, so I think they should - # just do nothing, when an i.update(character) is tried. - if prefix == 'character': - return - raise IMDbDataAccessError, \ - 'the supplied object has null movieID, personID or companyID' - if mop.accessSystem == self.accessSystem: - aSystem = self - else: - aSystem = IMDb(mop.accessSystem) - if info is None: - info = mop.default_info - elif info == 'all': - if isinstance(mop, Movie.Movie): - info = self.get_movie_infoset() - elif isinstance(mop, Person.Person): - info = self.get_person_infoset() - elif isinstance(mop, Character.Character): - info = self.get_character_infoset() - else: - info = self.get_company_infoset() - if not isinstance(info, (tuple, list)): - info = (info,) - res = {} - for i in info: - if i in mop.current_info and not override: - continue - if not i: - continue - self._imdb_logger.debug('retrieving "%s" info set', i) - try: - method = getattr(aSystem, 'get_%s_%s' % - (prefix, i.replace(' ', '_'))) - except AttributeError: - self._imdb_logger.error('unknown information set "%s"', i) - # Keeps going. 
- method = lambda *x: {} - try: - ret = method(mopID) - except Exception, e: - self._imdb_logger.critical('caught an exception retrieving ' \ - 'or parsing "%s" info set for mopID ' \ - '"%s" (accessSystem: %s)', - i, mopID, mop.accessSystem, exc_info=True) - ret = {} - keys = None - if 'data' in ret: - res.update(ret['data']) - if isinstance(ret['data'], dict): - keys = ret['data'].keys() - if 'info sets' in ret: - for ri in ret['info sets']: - mop.add_to_current_info(ri, keys, mainInfoset=i) - else: - mop.add_to_current_info(i, keys) - if 'titlesRefs' in ret: - mop.update_titlesRefs(ret['titlesRefs']) - if 'namesRefs' in ret: - mop.update_namesRefs(ret['namesRefs']) - if 'charactersRefs' in ret: - mop.update_charactersRefs(ret['charactersRefs']) - mop.set_data(res, override=0) - - def get_imdbMovieID(self, movieID): - """Translate a movieID in an imdbID (the ID used by the IMDb - web server); must be overridden by the subclass.""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def get_imdbPersonID(self, personID): - """Translate a personID in a imdbID (the ID used by the IMDb - web server); must be overridden by the subclass.""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def get_imdbCharacterID(self, characterID): - """Translate a characterID in a imdbID (the ID used by the IMDb - web server); must be overridden by the subclass.""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. 
- raise NotImplementedError, 'override this method' - - def get_imdbCompanyID(self, companyID): - """Translate a companyID in a imdbID (the ID used by the IMDb - web server); must be overridden by the subclass.""" - # XXX: for the real implementation, see the method of the - # subclass, somewhere under the imdb.parser package. - raise NotImplementedError, 'override this method' - - def _searchIMDb(self, kind, ton): - """Search the IMDb akas server for the given title or name.""" - # The Exact Primary search system has gone AWOL, so we resort - # to the mobile search. :-/ - if not ton: - return None - aSystem = IMDb('mobile') - if kind == 'tt': - searchFunct = aSystem.search_movie - check = 'long imdb canonical title' - elif kind == 'nm': - searchFunct = aSystem.search_person - check = 'long imdb canonical name' - elif kind == 'char': - searchFunct = aSystem.search_character - check = 'long imdb canonical name' - elif kind == 'co': - # XXX: are [COUNTRY] codes included in the results? - searchFunct = aSystem.search_company - check = 'long imdb name' - try: - searchRes = searchFunct(ton) - except IMDbError: - return None - # When only one result is returned, assume it was from an - # exact match. - if len(searchRes) == 1: - return searchRes[0].getID() - for item in searchRes: - # Return the first perfect match. - if item[check] == ton: - return item.getID() - return None - - def title2imdbID(self, title): - """Translate a movie title (in the plain text data files format) - to an imdbID. - Try an Exact Primary Title search on IMDb; - return None if it's unable to get the imdbID.""" - return self._searchIMDb('tt', title) - - def name2imdbID(self, name): - """Translate a person name in an imdbID. - Try an Exact Primary Name search on IMDb; - return None if it's unable to get the imdbID.""" - return self._searchIMDb('tt', name) - - def character2imdbID(self, name): - """Translate a character name in an imdbID. 
- Try an Exact Primary Name search on IMDb; - return None if it's unable to get the imdbID.""" - return self._searchIMDb('char', name) - - def company2imdbID(self, name): - """Translate a company name in an imdbID. - Try an Exact Primary Name search on IMDb; - return None if it's unable to get the imdbID.""" - return self._searchIMDb('co', name) - - def get_imdbID(self, mop): - """Return the imdbID for the given Movie, Person, Character or Company - object.""" - imdbID = None - if mop.accessSystem == self.accessSystem: - aSystem = self - else: - aSystem = IMDb(mop.accessSystem) - if isinstance(mop, Movie.Movie): - if mop.movieID is not None: - imdbID = aSystem.get_imdbMovieID(mop.movieID) - else: - imdbID = aSystem.title2imdbID(build_title(mop, canonical=0, - ptdf=1)) - elif isinstance(mop, Person.Person): - if mop.personID is not None: - imdbID = aSystem.get_imdbPersonID(mop.personID) - else: - imdbID = aSystem.name2imdbID(build_name(mop, canonical=1)) - elif isinstance(mop, Character.Character): - if mop.characterID is not None: - imdbID = aSystem.get_imdbCharacterID(mop.characterID) - else: - # canonical=0 ? 
- imdbID = aSystem.character2imdbID(build_name(mop, canonical=1)) - elif isinstance(mop, Company.Company): - if mop.companyID is not None: - imdbID = aSystem.get_imdbCompanyID(mop.companyID) - else: - imdbID = aSystem.company2imdbID(build_company_name(mop)) - else: - raise IMDbError, 'object ' + repr(mop) + \ - ' is not a Movie, Person or Character instance' - return imdbID - - def get_imdbURL(self, mop): - """Return the main IMDb URL for the given Movie, Person, - Character or Company object, or None if unable to get it.""" - imdbID = self.get_imdbID(mop) - if imdbID is None: - return None - if isinstance(mop, Movie.Movie): - url_firstPart = imdbURL_movie_main - elif isinstance(mop, Person.Person): - url_firstPart = imdbURL_person_main - elif isinstance(mop, Character.Character): - url_firstPart = imdbURL_character_main - elif isinstance(mop, Company.Company): - url_firstPart = imdbURL_company_main - else: - raise IMDbError, 'object ' + repr(mop) + \ - ' is not a Movie, Person, Character or Company instance' - return url_firstPart % imdbID - - def get_special_methods(self): - """Return the special methods defined by the subclass.""" - sm_dict = {} - base_methods = [] - for name in dir(IMDbBase): - member = getattr(IMDbBase, name) - if isinstance(member, MethodType): - base_methods.append(name) - for name in dir(self.__class__): - if name.startswith('_') or name in base_methods or \ - name.startswith('get_movie_') or \ - name.startswith('get_person_') or \ - name.startswith('get_company_') or \ - name.startswith('get_character_'): - continue - member = getattr(self.__class__, name) - if isinstance(member, MethodType): - sm_dict.update({name: member.__doc__}) - return sm_dict - diff --git a/libs/imdb/_compat.py b/libs/imdb/_compat.py deleted file mode 100644 index 73a4dd1b..00000000 --- a/libs/imdb/_compat.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -_compat module (imdb package). 
- -This module provides compatibility functions used by the imdb package -to deal with unusual environments. - -Copyright 2008-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -# TODO: now we're heavily using the 'logging' module, which was not -# present in Python 2.2. To work in a Symbian environment, we -# need to create a fake 'logging' module (its functions may call -# the 'warnings' module, or do nothing at all). - - -import os -# If true, we're working on a Symbian device. -if os.name == 'e32': - # Replace os.path.expandvars and os.path.expanduser, if needed. - def _noact(x): - """Ad-hoc replacement for IMDbPY.""" - return x - try: - os.path.expandvars - except AttributeError: - os.path.expandvars = _noact - try: - os.path.expanduser - except AttributeError: - os.path.expanduser = _noact - - # time.strptime is missing, on Symbian devices. 
- import time - try: - time.strptime - except AttributeError: - import re - _re_web_time = re.compile(r'Episode dated (\d+) (\w+) (\d+)') - _re_ptdf_time = re.compile(r'\((\d+)-(\d+)-(\d+)\)') - _month2digit = {'January': '1', 'February': '2', 'March': '3', - 'April': '4', 'May': '5', 'June': '6', 'July': '7', - 'August': '8', 'September': '9', 'October': '10', - 'November': '11', 'December': '12'} - def strptime(s, format): - """Ad-hoc strptime replacement for IMDbPY.""" - try: - if format.startswith('Episode'): - res = _re_web_time.findall(s)[0] - return (int(res[2]), int(_month2digit[res[1]]), int(res[0]), - 0, 0, 0, 0, 1, 0) - else: - res = _re_ptdf_time.findall(s)[0] - return (int(res[0]), int(res[1]), int(res[2]), - 0, 0, 0, 0, 1, 0) - except: - raise ValueError, u'error in IMDbPY\'s ad-hoc strptime!' - time.strptime = strptime - diff --git a/libs/imdb/_exceptions.py b/libs/imdb/_exceptions.py deleted file mode 100644 index 436d01a7..00000000 --- a/libs/imdb/_exceptions.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -_exceptions module (imdb package). - -This module provides the exception hierarchy used by the imdb package. - -Copyright 2004-2009 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import logging - - -class IMDbError(Exception, object): - """Base class for every exception raised by the imdb package.""" - _logger = logging.getLogger('imdbpy') - - def __init__(self, *args, **kwargs): - """Initialize the exception and pass the message to the log system.""" - # Every raised exception also dispatch a critical log. - self._logger.critical('%s exception raised; args: %s; kwds: %s', - self.__class__.__name__, args, kwargs, - exc_info=True) - super(IMDbError, self).__init__(*args, **kwargs) - -class IMDbDataAccessError(IMDbError): - """Exception raised when is not possible to access needed data.""" - pass - -class IMDbParserError(IMDbError): - """Exception raised when an error occurred parsing the data.""" - pass - - diff --git a/libs/imdb/_logging.py b/libs/imdb/_logging.py deleted file mode 100644 index 2b8a286a..00000000 --- a/libs/imdb/_logging.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -_logging module (imdb package). - -This module provides the logging facilities used by the imdb package. - -Copyright 2009-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import logging - -LEVELS = {'debug': logging.DEBUG, - 'info': logging.INFO, - 'warn': logging.WARNING, - 'warning': logging.WARNING, - 'error': logging.ERROR, - 'critical': logging.CRITICAL} - - -imdbpyLogger = logging.getLogger('imdbpy') -imdbpyStreamHandler = logging.StreamHandler() -imdbpyFormatter = logging.Formatter('%(asctime)s %(levelname)s [%(name)s]' \ - ' %(pathname)s:%(lineno)d: %(message)s') -imdbpyStreamHandler.setFormatter(imdbpyFormatter) -imdbpyLogger.addHandler(imdbpyStreamHandler) - -def setLevel(level): - """Set logging level for the main logger.""" - level = level.lower().strip() - imdbpyLogger.setLevel(LEVELS.get(level, logging.NOTSET)) - imdbpyLogger.log(imdbpyLogger.level, 'set logging threshold to "%s"', - logging.getLevelName(imdbpyLogger.level)) - - -#imdbpyLogger.setLevel(logging.DEBUG) - - -# It can be an idea to have a single function to log and warn: -#import warnings -#def log_and_warn(msg, args=None, logger=None, level=None): -# """Log the message and issue a warning.""" -# if logger is None: -# logger = imdbpyLogger -# if level is None: -# level = logging.WARNING -# if args is None: -# args = () -# #warnings.warn(msg % args, stacklevel=0) -# logger.log(level, msg % args) - diff --git a/libs/imdb/articles.py b/libs/imdb/articles.py deleted file mode 100644 index 73ac6901..00000000 --- a/libs/imdb/articles.py +++ /dev/null @@ -1,142 +0,0 @@ -""" -articles module (imdb package). - -This module provides functions and data to handle in a smart way -articles (in various languages) at the beginning of movie titles. - -Copyright 2009 Davide Alberani - 2009 H. 
Turgut Uyar - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -# List of generic articles used when the language of the title is unknown (or -# we don't have information about articles in that language). -# XXX: Managing titles in a lot of different languages, a function to recognize -# an initial article can't be perfect; sometimes we'll stumble upon a short -# word that is an article in some language, but it's not in another; in these -# situations we have to choose if we want to interpret this little word -# as an article or not (remember that we don't know what the original language -# of the title was). -# Example: 'en' is (I suppose) an article in Some Language. Unfortunately it -# seems also to be a preposition in other languages (French?). -# Running a script over the whole list of titles (and aliases), I've found -# that 'en' is used as an article only 376 times, and as another thing 594 -# times, so I've decided to _always_ consider 'en' as a non article. 
-# -# Here is a list of words that are _never_ considered as articles, complete -# with the cound of times they are used in a way or another: -# 'en' (376 vs 594), 'to' (399 vs 727), 'as' (198 vs 276), 'et' (79 vs 99), -# 'des' (75 vs 150), 'al' (78 vs 304), 'ye' (14 vs 70), -# 'da' (23 vs 298), "'n" (8 vs 12) -# -# I've left in the list 'i' (1939 vs 2151) and 'uno' (52 vs 56) -# I'm not sure what '-al' is, and so I've left it out... -# -# Generic list of articles in utf-8 encoding: -GENERIC_ARTICLES = ('the', 'la', 'a', 'die', 'der', 'le', 'el', - "l'", 'il', 'das', 'les', 'i', 'o', 'ein', 'un', 'de', 'los', - 'an', 'una', 'las', 'eine', 'den', 'het', 'gli', 'lo', 'os', - 'ang', 'oi', 'az', 'een', 'ha-', 'det', 'ta', 'al-', - 'mga', "un'", 'uno', 'ett', 'dem', 'egy', 'els', 'eines', - '\xc3\x8f', '\xc3\x87', '\xc3\x94\xc3\xaf', '\xc3\x8f\xc3\xa9') - - -# Lists of articles separated by language. If possible, the list should -# be sorted by frequency (not very important, but...) -# If you want to add a list of articles for another language, mail it -# it at imdbpy-devel@lists.sourceforge.net; non-ascii articles must be utf-8 -# encoded. -LANG_ARTICLES = { - 'English': ('the', 'a', 'an'), - 'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'", - 'uno'), - 'Spanish': ('la', 'le', 'el', 'les', 'un', 'los', 'una', 'uno', 'unos', - 'unas'), - 'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'), - 'Turkish': (), # Some languages doesn't have articles. -} -LANG_ARTICLESget = LANG_ARTICLES.get - - -# Maps a language to countries where it is the main language. -# If you want to add an entry for another language or country, mail it at -# imdbpy-devel@lists.sourceforge.net . 
-_LANG_COUNTRIES = { - 'English': ('USA', 'UK', 'Canada', 'Ireland', 'Australia'), - 'Italian': ('Italy',), - 'Spanish': ('Spain', 'Mexico'), - 'Portuguese': ('Portugal', 'Brazil'), - 'Turkish': ('Turkey',), - #'German': ('Germany', 'East Germany', 'West Germany'), - #'French': ('France'), -} - -# Maps countries to their main language. -COUNTRY_LANG = {} -for lang in _LANG_COUNTRIES: - for country in _LANG_COUNTRIES[lang]: - COUNTRY_LANG[country] = lang - - -def toUnicode(articles): - """Convert a list of articles utf-8 encoded to unicode strings.""" - return tuple([art.decode('utf_8') for art in articles]) - - -def toDicts(articles): - """Given a list of utf-8 encoded articles, build two dictionary (one - utf-8 encoded and another one with unicode keys) for faster matches.""" - uArticles = toUnicode(articles) - return dict([(x, x) for x in articles]), dict([(x, x) for x in uArticles]) - - -def addTrailingSpace(articles): - """From the given list of utf-8 encoded articles, return two - lists (one utf-8 encoded and another one in unicode) where a space - is added at the end - if the last char is not ' or -.""" - _spArticles = [] - _spUnicodeArticles = [] - for article in articles: - if article[-1] not in ("'", '-'): - article += ' ' - _spArticles.append(article) - _spUnicodeArticles.append(article.decode('utf_8')) - return _spArticles, _spUnicodeArticles - - -# Caches. 
-_ART_CACHE = {} -_SP_ART_CACHE = {} - -def articlesDictsForLang(lang): - """Return dictionaries of articles specific for the given language, or the - default one if the language is not known.""" - if lang in _ART_CACHE: - return _ART_CACHE[lang] - artDicts = toDicts(LANG_ARTICLESget(lang, GENERIC_ARTICLES)) - _ART_CACHE[lang] = artDicts - return artDicts - - -def spArticlesForLang(lang): - """Return lists of articles (plus optional spaces) specific for the - given language, or the default one if the language is not known.""" - if lang in _SP_ART_CACHE: - return _SP_ART_CACHE[lang] - spArticles = addTrailingSpace(LANG_ARTICLESget(lang, GENERIC_ARTICLES)) - _SP_ART_CACHE[lang] = spArticles - return spArticles - diff --git a/libs/imdb/helpers.py b/libs/imdb/helpers.py deleted file mode 100644 index 2ca53068..00000000 --- a/libs/imdb/helpers.py +++ /dev/null @@ -1,548 +0,0 @@ -""" -helpers module (imdb package). - -This module provides functions not used directly by the imdb package, -but useful for IMDbPY-based programs. - -Copyright 2006-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -# XXX: find better names for the functions in this modules. 
- -import re -from cgi import escape -import gettext -from gettext import gettext as _ -gettext.textdomain('imdbpy') - -# The modClearRefs can be used to strip names and titles references from -# the strings in Movie and Person objects. -from imdb.utils import modClearRefs, re_titleRef, re_nameRef, \ - re_characterRef, _tagAttr, _Container, TAGS_TO_MODIFY -from imdb import IMDb, imdbURL_movie_base, imdbURL_person_base, \ - imdbURL_character_base -import imdb.locale -from imdb.Movie import Movie -from imdb.Person import Person -from imdb.Character import Character -from imdb.Company import Company -from imdb.parser.http.utils import re_entcharrefssub, entcharrefs, \ - subXMLRefs, subSGMLRefs -from imdb.parser.http.bsouplxml.etree import BeautifulSoup - - -# An URL, more or less. -_re_href = re.compile(r'(http://.+?)(?=\s|$)', re.I) -_re_hrefsub = _re_href.sub - - -def makeCgiPrintEncoding(encoding): - """Make a function to pretty-print strings for the web.""" - def cgiPrint(s): - """Encode the given string using the %s encoding, and replace - chars outside the given charset with XML char references.""" % encoding - s = escape(s, quote=1) - if isinstance(s, unicode): - s = s.encode(encoding, 'xmlcharrefreplace') - return s - return cgiPrint - -# cgiPrint uses the latin_1 encoding. -cgiPrint = makeCgiPrintEncoding('latin_1') - -# Regular expression for %(varname)s substitutions. -re_subst = re.compile(r'%\((.+?)\)s') -# Regular expression for .... clauses. -re_conditional = re.compile(r'(.+?)') - - -def makeTextNotes(replaceTxtNotes): - """Create a function useful to handle text[::optional_note] values. - replaceTxtNotes is a format string, which can include the following - values: %(text)s and %(notes)s. - Portions of the text can be conditionally excluded, if one of the - values is absent. E.g.: [%(notes)s] will be replaced - with '[notes]' if notes exists, or by an empty string otherwise. 
- The returned function is suitable be passed as applyToValues argument - of the makeObject2Txt function.""" - def _replacer(s): - outS = replaceTxtNotes - if not isinstance(s, (unicode, str)): - return s - ssplit = s.split('::', 1) - text = ssplit[0] - # Used to keep track of text and note existence. - keysDict = {} - if text: - keysDict['text'] = True - outS = outS.replace('%(text)s', text) - if len(ssplit) == 2: - keysDict['notes'] = True - outS = outS.replace('%(notes)s', ssplit[1]) - else: - outS = outS.replace('%(notes)s', u'') - def _excludeFalseConditionals(matchobj): - # Return an empty string if the conditional is false/empty. - if matchobj.group(1) in keysDict: - return matchobj.group(2) - return u'' - while re_conditional.search(outS): - outS = re_conditional.sub(_excludeFalseConditionals, outS) - return outS - return _replacer - - -def makeObject2Txt(movieTxt=None, personTxt=None, characterTxt=None, - companyTxt=None, joiner=' / ', - applyToValues=lambda x: x, _recurse=True): - """"Return a function useful to pretty-print Movie, Person, - Character and Company instances. - - *movieTxt* -- how to format a Movie object. - *personTxt* -- how to format a Person object. - *characterTxt* -- how to format a Character object. - *companyTxt* -- how to format a Company object. - *joiner* -- string used to join a list of objects. - *applyToValues* -- function to apply to values. - *_recurse* -- if True (default) manage only the given object. - """ - # Some useful defaults. - if movieTxt is None: - movieTxt = '%(long imdb title)s' - if personTxt is None: - personTxt = '%(long imdb name)s' - if characterTxt is None: - characterTxt = '%(long imdb name)s' - if companyTxt is None: - companyTxt = '%(long imdb name)s' - def object2txt(obj, _limitRecursion=None): - """Pretty-print objects.""" - # Prevent unlimited recursion. 
- if _limitRecursion is None: - _limitRecursion = 0 - elif _limitRecursion > 5: - return u'' - _limitRecursion += 1 - if isinstance(obj, (list, tuple)): - return joiner.join([object2txt(o, _limitRecursion=_limitRecursion) - for o in obj]) - elif isinstance(obj, dict): - # XXX: not exactly nice, neither useful, I fear. - return joiner.join([u'%s::%s' % - (object2txt(k, _limitRecursion=_limitRecursion), - object2txt(v, _limitRecursion=_limitRecursion)) - for k, v in obj.items()]) - objData = {} - if isinstance(obj, Movie): - objData['movieID'] = obj.movieID - outs = movieTxt - elif isinstance(obj, Person): - objData['personID'] = obj.personID - outs = personTxt - elif isinstance(obj, Character): - objData['characterID'] = obj.characterID - outs = characterTxt - elif isinstance(obj, Company): - objData['companyID'] = obj.companyID - outs = companyTxt - else: - return obj - def _excludeFalseConditionals(matchobj): - # Return an empty string if the conditional is false/empty. - condition = matchobj.group(1) - proceed = obj.get(condition) or getattr(obj, condition, None) - if proceed: - return matchobj.group(2) - else: - return u'' - return matchobj.group(2) - while re_conditional.search(outs): - outs = re_conditional.sub(_excludeFalseConditionals, outs) - for key in re_subst.findall(outs): - value = obj.get(key) or getattr(obj, key, None) - if not isinstance(value, (unicode, str)): - if not _recurse: - if value: - value = unicode(value) - if value: - value = object2txt(value, _limitRecursion=_limitRecursion) - elif value: - value = applyToValues(unicode(value)) - if not value: - value = u'' - elif not isinstance(value, (unicode, str)): - value = unicode(value) - outs = outs.replace(u'%(' + key + u')s', value) - return outs - return object2txt - - -def makeModCGILinks(movieTxt, personTxt, characterTxt=None, - encoding='latin_1'): - """Make a function used to pretty-print movies and persons refereces; - movieTxt and personTxt are the strings used for the substitutions. 
- movieTxt must contains %(movieID)s and %(title)s, while personTxt - must contains %(personID)s and %(name)s and characterTxt %(characterID)s - and %(name)s; characterTxt is optional, for backward compatibility.""" - _cgiPrint = makeCgiPrintEncoding(encoding) - def modCGILinks(s, titlesRefs, namesRefs, characterRefs=None): - """Substitute movies and persons references.""" - if characterRefs is None: characterRefs = {} - # XXX: look ma'... more nested scopes! - def _replaceMovie(match): - to_replace = match.group(1) - item = titlesRefs.get(to_replace) - if item: - movieID = item.movieID - to_replace = movieTxt % {'movieID': movieID, - 'title': unicode(_cgiPrint(to_replace), - encoding, - 'xmlcharrefreplace')} - return to_replace - def _replacePerson(match): - to_replace = match.group(1) - item = namesRefs.get(to_replace) - if item: - personID = item.personID - to_replace = personTxt % {'personID': personID, - 'name': unicode(_cgiPrint(to_replace), - encoding, - 'xmlcharrefreplace')} - return to_replace - def _replaceCharacter(match): - to_replace = match.group(1) - if characterTxt is None: - return to_replace - item = characterRefs.get(to_replace) - if item: - characterID = item.characterID - if characterID is None: - return to_replace - to_replace = characterTxt % {'characterID': characterID, - 'name': unicode(_cgiPrint(to_replace), - encoding, - 'xmlcharrefreplace')} - return to_replace - s = s.replace('<', '<').replace('>', '>') - s = _re_hrefsub(r'\1', s) - s = re_titleRef.sub(_replaceMovie, s) - s = re_nameRef.sub(_replacePerson, s) - s = re_characterRef.sub(_replaceCharacter, s) - return s - modCGILinks.movieTxt = movieTxt - modCGILinks.personTxt = personTxt - modCGILinks.characterTxt = characterTxt - return modCGILinks - -# links to the imdb.com web site. 
-_movieTxt = '%(title)s' -_personTxt = '%(name)s' -_characterTxt = '%(name)s' -modHtmlLinks = makeModCGILinks(movieTxt=_movieTxt, personTxt=_personTxt, - characterTxt=_characterTxt) -modHtmlLinksASCII = makeModCGILinks(movieTxt=_movieTxt, personTxt=_personTxt, - characterTxt=_characterTxt, - encoding='ascii') - - -everyentcharrefs = entcharrefs.copy() -for k, v in {'lt':u'<','gt':u'>','amp':u'&','quot':u'"','apos':u'\''}.items(): - everyentcharrefs[k] = v - everyentcharrefs['#%s' % ord(v)] = v -everyentcharrefsget = everyentcharrefs.get -re_everyentcharrefs = re.compile('&(%s|\#160|\#\d{1,5});' % - '|'.join(map(re.escape, everyentcharrefs))) -re_everyentcharrefssub = re_everyentcharrefs.sub - -def _replAllXMLRef(match): - """Replace the matched XML reference.""" - ref = match.group(1) - value = everyentcharrefsget(ref) - if value is None: - if ref[0] == '#': - return unichr(int(ref[1:])) - else: - return ref - return value - -def subXMLHTMLSGMLRefs(s): - """Return the given string with XML/HTML/SGML entity and char references - replaced.""" - return re_everyentcharrefssub(_replAllXMLRef, s) - - -def sortedSeasons(m): - """Return a sorted list of seasons of the given series.""" - seasons = m.get('episodes', {}).keys() - seasons.sort() - return seasons - - -def sortedEpisodes(m, season=None): - """Return a sorted list of episodes of the given series, - considering only the specified season(s) (every season, if None).""" - episodes = [] - seasons = season - if season is None: - seasons = sortedSeasons(m) - else: - if not isinstance(season, (tuple, list)): - seasons = [season] - for s in seasons: - eps_indx = m.get('episodes', {}).get(s, {}).keys() - eps_indx.sort() - for e in eps_indx: - episodes.append(m['episodes'][s][e]) - return episodes - - -# Idea and portions of the code courtesy of none none (dclist at gmail.com) -_re_imdbIDurl = re.compile(r'\b(nm|tt|ch|co)([0-9]{7})\b') -def get_byURL(url, info=None, args=None, kwds=None): - """Return a Movie, Person, 
Character or Company object for the given URL; - info is the info set to retrieve, args and kwds are respectively a list - and a dictionary or arguments to initialize the data access system. - Returns None if unable to correctly parse the url; can raise - exceptions if unable to retrieve the data.""" - if args is None: args = [] - if kwds is None: kwds = {} - ia = IMDb(*args, **kwds) - match = _re_imdbIDurl.search(url) - if not match: - return None - imdbtype = match.group(1) - imdbID = match.group(2) - if imdbtype == 'tt': - return ia.get_movie(imdbID, info=info) - elif imdbtype == 'nm': - return ia.get_person(imdbID, info=info) - elif imdbtype == 'ch': - return ia.get_character(imdbID, info=info) - elif imdbtype == 'co': - return ia.get_company(imdbID, info=info) - return None - - -# Idea and portions of code courtesy of Basil Shubin. -# Beware that these information are now available directly by -# the Movie/Person/Character instances. -def fullSizeCoverURL(obj): - """Given an URL string or a Movie, Person or Character instance, - returns an URL to the full-size version of the cover/headshot, - or None otherwise. This function is obsolete: the same information - are available as keys: 'full-size cover url' and 'full-size headshot', - respectively for movies and persons/characters.""" - if isinstance(obj, Movie): - coverUrl = obj.get('cover url') - elif isinstance(obj, (Person, Character)): - coverUrl = obj.get('headshot') - else: - coverUrl = obj - if not coverUrl: - return None - return _Container._re_fullsizeURL.sub('', coverUrl) - - -def keyToXML(key): - """Return a key (the ones used to access information in Movie and - other classes instances) converted to the style of the XML output.""" - return _tagAttr(key, '')[0] - - -def translateKey(key): - """Translate a given key.""" - return _(keyToXML(key)) - - -# Maps tags to classes. 
-_MAP_TOP_OBJ = { - 'person': Person, - 'movie': Movie, - 'character': Character, - 'company': Company -} - -# Tags to be converted to lists. -_TAGS_TO_LIST = dict([(x[0], None) for x in TAGS_TO_MODIFY.values()]) -_TAGS_TO_LIST.update(_MAP_TOP_OBJ) - -def tagToKey(tag): - """Return the name of the tag, taking it from the 'key' attribute, - if present.""" - keyAttr = tag.get('key') - if keyAttr: - if tag.get('keytype') == 'int': - keyAttr = int(keyAttr) - return keyAttr - return tag.name - - -def _valueWithType(tag, tagValue): - """Return tagValue, handling some type conversions.""" - tagType = tag.get('type') - if tagType == 'int': - tagValue = int(tagValue) - elif tagType == 'float': - tagValue = float(tagValue) - return tagValue - - -# Extra tags to get (if values were not already read from title/name). -_titleTags = ('imdbindex', 'kind', 'year') -_nameTags = ('imdbindex') -_companyTags = ('imdbindex', 'country') - -def parseTags(tag, _topLevel=True, _as=None, _infoset2keys=None, - _key2infoset=None): - """Recursively parse a tree of tags.""" - # The returned object (usually a _Container subclass, but it can - # be a string, an int, a float, a list or a dictionary). - item = None - if _infoset2keys is None: - _infoset2keys = {} - if _key2infoset is None: - _key2infoset = {} - name = tagToKey(tag) - firstChild = tag.find(recursive=False) - tagStr = (tag.string or u'').strip() - if not tagStr and name == 'item': - # Handles 'item' tags containing text and a 'notes' sub-tag. - tagContent = tag.contents[0] - if isinstance(tagContent, BeautifulSoup.NavigableString): - tagStr = (unicode(tagContent) or u'').strip() - tagType = tag.get('type') - infoset = tag.get('infoset') - if infoset: - _key2infoset[name] = infoset - _infoset2keys.setdefault(infoset, []).append(name) - # Here we use tag.name to avoid tags like - if tag.name in _MAP_TOP_OBJ: - # One of the subclasses of _Container. 
- item = _MAP_TOP_OBJ[name]() - itemAs = tag.get('access-system') - if itemAs: - if not _as: - _as = itemAs - else: - itemAs = _as - item.accessSystem = itemAs - tagsToGet = [] - theID = tag.get('id') - if name == 'movie': - item.movieID = theID - tagsToGet = _titleTags - theTitle = tag.find('title', recursive=False) - if tag.title: - item.set_title(tag.title.string) - tag.title.extract() - else: - if name == 'person': - item.personID = theID - tagsToGet = _nameTags - theName = tag.find('long imdb canonical name', recursive=False) - if not theName: - theName = tag.find('name', recursive=False) - elif name == 'character': - item.characterID = theID - tagsToGet = _nameTags - theName = tag.find('name', recursive=False) - elif name == 'company': - item.companyID = theID - tagsToGet = _companyTags - theName = tag.find('name', recursive=False) - if theName: - item.set_name(theName.string) - if theName: - theName.extract() - for t in tagsToGet: - if t in item.data: - continue - dataTag = tag.find(t, recursive=False) - if dataTag: - item.data[tagToKey(dataTag)] = _valueWithType(dataTag, - dataTag.string) - if tag.notes: - item.notes = tag.notes.string - tag.notes.extract() - episodeOf = tag.find('episode-of', recursive=False) - if episodeOf: - item.data['episode of'] = parseTags(episodeOf, _topLevel=False, - _as=_as, _infoset2keys=_infoset2keys, - _key2infoset=_key2infoset) - episodeOf.extract() - cRole = tag.find('current-role', recursive=False) - if cRole: - cr = parseTags(cRole, _topLevel=False, _as=_as, - _infoset2keys=_infoset2keys, _key2infoset=_key2infoset) - item.currentRole = cr - cRole.extract() - # XXX: big assumption, here. What about Movie instances used - # as keys in dictionaries? What about other keys (season and - # episode number, for example?) 
- if not _topLevel: - #tag.extract() - return item - _adder = lambda key, value: item.data.update({key: value}) - elif tagStr: - if tag.notes: - notes = (tag.notes.string or u'').strip() - if notes: - tagStr += u'::%s' % notes - else: - tagStr = _valueWithType(tag, tagStr) - return tagStr - elif firstChild: - firstChildName = tagToKey(firstChild) - if firstChildName in _TAGS_TO_LIST: - item = [] - _adder = lambda key, value: item.append(value) - else: - item = {} - _adder = lambda key, value: item.update({key: value}) - else: - item = {} - _adder = lambda key, value: item.update({name: value}) - for subTag in tag(recursive=False): - subTagKey = tagToKey(subTag) - # Exclude dinamically generated keys. - if tag.name in _MAP_TOP_OBJ and subTagKey in item._additional_keys(): - continue - subItem = parseTags(subTag, _topLevel=False, _as=_as, - _infoset2keys=_infoset2keys, _key2infoset=_key2infoset) - if subItem: - _adder(subTagKey, subItem) - if _topLevel and name in _MAP_TOP_OBJ: - # Add information about 'info sets', but only to the top-level object. - item.infoset2keys = _infoset2keys - item.key2infoset = _key2infoset - item.current_info = _infoset2keys.keys() - return item - - -def parseXML(xml): - """Parse a XML string, returning an appropriate object (usually an - instance of a subclass of _Container.""" - xmlObj = BeautifulSoup.BeautifulStoneSoup(xml, - convertEntities=BeautifulSoup.BeautifulStoneSoup.XHTML_ENTITIES) - if xmlObj: - mainTag = xmlObj.find() - if mainTag: - return parseTags(mainTag) - return None - - diff --git a/libs/imdb/locale/__init__.py b/libs/imdb/locale/__init__.py deleted file mode 100644 index 9bc2e466..00000000 --- a/libs/imdb/locale/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -locale package (imdb package). - -This package provides scripts and files for internationalization -of IMDbPY. - -Copyright 2009 H. 
Turgut Uyar - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import gettext -import os - -LOCALE_DIR = os.path.dirname(__file__) - -gettext.bindtextdomain('imdbpy', LOCALE_DIR) diff --git a/libs/imdb/locale/generatepot.py b/libs/imdb/locale/generatepot.py deleted file mode 100644 index 282f7d41..00000000 --- a/libs/imdb/locale/generatepot.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python -""" -generatepot.py script. - -This script generates the imdbpy.pot file, from the DTD. - -Copyright 2009 H. Turgut Uyar - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import re -import sys - -from datetime import datetime as dt - -DEFAULT_MESSAGES = { } - -ELEMENT_PATTERN = r"""\n" -"Language-Team: TEAM NAME \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Language-Code: en\n" -"Language-Name: English\n" -"Preferred-Encodings: utf-8\n" -"Domain: imdbpy\n" -""" - -if len(sys.argv) != 2: - print "Usage: %s dtd_file" % sys.argv[0] - sys.exit() - -dtdfilename = sys.argv[1] -dtd = open(dtdfilename).read() -elements = re_element.findall(dtd) -uniq = set(elements) -elements = list(uniq) - -print POT_HEADER_TEMPLATE % { - 'now': dt.strftime(dt.now(), "%Y-%m-%d %H:%M+0000") -} -for element in sorted(elements): - if element in DEFAULT_MESSAGES: - print '# Default: %s' % DEFAULT_MESSAGES[element] - else: - print '# Default: %s' % element.replace('-', ' ').capitalize() - print 'msgid "%s"' % element - print 'msgstr ""' - # use this part instead of the line above to generate the po file for English - #if element in DEFAULT_MESSAGES: - # print 'msgstr "%s"' % DEFAULT_MESSAGES[element] - #else: - # print 'msgstr "%s"' % element.replace('-', ' ').capitalize() - print - diff --git a/libs/imdb/locale/imdbpy-en.po b/libs/imdb/locale/imdbpy-en.po deleted file mode 100644 index 3b3013c3..00000000 --- a/libs/imdb/locale/imdbpy-en.po +++ /dev/null @@ -1,1257 +0,0 @@ -# Gettext message file for imdbpy -msgid "" -msgstr "" -"Project-Id-Version: imdbpy\n" -"POT-Creation-Date: 2009-04-16 14:27+0000\n" -"PO-Revision-Date: YYYY-MM-DD HH:MM+0000\n" -"Last-Translator: YOUR NAME \n" -"Language-Team: TEAM NAME \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; 
plural=0;\n" -"Language-Code: en\n" -"Language-Name: English\n" -"Preferred-Encodings: utf-8\n" -"Domain: imdbpy\n" - -# Default: Actor -msgid "actor" -msgstr "Actor" - -# Default: Actress -msgid "actress" -msgstr "Actress" - -# Default: Adaption -msgid "adaption" -msgstr "Adaption" - -# Default: Additional information -msgid "additional-information" -msgstr "Additional information" - -# Default: Admissions -msgid "admissions" -msgstr "Admissions" - -# Default: Agent address -msgid "agent-address" -msgstr "Agent address" - -# Default: Airing -msgid "airing" -msgstr "Airing" - -# Default: Akas -msgid "akas" -msgstr "Akas" - -# Default: All products -msgid "all-products" -msgstr "All products" - -# Default: Alternate language version of -msgid "alternate-language-version-of" -msgstr "Alternate language version of" - -# Default: Alternate versions -msgid "alternate-versions" -msgstr "Alternate versions" - -# Default: Amazon reviews -msgid "amazon-reviews" -msgstr "Amazon reviews" - -# Default: Analog left -msgid "analog-left" -msgstr "Analog left" - -# Default: Analog right -msgid "analog-right" -msgstr "Analog right" - -# Default: Animation department -msgid "animation-department" -msgstr "Animation department" - -# Default: Archive footage -msgid "archive-footage" -msgstr "Archive footage" - -# Default: Arithmetic mean -msgid "arithmetic-mean" -msgstr "Arithmetic mean" - -# Default: Art department -msgid "art-department" -msgstr "Art department" - -# Default: Art direction -msgid "art-direction" -msgstr "Art direction" - -# Default: Art director -msgid "art-director" -msgstr "Art director" - -# Default: Article -msgid "article" -msgstr "Article" - -# Default: Asin -msgid "asin" -msgstr "Asin" - -# Default: Aspect ratio -msgid "aspect-ratio" -msgstr "Aspect ratio" - -# Default: Assigner -msgid "assigner" -msgstr "Assigner" - -# Default: Assistant director -msgid "assistant-director" -msgstr "Assistant director" - -# Default: Auctions -msgid "auctions" -msgstr 
"Auctions" - -# Default: Audio noise -msgid "audio-noise" -msgstr "Audio noise" - -# Default: Audio quality -msgid "audio-quality" -msgstr "Audio quality" - -# Default: Award -msgid "award" -msgstr "Award" - -# Default: Awards -msgid "awards" -msgstr "Awards" - -# Default: Biographical movies -msgid "biographical-movies" -msgstr "Biographical movies" - -# Default: Biography -msgid "biography" -msgstr "Biography" - -# Default: Biography print -msgid "biography-print" -msgstr "Biography print" - -# Default: Birth date -msgid "birth-date" -msgstr "Birth date" - -# Default: Birth name -msgid "birth-name" -msgstr "Birth name" - -# Default: Birth notes -msgid "birth-notes" -msgstr "Birth notes" - -# Default: Body -msgid "body" -msgstr "Body" - -# Default: Book -msgid "book" -msgstr "Book" - -# Default: Books -msgid "books" -msgstr "Books" - -# Default: Bottom 100 rank -msgid "bottom-100-rank" -msgstr "Bottom 100 rank" - -# Default: Budget -msgid "budget" -msgstr "Budget" - -# Default: Business -msgid "business" -msgstr "Business" - -# Default: By arrangement with -msgid "by-arrangement-with" -msgstr "By arrangement with" - -# Default: Camera -msgid "camera" -msgstr "Camera" - -# Default: Camera and electrical department -msgid "camera-and-electrical-department" -msgstr "Camera and electrical department" - -# Default: Canonical episode title -msgid "canonical-episode-title" -msgstr "Canonical episode title" - -# Default: Canonical name -msgid "canonical-name" -msgstr "Canonical name" - -# Default: Canonical series title -msgid "canonical-series-title" -msgstr "Canonical series title" - -# Default: Canonical title -msgid "canonical-title" -msgstr "Canonical title" - -# Default: Cast -msgid "cast" -msgstr "Cast" - -# Default: Casting department -msgid "casting-department" -msgstr "Casting department" - -# Default: Casting director -msgid "casting-director" -msgstr "Casting director" - -# Default: Catalog number -msgid "catalog-number" -msgstr "Catalog number" - -# Default: 
Category -msgid "category" -msgstr "Category" - -# Default: Certificate -msgid "certificate" -msgstr "Certificate" - -# Default: Certificates -msgid "certificates" -msgstr "Certificates" - -# Default: Certification -msgid "certification" -msgstr "Certification" - -# Default: Channel -msgid "channel" -msgstr "Channel" - -# Default: Character -msgid "character" -msgstr "Character" - -# Default: Cinematographer -msgid "cinematographer" -msgstr "Cinematographer" - -# Default: Cinematographic process -msgid "cinematographic-process" -msgstr "Cinematographic process" - -# Default: Close captions teletext ld g -msgid "close-captions-teletext-ld-g" -msgstr "Close captions teletext ld g" - -# Default: Color info -msgid "color-info" -msgstr "Color info" - -# Default: Color information -msgid "color-information" -msgstr "Color information" - -# Default: Color rendition -msgid "color-rendition" -msgstr "Color rendition" - -# Default: Company -msgid "company" -msgstr "Company" - -# Default: Complete cast -msgid "complete-cast" -msgstr "Complete cast" - -# Default: Complete crew -msgid "complete-crew" -msgstr "Complete crew" - -# Default: Composer -msgid "composer" -msgstr "Composer" - -# Default: Connections -msgid "connections" -msgstr "Connections" - -# Default: Contrast -msgid "contrast" -msgstr "Contrast" - -# Default: Copyright holder -msgid "copyright-holder" -msgstr "Copyright holder" - -# Default: Costume department -msgid "costume-department" -msgstr "Costume department" - -# Default: Costume designer -msgid "costume-designer" -msgstr "Costume designer" - -# Default: Countries -msgid "countries" -msgstr "Countries" - -# Default: Country -msgid "country" -msgstr "Country" - -# Default: Courtesy of -msgid "courtesy-of" -msgstr "Courtesy of" - -# Default: Cover -msgid "cover" -msgstr "Cover" - -# Default: Cover url -msgid "cover-url" -msgstr "Cover url" - -# Default: Crazy credits -msgid "crazy-credits" -msgstr "Crazy credits" - -# Default: Creator -msgid "creator" 
-msgstr "Creator" - -# Default: Current role -msgid "current-role" -msgstr "Current role" - -# Default: Database -msgid "database" -msgstr "Database" - -# Default: Date -msgid "date" -msgstr "Date" - -# Default: Death date -msgid "death-date" -msgstr "Death date" - -# Default: Death notes -msgid "death-notes" -msgstr "Death notes" - -# Default: Demographic -msgid "demographic" -msgstr "Demographic" - -# Default: Description -msgid "description" -msgstr "Description" - -# Default: Dialogue intellegibility -msgid "dialogue-intellegibility" -msgstr "Dialogue intellegibility" - -# Default: Digital sound -msgid "digital-sound" -msgstr "Digital sound" - -# Default: Director -msgid "director" -msgstr "Director" - -# Default: Disc format -msgid "disc-format" -msgstr "Disc format" - -# Default: Disc size -msgid "disc-size" -msgstr "Disc size" - -# Default: Distributors -msgid "distributors" -msgstr "Distributors" - -# Default: Dvd -msgid "dvd" -msgstr "Dvd" - -# Default: Dvd features -msgid "dvd-features" -msgstr "Dvd features" - -# Default: Dvd format -msgid "dvd-format" -msgstr "Dvd format" - -# Default: Dvds -msgid "dvds" -msgstr "Dvds" - -# Default: Dynamic range -msgid "dynamic-range" -msgstr "Dynamic range" - -# Default: Edited from -msgid "edited-from" -msgstr "Edited from" - -# Default: Edited into -msgid "edited-into" -msgstr "Edited into" - -# Default: Editor -msgid "editor" -msgstr "Editor" - -# Default: Editorial department -msgid "editorial-department" -msgstr "Editorial department" - -# Default: Episode -msgid "episode" -msgstr "Episode" - -# Default: Episode of -msgid "episode-of" -msgstr "Episode of" - -# Default: Episode title -msgid "episode-title" -msgstr "Episode title" - -# Default: Episodes -msgid "episodes" -msgstr "Episodes" - -# Default: Episodes rating -msgid "episodes-rating" -msgstr "Episodes rating" - -# Default: Essays -msgid "essays" -msgstr "Essays" - -# Default: External reviews -msgid "external-reviews" -msgstr "External reviews" - -# 
Default: Faqs -msgid "faqs" -msgstr "Faqs" - -# Default: Featured in -msgid "featured-in" -msgstr "Featured in" - -# Default: Features -msgid "features" -msgstr "Features" - -# Default: Film negative format -msgid "film-negative-format" -msgstr "Film negative format" - -# Default: Filming dates -msgid "filming-dates" -msgstr "Filming dates" - -# Default: Filmography -msgid "filmography" -msgstr "Filmography" - -# Default: Followed by -msgid "followed-by" -msgstr "Followed by" - -# Default: Follows -msgid "follows" -msgstr "Follows" - -# Default: For -msgid "for" -msgstr "For" - -# Default: Frequency response -msgid "frequency-response" -msgstr "Frequency response" - -# Default: From -msgid "from" -msgstr "From" - -# Default: Full article link -msgid "full-article-link" -msgstr "Full article link" - -# Default: Genres -msgid "genres" -msgstr "Genres" - -# Default: Goofs -msgid "goofs" -msgstr "Goofs" - -# Default: Gross -msgid "gross" -msgstr "Gross" - -# Default: Group genre -msgid "group-genre" -msgstr "Group genre" - -# Default: Headshot -msgid "headshot" -msgstr "Headshot" - -# Default: Height -msgid "height" -msgstr "Height" - -# Default: Imdbindex -msgid "imdbindex" -msgstr "Imdbindex" - -# Default: Interview -msgid "interview" -msgstr "Interview" - -# Default: Interviews -msgid "interviews" -msgstr "Interviews" - -# Default: Introduction -msgid "introduction" -msgstr "Introduction" - -# Default: Item -msgid "item" -msgstr "Item" - -# Default: Keywords -msgid "keywords" -msgstr "Keywords" - -# Default: Kind -msgid "kind" -msgstr "Kind" - -# Default: Label -msgid "label" -msgstr "Label" - -# Default: Laboratory -msgid "laboratory" -msgstr "Laboratory" - -# Default: Language -msgid "language" -msgstr "Language" - -# Default: Languages -msgid "languages" -msgstr "Languages" - -# Default: Laserdisc -msgid "laserdisc" -msgstr "Laserdisc" - -# Default: Laserdisc title -msgid "laserdisc-title" -msgstr "Laserdisc title" - -# Default: Length -msgid "length" -msgstr 
"Length" - -# Default: Line -msgid "line" -msgstr "Line" - -# Default: Link -msgid "link" -msgstr "Link" - -# Default: Link text -msgid "link-text" -msgstr "Link text" - -# Default: Literature -msgid "literature" -msgstr "Literature" - -# Default: Locations -msgid "locations" -msgstr "Locations" - -# Default: Long imdb canonical name -msgid "long-imdb-canonical-name" -msgstr "Long imdb canonical name" - -# Default: Long imdb canonical title -msgid "long-imdb-canonical-title" -msgstr "Long imdb canonical title" - -# Default: Long imdb episode title -msgid "long-imdb-episode-title" -msgstr "Long imdb episode title" - -# Default: Long imdb name -msgid "long-imdb-name" -msgstr "Long imdb name" - -# Default: Long imdb title -msgid "long-imdb-title" -msgstr "Long imdb title" - -# Default: Magazine cover photo -msgid "magazine-cover-photo" -msgstr "Magazine cover photo" - -# Default: Make up -msgid "make-up" -msgstr "Make up" - -# Default: Master format -msgid "master-format" -msgstr "Master format" - -# Default: Median -msgid "median" -msgstr "Median" - -# Default: Merchandising links -msgid "merchandising-links" -msgstr "Merchandising links" - -# Default: Mini biography -msgid "mini-biography" -msgstr "Mini biography" - -# Default: Misc links -msgid "misc-links" -msgstr "Misc links" - -# Default: Miscellaneous companies -msgid "miscellaneous-companies" -msgstr "Miscellaneous companies" - -# Default: Miscellaneous crew -msgid "miscellaneous-crew" -msgstr "Miscellaneous crew" - -# Default: Movie -msgid "movie" -msgstr "Movie" - -# Default: Mpaa -msgid "mpaa" -msgstr "Mpaa" - -# Default: Music department -msgid "music-department" -msgstr "Music department" - -# Default: Name -msgid "name" -msgstr "Name" - -# Default: News -msgid "news" -msgstr "News" - -# Default: Newsgroup reviews -msgid "newsgroup-reviews" -msgstr "Newsgroup reviews" - -# Default: Nick names -msgid "nick-names" -msgstr "Nick names" - -# Default: Notes -msgid "notes" -msgstr "Notes" - -# Default: Novel 
-msgid "novel" -msgstr "Novel" - -# Default: Number -msgid "number" -msgstr "Number" - -# Default: Number of chapter stops -msgid "number-of-chapter-stops" -msgstr "Number of chapter stops" - -# Default: Number of episodes -msgid "number-of-episodes" -msgstr "Number of episodes" - -# Default: Number of seasons -msgid "number-of-seasons" -msgstr "Number of seasons" - -# Default: Number of sides -msgid "number-of-sides" -msgstr "Number of sides" - -# Default: Number of votes -msgid "number-of-votes" -msgstr "Number of votes" - -# Default: Official retail price -msgid "official-retail-price" -msgstr "Official retail price" - -# Default: Official sites -msgid "official-sites" -msgstr "Official sites" - -# Default: Opening weekend -msgid "opening-weekend" -msgstr "Opening weekend" - -# Default: Original air date -msgid "original-air-date" -msgstr "Original air date" - -# Default: Original music -msgid "original-music" -msgstr "Original music" - -# Default: Original title -msgid "original-title" -msgstr "Original title" - -# Default: Other literature -msgid "other-literature" -msgstr "Other literature" - -# Default: Other works -msgid "other-works" -msgstr "Other works" - -# Default: Parents guide -msgid "parents-guide" -msgstr "Parents guide" - -# Default: Performed by -msgid "performed-by" -msgstr "Performed by" - -# Default: Person -msgid "person" -msgstr "Person" - -# Default: Photo sites -msgid "photo-sites" -msgstr "Photo sites" - -# Default: Pictorial -msgid "pictorial" -msgstr "Pictorial" - -# Default: Picture format -msgid "picture-format" -msgstr "Picture format" - -# Default: Plot -msgid "plot" -msgstr "Plot" - -# Default: Plot outline -msgid "plot-outline" -msgstr "Plot outline" - -# Default: Portrayed in -msgid "portrayed-in" -msgstr "Portrayed in" - -# Default: Pressing plant -msgid "pressing-plant" -msgstr "Pressing plant" - -# Default: Printed film format -msgid "printed-film-format" -msgstr "Printed film format" - -# Default: Printed media reviews -msgid 
"printed-media-reviews" -msgstr "Printed media reviews" - -# Default: Producer -msgid "producer" -msgstr "Producer" - -# Default: Production companies -msgid "production-companies" -msgstr "Production companies" - -# Default: Production country -msgid "production-country" -msgstr "Production country" - -# Default: Production dates -msgid "production-dates" -msgstr "Production dates" - -# Default: Production design -msgid "production-design" -msgstr "Production design" - -# Default: Production designer -msgid "production-designer" -msgstr "Production designer" - -# Default: Production manager -msgid "production-manager" -msgstr "Production manager" - -# Default: Production process protocol -msgid "production-process-protocol" -msgstr "Production process protocol" - -# Default: Quality of source -msgid "quality-of-source" -msgstr "Quality of source" - -# Default: Quality program -msgid "quality-program" -msgstr "Quality program" - -# Default: Quote -msgid "quote" -msgstr "Quote" - -# Default: Quotes -msgid "quotes" -msgstr "Quotes" - -# Default: Rating -msgid "rating" -msgstr "Rating" - -# Default: Recommendations -msgid "recommendations" -msgstr "Recommendations" - -# Default: Referenced in -msgid "referenced-in" -msgstr "Referenced in" - -# Default: References -msgid "references" -msgstr "References" - -# Default: Region -msgid "region" -msgstr "Region" - -# Default: Release country -msgid "release-country" -msgstr "Release country" - -# Default: Release date -msgid "release-date" -msgstr "Release date" - -# Default: Release dates -msgid "release-dates" -msgstr "Release dates" - -# Default: Remade as -msgid "remade-as" -msgstr "Remade as" - -# Default: Remake of -msgid "remake-of" -msgstr "Remake of" - -# Default: Rentals -msgid "rentals" -msgstr "Rentals" - -# Default: Result -msgid "result" -msgstr "Result" - -# Default: Review -msgid "review" -msgstr "Review" - -# Default: Review author -msgid "review-author" -msgstr "Review author" - -# Default: Review kind 
-msgid "review-kind" -msgstr "Review kind" - -# Default: Runtime -msgid "runtime" -msgstr "Runtime" - -# Default: Runtimes -msgid "runtimes" -msgstr "Runtimes" - -# Default: Salary history -msgid "salary-history" -msgstr "Salary history" - -# Default: Screenplay teleplay -msgid "screenplay-teleplay" -msgstr "Screenplay teleplay" - -# Default: Season -msgid "season" -msgstr "Season" - -# Default: Second unit director or assistant director -msgid "second-unit-director-or-assistant-director" -msgstr "Second unit director or assistant director" - -# Default: Self -msgid "self" -msgstr "Self" - -# Default: Series animation department -msgid "series-animation-department" -msgstr "Series animation department" - -# Default: Series art department -msgid "series-art-department" -msgstr "Series art department" - -# Default: Series assistant directors -msgid "series-assistant-directors" -msgstr "Series assistant directors" - -# Default: Series camera department -msgid "series-camera-department" -msgstr "Series camera department" - -# Default: Series casting department -msgid "series-casting-department" -msgstr "Series casting department" - -# Default: Series cinematographers -msgid "series-cinematographers" -msgstr "Series cinematographers" - -# Default: Series costume department -msgid "series-costume-department" -msgstr "Series costume department" - -# Default: Series editorial department -msgid "series-editorial-department" -msgstr "Series editorial department" - -# Default: Series editors -msgid "series-editors" -msgstr "Series editors" - -# Default: Series make up department -msgid "series-make-up-department" -msgstr "Series make up department" - -# Default: Series miscellaneous -msgid "series-miscellaneous" -msgstr "Series miscellaneous" - -# Default: Series music department -msgid "series-music-department" -msgstr "Series music department" - -# Default: Series producers -msgid "series-producers" -msgstr "Series producers" - -# Default: Series production designers -msgid 
"series-production-designers" -msgstr "Series production designers" - -# Default: Series production managers -msgid "series-production-managers" -msgstr "Series production managers" - -# Default: Series sound department -msgid "series-sound-department" -msgstr "Series sound department" - -# Default: Series special effects department -msgid "series-special-effects-department" -msgstr "Series special effects department" - -# Default: Series stunts -msgid "series-stunts" -msgstr "Series stunts" - -# Default: Series title -msgid "series-title" -msgstr "Series title" - -# Default: Series transportation department -msgid "series-transportation-department" -msgstr "Series transportation department" - -# Default: Series visual effects department -msgid "series-visual-effects-department" -msgstr "Series visual effects department" - -# Default: Series writers -msgid "series-writers" -msgstr "Series writers" - -# Default: Series years -msgid "series-years" -msgstr "Series years" - -# Default: Set decoration -msgid "set-decoration" -msgstr "Set decoration" - -# Default: Sharpness -msgid "sharpness" -msgstr "Sharpness" - -# Default: Similar to -msgid "similar-to" -msgstr "Similar to" - -# Default: Sound clips -msgid "sound-clips" -msgstr "Sound clips" - -# Default: Sound crew -msgid "sound-crew" -msgstr "Sound crew" - -# Default: Sound encoding -msgid "sound-encoding" -msgstr "Sound encoding" - -# Default: Sound mix -msgid "sound-mix" -msgstr "Sound mix" - -# Default: Soundtrack -msgid "soundtrack" -msgstr "Soundtrack" - -# Default: Spaciality -msgid "spaciality" -msgstr "Spaciality" - -# Default: Special effects -msgid "special-effects" -msgstr "Special effects" - -# Default: Special effects companies -msgid "special-effects-companies" -msgstr "Special effects companies" - -# Default: Special effects department -msgid "special-effects-department" -msgstr "Special effects department" - -# Default: Spin off -msgid "spin-off" -msgstr "Spin off" - -# Default: Spin off from -msgid 
"spin-off-from" -msgstr "Spin off from" - -# Default: Spoofed in -msgid "spoofed-in" -msgstr "Spoofed in" - -# Default: Spoofs -msgid "spoofs" -msgstr "Spoofs" - -# Default: Spouse -msgid "spouse" -msgstr "Spouse" - -# Default: Status of availablility -msgid "status-of-availablility" -msgstr "Status of availablility" - -# Default: Studio -msgid "studio" -msgstr "Studio" - -# Default: Studios -msgid "studios" -msgstr "Studios" - -# Default: Stunt performer -msgid "stunt-performer" -msgstr "Stunt performer" - -# Default: Stunts -msgid "stunts" -msgstr "Stunts" - -# Default: Subtitles -msgid "subtitles" -msgstr "Subtitles" - -# Default: Supplement -msgid "supplement" -msgstr "Supplement" - -# Default: Supplements -msgid "supplements" -msgstr "Supplements" - -# Default: Synopsis -msgid "synopsis" -msgstr "Synopsis" - -# Default: Taglines -msgid "taglines" -msgstr "Taglines" - -# Default: Tech info -msgid "tech-info" -msgstr "Tech info" - -# Default: Thanks -msgid "thanks" -msgstr "Thanks" - -# Default: Time -msgid "time" -msgstr "Time" - -# Default: Title -msgid "title" -msgstr "Title" - -# Default: Titles in this product -msgid "titles-in-this-product" -msgstr "Titles in this product" - -# Default: To -msgid "to" -msgstr "To" - -# Default: Top 250 rank -msgid "top-250-rank" -msgstr "Top 250 rank" - -# Default: Trade mark -msgid "trade-mark" -msgstr "Trade mark" - -# Default: Transportation department -msgid "transportation-department" -msgstr "Transportation department" - -# Default: Trivia -msgid "trivia" -msgstr "Trivia" - -# Default: Under license from -msgid "under-license-from" -msgstr "Under license from" - -# Default: Unknown link -msgid "unknown-link" -msgstr "Unknown link" - -# Default: Upc -msgid "upc" -msgstr "Upc" - -# Default: Version of -msgid "version-of" -msgstr "Version of" - -# Default: Vhs -msgid "vhs" -msgstr "Vhs" - -# Default: Video artifacts -msgid "video-artifacts" -msgstr "Video artifacts" - -# Default: Video clips -msgid "video-clips" -msgstr 
"Video clips" - -# Default: Video noise -msgid "video-noise" -msgstr "Video noise" - -# Default: Video quality -msgid "video-quality" -msgstr "Video quality" - -# Default: Video standard -msgid "video-standard" -msgstr "Video standard" - -# Default: Visual effects -msgid "visual-effects" -msgstr "Visual effects" - -# Default: Votes -msgid "votes" -msgstr "Votes" - -# Default: Votes distribution -msgid "votes-distribution" -msgstr "Votes distribution" - -# Default: Weekend gross -msgid "weekend-gross" -msgstr "Weekend gross" - -# Default: Where now -msgid "where-now" -msgstr "Where now" - -# Default: With -msgid "with" -msgstr "With" - -# Default: Writer -msgid "writer" -msgstr "Writer" - -# Default: Written by -msgid "written-by" -msgstr "Written by" - -# Default: Year -msgid "year" -msgstr "Year" - -# Default: Zshops -msgid "zshops" -msgstr "Zshops" - diff --git a/libs/imdb/locale/imdbpy-it.po b/libs/imdb/locale/imdbpy-it.po deleted file mode 100644 index 17cfce46..00000000 --- a/libs/imdb/locale/imdbpy-it.po +++ /dev/null @@ -1,1300 +0,0 @@ -# Gettext message file for imdbpy -msgid "" -msgstr "" -"Project-Id-Version: imdbpy\n" -"POT-Creation-Date: 2010-03-18 14:35+0000\n" -"PO-Revision-Date: 2009-07-03 13:00+0000\n" -"Last-Translator: Davide Alberani \n" -"Language-Team: Davide Alberani \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=2; plural=(n != 1);\n" -"Language-Code: it\n" -"Language-Name: Italian\n" -"Preferred-Encodings: utf-8\n" -"Domain: imdbpy\n" - -# Default: Actor -msgid "actor" -msgstr "Attore" - -# Default: Actress -msgid "actress" -msgstr "Attrice" - -# Default: Adaption -msgid "adaption" -msgstr "Adattamento" - -# Default: Additional information -msgid "additional-information" -msgstr "Ulteriori informazioni" - -# Default: Admissions -msgid "admissions" -msgstr "Biglietti venduti" - -# Default: Agent address -msgid "agent-address" -msgstr "Indirizzo dell'agente" 
- -# Default: Airing -msgid "airing" -msgstr "In onda" - -# Default: Akas -msgid "akas" -msgstr "Alias" - -# Default: Akas from release info -msgid "akas-from-release-info" -msgstr "Alias dalle informazioni di rilascio" - -# Default: All products -msgid "all-products" -msgstr "Tutti i prodotti" - -# Default: Alternate language version of -msgid "alternate-language-version-of" -msgstr "Versione in altra lingua di" - -# Default: Alternate versions -msgid "alternate-versions" -msgstr "Versioni alternative" - -# Default: Amazon reviews -msgid "amazon-reviews" -msgstr "Recensione di Amazon" - -# Default: Analog left -msgid "analog-left" -msgstr "Analogico sinistro" - -# Default: Analog right -msgid "analog-right" -msgstr "Analogico destro" - -# Default: Animation department -msgid "animation-department" -msgstr "Dipartimento animazione" - -# Default: Archive footage -msgid "archive-footage" -msgstr "Materiale d'archivio" - -# Default: Arithmetic mean -msgid "arithmetic-mean" -msgstr "Media aritmetica" - -# Default: Art department -msgid "art-department" -msgstr "Dipartimento artistico" - -# Default: Art direction -msgid "art-direction" -msgstr "Direzione artistica" - -# Default: Art director -msgid "art-director" -msgstr "Direttore artistico" - -# Default: Article -msgid "article" -msgstr "Articolo" - -# Default: Asin -msgid "asin" -msgstr "Asin" - -# Default: Aspect ratio -msgid "aspect-ratio" -msgstr "Rapporto d'aspetto" - -# Default: Assigner -msgid "assigner" -msgstr "Assegnatario" - -# Default: Assistant director -msgid "assistant-director" -msgstr "Assistente regista" - -# Default: Auctions -msgid "auctions" -msgstr "Aste" - -# Default: Audio noise -msgid "audio-noise" -msgstr "Rumore audio" - -# Default: Audio quality -msgid "audio-quality" -msgstr "Qualità audio" - -# Default: Award -msgid "award" -msgstr "Premio" - -# Default: Awards -msgid "awards" -msgstr "Premi" - -# Default: Biographical movies -msgid "biographical-movies" -msgstr "Film biografici" - -# 
Default: Biography -msgid "biography" -msgstr "Biografia" - -# Default: Biography print -msgid "biography-print" -msgstr "Biografia" - -# Default: Birth date -msgid "birth-date" -msgstr "Data di nascita" - -# Default: Birth name -msgid "birth-name" -msgstr "Nome di nascita" - -# Default: Birth notes -msgid "birth-notes" -msgstr "Note di nascita" - -# Default: Body -msgid "body" -msgstr "Corpo" - -# Default: Book -msgid "book" -msgstr "Libro" - -# Default: Books -msgid "books" -msgstr "Libri" - -# Default: Bottom 100 rank -msgid "bottom-100-rank" -msgstr "Posizione nella bottom 100" - -# Default: Budget -msgid "budget" -msgstr "Bilancio" - -# Default: Business -msgid "business" -msgstr "Affari" - -# Default: By arrangement with -msgid "by-arrangement-with" -msgstr "Arrangiamento con" - -# Default: Camera -msgid "camera" -msgstr "Cinepresa" - -# Default: Camera and electrical department -msgid "camera-and-electrical-department" -msgstr "Cinepresa e dipartimento elettrico" - -# Default: Canonical episode title -msgid "canonical-episode-title" -msgstr "Titolo dell'episodio in forma canonica" - -# Default: Canonical name -msgid "canonical-name" -msgstr "Nome in forma canonica" - -# Default: Canonical series title -msgid "canonical-series-title" -msgstr "Titolo della serie in forma canonica" - -# Default: Canonical title -msgid "canonical-title" -msgstr "Titolo in forma canonica" - -# Default: Cast -msgid "cast" -msgstr "Cast" - -# Default: Casting department -msgid "casting-department" -msgstr "Casting" - -# Default: Casting director -msgid "casting-director" -msgstr "Direttore del casting" - -# Default: Catalog number -msgid "catalog-number" -msgstr "Numero di catalogo" - -# Default: Category -msgid "category" -msgstr "Categoria" - -# Default: Certificate -msgid "certificate" -msgstr "Certificazione" - -# Default: Certificates -msgid "certificates" -msgstr "Certificazioni" - -# Default: Certification -msgid "certification" -msgstr "Certificazioni" - -# Default: Channel 
-msgid "channel" -msgstr "Canale" - -# Default: Character -msgid "character" -msgstr "Personaggio" - -# Default: Cinematographer -msgid "cinematographer" -msgstr "Fotografia" - -# Default: Cinematographic process -msgid "cinematographic-process" -msgstr "Processo cinematografico" - -# Default: Close captions teletext ld g -msgid "close-captions-teletext-ld-g" -msgstr "" - -# Default: Color info -msgid "color-info" -msgstr "Colore" - -# Default: Color information -msgid "color-information" -msgstr "Informazioni sul colore" - -# Default: Color rendition -msgid "color-rendition" -msgstr "Resa dei colori" - -# Default: Company -msgid "company" -msgstr "Compagnia" - -# Default: Complete cast -msgid "complete-cast" -msgstr "Cast completo" - -# Default: Complete crew -msgid "complete-crew" -msgstr "Troupe completa" - -# Default: Composer -msgid "composer" -msgstr "Compositore" - -# Default: Connections -msgid "connections" -msgstr "Collegamenti" - -# Default: Contrast -msgid "contrast" -msgstr "Contrasto" - -# Default: Copyright holder -msgid "copyright-holder" -msgstr "Detentore dei diritti d'autore" - -# Default: Costume department -msgid "costume-department" -msgstr "Dipartimento costumi" - -# Default: Costume designer -msgid "costume-designer" -msgstr "Costumista" - -# Default: Countries -msgid "countries" -msgstr "Paesi" - -# Default: Country -msgid "country" -msgstr "Paese" - -# Default: Courtesy of -msgid "courtesy-of" -msgstr "Cortesia di" - -# Default: Cover -msgid "cover" -msgstr "Copertina" - -# Default: Cover url -msgid "cover-url" -msgstr "Locandina" - -# Default: Crazy credits -msgid "crazy-credits" -msgstr "Titoli pazzi" - -# Default: Creator -msgid "creator" -msgstr "Creatore" - -# Default: Current role -msgid "current-role" -msgstr "Ruolo" - -# Default: Database -msgid "database" -msgstr "Database" - -# Default: Date -msgid "date" -msgstr "Data" - -# Default: Death date -msgid "death-date" -msgstr "Data di morte" - -# Default: Death notes -msgid 
"death-notes" -msgstr "Note di morte" - -# Default: Demographic -msgid "demographic" -msgstr "Spaccato demografico" - -# Default: Description -msgid "description" -msgstr "Descrizione" - -# Default: Dialogue intellegibility -msgid "dialogue-intellegibility" -msgstr "Comprensibilità dei dialoghi" - -# Default: Digital sound -msgid "digital-sound" -msgstr "Suono digitale" - -# Default: Director -msgid "director" -msgstr "Regista" - -# Default: Disc format -msgid "disc-format" -msgstr "Formato del disco" - -# Default: Disc size -msgid "disc-size" -msgstr "Dimensione del disco" - -# Default: Distributors -msgid "distributors" -msgstr "Distributori" - -# Default: Dvd -msgid "dvd" -msgstr "Dvd" - -# Default: Dvd features -msgid "dvd-features" -msgstr "Caratteristiche del DVD" - -# Default: Dvd format -msgid "dvd-format" -msgstr "Formato del DVD" - -# Default: Dvds -msgid "dvds" -msgstr "Dvd" - -# Default: Dynamic range -msgid "dynamic-range" -msgstr "Intervallo dinamico" - -# Default: Edited from -msgid "edited-from" -msgstr "Tratto da" - -# Default: Edited into -msgid "edited-into" -msgstr "Montato in" - -# Default: Editor -msgid "editor" -msgstr "Editore" - -# Default: Editorial department -msgid "editorial-department" -msgstr "Dipartimento editoriale" - -# Default: Episode -msgid "episode" -msgstr "Episodio" - -# Default: Episode of -msgid "episode-of" -msgstr "Episodio di" - -# Default: Episode title -msgid "episode-title" -msgstr "Titolo dell'episodio" - -# Default: Episodes -msgid "episodes" -msgstr "Episodi" - -# Default: Episodes rating -msgid "episodes-rating" -msgstr "Voto degli episodi" - -# Default: Essays -msgid "essays" -msgstr "Saggi" - -# Default: External reviews -msgid "external-reviews" -msgstr "Recensioni esterne" - -# Default: Faqs -msgid "faqs" -msgstr "Domande ricorrenti" - -# Default: Feature -msgid "feature" -msgstr "Caratteristica" - -# Default: Featured in -msgid "featured-in" -msgstr "Ripreso in" - -# Default: Features -msgid "features" 
-msgstr "Caratteristiche" - -# Default: Film negative format -msgid "film-negative-format" -msgstr "Formato del negativo" - -# Default: Filming dates -msgid "filming-dates" -msgstr "Data delle riprese" - -# Default: Filmography -msgid "filmography" -msgstr "Filmografia" - -# Default: Followed by -msgid "followed-by" -msgstr "Seguito da" - -# Default: Follows -msgid "follows" -msgstr "Segue" - -# Default: For -msgid "for" -msgstr "Per" - -# Default: Frequency response -msgid "frequency-response" -msgstr "Frequenze di risposta" - -# Default: From -msgid "from" -msgstr "Da" - -# Default: Full article link -msgid "full-article-link" -msgstr "Collegamento all'articolo completo" - -# Default: Full size cover url -msgid "full-size-cover-url" -msgstr "URL della copertina nelle dimensioni originali" - -# Default: Full size headshot -msgid "full-size-headshot" -msgstr "Ritratto nelle dimensioni originali" - -# Default: Genres -msgid "genres" -msgstr "Generi" - -# Default: Goofs -msgid "goofs" -msgstr "Errori" - -# Default: Gross -msgid "gross" -msgstr "Lordo" - -# Default: Group genre -msgid "group-genre" -msgstr "" - -# Default: Headshot -msgid "headshot" -msgstr "Foto" - -# Default: Height -msgid "height" -msgstr "Altezza" - -# Default: Imdbindex -msgid "imdbindex" -msgstr "" - -# Default: In development -msgid "in-development" -msgstr "In sviluppo" - -# Default: Interview -msgid "interview" -msgstr "Intervista" - -# Default: Interviews -msgid "interviews" -msgstr "Interviste" - -# Default: Introduction -msgid "introduction" -msgstr "Introduzione" - -# Default: Item -msgid "item" -msgstr "Elemento" - -# Default: Keywords -msgid "keywords" -msgstr "Parole chiave" - -# Default: Kind -msgid "kind" -msgstr "Tipo" - -# Default: Label -msgid "label" -msgstr "Etichetta" - -# Default: Laboratory -msgid "laboratory" -msgstr "Laboratorio" - -# Default: Language -msgid "language" -msgstr "Lingua" - -# Default: Languages -msgid "languages" -msgstr "Lingue" - -# Default: Laserdisc 
-msgid "laserdisc" -msgstr "Laserdisc" - -# Default: Laserdisc title -msgid "laserdisc-title" -msgstr "Titolo del laserdisc" - -# Default: Length -msgid "length" -msgstr "Durata" - -# Default: Line -msgid "line" -msgstr "Battuta" - -# Default: Link -msgid "link" -msgstr "Collegamento" - -# Default: Link text -msgid "link-text" -msgstr "Testo del link" - -# Default: Literature -msgid "literature" -msgstr "Letteratura" - -# Default: Locations -msgid "locations" -msgstr "Luoghi" - -# Default: Long imdb canonical name -msgid "long-imdb-canonical-name" -msgstr "Nome canonico IMDb lungo" - -# Default: Long imdb canonical title -msgid "long-imdb-canonical-title" -msgstr "Titolo canonico IMDb lungo" - -# Default: Long imdb episode title -msgid "long-imdb-episode-title" -msgstr "Titolo dell'episodio canonico IMDb lungo" - -# Default: Long imdb name -msgid "long-imdb-name" -msgstr "Nome IMDb lungo" - -# Default: Long imdb title -msgid "long-imdb-title" -msgstr "Titolo IMDb lungo" - -# Default: Magazine cover photo -msgid "magazine-cover-photo" -msgstr "Foto di copertina" - -# Default: Make up -msgid "make-up" -msgstr "Trucco" - -# Default: Master format -msgid "master-format" -msgstr "Formato del master" - -# Default: Median -msgid "median" -msgstr "Mediana" - -# Default: Merchandising links -msgid "merchandising-links" -msgstr "Collegamenti al merchandising" - -# Default: Mini biography -msgid "mini-biography" -msgstr "Biografia" - -# Default: Misc links -msgid "misc-links" -msgstr "Altri collegamenti" - -# Default: Miscellaneous companies -msgid "miscellaneous-companies" -msgstr "Altre compagnie" - -# Default: Miscellaneous crew -msgid "miscellaneous-crew" -msgstr "Altra troupe" - -# Default: Movie -msgid "movie" -msgstr "Film" - -# Default: Mpaa -msgid "mpaa" -msgstr "Visto MPAA" - -# Default: Music department -msgid "music-department" -msgstr "Dipartimento musicale" - -# Default: Name -msgid "name" -msgstr "Nome" - -# Default: News -msgid "news" -msgstr "Notizie" - -# 
Default: Newsgroup reviews -msgid "newsgroup-reviews" -msgstr "Recensioni dai gruppi di discussione" - -# Default: Nick names -msgid "nick-names" -msgstr "Soprannomi" - -# Default: Notes -msgid "notes" -msgstr "Note" - -# Default: Novel -msgid "novel" -msgstr "Novella" - -# Default: Number -msgid "number" -msgstr "Numero" - -# Default: Number of chapter stops -msgid "number-of-chapter-stops" -msgstr "Numero di interruzioni di capitolo" - -# Default: Number of episodes -msgid "number-of-episodes" -msgstr "Numero di episodi" - -# Default: Number of seasons -msgid "number-of-seasons" -msgstr "Numero di stagioni" - -# Default: Number of sides -msgid "number-of-sides" -msgstr "Numero di lati" - -# Default: Number of votes -msgid "number-of-votes" -msgstr "Numero di voti" - -# Default: Official retail price -msgid "official-retail-price" -msgstr "Prezzo ufficiale al pubblico" - -# Default: Official sites -msgid "official-sites" -msgstr "Siti ufficiali" - -# Default: Opening weekend -msgid "opening-weekend" -msgstr "Weekend d'apertura" - -# Default: Original air date -msgid "original-air-date" -msgstr "Data della prima messa in onda" - -# Default: Original music -msgid "original-music" -msgstr "Musica originale" - -# Default: Original title -msgid "original-title" -msgstr "Titolo originale" - -# Default: Other literature -msgid "other-literature" -msgstr "Altre opere letterarie" - -# Default: Other works -msgid "other-works" -msgstr "Altri lavori" - -# Default: Parents guide -msgid "parents-guide" -msgstr "Guida per i genitori" - -# Default: Performed by -msgid "performed-by" -msgstr "Eseguito da" - -# Default: Person -msgid "person" -msgstr "Persona" - -# Default: Photo sites -msgid "photo-sites" -msgstr "Siti con fotografie" - -# Default: Pictorial -msgid "pictorial" -msgstr "Ritratto" - -# Default: Picture format -msgid "picture-format" -msgstr "Formato dell'immagine" - -# Default: Plot -msgid "plot" -msgstr "Trama" - -# Default: Plot outline -msgid "plot-outline" 
-msgstr "Trama in breve" - -# Default: Portrayed in -msgid "portrayed-in" -msgstr "Rappresentato in" - -# Default: Pressing plant -msgid "pressing-plant" -msgstr "Impianto di stampa" - -# Default: Printed film format -msgid "printed-film-format" -msgstr "Formato della pellicola" - -# Default: Printed media reviews -msgid "printed-media-reviews" -msgstr "Recensioni su carta stampata" - -# Default: Producer -msgid "producer" -msgstr "Produttore" - -# Default: Production companies -msgid "production-companies" -msgstr "Compagnie di produzione" - -# Default: Production country -msgid "production-country" -msgstr "Paese di produzione" - -# Default: Production dates -msgid "production-dates" -msgstr "Date di produzione" - -# Default: Production design -msgid "production-design" -msgstr "Design di produzione" - -# Default: Production designer -msgid "production-designer" -msgstr "Designer di produzione" - -# Default: Production manager -msgid "production-manager" -msgstr "Manager di produzione" - -# Default: Production process protocol -msgid "production-process-protocol" -msgstr "Controllo del processo di produzione" - -# Default: Quality of source -msgid "quality-of-source" -msgstr "Qualità dell'originale" - -# Default: Quality program -msgid "quality-program" -msgstr "Programma di Qualità" - -# Default: Quote -msgid "quote" -msgstr "Citazione" - -# Default: Quotes -msgid "quotes" -msgstr "Citazioni" - -# Default: Rating -msgid "rating" -msgstr "Voto" - -# Default: Recommendations -msgid "recommendations" -msgstr "Raccomandazioni" - -# Default: Referenced in -msgid "referenced-in" -msgstr "Citato in" - -# Default: References -msgid "references" -msgstr "Cita" - -# Default: Region -msgid "region" -msgstr "Regione" - -# Default: Release country -msgid "release-country" -msgstr "Paese d'uscita" - -# Default: Release date -msgid "release-date" -msgstr "Data d'uscita" - -# Default: Release dates -msgid "release-dates" -msgstr "Date d'uscita" - -# Default: Remade as -msgid 
"remade-as" -msgstr "Rifatto come" - -# Default: Remake of -msgid "remake-of" -msgstr "Rifacimento di" - -# Default: Rentals -msgid "rentals" -msgstr "Noleggi" - -# Default: Result -msgid "result" -msgstr "Risultato" - -# Default: Review -msgid "review" -msgstr "Recensione" - -# Default: Review author -msgid "review-author" -msgstr "Autore della recensione" - -# Default: Review kind -msgid "review-kind" -msgstr "Tipo di recensione" - -# Default: Runtime -msgid "runtime" -msgstr "Durata" - -# Default: Runtimes -msgid "runtimes" -msgstr "Durate" - -# Default: Salary history -msgid "salary-history" -msgstr "Stipendi" - -# Default: Screenplay teleplay -msgid "screenplay-teleplay" -msgstr "" - -# Default: Season -msgid "season" -msgstr "Stagione" - -# Default: Second unit director or assistant director -msgid "second-unit-director-or-assistant-director" -msgstr "Regista della seconda unità o aiuto regista" - -# Default: Self -msgid "self" -msgstr "Se stesso" - -# Default: Series animation department -msgid "series-animation-department" -msgstr "Dipartimento animazione della serie" - -# Default: Series art department -msgid "series-art-department" -msgstr "Dipartimento artistico della serie" - -# Default: Series assistant directors -msgid "series-assistant-directors" -msgstr "Assistenti registi della serie" - -# Default: Series camera department -msgid "series-camera-department" -msgstr "" - -# Default: Series casting department -msgid "series-casting-department" -msgstr "" - -# Default: Series cinematographers -msgid "series-cinematographers" -msgstr "" - -# Default: Series costume department -msgid "series-costume-department" -msgstr "" - -# Default: Series editorial department -msgid "series-editorial-department" -msgstr "" - -# Default: Series editors -msgid "series-editors" -msgstr "" - -# Default: Series make up department -msgid "series-make-up-department" -msgstr "" - -# Default: Series miscellaneous -msgid "series-miscellaneous" -msgstr "" - -# Default: Series 
music department -msgid "series-music-department" -msgstr "" - -# Default: Series producers -msgid "series-producers" -msgstr "" - -# Default: Series production designers -msgid "series-production-designers" -msgstr "" - -# Default: Series production managers -msgid "series-production-managers" -msgstr "" - -# Default: Series sound department -msgid "series-sound-department" -msgstr "Dipartimento sonoro della serie" - -# Default: Series special effects department -msgid "series-special-effects-department" -msgstr "Dipartimento effetti speciali della serie" - -# Default: Series stunts -msgid "series-stunts" -msgstr "Controfigure della serie" - -# Default: Series title -msgid "series-title" -msgstr "Titolo della serie" - -# Default: Series transportation department -msgid "series-transportation-department" -msgstr "" - -# Default: Series visual effects department -msgid "series-visual-effects-department" -msgstr "" - -# Default: Series writers -msgid "series-writers" -msgstr "Scrittori della serie" - -# Default: Series years -msgid "series-years" -msgstr "Anni della serie" - -# Default: Set decoration -msgid "set-decoration" -msgstr "Decorazione del set" - -# Default: Sharpness -msgid "sharpness" -msgstr "" - -# Default: Similar to -msgid "similar-to" -msgstr "Simile a" - -# Default: Smart canonical episode title -msgid "smart-canonical-episode-title" -msgstr "Titolo canonico intelligente dell'episodio" - -# Default: Smart canonical series title -msgid "smart-canonical-series-title" -msgstr "Titolo canonico intelligente della serie" - -# Default: Smart canonical title -msgid "smart-canonical-title" -msgstr "Titolo canonico intelligente" - -# Default: Smart long imdb canonical title -msgid "smart-long-imdb-canonical-title" -msgstr "Titolo canonico lungo intelligente" - -# Default: Sound clips -msgid "sound-clips" -msgstr "" - -# Default: Sound crew -msgid "sound-crew" -msgstr "" - -# Default: Sound encoding -msgid "sound-encoding" -msgstr "Codifica sonora" - -# 
Default: Sound mix -msgid "sound-mix" -msgstr "Mix audio" - -# Default: Soundtrack -msgid "soundtrack" -msgstr "Colonna sonora" - -# Default: Spaciality -msgid "spaciality" -msgstr "Specialità" - -# Default: Special effects -msgid "special-effects" -msgstr "Effetti speciali" - -# Default: Special effects companies -msgid "special-effects-companies" -msgstr "Compagnie di effetti speciali" - -# Default: Special effects department -msgid "special-effects-department" -msgstr "Dipartimento effetti speciali" - -# Default: Spin off -msgid "spin-off" -msgstr "Derivati" - -# Default: Spin off from -msgid "spin-off-from" -msgstr "Deriva da" - -# Default: Spoofed in -msgid "spoofed-in" -msgstr "Preso in giro in" - -# Default: Spoofs -msgid "spoofs" -msgstr "Prende in giro" - -# Default: Spouse -msgid "spouse" -msgstr "Coniuge" - -# Default: Status of availablility -msgid "status-of-availablility" -msgstr "Disponibilità" - -# Default: Studio -msgid "studio" -msgstr "Studio" - -# Default: Studios -msgid "studios" -msgstr "Studi" - -# Default: Stunt performer -msgid "stunt-performer" -msgstr "" - -# Default: Stunts -msgid "stunts" -msgstr "Stuntman" - -# Default: Subtitles -msgid "subtitles" -msgstr "Sottotitoli" - -# Default: Supplement -msgid "supplement" -msgstr "Extra" - -# Default: Supplements -msgid "supplements" -msgstr "Extra" - -# Default: Synopsis -msgid "synopsis" -msgstr "Compendio della trama" - -# Default: Taglines -msgid "taglines" -msgstr "Slogan" - -# Default: Tech info -msgid "tech-info" -msgstr "Informazioni tecniche" - -# Default: Thanks -msgid "thanks" -msgstr "Ringraziamenti" - -# Default: Time -msgid "time" -msgstr "Tempo" - -# Default: Title -msgid "title" -msgstr "Titolo" - -# Default: Titles in this product -msgid "titles-in-this-product" -msgstr "Titoli in questo prodotto" - -# Default: To -msgid "to" -msgstr "A" - -# Default: Top 250 rank -msgid "top-250-rank" -msgstr "Posizione nella top 250" - -# Default: Trade mark -msgid "trade-mark" -msgstr 
"Marchio registrato" - -# Default: Transportation department -msgid "transportation-department" -msgstr "Dipartimento trasporti" - -# Default: Trivia -msgid "trivia" -msgstr "Frivolezze" - -# Default: Tv -msgid "tv" -msgstr "Tv" - -# Default: Under license from -msgid "under-license-from" -msgstr "Sotto licenza da" - -# Default: Unknown link -msgid "unknown-link" -msgstr "Collegamento sconosciuto" - -# Default: Upc -msgid "upc" -msgstr "" - -# Default: Version of -msgid "version-of" -msgstr "Versione di" - -# Default: Vhs -msgid "vhs" -msgstr "VHS" - -# Default: Video -msgid "video" -msgstr "Video" - -# Default: Video artifacts -msgid "video-artifacts" -msgstr "Imperfezioni video" - -# Default: Video clips -msgid "video-clips" -msgstr "Video clips" - -# Default: Video noise -msgid "video-noise" -msgstr "Rumore video" - -# Default: Video quality -msgid "video-quality" -msgstr "Qualità video" - -# Default: Video standard -msgid "video-standard" -msgstr "Standard video" - -# Default: Visual effects -msgid "visual-effects" -msgstr "Effetti visivi" - -# Default: Votes -msgid "votes" -msgstr "Voti" - -# Default: Votes distribution -msgid "votes-distribution" -msgstr "Distribuzione dei voti" - -# Default: Weekend gross -msgid "weekend-gross" -msgstr "Lordo del primo fine settimana" - -# Default: Where now -msgid "where-now" -msgstr "Cosa sta facendo ora" - -# Default: With -msgid "with" -msgstr "Con" - -# Default: Writer -msgid "writer" -msgstr "Scrittore" - -# Default: Written by -msgid "written-by" -msgstr "Scritto da" - -# Default: Year -msgid "year" -msgstr "Anno" - -# Default: Zshops -msgid "zshops" -msgstr "" diff --git a/libs/imdb/locale/imdbpy-tr.po b/libs/imdb/locale/imdbpy-tr.po deleted file mode 100644 index a44452ae..00000000 --- a/libs/imdb/locale/imdbpy-tr.po +++ /dev/null @@ -1,1300 +0,0 @@ -# Gettext message file for imdbpy -msgid "" -msgstr "" -"Project-Id-Version: imdbpy\n" -"POT-Creation-Date: 2010-03-18 14:35+0000\n" -"PO-Revision-Date: 2009-04-21 
19:04+0200\n" -"Last-Translator: H. Turgut Uyar \n" -"Language-Team: IMDbPY Türkçe \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Language-Code: tr\n" -"Language-Name: Türkçe\n" -"Preferred-Encodings: utf-8\n" -"Domain: imdbpy\n" - -# Default: Actor -msgid "actor" -msgstr "Oyuncu" - -# Default: Actress -msgid "actress" -msgstr "Oyuncu" - -# Default: Adaption -msgid "adaption" -msgstr "" - -# Default: Additional information -msgid "additional-information" -msgstr "Ek bilgi" - -# Default: Admissions -msgid "admissions" -msgstr "" - -# Default: Agent address -msgid "agent-address" -msgstr "" - -# Default: Airing -msgid "airing" -msgstr "Yayımlanma" - -# Default: Akas -msgid "akas" -msgstr "DiÄŸer baÅŸlıklar" - -# Default: Akas from release info -msgid "akas-from-release-info" -msgstr "" - -# Default: All products -msgid "all-products" -msgstr "Bütün ürünler" - -# Default: Alternate language version of -msgid "alternate-language-version-of" -msgstr "" - -# Default: Alternate versions -msgid "alternate-versions" -msgstr "" - -# Default: Amazon reviews -msgid "amazon-reviews" -msgstr "Amazon eleÅŸtirileri" - -# Default: Analog left -msgid "analog-left" -msgstr "Analog sol" - -# Default: Analog right -msgid "analog-right" -msgstr "Analog saÄŸ" - -# Default: Animation department -msgid "animation-department" -msgstr "Animasyon departmanı" - -# Default: Archive footage -msgid "archive-footage" -msgstr "ArÅŸiv çekimleri" - -# Default: Arithmetic mean -msgid "arithmetic-mean" -msgstr "Aritmetik ortalama" - -# Default: Art department -msgid "art-department" -msgstr "Sanat departmanı" - -# Default: Art direction -msgid "art-direction" -msgstr "Sanat yönetmenliÄŸi" - -# Default: Art director -msgid "art-director" -msgstr "Sanat yönetmeni" - -# Default: Article -msgid "article" -msgstr "" - -# Default: Asin -msgid "asin" -msgstr "ASIN" - -# Default: Aspect ratio -msgid 
"aspect-ratio" -msgstr "En-boy oranı" - -# Default: Assigner -msgid "assigner" -msgstr "Veren" - -# Default: Assistant director -msgid "assistant-director" -msgstr "Yardımcı yönetmen" - -# Default: Auctions -msgid "auctions" -msgstr "Açık artırmalar" - -# Default: Audio noise -msgid "audio-noise" -msgstr "Ses gürültüsü" - -# Default: Audio quality -msgid "audio-quality" -msgstr "Ses kalitesi" - -# Default: Award -msgid "award" -msgstr "Ödül" - -# Default: Awards -msgid "awards" -msgstr "Ödüller" - -# Default: Biographical movies -msgid "biographical-movies" -msgstr "Biyografik filmler" - -# Default: Biography -msgid "biography" -msgstr "Biyografi" - -# Default: Biography print -msgid "biography-print" -msgstr "Basılı biyografi" - -# Default: Birth date -msgid "birth-date" -msgstr "DoÄŸum tarihi" - -# Default: Birth name -msgid "birth-name" -msgstr "Asıl ismi" - -# Default: Birth notes -msgid "birth-notes" -msgstr "DoÄŸum notları" - -# Default: Body -msgid "body" -msgstr "Metin" - -# Default: Book -msgid "book" -msgstr "Kitap" - -# Default: Books -msgid "books" -msgstr "Kitaplar" - -# Default: Bottom 100 rank -msgid "bottom-100-rank" -msgstr "En kötü 100 içindeki sırası" - -# Default: Budget -msgid "budget" -msgstr "Bütçe" - -# Default: Business -msgid "business" -msgstr "GiÅŸe" - -# Default: By arrangement with -msgid "by-arrangement-with" -msgstr "" - -# Default: Camera -msgid "camera" -msgstr "Kamera" - -# Default: Camera and electrical department -msgid "camera-and-electrical-department" -msgstr "Kamera ve elektrik departmanı" - -# Default: Canonical episode title -msgid "canonical-episode-title" -msgstr "" - -# Default: Canonical name -msgid "canonical-name" -msgstr "" - -# Default: Canonical series title -msgid "canonical-series-title" -msgstr "" - -# Default: Canonical title -msgid "canonical-title" -msgstr "" - -# Default: Cast -msgid "cast" -msgstr "Oynayanlar" - -# Default: Casting department -msgid "casting-department" -msgstr "Oyuncu seçme departmanı" - 
-# Default: Casting director -msgid "casting-director" -msgstr "Oyuncu seçme yönetmeni" - -# Default: Catalog number -msgid "catalog-number" -msgstr "Katalog numarası" - -# Default: Category -msgid "category" -msgstr "Kategori" - -# Default: Certificate -msgid "certificate" -msgstr "Sertifika" - -# Default: Certificates -msgid "certificates" -msgstr "Sertifikalar" - -# Default: Certification -msgid "certification" -msgstr "" - -# Default: Channel -msgid "channel" -msgstr "Kanal" - -# Default: Character -msgid "character" -msgstr "Karakter" - -# Default: Cinematographer -msgid "cinematographer" -msgstr "Kameraman" - -# Default: Cinematographic process -msgid "cinematographic-process" -msgstr "" - -# Default: Close captions teletext ld g -msgid "close-captions-teletext-ld-g" -msgstr "" - -# Default: Color info -msgid "color-info" -msgstr "Renk bilgisi" - -# Default: Color information -msgid "color-information" -msgstr "Renk bilgisi" - -# Default: Color rendition -msgid "color-rendition" -msgstr "" - -# Default: Company -msgid "company" -msgstr "Åžirket" - -# Default: Complete cast -msgid "complete-cast" -msgstr "Bütün oynayanlar" - -# Default: Complete crew -msgid "complete-crew" -msgstr "Bütün çalışanlar" - -# Default: Composer -msgid "composer" -msgstr "Besteci" - -# Default: Connections -msgid "connections" -msgstr "BaÄŸlantılar" - -# Default: Contrast -msgid "contrast" -msgstr "Kontrast" - -# Default: Copyright holder -msgid "copyright-holder" -msgstr "Telif sahibi" - -# Default: Costume department -msgid "costume-department" -msgstr "Kostüm departmanı" - -# Default: Costume designer -msgid "costume-designer" -msgstr "Kostüm tasarımcısı" - -# Default: Countries -msgid "countries" -msgstr "Ülkeler" - -# Default: Country -msgid "country" -msgstr "Ülke" - -# Default: Courtesy of -msgid "courtesy-of" -msgstr "" - -# Default: Cover -msgid "cover" -msgstr "Poster" - -# Default: Cover url -msgid "cover-url" -msgstr "Poster adresi" - -# Default: Crazy credits -msgid 
"crazy-credits" -msgstr "" - -# Default: Creator -msgid "creator" -msgstr "Yaratıcı" - -# Default: Current role -msgid "current-role" -msgstr "Åžimdiki rol" - -# Default: Database -msgid "database" -msgstr "Veritabanı" - -# Default: Date -msgid "date" -msgstr "Tarih" - -# Default: Death date -msgid "death-date" -msgstr "Ölüm tarihi" - -# Default: Death notes -msgid "death-notes" -msgstr "Ölüm notları" - -# Default: Demographic -msgid "demographic" -msgstr "Demografi" - -# Default: Description -msgid "description" -msgstr "Tarif" - -# Default: Dialogue intellegibility -msgid "dialogue-intellegibility" -msgstr "" - -# Default: Digital sound -msgid "digital-sound" -msgstr "Dijital ses" - -# Default: Director -msgid "director" -msgstr "Yönetmen" - -# Default: Disc format -msgid "disc-format" -msgstr "Disk formatı" - -# Default: Disc size -msgid "disc-size" -msgstr "Disk boyu" - -# Default: Distributors -msgid "distributors" -msgstr "Dağıtıcılar" - -# Default: Dvd -msgid "dvd" -msgstr "DVD" - -# Default: Dvd features -msgid "dvd-features" -msgstr "DVD özellikleri" - -# Default: Dvd format -msgid "dvd-format" -msgstr "DVD formatı" - -# Default: Dvds -msgid "dvds" -msgstr "DVD'ler" - -# Default: Dynamic range -msgid "dynamic-range" -msgstr "" - -# Default: Edited from -msgid "edited-from" -msgstr "" - -# Default: Edited into -msgid "edited-into" -msgstr "" - -# Default: Editor -msgid "editor" -msgstr "Montajcı" - -# Default: Editorial department -msgid "editorial-department" -msgstr "Montaj departmanı" - -# Default: Episode -msgid "episode" -msgstr "Bölüm" - -# Default: Episode of -msgid "episode-of" -msgstr "Dizi" - -# Default: Episode title -msgid "episode-title" -msgstr "Bölüm baÅŸlığı" - -# Default: Episodes -msgid "episodes" -msgstr "Bölümler" - -# Default: Episodes rating -msgid "episodes-rating" -msgstr "Bölüm puanı" - -# Default: Essays -msgid "essays" -msgstr "Denemeler" - -# Default: External reviews -msgid "external-reviews" -msgstr "Harici eleÅŸtiriler" - -# 
Default: Faqs -msgid "faqs" -msgstr "SSS" - -# Default: Feature -msgid "feature" -msgstr "" - -# Default: Featured in -msgid "featured-in" -msgstr "" - -# Default: Features -msgid "features" -msgstr "" - -# Default: Film negative format -msgid "film-negative-format" -msgstr "Film negatif formatı" - -# Default: Filming dates -msgid "filming-dates" -msgstr "Çekim tarihleri" - -# Default: Filmography -msgid "filmography" -msgstr "Filmografi" - -# Default: Followed by -msgid "followed-by" -msgstr "PeÅŸinden gelen film" - -# Default: Follows -msgid "follows" -msgstr "PeÅŸinden geldiÄŸi film" - -# Default: For -msgid "for" -msgstr "Film" - -# Default: Frequency response -msgid "frequency-response" -msgstr "" - -# Default: From -msgid "from" -msgstr "" - -# Default: Full article link -msgid "full-article-link" -msgstr "" - -# Default: Full size cover url -msgid "full-size-cover-url" -msgstr "" - -# Default: Full size headshot -msgid "full-size-headshot" -msgstr "" - -# Default: Genres -msgid "genres" -msgstr "Türler" - -# Default: Goofs -msgid "goofs" -msgstr "Hatalar" - -# Default: Gross -msgid "gross" -msgstr "Hasılat" - -# Default: Group genre -msgid "group-genre" -msgstr "" - -# Default: Headshot -msgid "headshot" -msgstr "Resim" - -# Default: Height -msgid "height" -msgstr "Boy" - -# Default: Imdbindex -msgid "imdbindex" -msgstr "" - -# Default: In development -msgid "in-development" -msgstr "" - -# Default: Interview -msgid "interview" -msgstr "SöyleÅŸi" - -# Default: Interviews -msgid "interviews" -msgstr "SöyleÅŸiler" - -# Default: Introduction -msgid "introduction" -msgstr "İlk filmi" - -# Default: Item -msgid "item" -msgstr "" - -# Default: Keywords -msgid "keywords" -msgstr "Anahtar sözcükler" - -# Default: Kind -msgid "kind" -msgstr "Tip" - -# Default: Label -msgid "label" -msgstr "" - -# Default: Laboratory -msgid "laboratory" -msgstr "Laboratuar" - -# Default: Language -msgid "language" -msgstr "Dil" - -# Default: Languages -msgid "languages" -msgstr 
"Diller" - -# Default: Laserdisc -msgid "laserdisc" -msgstr "Lazer Disk" - -# Default: Laserdisc title -msgid "laserdisc-title" -msgstr "" - -# Default: Length -msgid "length" -msgstr "Süre" - -# Default: Line -msgid "line" -msgstr "Replik" - -# Default: Link -msgid "link" -msgstr "BaÄŸlantı" - -# Default: Link text -msgid "link-text" -msgstr "BaÄŸlantı metni" - -# Default: Literature -msgid "literature" -msgstr "Edebiyat" - -# Default: Locations -msgid "locations" -msgstr "Çekim yerleri" - -# Default: Long imdb canonical name -msgid "long-imdb-canonical-name" -msgstr "" - -# Default: Long imdb canonical title -msgid "long-imdb-canonical-title" -msgstr "" - -# Default: Long imdb episode title -msgid "long-imdb-episode-title" -msgstr "IMDb uzun bölüm baÅŸlığı" - -# Default: Long imdb name -msgid "long-imdb-name" -msgstr "IMDb uzun ismi" - -# Default: Long imdb title -msgid "long-imdb-title" -msgstr "IMDb uzun baÅŸlığı" - -# Default: Magazine cover photo -msgid "magazine-cover-photo" -msgstr "Dergi kapağı resmi" - -# Default: Make up -msgid "make-up" -msgstr "Makyaj" - -# Default: Master format -msgid "master-format" -msgstr "Master format" - -# Default: Median -msgid "median" -msgstr "Orta deÄŸer" - -# Default: Merchandising links -msgid "merchandising-links" -msgstr "" - -# Default: Mini biography -msgid "mini-biography" -msgstr "Mini biyografi" - -# Default: Misc links -msgid "misc-links" -msgstr "" - -# Default: Miscellaneous companies -msgid "miscellaneous-companies" -msgstr "" - -# Default: Miscellaneous crew -msgid "miscellaneous-crew" -msgstr "" - -# Default: Movie -msgid "movie" -msgstr "Film" - -# Default: Mpaa -msgid "mpaa" -msgstr "MPAA" - -# Default: Music department -msgid "music-department" -msgstr "Müzik departmanı" - -# Default: Name -msgid "name" -msgstr "İsim" - -# Default: News -msgid "news" -msgstr "Haberler" - -# Default: Newsgroup reviews -msgid "newsgroup-reviews" -msgstr "Haber grubu eleÅŸtirileri" - -# Default: Nick names -msgid "nick-names" 
-msgstr "Takma isimler" - -# Default: Notes -msgid "notes" -msgstr "Notlar" - -# Default: Novel -msgid "novel" -msgstr "Roman" - -# Default: Number -msgid "number" -msgstr "Sayı" - -# Default: Number of chapter stops -msgid "number-of-chapter-stops" -msgstr "" - -# Default: Number of episodes -msgid "number-of-episodes" -msgstr "Bölüm sayısı" - -# Default: Number of seasons -msgid "number-of-seasons" -msgstr "Sezon sayısı" - -# Default: Number of sides -msgid "number-of-sides" -msgstr "" - -# Default: Number of votes -msgid "number-of-votes" -msgstr "Oy sayısı" - -# Default: Official retail price -msgid "official-retail-price" -msgstr "Resmi perakende satış fiyatı" - -# Default: Official sites -msgid "official-sites" -msgstr "Resmi siteler" - -# Default: Opening weekend -msgid "opening-weekend" -msgstr "Açılış haftasonu" - -# Default: Original air date -msgid "original-air-date" -msgstr "İlk yayımlanma tarihi" - -# Default: Original music -msgid "original-music" -msgstr "Orijinal müzik" - -# Default: Original title -msgid "original-title" -msgstr "" - -# Default: Other literature -msgid "other-literature" -msgstr "" - -# Default: Other works -msgid "other-works" -msgstr "Diğer çalışmalar" - -# Default: Parents guide -msgid "parents-guide" -msgstr "Ana-baba kılavuzu" - -# Default: Performed by -msgid "performed-by" -msgstr "İcra eden" - -# Default: Person -msgid "person" -msgstr "Kişi" - -# Default: Photo sites -msgid "photo-sites" -msgstr "Fotoğraf siteleri" - -# Default: Pictorial -msgid "pictorial" -msgstr "" - -# Default: Picture format -msgid "picture-format" -msgstr "Resim formatı" - -# Default: Plot -msgid "plot" -msgstr "Konu" - -# Default: Plot outline -msgid "plot-outline" -msgstr "Konu kısa özeti" - -# Default: Portrayed in -msgid "portrayed-in" -msgstr "" - -# Default: Pressing plant -msgid "pressing-plant" -msgstr "" - -# Default: Printed film format -msgid "printed-film-format" -msgstr "Basılı film formatı" - -# Default: Printed media reviews -msgid 
"printed-media-reviews" -msgstr "Basın eleştirileri" - -# Default: Producer -msgid "producer" -msgstr "Yapımcı" - -# Default: Production companies -msgid "production-companies" -msgstr "Yapım şirketleri" - -# Default: Production country -msgid "production-country" -msgstr "Yapımcı ülke" - -# Default: Production dates -msgid "production-dates" -msgstr "Yapım tarihleri" - -# Default: Production design -msgid "production-design" -msgstr "Yapım tasarımı" - -# Default: Production designer -msgid "production-designer" -msgstr "Yapım tasarımcısı" - -# Default: Production manager -msgid "production-manager" -msgstr "Yapım yöneticisi" - -# Default: Production process protocol -msgid "production-process-protocol" -msgstr "" - -# Default: Quality of source -msgid "quality-of-source" -msgstr "" - -# Default: Quality program -msgid "quality-program" -msgstr "" - -# Default: Quote -msgid "quote" -msgstr "Alıntı" - -# Default: Quotes -msgid "quotes" -msgstr "Alıntılar" - -# Default: Rating -msgid "rating" -msgstr "Puan" - -# Default: Recommendations -msgid "recommendations" -msgstr "Tavsiyeler" - -# Default: Referenced in -msgid "referenced-in" -msgstr "Gönderme yapılan filmler" - -# Default: References -msgid "references" -msgstr "Gönderme yaptığı filmler" - -# Default: Region -msgid "region" -msgstr "Bölge" - -# Default: Release country -msgid "release-country" -msgstr "" - -# Default: Release date -msgid "release-date" -msgstr "" - -# Default: Release dates -msgid "release-dates" -msgstr "" - -# Default: Remade as -msgid "remade-as" -msgstr "Yeniden çekilişi" - -# Default: Remake of -msgid "remake-of" -msgstr "Yeniden çekimi olduğu film" - -# Default: Rentals -msgid "rentals" -msgstr "Kiralamalar" - -# Default: Result -msgid "result" -msgstr "Sonuç" - -# Default: Review -msgid "review" -msgstr "Eleştiri" - -# Default: Review author -msgid "review-author" -msgstr "Eleştiri yazarı" - -# Default: Review kind -msgid "review-kind" -msgstr "Eleştiri tipi" - -# Default: 
Runtime -msgid "runtime" -msgstr "Süre" - -# Default: Runtimes -msgid "runtimes" -msgstr "Süreler" - -# Default: Salary history -msgid "salary-history" -msgstr "Üçret tarihçesi" - -# Default: Screenplay teleplay -msgid "screenplay-teleplay" -msgstr "Senaryo" - -# Default: Season -msgid "season" -msgstr "Sezon" - -# Default: Second unit director or assistant director -msgid "second-unit-director-or-assistant-director" -msgstr "İkinci birim yönetmeni ya da yardımcı yönetmen" - -# Default: Self -msgid "self" -msgstr "Kendisi" - -# Default: Series animation department -msgid "series-animation-department" -msgstr "Dizinin animasyon departmanı" - -# Default: Series art department -msgid "series-art-department" -msgstr "Dizinin sanat departmanı" - -# Default: Series assistant directors -msgid "series-assistant-directors" -msgstr "Dizinin yardımcı yönetmenleri" - -# Default: Series camera department -msgid "series-camera-department" -msgstr "Dizinin kamera departmanı" - -# Default: Series casting department -msgid "series-casting-department" -msgstr "Dizinin oyuncu seçimi departmanı" - -# Default: Series cinematographers -msgid "series-cinematographers" -msgstr "Dizinin kameramanları" - -# Default: Series costume department -msgid "series-costume-department" -msgstr "Dizinin kostüm departmanı" - -# Default: Series editorial department -msgid "series-editorial-department" -msgstr "Dizinin montaj departmanı" - -# Default: Series editors -msgid "series-editors" -msgstr "Dizinin montajcıları" - -# Default: Series make up department -msgid "series-make-up-department" -msgstr "Dizinin makyaj departmanı" - -# Default: Series miscellaneous -msgid "series-miscellaneous" -msgstr "" - -# Default: Series music department -msgid "series-music-department" -msgstr "Dizinin müzik departmanı" - -# Default: Series producers -msgid "series-producers" -msgstr "Dizinin yapımcıları" - -# Default: Series production designers -msgid "series-production-designers" -msgstr "Dizinin yapım 
tasarımcıları" - -# Default: Series production managers -msgid "series-production-managers" -msgstr "Dizinin yapım yöneticileri" - -# Default: Series sound department -msgid "series-sound-department" -msgstr "Dizinin ses departmanı" - -# Default: Series special effects department -msgid "series-special-effects-department" -msgstr "Dizinin özel efekt departmanı" - -# Default: Series stunts -msgid "series-stunts" -msgstr "Dizinin dublörleri" - -# Default: Series title -msgid "series-title" -msgstr "Dizinin başlığı" - -# Default: Series transportation department -msgid "series-transportation-department" -msgstr "Dizinin ulaşım departmanı" - -# Default: Series visual effects department -msgid "series-visual-effects-department" -msgstr "Dizinin görsel efekt departmanı" - -# Default: Series writers -msgid "series-writers" -msgstr "Dizinin yazarları" - -# Default: Series years -msgid "series-years" -msgstr "Dizinin yılları" - -# Default: Set decoration -msgid "set-decoration" -msgstr "Set dekorasyonu" - -# Default: Sharpness -msgid "sharpness" -msgstr "Keskinlik" - -# Default: Similar to -msgid "similar-to" -msgstr "Benzer" - -# Default: Smart canonical episode title -msgid "smart-canonical-episode-title" -msgstr "" - -# Default: Smart canonical series title -msgid "smart-canonical-series-title" -msgstr "" - -# Default: Smart canonical title -msgid "smart-canonical-title" -msgstr "" - -# Default: Smart long imdb canonical title -msgid "smart-long-imdb-canonical-title" -msgstr "" - -# Default: Sound clips -msgid "sound-clips" -msgstr "Ses klipleri" - -# Default: Sound crew -msgid "sound-crew" -msgstr "Ses ekibi" - -# Default: Sound encoding -msgid "sound-encoding" -msgstr "Ses kodlaması" - -# Default: Sound mix -msgid "sound-mix" -msgstr "" - -# Default: Soundtrack -msgid "soundtrack" -msgstr "Film müzikleri" - -# Default: Spaciality -msgid "spaciality" -msgstr "" - -# Default: Special effects -msgid "special-effects" -msgstr "Özel efektler" - -# Default: Special effects 
companies -msgid "special-effects-companies" -msgstr "Özel efekt şirketleri" - -# Default: Special effects department -msgid "special-effects-department" -msgstr "Özel efekt departmanı" - -# Default: Spin off -msgid "spin-off" -msgstr "" - -# Default: Spin off from -msgid "spin-off-from" -msgstr "" - -# Default: Spoofed in -msgid "spoofed-in" -msgstr "Dalga geçildiği filmler" - -# Default: Spoofs -msgid "spoofs" -msgstr "Dalga geçtiği filmler" - -# Default: Spouse -msgid "spouse" -msgstr "Eşi" - -# Default: Status of availablility -msgid "status-of-availablility" -msgstr "" - -# Default: Studio -msgid "studio" -msgstr "Stüdyo" - -# Default: Studios -msgid "studios" -msgstr "Stüdyolar" - -# Default: Stunt performer -msgid "stunt-performer" -msgstr "" - -# Default: Stunts -msgid "stunts" -msgstr "Dublörler" - -# Default: Subtitles -msgid "subtitles" -msgstr "Altyazılar" - -# Default: Supplement -msgid "supplement" -msgstr "" - -# Default: Supplements -msgid "supplements" -msgstr "" - -# Default: Synopsis -msgid "synopsis" -msgstr "Sinopsis" - -# Default: Taglines -msgid "taglines" -msgstr "Spotlar" - -# Default: Tech info -msgid "tech-info" -msgstr "Teknik bilgi" - -# Default: Thanks -msgid "thanks" -msgstr "Teşekkürler" - -# Default: Time -msgid "time" -msgstr "Zaman" - -# Default: Title -msgid "title" -msgstr "Başlık" - -# Default: Titles in this product -msgid "titles-in-this-product" -msgstr "Bu üründeki başlıklar" - -# Default: To -msgid "to" -msgstr "Alan" - -# Default: Top 250 rank -msgid "top-250-rank" -msgstr "En iyi 250 içindeki sırası" - -# Default: Trade mark -msgid "trade-mark" -msgstr "Kendine has özelliği" - -# Default: Transportation department -msgid "transportation-department" -msgstr "Ulaşım departmanı" - -# Default: Trivia -msgid "trivia" -msgstr "İlginç notlar" - -# Default: Tv -msgid "tv" -msgstr "" - -# Default: Under license from -msgid "under-license-from" -msgstr "" - -# Default: Unknown link -msgid "unknown-link" -msgstr "" - -# 
Default: Upc -msgid "upc" -msgstr "" - -# Default: Version of -msgid "version-of" -msgstr "" - -# Default: Vhs -msgid "vhs" -msgstr "VHS" - -# Default: Video -msgid "video" -msgstr "" - -# Default: Video artifacts -msgid "video-artifacts" -msgstr "" - -# Default: Video clips -msgid "video-clips" -msgstr "Video klipleri" - -# Default: Video noise -msgid "video-noise" -msgstr "Video gürültüsü" - -# Default: Video quality -msgid "video-quality" -msgstr "Video kalitesi" - -# Default: Video standard -msgid "video-standard" -msgstr "Video standardı" - -# Default: Visual effects -msgid "visual-effects" -msgstr "Görsel efektler" - -# Default: Votes -msgid "votes" -msgstr "Oylar" - -# Default: Votes distribution -msgid "votes-distribution" -msgstr "Oyların dağılımı" - -# Default: Weekend gross -msgid "weekend-gross" -msgstr "Haftasonu hasılatı" - -# Default: Where now -msgid "where-now" -msgstr "Şu anda nerede" - -# Default: With -msgid "with" -msgstr "" - -# Default: Writer -msgid "writer" -msgstr "Yazar" - -# Default: Written by -msgid "written-by" -msgstr "Yazan" - -# Default: Year -msgid "year" -msgstr "Yıl" - -# Default: Zshops -msgid "zshops" -msgstr "ZShops" diff --git a/libs/imdb/locale/imdbpy.pot b/libs/imdb/locale/imdbpy.pot deleted file mode 100644 index 14ac1669..00000000 --- a/libs/imdb/locale/imdbpy.pot +++ /dev/null @@ -1,1301 +0,0 @@ -# Gettext message file for imdbpy -msgid "" -msgstr "" -"Project-Id-Version: imdbpy\n" -"POT-Creation-Date: 2010-03-18 14:35+0000\n" -"PO-Revision-Date: YYYY-MM-DD HH:MM+0000\n" -"Last-Translator: YOUR NAME \n" -"Language-Team: TEAM NAME \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=UTF-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Language-Code: en\n" -"Language-Name: English\n" -"Preferred-Encodings: utf-8\n" -"Domain: imdbpy\n" - -# Default: Actor -msgid "actor" -msgstr "" - -# Default: Actress -msgid "actress" -msgstr "" - -# Default: Adaption -msgid "adaption" 
-msgstr "" - -# Default: Additional information -msgid "additional-information" -msgstr "" - -# Default: Admissions -msgid "admissions" -msgstr "" - -# Default: Agent address -msgid "agent-address" -msgstr "" - -# Default: Airing -msgid "airing" -msgstr "" - -# Default: Akas -msgid "akas" -msgstr "" - -# Default: Akas from release info -msgid "akas-from-release-info" -msgstr "" - -# Default: All products -msgid "all-products" -msgstr "" - -# Default: Alternate language version of -msgid "alternate-language-version-of" -msgstr "" - -# Default: Alternate versions -msgid "alternate-versions" -msgstr "" - -# Default: Amazon reviews -msgid "amazon-reviews" -msgstr "" - -# Default: Analog left -msgid "analog-left" -msgstr "" - -# Default: Analog right -msgid "analog-right" -msgstr "" - -# Default: Animation department -msgid "animation-department" -msgstr "" - -# Default: Archive footage -msgid "archive-footage" -msgstr "" - -# Default: Arithmetic mean -msgid "arithmetic-mean" -msgstr "" - -# Default: Art department -msgid "art-department" -msgstr "" - -# Default: Art direction -msgid "art-direction" -msgstr "" - -# Default: Art director -msgid "art-director" -msgstr "" - -# Default: Article -msgid "article" -msgstr "" - -# Default: Asin -msgid "asin" -msgstr "" - -# Default: Aspect ratio -msgid "aspect-ratio" -msgstr "" - -# Default: Assigner -msgid "assigner" -msgstr "" - -# Default: Assistant director -msgid "assistant-director" -msgstr "" - -# Default: Auctions -msgid "auctions" -msgstr "" - -# Default: Audio noise -msgid "audio-noise" -msgstr "" - -# Default: Audio quality -msgid "audio-quality" -msgstr "" - -# Default: Award -msgid "award" -msgstr "" - -# Default: Awards -msgid "awards" -msgstr "" - -# Default: Biographical movies -msgid "biographical-movies" -msgstr "" - -# Default: Biography -msgid "biography" -msgstr "" - -# Default: Biography print -msgid "biography-print" -msgstr "" - -# Default: Birth date -msgid "birth-date" -msgstr "" - -# Default: Birth 
name -msgid "birth-name" -msgstr "" - -# Default: Birth notes -msgid "birth-notes" -msgstr "" - -# Default: Body -msgid "body" -msgstr "" - -# Default: Book -msgid "book" -msgstr "" - -# Default: Books -msgid "books" -msgstr "" - -# Default: Bottom 100 rank -msgid "bottom-100-rank" -msgstr "" - -# Default: Budget -msgid "budget" -msgstr "" - -# Default: Business -msgid "business" -msgstr "" - -# Default: By arrangement with -msgid "by-arrangement-with" -msgstr "" - -# Default: Camera -msgid "camera" -msgstr "" - -# Default: Camera and electrical department -msgid "camera-and-electrical-department" -msgstr "" - -# Default: Canonical episode title -msgid "canonical-episode-title" -msgstr "" - -# Default: Canonical name -msgid "canonical-name" -msgstr "" - -# Default: Canonical series title -msgid "canonical-series-title" -msgstr "" - -# Default: Canonical title -msgid "canonical-title" -msgstr "" - -# Default: Cast -msgid "cast" -msgstr "" - -# Default: Casting department -msgid "casting-department" -msgstr "" - -# Default: Casting director -msgid "casting-director" -msgstr "" - -# Default: Catalog number -msgid "catalog-number" -msgstr "" - -# Default: Category -msgid "category" -msgstr "" - -# Default: Certificate -msgid "certificate" -msgstr "" - -# Default: Certificates -msgid "certificates" -msgstr "" - -# Default: Certification -msgid "certification" -msgstr "" - -# Default: Channel -msgid "channel" -msgstr "" - -# Default: Character -msgid "character" -msgstr "" - -# Default: Cinematographer -msgid "cinematographer" -msgstr "" - -# Default: Cinematographic process -msgid "cinematographic-process" -msgstr "" - -# Default: Close captions teletext ld g -msgid "close-captions-teletext-ld-g" -msgstr "" - -# Default: Color info -msgid "color-info" -msgstr "" - -# Default: Color information -msgid "color-information" -msgstr "" - -# Default: Color rendition -msgid "color-rendition" -msgstr "" - -# Default: Company -msgid "company" -msgstr "" - -# Default: Complete 
cast -msgid "complete-cast" -msgstr "" - -# Default: Complete crew -msgid "complete-crew" -msgstr "" - -# Default: Composer -msgid "composer" -msgstr "" - -# Default: Connections -msgid "connections" -msgstr "" - -# Default: Contrast -msgid "contrast" -msgstr "" - -# Default: Copyright holder -msgid "copyright-holder" -msgstr "" - -# Default: Costume department -msgid "costume-department" -msgstr "" - -# Default: Costume designer -msgid "costume-designer" -msgstr "" - -# Default: Countries -msgid "countries" -msgstr "" - -# Default: Country -msgid "country" -msgstr "" - -# Default: Courtesy of -msgid "courtesy-of" -msgstr "" - -# Default: Cover -msgid "cover" -msgstr "" - -# Default: Cover url -msgid "cover-url" -msgstr "" - -# Default: Crazy credits -msgid "crazy-credits" -msgstr "" - -# Default: Creator -msgid "creator" -msgstr "" - -# Default: Current role -msgid "current-role" -msgstr "" - -# Default: Database -msgid "database" -msgstr "" - -# Default: Date -msgid "date" -msgstr "" - -# Default: Death date -msgid "death-date" -msgstr "" - -# Default: Death notes -msgid "death-notes" -msgstr "" - -# Default: Demographic -msgid "demographic" -msgstr "" - -# Default: Description -msgid "description" -msgstr "" - -# Default: Dialogue intellegibility -msgid "dialogue-intellegibility" -msgstr "" - -# Default: Digital sound -msgid "digital-sound" -msgstr "" - -# Default: Director -msgid "director" -msgstr "" - -# Default: Disc format -msgid "disc-format" -msgstr "" - -# Default: Disc size -msgid "disc-size" -msgstr "" - -# Default: Distributors -msgid "distributors" -msgstr "" - -# Default: Dvd -msgid "dvd" -msgstr "" - -# Default: Dvd features -msgid "dvd-features" -msgstr "" - -# Default: Dvd format -msgid "dvd-format" -msgstr "" - -# Default: Dvds -msgid "dvds" -msgstr "" - -# Default: Dynamic range -msgid "dynamic-range" -msgstr "" - -# Default: Edited from -msgid "edited-from" -msgstr "" - -# Default: Edited into -msgid "edited-into" -msgstr "" - -# Default: 
Editor -msgid "editor" -msgstr "" - -# Default: Editorial department -msgid "editorial-department" -msgstr "" - -# Default: Episode -msgid "episode" -msgstr "" - -# Default: Episode of -msgid "episode-of" -msgstr "" - -# Default: Episode title -msgid "episode-title" -msgstr "" - -# Default: Episodes -msgid "episodes" -msgstr "" - -# Default: Episodes rating -msgid "episodes-rating" -msgstr "" - -# Default: Essays -msgid "essays" -msgstr "" - -# Default: External reviews -msgid "external-reviews" -msgstr "" - -# Default: Faqs -msgid "faqs" -msgstr "" - -# Default: Feature -msgid "feature" -msgstr "" - -# Default: Featured in -msgid "featured-in" -msgstr "" - -# Default: Features -msgid "features" -msgstr "" - -# Default: Film negative format -msgid "film-negative-format" -msgstr "" - -# Default: Filming dates -msgid "filming-dates" -msgstr "" - -# Default: Filmography -msgid "filmography" -msgstr "" - -# Default: Followed by -msgid "followed-by" -msgstr "" - -# Default: Follows -msgid "follows" -msgstr "" - -# Default: For -msgid "for" -msgstr "" - -# Default: Frequency response -msgid "frequency-response" -msgstr "" - -# Default: From -msgid "from" -msgstr "" - -# Default: Full article link -msgid "full-article-link" -msgstr "" - -# Default: Full size cover url -msgid "full-size-cover-url" -msgstr "" - -# Default: Full size headshot -msgid "full-size-headshot" -msgstr "" - -# Default: Genres -msgid "genres" -msgstr "" - -# Default: Goofs -msgid "goofs" -msgstr "" - -# Default: Gross -msgid "gross" -msgstr "" - -# Default: Group genre -msgid "group-genre" -msgstr "" - -# Default: Headshot -msgid "headshot" -msgstr "" - -# Default: Height -msgid "height" -msgstr "" - -# Default: Imdbindex -msgid "imdbindex" -msgstr "" - -# Default: In development -msgid "in-development" -msgstr "" - -# Default: Interview -msgid "interview" -msgstr "" - -# Default: Interviews -msgid "interviews" -msgstr "" - -# Default: Introduction -msgid "introduction" -msgstr "" - -# Default: Item 
-msgid "item" -msgstr "" - -# Default: Keywords -msgid "keywords" -msgstr "" - -# Default: Kind -msgid "kind" -msgstr "" - -# Default: Label -msgid "label" -msgstr "" - -# Default: Laboratory -msgid "laboratory" -msgstr "" - -# Default: Language -msgid "language" -msgstr "" - -# Default: Languages -msgid "languages" -msgstr "" - -# Default: Laserdisc -msgid "laserdisc" -msgstr "" - -# Default: Laserdisc title -msgid "laserdisc-title" -msgstr "" - -# Default: Length -msgid "length" -msgstr "" - -# Default: Line -msgid "line" -msgstr "" - -# Default: Link -msgid "link" -msgstr "" - -# Default: Link text -msgid "link-text" -msgstr "" - -# Default: Literature -msgid "literature" -msgstr "" - -# Default: Locations -msgid "locations" -msgstr "" - -# Default: Long imdb canonical name -msgid "long-imdb-canonical-name" -msgstr "" - -# Default: Long imdb canonical title -msgid "long-imdb-canonical-title" -msgstr "" - -# Default: Long imdb episode title -msgid "long-imdb-episode-title" -msgstr "" - -# Default: Long imdb name -msgid "long-imdb-name" -msgstr "" - -# Default: Long imdb title -msgid "long-imdb-title" -msgstr "" - -# Default: Magazine cover photo -msgid "magazine-cover-photo" -msgstr "" - -# Default: Make up -msgid "make-up" -msgstr "" - -# Default: Master format -msgid "master-format" -msgstr "" - -# Default: Median -msgid "median" -msgstr "" - -# Default: Merchandising links -msgid "merchandising-links" -msgstr "" - -# Default: Mini biography -msgid "mini-biography" -msgstr "" - -# Default: Misc links -msgid "misc-links" -msgstr "" - -# Default: Miscellaneous companies -msgid "miscellaneous-companies" -msgstr "" - -# Default: Miscellaneous crew -msgid "miscellaneous-crew" -msgstr "" - -# Default: Movie -msgid "movie" -msgstr "" - -# Default: Mpaa -msgid "mpaa" -msgstr "" - -# Default: Music department -msgid "music-department" -msgstr "" - -# Default: Name -msgid "name" -msgstr "" - -# Default: News -msgid "news" -msgstr "" - -# Default: Newsgroup reviews -msgid 
"newsgroup-reviews" -msgstr "" - -# Default: Nick names -msgid "nick-names" -msgstr "" - -# Default: Notes -msgid "notes" -msgstr "" - -# Default: Novel -msgid "novel" -msgstr "" - -# Default: Number -msgid "number" -msgstr "" - -# Default: Number of chapter stops -msgid "number-of-chapter-stops" -msgstr "" - -# Default: Number of episodes -msgid "number-of-episodes" -msgstr "" - -# Default: Number of seasons -msgid "number-of-seasons" -msgstr "" - -# Default: Number of sides -msgid "number-of-sides" -msgstr "" - -# Default: Number of votes -msgid "number-of-votes" -msgstr "" - -# Default: Official retail price -msgid "official-retail-price" -msgstr "" - -# Default: Official sites -msgid "official-sites" -msgstr "" - -# Default: Opening weekend -msgid "opening-weekend" -msgstr "" - -# Default: Original air date -msgid "original-air-date" -msgstr "" - -# Default: Original music -msgid "original-music" -msgstr "" - -# Default: Original title -msgid "original-title" -msgstr "" - -# Default: Other literature -msgid "other-literature" -msgstr "" - -# Default: Other works -msgid "other-works" -msgstr "" - -# Default: Parents guide -msgid "parents-guide" -msgstr "" - -# Default: Performed by -msgid "performed-by" -msgstr "" - -# Default: Person -msgid "person" -msgstr "" - -# Default: Photo sites -msgid "photo-sites" -msgstr "" - -# Default: Pictorial -msgid "pictorial" -msgstr "" - -# Default: Picture format -msgid "picture-format" -msgstr "" - -# Default: Plot -msgid "plot" -msgstr "" - -# Default: Plot outline -msgid "plot-outline" -msgstr "" - -# Default: Portrayed in -msgid "portrayed-in" -msgstr "" - -# Default: Pressing plant -msgid "pressing-plant" -msgstr "" - -# Default: Printed film format -msgid "printed-film-format" -msgstr "" - -# Default: Printed media reviews -msgid "printed-media-reviews" -msgstr "" - -# Default: Producer -msgid "producer" -msgstr "" - -# Default: Production companies -msgid "production-companies" -msgstr "" - -# Default: Production 
country -msgid "production-country" -msgstr "" - -# Default: Production dates -msgid "production-dates" -msgstr "" - -# Default: Production design -msgid "production-design" -msgstr "" - -# Default: Production designer -msgid "production-designer" -msgstr "" - -# Default: Production manager -msgid "production-manager" -msgstr "" - -# Default: Production process protocol -msgid "production-process-protocol" -msgstr "" - -# Default: Quality of source -msgid "quality-of-source" -msgstr "" - -# Default: Quality program -msgid "quality-program" -msgstr "" - -# Default: Quote -msgid "quote" -msgstr "" - -# Default: Quotes -msgid "quotes" -msgstr "" - -# Default: Rating -msgid "rating" -msgstr "" - -# Default: Recommendations -msgid "recommendations" -msgstr "" - -# Default: Referenced in -msgid "referenced-in" -msgstr "" - -# Default: References -msgid "references" -msgstr "" - -# Default: Region -msgid "region" -msgstr "" - -# Default: Release country -msgid "release-country" -msgstr "" - -# Default: Release date -msgid "release-date" -msgstr "" - -# Default: Release dates -msgid "release-dates" -msgstr "" - -# Default: Remade as -msgid "remade-as" -msgstr "" - -# Default: Remake of -msgid "remake-of" -msgstr "" - -# Default: Rentals -msgid "rentals" -msgstr "" - -# Default: Result -msgid "result" -msgstr "" - -# Default: Review -msgid "review" -msgstr "" - -# Default: Review author -msgid "review-author" -msgstr "" - -# Default: Review kind -msgid "review-kind" -msgstr "" - -# Default: Runtime -msgid "runtime" -msgstr "" - -# Default: Runtimes -msgid "runtimes" -msgstr "" - -# Default: Salary history -msgid "salary-history" -msgstr "" - -# Default: Screenplay teleplay -msgid "screenplay-teleplay" -msgstr "" - -# Default: Season -msgid "season" -msgstr "" - -# Default: Second unit director or assistant director -msgid "second-unit-director-or-assistant-director" -msgstr "" - -# Default: Self -msgid "self" -msgstr "" - -# Default: Series animation department -msgid 
"series-animation-department" -msgstr "" - -# Default: Series art department -msgid "series-art-department" -msgstr "" - -# Default: Series assistant directors -msgid "series-assistant-directors" -msgstr "" - -# Default: Series camera department -msgid "series-camera-department" -msgstr "" - -# Default: Series casting department -msgid "series-casting-department" -msgstr "" - -# Default: Series cinematographers -msgid "series-cinematographers" -msgstr "" - -# Default: Series costume department -msgid "series-costume-department" -msgstr "" - -# Default: Series editorial department -msgid "series-editorial-department" -msgstr "" - -# Default: Series editors -msgid "series-editors" -msgstr "" - -# Default: Series make up department -msgid "series-make-up-department" -msgstr "" - -# Default: Series miscellaneous -msgid "series-miscellaneous" -msgstr "" - -# Default: Series music department -msgid "series-music-department" -msgstr "" - -# Default: Series producers -msgid "series-producers" -msgstr "" - -# Default: Series production designers -msgid "series-production-designers" -msgstr "" - -# Default: Series production managers -msgid "series-production-managers" -msgstr "" - -# Default: Series sound department -msgid "series-sound-department" -msgstr "" - -# Default: Series special effects department -msgid "series-special-effects-department" -msgstr "" - -# Default: Series stunts -msgid "series-stunts" -msgstr "" - -# Default: Series title -msgid "series-title" -msgstr "" - -# Default: Series transportation department -msgid "series-transportation-department" -msgstr "" - -# Default: Series visual effects department -msgid "series-visual-effects-department" -msgstr "" - -# Default: Series writers -msgid "series-writers" -msgstr "" - -# Default: Series years -msgid "series-years" -msgstr "" - -# Default: Set decoration -msgid "set-decoration" -msgstr "" - -# Default: Sharpness -msgid "sharpness" -msgstr "" - -# Default: Similar to -msgid "similar-to" -msgstr "" - -# 
Default: Smart canonical episode title -msgid "smart-canonical-episode-title" -msgstr "" - -# Default: Smart canonical series title -msgid "smart-canonical-series-title" -msgstr "" - -# Default: Smart canonical title -msgid "smart-canonical-title" -msgstr "" - -# Default: Smart long imdb canonical title -msgid "smart-long-imdb-canonical-title" -msgstr "" - -# Default: Sound clips -msgid "sound-clips" -msgstr "" - -# Default: Sound crew -msgid "sound-crew" -msgstr "" - -# Default: Sound encoding -msgid "sound-encoding" -msgstr "" - -# Default: Sound mix -msgid "sound-mix" -msgstr "" - -# Default: Soundtrack -msgid "soundtrack" -msgstr "" - -# Default: Spaciality -msgid "spaciality" -msgstr "" - -# Default: Special effects -msgid "special-effects" -msgstr "" - -# Default: Special effects companies -msgid "special-effects-companies" -msgstr "" - -# Default: Special effects department -msgid "special-effects-department" -msgstr "" - -# Default: Spin off -msgid "spin-off" -msgstr "" - -# Default: Spin off from -msgid "spin-off-from" -msgstr "" - -# Default: Spoofed in -msgid "spoofed-in" -msgstr "" - -# Default: Spoofs -msgid "spoofs" -msgstr "" - -# Default: Spouse -msgid "spouse" -msgstr "" - -# Default: Status of availablility -msgid "status-of-availablility" -msgstr "" - -# Default: Studio -msgid "studio" -msgstr "" - -# Default: Studios -msgid "studios" -msgstr "" - -# Default: Stunt performer -msgid "stunt-performer" -msgstr "" - -# Default: Stunts -msgid "stunts" -msgstr "" - -# Default: Subtitles -msgid "subtitles" -msgstr "" - -# Default: Supplement -msgid "supplement" -msgstr "" - -# Default: Supplements -msgid "supplements" -msgstr "" - -# Default: Synopsis -msgid "synopsis" -msgstr "" - -# Default: Taglines -msgid "taglines" -msgstr "" - -# Default: Tech info -msgid "tech-info" -msgstr "" - -# Default: Thanks -msgid "thanks" -msgstr "" - -# Default: Time -msgid "time" -msgstr "" - -# Default: Title -msgid "title" -msgstr "" - -# Default: Titles in this 
product -msgid "titles-in-this-product" -msgstr "" - -# Default: To -msgid "to" -msgstr "" - -# Default: Top 250 rank -msgid "top-250-rank" -msgstr "" - -# Default: Trade mark -msgid "trade-mark" -msgstr "" - -# Default: Transportation department -msgid "transportation-department" -msgstr "" - -# Default: Trivia -msgid "trivia" -msgstr "" - -# Default: Tv -msgid "tv" -msgstr "" - -# Default: Under license from -msgid "under-license-from" -msgstr "" - -# Default: Unknown link -msgid "unknown-link" -msgstr "" - -# Default: Upc -msgid "upc" -msgstr "" - -# Default: Version of -msgid "version-of" -msgstr "" - -# Default: Vhs -msgid "vhs" -msgstr "" - -# Default: Video -msgid "video" -msgstr "" - -# Default: Video artifacts -msgid "video-artifacts" -msgstr "" - -# Default: Video clips -msgid "video-clips" -msgstr "" - -# Default: Video noise -msgid "video-noise" -msgstr "" - -# Default: Video quality -msgid "video-quality" -msgstr "" - -# Default: Video standard -msgid "video-standard" -msgstr "" - -# Default: Visual effects -msgid "visual-effects" -msgstr "" - -# Default: Votes -msgid "votes" -msgstr "" - -# Default: Votes distribution -msgid "votes-distribution" -msgstr "" - -# Default: Weekend gross -msgid "weekend-gross" -msgstr "" - -# Default: Where now -msgid "where-now" -msgstr "" - -# Default: With -msgid "with" -msgstr "" - -# Default: Writer -msgid "writer" -msgstr "" - -# Default: Written by -msgid "written-by" -msgstr "" - -# Default: Year -msgid "year" -msgstr "" - -# Default: Zshops -msgid "zshops" -msgstr "" - diff --git a/libs/imdb/locale/msgfmt.py b/libs/imdb/locale/msgfmt.py deleted file mode 100644 index 9e0ab747..00000000 --- a/libs/imdb/locale/msgfmt.py +++ /dev/null @@ -1,204 +0,0 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 -*- -"""Generate binary message catalog from textual translation description. - -This program converts a textual Uniforum-style message catalog (.po file) into -a binary GNU catalog (.mo file). 
"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file).  This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to.  If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.

Written by Martin v. Löwis,
refactored / fixed by Thomas Waldmann.
"""

import sys, os
import getopt, struct, array  # array is no longer used; kept so the module namespace is unchanged
import ast
import io

__version__ = "1.3"

# Size in bytes of the .mo header: seven 32-bit integers.
_HEADER_SIZE = 7 * 4


class SyntaxErrorException(Exception):
    """raised when having trouble parsing the po file content"""


def _to_bytes(value):
    """Return *value* as a byte string; text is encoded as UTF-8."""
    if isinstance(value, bytes):
        return value
    return value.encode('utf-8')


def _parse_quoted(line, line_no):
    """Decode one quoted .po string (the part after ``msgid`` / ``msgstr``).

    The text is parsed with ``ast.literal_eval`` instead of ``eval``, so a
    catalog can only contribute string literals and never runs code.
    Anything that is not a plain string literal raises SyntaxErrorException
    mentioning *line_no*.
    """
    try:
        value = ast.literal_eval(line)
    except (SyntaxError, ValueError, TypeError):
        value = None
    if not isinstance(value, type('')):
        raise SyntaxErrorException(
            'Syntax error on line %d, malformed string:\n%s' % (line_no, line))
    return value


class MsgFmt(object):
    """transform .po -> .mo format"""

    def __init__(self):
        # msgid -> msgstr for every translation accepted by add().
        self.messages = {}

    def make_filenames(self, filename, outfile=None):
        """Compute .mo name from .po name or language.

        Returns the tuple ``(infile, outfile)``: *filename* gets a ``.po``
        suffix when it lacks one, and *outfile* defaults to the input name
        with ``.mo`` in place of ``.po``.
        """
        if filename.endswith('.po'):
            infile = filename
        else:
            infile = filename + '.po'
        if outfile is None:
            outfile = os.path.splitext(infile)[0] + '.mo'
        return infile, outfile

    def add(self, id, str, fuzzy):
        """Add a non-fuzzy translation to the dictionary.

        Entries flagged *fuzzy* and entries with an empty translation are
        silently dropped.
        """
        if not fuzzy and str:
            self.messages[id] = str

    def read_po(self, lines):
        """Parse the catalog given as an iterable of text *lines*.

        Accepted translations are stored through add().  Raises
        SyntaxErrorException for a quoted string that appears outside a
        msgid/msgstr section or that is not a valid string literal.
        """
        ID = 1
        STR = 2
        section = None
        fuzzy = False
        msgid = msgstr = ''
        # Parse the catalog
        for line_no, line in enumerate(lines, 1):
            # If we get a comment line after a msgstr, this is a new entry
            if line.startswith('#') and section == STR:
                self.add(msgid, msgstr, fuzzy)
                section = None
                fuzzy = False
            # Record a fuzzy mark
            if line.startswith('#,') and 'fuzzy' in line:
                fuzzy = True
            # Skip comments
            if line.startswith('#'):
                continue
            # Now we are in a msgid section, output previous section
            if line.startswith('msgid'):
                if section == STR:
                    self.add(msgid, msgstr, fuzzy)
                    fuzzy = False
                section = ID
                line = line[5:]
                msgid = msgstr = ''
            # Now we are in a msgstr section
            elif line.startswith('msgstr'):
                section = STR
                line = line[6:]
            # Skip empty lines
            line = line.strip()
            if not line:
                continue
            text = _parse_quoted(line, line_no)
            if section == ID:
                msgid += text
            elif section == STR:
                msgstr += text
            else:
                raise SyntaxErrorException(
                    'Syntax error on line %d, before:\n%s' % (line_no, line))
        # Add last entry
        if section == STR:
            self.add(msgid, msgstr, fuzzy)

    def generate_mo(self):
        """Return the generated output as a byte string.

        Layout: header, index table of the keys, index table of the values,
        NUL-terminated keys, NUL-terminated values.  No hash table is
        written.
        """
        # the keys are sorted in the .mo file; sizes and offsets count bytes,
        # so everything is converted to bytes before it is measured
        catalog = sorted(
            (_to_bytes(key), _to_bytes(value))
            for key, value in self.messages.items()
        )
        offsets = []
        ids = []
        strs = []
        ids_size = strs_size = 0
        for key, value in catalog:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            offsets.append((ids_size, len(key), strs_size, len(value)))
            ids.append(key + b'\0')
            strs.append(value + b'\0')
            ids_size += len(key) + 1
            strs_size += len(value) + 1
        count = len(catalog)
        # The header is 7 32-bit unsigned integers.  We don't use hash
        # tables, so the keys start right after the index tables.
        keystart = _HEADER_SIZE + 16 * count
        # and the values start after the keys
        valuestart = keystart + ids_size
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of
        # values.  Each entry has first the size of the string, then the
        # file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1 + keystart]
            voffsets += [l2, o2 + valuestart]
        index = koffsets + voffsets
        header = struct.pack("Iiiiiii",
                             0x950412de,                # Magic
                             0,                         # Version
                             count,                     # # of entries
                             _HEADER_SIZE,              # start of key index
                             _HEADER_SIZE + count * 8,  # start of value index
                             0, 0)                      # size and offset of hash table
        table = struct.pack("%di" % len(index), *index)
        return b''.join([header, table] + ids + strs)


def make(filename, outfile):
    """Compile the catalog *filename* and write the result to *outfile*.

    File names are resolved by MsgFmt.make_filenames().  An unreadable
    input file terminates the process with status 1.  A catalog syntax
    error or an unwritable output file is reported on stderr and the
    function returns without raising; after a syntax error nothing is
    written.
    """
    mf = MsgFmt()
    infile, outfile = mf.make_filenames(filename, outfile)
    try:
        # the bundled catalogs declare charset=UTF-8
        with io.open(infile, encoding='utf-8') as handle:
            lines = handle.readlines()
    except IOError as msg:
        sys.stderr.write('%s\n' % msg)
        sys.exit(1)
    try:
        mf.read_po(lines)
        output = mf.generate_mo()
    except SyntaxErrorException as msg:
        sys.stderr.write('%s\n' % msg)
        # there is no output to write; previously execution fell through to
        # the write below and failed with NameError on ``output``
        return

    try:
        with open(outfile, "wb") as handle:
            handle.write(output)
    except IOError as msg:
        sys.stderr.write('%s\n' % msg)


def usage(code, msg=''):
    """Print the module documentation and *msg* to stderr, then exit with *code*."""
    sys.stderr.write('%s\n' % __doc__)
    if msg:
        sys.stderr.write('%s\n' % msg)
    sys.exit(code)


def main():
    """Command line entry point: parse the options and compile every input file."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            sys.stderr.write('msgfmt.py %s\n' % __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        sys.stderr.write('No input file given\n')
        sys.stderr.write("Try `msgfmt --help' for more information.\n")
        return

    # NOTE: the same outfile is passed for every input, so with -o and
    # several inputs each catalog overwrites the previous one.
    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()
Turgut Uyar - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import glob -import msgfmt -import os - -#LOCALE_DIR = os.path.dirname(__file__) - -def rebuildmo(): - lang_glob = 'imdbpy-*.po' - created = [] - for input_file in glob.glob(lang_glob): - lang = input_file[7:-3] - if not os.path.exists(lang): - os.mkdir(lang) - mo_dir = os.path.join(lang, 'LC_MESSAGES') - if not os.path.exists(mo_dir): - os.mkdir(mo_dir) - output_file = os.path.join(mo_dir, 'imdbpy.mo') - msgfmt.make(input_file, output_file) - created.append(lang) - return created - - -if __name__ == '__main__': - languages = rebuildmo() - print 'Created locale for: %s.' % ' '.join(languages) - diff --git a/libs/imdb/parser/__init__.py b/libs/imdb/parser/__init__.py deleted file mode 100644 index 4c3c90a8..00000000 --- a/libs/imdb/parser/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -parser package (imdb package). - -This package provides various parsers to access IMDb data (e.g.: a -parser for the web/http interface, a parser for the SQL database -interface, etc.). -So far, the http/httpThin, mobile and sql parsers are implemented. 
- -Copyright 2004-2009 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -__all__ = ['http', 'mobile', 'sql'] - - diff --git a/libs/imdb/parser/http/__init__.py b/libs/imdb/parser/http/__init__.py deleted file mode 100644 index e05d9afa..00000000 --- a/libs/imdb/parser/http/__init__.py +++ /dev/null @@ -1,775 +0,0 @@ -""" -parser.http package (imdb package). - -This package provides the IMDbHTTPAccessSystem class used to access -IMDb's data through the web interface. -the imdb.IMDb function will return an instance of this class when -called with the 'accessSystem' argument set to "http" or "web" -or "html" (this is the default). - -Copyright 2004-2010 Davide Alberani - 2008 H. Turgut Uyar - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import sys -import logging -from urllib import FancyURLopener, quote_plus -from codecs import lookup - -from imdb import IMDbBase, imdbURL_movie_main, imdbURL_person_main, \ - imdbURL_character_main, imdbURL_company_main, \ - imdbURL_keyword_main, imdbURL_find, imdbURL_top250, \ - imdbURL_bottom100 -from imdb.utils import analyze_title -from imdb._exceptions import IMDbDataAccessError, IMDbParserError - -import searchMovieParser -import searchPersonParser -import searchCharacterParser -import searchCompanyParser -import searchKeywordParser -import movieParser -import personParser -import characterParser -import companyParser -import topBottomParser - -# Logger for miscellaneous functions. -_aux_logger = logging.getLogger('imdbpy.parser.http.aux') - -IN_GAE = False -try: - import google.appengine - IN_GAE = True - _aux_logger.info('IMDbPY is running in the Google App Engine environment') -except ImportError: - pass - - -class _ModuleProxy: - """A proxy to instantiate and access parsers.""" - def __init__(self, module, defaultKeys=None, oldParsers=False, - useModule=None, fallBackToNew=False): - """Initialize a proxy for the given module; defaultKeys, if set, - muste be a dictionary of values to set for instanced objects.""" - if oldParsers or fallBackToNew: - _aux_logger.warn('The old set of parsers was removed; falling ' \ - 'back to the new parsers.') - self.useModule = useModule - if defaultKeys is None: - defaultKeys = {} - self._defaultKeys = defaultKeys - self._module = module - - def __getattr__(self, name): - """Called only when no look-up is found.""" - _sm = self._module - # Read the _OBJECTS dictionary to build the asked parser. - if name in _sm._OBJECTS: - _entry = _sm._OBJECTS[name] - # Initialize the parser. 
- kwds = {} - if self.useModule: - kwds = {'useModule': self.useModule} - parserClass = _entry[0][0] - obj = parserClass(**kwds) - attrsToSet = self._defaultKeys.copy() - attrsToSet.update(_entry[1] or {}) - # Set attribute to the object. - for key in attrsToSet: - setattr(obj, key, attrsToSet[key]) - setattr(self, name, obj) - return obj - return getattr(_sm, name) - - -PY_VERSION = sys.version_info[:2] - - -# The cookies for the "adult" search. -# Please don't mess with these account. -# Old 'IMDbPY' account. -_old_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1' -_old_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q==' -# New 'IMDbPYweb' account. -_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1' -_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk=' - -# imdbpy2010 account. 
-#_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI=' -#_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A==' - - -class _FakeURLOpener(object): - """Fake URLOpener object, used to return empty strings instead of - errors. - """ - def __init__(self, url, headers): - self.url = url - self.headers = headers - def read(self, *args, **kwds): return '' - def close(self, *args, **kwds): pass - def info(self, *args, **kwds): return self.headers - - -class IMDbURLopener(FancyURLopener): - """Fetch web pages and handle errors.""" - _logger = logging.getLogger('imdbpy.parser.http.urlopener') - - def __init__(self, *args, **kwargs): - self._last_url = u'' - FancyURLopener.__init__(self, *args, **kwargs) - # Headers to add to every request. - # XXX: IMDb's web server doesn't like urllib-based programs, - # so lets fake to be Mozilla. - # Wow! I'm shocked by my total lack of ethic! - for header in ('User-Agent', 'User-agent', 'user-agent'): - self.del_header(header) - self.set_header('User-Agent', 'Mozilla/5.0') - # XXX: This class is used also to perform "Exact Primary - # [Title|Name]" searches, and so by default the cookie is set. 
- c_header = 'id=%s; uu=%s' % (_cookie_id, _cookie_uu) - self.set_header('Cookie', c_header) - - def get_proxy(self): - """Return the used proxy, or an empty string.""" - return self.proxies.get('http', '') - - def set_proxy(self, proxy): - """Set the proxy.""" - if not proxy: - if self.proxies.has_key('http'): - del self.proxies['http'] - else: - if not proxy.lower().startswith('http://'): - proxy = 'http://%s' % proxy - self.proxies['http'] = proxy - - def set_header(self, header, value, _overwrite=True): - """Set a default header.""" - if _overwrite: - self.del_header(header) - self.addheaders.append((header, value)) - - def del_header(self, header): - """Remove a default header.""" - for index in xrange(len(self.addheaders)): - if self.addheaders[index][0] == header: - del self.addheaders[index] - break - - def retrieve_unicode(self, url, size=-1): - """Retrieves the given URL, and returns a unicode string, - trying to guess the encoding of the data (assuming latin_1 - by default)""" - encode = None - try: - if size != -1: - self.set_header('Range', 'bytes=0-%d' % size) - uopener = self.open(url) - kwds = {} - if PY_VERSION > (2, 3) and not IN_GAE: - kwds['size'] = size - content = uopener.read(**kwds) - self._last_url = uopener.url - # Maybe the server is so nice to tell us the charset... - server_encode = uopener.info().getparam('charset') - # Otherwise, look at the content-type HTML meta tag. - if server_encode is None and content: - first_bytes = content[:512] - begin_h = first_bytes.find('text/html; charset=') - if begin_h != -1: - end_h = first_bytes[19+begin_h:].find('"') - if end_h != -1: - server_encode = first_bytes[19+begin_h:19+begin_h+end_h] - if server_encode: - try: - if lookup(server_encode): - encode = server_encode - except (LookupError, ValueError, TypeError): - pass - uopener.close() - if size != -1: - self.del_header('Range') - self.close() - except IOError, e: - if size != -1: - # Ensure that the Range header is removed. 
- self.del_header('Range') - raise IMDbDataAccessError, {'errcode': e.errno, - 'errmsg': str(e.strerror), - 'url': url, - 'proxy': self.get_proxy(), - 'exception type': 'IOError', - 'original exception': e} - if encode is None: - encode = 'latin_1' - # The detection of the encoding is error prone... - self._logger.warn('Unable to detect the encoding of the retrieved ' - 'page [%s]; falling back to default latin1.', encode) - ##print unicode(content, encode, 'replace').encode('utf8') - return unicode(content, encode, 'replace') - - def http_error_default(self, url, fp, errcode, errmsg, headers): - if errcode == 404: - self._logger.warn('404 code returned for %s: %s (headers: %s)', - url, errmsg, headers) - return _FakeURLOpener(url, headers) - raise IMDbDataAccessError, {'url': 'http:%s' % url, - 'errcode': errcode, - 'errmsg': errmsg, - 'headers': headers, - 'error type': 'http_error_default', - 'proxy': self.get_proxy()} - - def open_unknown(self, fullurl, data=None): - raise IMDbDataAccessError, {'fullurl': fullurl, - 'data': str(data), - 'error type': 'open_unknown', - 'proxy': self.get_proxy()} - - def open_unknown_proxy(self, proxy, fullurl, data=None): - raise IMDbDataAccessError, {'proxy': str(proxy), - 'fullurl': fullurl, - 'error type': 'open_unknown_proxy', - 'data': str(data)} - - -class IMDbHTTPAccessSystem(IMDbBase): - """The class used to access IMDb's data through the web.""" - - accessSystem = 'http' - _http_logger = logging.getLogger('imdbpy.parser.http') - - def __init__(self, isThin=0, adultSearch=1, proxy=-1, oldParsers=False, - fallBackToNew=False, useModule=None, cookie_id=-1, - cookie_uu=None, *arguments, **keywords): - """Initialize the access system.""" - IMDbBase.__init__(self, *arguments, **keywords) - self.urlOpener = IMDbURLopener() - # When isThin is set, we're parsing the "maindetails" page - # of a movie (instead of the "combined" page) and movie/person - # references are not collected if no defaultModFunct is provided. 
- self.isThin = isThin - self._getRefs = True - self._mdparse = False - if isThin: - if self.accessSystem == 'http': - self.accessSystem = 'httpThin' - self._mdparse = True - if self._defModFunct is None: - self._getRefs = False - from imdb.utils import modNull - self._defModFunct = modNull - self.do_adult_search(adultSearch) - if cookie_id != -1: - if cookie_id is None: - self.del_cookies() - elif cookie_uu is not None: - self.set_cookies(cookie_id, cookie_uu) - if proxy != -1: - self.set_proxy(proxy) - if useModule is not None: - if not isinstance(useModule, (list, tuple)) and ',' in useModule: - useModule = useModule.split(',') - _def = {'_modFunct': self._defModFunct, '_as': self.accessSystem} - # Proxy objects. - self.smProxy = _ModuleProxy(searchMovieParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - fallBackToNew=fallBackToNew) - self.spProxy = _ModuleProxy(searchPersonParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - fallBackToNew=fallBackToNew) - self.scProxy = _ModuleProxy(searchCharacterParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - fallBackToNew=fallBackToNew) - self.scompProxy = _ModuleProxy(searchCompanyParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - fallBackToNew=fallBackToNew) - self.skProxy = _ModuleProxy(searchKeywordParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - fallBackToNew=fallBackToNew) - self.mProxy = _ModuleProxy(movieParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - fallBackToNew=fallBackToNew) - self.pProxy = _ModuleProxy(personParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - fallBackToNew=fallBackToNew) - self.cProxy = _ModuleProxy(characterParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - fallBackToNew=fallBackToNew) - self.compProxy = _ModuleProxy(companyParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - 
fallBackToNew=fallBackToNew) - self.topBottomProxy = _ModuleProxy(topBottomParser, defaultKeys=_def, - oldParsers=oldParsers, useModule=useModule, - fallBackToNew=fallBackToNew) - - def _normalize_movieID(self, movieID): - """Normalize the given movieID.""" - try: - return '%07d' % int(movieID) - except ValueError, e: - raise IMDbParserError, 'invalid movieID "%s": %s' % (movieID, e) - - def _normalize_personID(self, personID): - """Normalize the given personID.""" - try: - return '%07d' % int(personID) - except ValueError, e: - raise IMDbParserError, 'invalid personID "%s": %s' % (personID, e) - - def _normalize_characterID(self, characterID): - """Normalize the given characterID.""" - try: - return '%07d' % int(characterID) - except ValueError, e: - raise IMDbParserError, 'invalid characterID "%s": %s' % \ - (characterID, e) - - def _normalize_companyID(self, companyID): - """Normalize the given companyID.""" - try: - return '%07d' % int(companyID) - except ValueError, e: - raise IMDbParserError, 'invalid companyID "%s": %s' % \ - (companyID, e) - - def get_imdbMovieID(self, movieID): - """Translate a movieID in an imdbID; in this implementation - the movieID _is_ the imdbID. - """ - return movieID - - def get_imdbPersonID(self, personID): - """Translate a personID in an imdbID; in this implementation - the personID _is_ the imdbID. - """ - return personID - - def get_imdbCharacterID(self, characterID): - """Translate a characterID in an imdbID; in this implementation - the characterID _is_ the imdbID. - """ - return characterID - - def get_imdbCompanyID(self, companyID): - """Translate a companyID in an imdbID; in this implementation - the companyID _is_ the imdbID. - """ - return companyID - - def get_proxy(self): - """Return the used proxy or an empty string.""" - return self.urlOpener.get_proxy() - - def set_proxy(self, proxy): - """Set the web proxy to use. 
- - It should be a string like 'http://localhost:8080/'; if the - string is empty, no proxy will be used. - If set, the value of the environment variable HTTP_PROXY is - automatically used. - """ - self.urlOpener.set_proxy(proxy) - - def set_cookies(self, cookie_id, cookie_uu): - """Set a cookie to access an IMDb's account.""" - c_header = 'id=%s; uu=%s' % (cookie_id, cookie_uu) - self.urlOpener.set_header('Cookie', c_header) - - def del_cookies(self): - """Remove the used cookie.""" - self.urlOpener.del_header('Cookie') - - def do_adult_search(self, doAdult, - cookie_id=_cookie_id, cookie_uu=_cookie_uu): - """If doAdult is true, 'adult' movies are included in the - search results; cookie_id and cookie_uu are optional - parameters to select a specific account (see your cookie - or cookies.txt file.""" - if doAdult: - self.set_cookies(cookie_id, cookie_uu) - #c_header = 'id=%s; uu=%s' % (cookie_id, cookie_uu) - #self.urlOpener.set_header('Cookie', c_header) - else: - self.urlOpener.del_header('Cookie') - - def _retrieve(self, url, size=-1): - """Retrieve the given URL.""" - ##print url - self._http_logger.debug('fetching url %s (size: %d)', url, size) - return self.urlOpener.retrieve_unicode(url, size=size) - - def _get_search_content(self, kind, ton, results): - """Retrieve the web page for a given search. - kind can be 'tt' (for titles), 'nm' (for names), - 'char' (for characters) or 'co' (for companies). - ton is the title or the name to search. 
- results is the maximum number of results to be retrieved.""" - if isinstance(ton, unicode): - ton = ton.encode('utf-8') - ##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results)) - params = 's=%s;mx=%s;q=%s' % (kind, str(results), quote_plus(ton)) - if kind == 'ep': - params = params.replace('s=ep;', 's=tt;ttype=ep;', 1) - cont = self._retrieve(imdbURL_find % params) - #print 'URL:', imdbURL_find % params - if cont.find('Your search returned more than') == -1 or \ - cont.find("displayed the exact matches") == -1: - return cont - # The retrieved page contains no results, because too many - # titles or names contain the string we're looking for. - params = 's=%s;q=%s;lm=0' % (kind, quote_plus(ton)) - size = 22528 + results * 512 - return self._retrieve(imdbURL_find % params, size=size) - - def _search_movie(self, title, results): - # The URL of the query. - # XXX: To retrieve the complete results list: - # params = urllib.urlencode({'more': 'tt', 'q': title}) - ##params = urllib.urlencode({'tt': 'on','mx': str(results),'q': title}) - ##params = 'q=%s&tt=on&mx=%s' % (quote_plus(title), str(results)) - ##cont = self._retrieve(imdbURL_find % params) - cont = self._get_search_content('tt', title, results) - return self.smProxy.search_movie_parser.parse(cont, results=results)['data'] - - def _search_episode(self, title, results): - t_dict = analyze_title(title) - if t_dict['kind'] == 'episode': - title = t_dict['title'] - cont = self._get_search_content('ep', title, results) - return self.smProxy.search_movie_parser.parse(cont, results=results)['data'] - - def get_movie_main(self, movieID): - if not self.isThin: - cont = self._retrieve(imdbURL_movie_main % movieID + 'combined') - else: - cont = self._retrieve(imdbURL_movie_main % movieID + 'maindetails') - return self.mProxy.movie_parser.parse(cont, mdparse=self._mdparse) - - def get_movie_full_credits(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'fullcredits') - return 
self.mProxy.movie_parser.parse(cont) - - def get_movie_plot(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'plotsummary') - return self.mProxy.plot_parser.parse(cont, getRefs=self._getRefs) - - def get_movie_awards(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'awards') - return self.mProxy.movie_awards_parser.parse(cont) - - def get_movie_taglines(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'taglines') - return self.mProxy.taglines_parser.parse(cont) - - def get_movie_keywords(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'keywords') - return self.mProxy.keywords_parser.parse(cont) - - def get_movie_alternate_versions(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'alternateversions') - return self.mProxy.alternateversions_parser.parse(cont, - getRefs=self._getRefs) - - def get_movie_crazy_credits(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'crazycredits') - return self.mProxy.crazycredits_parser.parse(cont, - getRefs=self._getRefs) - - def get_movie_goofs(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'goofs') - return self.mProxy.goofs_parser.parse(cont, getRefs=self._getRefs) - - def get_movie_quotes(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'quotes') - return self.mProxy.quotes_parser.parse(cont, getRefs=self._getRefs) - - def get_movie_release_dates(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'releaseinfo') - ret = self.mProxy.releasedates_parser.parse(cont) - ret['info sets'] = ('release dates', 'akas') - return ret - get_movie_akas = get_movie_release_dates - - def get_movie_vote_details(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'ratings') - return self.mProxy.ratings_parser.parse(cont) - - def get_movie_official_sites(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 
'officialsites') - return self.mProxy.officialsites_parser.parse(cont) - - def get_movie_trivia(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'trivia') - return self.mProxy.trivia_parser.parse(cont, getRefs=self._getRefs) - - def get_movie_connections(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'movieconnections') - return self.mProxy.connections_parser.parse(cont) - - def get_movie_technical(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'technical') - return self.mProxy.tech_parser.parse(cont) - - def get_movie_business(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'business') - return self.mProxy.business_parser.parse(cont, getRefs=self._getRefs) - - def get_movie_literature(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'literature') - return self.mProxy.literature_parser.parse(cont) - - def get_movie_locations(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'locations') - return self.mProxy.locations_parser.parse(cont) - - def get_movie_soundtrack(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'soundtrack') - return self.mProxy.soundtrack_parser.parse(cont) - - def get_movie_dvd(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'dvd') - return self.mProxy.dvd_parser.parse(cont, getRefs=self._getRefs) - - def get_movie_recommendations(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'recommendations') - return self.mProxy.rec_parser.parse(cont) - - def get_movie_external_reviews(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'externalreviews') - return self.mProxy.externalrev_parser.parse(cont) - - def get_movie_newsgroup_reviews(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'newsgroupreviews') - return self.mProxy.newsgrouprev_parser.parse(cont) - - def get_movie_misc_sites(self, movieID): - cont = 
self._retrieve(imdbURL_movie_main % movieID + 'miscsites') - return self.mProxy.misclinks_parser.parse(cont) - - def get_movie_sound_clips(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'soundsites') - return self.mProxy.soundclips_parser.parse(cont) - - def get_movie_video_clips(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'videosites') - return self.mProxy.videoclips_parser.parse(cont) - - def get_movie_photo_sites(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'photosites') - return self.mProxy.photosites_parser.parse(cont) - - def get_movie_news(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'news') - return self.mProxy.news_parser.parse(cont, getRefs=self._getRefs) - - def get_movie_amazon_reviews(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'amazon') - return self.mProxy.amazonrev_parser.parse(cont) - - def get_movie_guests(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'epcast') - return self.mProxy.episodes_cast_parser.parse(cont) - get_movie_episodes_cast = get_movie_guests - - def get_movie_merchandising_links(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'sales') - return self.mProxy.sales_parser.parse(cont) - - def get_movie_episodes(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'episodes') - data_d = self.mProxy.episodes_parser.parse(cont) - # set movie['episode of'].movieID for every episode of the series. - if data_d.get('data', {}).has_key('episodes'): - nr_eps = 0 - for season in data_d['data']['episodes'].values(): - for episode in season.values(): - episode['episode of'].movieID = movieID - nr_eps += 1 - # Number of episodes. 
- if nr_eps: - data_d['data']['number of episodes'] = nr_eps - return data_d - - def get_movie_episodes_rating(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'epdate') - data_d = self.mProxy.eprating_parser.parse(cont) - # set movie['episode of'].movieID for every episode. - if data_d.get('data', {}).has_key('episodes rating'): - for item in data_d['data']['episodes rating']: - episode = item['episode'] - episode['episode of'].movieID = movieID - return data_d - - def get_movie_faqs(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'faq') - return self.mProxy.movie_faqs_parser.parse(cont, getRefs=self._getRefs) - - def get_movie_airing(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'tvschedule') - return self.mProxy.airing_parser.parse(cont) - - get_movie_tv_schedule = get_movie_airing - - def get_movie_synopsis(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'synopsis') - return self.mProxy.synopsis_parser.parse(cont) - - def get_movie_parents_guide(self, movieID): - cont = self._retrieve(imdbURL_movie_main % movieID + 'parentalguide') - return self.mProxy.parentsguide_parser.parse(cont) - - def _search_person(self, name, results): - # The URL of the query. 
- # XXX: To retrieve the complete results list: - # params = urllib.urlencode({'more': 'nm', 'q': name}) - ##params = urllib.urlencode({'nm': 'on', 'mx': str(results), 'q': name}) - #params = 'q=%s&nm=on&mx=%s' % (quote_plus(name), str(results)) - #cont = self._retrieve(imdbURL_find % params) - cont = self._get_search_content('nm', name, results) - return self.spProxy.search_person_parser.parse(cont, results=results)['data'] - - def get_person_main(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'maindetails') - ret = self.pProxy.maindetails_parser.parse(cont) - ret['info sets'] = ('main', 'filmography') - return ret - - def get_person_filmography(self, personID): - return self.get_person_main(personID) - - def get_person_biography(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'bio') - return self.pProxy.bio_parser.parse(cont, getRefs=self._getRefs) - - def get_person_awards(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'awards') - return self.pProxy.person_awards_parser.parse(cont) - - def get_person_other_works(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'otherworks') - return self.pProxy.otherworks_parser.parse(cont, getRefs=self._getRefs) - - #def get_person_agent(self, personID): - # cont = self._retrieve(imdbURL_person_main % personID + 'agent') - # return self.pProxy.agent_parser.parse(cont) - - def get_person_publicity(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'publicity') - return self.pProxy.publicity_parser.parse(cont) - - def get_person_official_sites(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'officialsites') - return self.pProxy.person_officialsites_parser.parse(cont) - - def get_person_news(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'news') - return self.pProxy.news_parser.parse(cont) - - def get_person_episodes(self, personID): - cont = 
self._retrieve(imdbURL_person_main % personID + 'filmoseries') - return self.pProxy.person_series_parser.parse(cont) - - def get_person_merchandising_links(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'forsale') - return self.pProxy.sales_parser.parse(cont) - - def get_person_genres_links(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'filmogenre') - return self.pProxy.person_genres_parser.parse(cont) - - def get_person_keywords_links(self, personID): - cont = self._retrieve(imdbURL_person_main % personID + 'filmokey') - return self.pProxy.person_keywords_parser.parse(cont) - - def _search_character(self, name, results): - cont = self._get_search_content('char', name, results) - return self.scProxy.search_character_parser.parse(cont, results=results)['data'] - - def get_character_main(self, characterID): - cont = self._retrieve(imdbURL_character_main % characterID) - ret = self.cProxy.character_main_parser.parse(cont) - ret['info sets'] = ('main', 'filmography') - return ret - - get_character_filmography = get_character_main - - def get_character_biography(self, characterID): - cont = self._retrieve(imdbURL_character_main % characterID + 'bio') - return self.cProxy.character_bio_parser.parse(cont, - getRefs=self._getRefs) - - def get_character_episodes(self, characterID): - cont = self._retrieve(imdbURL_character_main % characterID + - 'filmoseries') - return self.cProxy.character_series_parser.parse(cont) - - def get_character_quotes(self, characterID): - cont = self._retrieve(imdbURL_character_main % characterID + 'quotes') - return self.cProxy.character_quotes_parser.parse(cont, - getRefs=self._getRefs) - - def _search_company(self, name, results): - cont = self._get_search_content('co', name, results) - url = self.urlOpener._last_url - return self.scompProxy.search_company_parser.parse(cont, url=url, - results=results)['data'] - - def get_company_main(self, companyID): - cont = 
self._retrieve(imdbURL_company_main % companyID) - ret = self.compProxy.company_main_parser.parse(cont) - return ret - - def _search_keyword(self, keyword, results): - # XXX: the IMDb web server seems to have some serious problem with - # non-ascii keyword. - # E.g.: http://akas.imdb.com/keyword/fianc%E9/ - # will return a 500 Internal Server Error: Redirect Recursion. - keyword = keyword.encode('utf8', 'ignore') - try: - cont = self._get_search_content('kw', keyword, results) - except IMDbDataAccessError: - self._http_logger.warn('unable to search for keyword %s', keyword, - exc_info=True) - return [] - return self.skProxy.search_keyword_parser.parse(cont, results=results)['data'] - - def _get_keyword(self, keyword, results): - keyword = keyword.encode('utf8', 'ignore') - try: - cont = self._retrieve(imdbURL_keyword_main % keyword) - except IMDbDataAccessError: - self._http_logger.warn('unable to get keyword %s', keyword, - exc_info=True) - return [] - return self.skProxy.search_moviekeyword_parser.parse(cont, results=results)['data'] - - def _get_top_bottom_movies(self, kind): - if kind == 'top': - parser = self.topBottomProxy.top250_parser - url = imdbURL_top250 - elif kind == 'bottom': - parser = self.topBottomProxy.bottom100_parser - url = imdbURL_bottom100 - else: - return [] - cont = self._retrieve(url) - return parser.parse(cont)['data'] - - diff --git a/libs/imdb/parser/http/bsouplxml/__init__.py b/libs/imdb/parser/http/bsouplxml/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/libs/imdb/parser/http/bsouplxml/_bsoup.py b/libs/imdb/parser/http/bsouplxml/_bsoup.py deleted file mode 100644 index afab5da9..00000000 --- a/libs/imdb/parser/http/bsouplxml/_bsoup.py +++ /dev/null @@ -1,1970 +0,0 @@ -""" -imdb.parser.http._bsoup module (imdb.parser.http package). -This is the BeautifulSoup.py module, not modified; it's included here -so that it's not an external dependency. 
- -Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" -http://www.crummy.com/software/BeautifulSoup/ - -Beautiful Soup parses a (possibly invalid) XML or HTML document into a -tree representation. It provides methods and Pythonic idioms that make -it easy to navigate, search, and modify the tree. - -A well-formed XML/HTML document yields a well-formed data -structure. An ill-formed XML/HTML document yields a correspondingly -ill-formed data structure. If your document is only locally -well-formed, you can use this library to find and process the -well-formed part of it. - -Beautiful Soup works with Python 2.2 and up. It has no external -dependencies, but you'll have more success at converting data to UTF-8 -if you also install these three packages: - -* chardet, for auto-detecting character encodings - http://chardet.feedparser.org/ -* cjkcodecs and iconv_codec, which add more encodings to the ones supported - by stock Python. - http://cjkpython.i18n.org/ - -Beautiful Soup defines classes for two main parsing strategies: - - * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific - language that kind of looks like XML. - - * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid - or invalid. This class has web browser-like heuristics for - obtaining a sensible parse tree in the face of common HTML errors. - -Beautiful Soup also defines a class (UnicodeDammit) for autodetecting -the encoding of an HTML or XML document, and converting it to -Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. - -For more than you ever wanted to know about Beautiful Soup, see the -documentation: -http://www.crummy.com/software/BeautifulSoup/documentation.html - -Here, have some legalese: - -Copyright (c) 2004-2008, Leonard Richardson - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the the Beautiful Soup Consortium and All - Night Kosher Bakery nor the names of its contributors may be - used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. 
- -""" -from __future__ import generators - -__author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "3.0.7a" -__copyright__ = "Copyright (c) 2004-2008 Leonard Richardson" -__license__ = "New-style BSD" - -from sgmllib import SGMLParser, SGMLParseError -import codecs -import markupbase -import types -import re -import sgmllib -try: - from htmlentitydefs import name2codepoint -except ImportError: - name2codepoint = {} -try: - set -except NameError: - from sets import Set as set - -#These hacks make Beautiful Soup able to parse XML with namespaces -sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') -markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match - -DEFAULT_OUTPUT_ENCODING = "utf-8" - -# First, the classes that represent markup elements. - -class PageElement: - """Contains the navigational information for some part of the page - (either a tag or a piece of text)""" - - def setup(self, parent=None, previous=None): - """Sets up the initial relations between this element and - other elements.""" - self.parent = parent - self.previous = previous - self.next = None - self.previousSibling = None - self.nextSibling = None - if self.parent and self.parent.contents: - self.previousSibling = self.parent.contents[-1] - self.previousSibling.nextSibling = self - - def replaceWith(self, replaceWith): - oldParent = self.parent - myIndex = self.parent.contents.index(self) - if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent: - # We're replacing this element with one of its siblings. - index = self.parent.contents.index(replaceWith) - if index and index < myIndex: - # Furthermore, it comes before this element. That - # means that when we extract it, the index of this - # element will change. 
- myIndex = myIndex - 1 - self.extract() - oldParent.insert(myIndex, replaceWith) - - def extract(self): - """Destructively rips this element out of the tree.""" - if self.parent: - try: - self.parent.contents.remove(self) - except ValueError: - pass - - #Find the two elements that would be next to each other if - #this element (and any children) hadn't been parsed. Connect - #the two. - lastChild = self._lastRecursiveChild() - nextElement = lastChild.next - - if self.previous: - self.previous.next = nextElement - if nextElement: - nextElement.previous = self.previous - self.previous = None - lastChild.next = None - - self.parent = None - if self.previousSibling: - self.previousSibling.nextSibling = self.nextSibling - if self.nextSibling: - self.nextSibling.previousSibling = self.previousSibling - self.previousSibling = self.nextSibling = None - return self - - def _lastRecursiveChild(self): - "Finds the last element beneath this object to be parsed." - lastChild = self - while hasattr(lastChild, 'contents') and lastChild.contents: - lastChild = lastChild.contents[-1] - return lastChild - - def insert(self, position, newChild): - if (isinstance(newChild, basestring) - or isinstance(newChild, unicode)) \ - and not isinstance(newChild, NavigableString): - newChild = NavigableString(newChild) - - position = min(position, len(self.contents)) - if hasattr(newChild, 'parent') and newChild.parent != None: - # We're 'inserting' an element that's already one - # of this object's children. - if newChild.parent == self: - index = self.find(newChild) - if index and index < position: - # Furthermore we're moving it further down the - # list of this object's children. That means that - # when we extract this element, our target index - # will jump down one. 
- position = position - 1 - newChild.extract() - - newChild.parent = self - previousChild = None - if position == 0: - newChild.previousSibling = None - newChild.previous = self - else: - previousChild = self.contents[position-1] - newChild.previousSibling = previousChild - newChild.previousSibling.nextSibling = newChild - newChild.previous = previousChild._lastRecursiveChild() - if newChild.previous: - newChild.previous.next = newChild - - newChildsLastElement = newChild._lastRecursiveChild() - - if position >= len(self.contents): - newChild.nextSibling = None - - parent = self - parentsNextSibling = None - while not parentsNextSibling: - parentsNextSibling = parent.nextSibling - parent = parent.parent - if not parent: # This is the last element in the document. - break - if parentsNextSibling: - newChildsLastElement.next = parentsNextSibling - else: - newChildsLastElement.next = None - else: - nextChild = self.contents[position] - newChild.nextSibling = nextChild - if newChild.nextSibling: - newChild.nextSibling.previousSibling = newChild - newChildsLastElement.next = nextChild - - if newChildsLastElement.next: - newChildsLastElement.next.previous = newChildsLastElement - self.contents.insert(position, newChild) - - def append(self, tag): - """Appends the given tag to the contents of this tag.""" - self.insert(len(self.contents), tag) - - def findNext(self, name=None, attrs={}, text=None, **kwargs): - """Returns the first item that matches the given criteria and - appears after this Tag in the document.""" - return self._findOne(self.findAllNext, name, attrs, text, **kwargs) - - def findAllNext(self, name=None, attrs={}, text=None, limit=None, - **kwargs): - """Returns all items that match the given criteria and appear - after this Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.nextGenerator, - **kwargs) - - def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): - """Returns the closest sibling to this Tag that matches 
the - given criteria and appears after this Tag in the document.""" - return self._findOne(self.findNextSiblings, name, attrs, text, - **kwargs) - - def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, - **kwargs): - """Returns the siblings of this Tag that match the given - criteria and appear after this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.nextSiblingGenerator, **kwargs) - fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x - - def findPrevious(self, name=None, attrs={}, text=None, **kwargs): - """Returns the first item that matches the given criteria and - appears before this Tag in the document.""" - return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) - - def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, - **kwargs): - """Returns all items that match the given criteria and appear - before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.previousGenerator, - **kwargs) - fetchPrevious = findAllPrevious # Compatibility with pre-3.x - - def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): - """Returns the closest sibling to this Tag that matches the - given criteria and appears before this Tag in the document.""" - return self._findOne(self.findPreviousSiblings, name, attrs, text, - **kwargs) - - def findPreviousSiblings(self, name=None, attrs={}, text=None, - limit=None, **kwargs): - """Returns the siblings of this Tag that match the given - criteria and appear before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.previousSiblingGenerator, **kwargs) - fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x - - def findParent(self, name=None, attrs={}, **kwargs): - """Returns the closest parent of this Tag that matches the given - criteria.""" - # NOTE: We can't use _findOne because findParents takes a different - # set of arguments. 
- r = None - l = self.findParents(name, attrs, 1) - if l: - r = l[0] - return r - - def findParents(self, name=None, attrs={}, limit=None, **kwargs): - """Returns the parents of this Tag that match the given - criteria.""" - - return self._findAll(name, attrs, None, limit, self.parentGenerator, - **kwargs) - fetchParents = findParents # Compatibility with pre-3.x - - #These methods do the real heavy lifting. - - def _findOne(self, method, name, attrs, text, **kwargs): - r = None - l = method(name, attrs, text, 1, **kwargs) - if l: - r = l[0] - return r - - def _findAll(self, name, attrs, text, limit, generator, **kwargs): - "Iterates over a generator looking for things that match." - - if isinstance(name, SoupStrainer): - strainer = name - else: - # Build a SoupStrainer - strainer = SoupStrainer(name, attrs, text, **kwargs) - results = ResultSet(strainer) - g = generator() - while True: - try: - i = g.next() - except StopIteration: - break - if i: - found = strainer.search(i) - if found: - results.append(found) - if limit and len(results) >= limit: - break - return results - - #These Generators can be used to navigate starting from both - #NavigableStrings and Tags. - def nextGenerator(self): - i = self - while i: - i = i.next - yield i - - def nextSiblingGenerator(self): - i = self - while i: - i = i.nextSibling - yield i - - def previousGenerator(self): - i = self - while i: - i = i.previous - yield i - - def previousSiblingGenerator(self): - i = self - while i: - i = i.previousSibling - yield i - - def parentGenerator(self): - i = self - while i: - i = i.parent - yield i - - # Utility methods - def substituteEncoding(self, str, encoding=None): - encoding = encoding or "utf-8" - return str.replace("%SOUP-ENCODING%", encoding) - - def toEncoding(self, s, encoding=None): - """Encodes an object to a string in some encoding, or to Unicode. 
- .""" - if isinstance(s, unicode): - if encoding: - s = s.encode(encoding) - elif isinstance(s, str): - if encoding: - s = s.encode(encoding) - else: - s = unicode(s) - else: - if encoding: - s = self.toEncoding(str(s), encoding) - else: - s = unicode(s) - return s - -class NavigableString(unicode, PageElement): - - def __new__(cls, value): - """Create a new NavigableString. - - When unpickling a NavigableString, this method is called with - the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be - passed in to the superclass's __new__ or the superclass won't know - how to handle non-ASCII characters. - """ - if isinstance(value, unicode): - return unicode.__new__(cls, value) - return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) - - def __getnewargs__(self): - return (NavigableString.__str__(self),) - - def __getattr__(self, attr): - """text.string gives you text. This is for backwards - compatibility for Navigable*String, but for CData* it lets you - get the string without the CData wrapper.""" - if attr == 'string': - return self - else: - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) - - def __unicode__(self): - return str(self).decode(DEFAULT_OUTPUT_ENCODING) - - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - if encoding: - return self.encode(encoding) - else: - return self - -class CData(NavigableString): - - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - return "" % NavigableString.__str__(self, encoding) - -class ProcessingInstruction(NavigableString): - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - output = self - if "%SOUP-ENCODING%" in output: - output = self.substituteEncoding(output, encoding) - return "" % self.toEncoding(output, encoding) - -class Comment(NavigableString): - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - return "" % NavigableString.__str__(self, encoding) - -class Declaration(NavigableString): - def __str__(self, 
encoding=DEFAULT_OUTPUT_ENCODING): - return "" % NavigableString.__str__(self, encoding) - -class Tag(PageElement): - - """Represents a found HTML tag with its attributes and contents.""" - - def _invert(h): - "Cheap function to invert a hash." - i = {} - for k,v in h.items(): - i[v] = k - return i - - XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", - "quot" : '"', - "amp" : "&", - "lt" : "<", - "gt" : ">" } - - XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) - - def _convertEntities(self, match): - """Used in a call to re.sub to replace HTML, XML, and numeric - entities with the appropriate Unicode characters. If HTML - entities are being converted, any unrecognized entities are - escaped.""" - x = match.group(1) - if self.convertHTMLEntities and x in name2codepoint: - return unichr(name2codepoint[x]) - elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: - if self.convertXMLEntities: - return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] - else: - return u'&%s;' % x - elif len(x) > 0 and x[0] == '#': - # Handle numeric entities - if len(x) > 1 and x[1] == 'x': - return unichr(int(x[2:], 16)) - else: - return unichr(int(x[1:])) - - elif self.escapeUnrecognizedEntities: - return u'&%s;' % x - else: - return u'&%s;' % x - - def __init__(self, parser, name, attrs=None, parent=None, - previous=None): - "Basic constructor." - - # We don't actually store the parser object: that lets extracted - # chunks be garbage-collected - self.parserClass = parser.__class__ - self.isSelfClosing = parser.isSelfClosingTag(name) - self.name = name - if attrs == None: - attrs = [] - self.attrs = attrs - self.contents = [] - self.setup(parent, previous) - self.hidden = False - self.containsSubstitutions = False - self.convertHTMLEntities = parser.convertHTMLEntities - self.convertXMLEntities = parser.convertXMLEntities - self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities - - # Convert any HTML, XML, or numeric entities in the attribute values. 
- convert = lambda(k, val): (k, - re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", - self._convertEntities, - val)) - self.attrs = map(convert, self.attrs) - - def get(self, key, default=None): - """Returns the value of the 'key' attribute for the tag, or - the value given for 'default' if it doesn't have that - attribute.""" - return self._getAttrMap().get(key, default) - - def has_key(self, key): - return self._getAttrMap().has_key(key) - - def __getitem__(self, key): - """tag[key] returns the value of the 'key' attribute for the tag, - and throws an exception if it's not there.""" - return self._getAttrMap()[key] - - def __iter__(self): - "Iterating over a tag iterates over its contents." - return iter(self.contents) - - def __len__(self): - "The length of a tag is the length of its list of contents." - return len(self.contents) - - def __contains__(self, x): - return x in self.contents - - def __nonzero__(self): - "A tag is non-None even if it has no contents." - return True - - def __setitem__(self, key, value): - """Setting tag[key] sets the value of the 'key' attribute for the - tag.""" - self._getAttrMap() - self.attrMap[key] = value - found = False - for i in range(0, len(self.attrs)): - if self.attrs[i][0] == key: - self.attrs[i] = (key, value) - found = True - if not found: - self.attrs.append((key, value)) - self._getAttrMap()[key] = value - - def __delitem__(self, key): - "Deleting tag[key] deletes all 'key' attributes for the tag." - for item in self.attrs: - if item[0] == key: - self.attrs.remove(item) - #We don't break because bad HTML can define the same - #attribute multiple times. - self._getAttrMap() - if self.attrMap.has_key(key): - del self.attrMap[key] - - def __call__(self, *args, **kwargs): - """Calling a tag like a function is the same as calling its - findAll() method. Eg. 
tag('a') returns a list of all the A tags - found within this tag.""" - return apply(self.findAll, args, kwargs) - - def __getattr__(self, tag): - #print "Getattr %s.%s" % (self.__class__, tag) - if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: - return self.find(tag[:-3]) - elif tag.find('__') != 0: - return self.find(tag) - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) - - def __eq__(self, other): - """Returns true iff this tag has the same name, the same attributes, - and the same contents (recursively) as the given tag. - - NOTE: right now this will return false if two tags have the - same attributes in a different order. Should this be fixed?""" - if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): - return False - for i in range(0, len(self.contents)): - if self.contents[i] != other.contents[i]: - return False - return True - - def __ne__(self, other): - """Returns true iff this tag is not identical to the other tag, - as defined in __eq__.""" - return not self == other - - def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): - """Renders this tag as a string.""" - return self.__str__(encoding) - - def __unicode__(self): - return self.__str__(None) - - BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" - + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" - + ")") - - def _sub_entity(self, x): - """Used with a regular expression to substitute the - appropriate XML entity for an XML special character.""" - return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" - - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, - prettyPrint=False, indentLevel=0): - """Returns a string or Unicode representation of this tag and - its contents. To get Unicode, pass None for encoding. 
- - NOTE: since Python's HTML parser consumes whitespace, this - method is not certain to reproduce the whitespace present in - the original string.""" - - encodedName = self.toEncoding(self.name, encoding) - - attrs = [] - if self.attrs: - for key, val in self.attrs: - fmt = '%s="%s"' - if isString(val): - if self.containsSubstitutions and '%SOUP-ENCODING%' in val: - val = self.substituteEncoding(val, encoding) - - # The attribute value either: - # - # * Contains no embedded double quotes or single quotes. - # No problem: we enclose it in double quotes. - # * Contains embedded single quotes. No problem: - # double quotes work here too. - # * Contains embedded double quotes. No problem: - # we enclose it in single quotes. - # * Embeds both single _and_ double quotes. This - # can't happen naturally, but it can happen if - # you modify an attribute value after parsing - # the document. Now we have a bit of a - # problem. We solve it by enclosing the - # attribute in single quotes, and escaping any - # embedded single quotes to XML entities. - if '"' in val: - fmt = "%s='%s'" - if "'" in val: - # TODO: replace with apos when - # appropriate. - val = val.replace("'", "&squot;") - - # Now we're okay w/r/t quotes. But the attribute - # value might also contain angle brackets, or - # ampersands that aren't part of entities. We need - # to escape those to XML entities too. 
- val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) - - attrs.append(fmt % (self.toEncoding(key, encoding), - self.toEncoding(val, encoding))) - close = '' - closeTag = '' - if self.isSelfClosing: - close = ' /' - else: - closeTag = '' % encodedName - - indentTag, indentContents = 0, 0 - if prettyPrint: - indentTag = indentLevel - space = (' ' * (indentTag-1)) - indentContents = indentTag + 1 - contents = self.renderContents(encoding, prettyPrint, indentContents) - if self.hidden: - s = contents - else: - s = [] - attributeString = '' - if attrs: - attributeString = ' ' + ' '.join(attrs) - if prettyPrint: - s.append(space) - s.append('<%s%s%s>' % (encodedName, attributeString, close)) - if prettyPrint: - s.append("\n") - s.append(contents) - if prettyPrint and contents and contents[-1] != "\n": - s.append("\n") - if prettyPrint and closeTag: - s.append(space) - s.append(closeTag) - if prettyPrint and closeTag and self.nextSibling: - s.append("\n") - s = ''.join(s) - return s - - def decompose(self): - """Recursively destroys the contents of this tree.""" - contents = [i for i in self.contents] - for i in contents: - if isinstance(i, Tag): - i.decompose() - else: - i.extract() - self.extract() - - def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): - return self.__str__(encoding, True) - - def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, - prettyPrint=False, indentLevel=0): - """Renders the contents of this tag as a string in the given - encoding. 
If encoding is None, returns a Unicode string..""" - s=[] - for c in self: - text = None - if isinstance(c, NavigableString): - text = c.__str__(encoding) - elif isinstance(c, Tag): - s.append(c.__str__(encoding, prettyPrint, indentLevel)) - if text and prettyPrint: - text = text.strip() - if text: - if prettyPrint: - s.append(" " * (indentLevel-1)) - s.append(text) - if prettyPrint: - s.append("\n") - return ''.join(s) - - #Soup methods - - def find(self, name=None, attrs={}, recursive=True, text=None, - **kwargs): - """Return only the first child of this Tag matching the given - criteria.""" - r = None - l = self.findAll(name, attrs, recursive, text, 1, **kwargs) - if l: - r = l[0] - return r - findChild = find - - def findAll(self, name=None, attrs={}, recursive=True, text=None, - limit=None, **kwargs): - """Extracts a list of Tag objects that match the given - criteria. You can specify the name of the Tag and any - attributes you want the Tag to have. - - The value of a key-value pair in the 'attrs' map can be a - string, a list of strings, a regular expression object, or a - callable that takes a string and returns whether or not the - string matches for some custom definition of 'matches'. 
The - same is true of the tag name.""" - generator = self.recursiveChildGenerator - if not recursive: - generator = self.childGenerator - return self._findAll(name, attrs, text, limit, generator, **kwargs) - findChildren = findAll - - # Pre-3.x compatibility methods - first = find - fetch = findAll - - def fetchText(self, text=None, recursive=True, limit=None): - return self.findAll(text=text, recursive=recursive, limit=limit) - - def firstText(self, text=None, recursive=True): - return self.find(text=text, recursive=recursive) - - #Private methods - - def _getAttrMap(self): - """Initializes a map representation of this tag's attributes, - if not already initialized.""" - if not getattr(self, 'attrMap'): - self.attrMap = {} - for (key, value) in self.attrs: - self.attrMap[key] = value - return self.attrMap - - #Generator methods - def childGenerator(self): - for i in range(0, len(self.contents)): - yield self.contents[i] - raise StopIteration - - def recursiveChildGenerator(self): - stack = [(self, 0)] - while stack: - tag, start = stack.pop() - if isinstance(tag, Tag): - for i in range(start, len(tag.contents)): - a = tag.contents[i] - yield a - if isinstance(a, Tag) and tag.contents: - if i < len(tag.contents) - 1: - stack.append((tag, i+1)) - stack.append((a, 0)) - break - raise StopIteration - -# Next, a couple classes to represent queries and their results. 
-class SoupStrainer: - """Encapsulates a number of ways of matching a markup element (tag or - text).""" - - def __init__(self, name=None, attrs={}, text=None, **kwargs): - self.name = name - if isString(attrs): - kwargs['class'] = attrs - attrs = None - if kwargs: - if attrs: - attrs = attrs.copy() - attrs.update(kwargs) - else: - attrs = kwargs - self.attrs = attrs - self.text = text - - def __str__(self): - if self.text: - return self.text - else: - return "%s|%s" % (self.name, self.attrs) - - def searchTag(self, markupName=None, markupAttrs={}): - found = None - markup = None - if isinstance(markupName, Tag): - markup = markupName - markupAttrs = markup - callFunctionWithTagData = callable(self.name) \ - and not isinstance(markupName, Tag) - - if (not self.name) \ - or callFunctionWithTagData \ - or (markup and self._matches(markup, self.name)) \ - or (not markup and self._matches(markupName, self.name)): - if callFunctionWithTagData: - match = self.name(markupName, markupAttrs) - else: - match = True - markupAttrMap = None - for attr, matchAgainst in self.attrs.items(): - if not markupAttrMap: - if hasattr(markupAttrs, 'get'): - markupAttrMap = markupAttrs - else: - markupAttrMap = {} - for k,v in markupAttrs: - markupAttrMap[k] = v - attrValue = markupAttrMap.get(attr) - if not self._matches(attrValue, matchAgainst): - match = False - break - if match: - if markup: - found = markup - else: - found = markupName - return found - - def search(self, markup): - #print 'looking for %s in %s' % (self, markup) - found = None - # If given a list of items, scan it for a text element that - # matches. - if isList(markup) and not isinstance(markup, Tag): - for element in markup: - if isinstance(element, NavigableString) \ - and self.search(element): - found = element - break - # If it's a Tag, make sure its name or attributes match. - # Don't bother with Tags if we're searching for text. 
- elif isinstance(markup, Tag): - if not self.text: - found = self.searchTag(markup) - # If it's text, make sure the text matches. - elif isinstance(markup, NavigableString) or \ - isString(markup): - if self._matches(markup, self.text): - found = markup - else: - raise Exception, "I don't know how to match against a %s" \ - % markup.__class__ - return found - - def _matches(self, markup, matchAgainst): - #print "Matching %s against %s" % (markup, matchAgainst) - result = False - if matchAgainst == True and type(matchAgainst) == types.BooleanType: - result = markup != None - elif callable(matchAgainst): - result = matchAgainst(markup) - else: - #Custom match methods take the tag as an argument, but all - #other ways of matching match the tag name as a string. - if isinstance(markup, Tag): - markup = markup.name - if markup and not isString(markup): - markup = unicode(markup) - #Now we know that chunk is either a string, or None. - if hasattr(matchAgainst, 'match'): - # It's a regexp object. - result = markup and matchAgainst.search(markup) - elif isList(matchAgainst): - result = markup in matchAgainst - elif hasattr(matchAgainst, 'items'): - result = markup.has_key(matchAgainst) - elif matchAgainst and isString(markup): - if isinstance(markup, unicode): - matchAgainst = unicode(matchAgainst) - else: - matchAgainst = str(matchAgainst) - - if not result: - result = matchAgainst == markup - return result - -class ResultSet(list): - """A ResultSet is just a list that keeps track of the SoupStrainer - that created it.""" - def __init__(self, source): - list.__init__([]) - self.source = source - -# Now, some helper functions. 
- -def isList(l): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is listlike.""" - return hasattr(l, '__iter__') \ - or (type(l) in (types.ListType, types.TupleType)) - -def isString(s): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is stringlike.""" - try: - return isinstance(s, unicode) or isinstance(s, basestring) - except NameError: - return isinstance(s, str) - -def buildTagMap(default, *args): - """Turns a list of maps, lists, or scalars into a single map. - Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and - NESTING_RESET_TAGS maps out of lists and partial maps.""" - built = {} - for portion in args: - if hasattr(portion, 'items'): - #It's a map. Merge it. - for k,v in portion.items(): - built[k] = v - elif isList(portion): - #It's a list. Map each item to the default. - for k in portion: - built[k] = default - else: - #It's a scalar. Map it to the default. - built[portion] = default - return built - -# Now, the parser classes. - -class BeautifulStoneSoup(Tag, SGMLParser): - - """This class contains the basic parser and search code. It defines - a parser that knows nothing about tag behavior except for the - following: - - You can't close a tag without closing all the tags it encloses. - That is, "" actually means - "". - - [Another possible explanation is "", but since - this class defines no SELF_CLOSING_TAGS, it will never use that - explanation.] 
- - This class is useful for parsing XML or made-up markup languages, - or when BeautifulSoup makes an assumption counter to what you were - expecting.""" - - SELF_CLOSING_TAGS = {} - NESTABLE_TAGS = {} - RESET_NESTING_TAGS = {} - QUOTE_TAGS = {} - PRESERVE_WHITESPACE_TAGS = [] - - MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), - lambda x: x.group(1) + ' />'), - (re.compile(']*)>'), - lambda x: '') - ] - - ROOT_TAG_NAME = u'[document]' - - HTML_ENTITIES = "html" - XML_ENTITIES = "xml" - XHTML_ENTITIES = "xhtml" - # TODO: This only exists for backwards-compatibility - ALL_ENTITIES = XHTML_ENTITIES - - # Used when determining whether a text node is all whitespace and - # can be replaced with a single space. A text node that contains - # fancy Unicode spaces (usually non-breaking) should be left - # alone. - STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } - - def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, - markupMassage=True, smartQuotesTo=XML_ENTITIES, - convertEntities=None, selfClosingTags=None, isHTML=False): - """The Soup object is initialized as the 'root tag', and the - provided markup (which can be a string or a file-like object) - is fed into the underlying parser. - - sgmllib will process most bad HTML, and the BeautifulSoup - class has some tricks for dealing with some HTML that kills - sgmllib, but Beautiful Soup can nonetheless choke or lose data - if your data uses self-closing tags or declarations - incorrectly. - - By default, Beautiful Soup uses regexes to sanitize input, - avoiding the vast majority of these problems. If the problems - don't apply to you, pass in False for markupMassage, and - you'll get better performance. - - The default parser massage techniques fix the two most common - instances of invalid HTML that choke sgmllib: - -
(No space between name of closing tag and tag close) - (Extraneous whitespace in declaration) - - You can pass in a custom list of (RE object, replace method) - tuples to get Beautiful Soup to scrub your input the way you - want.""" - - self.parseOnlyThese = parseOnlyThese - self.fromEncoding = fromEncoding - self.smartQuotesTo = smartQuotesTo - self.convertEntities = convertEntities - # Set the rules for how we'll deal with the entities we - # encounter - if self.convertEntities: - # It doesn't make sense to convert encoded characters to - # entities even while you're converting entities to Unicode. - # Just convert it all to Unicode. - self.smartQuotesTo = None - if convertEntities == self.HTML_ENTITIES: - self.convertXMLEntities = False - self.convertHTMLEntities = True - self.escapeUnrecognizedEntities = True - elif convertEntities == self.XHTML_ENTITIES: - self.convertXMLEntities = True - self.convertHTMLEntities = True - self.escapeUnrecognizedEntities = False - elif convertEntities == self.XML_ENTITIES: - self.convertXMLEntities = True - self.convertHTMLEntities = False - self.escapeUnrecognizedEntities = False - else: - self.convertXMLEntities = False - self.convertHTMLEntities = False - self.escapeUnrecognizedEntities = False - - self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) - SGMLParser.__init__(self) - - if hasattr(markup, 'read'): # It's a file-type object. - markup = markup.read() - self.markup = markup - self.markupMassage = markupMassage - try: - self._feed(isHTML=isHTML) - except StopParsing: - pass - self.markup = None # The markup can now be GCed - - def convert_charref(self, name): - """This method fixes a bug in Python's SGMLParser.""" - try: - n = int(name) - except ValueError: - return - if not 0 <= n <= 127 : # ASCII ends at 127, not 255 - return - return self.convert_codepoint(n) - - def _feed(self, inDocumentEncoding=None, isHTML=False): - # Convert the document to Unicode. 
- markup = self.markup - if isinstance(markup, unicode): - if not hasattr(self, 'originalEncoding'): - self.originalEncoding = None - else: - dammit = UnicodeDammit\ - (markup, [self.fromEncoding, inDocumentEncoding], - smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) - markup = dammit.unicode - self.originalEncoding = dammit.originalEncoding - self.declaredHTMLEncoding = dammit.declaredHTMLEncoding - if markup: - if self.markupMassage: - if not isList(self.markupMassage): - self.markupMassage = self.MARKUP_MASSAGE - for fix, m in self.markupMassage: - markup = fix.sub(m, markup) - # TODO: We get rid of markupMassage so that the - # soup object can be deepcopied later on. Some - # Python installations can't copy regexes. If anyone - # was relying on the existence of markupMassage, this - # might cause problems. - del(self.markupMassage) - self.reset() - - SGMLParser.feed(self, markup) - # Close out any unfinished strings and close all the open tags. - self.endData() - while self.currentTag.name != self.ROOT_TAG_NAME: - self.popTag() - - def __getattr__(self, methodName): - """This method routes method call requests to either the SGMLParser - superclass or the Tag superclass, depending on the method name.""" - #print "__getattr__ called on %s.%s" % (self.__class__, methodName) - - if methodName.find('start_') == 0 or methodName.find('end_') == 0 \ - or methodName.find('do_') == 0: - return SGMLParser.__getattr__(self, methodName) - elif methodName.find('__') != 0: - return Tag.__getattr__(self, methodName) - else: - raise AttributeError - - def isSelfClosingTag(self, name): - """Returns true iff the given string is the name of a - self-closing tag according to this parser.""" - return self.SELF_CLOSING_TAGS.has_key(name) \ - or self.instanceSelfClosingTags.has_key(name) - - def reset(self): - Tag.__init__(self, self, self.ROOT_TAG_NAME) - self.hidden = 1 - SGMLParser.reset(self) - self.currentData = [] - self.currentTag = None - self.tagStack = [] - self.quoteStack 
= [] - self.pushTag(self) - - def popTag(self): - tag = self.tagStack.pop() - # Tags with just one string-owning child get the child as a - # 'string' property, so that soup.tag.string is shorthand for - # soup.tag.contents[0] - if len(self.currentTag.contents) == 1 and \ - isinstance(self.currentTag.contents[0], NavigableString): - self.currentTag.string = self.currentTag.contents[0] - - #print "Pop", tag.name - if self.tagStack: - self.currentTag = self.tagStack[-1] - return self.currentTag - - def pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: - self.currentTag.contents.append(tag) - self.tagStack.append(tag) - self.currentTag = self.tagStack[-1] - - def endData(self, containerClass=NavigableString): - if self.currentData: - currentData = u''.join(self.currentData) - if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and - not set([tag.name for tag in self.tagStack]).intersection( - self.PRESERVE_WHITESPACE_TAGS)): - if '\n' in currentData: - currentData = '\n' - else: - currentData = ' ' - self.currentData = [] - if self.parseOnlyThese and len(self.tagStack) <= 1 and \ - (not self.parseOnlyThese.text or \ - not self.parseOnlyThese.search(currentData)): - return - o = containerClass(currentData) - o.setup(self.currentTag, self.previous) - if self.previous: - self.previous.next = o - self.previous = o - self.currentTag.contents.append(o) - - - def _popToTag(self, name, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. 
If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - #print "Popping to %s" % name - if name == self.ROOT_TAG_NAME: - return - - numPops = 0 - mostRecentTag = None - for i in range(len(self.tagStack)-1, 0, -1): - if name == self.tagStack[i].name: - numPops = len(self.tagStack)-i - break - if not inclusivePop: - numPops = numPops - 1 - - for i in range(0, numPops): - mostRecentTag = self.popTag() - return mostRecentTag - - def _smartPop(self, name): - - """We need to pop up to the previous tag of this type, unless - one of this tag's nesting reset triggers comes between this - tag and the previous tag of this type, OR unless this tag is a - generic nesting trigger and another generic nesting trigger - comes between this tag and the previous tag of this type. - - Examples: -

FooBar *

* should pop to 'p', not 'b'. -

FooBar *

* should pop to 'table', not 'p'. -

Foo

Bar *

* should pop to 'tr', not 'p'. - -

    • *
    • * should pop to 'ul', not the first 'li'. -
  • ** should pop to 'table', not the first 'tr' - tag should - implicitly close the previous tag within the same
    ** should pop to 'tr', not the first 'td' - """ - - nestingResetTriggers = self.NESTABLE_TAGS.get(name) - isNestable = nestingResetTriggers != None - isResetNesting = self.RESET_NESTING_TAGS.has_key(name) - popTo = None - inclusive = True - for i in range(len(self.tagStack)-1, 0, -1): - p = self.tagStack[i] - if (not p or p.name == name) and not isNestable: - #Non-nestable tags get popped to the top or to their - #last occurance. - popTo = name - break - if (nestingResetTriggers != None - and p.name in nestingResetTriggers) \ - or (nestingResetTriggers == None and isResetNesting - and self.RESET_NESTING_TAGS.has_key(p.name)): - - #If we encounter one of the nesting reset triggers - #peculiar to this tag, or we encounter another tag - #that causes nesting to reset, pop up to but not - #including that tag. - popTo = p.name - inclusive = False - break - p = p.parent - if popTo: - self._popToTag(popTo, inclusive) - - def unknown_starttag(self, name, attrs, selfClosing=0): - #print "Start tag %s: %s" % (name, attrs) - if self.quoteStack: - #This is not a real tag. - #print "<%s> is not real!" % name - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) - self.handle_data('<%s%s>' % (name, attrs)) - return - self.endData() - - if not self.isSelfClosingTag(name) and not selfClosing: - self._smartPop(name) - - if self.parseOnlyThese and len(self.tagStack) <= 1 \ - and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): - return - - tag = Tag(self, name, attrs, self.currentTag, self.previous) - if self.previous: - self.previous.next = tag - self.previous = tag - self.pushTag(tag) - if selfClosing or self.isSelfClosingTag(name): - self.popTag() - if name in self.QUOTE_TAGS: - #print "Beginning quote (%s)" % name - self.quoteStack.append(name) - self.literal = 1 - return tag - - def unknown_endtag(self, name): - #print "End tag %s" % name - if self.quoteStack and self.quoteStack[-1] != name: - #This is not a real end tag. 
- #print " is not real!" % name - self.handle_data('' % name) - return - self.endData() - self._popToTag(name) - if self.quoteStack and self.quoteStack[-1] == name: - self.quoteStack.pop() - self.literal = (len(self.quoteStack) > 0) - - def handle_data(self, data): - self.currentData.append(data) - - def _toStringSubclass(self, text, subclass): - """Adds a certain piece of text to the tree as a NavigableString - subclass.""" - self.endData() - self.handle_data(text) - self.endData(subclass) - - def handle_pi(self, text): - """Handle a processing instruction as a ProcessingInstruction - object, possibly one with a %SOUP-ENCODING% slot into which an - encoding will be plugged later.""" - if text[:3] == "xml": - text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" - self._toStringSubclass(text, ProcessingInstruction) - - def handle_comment(self, text): - "Handle comments as Comment objects." - self._toStringSubclass(text, Comment) - - def handle_charref(self, ref): - "Handle character references as data." - if self.convertEntities: - data = unichr(int(ref)) - else: - data = '&#%s;' % ref - self.handle_data(data) - - def handle_entityref(self, ref): - """Handle entity references as data, possibly converting known - HTML and/or XML entity references to the corresponding Unicode - characters.""" - data = None - if self.convertHTMLEntities: - try: - data = unichr(name2codepoint[ref]) - except KeyError: - pass - - if not data and self.convertXMLEntities: - data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) - - if not data and self.convertHTMLEntities and \ - not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): - # TODO: We've got a problem here. We're told this is - # an entity reference, but it's not an XML entity - # reference or an HTML entity reference. Nonetheless, - # the logical thing to do is to pass it through as an - # unrecognized entity reference. - # - # Except: when the input is "&carol;" this function - # will be called with input "carol". 
When the input is - # "AT&T", this function will be called with input - # "T". We have no way of knowing whether a semicolon - # was present originally, so we don't know whether - # this is an unknown entity or just a misplaced - # ampersand. - # - # The more common case is a misplaced ampersand, so I - # escape the ampersand and omit the trailing semicolon. - data = "&%s" % ref - if not data: - # This case is different from the one above, because we - # haven't already gone through a supposedly comprehensive - # mapping of entities to Unicode characters. We might not - # have gone through any mapping at all. So the chances are - # very high that this is a real entity, and not a - # misplaced ampersand. - data = "&%s;" % ref - self.handle_data(data) - - def handle_decl(self, data): - "Handle DOCTYPEs and the like as Declaration objects." - self._toStringSubclass(data, Declaration) - - def parse_declaration(self, i): - """Treat a bogus SGML declaration as raw data. Treat a CDATA - declaration as a CData object.""" - j = None - if self.rawdata[i:i+9] == '', i) - if k == -1: - k = len(self.rawdata) - data = self.rawdata[i+9:k] - j = k+3 - self._toStringSubclass(data, CData) - else: - try: - j = SGMLParser.parse_declaration(self, i) - except SGMLParseError: - toHandle = self.rawdata[i:] - self.handle_data(toHandle) - j = i + len(toHandle) - return j - -class BeautifulSoup(BeautifulStoneSoup): - - """This parser knows the following facts about HTML: - - * Some tags have no closing tag and should be interpreted as being - closed as soon as they are encountered. - - * The text inside some tags (ie. 'script') may contain tags which - are not really part of the document and which should be parsed - as text, not tags. If you want to parse the text as tags, you can - always fetch it and parse it explicitly. - - * Tag nesting rules: - - Most tags can't be nested at all. For instance, the occurance of - a

    tag should implicitly close the previous

    tag. - -

    Para1

    Para2 - should be transformed into: -

    Para1

    Para2 - - Some tags can be nested arbitrarily. For instance, the occurance - of a

    tag should _not_ implicitly close the previous -
    tag. - - Alice said:
    Bob said:
    Blah - should NOT be transformed into: - Alice said:
    Bob said:
    Blah - - Some tags can be nested, but the nesting is reset by the - interposition of other tags. For instance, a
    , - but not close a tag in another table. - -
    BlahBlah - should be transformed into: -
    BlahBlah - but, - Blah
    Blah - should NOT be transformed into - Blah
    Blah - - Differing assumptions about tag nesting rules are a major source - of problems with the BeautifulSoup class. If BeautifulSoup is not - treating as nestable a tag your page author treats as nestable, - try ICantBelieveItsBeautifulSoup, MinimalSoup, or - BeautifulStoneSoup before writing your own subclass.""" - - def __init__(self, *args, **kwargs): - if not kwargs.has_key('smartQuotesTo'): - kwargs['smartQuotesTo'] = self.HTML_ENTITIES - kwargs['isHTML'] = True - BeautifulStoneSoup.__init__(self, *args, **kwargs) - - SELF_CLOSING_TAGS = buildTagMap(None, - ['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) - - PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) - - QUOTE_TAGS = {'script' : None, 'textarea' : None} - - #According to the HTML standard, each of these inline tags can - #contain another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', - 'center'] - - #According to the HTML standard, these block tags can contain - #another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] - - #Lists can contain other lists, but there are restrictions. - NESTABLE_LIST_TAGS = { 'ol' : [], - 'ul' : [], - 'li' : ['ul', 'ol'], - 'dl' : [], - 'dd' : ['dl'], - 'dt' : ['dl'] } - - #Tables can contain other tables, but there are restrictions. - NESTABLE_TABLE_TAGS = {'table' : [], - 'tr' : ['table', 'tbody', 'tfoot', 'thead'], - 'td' : ['tr'], - 'th' : ['tr'], - 'thead' : ['table'], - 'tbody' : ['table'], - 'tfoot' : ['table'], - } - - NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] - - #If one of these tags is encountered, all tags up to the next tag of - #this type are popped. 
- RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', - NON_NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, - NESTABLE_TABLE_TAGS) - - NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) - - # Used to detect the charset in a META tag; see start_meta - CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) - - def start_meta(self, attrs): - """Beautiful Soup can detect a charset included in a META tag, - try to convert the document to that charset, and re-parse the - document from the beginning.""" - httpEquiv = None - contentType = None - contentTypeIndex = None - tagNeedsEncodingSubstitution = False - - for i in range(0, len(attrs)): - key, value = attrs[i] - key = key.lower() - if key == 'http-equiv': - httpEquiv = value - elif key == 'content': - contentType = value - contentTypeIndex = i - - if httpEquiv and contentType: # It's an interesting meta tag. - match = self.CHARSET_RE.search(contentType) - if match: - if (self.declaredHTMLEncoding is not None or - self.originalEncoding == self.fromEncoding): - # An HTML encoding was sniffed while converting - # the document to Unicode, or an HTML encoding was - # sniffed during a previous pass through the - # document, or an encoding was specified - # explicitly and it worked. Rewrite the meta tag. - def rewrite(match): - return match.group(1) + "%SOUP-ENCODING%" - newAttr = self.CHARSET_RE.sub(rewrite, contentType) - attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], - newAttr) - tagNeedsEncodingSubstitution = True - else: - # This is our first pass through the document. - # Go through it again with the encoding information. 
- newCharset = match.group(3) - if newCharset and newCharset != self.originalEncoding: - self.declaredHTMLEncoding = newCharset - self._feed(self.declaredHTMLEncoding) - raise StopParsing - pass - tag = self.unknown_starttag("meta", attrs) - if tag and tagNeedsEncodingSubstitution: - tag.containsSubstitutions = True - -class StopParsing(Exception): - pass - -class ICantBelieveItsBeautifulSoup(BeautifulSoup): - - """The BeautifulSoup class is oriented towards skipping over - common HTML errors like unclosed tags. However, sometimes it makes - errors of its own. For instance, consider this fragment: - - FooBar - - This is perfectly valid (if bizarre) HTML. However, the - BeautifulSoup class will implicitly close the first b tag when it - encounters the second 'b'. It will think the author wrote - "FooBar", and didn't close the first 'b' tag, because - there's no real-world reason to bold something that's already - bold. When it encounters '' it will close two more 'b' - tags, for a grand total of three tags closed instead of two. This - can throw off the rest of your document structure. The same is - true of a number of other tags, listed below. - - It's much more common for someone to forget to close a 'b' tag - than to actually use nested 'b' tags, and the BeautifulSoup class - handles the common case. 
This class handles the not-co-common - case: where you can't believe someone wrote what they did, but - it's valid HTML and BeautifulSoup screwed up by assuming it - wouldn't be.""" - - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ - ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', - 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', - 'big'] - - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] - - NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) - -class MinimalSoup(BeautifulSoup): - """The MinimalSoup class is for parsing HTML that contains - pathologically bad markup. It makes no assumptions about tag - nesting, but it does know which tags are self-closing, that - ', re.I|re.S), ''), - # For BeautifulSoup. - (re.compile('', re.I), '') - ] - - def preprocess_dom(self, dom): - # Remove "link this quote" links. - for qLink in self.xpath(dom, "//p[@class='linksoda']"): - qLink.drop_tree() - return dom - - def postprocess_data(self, data): - if 'quotes' not in data: - return {} - for idx, quote in enumerate(data['quotes']): - data['quotes'][idx] = quote.split('::') - return data - - -class DOMHTMLReleaseinfoParser(DOMParserBase): - """Parser for the "release dates" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - rdparser = DOMHTMLReleaseinfoParser() - result = rdparser.parse(releaseinfo_html_string) - """ - extractors = [Extractor(label='release dates', - path="//th[@class='xxxx']/../../tr", - attrs=Attribute(key='release dates', multi=True, - path={'country': ".//td[1]//text()", - 'date': ".//td[2]//text()", - 'notes': ".//td[3]//text()"})), - Extractor(label='akas', - path="//div[@class='_imdbpy_akas']/table/tr", - attrs=Attribute(key='akas', multi=True, - path={'title': "./td[1]/text()", - 'countries': "./td[2]/text()"}))] - - preprocessors = [ - (re.compile('(
    )', re.I | re.M | re.S), - r'
    \1
    ')] - - def postprocess_data(self, data): - if not ('release dates' in data or 'akas' in data): return data - releases = data.get('release dates') or [] - rl = [] - for i in releases: - country = i.get('country') - date = i.get('date') - if not (country and date): continue - country = country.strip() - date = date.strip() - if not (country and date): continue - notes = i['notes'] - info = u'%s::%s' % (country, date) - if notes: - info += notes - rl.append(info) - if releases: - del data['release dates'] - if rl: - data['release dates'] = rl - akas = data.get('akas') or [] - nakas = [] - for aka in akas: - title = aka.get('title', '').strip() - if not title: - continue - countries = aka.get('countries', '').split('/') - if not countries: - nakas.append(title) - else: - for country in countries: - nakas.append('%s::%s' % (title, country.strip())) - if akas: - del data['akas'] - if nakas: - data['akas from release info'] = nakas - return data - - -class DOMHTMLRatingsParser(DOMParserBase): - """Parser for the "user ratings" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - rparser = DOMHTMLRatingsParser() - result = rparser.parse(userratings_html_string) - """ - re_means = re.compile('mean\s*=\s*([0-9]\.[0-9])\.\s*median\s*=\s*([0-9])', - re.I) - extractors = [ - Extractor(label='number of votes', - path="//td[b='Percentage']/../../tr", - attrs=[Attribute(key='votes', - multi=True, - path={ - 'votes': "td[1]//text()", - 'ordinal': "td[3]//text()" - })]), - Extractor(label='mean and median', - path="//p[starts-with(text(), 'Arithmetic mean')]", - attrs=Attribute(key='mean and median', - path="text()")), - Extractor(label='rating', - path="//a[starts-with(@href, '/search/title?user_rating=')]", - attrs=Attribute(key='rating', - path="text()")), - Extractor(label='demographic voters', - path="//td[b='Average']/../../tr", - attrs=Attribute(key='demographic voters', - multi=True, - path={ - 'voters': "td[1]//text()", - 'votes': "td[2]//text()", - 'average': "td[3]//text()" - })), - Extractor(label='top 250', - path="//a[text()='top 250']", - attrs=Attribute(key='top 250', - path="./preceding-sibling::text()[1]")) - ] - - def postprocess_data(self, data): - nd = {} - votes = data.get('votes', []) - if votes: - nd['number of votes'] = {} - for i in xrange(1, 11): - _ordinal = int(votes[i]['ordinal']) - _strvts = votes[i]['votes'] or '0' - nd['number of votes'][_ordinal] = \ - int(_strvts.replace(',', '')) - mean = data.get('mean and median', '') - if mean: - means = self.re_means.findall(mean) - if means and len(means[0]) == 2: - am, med = means[0] - try: am = float(am) - except (ValueError, OverflowError): pass - if type(am) is type(1.0): - nd['arithmetic mean'] = am - try: med = int(med) - except (ValueError, OverflowError): pass - if type(med) is type(0): - nd['median'] = med - if 'rating' in data: - nd['rating'] = float(data['rating']) - dem_voters = data.get('demographic voters') - if dem_voters: - nd['demographic'] = {} - for i in xrange(1, len(dem_voters)): - if (dem_voters[i]['votes'] is not None) \ - and 
(dem_voters[i]['votes'].strip()): - nd['demographic'][dem_voters[i]['voters'].strip().lower()] \ - = (int(dem_voters[i]['votes'].replace(',', '')), - float(dem_voters[i]['average'])) - if 'imdb users' in nd.get('demographic', {}): - nd['votes'] = nd['demographic']['imdb users'][0] - nd['demographic']['all votes'] = nd['demographic']['imdb users'] - del nd['demographic']['imdb users'] - top250 = data.get('top 250') - if top250: - sd = top250[9:] - i = sd.find(' ') - if i != -1: - sd = sd[:i] - try: sd = int(sd) - except (ValueError, OverflowError): pass - if type(sd) is type(0): - nd['top 250 rank'] = sd - return nd - - -class DOMHTMLEpisodesRatings(DOMParserBase): - """Parser for the "episode ratings ... by date" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - erparser = DOMHTMLEpisodesRatings() - result = erparser.parse(eprating_html_string) - """ - _containsObjects = True - - extractors = [Extractor(label='title', path="//title", - attrs=Attribute(key='title', path="./text()")), - Extractor(label='ep ratings', - path="//th/../..//tr", - attrs=Attribute(key='episodes', multi=True, - path={'nr': ".//td[1]/text()", - 'ep title': ".//td[2]//text()", - 'movieID': ".//td[2]/a/@href", - 'rating': ".//td[3]/text()", - 'votes': ".//td[4]/text()"}))] - - def postprocess_data(self, data): - if 'title' not in data or 'episodes' not in data: return {} - nd = [] - title = data['title'] - for i in data['episodes']: - ept = i['ep title'] - movieID = analyze_imdbid(i['movieID']) - votes = i['votes'] - rating = i['rating'] - if not (ept and movieID and votes and rating): continue - try: - votes = int(votes.replace(',', '').replace('.', '')) - except: - pass - try: - rating = float(rating) - except: - pass - ept = ept.strip() - ept = u'%s {%s' % (title, ept) - nr = i['nr'] - if nr: - ept += u' (#%s)' % nr.strip() - ept += '}' - 
if movieID is not None: - movieID = str(movieID) - m = Movie(title=ept, movieID=movieID, accessSystem=self._as, - modFunct=self._modFunct) - epofdict = m.get('episode of') - if epofdict is not None: - m['episode of'] = Movie(data=epofdict, accessSystem=self._as, - modFunct=self._modFunct) - nd.append({'episode': m, 'votes': votes, 'rating': rating}) - return {'episodes rating': nd} - - -def _normalize_href(href): - if (href is not None) and (not href.lower().startswith('http://')): - if href.startswith('/'): href = href[1:] - href = '%s%s' % (imdbURL_base, href) - return href - - -class DOMHTMLOfficialsitesParser(DOMParserBase): - """Parser for the "official sites", "external reviews", "newsgroup - reviews", "miscellaneous links", "sound clips", "video clips" and - "photographs" pages of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - osparser = DOMHTMLOfficialsitesParser() - result = osparser.parse(officialsites_html_string) - """ - kind = 'official sites' - - extractors = [ - Extractor(label='site', - path="//ol/li/a", - attrs=Attribute(key='self.kind', - multi=True, - path={ - 'link': "./@href", - 'info': "./text()" - }, - postprocess=lambda x: (x.get('info').strip(), - urllib.unquote(_normalize_href(x.get('link')))))) - ] - - -class DOMHTMLConnectionParser(DOMParserBase): - """Parser for the "connections" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - connparser = DOMHTMLConnectionParser() - result = connparser.parse(connections_html_string) - """ - _containsObjects = True - - extractors = [Extractor(label='connection', - group="//div[@class='_imdbpy']", - group_key="./h5/text()", - group_key_normalize=lambda x: x.lower(), - path="./a", - attrs=Attribute(key=None, - path={'title': "./text()", - 'movieID': "./@href"}, - multi=True))] - - preprocessors = [ - ('
    ', '
    '), - # To get the movie's year. - (' (', ' ('), - ('\n
    ', ''), - ('
    - ', '::') - ] - - def postprocess_data(self, data): - for key in data.keys(): - nl = [] - for v in data[key]: - title = v['title'] - ts = title.split('::', 1) - title = ts[0].strip() - notes = u'' - if len(ts) == 2: - notes = ts[1].strip() - m = Movie(title=title, - movieID=analyze_imdbid(v['movieID']), - accessSystem=self._as, notes=notes, - modFunct=self._modFunct) - nl.append(m) - data[key] = nl - if not data: return {} - return {'connections': data} - - -class DOMHTMLLocationsParser(DOMParserBase): - """Parser for the "locations" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - lparser = DOMHTMLLocationsParser() - result = lparser.parse(locations_html_string) - """ - extractors = [Extractor(label='locations', path="//dt", - attrs=Attribute(key='locations', multi=True, - path={'place': ".//text()", - 'note': "./following-sibling::dd[1]" \ - "//text()"}, - postprocess=lambda x: (u'%s::%s' % ( - x['place'].strip(), - (x['note'] or u'').strip())).strip(':')))] - - -class DOMHTMLTechParser(DOMParserBase): - """Parser for the "technical", "business", "literature", - "publicity" (for people) and "contacts (for people) pages of - a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - tparser = HTMLTechParser() - result = tparser.parse(technical_html_string) - """ - kind = 'tech' - - extractors = [Extractor(label='tech', - group="//h5", - group_key="./text()", - group_key_normalize=lambda x: x.lower(), - path="./following-sibling::div[1]", - attrs=Attribute(key=None, - path=".//text()", - postprocess=lambda x: [t.strip() - for t in x.split('\n') if t.strip()]))] - - preprocessors = [ - (re.compile('(
    .*?
    )', re.I), r'\1
    '), - (re.compile('((
    |

    |
    ))\n?
    (?!'), - # the ones below are for the publicity parser - (re.compile('

    (.*?)

    ', re.I), r'\1
    '), - (re.compile('()', re.I), r'\1::'), - (re.compile('()', re.I), r'\n\1'), - # this is for splitting individual entries - (re.compile('
    ', re.I), r'\n'), - ] - - def postprocess_data(self, data): - for key in data: - data[key] = filter(None, data[key]) - if self.kind in ('literature', 'business', 'contacts') and data: - if 'screenplay/teleplay' in data: - data['screenplay-teleplay'] = data['screenplay/teleplay'] - del data['screenplay/teleplay'] - data = {self.kind: data} - else: - if self.kind == 'publicity': - if 'biography (print)' in data: - data['biography-print'] = data['biography (print)'] - del data['biography (print)'] - # Tech info. - for key in data.keys(): - if key.startswith('film negative format'): - data['film negative format'] = data[key] - del data[key] - elif key.startswith('film length'): - data['film length'] = data[key] - del data[key] - return data - - -class DOMHTMLDvdParser(DOMParserBase): - """Parser for the "dvd" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - dparser = DOMHTMLDvdParser() - result = dparser.parse(dvd_html_string) - """ - _defGetRefs = True - extractors = [Extractor(label='dvd', - path="//div[@class='base_layer']", - attrs=[Attribute(key=None, - multi=True, - path={ - 'title': "../table[1]//h3/text()", - 'cover': "../table[1]//img/@src", - 'region': ".//p[b='Region:']/text()", - 'asin': ".//p[b='ASIN:']/text()", - 'upc': ".//p[b='UPC:']/text()", - 'rating': ".//p/b[starts-with(text(), 'Rating:')]/../img/@alt", - 'certificate': ".//p[b='Certificate:']/text()", - 'runtime': ".//p[b='Runtime:']/text()", - 'label': ".//p[b='Label:']/text()", - 'studio': ".//p[b='Studio:']/text()", - 'release date': ".//p[b='Release Date:']/text()", - 'dvd format': ".//p[b='DVD Format:']/text()", - 'dvd features': ".//p[b='DVD Features: ']//text()", - 'supplements': "..//div[span='Supplements']" \ - "/following-sibling::div[1]//text()", - 'review': "..//div[span='Review']/following-sibling::div[1]//text()", - 'titles': "..//div[starts-with(text(), 'Titles in this Product')]" \ - "/..//text()", - }, - postprocess=lambda x: { - 'title': (x.get('title') or u'').strip(), - 'cover': (x.get('cover') or u'').strip(), - 'region': (x.get('region') or u'').strip(), - 'asin': (x.get('asin') or u'').strip(), - 'upc': (x.get('upc') or u'').strip(), - 'rating': (x.get('rating') or u'Not Rated').strip().replace('Rating: ', ''), - 'certificate': (x.get('certificate') or u'').strip(), - 'runtime': (x.get('runtime') or u'').strip(), - 'label': (x.get('label') or u'').strip(), - 'studio': (x.get('studio') or u'').strip(), - 'release date': (x.get('release date') or u'').strip(), - 'dvd format': (x.get('dvd format') or u'').strip(), - 'dvd features': (x.get('dvd features') or u'').strip().replace('DVD Features: ', ''), - 'supplements': (x.get('supplements') or u'').strip(), - 'review': (x.get('review') or u'').strip(), - 'titles in this product': (x.get('titles') or u'').strip().replace('Titles in this Product::', ''), - } - )])] - 
- preprocessors = [ - (re.compile('

    (\s*', re.I), - r'
    \1
    '), - (re.compile('

    (

    \s*

    (

    ', re.I), r'::') - ] - - def postprocess_data(self, data): - if not data: - return data - dvds = data['dvd'] - for dvd in dvds: - if dvd['cover'].find('noposter') != -1: - del dvd['cover'] - for key in dvd.keys(): - if not dvd[key]: - del dvd[key] - if 'supplements' in dvd: - dvd['supplements'] = dvd['supplements'].split('::') - return data - - -class DOMHTMLRecParser(DOMParserBase): - """Parser for the "recommendations" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - rparser = HTMLRecParser() - result = rparser.parse(recommendations_html_string) - """ - _containsObjects = True - - extractors = [Extractor(label='recommendations', - path="//td[@valign='middle'][1]", - attrs=Attribute(key='../../tr/td[1]//text()', - multi=True, - path={'title': ".//text()", - 'movieID': ".//a/@href"}))] - def postprocess_data(self, data): - for key in data.keys(): - n_key = key - n_keyl = n_key.lower() - if n_keyl == 'suggested by the database': - n_key = 'database' - elif n_keyl == 'imdb users recommend': - n_key = 'users' - data[n_key] = [Movie(title=x['title'], - movieID=analyze_imdbid(x['movieID']), - accessSystem=self._as, modFunct=self._modFunct) - for x in data[key]] - del data[key] - if data: return {'recommendations': data} - return data - - -class DOMHTMLNewsParser(DOMParserBase): - """Parser for the "news" page of a given movie or person. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - nwparser = DOMHTMLNewsParser() - result = nwparser.parse(news_html_string) - """ - _defGetRefs = True - - extractors = [ - Extractor(label='news', - path="//h2", - attrs=Attribute(key='news', - multi=True, - path={ - 'title': "./text()", - 'fromdate': "../following-sibling::p[1]/small//text()", - # FIXME: sometimes (see The Matrix (1999))

    is found - # inside news text. - 'body': "../following-sibling::p[2]//text()", - 'link': "../..//a[text()='Permalink']/@href", - 'fulllink': "../..//a[starts-with(text(), " \ - "'See full article at')]/@href" - }, - postprocess=lambda x: { - 'title': x.get('title').strip(), - 'date': x.get('fromdate').split('|')[0].strip(), - 'from': x.get('fromdate').split('|')[1].replace('From ', - '').strip(), - 'body': (x.get('body') or u'').strip(), - 'link': _normalize_href(x.get('link')), - 'full article link': _normalize_href(x.get('fulllink')) - })) - ] - - preprocessors = [ - (re.compile('(]+>

    )', re.I), r'
    \1'), - (re.compile('(
    )', re.I), r'
    \1'), - (re.compile('

    ', re.I), r'') - ] - - def postprocess_data(self, data): - if not data.has_key('news'): - return {} - for news in data['news']: - if news.has_key('full article link'): - if news['full article link'] is None: - del news['full article link'] - return data - - -def _parse_review(x): - result = {} - title = x.get('title').strip() - if title[-1] == ':': title = title[:-1] - result['title'] = title - result['link'] = _normalize_href(x.get('link')) - kind = x.get('kind').strip() - if kind[-1] == ':': kind = kind[:-1] - result['review kind'] = kind - text = x.get('review').replace('\n\n', '||').replace('\n', ' ').split('||') - review = '\n'.join(text) - if x.get('author') is not None: - author = x.get('author').strip() - review = review.split(author)[0].strip() - result['review author'] = author[2:] - if x.get('item') is not None: - item = x.get('item').strip() - review = review[len(item):].strip() - review = "%s: %s" % (item, review) - result['review'] = review - return result - - -class DOMHTMLAmazonReviewsParser(DOMParserBase): - """Parser for the "amazon reviews" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - arparser = DOMHTMLAmazonReviewsParser() - result = arparser.parse(amazonreviews_html_string) - """ - extractors = [ - Extractor(label='amazon reviews', - group="//h3", - group_key="./a/text()", - group_key_normalize=lambda x: x[:-1], - path="./following-sibling::p[1]/span[@class='_review']", - attrs=Attribute(key=None, - multi=True, - path={ - 'title': "../preceding-sibling::h3[1]/a[1]/text()", - 'link': "../preceding-sibling::h3[1]/a[1]/@href", - 'kind': "./preceding-sibling::b[1]/text()", - 'item': "./i/b/text()", - 'review': ".//text()", - 'author': "./i[starts-with(text(), '--')]/text()" - }, - postprocess=_parse_review)) - ] - - preprocessors = [ - (re.compile('

    \n(?!)', re.I), r'\n'), - (re.compile('(\n\n)', re.I), r'\1'), - (re.compile('(

    \n\n)', re.I), r'\1'), - (re.compile('(\s\n)()', re.I), r'\1\2') - ] - - def postprocess_data(self, data): - if len(data) == 0: - return {} - nd = [] - for item in data.keys(): - nd = nd + data[item] - return {'amazon reviews': nd} - - -def _parse_merchandising_link(x): - result = {} - link = x.get('link') - result['link'] = _normalize_href(link) - text = x.get('text') - if text is not None: - result['link-text'] = text.strip() - cover = x.get('cover') - if cover is not None: - result['cover'] = cover - description = x.get('description') - if description is not None: - shop = x.get('shop') - if shop is not None: - result['description'] = u'%s::%s' % (shop, description.strip()) - else: - result['description'] = description.strip() - return result - - -class DOMHTMLSalesParser(DOMParserBase): - """Parser for the "merchandising links" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - sparser = DOMHTMLSalesParser() - result = sparser.parse(sales_html_string) - """ - extractors = [ - Extractor(label='shops', - group="//h5/a[@name]/..", - group_key="./a[1]/text()", - group_key_normalize=lambda x: x.lower(), - path=".//following-sibling::table[1]/" \ - "/td[@class='w_rowtable_colshop']//tr[1]", - attrs=Attribute(key=None, - multi=True, - path={ - 'link': "./td[2]/a[1]/@href", - 'text': "./td[1]/img[1]/@alt", - 'cover': "./ancestor::td[1]/../td[1]"\ - "/a[1]/img[1]/@src", - }, - postprocess=_parse_merchandising_link)), - Extractor(label='others', - group="//span[@class='_info']/..", - group_key="./h5/a[1]/text()", - group_key_normalize=lambda x: x.lower(), - path="./span[@class='_info']", - attrs=Attribute(key=None, - multi=True, - path={ - 'link': "./preceding-sibling::a[1]/@href", - 'shop': "./preceding-sibling::a[1]/text()", - 'description': ".//text()", - }, - postprocess=_parse_merchandising_link)) - ] - - preprocessors = [ - (re.compile('(
    \1'), - (re.compile('(
    \n
    \n)

    ', re.I), r'\1'), - (re.compile('(

    \n)(\n)', re.I), r'\1
    \2'), - (re.compile('(\n)(Search.*?)()(\n)', re.I), r'\3\1\2\4'), - (re.compile('(\n)(Search.*?)(\n)', re.I), - r'\1\2\3') - ] - - def postprocess_data(self, data): - if len(data) == 0: - return {} - return {'merchandising links': data} - - -def _build_episode(x): - """Create a Movie object for a given series' episode.""" - episode_id = analyze_imdbid(x.get('link')) - episode_title = x.get('title') - e = Movie(movieID=episode_id, title=episode_title) - e['kind'] = u'episode' - oad = x.get('oad') - if oad: - e['original air date'] = oad.strip() - year = x.get('year') - if year is not None: - year = year[5:] - if year == 'unknown': year = u'????' - if year and year.isdigit(): - year = int(year) - e['year'] = year - else: - if oad and oad[-4:].isdigit(): - e['year'] = int(oad[-4:]) - epinfo = x.get('episode') - if epinfo is not None: - season, episode = epinfo.split(':')[0].split(',') - e['season'] = int(season[7:]) - e['episode'] = int(episode[8:]) - else: - e['season'] = 'unknown' - e['episode'] = 'unknown' - plot = x.get('plot') - if plot: - e['plot'] = plot.strip() - return e - - -class DOMHTMLEpisodesParser(DOMParserBase): - """Parser for the "episode list" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - eparser = DOMHTMLEpisodesParser() - result = eparser.parse(episodes_html_string) - """ - _containsObjects = True - - kind = 'episodes list' - _episodes_path = "..//h4" - _oad_path = "./following-sibling::span/strong[1]/text()" - - def _init(self): - self.extractors = [ - Extractor(label='series', - path="//html", - attrs=[Attribute(key='series title', - path=".//title/text()"), - Attribute(key='series movieID', - path=".//h1/a[@class='main']/@href", - postprocess=analyze_imdbid) - ]), - Extractor(label='episodes', - group="//div[@class='_imdbpy']/h3", - group_key="./a/@name", - path=self._episodes_path, - attrs=Attribute(key=None, - multi=True, - path={ - 'link': "./a/@href", - 'title': "./a/text()", - 'year': "./preceding-sibling::a[1]/@name", - 'episode': "./text()[1]", - 'oad': self._oad_path, - 'plot': "./following-sibling::text()[1]" - }, - postprocess=_build_episode))] - if self.kind == 'episodes cast': - self.extractors += [ - Extractor(label='cast', - group="//h4", - group_key="./text()[1]", - group_key_normalize=lambda x: x.strip(), - path="./following-sibling::table[1]//td[@class='nm']", - attrs=Attribute(key=None, - multi=True, - path={'person': "..//text()", - 'link': "./a/@href", - 'roleID': \ - "../td[4]/div[@class='_imdbpyrole']/@roleid"}, - postprocess=lambda x: \ - build_person(x.get('person') or u'', - personID=analyze_imdbid(x.get('link')), - roleID=(x.get('roleID') or u'').split('/'), - accessSystem=self._as, - modFunct=self._modFunct))) - ] - - preprocessors = [ - (re.compile('(
    \n)(

    )', re.I), - r'

    \1
    \2'), - (re.compile('(

    \n\n)
    ', re.I), r'\1'), - (re.compile('

    (.*?)

    ', re.I), r'

    \1

    '), - (_reRolesMovie, _manageRoles), - (re.compile('(

    \n)(
    )', re.I), r'\1\2') - ] - - def postprocess_data(self, data): - # A bit extreme? - if not 'series title' in data: return {} - if not 'series movieID' in data: return {} - stitle = data['series title'].replace('- Episode list', '') - stitle = stitle.replace('- Episodes list', '') - stitle = stitle.replace('- Episode cast', '') - stitle = stitle.replace('- Episodes cast', '') - stitle = stitle.strip() - if not stitle: return {} - seriesID = data['series movieID'] - if seriesID is None: return {} - series = Movie(title=stitle, movieID=str(seriesID), - accessSystem=self._as, modFunct=self._modFunct) - nd = {} - for key in data.keys(): - if key.startswith('season-'): - season_key = key[7:] - try: season_key = int(season_key) - except: pass - nd[season_key] = {} - ep_counter = 1 - for episode in data[key]: - if not episode: continue - episode_key = episode.get('episode') - if episode_key is None: continue - if not isinstance(episode_key, int): - episode_key = ep_counter - ep_counter += 1 - cast_key = 'Season %s, Episode %s:' % (season_key, - episode_key) - if data.has_key(cast_key): - cast = data[cast_key] - for i in xrange(len(cast)): - cast[i].billingPos = i + 1 - episode['cast'] = cast - episode['episode of'] = series - nd[season_key][episode_key] = episode - if len(nd) == 0: - return {} - return {'episodes': nd} - - -class DOMHTMLEpisodesCastParser(DOMHTMLEpisodesParser): - """Parser for the "episodes cast" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - eparser = DOMHTMLEpisodesParser() - result = eparser.parse(episodes_html_string) - """ - kind = 'episodes cast' - _episodes_path = "..//h4" - _oad_path = "./following-sibling::b[1]/text()" - - -class DOMHTMLFaqsParser(DOMParserBase): - """Parser for the "FAQ" page of a given movie. 
- The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - fparser = DOMHTMLFaqsParser() - result = fparser.parse(faqs_html_string) - """ - _defGetRefs = True - - # XXX: bsoup and lxml don't match (looks like a minor issue, anyway). - - extractors = [ - Extractor(label='faqs', - path="//div[@class='section']", - attrs=Attribute(key='faqs', - multi=True, - path={ - 'question': "./h3/a/span/text()", - 'answer': "../following-sibling::div[1]//text()" - }, - postprocess=lambda x: u'%s::%s' % (x.get('question').strip(), - '\n\n'.join(x.get('answer').replace( - '\n\n', '\n').strip().split('||'))))) - ] - - preprocessors = [ - (re.compile('

    ', re.I), r'||'), - (re.compile('

    (.*?)

    \n', re.I), r'||\1--'), - (re.compile('(.*?)', re.I), - r'[spoiler]\1[/spoiler]') - ] - - -class DOMHTMLAiringParser(DOMParserBase): - """Parser for the "airing" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - aparser = DOMHTMLAiringParser() - result = aparser.parse(airing_html_string) - """ - _containsObjects = True - - extractors = [ - Extractor(label='series title', - path="//title", - attrs=Attribute(key='series title', path="./text()", - postprocess=lambda x: \ - x.replace(' - TV schedule', u''))), - Extractor(label='series id', - path="//h1/a[@href]", - attrs=Attribute(key='series id', path="./@href")), - - Extractor(label='tv airings', - path="//tr[@class]", - attrs=Attribute(key='airing', - multi=True, - path={ - 'date': "./td[1]//text()", - 'time': "./td[2]//text()", - 'channel': "./td[3]//text()", - 'link': "./td[4]/a[1]/@href", - 'title': "./td[4]//text()", - 'season': "./td[5]//text()", - }, - postprocess=lambda x: { - 'date': x.get('date'), - 'time': x.get('time'), - 'channel': x.get('channel').strip(), - 'link': x.get('link'), - 'title': x.get('title'), - 'season': (x.get('season') or '').strip() - } - )) - ] - - def postprocess_data(self, data): - if len(data) == 0: - return {} - seriesTitle = data['series title'] - seriesID = analyze_imdbid(data['series id']) - if data.has_key('airing'): - for airing in data['airing']: - title = airing.get('title', '').strip() - if not title: - epsTitle = seriesTitle - if seriesID is None: - continue - epsID = seriesID - else: - epsTitle = '%s {%s}' % (data['series title'], - airing['title']) - epsID = analyze_imdbid(airing['link']) - e = Movie(title=epsTitle, movieID=epsID) - airing['episode'] = e - del airing['link'] - del airing['title'] - if not airing['season']: - del airing['season'] - if 'series title' in data: - del data['series title'] - if 'series 
id' in data: - del data['series id'] - if 'airing' in data: - data['airing'] = filter(None, data['airing']) - if 'airing' not in data or not data['airing']: - return {} - return data - - -class DOMHTMLSynopsisParser(DOMParserBase): - """Parser for the "synopsis" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - sparser = HTMLSynopsisParser() - result = sparser.parse(synopsis_html_string) - """ - extractors = [ - Extractor(label='synopsis', - path="//div[@class='display'][not(@style)]", - attrs=Attribute(key='synopsis', - path=".//text()", - postprocess=lambda x: '\n\n'.join(x.strip().split('||')))) - ] - - preprocessors = [ - (re.compile('

    ', re.I), r'||') - ] - - -class DOMHTMLParentsGuideParser(DOMParserBase): - """Parser for the "parents guide" page of a given movie. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - pgparser = HTMLParentsGuideParser() - result = pgparser.parse(parentsguide_html_string) - """ - extractors = [ - Extractor(label='parents guide', - group="//div[@class='section']", - group_key="./h3/a/span/text()", - group_key_normalize=lambda x: x.lower(), - path="../following-sibling::div[1]/p", - attrs=Attribute(key=None, - path=".//text()", - postprocess=lambda x: [t.strip().replace('\n', ' ') - for t in x.split('||') if t.strip()])) - ] - - preprocessors = [ - (re.compile('

    ', re.I), r'||') - ] - - def postprocess_data(self, data): - data2 = {} - for key in data: - if data[key]: - data2[key] = data[key] - if not data2: - return {} - return {'parents guide': data2} - - -_OBJECTS = { - 'movie_parser': ((DOMHTMLMovieParser,), None), - 'plot_parser': ((DOMHTMLPlotParser,), None), - 'movie_awards_parser': ((DOMHTMLAwardsParser,), None), - 'taglines_parser': ((DOMHTMLTaglinesParser,), None), - 'keywords_parser': ((DOMHTMLKeywordsParser,), None), - 'crazycredits_parser': ((DOMHTMLCrazyCreditsParser,), None), - 'goofs_parser': ((DOMHTMLGoofsParser,), None), - 'alternateversions_parser': ((DOMHTMLAlternateVersionsParser,), None), - 'trivia_parser': ((DOMHTMLTriviaParser,), None), - 'soundtrack_parser': ((DOMHTMLSoundtrackParser,), {'kind': 'soundtrack'}), - 'quotes_parser': ((DOMHTMLQuotesParser,), None), - 'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None), - 'ratings_parser': ((DOMHTMLRatingsParser,), None), - 'officialsites_parser': ((DOMHTMLOfficialsitesParser,), None), - 'externalrev_parser': ((DOMHTMLOfficialsitesParser,), - {'kind': 'external reviews'}), - 'newsgrouprev_parser': ((DOMHTMLOfficialsitesParser,), - {'kind': 'newsgroup reviews'}), - 'misclinks_parser': ((DOMHTMLOfficialsitesParser,), - {'kind': 'misc links'}), - 'soundclips_parser': ((DOMHTMLOfficialsitesParser,), - {'kind': 'sound clips'}), - 'videoclips_parser': ((DOMHTMLOfficialsitesParser,), - {'kind': 'video clips'}), - 'photosites_parser': ((DOMHTMLOfficialsitesParser,), - {'kind': 'photo sites'}), - 'connections_parser': ((DOMHTMLConnectionParser,), None), - 'tech_parser': ((DOMHTMLTechParser,), None), - 'business_parser': ((DOMHTMLTechParser,), - {'kind': 'business', '_defGetRefs': 1}), - 'literature_parser': ((DOMHTMLTechParser,), {'kind': 'literature'}), - 'locations_parser': ((DOMHTMLLocationsParser,), None), - 'dvd_parser': ((DOMHTMLDvdParser,), None), - 'rec_parser': ((DOMHTMLRecParser,), None), - 'news_parser': ((DOMHTMLNewsParser,), None), - 
'amazonrev_parser': ((DOMHTMLAmazonReviewsParser,), None), - 'sales_parser': ((DOMHTMLSalesParser,), None), - 'episodes_parser': ((DOMHTMLEpisodesParser,), None), - 'episodes_cast_parser': ((DOMHTMLEpisodesCastParser,), None), - 'eprating_parser': ((DOMHTMLEpisodesRatings,), None), - 'movie_faqs_parser': ((DOMHTMLFaqsParser,), None), - 'airing_parser': ((DOMHTMLAiringParser,), None), - 'synopsis_parser': ((DOMHTMLSynopsisParser,), None), - 'parentsguide_parser': ((DOMHTMLParentsGuideParser,), None) -} - diff --git a/libs/imdb/parser/http/personParser.py b/libs/imdb/parser/http/personParser.py deleted file mode 100644 index a5fbc081..00000000 --- a/libs/imdb/parser/http/personParser.py +++ /dev/null @@ -1,559 +0,0 @@ -""" -parser.http.personParser module (imdb package). - -This module provides the classes (and the instances), used to parse -the IMDb pages on the akas.imdb.com server about a person. -E.g., for "Mel Gibson" the referred pages would be: - categorized: http://akas.imdb.com/name/nm0000154/maindetails - biography: http://akas.imdb.com/name/nm0000154/bio - ...and so on... - -Copyright 2004-2010 Davide Alberani - 2008 H. Turgut Uyar - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import re -from imdb.Movie import Movie -from imdb.utils import analyze_name, canonicalName, normalizeName, \ - analyze_title, date_and_notes -from utils import build_movie, DOMParserBase, Attribute, Extractor, \ - analyze_imdbid - - -from movieParser import _manageRoles -_reRoles = re.compile(r'(
  • .*? \.\.\.\. )(.*?)(
  • |
    )', - re.I | re.M | re.S) - -def build_date(date): - day = date.get('day') - year = date.get('year') - if day and year: - return "%s %s" % (day, year) - if day: - return day - if year: - return year - return "" - -class DOMHTMLMaindetailsParser(DOMParserBase): - """Parser for the "categorized" (maindetails) page of a given person. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - cparser = DOMHTMLMaindetailsParser() - result = cparser.parse(categorized_html_string) - """ - _containsObjects = True - - _birth_attrs = [Attribute(key='birth date', - path={ - 'day': ".//a[starts-with(@href, " \ - "'/date/')]/text()", - 'year': ".//a[starts-with(@href, " \ - "'/search/name?birth_year=')]/text()" - }, - postprocess=build_date), - Attribute(key='birth place', - path=".//a[starts-with(@href, " \ - "'/search/name?birth_place=')]/text()")] - _death_attrs = [Attribute(key='death date', - path={ - 'day': ".//a[starts-with(@href, " \ - "'/date/')]/text()", - 'year': ".//a[starts-with(@href, " \ - "'/search/name?death_year=')]/text()" - }, - postprocess=build_date), - Attribute(key='death place', - path=".//a[starts-with(@href, " \ - "'/search/name?death_place=')]/text()")] - _film_attrs = [Attribute(key=None, - multi=True, - path={ - 'link': "./b/a[1]/@href", - 'title': "./b/a[1]/text()", - 'notes': "./b/following-sibling::text()", - 'year': "./span[@class='year_column']/text()", - 'status': "./a[@class='in_production']/text()", - 'rolesNoChar': './/br/following-sibling::text()', - 'chrRoles': "./a[@imdbpyname]/@imdbpyname", - 'roleID': "./a[starts-with(@href, '/character/')]/@href" - }, - postprocess=lambda x: - build_movie(x.get('title') or u'', - year=x.get('year'), - movieID=analyze_imdbid(x.get('link') or u''), - rolesNoChar=(x.get('rolesNoChar') or u'').strip(), - chrRoles=(x.get('chrRoles') or u'').strip(), - additionalNotes=x.get('notes'), 
- roleID=(x.get('roleID') or u''), - status=x.get('status') or None))] - - extractors = [ - Extractor(label='name', - path="//h1[@class='header']", - attrs=Attribute(key='name', - path=".//text()", - postprocess=lambda x: analyze_name(x, - canonical=1))), - - Extractor(label='birth info', - path="//div[h4='Born:']", - attrs=_birth_attrs), - - Extractor(label='death info', - path="//div[h4='Died:']", - attrs=_death_attrs), - - Extractor(label='headshot', - path="//td[@id='img_primary']/a", - attrs=Attribute(key='headshot', - path="./img/@src")), - - Extractor(label='akas', - path="//div[h4='Alternate Names:']", - attrs=Attribute(key='akas', - path="./text()", - postprocess=lambda x: x.strip().split(' '))), - - Extractor(label='filmography', - group="//div[starts-with(@id, 'filmo-head-')]", - group_key="./a[@name]/text()", - group_key_normalize=lambda x: x.lower().replace(': ', ' '), - path="./following-sibling::div[1]" \ - "/div[starts-with(@class, 'filmo-row')]", - attrs=_film_attrs), - - Extractor(label='indevelopment', - path="//div[starts-with(@class,'devitem')]", - attrs=Attribute(key='in development', - multi=True, - path={ - 'link': './a/@href', - 'title': './a/text()' - }, - postprocess=lambda x: - build_movie(x.get('title') or u'', - movieID=analyze_imdbid(x.get('link') or u''), - roleID=(x.get('roleID') or u'').split('/'), - status=x.get('status') or None))) - ] - - preprocessors = [('
    ', ''), - ('
    ', '
    '), - (re.compile(r'((.*?)'), - r'\1 imdbpyname="\2@@">\2')] - - def postprocess_data(self, data): - for what in 'birth date', 'death date': - if what in data and not data[what]: - del data[what] - # XXX: the code below is for backwards compatibility - # probably could be removed - for key in data.keys(): - if key.startswith('actor '): - if not data.has_key('actor'): - data['actor'] = [] - data['actor'].extend(data[key]) - del data[key] - if key.startswith('actress '): - if not data.has_key('actress'): - data['actress'] = [] - data['actress'].extend(data[key]) - del data[key] - if key.startswith('self '): - if not data.has_key('self'): - data['self'] = [] - data['self'].extend(data[key]) - del data[key] - if key == 'birth place': - data['birth notes'] = data[key] - del data[key] - if key == 'death place': - data['death notes'] = data[key] - del data[key] - return data - - -class DOMHTMLBioParser(DOMParserBase): - """Parser for the "biography" page of a given person. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - bioparser = DOMHTMLBioParser() - result = bioparser.parse(biography_html_string) - """ - _defGetRefs = True - - _birth_attrs = [Attribute(key='birth date', - path={ - 'day': "./a[starts-with(@href, " \ - "'/date/')]/text()", - 'year': "./a[starts-with(@href, " \ - "'/search/name?birth_year=')]/text()" - }, - postprocess=build_date), - Attribute(key='birth notes', - path="./a[starts-with(@href, " \ - "'/search/name?birth_place=')]/text()")] - _death_attrs = [Attribute(key='death date', - path={ - 'day': "./a[starts-with(@href, " \ - "'/date/')]/text()", - 'year': "./a[starts-with(@href, " \ - "'/search/name?death_date=')]/text()" - }, - postprocess=build_date), - Attribute(key='death notes', - path="./text()", - # TODO: check if this slicing is always correct - postprocess=lambda x: u''.join(x).strip()[2:])] - extractors = [ - Extractor(label='headshot', - path="//a[@name='headshot']", - attrs=Attribute(key='headshot', - path="./img/@src")), - Extractor(label='birth info', - path="//div[h5='Date of Birth']", - attrs=_birth_attrs), - Extractor(label='death info', - path="//div[h5='Date of Death']", - attrs=_death_attrs), - Extractor(label='nick names', - path="//div[h5='Nickname']", - attrs=Attribute(key='nick names', - path="./text()", - joiner='|', - postprocess=lambda x: [n.strip().replace(' (', - '::(', 1) for n in x.split('|') - if n.strip()])), - Extractor(label='birth name', - path="//div[h5='Birth Name']", - attrs=Attribute(key='birth name', - path="./text()", - postprocess=lambda x: canonicalName(x.strip()))), - Extractor(label='height', - path="//div[h5='Height']", - attrs=Attribute(key='height', - path="./text()", - postprocess=lambda x: x.strip())), - Extractor(label='mini biography', - path="//div[h5='Mini Biography']", - attrs=Attribute(key='mini biography', - multi=True, - path={ - 'bio': "./p//text()", - 'by': "./b/following-sibling::a/text()" - }, - postprocess=lambda x: "%s::%s" % \ - (x.get('bio').strip(), - (x.get('by') or 
u'').strip() or u'Anonymous'))), - Extractor(label='spouse', - path="//div[h5='Spouse']/table/tr", - attrs=Attribute(key='spouse', - multi=True, - path={ - 'name': "./td[1]//text()", - 'info': "./td[2]//text()" - }, - postprocess=lambda x: ("%s::%s" % \ - (x.get('name').strip(), - (x.get('info') or u'').strip())).strip(':'))), - Extractor(label='trade mark', - path="//div[h5='Trade Mark']/p", - attrs=Attribute(key='trade mark', - multi=True, - path=".//text()", - postprocess=lambda x: x.strip())), - Extractor(label='trivia', - path="//div[h5='Trivia']/p", - attrs=Attribute(key='trivia', - multi=True, - path=".//text()", - postprocess=lambda x: x.strip())), - Extractor(label='quotes', - path="//div[h5='Personal Quotes']/p", - attrs=Attribute(key='quotes', - multi=True, - path=".//text()", - postprocess=lambda x: x.strip())), - Extractor(label='salary', - path="//div[h5='Salary']/table/tr", - attrs=Attribute(key='salary history', - multi=True, - path={ - 'title': "./td[1]//text()", - 'info': "./td[2]/text()", - }, - postprocess=lambda x: "%s::%s" % \ - (x.get('title').strip(), - x.get('info').strip()))), - Extractor(label='where now', - path="//div[h5='Where Are They Now']/p", - attrs=Attribute(key='where now', - multi=True, - path=".//text()", - postprocess=lambda x: x.strip())), - ] - - preprocessors = [ - (re.compile('(
    )', re.I), r'
    \1'), - (re.compile('(
    \n\s+)', re.I + re.DOTALL), r'\1'), - (re.compile('(

    )'), r'
    \1'), - (re.compile('\.

    ([^\s])', re.I), r'. \1') - ] - - def postprocess_data(self, data): - for what in 'birth date', 'death date': - if what in data and not data[what]: - del data[what] - return data - - -class DOMHTMLOtherWorksParser(DOMParserBase): - """Parser for the "other works" and "agent" pages of a given person. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - owparser = DOMHTMLOtherWorksParser() - result = owparser.parse(otherworks_html_string) - """ - _defGetRefs = True - kind = 'other works' - - # XXX: looks like the 'agent' page is no more public. - extractors = [ - Extractor(label='other works', - path="//h5[text()='Other works']/" \ - "following-sibling::div[1]", - attrs=Attribute(key='self.kind', - path=".//text()", - postprocess=lambda x: x.strip().split('\n\n'))) - ] - - preprocessors = [ - (re.compile('(
    [^<]+
    )', re.I), - r'\1
    '), - (re.compile('(\n
    \s+)', re.I), r'\1'), - (re.compile('(
    )'), r'
    \1'), - (re.compile('

    ', re.I), r'\n\n') - ] - - -def _build_episode(link, title, minfo, role, roleA, roleAID): - """Build an Movie object for a given episode of a series.""" - episode_id = analyze_imdbid(link) - notes = u'' - minidx = minfo.find(' -') - # Sometimes, for some unknown reason, the role is left in minfo. - if minidx != -1: - slfRole = minfo[minidx+3:].lstrip() - minfo = minfo[:minidx].rstrip() - if slfRole.endswith(')'): - commidx = slfRole.rfind('(') - if commidx != -1: - notes = slfRole[commidx:] - slfRole = slfRole[:commidx] - if slfRole and role is None and roleA is None: - role = slfRole - eps_data = analyze_title(title) - eps_data['kind'] = u'episode' - # FIXME: it's wrong for multiple characters (very rare on tv series?). - if role is None: - role = roleA # At worse, it's None. - if role is None: - roleAID = None - if roleAID is not None: - roleAID = analyze_imdbid(roleAID) - e = Movie(movieID=episode_id, data=eps_data, currentRole=role, - roleID=roleAID, notes=notes) - # XXX: are we missing some notes? - # XXX: does it parse things as "Episode dated 12 May 2005 (12 May 2005)"? - if minfo.startswith('('): - pe = minfo.find(')') - if pe != -1: - date = minfo[1:pe] - if date != '????': - e['original air date'] = date - if eps_data.get('year', '????') == '????': - syear = date.split()[-1] - if syear.isdigit(): - e['year'] = int(syear) - return e - - -class DOMHTMLSeriesParser(DOMParserBase): - """Parser for the "by TV series" page of a given person. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - sparser = DOMHTMLSeriesParser() - result = sparser.parse(filmoseries_html_string) - """ - _containsObjects = True - - extractors = [ - Extractor(label='series', - group="//div[@class='filmo']/span[1]", - group_key="./a[1]", - path="./following-sibling::ol[1]/li/a[1]", - attrs=Attribute(key=None, - multi=True, - path={ - 'link': "./@href", - 'title': "./text()", - 'info': "./following-sibling::text()", - 'role': "./following-sibling::i[1]/text()", - 'roleA': "./following-sibling::a[1]/text()", - 'roleAID': "./following-sibling::a[1]/@href" - }, - postprocess=lambda x: _build_episode(x.get('link'), - x.get('title'), - (x.get('info') or u'').strip(), - x.get('role'), - x.get('roleA'), - x.get('roleAID')))) - ] - - def postprocess_data(self, data): - if len(data) == 0: - return {} - nd = {} - for key in data.keys(): - dom = self.get_dom(key) - link = self.xpath(dom, "//a/@href")[0] - title = self.xpath(dom, "//a/text()")[0][1:-1] - series = Movie(movieID=analyze_imdbid(link), - data=analyze_title(title), - accessSystem=self._as, modFunct=self._modFunct) - nd[series] = [] - for episode in data[key]: - # XXX: should we create a copy of 'series', to avoid - # circular references? - episode['episode of'] = series - nd[series].append(episode) - return {'episodes': nd} - - -class DOMHTMLPersonGenresParser(DOMParserBase): - """Parser for the "by genre" and "by keywords" pages of a given person. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - gparser = DOMHTMLPersonGenresParser() - result = gparser.parse(bygenre_html_string) - """ - kind = 'genres' - _containsObjects = True - - extractors = [ - Extractor(label='genres', - group="//b/a[@name]/following-sibling::a[1]", - group_key="./text()", - group_key_normalize=lambda x: x.lower(), - path="../../following-sibling::ol[1]/li//a[1]", - attrs=Attribute(key=None, - multi=True, - path={ - 'link': "./@href", - 'title': "./text()", - 'info': "./following-sibling::text()" - }, - postprocess=lambda x: \ - build_movie(x.get('title') + \ - x.get('info').split('[')[0], - analyze_imdbid(x.get('link'))))) - ] - - def postprocess_data(self, data): - if len(data) == 0: - return {} - return {self.kind: data} - - -from movieParser import _parse_merchandising_link - -class DOMHTMLPersonSalesParser(DOMParserBase): - """Parser for the "merchandising links" page of a given person. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. 
- - Example: - sparser = DOMHTMLPersonSalesParser() - result = sparser.parse(sales_html_string) - """ - extractors = [ - Extractor(label='merchandising links', - group="//span[@class='merch_title']", - group_key=".//text()", - path="./following-sibling::table[1]/" \ - "/td[@class='w_rowtable_colshop']//tr[1]", - attrs=Attribute(key=None, - multi=True, - path={ - 'link': "./td[2]/a[1]/@href", - 'text': "./td[1]/img[1]/@alt", - 'cover': "./ancestor::td[1]/../" \ - "td[1]/a[1]/img[1]/@src", - }, - postprocess=_parse_merchandising_link)), - ] - - preprocessors = [ - (re.compile('(', re.I), r'\1>') - ] - - def postprocess_data(self, data): - if len(data) == 0: - return {} - return {'merchandising links': data} - - -from movieParser import DOMHTMLTechParser -from movieParser import DOMHTMLOfficialsitesParser -from movieParser import DOMHTMLAwardsParser -from movieParser import DOMHTMLNewsParser - - -_OBJECTS = { - 'maindetails_parser': ((DOMHTMLMaindetailsParser,), None), - 'bio_parser': ((DOMHTMLBioParser,), None), - 'otherworks_parser': ((DOMHTMLOtherWorksParser,), None), - #'agent_parser': ((DOMHTMLOtherWorksParser,), {'kind': 'agent'}), - 'person_officialsites_parser': ((DOMHTMLOfficialsitesParser,), None), - 'person_awards_parser': ((DOMHTMLAwardsParser,), {'subject': 'name'}), - 'publicity_parser': ((DOMHTMLTechParser,), {'kind': 'publicity'}), - 'person_series_parser': ((DOMHTMLSeriesParser,), None), - 'person_contacts_parser': ((DOMHTMLTechParser,), {'kind': 'contacts'}), - 'person_genres_parser': ((DOMHTMLPersonGenresParser,), None), - 'person_keywords_parser': ((DOMHTMLPersonGenresParser,), - {'kind': 'keywords'}), - 'news_parser': ((DOMHTMLNewsParser,), None), - 'sales_parser': ((DOMHTMLPersonSalesParser,), None) -} - diff --git a/libs/imdb/parser/http/searchCharacterParser.py b/libs/imdb/parser/http/searchCharacterParser.py deleted file mode 100644 index c81ca7e4..00000000 --- a/libs/imdb/parser/http/searchCharacterParser.py +++ /dev/null @@ -1,69 +0,0 @@ 
-""" -parser.http.searchCharacterParser module (imdb package). - -This module provides the HTMLSearchCharacterParser class (and the -search_character_parser instance), used to parse the results of a search -for a given character. -E.g., when searching for the name "Jesse James", the parsed page would be: - http://akas.imdb.com/find?s=Characters;mx=20;q=Jesse+James - -Copyright 2007-2009 Davide Alberani - 2008 H. Turgut Uyar - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -from imdb.utils import analyze_name, build_name -from utils import Extractor, Attribute, analyze_imdbid - -from searchMovieParser import DOMHTMLSearchMovieParser, DOMBasicMovieParser - - -class DOMBasicCharacterParser(DOMBasicMovieParser): - """Simply get the name of a character and the imdbID. 
- - It's used by the DOMHTMLSearchCharacterParser class to return a result - for a direct match (when a search on IMDb results in a single - character, the web server sends directly the movie page.""" - _titleFunct = lambda self, x: analyze_name(x or u'', canonical=False) - - -class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser): - _BaseParser = DOMBasicCharacterParser - _notDirectHitTitle = 'imdb search' - _titleBuilder = lambda self, x: build_name(x, canonical=False) - _linkPrefix = '/character/ch' - - _attrs = [Attribute(key='data', - multi=True, - path={ - 'link': "./a[1]/@href", - 'name': "./a[1]/text()" - }, - postprocess=lambda x: ( - analyze_imdbid(x.get('link') or u''), - {'name': x.get('name')} - ))] - extractors = [Extractor(label='search', - path="//td[3]/a[starts-with(@href, " \ - "'/character/ch')]/..", - attrs=_attrs)] - - -_OBJECTS = { - 'search_character_parser': ((DOMHTMLSearchCharacterParser,), - {'kind': 'character', '_basic_parser': DOMBasicCharacterParser}) -} - diff --git a/libs/imdb/parser/http/searchCompanyParser.py b/libs/imdb/parser/http/searchCompanyParser.py deleted file mode 100644 index ab666fbc..00000000 --- a/libs/imdb/parser/http/searchCompanyParser.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -parser.http.searchCompanyParser module (imdb package). - -This module provides the HTMLSearchCompanyParser class (and the -search_company_parser instance), used to parse the results of a search -for a given company. -E.g., when searching for the name "Columbia Pictures", the parsed page would be: - http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures - -Copyright 2008-2009 Davide Alberani <da@erlug.linux.it> - 2008 H. Turgut Uyar <uyar@tekir.org> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -from imdb.utils import analyze_company_name, build_company_name -from utils import Extractor, Attribute, analyze_imdbid - -from searchMovieParser import DOMHTMLSearchMovieParser, DOMBasicMovieParser - -class DOMBasicCompanyParser(DOMBasicMovieParser): - """Simply get the name of a company and the imdbID. - - It's used by the DOMHTMLSearchCompanyParser class to return a result - for a direct match (when a search on IMDb results in a single - company, the web server sends directly the company page. - """ - _titleFunct = lambda self, x: analyze_company_name(x or u'') - - -class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser): - _BaseParser = DOMBasicCompanyParser - _notDirectHitTitle = '<title>imdb company' - _titleBuilder = lambda self, x: build_company_name(x) - _linkPrefix = '/company/co' - - _attrs = [Attribute(key='data', - multi=True, - path={ - 'link': "./a[1]/@href", - 'name': "./a[1]/text()", - 'notes': "./text()[1]" - }, - postprocess=lambda x: ( - analyze_imdbid(x.get('link')), - analyze_company_name(x.get('name')+(x.get('notes') - or u''), stripNotes=True) - ))] - extractors = [Extractor(label='search', - path="//td[3]/a[starts-with(@href, " \ - "'/company/co')]/..", - attrs=_attrs)] - - -_OBJECTS = { - 'search_company_parser': ((DOMHTMLSearchCompanyParser,), - {'kind': 'company', '_basic_parser': DOMBasicCompanyParser}) -} - diff --git a/libs/imdb/parser/http/searchKeywordParser.py b/libs/imdb/parser/http/searchKeywordParser.py deleted file mode 100644 index ed72906c..00000000 --- 
a/libs/imdb/parser/http/searchKeywordParser.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -parser.http.searchKeywordParser module (imdb package). - -This module provides the HTMLSearchKeywordParser class (and the -search_company_parser instance), used to parse the results of a search -for a given keyword. -E.g., when searching for the keyword "alabama", the parsed page would be: - http://akas.imdb.com/find?s=kw;mx=20;q=alabama - -Copyright 2009 Davide Alberani <da@erlug.linux.it> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -from utils import Extractor, Attribute, analyze_imdbid -from imdb.utils import analyze_title, analyze_company_name - -from searchMovieParser import DOMHTMLSearchMovieParser, DOMBasicMovieParser - -class DOMBasicKeywordParser(DOMBasicMovieParser): - """Simply get the name of a keyword. - - It's used by the DOMHTMLSearchKeywordParser class to return a result - for a direct match (when a search on IMDb results in a single - keyword, the web server sends directly the keyword page. - """ - # XXX: it's still to be tested! - # I'm not even sure there can be a direct hit, searching for keywords. 
- _titleFunct = lambda self, x: analyze_company_name(x or u'') - - -class DOMHTMLSearchKeywordParser(DOMHTMLSearchMovieParser): - """Parse the html page that the IMDb web server shows when the - "new search system" is used, searching for keywords similar to - the one given.""" - - _BaseParser = DOMBasicKeywordParser - _notDirectHitTitle = '<title>imdb keyword' - _titleBuilder = lambda self, x: x - _linkPrefix = '/keyword/' - - _attrs = [Attribute(key='data', - multi=True, - path="./a[1]/text()" - )] - extractors = [Extractor(label='search', - path="//td[3]/a[starts-with(@href, " \ - "'/keyword/')]/..", - attrs=_attrs)] - - -def custom_analyze_title4kwd(title, yearNote, outline): - """Return a dictionary with the needed info.""" - title = title.strip() - if not title: - return {} - if yearNote: - yearNote = '%s)' % yearNote.split(' ')[0] - title = title + ' ' + yearNote - retDict = analyze_title(title) - if outline: - retDict['plot outline'] = outline - return retDict - - -class DOMHTMLSearchMovieKeywordParser(DOMHTMLSearchMovieParser): - """Parse the html page that the IMDb web server shows when the - "new search system" is used, searching for movies with the given - keyword.""" - - _notDirectHitTitle = '<title>best' - - _attrs = [Attribute(key='data', - multi=True, - path={ - 'link': "./a[1]/@href", - 'info': "./a[1]//text()", - 'ynote': "./span[@class='desc']/text()", - 'outline': "./span[@class='outline']//text()" - }, - postprocess=lambda x: ( - analyze_imdbid(x.get('link') or u''), - custom_analyze_title4kwd(x.get('info') or u'', - x.get('ynote') or u'', - x.get('outline') or u'') - ))] - - extractors = [Extractor(label='search', - path="//td[3]/a[starts-with(@href, " \ - "'/title/tt')]/..", - attrs=_attrs)] - - -_OBJECTS = { - 'search_keyword_parser': ((DOMHTMLSearchKeywordParser,), - {'kind': 'keyword', '_basic_parser': DOMBasicKeywordParser}), - 'search_moviekeyword_parser': ((DOMHTMLSearchMovieKeywordParser,), None) -} - diff --git 
a/libs/imdb/parser/http/searchMovieParser.py b/libs/imdb/parser/http/searchMovieParser.py deleted file mode 100644 index 2e7ace9a..00000000 --- a/libs/imdb/parser/http/searchMovieParser.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -parser.http.searchMovieParser module (imdb package). - -This module provides the HTMLSearchMovieParser class (and the -search_movie_parser instance), used to parse the results of a search -for a given title. -E.g., for when searching for the title "the passion", the parsed -page would be: - http://akas.imdb.com/find?q=the+passion&tt=on&mx=20 - -Copyright 2004-2010 Davide Alberani <da@erlug.linux.it> - 2008 H. Turgut Uyar <uyar@tekir.org> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import re -from imdb.utils import analyze_title, build_title -from utils import DOMParserBase, Attribute, Extractor, analyze_imdbid - - -class DOMBasicMovieParser(DOMParserBase): - """Simply get the title of a movie and the imdbID. - - It's used by the DOMHTMLSearchMovieParser class to return a result - for a direct match (when a search on IMDb results in a single - movie, the web server sends directly the movie page.""" - # Stay generic enough to be used also for other DOMBasic*Parser classes. 
- _titleAttrPath = ".//text()" - _linkPath = "//link[@rel='canonical']" - _titleFunct = lambda self, x: analyze_title(x or u'') - - def _init(self): - self.preprocessors += [('<span class="tv-extra">TV mini-series</span>', - '<span class="tv-extra">(mini)</span>')] - self.extractors = [Extractor(label='title', - path="//h1", - attrs=Attribute(key='title', - path=self._titleAttrPath, - postprocess=self._titleFunct)), - Extractor(label='link', - path=self._linkPath, - attrs=Attribute(key='link', path="./@href", - postprocess=lambda x: \ - analyze_imdbid((x or u'').replace( - 'http://pro.imdb.com', '')) - ))] - - # Remove 'More at IMDb Pro' links. - preprocessors = [(re.compile(r'<span class="pro-link".*?</span>'), ''), - (re.compile(r'<a href="http://ad.doubleclick.net.*?;id=(co[0-9]{7});'), r'<a href="http://pro.imdb.com/company/\1"></a>< a href="')] - - def postprocess_data(self, data): - if not 'link' in data: - data = [] - else: - link = data.pop('link') - if (link and data): - data = [(link, data)] - else: - data = [] - return data - - -def custom_analyze_title(title): - """Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)""" - # XXX: very crappy. :-( - nt = title.split(' ')[0] - if nt: - title = nt - if not title: - return {} - return analyze_title(title) - -# Manage AKAs. 
-_reAKAStitles = re.compile(r'(?:aka) <em>"(.*?)(<br>|<\/td>)', re.I | re.M) - -class DOMHTMLSearchMovieParser(DOMParserBase): - """Parse the html page that the IMDb web server shows when the - "new search system" is used, for movies.""" - - _BaseParser = DOMBasicMovieParser - _notDirectHitTitle = '<title>imdb title' - _titleBuilder = lambda self, x: build_title(x) - _linkPrefix = '/title/tt' - - _attrs = [Attribute(key='data', - multi=True, - path={ - 'link': "./a[1]/@href", - 'info': ".//text()", - #'akas': ".//div[@class='_imdbpyAKA']//text()" - 'akas': ".//p[@class='find-aka']//text()" - }, - postprocess=lambda x: ( - analyze_imdbid(x.get('link') or u''), - custom_analyze_title(x.get('info') or u''), - x.get('akas') - ))] - extractors = [Extractor(label='search', - path="//td[3]/a[starts-with(@href, '/title/tt')]/..", - attrs=_attrs)] - def _init(self): - self.url = u'' - - def _reset(self): - self.url = u'' - - def preprocess_string(self, html_string): - if self._notDirectHitTitle in html_string[:1024].lower(): - if self._linkPrefix == '/title/tt': - # Only for movies. - html_string = html_string.replace('(TV mini-series)', '(mini)') - html_string = html_string.replace('<p class="find-aka">', - '<p class="find-aka">::') - #html_string = _reAKAStitles.sub( - # r'<div class="_imdbpyAKA">\1::</div>\2', html_string) - return html_string - # Direct hit! - dbme = self._BaseParser(useModule=self._useModule) - res = dbme.parse(html_string, url=self.url) - if not res: return u'' - res = res['data'] - if not (res and res[0]): return u'' - link = '%s%s' % (self._linkPrefix, res[0][0]) - # # Tries to cope with companies for which links to pro.imdb.com - # # are missing. 
- # link = self.url.replace(imdbURL_base[:-1], '') - title = self._titleBuilder(res[0][1]) - if not (link and title): return u'' - link = link.replace('http://pro.imdb.com', '') - new_html = '<td></td><td></td><td><a href="%s">%s</a></td>' % (link, - title) - return new_html - - def postprocess_data(self, data): - if not data.has_key('data'): - data['data'] = [] - results = getattr(self, 'results', None) - if results is not None: - data['data'][:] = data['data'][:results] - # Horrible hack to support AKAs. - if data and data['data'] and len(data['data'][0]) == 3 and \ - isinstance(data['data'][0], tuple): - for idx, datum in enumerate(data['data']): - if not isinstance(datum, tuple): - continue - if datum[2] is not None: - akas = filter(None, datum[2].split('::')) - if self._linkPrefix == '/title/tt': - akas = [a.replace('" - ', '::').rstrip() for a in akas] - akas = [a.replace('aka "', '', 1).lstrip() for a in akas] - datum[1]['akas'] = akas - data['data'][idx] = (datum[0], datum[1]) - else: - data['data'][idx] = (datum[0], datum[1]) - return data - - def add_refs(self, data): - return data - - -_OBJECTS = { - 'search_movie_parser': ((DOMHTMLSearchMovieParser,), None) -} - diff --git a/libs/imdb/parser/http/searchPersonParser.py b/libs/imdb/parser/http/searchPersonParser.py deleted file mode 100644 index 1756efc5..00000000 --- a/libs/imdb/parser/http/searchPersonParser.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -parser.http.searchPersonParser module (imdb package). - -This module provides the HTMLSearchPersonParser class (and the -search_person_parser instance), used to parse the results of a search -for a given person. -E.g., when searching for the name "Mel Gibson", the parsed page would be: - http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20 - -Copyright 2004-2010 Davide Alberani <da@erlug.linux.it> - 2008 H. 
Turgut Uyar <uyar@tekir.org> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import re -from imdb.utils import analyze_name, build_name -from utils import Extractor, Attribute, analyze_imdbid - -from searchMovieParser import DOMHTMLSearchMovieParser, DOMBasicMovieParser - - -def _cleanName(n): - """Clean the name in a title tag.""" - if not n: - return u'' - n = n.replace('Filmography by type for', '') # FIXME: temporary. - return n - -class DOMBasicPersonParser(DOMBasicMovieParser): - """Simply get the name of a person and the imdbID. 
- - It's used by the DOMHTMLSearchPersonParser class to return a result - for a direct match (when a search on IMDb results in a single - person, the web server sends directly the movie page.""" - _titleFunct = lambda self, x: analyze_name(_cleanName(x), canonical=1) - - -_reAKASp = re.compile(r'(?:aka|birth name) (<em>")(.*?)"(<br>|<\/em>|<\/td>)', - re.I | re.M) - -class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser): - """Parse the html page that the IMDb web server shows when the - "new search system" is used, for persons.""" - _BaseParser = DOMBasicPersonParser - _notDirectHitTitle = '<title>imdb name' - _titleBuilder = lambda self, x: build_name(x, canonical=True) - _linkPrefix = '/name/nm' - - _attrs = [Attribute(key='data', - multi=True, - path={ - 'link': "./a[1]/@href", - 'name': "./a[1]/text()", - 'index': "./text()[1]", - 'akas': ".//div[@class='_imdbpyAKA']/text()" - }, - postprocess=lambda x: ( - analyze_imdbid(x.get('link') or u''), - analyze_name((x.get('name') or u'') + \ - (x.get('index') or u''), - canonical=1), x.get('akas') - ))] - extractors = [Extractor(label='search', - path="//td[3]/a[starts-with(@href, '/name/nm')]/..", - attrs=_attrs)] - - def preprocess_string(self, html_string): - if self._notDirectHitTitle in html_string[:1024].lower(): - html_string = _reAKASp.sub( - r'\1<div class="_imdbpyAKA">\2::</div>\3', - html_string) - return DOMHTMLSearchMovieParser.preprocess_string(self, html_string) - - -_OBJECTS = { - 'search_person_parser': ((DOMHTMLSearchPersonParser,), - {'kind': 'person', '_basic_parser': DOMBasicPersonParser}) -} - diff --git a/libs/imdb/parser/http/topBottomParser.py b/libs/imdb/parser/http/topBottomParser.py deleted file mode 100644 index f0f29509..00000000 --- a/libs/imdb/parser/http/topBottomParser.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -parser.http.topBottomParser module (imdb package). - -This module provides the classes (and the instances), used to parse the -lists of top 250 and bottom 100 movies. 
-E.g.: - http://akas.imdb.com/chart/top - http://akas.imdb.com/chart/bottom - -Copyright 2009 Davide Alberani <da@erlug.linux.it> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -from imdb.utils import analyze_title -from utils import DOMParserBase, Attribute, Extractor, analyze_imdbid - - -class DOMHTMLTop250Parser(DOMParserBase): - """Parser for the "top 250" page. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - tparser = DOMHTMLTop250Parser() - result = tparser.parse(top250_html_string) - """ - label = 'top 250' - ranktext = 'top 250 rank' - - def _init(self): - self.extractors = [Extractor(label=self.label, - path="//div[@id='main']//table//tr", - attrs=Attribute(key=None, - multi=True, - path={self.ranktext: "./td[1]//text()", - 'rating': "./td[2]//text()", - 'title': "./td[3]//text()", - 'movieID': "./td[3]//a/@href", - 'votes': "./td[4]//text()" - }))] - - def postprocess_data(self, data): - if not data or self.label not in data: - return [] - mlist = [] - data = data[self.label] - # Avoid duplicates. A real fix, using XPath, is auspicabile. - # XXX: probably this is no more needed. 
- seenIDs = [] - for d in data: - if 'movieID' not in d: continue - if self.ranktext not in d: continue - if 'title' not in d: continue - theID = analyze_imdbid(d['movieID']) - if theID is None: - continue - theID = str(theID) - if theID in seenIDs: - continue - seenIDs.append(theID) - minfo = analyze_title(d['title']) - try: minfo[self.ranktext] = int(d[self.ranktext].replace('.', '')) - except: pass - if 'votes' in d: - try: minfo['votes'] = int(d['votes'].replace(',', '')) - except: pass - if 'rating' in d: - try: minfo['rating'] = float(d['rating']) - except: pass - mlist.append((theID, minfo)) - return mlist - - -class DOMHTMLBottom100Parser(DOMHTMLTop250Parser): - """Parser for the "bottom 100" page. - The page should be provided as a string, as taken from - the akas.imdb.com server. The final result will be a - dictionary, with a key for every relevant section. - - Example: - tparser = DOMHTMLBottom100Parser() - result = tparser.parse(bottom100_html_string) - """ - label = 'bottom 100' - ranktext = 'bottom 100 rank' - - -_OBJECTS = { - 'top250_parser': ((DOMHTMLTop250Parser,), None), - 'bottom100_parser': ((DOMHTMLBottom100Parser,), None) -} - diff --git a/libs/imdb/parser/http/utils.py b/libs/imdb/parser/http/utils.py deleted file mode 100644 index eeca3b07..00000000 --- a/libs/imdb/parser/http/utils.py +++ /dev/null @@ -1,855 +0,0 @@ -""" -parser.http.utils module (imdb package). - -This module provides miscellaneous utilities used by -the imdb.parser.http classes. - -Copyright 2004-2010 Davide Alberani <da@erlug.linux.it> - 2008 H. Turgut Uyar <uyar@tekir.org> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import re -import logging - -from imdb._exceptions import IMDbError - -from imdb.utils import flatten, _Container -from imdb.Movie import Movie -from imdb.Person import Person -from imdb.Character import Character - - -# Year, imdbIndex and kind. -re_yearKind_index = re.compile(r'(\([0-9\?]{4}(?:/[IVXLCDM]+)?\)(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)') - -# Match imdb ids in href tags -re_imdbid = re.compile(r'(title/tt|name/nm|character/ch|company/co)([0-9]+)') - -def analyze_imdbid(href): - """Return an imdbID from an URL.""" - if not href: - return None - match = re_imdbid.search(href) - if not match: - return None - return str(match.group(2)) - - -_modify_keys = list(Movie.keys_tomodify_list) + list(Person.keys_tomodify_list) -def _putRefs(d, re_titles, re_names, re_characters, lastKey=None): - """Iterate over the strings inside list items or dictionary values, - substitutes movie titles and person names with the (qv) references.""" - if isinstance(d, list): - for i in xrange(len(d)): - if isinstance(d[i], (unicode, str)): - if lastKey in _modify_keys: - if re_names: - d[i] = re_names.sub(ur"'\1' (qv)", d[i]) - if re_titles: - d[i] = re_titles.sub(ur'_\1_ (qv)', d[i]) - if re_characters: - d[i] = re_characters.sub(ur'#\1# (qv)', d[i]) - elif isinstance(d[i], (list, dict)): - _putRefs(d[i], re_titles, re_names, re_characters, - lastKey=lastKey) - elif isinstance(d, dict): - for k, v in d.items(): - lastKey = k - if isinstance(v, (unicode, str)): - if lastKey in _modify_keys: - if re_names: - d[k] = re_names.sub(ur"'\1' 
(qv)", v) - if re_titles: - d[k] = re_titles.sub(ur'_\1_ (qv)', v) - if re_characters: - d[k] = re_characters.sub(ur'#\1# (qv)', v) - elif isinstance(v, (list, dict)): - _putRefs(d[k], re_titles, re_names, re_characters, - lastKey=lastKey) - - -# Handle HTML/XML/SGML entities. -from htmlentitydefs import entitydefs -entitydefs = entitydefs.copy() -entitydefsget = entitydefs.get -entitydefs['nbsp'] = ' ' - -sgmlentity = {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''} -sgmlentityget = sgmlentity.get -_sgmlentkeys = sgmlentity.keys() - -entcharrefs = {} -entcharrefsget = entcharrefs.get -for _k, _v in entitydefs.items(): - if _k in _sgmlentkeys: continue - if _v[0:2] == '&#': - dec_code = _v[1:-1] - _v = unichr(int(_v[2:-1])) - entcharrefs[dec_code] = _v - else: - dec_code = '#' + str(ord(_v)) - _v = unicode(_v, 'latin_1', 'replace') - entcharrefs[dec_code] = _v - entcharrefs[_k] = _v -del _sgmlentkeys, _k, _v -entcharrefs['#160'] = u' ' -entcharrefs['#xA0'] = u' ' -entcharrefs['#xa0'] = u' ' -entcharrefs['#XA0'] = u' ' -entcharrefs['#x22'] = u'"' -entcharrefs['#X22'] = u'"' -# convert &x26; to &, to make BeautifulSoup happy; beware that this -# leaves lone '&' in the html broken, but I assume this is better than -# the contrary... -entcharrefs['#38'] = u'&' -entcharrefs['#x26'] = u'&' -entcharrefs['#x26'] = u'&' - -re_entcharrefs = re.compile('&(%s|\#160|\#\d{1,5}|\#x[0-9a-f]{1,4});' % - '|'.join(map(re.escape, entcharrefs)), re.I) -re_entcharrefssub = re_entcharrefs.sub - -sgmlentity.update(dict([('#34', u'"'), ('#38', u'&'), - ('#60', u'<'), ('#62', u'>'), ('#39', u"'")])) -re_sgmlref = re.compile('&(%s);' % '|'.join(map(re.escape, sgmlentity))) -re_sgmlrefsub = re_sgmlref.sub - -# Matches XML-only single tags, like <br/> ; they are invalid in HTML, -# but widely used by IMDb web site. 
:-/ -re_xmltags = re.compile('<([a-zA-Z]+)/>') - - -def _replXMLRef(match): - """Replace the matched XML/HTML entities and references; - replace everything except sgml entities like <, >, ...""" - ref = match.group(1) - value = entcharrefsget(ref) - if value is None: - if ref[0] == '#': - ref_code = ref[1:] - if ref_code in ('34', '38', '60', '62', '39'): - return match.group(0) - elif ref_code[0].lower() == 'x': - #if ref[2:] == '26': - # # Don't convert &x26; to &, to make BeautifulSoup happy. - # return '&' - return unichr(int(ref[2:], 16)) - else: - return unichr(int(ref[1:])) - else: - return ref - return value - -def subXMLRefs(s): - """Return the given html string with entity and char references - replaced.""" - return re_entcharrefssub(_replXMLRef, s) - -# XXX: no more used here; move it to mobile (they are imported by helpers, too)? -def _replSGMLRefs(match): - """Replace the matched SGML entity.""" - ref = match.group(1) - return sgmlentityget(ref, ref) - -def subSGMLRefs(s): - """Return the given html string with sgml entity and char references - replaced.""" - return re_sgmlrefsub(_replSGMLRefs, s) - - -_b_p_logger = logging.getLogger('imdbpy.parser.http.build_person') -def build_person(txt, personID=None, billingPos=None, - roleID=None, accessSystem='http', modFunct=None): - """Return a Person instance from the tipical <tr>...</tr> strings - found in the IMDb's web site.""" - #if personID is None - # _b_p_logger.debug('empty name or personID for "%s"', txt) - notes = u'' - role = u'' - # Search the (optional) separator between name and role/notes. - if txt.find('....') != -1: - sep = '....' - elif txt.find('...') != -1: - sep = '...' - else: - sep = '...' - # Replace the first parenthesis, assuming there are only - # notes, after. - # Rationale: no imdbIndex is (ever?) showed on the web site. 
- txt = txt.replace('(', '...(', 1) - txt_split = txt.split(sep, 1) - name = txt_split[0].strip() - if len(txt_split) == 2: - role_comment = txt_split[1].strip() - # Strip common endings. - if role_comment[-4:] == ' and': - role_comment = role_comment[:-4].rstrip() - elif role_comment[-2:] == ' &': - role_comment = role_comment[:-2].rstrip() - elif role_comment[-6:] == '& ....': - role_comment = role_comment[:-6].rstrip() - # Get the notes. - if roleID is not None: - if not isinstance(roleID, list): - cmt_idx = role_comment.find('(') - if cmt_idx != -1: - role = role_comment[:cmt_idx].rstrip() - notes = role_comment[cmt_idx:] - else: - # Just a role, without notes. - role = role_comment - else: - role = role_comment - else: - # We're managing something that doesn't have a 'role', so - # everything are notes. - notes = role_comment - if role == '....': role = u'' - roleNotes = [] - # Manages multiple roleIDs. - if isinstance(roleID, list): - rolesplit = role.split('/') - role = [] - for r in rolesplit: - nidx = r.find('(') - if nidx != -1: - role.append(r[:nidx].rstrip()) - roleNotes.append(r[nidx:]) - else: - role.append(r) - roleNotes.append(None) - lr = len(role) - lrid = len(roleID) - if lr > lrid: - roleID += [None] * (lrid - lr) - elif lr < lrid: - roleID = roleID[:lr] - for i, rid in enumerate(roleID): - if rid is not None: - roleID[i] = str(rid) - if lr == 1: - role = role[0] - roleID = roleID[0] - elif roleID is not None: - roleID = str(roleID) - if personID is not None: - personID = str(personID) - if (not name) or (personID is None): - # Set to 'debug', since build_person is expected to receive some crap. - _b_p_logger.debug('empty name or personID for "%s"', txt) - # XXX: return None if something strange is detected? 
- person = Person(name=name, personID=personID, currentRole=role, - roleID=roleID, notes=notes, billingPos=billingPos, - modFunct=modFunct, accessSystem=accessSystem) - if roleNotes and len(roleNotes) == len(roleID): - for idx, role in enumerate(person.currentRole): - if roleNotes[idx]: - role.notes = roleNotes[idx] - return person - - -_re_chrIDs = re.compile('[0-9]{7}') - -_b_m_logger = logging.getLogger('imdbpy.parser.http.build_movie') -# To shrink spaces. -re_spaces = re.compile(r'\s+') -def build_movie(txt, movieID=None, roleID=None, status=None, - accessSystem='http', modFunct=None, _parsingCharacter=False, - _parsingCompany=False, year=None, chrRoles=None, - rolesNoChar=None, additionalNotes=None): - """Given a string as normally seen on the "categorized" page of - a person on the IMDb's web site, returns a Movie instance.""" - # FIXME: Oook, lets face it: build_movie and build_person are now - # two horrible sets of patches to support the new IMDb design. They - # must be rewritten from scratch. - if _parsingCharacter: - _defSep = ' Played by ' - elif _parsingCompany: - _defSep = ' ... ' - else: - _defSep = ' .... ' - title = re_spaces.sub(' ', txt).strip() - # Split the role/notes from the movie title. - tsplit = title.split(_defSep, 1) - role = u'' - notes = u'' - roleNotes = [] - if len(tsplit) == 2: - title = tsplit[0].rstrip() - role = tsplit[1].lstrip() - if title[-9:] == 'TV Series': - title = title[:-9].rstrip() - elif title[-14:] == 'TV mini-series': - title = title[:-14] + ' (mini)' - # Try to understand where the movie title ends. - while True: - if year: - break - if title[-1:] != ')': - # Ignore the silly "TV Series" notice. - if title[-9:] == 'TV Series': - title = title[:-9].rstrip() - continue - else: - # Just a title: stop here. - break - # Try to match paired parentheses; yes: sometimes there are - # parentheses inside comments... 
- nidx = title.rfind('(') - while (nidx != -1 and \ - title[nidx:].count('(') != title[nidx:].count(')')): - nidx = title[:nidx].rfind('(') - # Unbalanced parentheses: stop here. - if nidx == -1: break - # The last item in parentheses seems to be a year: stop here. - first4 = title[nidx+1:nidx+5] - if (first4.isdigit() or first4 == '????') and \ - title[nidx+5:nidx+6] in (')', '/'): break - # The last item in parentheses is a known kind: stop here. - if title[nidx+1:-1] in ('TV', 'V', 'mini', 'VG'): break - # Else, in parentheses there are some notes. - # XXX: should the notes in the role half be kept separated - # from the notes in the movie title half? - if notes: notes = '%s %s' % (title[nidx:], notes) - else: notes = title[nidx:] - title = title[:nidx].rstrip() - if year: - year = year.strip() - if title[-1] == ')': - fpIdx = title.rfind('(') - if fpIdx != -1: - if notes: notes = '%s %s' % (title[fpIdx:], notes) - else: notes = title[fpIdx:] - title = title[:fpIdx].rstrip() - title = u'%s (%s)' % (title, year) - if _parsingCharacter and roleID and not role: - roleID = None - if not roleID: - roleID = None - elif len(roleID) == 1: - roleID = roleID[0] - if not role and chrRoles and isinstance(roleID, (str, unicode)): - roleID = _re_chrIDs.findall(roleID) - role = ' / '.join(filter(None, chrRoles.split('@@'))) - # Manages multiple roleIDs. 
- if isinstance(roleID, list): - tmprole = role.split('/') - role = [] - for r in tmprole: - nidx = r.find('(') - if nidx != -1: - role.append(r[:nidx].rstrip()) - roleNotes.append(r[nidx:]) - else: - role.append(r) - roleNotes.append(None) - lr = len(role) - lrid = len(roleID) - if lr > lrid: - roleID += [None] * (lrid - lr) - elif lr < lrid: - roleID = roleID[:lr] - for i, rid in enumerate(roleID): - if rid is not None: - roleID[i] = str(rid) - if lr == 1: - role = role[0] - roleID = roleID[0] - elif roleID is not None: - roleID = str(roleID) - if movieID is not None: - movieID = str(movieID) - if (not title) or (movieID is None): - _b_m_logger.error('empty title or movieID for "%s"', txt) - if rolesNoChar: - rolesNoChar = filter(None, [x.strip() for x in rolesNoChar.split('/')]) - if not role: - role = [] - elif not isinstance(role, list): - role = [role] - role += rolesNoChar - notes = notes.strip() - if additionalNotes: - additionalNotes = re_spaces.sub(' ', additionalNotes).strip() - if notes: - notes += u' ' - notes += additionalNotes - m = Movie(title=title, movieID=movieID, notes=notes, currentRole=role, - roleID=roleID, roleIsPerson=_parsingCharacter, - modFunct=modFunct, accessSystem=accessSystem) - if roleNotes and len(roleNotes) == len(roleID): - for idx, role in enumerate(m.currentRole): - try: - if roleNotes[idx]: - role.notes = roleNotes[idx] - except IndexError: - break - # Status can't be checked here, and must be detected by the parser. - if status: - m['status'] = status - return m - - -class DOMParserBase(object): - """Base parser to handle HTML data from the IMDb's web server.""" - _defGetRefs = False - _containsObjects = False - - preprocessors = [] - extractors = [] - usingModule = None - - _logger = logging.getLogger('imdbpy.parser.http.domparser') - - def __init__(self, useModule=None): - """Initialize the parser. 
useModule can be used to force it - to use 'BeautifulSoup' or 'lxml'; by default, it's auto-detected, - using 'lxml' if available and falling back to 'BeautifulSoup' - otherwise.""" - # Module to use. - if useModule is None: - useModule = ('lxml', 'BeautifulSoup') - if not isinstance(useModule, (tuple, list)): - useModule = [useModule] - self._useModule = useModule - nrMods = len(useModule) - _gotError = False - for idx, mod in enumerate(useModule): - mod = mod.strip().lower() - try: - if mod == 'lxml': - from lxml.html import fromstring - from lxml.etree import tostring - self._is_xml_unicode = False - self.usingModule = 'lxml' - elif mod == 'beautifulsoup': - from bsouplxml.html import fromstring - from bsouplxml.etree import tostring - self._is_xml_unicode = True - self.usingModule = 'beautifulsoup' - else: - self._logger.warn('unknown module "%s"' % mod) - continue - self.fromstring = fromstring - self._tostring = tostring - if _gotError: - self._logger.warn('falling back to "%s"' % mod) - break - except ImportError, e: - if idx+1 >= nrMods: - # Raise the exception, if we don't have any more - # options to try. - raise IMDbError, 'unable to use any parser in %s: %s' % \ - (str(useModule), str(e)) - else: - self._logger.warn('unable to use "%s": %s' % (mod, str(e))) - _gotError = True - continue - else: - raise IMDbError, 'unable to use parsers in %s' % str(useModule) - # Fall-back defaults. - self._modFunct = None - self._as = 'http' - self._cname = self.__class__.__name__ - self._init() - self.reset() - - def reset(self): - """Reset the parser.""" - # Names and titles references. 
- self._namesRefs = {} - self._titlesRefs = {} - self._charactersRefs = {} - self._reset() - - def _init(self): - """Subclasses can override this method, if needed.""" - pass - - def _reset(self): - """Subclasses can override this method, if needed.""" - pass - - def parse(self, html_string, getRefs=None, **kwds): - """Return the dictionary generated from the given html string; - getRefs can be used to force the gathering of movies/persons/characters - references.""" - self.reset() - if getRefs is not None: - self.getRefs = getRefs - else: - self.getRefs = self._defGetRefs - # Useful only for the testsuite. - if not isinstance(html_string, unicode): - html_string = unicode(html_string, 'latin_1', 'replace') - html_string = subXMLRefs(html_string) - # Temporary fix: self.parse_dom must work even for empty strings. - html_string = self.preprocess_string(html_string) - html_string = html_string.strip() - # tag attributes like title=""Family Guy"" will be - # converted to title=""Family Guy"" and this confuses BeautifulSoup. 
- if self.usingModule == 'beautifulsoup': - html_string = html_string.replace('""', '"') - #print html_string.encode('utf8') - if html_string: - dom = self.get_dom(html_string) - #print self.tostring(dom).encode('utf8') - try: - dom = self.preprocess_dom(dom) - except Exception, e: - self._logger.error('%s: caught exception preprocessing DOM', - self._cname, exc_info=True) - if self.getRefs: - try: - self.gather_refs(dom) - except Exception, e: - self._logger.warn('%s: unable to gather refs: %s', - self._cname, exc_info=True) - data = self.parse_dom(dom) - else: - data = {} - try: - data = self.postprocess_data(data) - except Exception, e: - self._logger.error('%s: caught exception postprocessing data', - self._cname, exc_info=True) - if self._containsObjects: - self.set_objects_params(data) - data = self.add_refs(data) - return data - - def _build_empty_dom(self): - from bsouplxml import _bsoup - return _bsoup.BeautifulSoup('') - - def get_dom(self, html_string): - """Return a dom object, from the given string.""" - try: - dom = self.fromstring(html_string) - if dom is None: - dom = self._build_empty_dom() - self._logger.error('%s: using a fake empty DOM', self._cname) - return dom - except Exception, e: - self._logger.error('%s: caught exception parsing DOM', - self._cname, exc_info=True) - return self._build_empty_dom() - - def xpath(self, element, path): - """Return elements matching the given XPath.""" - try: - xpath_result = element.xpath(path) - if self._is_xml_unicode: - return xpath_result - result = [] - for item in xpath_result: - if isinstance(item, str): - item = unicode(item) - result.append(item) - return result - except Exception, e: - self._logger.error('%s: caught exception extracting XPath "%s"', - self._cname, path, exc_info=True) - return [] - - def tostring(self, element): - """Convert the element to a string.""" - if isinstance(element, (unicode, str)): - return unicode(element) - else: - try: - return self._tostring(element, 
encoding=unicode) - except Exception, e: - self._logger.error('%s: unable to convert to string', - self._cname, exc_info=True) - return u'' - - def clone(self, element): - """Clone an element.""" - return self.fromstring(self.tostring(element)) - - def preprocess_string(self, html_string): - """Here we can modify the text, before it's parsed.""" - if not html_string: - return html_string - # Remove silly  » chars. - html_string = html_string.replace(u' \xbb', u'') - try: - preprocessors = self.preprocessors - except AttributeError: - return html_string - for src, sub in preprocessors: - # re._pattern_type is present only since Python 2.5. - if callable(getattr(src, 'sub', None)): - html_string = src.sub(sub, html_string) - elif isinstance(src, str): - html_string = html_string.replace(src, sub) - elif callable(src): - try: - html_string = src(html_string) - except Exception, e: - _msg = '%s: caught exception preprocessing html' - self._logger.error(_msg, self._cname, exc_info=True) - continue - ##print html_string.encode('utf8') - return html_string - - def gather_refs(self, dom): - """Collect references.""" - grParser = GatherRefs(useModule=self._useModule) - grParser._as = self._as - grParser._modFunct = self._modFunct - refs = grParser.parse_dom(dom) - refs = grParser.postprocess_data(refs) - self._namesRefs = refs['names refs'] - self._titlesRefs = refs['titles refs'] - self._charactersRefs = refs['characters refs'] - - def preprocess_dom(self, dom): - """Last chance to modify the dom, before the rules in self.extractors - are applied by the parse_dom method.""" - return dom - - def parse_dom(self, dom): - """Parse the given dom according to the rules specified - in self.extractors.""" - result = {} - for extractor in self.extractors: - ##print extractor.label - if extractor.group is None: - elements = [(extractor.label, element) - for element in self.xpath(dom, extractor.path)] - else: - groups = self.xpath(dom, extractor.group) - elements = [] - for group in 
groups: - group_key = self.xpath(group, extractor.group_key) - if not group_key: continue - group_key = group_key[0] - # XXX: always tries the conversion to unicode: - # BeautifulSoup.NavigableString is a subclass - # of unicode, and so it's never converted. - group_key = self.tostring(group_key) - normalizer = extractor.group_key_normalize - if normalizer is not None: - if callable(normalizer): - try: - group_key = normalizer(group_key) - except Exception, e: - _m = '%s: unable to apply group_key normalizer' - self._logger.error(_m, self._cname, - exc_info=True) - group_elements = self.xpath(group, extractor.path) - elements.extend([(group_key, element) - for element in group_elements]) - for group_key, element in elements: - for attr in extractor.attrs: - if isinstance(attr.path, dict): - data = {} - for field in attr.path.keys(): - path = attr.path[field] - value = self.xpath(element, path) - if not value: - data[field] = None - else: - # XXX: use u'' , to join? - data[field] = ''.join(value) - else: - data = self.xpath(element, attr.path) - if not data: - data = None - else: - data = attr.joiner.join(data) - if not data: - continue - attr_postprocess = attr.postprocess - if callable(attr_postprocess): - try: - data = attr_postprocess(data) - except Exception, e: - _m = '%s: unable to apply attr postprocess' - self._logger.error(_m, self._cname, exc_info=True) - key = attr.key - if key is None: - key = group_key - elif key.startswith('.'): - # assuming this is an xpath - try: - key = self.xpath(element, key)[0] - except IndexError: - self._logger.error('%s: XPath returned no items', - self._cname, exc_info=True) - elif key.startswith('self.'): - key = getattr(self, key[5:]) - if attr.multi: - if key not in result: - result[key] = [] - result[key].append(data) - else: - if isinstance(data, dict): - result.update(data) - else: - result[key] = data - return result - - def postprocess_data(self, data): - """Here we can modify the data.""" - return data - - def 
set_objects_params(self, data): - """Set parameters of Movie/Person/... instances, since they are - not always set in the parser's code.""" - for obj in flatten(data, yieldDictKeys=True, scalar=_Container): - obj.accessSystem = self._as - obj.modFunct = self._modFunct - - def add_refs(self, data): - """Modify data according to the expected output.""" - if self.getRefs: - titl_re = ur'(%s)' % '|'.join([re.escape(x) for x - in self._titlesRefs.keys()]) - if titl_re != ur'()': re_titles = re.compile(titl_re, re.U) - else: re_titles = None - nam_re = ur'(%s)' % '|'.join([re.escape(x) for x - in self._namesRefs.keys()]) - if nam_re != ur'()': re_names = re.compile(nam_re, re.U) - else: re_names = None - chr_re = ur'(%s)' % '|'.join([re.escape(x) for x - in self._charactersRefs.keys()]) - if chr_re != ur'()': re_characters = re.compile(chr_re, re.U) - else: re_characters = None - _putRefs(data, re_titles, re_names, re_characters) - return {'data': data, 'titlesRefs': self._titlesRefs, - 'namesRefs': self._namesRefs, - 'charactersRefs': self._charactersRefs} - - -class Extractor(object): - """Instruct the DOM parser about how to parse a document.""" - def __init__(self, label, path, attrs, group=None, group_key=None, - group_key_normalize=None): - """Initialize an Extractor object, used to instruct the DOM parser - about how to parse a document.""" - # rarely (never?) used, mostly for debugging purposes. - self.label = label - self.group = group - if group_key is None: - self.group_key = ".//text()" - else: - self.group_key = group_key - self.group_key_normalize = group_key_normalize - self.path = path - # A list of attributes to fetch. 
- if isinstance(attrs, Attribute): - attrs = [attrs] - self.attrs = attrs - - def __repr__(self): - """String representation of an Extractor object.""" - r = '<Extractor id:%s (label=%s, path=%s, attrs=%s, group=%s, ' \ - 'group_key=%s group_key_normalize=%s)>' % (id(self), - self.label, self.path, repr(self.attrs), self.group, - self.group_key, self.group_key_normalize) - return r - - -class Attribute(object): - """The attribute to consider, for a given node.""" - def __init__(self, key, multi=False, path=None, joiner=None, - postprocess=None): - """Initialize an Attribute object, used to specify the - attribute to consider, for a given node.""" - # The key under which information will be saved; can be a string or an - # XPath. If None, the label of the containing extractor will be used. - self.key = key - self.multi = multi - self.path = path - if joiner is None: - joiner = '' - self.joiner = joiner - # Post-process this set of information. - self.postprocess = postprocess - - def __repr__(self): - """String representation of an Attribute object.""" - r = '<Attribute id:%s (key=%s, multi=%s, path=%s, joiner=%s, ' \ - 'postprocess=%s)>' % (id(self), self.key, - self.multi, repr(self.path), - self.joiner, repr(self.postprocess)) - return r - - -def _parse_ref(text, link, info): - """Manage links to references.""" - if link.find('/title/tt') != -1: - yearK = re_yearKind_index.match(info) - if yearK and yearK.start() == 0: - text += ' %s' % info[:yearK.end()] - return (text.replace('\n', ' '), link) - - -class GatherRefs(DOMParserBase): - """Parser used to gather references to movies, persons and characters.""" - _attrs = [Attribute(key=None, multi=True, - path={ - 'text': './text()', - 'link': './@href', - 'info': './following::text()[1]' - }, - postprocess=lambda x: _parse_ref(x.get('text'), x.get('link'), - (x.get('info') or u'').strip()))] - extractors = [ - Extractor(label='names refs', - path="//a[starts-with(@href, '/name/nm')][string-length(@href)=16]", - 
attrs=_attrs), - - Extractor(label='titles refs', - path="//a[starts-with(@href, '/title/tt')]" \ - "[string-length(@href)=17]", - attrs=_attrs), - - Extractor(label='characters refs', - path="//a[starts-with(@href, '/character/ch')]" \ - "[string-length(@href)=21]", - attrs=_attrs), - ] - - def postprocess_data(self, data): - result = {} - for item in ('names refs', 'titles refs', 'characters refs'): - result[item] = {} - for k, v in data.get(item, []): - if not v.endswith('/'): continue - imdbID = analyze_imdbid(v) - if item == 'names refs': - obj = Person(personID=imdbID, name=k, - accessSystem=self._as, modFunct=self._modFunct) - elif item == 'titles refs': - obj = Movie(movieID=imdbID, title=k, - accessSystem=self._as, modFunct=self._modFunct) - else: - obj = Character(characterID=imdbID, name=k, - accessSystem=self._as, modFunct=self._modFunct) - # XXX: companies aren't handled: are they ever found in text, - # as links to their page? - result[item][k] = obj - return result - - def add_refs(self, data): - return data - - diff --git a/libs/imdb/parser/mobile/__init__.py b/libs/imdb/parser/mobile/__init__.py deleted file mode 100644 index ce623ec8..00000000 --- a/libs/imdb/parser/mobile/__init__.py +++ /dev/null @@ -1,833 +0,0 @@ -""" -parser.mobile package (imdb package). - -This package provides the IMDbMobileAccessSystem class used to access -IMDb's data for mobile systems. -the imdb.IMDb function will return an instance of this class when -called with the 'accessSystem' argument set to "mobile". - -Copyright 2005-2010 Davide Alberani <da@erlug.linux.it> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import re -import logging -from urllib import unquote - -from imdb import imdbURL_movie_main, imdbURL_person_main, imdbURL_character_main -from imdb.Movie import Movie -from imdb.utils import analyze_title, analyze_name, canonicalName, \ - date_and_notes -from imdb._exceptions import IMDbDataAccessError -from imdb.parser.http import IMDbHTTPAccessSystem -from imdb.parser.http.utils import subXMLRefs, subSGMLRefs, build_person, \ - build_movie, re_spaces - -# XXX NOTE: the first version of this module was heavily based on -# regular expressions. This new version replace regexps with -# find() strings' method calls; despite being less flexible, it -# seems to be at least as fast and, hopefully, much more -# lightweight. Yes: the regexp-based version was too heavyweight -# for systems with very limited CPU power and memory footprint. -re_spacessub = re_spaces.sub -# Strip html. -re_unhtml = re.compile(r'<.+?>') -re_unhtmlsub = re_unhtml.sub -# imdb person or movie ids. -re_imdbID = re.compile(r'(?<=nm|tt|ch)([0-9]{7})\b') - -# movie AKAs. -re_makas = re.compile('(<p class="find-aka">.*?</p>)') - -# Remove episode numbers. 
-re_filmo_episodes = re.compile('<div class="filmo-episodes">.*?</div>', - re.M | re.I) - - -def _unHtml(s): - """Return a string without tags and no multiple spaces.""" - return subSGMLRefs(re_spacessub(' ', re_unhtmlsub('', s)).strip()) - - -_inttype = type(0) - -def _getTagsWith(s, cont, toClosure=False, maxRes=None): - """Return the html tags in the 's' string containing the 'cont' - string; if toClosure is True, everything between the opening - tag and the closing tag is returned.""" - lres = [] - bi = s.find(cont) - if bi != -1: - btag = s[:bi].rfind('<') - if btag != -1: - if not toClosure: - etag = s[bi+1:].find('>') - if etag != -1: - endidx = bi+2+etag - lres.append(s[btag:endidx]) - if maxRes is not None and len(lres) >= maxRes: return lres - lres += _getTagsWith(s[endidx:], cont, - toClosure=toClosure) - else: - spaceidx = s[btag:].find(' ') - if spaceidx != -1: - ctag = '</%s>' % s[btag+1:btag+spaceidx] - closeidx = s[bi:].find(ctag) - if closeidx != -1: - endidx = bi+closeidx+len(ctag) - lres.append(s[btag:endidx]) - if maxRes is not None and len(lres) >= maxRes: - return lres - lres += _getTagsWith(s[endidx:], cont, - toClosure=toClosure) - return lres - - -def _findBetween(s, begins, ends, beginindx=0, maxRes=None, lres=None): - """Return the list of strings from the 's' string which are included - between the 'begins' and 'ends' strings.""" - if lres is None: - lres = [] - bi = s.find(begins, beginindx) - if bi != -1: - lbegins = len(begins) - if isinstance(ends, (list, tuple)): - eset = [s.find(end, bi+lbegins) for end in ends] - eset[:] = [x for x in eset if x != -1] - if not eset: ei = -1 - else: ei = min(eset) - else: - ei = s.find(ends, bi+lbegins) - if ei != -1: - match = s[bi+lbegins:ei] - lres.append(match) - if maxRes is not None and len(lres) >= maxRes: return lres - _findBetween(s, begins, ends, beginindx=ei, maxRes=maxRes, - lres=lres) - return lres - - -class IMDbMobileAccessSystem(IMDbHTTPAccessSystem): - """The class used to access 
IMDb's data through the web for - mobile terminals.""" - - accessSystem = 'mobile' - _mobile_logger = logging.getLogger('imdbpy.parser.mobile') - - def __init__(self, isThin=1, *arguments, **keywords): - self.accessSystem = 'mobile' - IMDbHTTPAccessSystem.__init__(self, isThin, *arguments, **keywords) - - def _clean_html(self, html): - """Normalize the retrieve html.""" - html = re_spaces.sub(' ', html) - # Remove silly  » chars. - html = html.replace(' »', '') - return subXMLRefs(html) - - def _mretrieve(self, url, size=-1): - """Retrieve an html page and normalize it.""" - cont = self._retrieve(url, size=size) - return self._clean_html(cont) - - def _getPersons(self, s, sep='<br/>'): - """Return a list of Person objects, from the string s; items - are assumed to be separated by the sep string.""" - names = s.split(sep) - pl = [] - plappend = pl.append - counter = 1 - for name in names: - pid = re_imdbID.findall(name) - if not pid: continue - characters = _getTagsWith(name, 'class="char"', - toClosure=True, maxRes=1) - chpids = [] - if characters: - for ch in characters[0].split(' / '): - chid = re_imdbID.findall(ch) - if not chid: - chpids.append(None) - else: - chpids.append(chid[-1]) - if not chpids: - chpids = None - elif len(chpids) == 1: - chpids = chpids[0] - name = _unHtml(name) - # Catch unclosed tags. 
- gt_indx = name.find('>') - if gt_indx != -1: - name = name[gt_indx+1:].lstrip() - if not name: continue - if name.endswith('...'): - name = name[:-3] - p = build_person(name, personID=str(pid[0]), billingPos=counter, - modFunct=self._defModFunct, roleID=chpids, - accessSystem=self.accessSystem) - plappend(p) - counter += 1 - return pl - - def _search_movie(self, title, results): - ##params = urllib.urlencode({'tt': 'on','mx': str(results),'q': title}) - ##params = 'q=%s&tt=on&mx=%s' % (urllib.quote_plus(title), str(results)) - ##cont = self._mretrieve(imdbURL_search % params) - cont = subXMLRefs(self._get_search_content('tt', title, results)) - title = _findBetween(cont, '<title>', '', maxRes=1) - res = [] - if not title: - self._mobile_logger.error('no title tag searching for movie %s', - title) - return res - tl = title[0].lower() - if not tl.startswith('imdb title'): - # a direct hit! - title = _unHtml(title[0]) - mid = None - midtag = _getTagsWith(cont, 'rel="canonical"', maxRes=1) - if midtag: - mid = _findBetween(midtag[0], '/title/tt', '/', maxRes=1) - if not (mid and title): - self._mobile_logger.error('no direct hit title/movieID for' \ - ' title %s', title) - return res - if cont.find('TV mini-series') != -1: - title += ' (mini)' - res[:] = [(str(mid[0]), analyze_title(title))] - else: - # XXX: this results*3 prevents some recursion errors, but... - # it's not exactly understandable (i.e.: why 'results' is - # not enough to get all the results?) 
- lis = _findBetween(cont, 'td valign="top">', '', - maxRes=results*3) - for li in lis: - akas = re_makas.findall(li) - for idx, aka in enumerate(akas): - aka = aka.replace('" - ', '::', 1) - aka = _unHtml(aka) - if aka.startswith('aka "'): - aka = aka[5:].strip() - if aka[-1] == '"': - aka = aka[:-1] - akas[idx] = aka - imdbid = re_imdbID.findall(li) - li = re_makas.sub('', li) - mtitle = _unHtml(li) - if not (imdbid and mtitle): - self._mobile_logger.debug('no title/movieID parsing' \ - ' %s searching for title %s', li, - title) - continue - mtitle = mtitle.replace('(TV mini-series)', '(mini)') - resd = analyze_title(mtitle) - if akas: - resd['akas'] = akas - res.append((str(imdbid[0]), resd)) - return res - - def get_movie_main(self, movieID): - cont = self._mretrieve(imdbURL_movie_main % movieID + 'maindetails') - title = _findBetween(cont, '', '', maxRes=1) - if not title: - raise IMDbDataAccessError, 'unable to get movieID "%s"' % movieID - title = _unHtml(title[0]) - if cont.find('TV mini-series') != -1: - title += ' (mini)' - d = analyze_title(title) - kind = d.get('kind') - tv_series = _findBetween(cont, 'TV Series:', '', maxRes=1) - if tv_series: mid = re_imdbID.findall(tv_series[0]) - else: mid = None - if tv_series and mid: - s_title = _unHtml(tv_series[0]) - s_data = analyze_title(s_title) - m = Movie(movieID=str(mid[0]), data=s_data, - accessSystem=self.accessSystem, - modFunct=self._defModFunct) - d['kind'] = kind = u'episode' - d['episode of'] = m - if kind in ('tv series', 'tv mini series'): - years = _findBetween(cont, '

    ', '

    ', maxRes=1) - if years: - years[:] = _findBetween(years[0], 'TV series', '', - maxRes=1) - if years: - d['series years'] = years[0].strip() - air_date = _findBetween(cont, 'Original Air Date:', '', - maxRes=1) - if air_date: - air_date = air_date[0] - vi = air_date.find('(') - if vi != -1: - date = _unHtml(air_date[:vi]).strip() - if date != '????': - d['original air date'] = date - air_date = air_date[vi:] - season = _findBetween(air_date, 'Season', ',', maxRes=1) - if season: - season = season[0].strip() - try: season = int(season) - except: pass - if season or type(season) is _inttype: - d['season'] = season - episode = _findBetween(air_date, 'Episode', ')', maxRes=1) - if episode: - episode = episode[0].strip() - try: episode = int(episode) - except: pass - if episode or type(season) is _inttype: - d['episode'] = episode - direct = _findBetween(cont, '
    Director', ('', '

    '), - maxRes=1) - if direct: - direct = direct[0] - h5idx = direct.find('/h5>') - if h5idx != -1: - direct = direct[h5idx+4:] - direct = self._getPersons(direct) - if direct: d['director'] = direct - if kind in ('tv series', 'tv mini series', 'episode'): - if kind != 'episode': - seasons = _findBetween(cont, 'Seasons:
    ', '', - maxRes=1) - if seasons: - d['number of seasons'] = seasons[0].count('|') + 1 - creator = _findBetween(cont, 'Created by', ('class="tn15more"', - '', - '

    '), - maxRes=1) - if not creator: - # They change 'Created by' to 'Creator' and viceversa - # from time to time... - # XXX: is 'Creators' also used? - creator = _findBetween(cont, 'Creator:', - ('class="tn15more"', '', - '

    '), maxRes=1) - if creator: - creator = creator[0] - if creator.find('tn15more'): creator = '%s>' % creator - creator = self._getPersons(creator) - if creator: d['creator'] = creator - writers = _findBetween(cont, '
    Writer', ('', '

    '), - maxRes=1) - if writers: - writers = writers[0] - h5idx = writers.find('/h5>') - if h5idx != -1: - writers = writers[h5idx+4:] - writers = self._getPersons(writers) - if writers: d['writer'] = writers - cvurl = _getTagsWith(cont, 'name="poster"', toClosure=True, maxRes=1) - if cvurl: - cvurl = _findBetween(cvurl[0], 'src="', '"', maxRes=1) - if cvurl: d['cover url'] = cvurl[0] - genres = _findBetween(cont, 'href="/Sections/Genres/', '/') - if genres: - d['genres'] = list(set(genres)) - ur = _findBetween(cont, '
    ', '
    ', - maxRes=1) - if ur: - rat = _findBetween(ur[0], '', '', maxRes=1) - if rat: - teni = rat[0].find('/10') - if teni != -1: - rat = rat[0][:teni] - try: - rat = float(rat.strip()) - d['rating'] = rat - except ValueError: - self._mobile_logger.warn('wrong rating: %s', rat) - vi = ur[0].rfind('tn15more">') - if vi != -1 and ur[0][vi+10:].find('await') == -1: - try: - votes = _unHtml(ur[0][vi+10:]).replace('votes', '').strip() - votes = int(votes.replace(',', '')) - d['votes'] = votes - except ValueError: - self._mobile_logger.warn('wrong votes: %s', ur) - top250 = _findBetween(cont, 'href="/chart/top?', '', maxRes=1) - if top250: - fn = top250[0].rfind('#') - if fn != -1: - try: - td = int(top250[0][fn+1:]) - d['top 250 rank'] = td - except ValueError: - self._mobile_logger.warn('wrong top250: %s', top250) - castdata = _findBetween(cont, 'Cast overview', '', maxRes=1) - if not castdata: - castdata = _findBetween(cont, 'Credited cast', '', maxRes=1) - if not castdata: - castdata = _findBetween(cont, 'Complete credited cast', '', - maxRes=1) - if not castdata: - castdata = _findBetween(cont, 'Series Cast Summary', '', - maxRes=1) - if not castdata: - castdata = _findBetween(cont, 'Episode Credited cast', '', - maxRes=1) - if castdata: - castdata = castdata[0] - # Reintegrate the fist tag. - fl = castdata.find('href=') - if fl != -1: castdata = '') - if smib != -1: - smie = castdata.rfind('') - if smie != -1: - castdata = castdata[:smib].strip() + \ - castdata[smie+18:].strip() - castdata = castdata.replace('/tr> ', '', maxRes=1) - if akas: - # For some reason, here
    is still used in place of
    . - akas[:] = [x for x in akas[0].split('
    ') if x.strip()] - akas = [_unHtml(x).replace('" - ','::', 1).lstrip('"').strip() - for x in akas] - if 'See more' in akas: akas.remove('See more') - akas[:] = [x for x in akas if x] - if akas: - d['akas'] = akas - mpaa = _findBetween(cont, 'MPAA
    :', '', maxRes=1) - if mpaa: d['mpaa'] = _unHtml(mpaa[0]) - runtimes = _findBetween(cont, 'Runtime:
    ', '', maxRes=1) - if runtimes: - runtimes = runtimes[0] - runtimes = [x.strip().replace(' min', '').replace(' (', '::(', 1) - for x in runtimes.split('|')] - d['runtimes'] = [_unHtml(x).strip() for x in runtimes] - if kind == 'episode': - # number of episodes. - epsn = _findBetween(cont, 'title="Full Episode List">', '', - maxRes=1) - if epsn: - epsn = epsn[0].replace(' Episodes', '').strip() - if epsn: - try: - epsn = int(epsn) - except: - self._mobile_logger.warn('wrong episodes #: %s', epsn) - d['number of episodes'] = epsn - country = _findBetween(cont, 'Country:', '', maxRes=1) - if country: - country[:] = country[0].split(' | ') - country[:] = ['', '::')) for x in country] - if country: d['countries'] = country - lang = _findBetween(cont, 'Language:', '', maxRes=1) - if lang: - lang[:] = lang[0].split(' | ') - lang[:] = ['', '::')) for x in lang] - if lang: d['languages'] = lang - col = _findBetween(cont, '"/search/title?colors=', '') - if col: - col[:] = col[0].split(' | ') - col[:] = ['', '::')) for x in col] - if col: d['color info'] = col - sm = _findBetween(cont, '/search/title?sound_mixes=', '', - maxRes=1) - if sm: - sm[:] = sm[0].split(' | ') - sm[:] = ['', '::')) for x in sm] - if sm: d['sound mix'] = sm - cert = _findBetween(cont, 'Certification:', '', maxRes=1) - if cert: - cert[:] = cert[0].split(' | ') - cert[:] = [_unHtml(x.replace(' ', '::')) for x in cert] - if cert: d['certificates'] = cert - plotoutline = _findBetween(cont, 'Plot:', [''], - maxRes=1) - if plotoutline: - plotoutline = plotoutline[0].strip() - plotoutline = plotoutline.rstrip('|').rstrip() - if plotoutline: d['plot outline'] = _unHtml(plotoutline) - aratio = _findBetween(cont, 'Aspect Ratio:', [''], - maxRes=1) - if aratio: - aratio = aratio[0].strip().replace(' (', '::(', 1) - if aratio: - d['aspect ratio'] = _unHtml(aratio) - return {'data': d} - - def get_movie_plot(self, movieID): - cont = self._mretrieve(imdbURL_movie_main % movieID + 'plotsummary') - plot = 
_findBetween(cont, '

    ', '

    ') - plot[:] = [_unHtml(x) for x in plot] - for i in xrange(len(plot)): - p = plot[i] - wbyidx = p.rfind(' Written by ') - if wbyidx != -1: - plot[i] = '%s::%s' % \ - (p[:wbyidx].rstrip(), - p[wbyidx+12:].rstrip().replace('{','<').replace('}','>')) - if plot: return {'data': {'plot': plot}} - return {'data': {}} - - def _search_person(self, name, results): - ##params = urllib.urlencode({'nm': 'on', 'mx': str(results), 'q': name}) - ##params = 'q=%s&nm=on&mx=%s' % (urllib.quote_plus(name), str(results)) - ##cont = self._mretrieve(imdbURL_search % params) - cont = subXMLRefs(self._get_search_content('nm', name, results)) - name = _findBetween(cont, '', '', maxRes=1) - res = [] - if not name: - self._mobile_logger.warn('no title tag searching for name %s', name) - return res - nl = name[0].lower() - if not nl.startswith('imdb name'): - # a direct hit! - name = _unHtml(name[0]) - name = name.replace('- Filmography by type' , '').strip() - pid = None - pidtag = _getTagsWith(cont, 'rel="canonical"', maxRes=1) - if pidtag: - pid = _findBetween(pidtag[0], '/name/nm', '/', maxRes=1) - if not (pid and name): - self._mobile_logger.error('no direct hit name/personID for' \ - ' name %s', name) - return res - res[:] = [(str(pid[0]), analyze_name(name, canonical=1))] - else: - lis = _findBetween(cont, 'td valign="top">', '', - maxRes=results*3) - for li in lis: - akas = _findBetween(li, '"', '"') - for sep in [' aka', '
    birth name']: - sepIdx = li.find(sep) - if sepIdx != -1: - li = li[:sepIdx] - pid = re_imdbID.findall(li) - pname = _unHtml(li) - if not (pid and pname): - self._mobile_logger.debug('no name/personID parsing' \ - ' %s searching for name %s', li, - name) - continue - resd = analyze_name(pname, canonical=1) - if akas: - resd['akas'] = akas - res.append((str(pid[0]), resd)) - return res - - def get_person_main(self, personID, _parseChr=False): - if not _parseChr: - url = imdbURL_person_main % personID + 'maindetails' - else: - url = imdbURL_character_main % personID - s = self._mretrieve(url) - r = {} - name = _findBetween(s, '', '', maxRes=1) - if not name: - if _parseChr: w = 'characterID' - else: w = 'personID' - raise IMDbDataAccessError, 'unable to get %s "%s"' % (w, personID) - name = _unHtml(name[0].replace(' - IMDb', '')) - if _parseChr: - name = name.replace('(Character)', '').strip() - name = name.replace('- Filmography by type', '').strip() - else: - name = name.replace('- Filmography by', '').strip() - r = analyze_name(name, canonical=not _parseChr) - for dKind in ('Born', 'Died'): - date = _findBetween(s, '%s:' % dKind.capitalize(), - ('
    ', '

    '), maxRes=1) - if date: - date = _unHtml(date[0]) - if date: - #date, notes = date_and_notes(date) - # TODO: fix to handle real names. - date_notes = date.split(' in ', 1) - notes = u'' - date = date_notes[0] - if len(date_notes) == 2: - notes = date_notes[1] - dtitle = 'birth' - if dKind == 'Died': - dtitle = 'death' - if date: - r['%s date' % dtitle] = date - if notes: - r['%s notes' % dtitle] = notes - akas = _findBetween(s, 'Alternate Names:', ('
    ', - '

    '), maxRes=1) - if akas: - akas = akas[0] - if akas.find(' | ') != -1: - akas = _unHtml(akas).split(' | ') - else: - akas = _unHtml(akas).split(' / ') - if akas: r['akas'] = akas - hs = _findBetween(s, 'name="headshot"', '
    ', maxRes=1) - if hs: - hs[:] = _findBetween(hs[0], 'src="', '"', maxRes=1) - if hs: r['headshot'] = hs[0] - # Build a list of tuples such [('hrefLink', 'section name')] - workkind = _findBetween(s, 'id="jumpto_', '') - ws = [] - for work in workkind: - sep = '" >' - if '">' in work: - sep = '">' - wsplit = work.split(sep, 1) - if len(wsplit) == 2: - sect = wsplit[0] - if '"' in sect: - sect = sect[:sect.find('"')] - ws.append((sect, wsplit[1].lower())) - # XXX: I think "guest appearances" are gone. - if s.find(' tag. - if _parseChr and sect == 'filmography': - inisect = s.find('
    ') - else: - inisect = s.find('',)) - for m in mlist: - fCB = m.find('>') - if fCB != -1: - m = m[fCB+1:].lstrip() - m = re_filmo_episodes.sub('', m) - # For every movie in the current section. - movieID = re_imdbID.findall(m) - if not movieID: - self._mobile_logger.debug('no movieID in %s', m) - continue - m = m.replace('
    ', ' .... ', 1) - if not _parseChr: - chrIndx = m.find(' .... ') - else: - chrIndx = m.find(' Played by ') - chids = [] - if chrIndx != -1: - chrtxt = m[chrIndx+6:] - if _parseChr: - chrtxt = chrtxt[5:] - for ch in chrtxt.split(' / '): - chid = re_imdbID.findall(ch) - if not chid: - chids.append(None) - else: - chids.append(chid[-1]) - if not chids: - chids = None - elif len(chids) == 1: - chids = chids[0] - movieID = str(movieID[0]) - # Search the status. - stidx = m.find('') - status = u'' - if stidx != -1: - stendidx = m.rfind('') - if stendidx != -1: - status = _unHtml(m[stidx+3:stendidx]) - m = m.replace(m[stidx+3:stendidx], '') - year = _findBetween(m, 'year_column">', '', maxRes=1) - if year: - year = year[0] - m = m.replace('%s' % year, - '') - else: - year = None - m = _unHtml(m) - if not m: - self._mobile_logger.warn('no title for movieID %s', movieID) - continue - movie = build_movie(m, movieID=movieID, status=status, - roleID=chids, modFunct=self._defModFunct, - accessSystem=self.accessSystem, - _parsingCharacter=_parseChr, year=year) - sectName = sectName.split(':')[0] - r.setdefault(sectName, []).append(movie) - # If available, take the always correct name from a form. 
- itag = _getTagsWith(s, 'NAME="primary"', maxRes=1) - if not itag: - itag = _getTagsWith(s, 'name="primary"', maxRes=1) - if itag: - vtag = _findBetween(itag[0], 'VALUE="', ('"', '>'), maxRes=1) - if not vtag: - vtag = _findBetween(itag[0], 'value="', ('"', '>'), maxRes=1) - if vtag: - try: - vtag = unquote(str(vtag[0])) - vtag = unicode(vtag, 'latin_1') - r.update(analyze_name(vtag)) - except UnicodeEncodeError: - pass - return {'data': r, 'info sets': ('main', 'filmography')} - - def get_person_biography(self, personID): - cont = self._mretrieve(imdbURL_person_main % personID + 'bio') - d = {} - spouses = _findBetween(cont, 'Spouse', ('', ''), - maxRes=1) - if spouses: - sl = [] - for spouse in spouses[0].split(''): - if spouse.count('') > 1: - spouse = spouse.replace('', '::', 1) - spouse = _unHtml(spouse) - spouse = spouse.replace(':: ', '::').strip() - if spouse: sl.append(spouse) - if sl: d['spouse'] = sl - nnames = _findBetween(cont, '
    Nickname
    ', ('

    ','
    '), - maxRes=1) - if nnames: - nnames = nnames[0] - if nnames: - nnames = [x.strip().replace(' (', '::(', 1) - for x in nnames.split('
    ')] - if nnames: - d['nick names'] = nnames - misc_sects = _findBetween(cont, '
    ', '
    ') - misc_sects[:] = [x.split('
    ') for x in misc_sects] - misc_sects[:] = [x for x in misc_sects if len(x) == 2] - for sect, data in misc_sects: - sect = sect.lower().replace(':', '').strip() - if d.has_key(sect) and sect != 'mini biography': continue - elif sect in ('spouse', 'nickname'): continue - if sect == 'salary': sect = 'salary history' - elif sect == 'where are they now': sect = 'where now' - elif sect == 'personal quotes': sect = 'quotes' - data = data.replace('

    ', '::') - data = data.replace('

    ', ' ') # for multi-paragraphs 'bio' - data = data.replace(' ', '@@@@') - data = data.replace(' ', '::') - data = _unHtml(data) - data = [x.strip() for x in data.split('::')] - data[:] = [x.replace('@@@@', '::') for x in data if x] - if sect == 'height' and data: data = data[0] - elif sect == 'birth name': data = canonicalName(data[0]) - elif sect == 'date of birth': - date, notes = date_and_notes(data[0]) - if date: - d['birth date'] = date - if notes: - d['birth notes'] = notes - continue - elif sect == 'date of death': - date, notes = date_and_notes(data[0]) - if date: - d['death date'] = date - if notes: - d['death notes'] = notes - continue - elif sect == 'mini biography': - ndata = [] - for bio in data: - byidx = bio.rfind('IMDb Mini Biography By') - if byidx != -1: - bioAuth = bio[:byidx].rstrip() - else: - bioAuth = 'Anonymous' - bio = u'%s::%s' % (bioAuth, bio[byidx+23:].lstrip()) - ndata.append(bio) - data[:] = ndata - if 'mini biography' in d: - d['mini biography'].append(ndata[0]) - continue - d[sect] = data - return {'data': d} - - def _search_character(self, name, results): - cont = subXMLRefs(self._get_search_content('char', name, results)) - name = _findBetween(cont, '', '', maxRes=1) - res = [] - if not name: - self._mobile_logger.error('no title tag searching character %s', - name) - return res - nl = name[0].lower() - if not (nl.startswith('imdb search') or nl.startswith('imdb search') \ - or nl.startswith('imdb character')): - # a direct hit! 
- name = _unHtml(name[0]).replace('(Character)', '').strip() - pid = None - pidtag = _getTagsWith(cont, 'rel="canonical"', maxRes=1) - if pidtag: - pid = _findBetween(pidtag[0], '/character/ch', '/', maxRes=1) - if not (pid and name): - self._mobile_logger.error('no direct hit name/characterID for' \ - ' character %s', name) - return res - res[:] = [(str(pid[0]), analyze_name(name))] - else: - sects = _findBetween(cont, 'Popular Characters', '', - maxRes=results*3) - sects += _findBetween(cont, 'Characters', '', - maxRes=results*3) - for sect in sects: - lis = _findBetween(sect, '
    ', - ('', '

    '), maxRes=1) - if intro: - intro = _unHtml(intro[0]).strip() - if intro: - d['introduction'] = intro - bios = _findBetween(cont, '
    ', - '
    ') - if bios: - bios = _findBetween(bios[0], '

    ', ('

    ', '

    ')) - if bios: - for bio in bios: - bio = bio.replace('

    ', '::') - bio = bio.replace('\n', ' ') - bio = bio.replace('
    ', '\n') - bio = bio.replace('
    ', '\n') - bio = subSGMLRefs(re_unhtmlsub('', bio).strip()) - bio = bio.replace(' ::', '::').replace(':: ', '::') - bio = bio.replace('::', ': ', 1) - if bio: - d.setdefault('biography', []).append(bio) - return {'data': d} - - diff --git a/libs/imdb/parser/sql/__init__.py b/libs/imdb/parser/sql/__init__.py deleted file mode 100644 index 22510e6c..00000000 --- a/libs/imdb/parser/sql/__init__.py +++ /dev/null @@ -1,1589 +0,0 @@ -""" -parser.sql package (imdb package). - -This package provides the IMDbSqlAccessSystem class used to access -IMDb's data through a SQL database. Every database supported by -the SQLObject _AND_ SQLAlchemy Object Relational Managers is available. -the imdb.IMDb function will return an instance of this class when -called with the 'accessSystem' argument set to "sql", "database" or "db". - -Copyright 2005-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -# FIXME: this whole module was written in a veeery short amount of time. -# The code should be commented, rewritten and cleaned. 
:-) - -import re -import logging -from difflib import SequenceMatcher -from codecs import lookup - -from imdb import IMDbBase -from imdb.utils import normalizeName, normalizeTitle, build_title, \ - build_name, analyze_name, analyze_title, \ - canonicalTitle, canonicalName, re_titleRef, \ - build_company_name, re_episodes, _unicodeArticles, \ - analyze_company_name, re_year_index, re_nameRef -from imdb.Person import Person -from imdb.Movie import Movie -from imdb.Company import Company -from imdb._exceptions import IMDbDataAccessError, IMDbError - - -# Logger for miscellaneous functions. -_aux_logger = logging.getLogger('imdbpy.parser.sql.aux') - -# ============================= -# Things that once upon a time were in imdb.parser.common.locsql. - -def titleVariations(title, fromPtdf=0): - """Build title variations useful for searches; if fromPtdf is true, - the input is assumed to be in the plain text data files format.""" - if fromPtdf: title1 = u'' - else: title1 = title - title2 = title3 = u'' - if fromPtdf or re_year_index.search(title): - # If it appears to have a (year[/imdbIndex]) indication, - # assume that a long imdb canonical name was provided. - titldict = analyze_title(title, canonical=1) - # title1: the canonical name. - title1 = titldict['title'] - if titldict['kind'] != 'episode': - # title3: the long imdb canonical name. - if fromPtdf: title3 = title - else: title3 = build_title(titldict, canonical=1, ptdf=1) - else: - title1 = normalizeTitle(title1) - title3 = build_title(titldict, canonical=1, ptdf=1) - else: - # Just a title. - # title1: the canonical title. - title1 = canonicalTitle(title) - title3 = u'' - # title2 is title1 without the article, or title1 unchanged. 
- if title1: - title2 = title1 - t2s = title2.split(u', ') - if t2s[-1].lower() in _unicodeArticles: - title2 = u', '.join(t2s[:-1]) - _aux_logger.debug('title variations: 1:[%s] 2:[%s] 3:[%s]', - title1, title2, title3) - return title1, title2, title3 - - -re_nameIndex = re.compile(r'\(([IVXLCDM]+)\)') - -def nameVariations(name, fromPtdf=0): - """Build name variations useful for searches; if fromPtdf is true, - the input is assumed to be in the plain text data files format.""" - name1 = name2 = name3 = u'' - if fromPtdf or re_nameIndex.search(name): - # We've a name with an (imdbIndex) - namedict = analyze_name(name, canonical=1) - # name1 is the name in the canonical format. - name1 = namedict['name'] - # name3 is the canonical name with the imdbIndex. - if fromPtdf: - if namedict.has_key('imdbIndex'): - name3 = name - else: - name3 = build_name(namedict, canonical=1) - else: - # name1 is the name in the canonical format. - name1 = canonicalName(name) - name3 = u'' - # name2 is the name in the normal format, if it differs from name1. - name2 = normalizeName(name1) - if name1 == name2: name2 = u'' - _aux_logger.debug('name variations: 1:[%s] 2:[%s] 3:[%s]', - name1, name2, name3) - return name1, name2, name3 - - -try: - from cutils import ratcliff as _ratcliff - def ratcliff(s1, s2, sm): - """Return the Ratcliff-Obershelp value between the two strings, - using the C implementation.""" - return _ratcliff(s1.encode('latin_1', 'replace'), - s2.encode('latin_1', 'replace')) -except ImportError: - _aux_logger.warn('Unable to import the cutils.ratcliff function.' 
- ' Searching names and titles using the "sql"' - ' data access system will be slower.') - - def ratcliff(s1, s2, sm): - """Ratcliff-Obershelp similarity.""" - STRING_MAXLENDIFFER = 0.7 - s1len = len(s1) - s2len = len(s2) - if s1len < s2len: - threshold = float(s1len) / s2len - else: - threshold = float(s2len) / s1len - if threshold < STRING_MAXLENDIFFER: - return 0.0 - sm.set_seq2(s2.lower()) - return sm.ratio() - - -def merge_roles(mop): - """Merge multiple roles.""" - new_list = [] - for m in mop: - if m in new_list: - keep_this = new_list[new_list.index(m)] - if not isinstance(keep_this.currentRole, list): - keep_this.currentRole = [keep_this.currentRole] - keep_this.currentRole.append(m.currentRole) - else: - new_list.append(m) - return new_list - - -def scan_names(name_list, name1, name2, name3, results=0, ro_thresold=None, - _scan_character=False): - """Scan a list of names, searching for best matches against - the given variations.""" - if ro_thresold is not None: RO_THRESHOLD = ro_thresold - else: RO_THRESHOLD = 0.6 - sm1 = SequenceMatcher() - sm2 = SequenceMatcher() - sm3 = SequenceMatcher() - sm1.set_seq1(name1.lower()) - if name2: sm2.set_seq1(name2.lower()) - if name3: sm3.set_seq1(name3.lower()) - resd = {} - for i, n_data in name_list: - nil = n_data['name'] - # XXX: on Symbian, here we get a str; not sure this is the - # right place to fix it. - if isinstance(nil, str): - nil = unicode(nil, 'latin1', 'ignore') - # Distance with the canonical name. - ratios = [ratcliff(name1, nil, sm1) + 0.05] - namesurname = u'' - if not _scan_character: - nils = nil.split(', ', 1) - surname = nils[0] - if len(nils) == 2: namesurname = '%s %s' % (nils[1], surname) - else: - nils = nil.split(' ', 1) - surname = nils[-1] - namesurname = nil - if surname != nil: - # Distance with the "Surname" in the database. 
- ratios.append(ratcliff(name1, surname, sm1)) - if not _scan_character: - ratios.append(ratcliff(name1, namesurname, sm1)) - if name2: - ratios.append(ratcliff(name2, surname, sm2)) - # Distance with the "Name Surname" in the database. - if namesurname: - ratios.append(ratcliff(name2, namesurname, sm2)) - if name3: - # Distance with the long imdb canonical name. - ratios.append(ratcliff(name3, - build_name(n_data, canonical=1), sm3) + 0.1) - ratio = max(ratios) - if ratio >= RO_THRESHOLD: - if resd.has_key(i): - if ratio > resd[i][0]: resd[i] = (ratio, (i, n_data)) - else: resd[i] = (ratio, (i, n_data)) - res = resd.values() - res.sort() - res.reverse() - if results > 0: res[:] = res[:results] - return res - - -def scan_titles(titles_list, title1, title2, title3, results=0, - searchingEpisode=0, onlyEpisodes=0, ro_thresold=None): - """Scan a list of titles, searching for best matches against - the given variations.""" - if ro_thresold is not None: RO_THRESHOLD = ro_thresold - else: RO_THRESHOLD = 0.6 - sm1 = SequenceMatcher() - sm2 = SequenceMatcher() - sm3 = SequenceMatcher() - sm1.set_seq1(title1.lower()) - sm2.set_seq2(title2.lower()) - if title3: - sm3.set_seq1(title3.lower()) - if title3[-1] == '}': searchingEpisode = 1 - hasArt = 0 - if title2 != title1: hasArt = 1 - resd = {} - for i, t_data in titles_list: - if onlyEpisodes: - if t_data.get('kind') != 'episode': - continue - til = t_data['title'] - if til[-1] == ')': - dateIdx = til.rfind('(') - if dateIdx != -1: - til = til[:dateIdx].rstrip() - if not til: - continue - ratio = ratcliff(title1, til, sm1) - if ratio >= RO_THRESHOLD: - resd[i] = (ratio, (i, t_data)) - continue - if searchingEpisode: - if t_data.get('kind') != 'episode': continue - elif t_data.get('kind') == 'episode': continue - til = t_data['title'] - # XXX: on Symbian, here we get a str; not sure this is the - # right place to fix it. 
- if isinstance(til, str): - til = unicode(til, 'latin1', 'ignore') - # Distance with the canonical title (with or without article). - # titleS -> titleR - # titleS, the -> titleR, the - if not searchingEpisode: - til = canonicalTitle(til) - ratios = [ratcliff(title1, til, sm1) + 0.05] - # til2 is til without the article, if present. - til2 = til - tils = til2.split(', ') - matchHasArt = 0 - if tils[-1].lower() in _unicodeArticles: - til2 = ', '.join(tils[:-1]) - matchHasArt = 1 - if hasArt and not matchHasArt: - # titleS[, the] -> titleR - ratios.append(ratcliff(title2, til, sm2)) - elif matchHasArt and not hasArt: - # titleS -> titleR[, the] - ratios.append(ratcliff(title1, til2, sm1)) - else: - ratios = [0.0] - if title3: - # Distance with the long imdb canonical title. - ratios.append(ratcliff(title3, - build_title(t_data, canonical=1, ptdf=1), sm3) + 0.1) - ratio = max(ratios) - if ratio >= RO_THRESHOLD: - if resd.has_key(i): - if ratio > resd[i][0]: - resd[i] = (ratio, (i, t_data)) - else: resd[i] = (ratio, (i, t_data)) - res = resd.values() - res.sort() - res.reverse() - if results > 0: res[:] = res[:results] - return res - - -def scan_company_names(name_list, name1, results=0, ro_thresold=None): - """Scan a list of company names, searching for best matches against - the given name. Notice that this function takes a list of - strings, and not a list of dictionaries.""" - if ro_thresold is not None: RO_THRESHOLD = ro_thresold - else: RO_THRESHOLD = 0.6 - sm1 = SequenceMatcher() - sm1.set_seq1(name1.lower()) - resd = {} - withoutCountry = not name1.endswith(']') - for i, n in name_list: - # XXX: on Symbian, here we get a str; not sure this is the - # right place to fix it. - if isinstance(n, str): - n = unicode(n, 'latin1', 'ignore') - o_name = n - var = 0.0 - if withoutCountry and n.endswith(']'): - cidx = n.rfind('[') - if cidx != -1: - n = n[:cidx].rstrip() - var = -0.05 - # Distance with the company name. 
- ratio = ratcliff(name1, n, sm1) + var - if ratio >= RO_THRESHOLD: - if resd.has_key(i): - if ratio > resd[i][0]: resd[i] = (ratio, - (i, analyze_company_name(o_name))) - else: - resd[i] = (ratio, (i, analyze_company_name(o_name))) - res = resd.values() - res.sort() - res.reverse() - if results > 0: res[:] = res[:results] - return res - - -try: - from cutils import soundex -except ImportError: - _aux_logger.warn('Unable to import the cutils.soundex function.' - ' Searches of movie titles and person names will be' - ' a bit slower.') - - _translate = dict(B='1', C='2', D='3', F='1', G='2', J='2', K='2', L='4', - M='5', N='5', P='1', Q='2', R='6', S='2', T='3', V='1', - X='2', Z='2') - _translateget = _translate.get - _re_non_ascii = re.compile(r'^[^a-z]*', re.I) - SOUNDEX_LEN = 5 - - def soundex(s): - """Return the soundex code for the given string.""" - # Maximum length of the soundex code. - s = _re_non_ascii.sub('', s) - if not s: return None - s = s.upper() - soundCode = s[0] - for c in s[1:]: - cw = _translateget(c, '0') - if cw != '0' and soundCode[-1] != cw: - soundCode += cw - return soundCode[:SOUNDEX_LEN] or None - - -def _sortKeywords(keyword, kwds): - """Sort a list of keywords, based on the searched one.""" - sm = SequenceMatcher() - sm.set_seq1(keyword.lower()) - ratios = [(ratcliff(keyword, k, sm), k) for k in kwds] - checkContained = False - if len(keyword) > 4: - checkContained = True - for idx, data in enumerate(ratios): - ratio, key = data - if key.startswith(keyword): - ratios[idx] = (ratio+0.5, key) - elif checkContained and keyword in key: - ratios[idx] = (ratio+0.3, key) - ratios.sort() - ratios.reverse() - return [r[1] for r in ratios] - - -def filterSimilarKeywords(keyword, kwdsIterator): - """Return a sorted list of keywords similar to the one given.""" - seenDict = {} - kwdSndx = soundex(keyword.encode('ascii', 'ignore')) - matches = [] - matchesappend = matches.append - checkContained = False - if len(keyword) > 4: - checkContained = 
True - for movieID, key in kwdsIterator: - if key in seenDict: - continue - seenDict[key] = None - if checkContained and keyword in key: - matchesappend(key) - continue - if kwdSndx == soundex(key.encode('ascii', 'ignore')): - matchesappend(key) - return _sortKeywords(keyword, matches) - - - -# ============================= - -_litlist = ['screenplay/teleplay', 'novel', 'adaption', 'book', - 'production process protocol', 'interviews', - 'printed media reviews', 'essays', 'other literature'] -_litd = dict([(x, ('literature', x)) for x in _litlist]) - -_buslist = ['budget', 'weekend gross', 'gross', 'opening weekend', 'rentals', - 'admissions', 'filming dates', 'production dates', 'studios', - 'copyright holder'] -_busd = dict([(x, ('business', x)) for x in _buslist]) - - -def _reGroupDict(d, newgr): - """Regroup keys in the d dictionary in subdictionaries, based on - the scheme in the newgr dictionary. - E.g.: in the newgr, an entry 'LD label': ('laserdisc', 'label') - tells the _reGroupDict() function to take the entry with - label 'LD label' (as received from the sql database) - and put it in the subsection (another dictionary) named - 'laserdisc', using the key 'label'.""" - r = {} - newgrks = newgr.keys() - for k, v in d.items(): - if k in newgrks: - r.setdefault(newgr[k][0], {})[newgr[k][1]] = v - # A not-so-clearer version: - ##r.setdefault(newgr[k][0], {}) - ##r[newgr[k][0]][newgr[k][1]] = v - else: r[k] = v - return r - - -def _groupListBy(l, index): - """Regroup items in a list in a list of lists, grouped by - the value at the given index.""" - tmpd = {} - for item in l: - tmpd.setdefault(item[index], []).append(item) - res = tmpd.values() - return res - - -def sub_dict(d, keys): - """Return the subdictionary of 'd', with just the keys listed in 'keys'.""" - return dict([(k, d[k]) for k in keys if k in d]) - - -def get_movie_data(movieID, kindDict, fromAka=0, _table=None): - """Return a dictionary containing data about the given movieID; - if fromAka is 
true, the AkaTitle table is searched; _table is - reserved for the imdbpy2sql.py script.""" - if _table is not None: - Table = _table - else: - if not fromAka: Table = Title - else: Table = AkaTitle - m = Table.get(movieID) - mdict = {'title': m.title, 'kind': kindDict[m.kindID], - 'year': m.productionYear, 'imdbIndex': m.imdbIndex, - 'season': m.seasonNr, 'episode': m.episodeNr} - if not fromAka: - if m.seriesYears is not None: - mdict['series years'] = unicode(m.seriesYears) - if mdict['imdbIndex'] is None: del mdict['imdbIndex'] - if mdict['year'] is None: del mdict['year'] - else: - try: - mdict['year'] = int(mdict['year']) - except (TypeError, ValueError): - del mdict['year'] - if mdict['season'] is None: del mdict['season'] - else: - try: mdict['season'] = int(mdict['season']) - except: pass - if mdict['episode'] is None: del mdict['episode'] - else: - try: mdict['episode'] = int(mdict['episode']) - except: pass - episodeOfID = m.episodeOfID - if episodeOfID is not None: - ser_dict = get_movie_data(episodeOfID, kindDict, fromAka) - mdict['episode of'] = Movie(data=ser_dict, movieID=episodeOfID, - accessSystem='sql') - if fromAka: - ser_note = AkaTitle.get(episodeOfID).note - if ser_note: - mdict['episode of'].notes = ser_note - return mdict - - -def _iterKeywords(results): - """Iterate over (key.id, key.keyword) columns of a selection of - the Keyword table.""" - for key in results: - yield key.id, key.keyword - - -def getSingleInfo(table, movieID, infoType, notAList=False): - """Return a dictionary in the form {infoType: infoListOrString}, - retrieving a single set of information about a given movie, from - the specified table.""" - infoTypeID = InfoType.select(InfoType.q.info == infoType) - if infoTypeID.count() == 0: - return {} - res = table.select(AND(table.q.movieID == movieID, - table.q.infoTypeID == infoTypeID[0].id)) - retList = [] - for r in res: - info = r.info - note = r.note - if note: - info += u'::%s' % note - retList.append(info) - if not 
retList: - return {} - if not notAList: return {infoType: retList} - else: return {infoType: retList[0]} - - -def _cmpTop(a, b, what='top 250 rank'): - """Compare function used to sort top 250/bottom 10 rank.""" - av = int(a[1].get(what)) - bv = int(b[1].get(what)) - if av == bv: - return 0 - return (-1, 1)[av > bv] - -def _cmpBottom(a, b): - """Compare function used to sort top 250/bottom 10 rank.""" - return _cmpTop(a, b, what='bottom 10 rank') - - -class IMDbSqlAccessSystem(IMDbBase): - """The class used to access IMDb's data through a SQL database.""" - - accessSystem = 'sql' - _sql_logger = logging.getLogger('imdbpy.parser.sql') - - def __init__(self, uri, adultSearch=1, useORM=None, *arguments, **keywords): - """Initialize the access system.""" - IMDbBase.__init__(self, *arguments, **keywords) - if useORM is None: - useORM = ('sqlobject', 'sqlalchemy') - if not isinstance(useORM, (tuple, list)): - if ',' in useORM: - useORM = useORM.split(',') - else: - useORM = [useORM] - self.useORM = useORM - nrMods = len(useORM) - _gotError = False - DB_TABLES = [] - for idx, mod in enumerate(useORM): - mod = mod.strip().lower() - try: - if mod == 'sqlalchemy': - from alchemyadapter import getDBTables, NotFoundError, \ - setConnection, AND, OR, IN, \ - ISNULL, CONTAINSSTRING, toUTF8 - elif mod == 'sqlobject': - from objectadapter import getDBTables, NotFoundError, \ - setConnection, AND, OR, IN, \ - ISNULL, CONTAINSSTRING, toUTF8 - else: - self._sql_logger.warn('unknown module "%s"' % mod) - continue - self._sql_logger.info('using %s ORM', mod) - # XXX: look ma'... black magic! It's used to make - # TableClasses and some functions accessible - # through the whole module. 
- for k, v in [('NotFoundError', NotFoundError), - ('AND', AND), ('OR', OR), ('IN', IN), - ('ISNULL', ISNULL), - ('CONTAINSSTRING', CONTAINSSTRING)]: - globals()[k] = v - self.toUTF8 = toUTF8 - DB_TABLES = getDBTables(uri) - for t in DB_TABLES: - globals()[t._imdbpyName] = t - if _gotError: - self._sql_logger.warn('falling back to "%s"' % mod) - break - except ImportError, e: - if idx+1 >= nrMods: - raise IMDbError, 'unable to use any ORM in %s: %s' % ( - str(useORM), str(e)) - else: - self._sql_logger.warn('unable to use "%s": %s' % (mod, - str(e))) - _gotError = True - continue - else: - raise IMDbError, 'unable to use any ORM in %s' % str(useORM) - # Set the connection to the database. - self._sql_logger.debug('connecting to %s', uri) - try: - self._connection = setConnection(uri, DB_TABLES) - except AssertionError, e: - raise IMDbDataAccessError, \ - 'unable to connect to the database server; ' + \ - 'complete message: "%s"' % str(e) - self.Error = self._connection.module.Error - # Maps some IDs to the corresponding strings. - self._kind = {} - self._kindRev = {} - self._sql_logger.debug('reading constants from the database') - try: - for kt in KindType.select(): - self._kind[kt.id] = kt.kind - self._kindRev[str(kt.kind)] = kt.id - except self.Error: - # NOTE: you can also get the error, but - at least with - # MySQL - it also contains the password, and I don't - # like the idea to print it out. 
- raise IMDbDataAccessError, \ - 'unable to connect to the database server' - self._role = {} - for rl in RoleType.select(): - self._role[rl.id] = str(rl.role) - self._info = {} - self._infoRev = {} - for inf in InfoType.select(): - self._info[inf.id] = str(inf.info) - self._infoRev[str(inf.info)] = inf.id - self._compType = {} - for cType in CompanyType.select(): - self._compType[cType.id] = cType.kind - info = [(it.id, it.info) for it in InfoType.select()] - self._compcast = {} - for cc in CompCastType.select(): - self._compcast[cc.id] = str(cc.kind) - self._link = {} - for lt in LinkType.select(): - self._link[lt.id] = str(lt.link) - self._moviesubs = {} - # Build self._moviesubs, a dictionary used to rearrange - # the data structure for a movie object. - for vid, vinfo in info: - if not vinfo.startswith('LD '): continue - self._moviesubs[vinfo] = ('laserdisc', vinfo[3:]) - self._moviesubs.update(_litd) - self._moviesubs.update(_busd) - self.do_adult_search(adultSearch) - - def _findRefs(self, o, trefs, nrefs): - """Find titles or names references in strings.""" - if isinstance(o, (unicode, str)): - for title in re_titleRef.findall(o): - a_title = analyze_title(title, canonical=0) - rtitle = build_title(a_title, ptdf=1) - if trefs.has_key(rtitle): continue - movieID = self._getTitleID(rtitle) - if movieID is None: - movieID = self._getTitleID(title) - if movieID is None: - continue - m = Movie(title=rtitle, movieID=movieID, - accessSystem=self.accessSystem) - trefs[rtitle] = m - rtitle2 = canonicalTitle(a_title.get('title', u'')) - if rtitle2 and rtitle2 != rtitle and rtitle2 != title: - trefs[rtitle2] = m - if title != rtitle: - trefs[title] = m - for name in re_nameRef.findall(o): - a_name = analyze_name(name, canonical=1) - rname = build_name(a_name, canonical=1) - if nrefs.has_key(rname): continue - personID = self._getNameID(rname) - if personID is None: - personID = self._getNameID(name) - if personID is None: continue - p = Person(name=rname, 
personID=personID, - accessSystem=self.accessSystem) - nrefs[rname] = p - rname2 = normalizeName(a_name.get('name', u'')) - if rname2 and rname2 != rname: - nrefs[rname2] = p - if name != rname and name != rname2: - nrefs[name] = p - elif isinstance(o, (list, tuple)): - for item in o: - self._findRefs(item, trefs, nrefs) - elif isinstance(o, dict): - for value in o.values(): - self._findRefs(value, trefs, nrefs) - return (trefs, nrefs) - - def _extractRefs(self, o): - """Scan for titles or names references in strings.""" - trefs = {} - nrefs = {} - try: - return self._findRefs(o, trefs, nrefs) - except RuntimeError, e: - # Symbian/python 2.2 has a poor regexp implementation. - import warnings - warnings.warn('RuntimeError in ' - "imdb.parser.sql.IMDbSqlAccessSystem; " - "if it's not a recursion limit exceeded and we're not " - "running in a Symbian environment, it's a bug:\n%s" % e) - return (trefs, nrefs) - - def _changeAKAencoding(self, akanotes, akatitle): - """Return akatitle in the correct charset, as specified in - the akanotes field; if akatitle doesn't need to be modified, - return None.""" - oti = akanotes.find('(original ') - if oti == -1: return None - ote = akanotes[oti+10:].find(' title)') - if ote != -1: - cs_info = akanotes[oti+10:oti+10+ote].lower().split() - for e in cs_info: - # excludes some strings that clearly are not encoding. 
- if e in ('script', '', 'cyrillic', 'greek'): continue - if e.startswith('iso-') and e.find('latin') != -1: - e = e[4:].replace('-', '') - try: - lookup(e) - lat1 = akatitle.encode('latin_1', 'replace') - return unicode(lat1, e, 'replace') - except (LookupError, ValueError, TypeError): - continue - return None - - def _buildNULLCondition(self, col, val): - """Build a comparison for columns where values can be NULL.""" - if val is None: - return ISNULL(col) - else: - if isinstance(val, (int, long)): - return col == val - else: - return col == self.toUTF8(val) - - def _getTitleID(self, title): - """Given a long imdb canonical title, returns a movieID or - None if not found.""" - td = analyze_title(title) - condition = None - if td['kind'] == 'episode': - epof = td['episode of'] - seriesID = [s.id for s in Title.select( - AND(Title.q.title == self.toUTF8(epof['title']), - self._buildNULLCondition(Title.q.imdbIndex, - epof.get('imdbIndex')), - Title.q.kindID == self._kindRev[epof['kind']], - self._buildNULLCondition(Title.q.productionYear, - epof.get('year'))))] - if seriesID: - condition = AND(IN(Title.q.episodeOfID, seriesID), - Title.q.title == self.toUTF8(td['title']), - self._buildNULLCondition(Title.q.imdbIndex, - td.get('imdbIndex')), - Title.q.kindID == self._kindRev[td['kind']], - self._buildNULLCondition(Title.q.productionYear, - td.get('year'))) - if condition is None: - condition = AND(Title.q.title == self.toUTF8(td['title']), - self._buildNULLCondition(Title.q.imdbIndex, - td.get('imdbIndex')), - Title.q.kindID == self._kindRev[td['kind']], - self._buildNULLCondition(Title.q.productionYear, - td.get('year'))) - res = Title.select(condition) - try: - if res.count() != 1: - return None - except (UnicodeDecodeError, TypeError): - return None - return res[0].id - - def _getNameID(self, name): - """Given a long imdb canonical name, returns a personID or - None if not found.""" - nd = analyze_name(name) - res = Name.select(AND(Name.q.name == 
self.toUTF8(nd['name']), - self._buildNULLCondition(Name.q.imdbIndex, - nd.get('imdbIndex')))) - try: - c = res.count() - if res.count() != 1: - return None - except (UnicodeDecodeError, TypeError): - return None - return res[0].id - - def _normalize_movieID(self, movieID): - """Normalize the given movieID.""" - try: - return int(movieID) - except (ValueError, OverflowError): - raise IMDbError, 'movieID "%s" can\'t be converted to integer' % \ - movieID - - def _normalize_personID(self, personID): - """Normalize the given personID.""" - try: - return int(personID) - except (ValueError, OverflowError): - raise IMDbError, 'personID "%s" can\'t be converted to integer' % \ - personID - - def _normalize_characterID(self, characterID): - """Normalize the given characterID.""" - try: - return int(characterID) - except (ValueError, OverflowError): - raise IMDbError, 'characterID "%s" can\'t be converted to integer' \ - % characterID - - def _normalize_companyID(self, companyID): - """Normalize the given companyID.""" - try: - return int(companyID) - except (ValueError, OverflowError): - raise IMDbError, 'companyID "%s" can\'t be converted to integer' \ - % companyID - - def get_imdbMovieID(self, movieID): - """Translate a movieID in an imdbID. - If not in the database, try an Exact Primary Title search on IMDb; - return None if it's unable to get the imdbID. - """ - try: movie = Title.get(movieID) - except NotFoundError: return None - imdbID = movie.imdbID - if imdbID is not None: return '%07d' % imdbID - m_dict = get_movie_data(movie.id, self._kind) - titline = build_title(m_dict, ptdf=1) - imdbID = self.title2imdbID(titline) - # If the imdbID was retrieved from the web and was not in the - # database, update the database (ignoring errors, because it's - # possibile that the current user has not update privileges). - # There're times when I think I'm a genius; this one of - # those times... 
- if imdbID is not None: - try: movie.imdbID = int(imdbID) - except: pass - return imdbID - - def get_imdbPersonID(self, personID): - """Translate a personID in an imdbID. - If not in the database, try an Exact Primary Name search on IMDb; - return None if it's unable to get the imdbID. - """ - try: person = Name.get(personID) - except NotFoundError: return None - imdbID = person.imdbID - if imdbID is not None: return '%07d' % imdbID - n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex} - namline = build_name(n_dict, canonical=1) - imdbID = self.name2imdbID(namline) - if imdbID is not None: - try: person.imdbID = int(imdbID) - except: pass - return imdbID - - def get_imdbCharacterID(self, characterID): - """Translate a characterID in an imdbID. - If not in the database, try an Exact Primary Name search on IMDb; - return None if it's unable to get the imdbID. - """ - try: character = CharName.get(characterID) - except NotFoundError: return None - imdbID = character.imdbID - if imdbID is not None: return '%07d' % imdbID - n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex} - namline = build_name(n_dict, canonical=1) - imdbID = self.character2imdbID(namline) - if imdbID is not None: - try: character.imdbID = int(imdbID) - except: pass - return imdbID - - def get_imdbCompanyID(self, companyID): - """Translate a companyID in an imdbID. - If not in the database, try an Exact Primary Name search on IMDb; - return None if it's unable to get the imdbID. 
- """ - try: company = CompanyName.get(companyID) - except NotFoundError: return None - imdbID = company.imdbID - if imdbID is not None: return '%07d' % imdbID - n_dict = {'name': company.name, 'country': company.countryCode} - namline = build_company_name(n_dict) - imdbID = self.company2imdbID(namline) - if imdbID is not None: - try: company.imdbID = int(imdbID) - except: pass - return imdbID - - def do_adult_search(self, doAdult): - """If set to 0 or False, movies in the Adult category are not - episodeOf = title_dict.get('episode of') - shown in the results of a search.""" - self.doAdult = doAdult - - def _search_movie(self, title, results, _episodes=False): - title = title.strip() - if not title: return [] - title_dict = analyze_title(title, canonical=1) - s_title = title_dict['title'] - if not s_title: return [] - episodeOf = title_dict.get('episode of') - if episodeOf: - _episodes = False - s_title_split = s_title.split(', ') - if len(s_title_split) > 1 and \ - s_title_split[-1].lower() in _unicodeArticles: - s_title_rebuilt = ', '.join(s_title_split[:-1]) - if s_title_rebuilt: - s_title = s_title_rebuilt - #if not episodeOf: - # if not _episodes: - # s_title_split = s_title.split(', ') - # if len(s_title_split) > 1 and \ - # s_title_split[-1].lower() in _articles: - # s_title_rebuilt = ', '.join(s_title_split[:-1]) - # if s_title_rebuilt: - # s_title = s_title_rebuilt - #else: - # _episodes = False - if isinstance(s_title, unicode): - s_title = s_title.encode('ascii', 'ignore') - - soundexCode = soundex(s_title) - - # XXX: improve the search restricting the kindID if the - # "kind" of the input differs from "movie"? - condition = conditionAka = None - if _episodes: - condition = AND(Title.q.phoneticCode == soundexCode, - Title.q.kindID == self._kindRev['episode']) - conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode, - AkaTitle.q.kindID == self._kindRev['episode']) - elif title_dict['kind'] == 'episode' and episodeOf is not None: - # set canonical=0 ? 
Should not make much difference. - series_title = build_title(episodeOf, canonical=1) - # XXX: is it safe to get "results" results? - # Too many? Too few? - serRes = results - if serRes < 3 or serRes > 10: - serRes = 10 - searchSeries = self._search_movie(series_title, serRes) - seriesIDs = [result[0] for result in searchSeries] - if seriesIDs: - condition = AND(Title.q.phoneticCode == soundexCode, - IN(Title.q.episodeOfID, seriesIDs), - Title.q.kindID == self._kindRev['episode']) - conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode, - IN(AkaTitle.q.episodeOfID, seriesIDs), - AkaTitle.q.kindID == self._kindRev['episode']) - else: - # XXX: bad situation: we have found no matching series; - # try searching everything (both episodes and - # non-episodes) for the title. - condition = AND(Title.q.phoneticCode == soundexCode, - IN(Title.q.episodeOfID, seriesIDs)) - conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode, - IN(AkaTitle.q.episodeOfID, seriesIDs)) - if condition is None: - # XXX: excludes episodes? - condition = AND(Title.q.kindID != self._kindRev['episode'], - Title.q.phoneticCode == soundexCode) - conditionAka = AND(AkaTitle.q.kindID != self._kindRev['episode'], - AkaTitle.q.phoneticCode == soundexCode) - - # Up to 3 variations of the title are searched, plus the - # long imdb canonical title, if provided. 
- if not _episodes: - title1, title2, title3 = titleVariations(title) - else: - title1 = title - title2 = '' - title3 = '' - try: - qr = [(q.id, get_movie_data(q.id, self._kind)) - for q in Title.select(condition)] - q2 = [(q.movieID, get_movie_data(q.id, self._kind, fromAka=1)) - for q in AkaTitle.select(conditionAka)] - qr += q2 - except NotFoundError, e: - raise IMDbDataAccessError, \ - 'unable to search the database: "%s"' % str(e) - - resultsST = results * 3 - res = scan_titles(qr, title1, title2, title3, resultsST, - searchingEpisode=episodeOf is not None, - onlyEpisodes=_episodes, - ro_thresold=0.0) - res[:] = [x[1] for x in res] - - if res and not self.doAdult: - mids = [x[0] for x in res] - genreID = self._infoRev['genres'] - adultlist = [al.movieID for al - in MovieInfo.select( - AND(MovieInfo.q.infoTypeID == genreID, - MovieInfo.q.info == 'Adult', - IN(MovieInfo.q.movieID, mids)))] - res[:] = [x for x in res if x[0] not in adultlist] - - new_res = [] - # XXX: can there be duplicates? - for r in res: - if r not in q2: - new_res.append(r) - continue - mdict = r[1] - aka_title = build_title(mdict, ptdf=1) - orig_dict = get_movie_data(r[0], self._kind) - orig_title = build_title(orig_dict, ptdf=1) - if aka_title == orig_title: - new_res.append(r) - continue - orig_dict['akas'] = [aka_title] - new_res.append((r[0], orig_dict)) - if results > 0: new_res[:] = new_res[:results] - return new_res - - def _search_episode(self, title, results): - return self._search_movie(title, results, _episodes=True) - - def get_movie_main(self, movieID): - # Every movie information is retrieved from here. - infosets = self.get_movie_infoset() - try: - res = get_movie_data(movieID, self._kind) - except NotFoundError, e: - raise IMDbDataAccessError, \ - 'unable to get movieID "%s": "%s"' % (movieID, str(e)) - if not res: - raise IMDbDataAccessError, 'unable to get movieID "%s"' % movieID - # Collect cast information. 
- castdata = [[cd.personID, cd.personRoleID, cd.note, cd.nrOrder, - self._role[cd.roleID]] - for cd in CastInfo.select(CastInfo.q.movieID == movieID)] - for p in castdata: - person = Name.get(p[0]) - p += [person.name, person.imdbIndex] - if p[4] in ('actor', 'actress'): - p[4] = 'cast' - # Regroup by role/duty (cast, writer, director, ...) - castdata[:] = _groupListBy(castdata, 4) - for group in castdata: - duty = group[0][4] - for pdata in group: - curRole = pdata[1] - curRoleID = None - if curRole is not None: - robj = CharName.get(curRole) - curRole = robj.name - curRoleID = robj.id - p = Person(personID=pdata[0], name=pdata[5], - currentRole=curRole or u'', - roleID=curRoleID, - notes=pdata[2] or u'', - accessSystem='sql') - if pdata[6]: p['imdbIndex'] = pdata[6] - p.billingPos = pdata[3] - res.setdefault(duty, []).append(p) - if duty == 'cast': - res[duty] = merge_roles(res[duty]) - res[duty].sort() - # Info about the movie. - minfo = [(self._info[m.infoTypeID], m.info, m.note) - for m in MovieInfo.select(MovieInfo.q.movieID == movieID)] - minfo += [(self._info[m.infoTypeID], m.info, m.note) - for m in MovieInfoIdx.select(MovieInfoIdx.q.movieID == movieID)] - minfo += [('keywords', Keyword.get(m.keywordID).keyword, None) - for m in MovieKeyword.select(MovieKeyword.q.movieID == movieID)] - minfo = _groupListBy(minfo, 0) - for group in minfo: - sect = group[0][0] - for mdata in group: - data = mdata[1] - if mdata[2]: data += '::%s' % mdata[2] - res.setdefault(sect, []).append(data) - # Companies info about a movie. 
- cinfo = [(self._compType[m.companyTypeID], m.companyID, m.note) for m - in MovieCompanies.select(MovieCompanies.q.movieID == movieID)] - cinfo = _groupListBy(cinfo, 0) - for group in cinfo: - sect = group[0][0] - for mdata in group: - cDb = CompanyName.get(mdata[1]) - cDbTxt = cDb.name - if cDb.countryCode: - cDbTxt += ' %s' % cDb.countryCode - company = Company(name=cDbTxt, - companyID=mdata[1], - notes=mdata[2] or u'', - accessSystem=self.accessSystem) - res.setdefault(sect, []).append(company) - # AKA titles. - akat = [(get_movie_data(at.id, self._kind, fromAka=1), at.note) - for at in AkaTitle.select(AkaTitle.q.movieID == movieID)] - if akat: - res['akas'] = [] - for td, note in akat: - nt = build_title(td, ptdf=1) - if note: - net = self._changeAKAencoding(note, nt) - if net is not None: nt = net - nt += '::%s' % note - if nt not in res['akas']: res['akas'].append(nt) - # Complete cast/crew. - compcast = [(self._compcast[cc.subjectID], self._compcast[cc.statusID]) - for cc in CompleteCast.select(CompleteCast.q.movieID == movieID)] - if compcast: - for entry in compcast: - val = unicode(entry[1]) - res[u'complete %s' % entry[0]] = val - # Movie connections. - mlinks = [[ml.linkedMovieID, self._link[ml.linkTypeID]] - for ml in MovieLink.select(MovieLink.q.movieID == movieID)] - if mlinks: - for ml in mlinks: - lmovieData = get_movie_data(ml[0], self._kind) - m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql') - ml[0] = m - res['connections'] = {} - mlinks[:] = _groupListBy(mlinks, 1) - for group in mlinks: - lt = group[0][1] - res['connections'][lt] = [i[0] for i in group] - # Episodes. 
- episodes = {} - eps_list = list(Title.select(Title.q.episodeOfID == movieID)) - eps_list.sort() - if eps_list: - ps_data = {'title': res['title'], 'kind': res['kind'], - 'year': res.get('year'), - 'imdbIndex': res.get('imdbIndex')} - parentSeries = Movie(movieID=movieID, data=ps_data, - accessSystem='sql') - for episode in eps_list: - episodeID = episode.id - episode_data = get_movie_data(episodeID, self._kind) - m = Movie(movieID=episodeID, data=episode_data, - accessSystem='sql') - m['episode of'] = parentSeries - season = episode_data.get('season', 'UNKNOWN') - if season not in episodes: episodes[season] = {} - ep_number = episode_data.get('episode') - if ep_number is None: - ep_number = max((episodes[season].keys() or [0])) + 1 - episodes[season][ep_number] = m - res['episodes'] = episodes - res['number of episodes'] = sum([len(x) for x in episodes.values()]) - res['number of seasons'] = len(episodes.keys()) - # Regroup laserdisc information. - res = _reGroupDict(res, self._moviesubs) - # Do some transformation to preserve consistency with other - # data access systems. 
- if 'quotes' in res: - for idx, quote in enumerate(res['quotes']): - res['quotes'][idx] = quote.split('::') - if 'runtimes' in res and len(res['runtimes']) > 0: - rt = res['runtimes'][0] - episodes = re_episodes.findall(rt) - if episodes: - res['runtimes'][0] = re_episodes.sub('', rt) - if res['runtimes'][0][-2:] == '::': - res['runtimes'][0] = res['runtimes'][0][:-2] - if 'votes' in res: - res['votes'] = int(res['votes'][0]) - if 'rating' in res: - res['rating'] = float(res['rating'][0]) - if 'votes distribution' in res: - res['votes distribution'] = res['votes distribution'][0] - if 'mpaa' in res: - res['mpaa'] = res['mpaa'][0] - if 'top 250 rank' in res: - try: res['top 250 rank'] = int(res['top 250 rank']) - except: pass - if 'bottom 10 rank' in res: - try: res['bottom 100 rank'] = int(res['bottom 10 rank']) - except: pass - del res['bottom 10 rank'] - for old, new in [('guest', 'guests'), ('trademarks', 'trade-mark'), - ('articles', 'article'), ('pictorials', 'pictorial'), - ('magazine-covers', 'magazine-cover-photo')]: - if old in res: - res[new] = res[old] - del res[old] - trefs,nrefs = {}, {} - trefs,nrefs = self._extractRefs(sub_dict(res,Movie.keys_tomodify_list)) - return {'data': res, 'titlesRefs': trefs, 'namesRefs': nrefs, - 'info sets': infosets} - - # Just to know what kind of information are available. 
- get_movie_alternate_versions = get_movie_main - get_movie_business = get_movie_main - get_movie_connections = get_movie_main - get_movie_crazy_credits = get_movie_main - get_movie_goofs = get_movie_main - get_movie_keywords = get_movie_main - get_movie_literature = get_movie_main - get_movie_locations = get_movie_main - get_movie_plot = get_movie_main - get_movie_quotes = get_movie_main - get_movie_release_dates = get_movie_main - get_movie_soundtrack = get_movie_main - get_movie_taglines = get_movie_main - get_movie_technical = get_movie_main - get_movie_trivia = get_movie_main - get_movie_vote_details = get_movie_main - get_movie_episodes = get_movie_main - - def _search_person(self, name, results): - name = name.strip() - if not name: return [] - s_name = analyze_name(name)['name'] - if not s_name: return [] - if isinstance(s_name, unicode): - s_name = s_name.encode('ascii', 'ignore') - soundexCode = soundex(s_name) - name1, name2, name3 = nameVariations(name) - - # If the soundex is None, compare only with the first - # phoneticCode column. - if soundexCode is not None: - condition = IN(soundexCode, [Name.q.namePcodeCf, - Name.q.namePcodeNf, - Name.q.surnamePcode]) - conditionAka = IN(soundexCode, [AkaName.q.namePcodeCf, - AkaName.q.namePcodeNf, - AkaName.q.surnamePcode]) - else: - condition = ISNULL(Name.q.namePcodeCf) - conditionAka = ISNULL(AkaName.q.namePcodeCf) - - try: - qr = [(q.id, {'name': q.name, 'imdbIndex': q.imdbIndex}) - for q in Name.select(condition)] - - q2 = [(q.personID, {'name': q.name, 'imdbIndex': q.imdbIndex}) - for q in AkaName.select(conditionAka)] - qr += q2 - except NotFoundError, e: - raise IMDbDataAccessError, \ - 'unable to search the database: "%s"' % str(e) - - res = scan_names(qr, name1, name2, name3, results) - res[:] = [x[1] for x in res] - # Purge empty imdbIndex. 
- returnl = [] - for x in res: - tmpd = x[1] - if tmpd['imdbIndex'] is None: - del tmpd['imdbIndex'] - returnl.append((x[0], tmpd)) - - new_res = [] - # XXX: can there be duplicates? - for r in returnl: - if r not in q2: - new_res.append(r) - continue - pdict = r[1] - aka_name = build_name(pdict, canonical=1) - p = Name.get(r[0]) - orig_dict = {'name': p.name, 'imdbIndex': p.imdbIndex} - if orig_dict['imdbIndex'] is None: - del orig_dict['imdbIndex'] - orig_name = build_name(orig_dict, canonical=1) - if aka_name == orig_name: - new_res.append(r) - continue - orig_dict['akas'] = [aka_name] - new_res.append((r[0], orig_dict)) - if results > 0: new_res[:] = new_res[:results] - - return new_res - - def get_person_main(self, personID): - # Every person information is retrieved from here. - infosets = self.get_person_infoset() - try: - p = Name.get(personID) - except NotFoundError, e: - raise IMDbDataAccessError, \ - 'unable to get personID "%s": "%s"' % (personID, str(e)) - res = {'name': p.name, 'imdbIndex': p.imdbIndex} - if res['imdbIndex'] is None: del res['imdbIndex'] - if not res: - raise IMDbDataAccessError, 'unable to get personID "%s"' % personID - # Collect cast information. - castdata = [(cd.movieID, cd.personRoleID, cd.note, - self._role[cd.roleID], - get_movie_data(cd.movieID, self._kind)) - for cd in CastInfo.select(CastInfo.q.personID == personID)] - # Regroup by role/duty (cast, writer, director, ...) 
- castdata[:] = _groupListBy(castdata, 3) - episodes = {} - seenDuties = [] - for group in castdata: - for mdata in group: - duty = orig_duty = group[0][3] - if duty not in seenDuties: seenDuties.append(orig_duty) - note = mdata[2] or u'' - if 'episode of' in mdata[4]: - duty = 'episodes' - if orig_duty not in ('actor', 'actress'): - if note: note = ' %s' % note - note = '[%s]%s' % (orig_duty, note) - curRole = mdata[1] - curRoleID = None - if curRole is not None: - robj = CharName.get(curRole) - curRole = robj.name - curRoleID = robj.id - m = Movie(movieID=mdata[0], data=mdata[4], - currentRole=curRole or u'', - roleID=curRoleID, - notes=note, accessSystem='sql') - if duty != 'episodes': - res.setdefault(duty, []).append(m) - else: - episodes.setdefault(m['episode of'], []).append(m) - if episodes: - for k in episodes: - episodes[k].sort() - episodes[k].reverse() - res['episodes'] = episodes - for duty in seenDuties: - if duty in res: - if duty in ('actor', 'actress', 'himself', 'herself', - 'themselves'): - res[duty] = merge_roles(res[duty]) - res[duty].sort() - # Info about the person. - pinfo = [(self._info[pi.infoTypeID], pi.info, pi.note) - for pi in PersonInfo.select(PersonInfo.q.personID == personID)] - # Regroup by duty. - pinfo = _groupListBy(pinfo, 0) - for group in pinfo: - sect = group[0][0] - for pdata in group: - data = pdata[1] - if pdata[2]: data += '::%s' % pdata[2] - res.setdefault(sect, []).append(data) - # AKA names. - akan = [(an.name, an.imdbIndex) - for an in AkaName.select(AkaName.q.personID == personID)] - if akan: - res['akas'] = [] - for n in akan: - nd = {'name': n[0]} - if n[1]: nd['imdbIndex'] = n[1] - nt = build_name(nd, canonical=1) - res['akas'].append(nt) - # Do some transformation to preserve consistency with other - # data access systems. 
- for key in ('birth date', 'birth notes', 'death date', 'death notes', - 'birth name', 'height'): - if key in res: - res[key] = res[key][0] - if 'guest' in res: - res['notable tv guest appearances'] = res['guest'] - del res['guest'] - miscnames = res.get('nick names', []) - if 'birth name' in res: miscnames.append(res['birth name']) - if 'akas' in res: - for mname in miscnames: - if mname in res['akas']: res['akas'].remove(mname) - if not res['akas']: del res['akas'] - trefs,nrefs = self._extractRefs(sub_dict(res,Person.keys_tomodify_list)) - return {'data': res, 'titlesRefs': trefs, 'namesRefs': nrefs, - 'info sets': infosets} - - # Just to know what kind of information are available. - get_person_filmography = get_person_main - get_person_biography = get_person_main - get_person_other_works = get_person_main - get_person_episodes = get_person_main - - def _search_character(self, name, results): - name = name.strip() - if not name: return [] - s_name = analyze_name(name)['name'] - if not s_name: return [] - if isinstance(s_name, unicode): - s_name = s_name.encode('ascii', 'ignore') - s_name = normalizeName(s_name) - soundexCode = soundex(s_name) - surname = s_name.split(' ')[-1] - surnameSoundex = soundex(surname) - name2 = '' - soundexName2 = None - nsplit = s_name.split() - if len(nsplit) > 1: - name2 = '%s %s' % (nsplit[-1], ' '.join(nsplit[:-1])) - if s_name == name2: - name2 = '' - else: - soundexName2 = soundex(name2) - # If the soundex is None, compare only with the first - # phoneticCode column. 
- if soundexCode is not None: - if soundexName2 is not None: - condition = OR(surnameSoundex == CharName.q.surnamePcode, - IN(CharName.q.namePcodeNf, [soundexCode, - soundexName2]), - IN(CharName.q.surnamePcode, [soundexCode, - soundexName2])) - else: - condition = OR(surnameSoundex == CharName.q.surnamePcode, - IN(soundexCode, [CharName.q.namePcodeNf, - CharName.q.surnamePcode])) - else: - condition = ISNULL(Name.q.namePcodeNf) - try: - qr = [(q.id, {'name': q.name, 'imdbIndex': q.imdbIndex}) - for q in CharName.select(condition)] - except NotFoundError, e: - raise IMDbDataAccessError, \ - 'unable to search the database: "%s"' % str(e) - res = scan_names(qr, s_name, name2, '', results, - _scan_character=True) - res[:] = [x[1] for x in res] - # Purge empty imdbIndex. - returnl = [] - for x in res: - tmpd = x[1] - if tmpd['imdbIndex'] is None: - del tmpd['imdbIndex'] - returnl.append((x[0], tmpd)) - return returnl - - def get_character_main(self, characterID, results=1000): - # Every character information is retrieved from here. - infosets = self.get_character_infoset() - try: - c = CharName.get(characterID) - except NotFoundError, e: - raise IMDbDataAccessError, \ - 'unable to get characterID "%s": "%s"' % (characterID, e) - res = {'name': c.name, 'imdbIndex': c.imdbIndex} - if res['imdbIndex'] is None: del res['imdbIndex'] - if not res: - raise IMDbDataAccessError, 'unable to get characterID "%s"' % \ - characterID - # Collect filmography information. 
- items = CastInfo.select(CastInfo.q.personRoleID == characterID) - if results > 0: - items = items[:results] - filmodata = [(cd.movieID, cd.personID, cd.note, - get_movie_data(cd.movieID, self._kind)) for cd in items - if self._role[cd.roleID] in ('actor', 'actress')] - fdata = [] - for f in filmodata: - curRole = None - curRoleID = f[1] - note = f[2] or u'' - if curRoleID is not None: - robj = Name.get(curRoleID) - curRole = robj.name - m = Movie(movieID=f[0], data=f[3], - currentRole=curRole or u'', - roleID=curRoleID, roleIsPerson=True, - notes=note, accessSystem='sql') - fdata.append(m) - fdata = merge_roles(fdata) - fdata.sort() - if fdata: - res['filmography'] = fdata - return {'data': res, 'info sets': infosets} - - get_character_filmography = get_character_main - get_character_biography = get_character_main - - def _search_company(self, name, results): - name = name.strip() - if not name: return [] - if isinstance(name, unicode): - name = name.encode('ascii', 'ignore') - soundexCode = soundex(name) - # If the soundex is None, compare only with the first - # phoneticCode column. - if soundexCode is None: - condition = ISNULL(CompanyName.q.namePcodeNf) - else: - if name.endswith(']'): - condition = CompanyName.q.namePcodeSf == soundexCode - else: - condition = CompanyName.q.namePcodeNf == soundexCode - try: - qr = [(q.id, {'name': q.name, 'country': q.countryCode}) - for q in CompanyName.select(condition)] - except NotFoundError, e: - raise IMDbDataAccessError, \ - 'unable to search the database: "%s"' % str(e) - qr[:] = [(x[0], build_company_name(x[1])) for x in qr] - res = scan_company_names(qr, name, results) - res[:] = [x[1] for x in res] - # Purge empty country keys. 
- returnl = [] - for x in res: - tmpd = x[1] - country = tmpd.get('country') - if country is None and 'country' in tmpd: - del tmpd['country'] - returnl.append((x[0], tmpd)) - return returnl - - def get_company_main(self, companyID, results=0): - # Every company information is retrieved from here. - infosets = self.get_company_infoset() - try: - c = CompanyName.get(companyID) - except NotFoundError, e: - raise IMDbDataAccessError, \ - 'unable to get companyID "%s": "%s"' % (companyID, e) - res = {'name': c.name, 'country': c.countryCode} - if res['country'] is None: del res['country'] - if not res: - raise IMDbDataAccessError, 'unable to get companyID "%s"' % \ - companyID - # Collect filmography information. - items = MovieCompanies.select(MovieCompanies.q.companyID == companyID) - if results > 0: - items = items[:results] - filmodata = [(cd.movieID, cd.companyID, - self._compType[cd.companyTypeID], cd.note, - get_movie_data(cd.movieID, self._kind)) for cd in items] - filmodata = _groupListBy(filmodata, 2) - for group in filmodata: - ctype = group[0][2] - for movieID, companyID, ctype, note, movieData in group: - movie = Movie(data=movieData, movieID=movieID, - notes=note or u'', accessSystem=self.accessSystem) - res.setdefault(ctype, []).append(movie) - res.get(ctype, []).sort() - return {'data': res, 'info sets': infosets} - - def _search_keyword(self, keyword, results): - constr = OR(Keyword.q.phoneticCode == - soundex(keyword.encode('ascii', 'ignore')), - CONTAINSSTRING(Keyword.q.keyword, self.toUTF8(keyword))) - return filterSimilarKeywords(keyword, - _iterKeywords(Keyword.select(constr)))[:results] - - def _get_keyword(self, keyword, results): - keyID = Keyword.select(Keyword.q.keyword == keyword) - if keyID.count() == 0: - return [] - keyID = keyID[0].id - movies = MovieKeyword.select(MovieKeyword.q.keywordID == - keyID)[:results] - return [(m.movieID, get_movie_data(m.movieID, self._kind)) - for m in movies] - - def _get_top_bottom_movies(self, kind): - if 
kind == 'top': - kind = 'top 250 rank' - elif kind == 'bottom': - # Not a refuse: the plain text data files contains only - # the bottom 10 movies. - kind = 'bottom 10 rank' - else: - return [] - infoID = InfoType.select(InfoType.q.info == kind) - if infoID.count() == 0: - return [] - infoID = infoID[0].id - movies = MovieInfoIdx.select(MovieInfoIdx.q.infoTypeID == infoID) - ml = [] - for m in movies: - minfo = get_movie_data(m.movieID, self._kind) - for k in kind, 'votes', 'rating', 'votes distribution': - valueDict = getSingleInfo(MovieInfoIdx, m.movieID, - k, notAList=True) - if k in (kind, 'votes') and k in valueDict: - valueDict[k] = int(valueDict[k]) - elif k == 'rating' and k in valueDict: - valueDict[k] = float(valueDict[k]) - minfo.update(valueDict) - ml.append((m.movieID, minfo)) - sorter = (_cmpBottom, _cmpTop)[kind == 'top 250 rank'] - ml.sort(sorter) - return ml - - def __del__(self): - """Ensure that the connection is closed.""" - if not hasattr(self, '_connection'): return - self._sql_logger.debug('closing connection to the database') - self._connection.close() - diff --git a/libs/imdb/parser/sql/alchemyadapter.py b/libs/imdb/parser/sql/alchemyadapter.py deleted file mode 100644 index 12cc494a..00000000 --- a/libs/imdb/parser/sql/alchemyadapter.py +++ /dev/null @@ -1,508 +0,0 @@ -""" -parser.sql.alchemyadapter module (imdb.parser.sql package). - -This module adapts the SQLAlchemy ORM to the internal mechanism. - -Copyright 2008-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import re -import sys -import logging -from sqlalchemy import * -from sqlalchemy import schema -try: from sqlalchemy import exc # 0.5 -except ImportError: from sqlalchemy import exceptions as exc # 0.4 - -_alchemy_logger = logging.getLogger('imdbpy.parser.sql.alchemy') - -try: - import migrate.changeset - HAS_MC = True -except ImportError: - HAS_MC = False - _alchemy_logger.warn('Unable to import migrate.changeset: Foreign ' \ - 'Keys will not be created.') - -from imdb._exceptions import IMDbDataAccessError -from dbschema import * - -# Used to convert table and column names. -re_upper = re.compile(r'([A-Z])') - -# XXX: I'm not sure at all that this is the best method to connect -# to the database and bind that connection to every table. -metadata = MetaData() - -# Maps our placeholders to SQLAlchemy's column types. 
-MAP_COLS = { - INTCOL: Integer, - UNICODECOL: UnicodeText, - STRINGCOL: String -} - - -class NotFoundError(IMDbDataAccessError): - """Exception raised when Table.get(id) returns no value.""" - pass - - -def _renameTable(tname): - """Build the name of a table, as done by SQLObject.""" - tname = re_upper.sub(r'_\1', tname) - if tname.startswith('_'): - tname = tname[1:] - return tname.lower() - -def _renameColumn(cname): - """Build the name of a column, as done by SQLObject.""" - cname = cname.replace('ID', 'Id') - return _renameTable(cname) - - -class DNNameObj(object): - """Used to access table.sqlmeta.columns[column].dbName (a string).""" - def __init__(self, dbName): - self.dbName = dbName - - def __repr__(self): - return '' % (self.dbName, id(self)) - - -class DNNameDict(object): - """Used to access table.sqlmeta.columns (a dictionary).""" - def __init__(self, colMap): - self.colMap = colMap - - def __getitem__(self, key): - return DNNameObj(self.colMap[key]) - - def __repr__(self): - return '' % (self.colMap, id(self)) - - -class SQLMetaAdapter(object): - """Used to access table.sqlmeta (an object with .table, .columns and - .idName attributes).""" - def __init__(self, table, colMap=None): - self.table = table - if colMap is None: - colMap = {} - self.colMap = colMap - - def __getattr__(self, name): - if name == 'table': - return getattr(self.table, name) - if name == 'columns': - return DNNameDict(self.colMap) - if name == 'idName': - return self.colMap.get('id', 'id') - return None - - def __repr__(self): - return '' % \ - (repr(self.table), repr(self.colMap), id(self)) - - -class QAdapter(object): - """Used to access table.q attribute (remapped to SQLAlchemy table.c).""" - def __init__(self, table, colMap=None): - self.table = table - if colMap is None: - colMap = {} - self.colMap = colMap - - def __getattr__(self, name): - try: return getattr(self.table.c, self.colMap[name]) - except KeyError, e: raise AttributeError, "unable to get '%s'" % name - - def 
__repr__(self): - return '' % \ - (repr(self.table), repr(self.colMap), id(self)) - - -class RowAdapter(object): - """Adapter for a SQLAlchemy RowProxy object.""" - def __init__(self, row, table, colMap=None): - self.row = row - # FIXME: it's OBSCENE that 'table' should be passed from - # TableAdapter through ResultAdapter only to land here, - # where it's used to directly update a row item. - self.table = table - if colMap is None: - colMap = {} - self.colMap = colMap - self.colMapKeys = colMap.keys() - - def __getattr__(self, name): - try: return getattr(self.row, self.colMap[name]) - except KeyError, e: raise AttributeError, "unable to get '%s'" % name - - def __setattr__(self, name, value): - # FIXME: I can't even think about how much performances suffer, - # for this horrible hack (and it's used so rarely...) - # For sure something like a "property" to map column names - # to getter/setter functions would be much better, but it's - # not possible (or at least not easy) to build them for a - # single instance. - if name in self.__dict__.get('colMapKeys', ()): - # Trying to update a value in the database. - row = self.__dict__['row'] - table = self.__dict__['table'] - colMap = self.__dict__['colMap'] - params = {colMap[name]: value} - table.update(table.c.id==row.id).execute(**params) - # XXX: minor bug: after a value is assigned with the - # 'rowAdapterInstance.colName = value' syntax, for some - # reason rowAdapterInstance.colName still returns the - # previous value (even if the database is updated). - # Fix it? I'm not even sure it's ever used. - return - # For every other attribute. 
- object.__setattr__(self, name, value) - - def __repr__(self): - return '' % \ - (repr(self.row), repr(self.table), repr(self.colMap), id(self)) - - -class ResultAdapter(object): - """Adapter for a SQLAlchemy ResultProxy object.""" - def __init__(self, result, table, colMap=None): - self.result = result - self.table = table - if colMap is None: - colMap = {} - self.colMap = colMap - - def count(self): - return len(self) - - def __len__(self): - # FIXME: why sqlite returns -1? (that's wrooong!) - if self.result.rowcount == -1: - return 0 - return self.result.rowcount - - def __getitem__(self, key): - res = list(self.result)[key] - if not isinstance(key, slice): - # A single item. - return RowAdapter(res, self.table, colMap=self.colMap) - else: - # A (possible empty) list of items. - return [RowAdapter(x, self.table, colMap=self.colMap) - for x in res] - - def __iter__(self): - for item in self.result: - yield RowAdapter(item, self.table, colMap=self.colMap) - - def __repr__(self): - return '' % \ - (repr(self.result), repr(self.table), - repr(self.colMap), id(self)) - - -class TableAdapter(object): - """Adapter for a SQLAlchemy Table object, to mimic a SQLObject class.""" - def __init__(self, table, uri=None): - """Initialize a TableAdapter object.""" - self._imdbpySchema = table - self._imdbpyName = table.name - self.connectionURI = uri - self.colMap = {} - columns = [] - for col in table.cols: - # Column's paramters. - params = {'nullable': True} - params.update(col.params) - if col.name == 'id': - params['primary_key'] = True - if 'notNone' in params: - params['nullable'] = not params['notNone'] - del params['notNone'] - cname = _renameColumn(col.name) - self.colMap[col.name] = cname - colClass = MAP_COLS[col.kind] - colKindParams = {} - if 'length' in params: - colKindParams['length'] = params['length'] - del params['length'] - elif colClass is UnicodeText and col.index: - # XXX: limit length for UNICODECOLs that will have an index. 
- # this can result in name.name and title.title truncations! - colClass = Unicode - # Should work for most of the database servers. - length = 511 - if self.connectionURI: - if self.connectionURI.startswith('mysql'): - # To stay compatible with MySQL 4.x. - length = 255 - colKindParams['length'] = length - elif self._imdbpyName == 'PersonInfo' and col.name == 'info': - if self.connectionURI: - if self.connectionURI.startswith('ibm'): - # There are some entries longer than 32KB. - colClass = CLOB - # I really do hope that this space isn't wasted - # for each other shorter entry... - colKindParams['length'] = 68*1024 - colKind = colClass(**colKindParams) - if 'alternateID' in params: - # There's no need to handle them here. - del params['alternateID'] - # Create a column. - colObj = Column(cname, colKind, **params) - columns.append(colObj) - self.tableName = _renameTable(table.name) - # Create the table. - self.table = Table(self.tableName, metadata, *columns) - self._ta_insert = self.table.insert() - self._ta_select = self.table.select - # Adapters for special attributes. - self.q = QAdapter(self.table, colMap=self.colMap) - self.sqlmeta = SQLMetaAdapter(self.table, colMap=self.colMap) - - def select(self, conditions=None): - """Return a list of results.""" - result = self._ta_select(conditions).execute() - return ResultAdapter(result, self.table, colMap=self.colMap) - - def get(self, theID): - """Get an object given its ID.""" - result = self.select(self.table.c.id == theID) - #if not result: - # raise NotFoundError, 'no data for ID %s' % theID - # FIXME: isn't this a bit risky? We can't check len(result), - # because sqlite returns -1... - # What about converting it to a list and getting the first item? - try: - return result[0] - except KeyError: - raise NotFoundError, 'no data for ID %s' % theID - - def dropTable(self, checkfirst=True): - """Drop the table.""" - dropParams = {'checkfirst': checkfirst} - # Guess what? Another work-around for a ibm_db bug. 
- if self.table.bind.engine.url.drivername.startswith('ibm_db'): - del dropParams['checkfirst'] - try: - self.table.drop(**dropParams) - except exc.ProgrammingError: - # As above: re-raise the exception, but only if it's not ibm_db. - if not self.table.bind.engine.url.drivername.startswith('ibm_db'): - raise - - def createTable(self, checkfirst=True): - """Create the table.""" - self.table.create(checkfirst=checkfirst) - # Create indexes for alternateID columns (other indexes will be - # created later, at explicit request for performances reasons). - for col in self._imdbpySchema.cols: - if col.name == 'id': - continue - if col.params.get('alternateID', False): - self._createIndex(col, checkfirst=checkfirst) - - def _createIndex(self, col, checkfirst=True): - """Create an index for a given (schema) column.""" - # XXX: indexLen is ignored in SQLAlchemy, and that means that - # indexes will be over the whole 255 chars strings... - # NOTE: don't use a dot as a separator, or DB2 will do - # nasty things. - idx_name = '%s_%s' % (self.table.name, col.index or col.name) - if checkfirst: - for index in self.table.indexes: - if index.name == idx_name: - return - idx = Index(idx_name, getattr(self.table.c, self.colMap[col.name])) - # XXX: beware that exc.OperationalError can be raised, is some - # strange circumstances; that's why the index name doesn't - # follow the SQLObject convention, but includes the table name: - # sqlite, for example, expects index names to be unique at - # db-level. 
- try: - idx.create() - except exc.OperationalError, e: - _alchemy_logger.warn('Skipping creation of the %s.%s index: %s' % - (self.sqlmeta.table, col.name, e)) - - def addIndexes(self, ifNotExists=True): - """Create all required indexes.""" - for col in self._imdbpySchema.cols: - if col.index: - self._createIndex(col, checkfirst=ifNotExists) - - def addForeignKeys(self, mapTables, ifNotExists=True): - """Create all required foreign keys.""" - if not HAS_MC: - return - # It seems that there's no reason to prevent the creation of - # indexes for columns with FK constrains: if there's already - # an index, the FK index is not created. - countCols = 0 - for col in self._imdbpySchema.cols: - countCols += 1 - if not col.foreignKey: - continue - fks = col.foreignKey.split('.', 1) - foreignTableName = fks[0] - if len(fks) == 2: - foreignColName = fks[1] - else: - foreignColName = 'id' - foreignColName = mapTables[foreignTableName].colMap.get( - foreignColName, foreignColName) - thisColName = self.colMap.get(col.name, col.name) - thisCol = self.table.columns[thisColName] - foreignTable = mapTables[foreignTableName].table - foreignCol = getattr(foreignTable.c, foreignColName) - # Need to explicitly set an unique name, otherwise it will - # explode, if two cols points to the same table. 
- fkName = 'fk_%s_%s_%d' % (foreignTable.name, foreignColName, - countCols) - constrain = migrate.changeset.ForeignKeyConstraint([thisCol], - [foreignCol], - name=fkName) - try: - constrain.create() - except exc.OperationalError: - continue - - def __call__(self, *args, **kwds): - """To insert a new row with the syntax: TableClass(key=value, ...)""" - taArgs = {} - for key, value in kwds.items(): - taArgs[self.colMap.get(key, key)] = value - self._ta_insert.execute(*args, **taArgs) - - def __repr__(self): - return '' % (repr(self.table), id(self)) - - -# Module-level "cache" for SQLObject classes, to prevent -# "Table 'tableName' is already defined for this MetaData instance" errors, -# when two or more connections to the database are made. -# XXX: is this the best way to act? -TABLES_REPOSITORY = {} - -def getDBTables(uri=None): - """Return a list of TableAdapter objects to be used to access the - database through the SQLAlchemy ORM. The connection uri is optional, and - can be used to tailor the db schema to specific needs.""" - DB_TABLES = [] - for table in DB_SCHEMA: - if table.name in TABLES_REPOSITORY: - DB_TABLES.append(TABLES_REPOSITORY[table.name]) - continue - tableAdapter = TableAdapter(table, uri) - DB_TABLES.append(tableAdapter) - TABLES_REPOSITORY[table.name] = tableAdapter - return DB_TABLES - - -# Functions used to emulate SQLObject's logical operators. -def AND(*params): - """Emulate SQLObject's AND.""" - return and_(*params) - -def OR(*params): - """Emulate SQLObject's OR.""" - return or_(*params) - -def IN(item, inList): - """Emulate SQLObject's IN.""" - if not isinstance(item, schema.Column): - return OR(*[x == item for x in inList]) - else: - return item.in_(inList) - -def ISNULL(x): - """Emulate SQLObject's ISNULL.""" - # XXX: Should we use null()? Can null() be a global instance? - # XXX: Is it safe to test None with the == operator, in this case? 
- return x == None - -def ISNOTNULL(x): - """Emulate SQLObject's ISNOTNULL.""" - return x != None - -def CONTAINSSTRING(expr, pattern): - """Emulate SQLObject's CONTAINSSTRING.""" - return expr.like('%%%s%%' % pattern) - - -def toUTF8(s): - """For some strange reason, sometimes SQLObject wants utf8 strings - instead of unicode; with SQLAlchemy we just return the unicode text.""" - return s - - -class _AlchemyConnection(object): - """A proxy for the connection object, required since _ConnectionFairy - uses __slots__.""" - def __init__(self, conn): - self.conn = conn - - def __getattr__(self, name): - return getattr(self.conn, name) - - -def setConnection(uri, tables, encoding='utf8', debug=False): - """Set connection for every table.""" - # FIXME: why on earth MySQL requires an additional parameter, - # is well beyond my understanding... - if uri.startswith('mysql'): - if '?' in uri: - uri += '&' - else: - uri += '?' - uri += 'charset=%s' % encoding - params = {'encoding': encoding} - if debug: - params['echo'] = True - if uri.startswith('ibm_db'): - # Try to work-around a possible bug of the ibm_db DB2 driver. - params['convert_unicode'] = True - # XXX: is this the best way to connect? - engine = create_engine(uri, **params) - metadata.bind = engine - eng_conn = engine.connect() - if uri.startswith('sqlite'): - major = sys.version_info[0] - minor = sys.version_info[1] - if major > 2 or (major == 2 and minor > 5): - eng_conn.connection.connection.text_factory = str - # XXX: OH MY, THAT'S A MESS! - # We need to return a "connection" object, with the .dbName - # attribute set to the db engine name (e.g. "mysql"), .paramstyle - # set to the style of the paramters for query() calls, and the - # .module attribute set to a module (?) with .OperationalError and - # .IntegrityError attributes. - # Another attribute of "connection" is the getConnection() function, - # used to return an object with a .cursor() method. 
- connection = _AlchemyConnection(eng_conn.connection) - paramstyle = eng_conn.dialect.paramstyle - connection.module = eng_conn.dialect.dbapi - connection.paramstyle = paramstyle - connection.getConnection = lambda: connection.connection - connection.dbName = engine.url.drivername - return connection - - diff --git a/libs/imdb/parser/sql/cutils.c b/libs/imdb/parser/sql/cutils.c deleted file mode 100644 index 677c1b1e..00000000 --- a/libs/imdb/parser/sql/cutils.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * cutils.c module. - * - * Miscellaneous functions to speed up the IMDbPY package. - * - * Contents: - * - pyratcliff(): - * Function that implements the Ratcliff-Obershelp comparison - * amongst Python strings. - * - * - pysoundex(): - * Return a soundex code string, for the given string. - * - * Copyright 2004-2009 Davide Alberani - * Released under the GPL license. - * - * NOTE: The Ratcliff-Obershelp part was heavily based on code from the - * "simil" Python module. - * The "simil" module is copyright of Luca Montecchiani - * and can be found here: http://spazioinwind.libero.it/montecchiani/ - * It was released under the GPL license; original comments are leaved - * below. - * - */ - - -/*========== Ratcliff-Obershelp ==========*/ -/***************************************************************************** - * - * Stolen code from : - * - * [Python-Dev] Why is soundex marked obsolete? - * by Eric S. Raymond [4]esr@thyrsus.com - * on Sun, 14 Jan 2001 14:09:01 -0500 - * - *****************************************************************************/ - -/***************************************************************************** - * - * Ratcliff-Obershelp common-subpattern similarity. - * - * This code first appeared in a letter to the editor in Doctor - * Dobbs's Journal, 11/1988. The original article on the algorithm, - * "Pattern Matching by Gestalt" by John Ratcliff, had appeared in the - * July 1988 issue (#181) but the algorithm was presented in assembly. 
- * The main drawback of the Ratcliff-Obershelp algorithm is the cost - * of the pairwise comparisons. It is significantly more expensive - * than stemming, Hamming distance, soundex, and the like. - * - * Running time quadratic in the data size, memory usage constant. - * - *****************************************************************************/ - -#include - -#define DONTCOMPARE_NULL 0.0 -#define DONTCOMPARE_SAME 1.0 -#define COMPARE 2.0 -#define STRING_MAXLENDIFFER 0.7 - -/* As of 05 Mar 2008, the longest title is ~600 chars. */ -#define MXLINELEN 1023 - -#define MAX(a,b) ((a) > (b) ? (a) : (b)) - - -//***************************************** -// preliminary check.... -//***************************************** -static float -strings_check(char const *s, char const *t) -{ - float threshold; // lenght difference - int s_len = strlen(s); // length of s - int t_len = strlen(t); // length of t - - // NULL strings ? - if ((t_len * s_len) == 0) - return (DONTCOMPARE_NULL); - - // the same ? 
- if (strcmp(s, t) == 0) - return (DONTCOMPARE_SAME); - - // string lenght difference threshold - // we don't want to compare too different lenght strings ;) - if (s_len < t_len) - threshold = (float) s_len / (float) t_len; - else - threshold = (float) t_len / (float) s_len; - if (threshold < STRING_MAXLENDIFFER) - return (DONTCOMPARE_NULL); - - // proceed - return (COMPARE); -} - - -static int -RatcliffObershelp(char *st1, char *end1, char *st2, char *end2) -{ - register char *a1, *a2; - char *b1, *b2; - char *s1 = st1, *s2 = st2; /* initializations are just to pacify GCC */ - short max, i; - - if (end1 <= st1 || end2 <= st2) - return (0); - if (end1 == st1 + 1 && end2 == st2 + 1) - return (0); - - max = 0; - b1 = end1; - b2 = end2; - - for (a1 = st1; a1 < b1; a1++) { - for (a2 = st2; a2 < b2; a2++) { - if (*a1 == *a2) { - /* determine length of common substring */ - for (i = 1; a1[i] && (a1[i] == a2[i]); i++) - continue; - if (i > max) { - max = i; - s1 = a1; - s2 = a2; - b1 = end1 - max; - b2 = end2 - max; - } - } - } - } - if (!max) - return (0); - max += RatcliffObershelp(s1 + max, end1, s2 + max, end2); /* rhs */ - max += RatcliffObershelp(st1, s1, st2, s2); /* lhs */ - return max; -} - - -static float -ratcliff(char *s1, char *s2) -/* compute Ratcliff-Obershelp similarity of two strings */ -{ - int l1, l2; - float res; - - // preliminary tests - res = strings_check(s1, s2); - if (res != COMPARE) - return(res); - - l1 = strlen(s1); - l2 = strlen(s2); - - return 2.0 * RatcliffObershelp(s1, s1 + l1, s2, s2 + l2) / (l1 + l2); -} - - -/* Change a string to lowercase. */ -static void -strtolower(char *s1) -{ - int i; - for (i=0; i < strlen(s1); i++) s1[i] = tolower(s1[i]); -} - - -/* Ratcliff-Obershelp for two python strings; returns a python float. 
*/ -static PyObject* -pyratcliff(PyObject *self, PyObject *pArgs) -{ - char *s1 = NULL; - char *s2 = NULL; - PyObject *discard = NULL; - char s1copy[MXLINELEN+1]; - char s2copy[MXLINELEN+1]; - - /* The optional PyObject parameter is here to be compatible - * with the pure python implementation, which uses a - * difflib.SequenceMatcher object. */ - if (!PyArg_ParseTuple(pArgs, "ss|O", &s1, &s2, &discard)) - return NULL; - - strncpy(s1copy, s1, MXLINELEN); - strncpy(s2copy, s2, MXLINELEN); - /* Work on copies. */ - strtolower(s1copy); - strtolower(s2copy); - - return Py_BuildValue("f", ratcliff(s1copy, s2copy)); -} - - -/*========== soundex ==========*/ -/* Max length of the soundex code to output (an uppercase char and - * _at most_ 4 digits). */ -#define SOUNDEX_LEN 5 - -/* Group Number Lookup Table */ -static char soundTable[26] = -{ 0 /* A */, '1' /* B */, '2' /* C */, '3' /* D */, 0 /* E */, '1' /* F */, - '2' /* G */, 0 /* H */, 0 /* I */, '2' /* J */, '2' /* K */, '4' /* L */, - '5' /* M */, '5' /* N */, 0 /* O */, '1' /* P */, '2' /* Q */, '6' /* R */, - '2' /* S */, '3' /* T */, 0 /* U */, '1' /* V */, 0 /* W */, '2' /* X */, - 0 /* Y */, '2' /* Z */}; - -static PyObject* -pysoundex(PyObject *self, PyObject *pArgs) -{ - int i, j, n; - char *s = NULL; - char word[MXLINELEN+1]; - char soundCode[SOUNDEX_LEN+1]; - char c; - - if (!PyArg_ParseTuple(pArgs, "s", &s)) - return NULL; - - j = 0; - n = strlen(s); - - /* Convert to uppercase and exclude non-ascii chars. */ - for (i = 0; i < n; i++) { - c = toupper(s[i]); - if (c < 91 && c > 64) { - word[j] = c; - j++; - } - } - word[j] = '\0'; - - n = strlen(word); - if (n == 0) { - /* If the string is empty, returns None. */ - return Py_BuildValue(""); - } - soundCode[0] = word[0]; - - /* Build the soundCode string. 
*/ - j = 1; - for (i = 1; j < SOUNDEX_LEN && i < n; i++) { - c = soundTable[(word[i]-65)]; - /* Compact zeroes and equal consecutive digits ("12234112"->"123412") */ - if (c != 0 && c != soundCode[j-1]) { - soundCode[j++] = c; - } - } - soundCode[j] = '\0'; - - return Py_BuildValue("s", soundCode); -} - - -static PyMethodDef cutils_methods[] = { - {"ratcliff", pyratcliff, - METH_VARARGS, "Ratcliff-Obershelp similarity."}, - {"soundex", pysoundex, - METH_VARARGS, "Soundex code for strings."}, - {NULL} -}; - - -void -initcutils(void) -{ - Py_InitModule("cutils", cutils_methods); -} - - diff --git a/libs/imdb/parser/sql/dbschema.py b/libs/imdb/parser/sql/dbschema.py deleted file mode 100644 index 358dfdd5..00000000 --- a/libs/imdb/parser/sql/dbschema.py +++ /dev/null @@ -1,461 +0,0 @@ -#-*- encoding: utf-8 -*- -""" -parser.sql.dbschema module (imdb.parser.sql package). - -This module provides the schema used to describe the layout of the -database used by the imdb.parser.sql package; functions to create/drop -tables and indexes are also provided. - -Copyright 2005-2010 Davide Alberani - 2006 Giuseppe "Cowo" Corbelli lugbs.linux.it> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import logging - -_dbschema_logger = logging.getLogger('imdbpy.parser.sql.dbschema') - - -# Placeholders for column types. 
-INTCOL = 1 -UNICODECOL = 2 -STRINGCOL = 3 -_strMap = {1: 'INTCOL', 2: 'UNICODECOL', 3: 'STRINGCOL'} - -class DBCol(object): - """Define column objects.""" - def __init__(self, name, kind, **params): - self.name = name - self.kind = kind - self.index = None - self.indexLen = None - # If not None, two notations are accepted: 'TableName' - # and 'TableName.ColName'; in the first case, 'id' is assumed - # as the name of the pointed column. - self.foreignKey = None - if 'index' in params: - self.index = params['index'] - del params['index'] - if 'indexLen' in params: - self.indexLen = params['indexLen'] - del params['indexLen'] - if 'foreignKey' in params: - self.foreignKey = params['foreignKey'] - del params['foreignKey'] - self.params = params - - def __str__(self): - """Class representation.""" - s = '' % (self.name, - len(self.cols), sum([len(v) for v in self.values.values()])) - - def __repr__(self): - """Class representation.""" - s = '').lstrip('<') - for col in self.cols]) - if col_s: - s += ', %s' % col_s - if self.values: - s += ', values=%s' % self.values - s += ')>' - return s - - -# Default values to insert in some tables: {'column': (list, of, values, ...)} -kindTypeDefs = {'kind': ('movie', 'tv series', 'tv movie', 'video movie', - 'tv mini series', 'video game', 'episode')} -companyTypeDefs = {'kind': ('distributors', 'production companies', - 'special effects companies', 'miscellaneous companies')} -infoTypeDefs = {'info': ('runtimes', 'color info', 'genres', 'languages', - 'certificates', 'sound mix', 'tech info', 'countries', 'taglines', - 'keywords', 'alternate versions', 'crazy credits', 'goofs', - 'soundtrack', 'quotes', 'release dates', 'trivia', 'locations', - 'mini biography', 'birth notes', 'birth date', 'height', - 'death date', 'spouse', 'other works', 'birth name', - 'salary history', 'nick names', 'books', 'agent address', - 'biographical movies', 'portrayed in', 'where now', 'trade mark', - 'interviews', 'article', 'magazine cover photo', 
'pictorial', - 'death notes', 'LD disc format', 'LD year', 'LD digital sound', - 'LD official retail price', 'LD frequency response', 'LD pressing plant', - 'LD length', 'LD language', 'LD review', 'LD spaciality', 'LD release date', - 'LD production country', 'LD contrast', 'LD color rendition', - 'LD picture format', 'LD video noise', 'LD video artifacts', - 'LD release country', 'LD sharpness', 'LD dynamic range', - 'LD audio noise', 'LD color information', 'LD group genre', - 'LD quality program', 'LD close captions-teletext-ld-g', - 'LD category', 'LD analog left', 'LD certification', - 'LD audio quality', 'LD video quality', 'LD aspect ratio', - 'LD analog right', 'LD additional information', - 'LD number of chapter stops', 'LD dialogue intellegibility', - 'LD disc size', 'LD master format', 'LD subtitles', - 'LD status of availablility', 'LD quality of source', - 'LD number of sides', 'LD video standard', 'LD supplement', - 'LD original title', 'LD sound encoding', 'LD number', 'LD label', - 'LD catalog number', 'LD laserdisc title', 'screenplay-teleplay', - 'novel', 'adaption', 'book', 'production process protocol', - 'printed media reviews', 'essays', 'other literature', 'mpaa', - 'plot', 'votes distribution', 'votes', 'rating', - 'production dates', 'copyright holder', 'filming dates', 'budget', - 'weekend gross', 'gross', 'opening weekend', 'rentals', - 'admissions', 'studios', 'top 250 rank', 'bottom 10 rank')} -compCastTypeDefs = {'kind': ('cast', 'crew', 'complete', 'complete+verified')} -linkTypeDefs = {'link': ('follows', 'followed by', 'remake of', 'remade as', - 'references', 'referenced in', 'spoofs', 'spoofed in', - 'features', 'featured in', 'spin off from', 'spin off', - 'version of', 'similar to', 'edited into', - 'edited from', 'alternate language version of', - 'unknown link')} -roleTypeDefs = {'role': ('actor', 'actress', 'producer', 'writer', - 'cinematographer', 'composer', 'costume designer', - 'director', 'editor', 'miscellaneous 
crew', - 'production designer', 'guest')} - -# Schema of tables in our database. -# XXX: Foreign keys can be used to create constrains between tables, -# but they create indexes in the database, and this -# means poor performances at insert-time. -DB_SCHEMA = [ - DBTable('Name', - # namePcodeCf is the soundex of the name in the canonical format. - # namePcodeNf is the soundex of the name in the normal format, if - # different from namePcodeCf. - # surnamePcode is the soundex of the surname, if different from the - # other two values. - - # The 'id' column is simply skipped by SQLObject (it's a default); - # the alternateID attribute here will be ignored by SQLAlchemy. - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6), - DBCol('imdbIndex', UNICODECOL, length=12, default=None), - DBCol('imdbID', INTCOL, default=None), - DBCol('namePcodeCf', STRINGCOL, length=5, default=None, - index='idx_pcodecf'), - DBCol('namePcodeNf', STRINGCOL, length=5, default=None, - index='idx_pcodenf'), - DBCol('surnamePcode', STRINGCOL, length=5, default=None, - index='idx_pcode'), - DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5') - ), - - DBTable('CharName', - # namePcodeNf is the soundex of the name in the normal format. - # surnamePcode is the soundex of the surname, if different - # from namePcodeNf. - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6), - DBCol('imdbIndex', UNICODECOL, length=12, default=None), - DBCol('imdbID', INTCOL, default=None), - DBCol('namePcodeNf', STRINGCOL, length=5, default=None, - index='idx_pcodenf'), - DBCol('surnamePcode', STRINGCOL, length=5, default=None, - index='idx_pcode'), - DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5') - ), - - DBTable('CompanyName', - # namePcodeNf is the soundex of the name in the normal format. 
- # namePcodeSf is the soundex of the name plus the country code. - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6), - DBCol('countryCode', UNICODECOL, length=255, default=None), - DBCol('imdbID', INTCOL, default=None), - DBCol('namePcodeNf', STRINGCOL, length=5, default=None, - index='idx_pcodenf'), - DBCol('namePcodeSf', STRINGCOL, length=5, default=None, - index='idx_pcodesf'), - DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5') - ), - - DBTable('KindType', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('kind', STRINGCOL, length=15, default=None, alternateID=True), - values=kindTypeDefs - ), - - DBTable('Title', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('title', UNICODECOL, notNone=True, - index='idx_title', indexLen=10), - DBCol('imdbIndex', UNICODECOL, length=12, default=None), - DBCol('kindID', INTCOL, notNone=True, foreignKey='KindType'), - DBCol('productionYear', INTCOL, default=None), - DBCol('imdbID', INTCOL, default=None), - DBCol('phoneticCode', STRINGCOL, length=5, default=None, - index='idx_pcode'), - DBCol('episodeOfID', INTCOL, default=None, index='idx_epof', - foreignKey='Title'), - DBCol('seasonNr', INTCOL, default=None), - DBCol('episodeNr', INTCOL, default=None), - # Maximum observed length is 44; 49 can store 5 comma-separated - # year-year pairs. 
- DBCol('seriesYears', STRINGCOL, length=49, default=None), - DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5') - ), - - DBTable('CompanyType', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('kind', STRINGCOL, length=32, default=None, alternateID=True), - values=companyTypeDefs - ), - - DBTable('AkaName', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('personID', INTCOL, notNone=True, index='idx_person', - foreignKey='Name'), - DBCol('name', UNICODECOL, notNone=True), - DBCol('imdbIndex', UNICODECOL, length=12, default=None), - DBCol('namePcodeCf', STRINGCOL, length=5, default=None, - index='idx_pcodecf'), - DBCol('namePcodeNf', STRINGCOL, length=5, default=None, - index='idx_pcodenf'), - DBCol('surnamePcode', STRINGCOL, length=5, default=None, - index='idx_pcode'), - DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5') - ), - - DBTable('AkaTitle', - # XXX: It's safer to set notNone to False, here. - # alias for akas are stored completely in the AkaTitle table; - # this means that episodes will set also a "tv series" alias name. - # Reading the aka-title.list file it looks like there are - # episode titles with aliases to different titles for both - # the episode and the series title, while for just the series - # there are no aliases. 
- # E.g.: - # aka title original title - # "Series, The" (2005) {The Episode} "Other Title" (2005) {Other Title} - # But there is no: - # "Series, The" (2005) "Other Title" (2005) - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('movieID', INTCOL, notNone=True, index='idx_movieid', - foreignKey='Title'), - DBCol('title', UNICODECOL, notNone=True), - DBCol('imdbIndex', UNICODECOL, length=12, default=None), - DBCol('kindID', INTCOL, notNone=True, foreignKey='KindType'), - DBCol('productionYear', INTCOL, default=None), - DBCol('phoneticCode', STRINGCOL, length=5, default=None, - index='idx_pcode'), - DBCol('episodeOfID', INTCOL, default=None, index='idx_epof', - foreignKey='AkaTitle'), - DBCol('seasonNr', INTCOL, default=None), - DBCol('episodeNr', INTCOL, default=None), - DBCol('note', UNICODECOL, default=None), - DBCol('md5sum', STRINGCOL, length=32, default=None, index='idx_md5') - ), - - DBTable('RoleType', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('role', STRINGCOL, length=32, notNone=True, alternateID=True), - values=roleTypeDefs - ), - - DBTable('CastInfo', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('personID', INTCOL, notNone=True, index='idx_pid', - foreignKey='Name'), - DBCol('movieID', INTCOL, notNone=True, index='idx_mid', - foreignKey='Title'), - DBCol('personRoleID', INTCOL, default=None, index='idx_cid', - foreignKey='CharName'), - DBCol('note', UNICODECOL, default=None), - DBCol('nrOrder', INTCOL, default=None), - DBCol('roleID', INTCOL, notNone=True, foreignKey='RoleType') - ), - - DBTable('CompCastType', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('kind', STRINGCOL, length=32, notNone=True, alternateID=True), - values=compCastTypeDefs - ), - - DBTable('CompleteCast', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('movieID', INTCOL, index='idx_mid', foreignKey='Title'), - DBCol('subjectID', INTCOL, notNone=True, foreignKey='CompCastType'), - DBCol('statusID', 
INTCOL, notNone=True, foreignKey='CompCastType') - ), - - DBTable('InfoType', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('info', STRINGCOL, length=32, notNone=True, alternateID=True), - values=infoTypeDefs - ), - - DBTable('LinkType', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('link', STRINGCOL, length=32, notNone=True, alternateID=True), - values=linkTypeDefs - ), - - DBTable('Keyword', - DBCol('id', INTCOL, notNone=True, alternateID=True), - # XXX: can't use alternateID=True, because it would create - # a UNIQUE index; unfortunately (at least with a common - # collation like utf8_unicode_ci) MySQL will consider - # some different keywords identical - like - # "fiancée" and "fiancee". - DBCol('keyword', UNICODECOL, length=255, notNone=True, - index='idx_keyword', indexLen=5), - DBCol('phoneticCode', STRINGCOL, length=5, default=None, - index='idx_pcode') - ), - - DBTable('MovieKeyword', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('movieID', INTCOL, notNone=True, index='idx_mid', - foreignKey='Title'), - DBCol('keywordID', INTCOL, notNone=True, index='idx_keywordid', - foreignKey='Keyword') - ), - - DBTable('MovieLink', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('movieID', INTCOL, notNone=True, index='idx_mid', - foreignKey='Title'), - DBCol('linkedMovieID', INTCOL, notNone=True, foreignKey='Title'), - DBCol('linkTypeID', INTCOL, notNone=True, foreignKey='LinkType') - ), - - DBTable('MovieInfo', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('movieID', INTCOL, notNone=True, index='idx_mid', - foreignKey='Title'), - DBCol('infoTypeID', INTCOL, notNone=True, foreignKey='InfoType'), - DBCol('info', UNICODECOL, notNone=True), - DBCol('note', UNICODECOL, default=None) - ), - - # This table is identical to MovieInfo, except that both 'infoTypeID' - # and 'info' are indexed. 
- DBTable('MovieInfoIdx', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('movieID', INTCOL, notNone=True, index='idx_mid', - foreignKey='Title'), - DBCol('infoTypeID', INTCOL, notNone=True, index='idx_infotypeid', - foreignKey='InfoType'), - DBCol('info', UNICODECOL, notNone=True, index='idx_info', indexLen=10), - DBCol('note', UNICODECOL, default=None) - ), - - DBTable('MovieCompanies', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('movieID', INTCOL, notNone=True, index='idx_mid', - foreignKey='Title'), - DBCol('companyID', INTCOL, notNone=True, index='idx_cid', - foreignKey='CompanyName'), - DBCol('companyTypeID', INTCOL, notNone=True, foreignKey='CompanyType'), - DBCol('note', UNICODECOL, default=None) - ), - - DBTable('PersonInfo', - DBCol('id', INTCOL, notNone=True, alternateID=True), - DBCol('personID', INTCOL, notNone=True, index='idx_pid', - foreignKey='Name'), - DBCol('infoTypeID', INTCOL, notNone=True, foreignKey='InfoType'), - DBCol('info', UNICODECOL, notNone=True), - DBCol('note', UNICODECOL, default=None) - ) -] - - -# Functions to manage tables. -def dropTables(tables, ifExists=True): - """Drop the tables.""" - # In reverse order (useful to avoid errors about foreign keys). - DB_TABLES_DROP = list(tables) - DB_TABLES_DROP.reverse() - for table in DB_TABLES_DROP: - _dbschema_logger.info('dropping table %s', table._imdbpyName) - table.dropTable(ifExists) - -def createTables(tables, ifNotExists=True): - """Create the tables and insert default values.""" - for table in tables: - # Create the table. - _dbschema_logger.info('creating table %s', table._imdbpyName) - table.createTable(ifNotExists) - # Insert default values, if any. 
- if table._imdbpySchema.values: - _dbschema_logger.info('inserting values into table %s', - table._imdbpyName) - for key in table._imdbpySchema.values: - for value in table._imdbpySchema.values[key]: - table(**{key: unicode(value)}) - -def createIndexes(tables, ifNotExists=True): - """Create the indexes in the database.""" - for table in tables: - _dbschema_logger.info('creating indexes for table %s', - table._imdbpyName) - table.addIndexes(ifNotExists) - -def createForeignKeys(tables, ifNotExists=True): - """Create Foreign Keys.""" - mapTables = {} - for table in tables: - mapTables[table._imdbpyName] = table - for table in tables: - _dbschema_logger.info('creating foreign keys for table %s', - table._imdbpyName) - table.addForeignKeys(mapTables, ifNotExists) - diff --git a/libs/imdb/parser/sql/objectadapter.py b/libs/imdb/parser/sql/objectadapter.py deleted file mode 100644 index b7ca9851..00000000 --- a/libs/imdb/parser/sql/objectadapter.py +++ /dev/null @@ -1,203 +0,0 @@ -""" -parser.sql.objectadapter module (imdb.parser.sql package). - -This module adapts the SQLObject ORM to the internal mechanism. - -Copyright 2008-2010 Davide Alberani - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -import sys -import logging - -from sqlobject import * -from sqlobject.sqlbuilder import ISNULL, ISNOTNULL, AND, OR, IN, CONTAINSSTRING - -from dbschema import * - -_object_logger = logging.getLogger('imdbpy.parser.sql.object') - - -# Maps our placeholders to SQLAlchemy's column types. -MAP_COLS = { - INTCOL: IntCol, - UNICODECOL: UnicodeCol, - STRINGCOL: StringCol -} - - -# Exception raised when Table.get(id) returns no value. -NotFoundError = SQLObjectNotFound - - -# class method to be added to the SQLObject class. -def addIndexes(cls, ifNotExists=True): - """Create all required indexes.""" - for col in cls._imdbpySchema.cols: - if col.index: - idxName = col.index - colToIdx = col.name - if col.indexLen: - colToIdx = {'column': col.name, 'length': col.indexLen} - if idxName in [i.name for i in cls.sqlmeta.indexes]: - # Check if the index is already present. - continue - idx = DatabaseIndex(colToIdx, name=idxName) - cls.sqlmeta.addIndex(idx) - try: - cls.createIndexes(ifNotExists) - except dberrors.OperationalError, e: - _object_logger.warn('Skipping creation of the %s.%s index: %s' % - (cls.sqlmeta.table, col.name, e)) -addIndexes = classmethod(addIndexes) - - -# Global repository for "fake" tables with Foreign Keys - need to -# prevent troubles if addForeignKeys is called more than one time. -FAKE_TABLES_REPOSITORY = {} - -def _buildFakeFKTable(cls, fakeTableName): - """Return a "fake" table, with foreign keys where needed.""" - countCols = 0 - attrs = {} - for col in cls._imdbpySchema.cols: - countCols += 1 - if col.name == 'id': - continue - if not col.foreignKey: - # A non-foreign key column - add it as usual. - attrs[col.name] = MAP_COLS[col.kind](**col.params) - continue - # XXX: Foreign Keys pointing to TableName.ColName not yet supported. 
- thisColName = col.name - if thisColName.endswith('ID'): - thisColName = thisColName[:-2] - - fks = col.foreignKey.split('.', 1) - foreignTableName = fks[0] - if len(fks) == 2: - foreignColName = fks[1] - else: - foreignColName = 'id' - # Unused... - #fkName = 'fk_%s_%s_%d' % (foreignTableName, foreignColName, - # countCols) - # Create a Foreign Key column, with the correct references. - fk = ForeignKey(foreignTableName, name=thisColName, default=None) - attrs[thisColName] = fk - # Build a _NEW_ SQLObject subclass, with foreign keys, if needed. - newcls = type(fakeTableName, (SQLObject,), attrs) - return newcls - -def addForeignKeys(cls, mapTables, ifNotExists=True): - """Create all required foreign keys.""" - # Do not even try, if there are no FK, in this table. - if not filter(None, [col.foreignKey for col in cls._imdbpySchema.cols]): - return - fakeTableName = 'myfaketable%s' % cls.sqlmeta.table - if fakeTableName in FAKE_TABLES_REPOSITORY: - newcls = FAKE_TABLES_REPOSITORY[fakeTableName] - else: - newcls = _buildFakeFKTable(cls, fakeTableName) - FAKE_TABLES_REPOSITORY[fakeTableName] = newcls - # Connect the class with foreign keys. - newcls.setConnection(cls._connection) - for col in cls._imdbpySchema.cols: - if col.name == 'id': - continue - if not col.foreignKey: - continue - # Get the SQL that _WOULD BE_ run, if we had to create - # this "fake" table. - fkQuery = newcls._connection.createReferenceConstraint(newcls, - newcls.sqlmeta.columns[col.name]) - if not fkQuery: - # Probably the db doesn't support foreign keys (SQLite). - continue - # Remove "myfaketable" to get references to _real_ tables. - fkQuery = fkQuery.replace('myfaketable', '') - # Execute the query. - newcls._connection.query(fkQuery) - # Disconnect it. 
- newcls._connection.close() -addForeignKeys = classmethod(addForeignKeys) - - -# Module-level "cache" for SQLObject classes, to prevent -# "class TheClass is already in the registry" errors, when -# two or more connections to the database are made. -# XXX: is this the best way to act? -TABLES_REPOSITORY = {} - -def getDBTables(uri=None): - """Return a list of classes to be used to access the database - through the SQLObject ORM. The connection uri is optional, and - can be used to tailor the db schema to specific needs.""" - DB_TABLES = [] - for table in DB_SCHEMA: - if table.name in TABLES_REPOSITORY: - DB_TABLES.append(TABLES_REPOSITORY[table.name]) - continue - attrs = {'_imdbpyName': table.name, '_imdbpySchema': table, - 'addIndexes': addIndexes, 'addForeignKeys': addForeignKeys} - for col in table.cols: - if col.name == 'id': - continue - attrs[col.name] = MAP_COLS[col.kind](**col.params) - # Create a subclass of SQLObject. - # XXX: use a metaclass? I can't see any advantage. - cls = type(table.name, (SQLObject,), attrs) - DB_TABLES.append(cls) - TABLES_REPOSITORY[table.name] = cls - return DB_TABLES - - -def toUTF8(s): - """For some strange reason, sometimes SQLObject wants utf8 strings - instead of unicode.""" - return s.encode('utf_8') - - -def setConnection(uri, tables, encoding='utf8', debug=False): - """Set connection for every table.""" - kw = {} - # FIXME: it's absolutely unclear what we should do to correctly - # support unicode in MySQL; with some versions of SQLObject, - # it seems that setting use_unicode=1 is the _wrong_ thing to do. 
- _uriLower = uri.lower() - if _uriLower.startswith('mysql'): - kw['use_unicode'] = 1 - #kw['sqlobject_encoding'] = encoding - kw['charset'] = encoding - conn = connectionForURI(uri, **kw) - conn.debug = debug - if uri.startswith('sqlite'): - major = sys.version_info[0] - minor = sys.version_info[1] - if major > 2 or (major == 2 and minor > 5): - conn.connection.connection.text_factory = str - for table in tables: - table.setConnection(conn) - #table.sqlmeta.cacheValues = False - # FIXME: is it safe to set table._cacheValue to False? Looks like - # we can't retrieve correct values after an update (I think - # it's never needed, but...) Anyway, these are set to False - # for performance reason at insert time (see imdbpy2sql.py). - table._cacheValue = False - # Required by imdbpy2sql.py. - conn.paramstyle = conn.module.paramstyle - return conn - diff --git a/libs/imdb/utils.py b/libs/imdb/utils.py deleted file mode 100644 index e2d9551d..00000000 --- a/libs/imdb/utils.py +++ /dev/null @@ -1,1545 +0,0 @@ -""" -utils module (imdb package). - -This module provides basic utilities for the imdb package. - -Copyright 2004-2010 Davide Alberani - 2009 H. Turgut Uyar - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -""" - -from __future__ import generators -import re -import string -import logging -from copy import copy, deepcopy -from time import strptime, strftime - -from imdb import VERSION -from imdb import articles -from imdb._exceptions import IMDbParserError - - -# Logger for imdb.utils module. -_utils_logger = logging.getLogger('imdbpy.utils') - -# The regular expression for the "long" year format of IMDb, like -# "(1998)" and "(1986/II)", where the optional roman number (that I call -# "imdbIndex" after the slash is used for movies with the same title -# and year of release. -# XXX: probably L, C, D and M are far too much! ;-) -re_year_index = re.compile(r'\(([0-9\?]{4}(/[IVXLCDM]+)?)\)') - -# Match only the imdbIndex (for name strings). -re_index = re.compile(r'^\(([IVXLCDM]+)\)$') - -# Match the number of episodes. -re_episodes = re.compile('\s?\((\d+) episodes\)', re.I) -re_episode_info = re.compile(r'{\s*(.+?)?\s?(\([0-9\?]{4}-[0-9\?]{1,2}-[0-9\?]{1,2}\))?\s?(\(#[0-9]+\.[0-9]+\))?}') - -# Common suffixes in surnames. -_sname_suffixes = ('de', 'la', 'der', 'den', 'del', 'y', 'da', 'van', - 'e', 'von', 'the', 'di', 'du', 'el', 'al') - -def canonicalName(name): - """Return the given name in canonical "Surname, Name" format. - It assumes that name is in the 'Name Surname' format.""" - # XXX: some statistics (as of 17 Apr 2008, over 2288622 names): - # - just a surname: 69476 - # - single surname, single name: 2209656 - # - composed surname, composed name: 9490 - # - composed surname, single name: 67606 - # (2: 59764, 3: 6862, 4: 728) - # - single surname, composed name: 242310 - # (2: 229467, 3: 9901, 4: 2041, 5: 630) - # - Jr.: 8025 - # Don't convert names already in the canonical format. 
- if name.find(', ') != -1: return name - if isinstance(name, unicode): - joiner = u'%s, %s' - sur_joiner = u'%s %s' - sur_space = u' %s' - space = u' ' - else: - joiner = '%s, %s' - sur_joiner = '%s %s' - sur_space = ' %s' - space = ' ' - sname = name.split(' ') - snl = len(sname) - if snl == 2: - # Just a name and a surname: how boring... - name = joiner % (sname[1], sname[0]) - elif snl > 2: - lsname = [x.lower() for x in sname] - if snl == 3: _indexes = (0, snl-2) - else: _indexes = (0, snl-2, snl-3) - # Check for common surname prefixes at the beginning and near the end. - for index in _indexes: - if lsname[index] not in _sname_suffixes: continue - try: - # Build the surname. - surn = sur_joiner % (sname[index], sname[index+1]) - del sname[index] - del sname[index] - try: - # Handle the "Jr." after the name. - if lsname[index+2].startswith('jr'): - surn += sur_space % sname[index] - del sname[index] - except (IndexError, ValueError): - pass - name = joiner % (surn, space.join(sname)) - break - except ValueError: - continue - else: - name = joiner % (sname[-1], space.join(sname[:-1])) - return name - -def normalizeName(name): - """Return a name in the normal "Name Surname" format.""" - if isinstance(name, unicode): - joiner = u'%s %s' - else: - joiner = '%s %s' - sname = name.split(', ') - if len(sname) == 2: - name = joiner % (sname[1], sname[0]) - return name - -def analyze_name(name, canonical=None): - """Return a dictionary with the name and the optional imdbIndex - keys, from the given string. - - If canonical is None (default), the name is stored in its own style. - If canonical is True, the name is converted to canonical style. - If canonical is False, the name is converted to normal format. - - raise an IMDbParserError exception if the name is not valid. 
- """ - original_n = name - name = name.strip() - res = {} - imdbIndex = '' - opi = name.rfind('(') - if opi != -1: - cpi = name.rfind(')') - if cpi > opi and re_index.match(name[opi:cpi+1]): - imdbIndex = name[opi+1:cpi] - name = name[:opi].rstrip() - else: - # XXX: for the birth and death dates case like " (1926-2004)" - name = name[:opi-1] - if not name: - raise IMDbParserError, 'invalid name: "%s"' % original_n - if canonical is not None: - if canonical: - name = canonicalName(name) - else: - name = normalizeName(name) - res['name'] = name - if imdbIndex: - res['imdbIndex'] = imdbIndex - return res - - -def build_name(name_dict, canonical=None): - """Given a dictionary that represents a "long" IMDb name, - return a string. - If canonical is None (default), the name is returned in the stored style. - If canonical is True, the name is converted to canonical style. - If canonical is False, the name is converted to normal format. - """ - name = name_dict.get('canonical name') or name_dict.get('name', '') - if not name: return '' - if canonical is not None: - if canonical: - name = canonicalName(name) - else: - name = normalizeName(name) - imdbIndex = name_dict.get('imdbIndex') - if imdbIndex: - name += ' (%s)' % imdbIndex - return name - - -# XXX: here only for backward compatibility. Find and remove any dependency. -_articles = articles.GENERIC_ARTICLES -_unicodeArticles = articles.toUnicode(_articles) -articlesDicts = articles.articlesDictsForLang(None) -spArticles = articles.spArticlesForLang(None) - -def canonicalTitle(title, lang=None): - """Return the title in the canonic format 'Movie Title, The'; - beware that it doesn't handle long imdb titles, but only the - title portion, without year[/imdbIndex] or special markup. - The 'lang' argument can be used to specify the language of the title. 
- """ - isUnicode = isinstance(title, unicode) - articlesDicts = articles.articlesDictsForLang(lang) - try: - if title.split(', ')[-1].lower() in articlesDicts[isUnicode]: - return title - except IndexError: - pass - if isUnicode: - _format = u'%s, %s' - else: - _format = '%s, %s' - ltitle = title.lower() - spArticles = articles.spArticlesForLang(lang) - for article in spArticles[isUnicode]: - if ltitle.startswith(article): - lart = len(article) - title = _format % (title[lart:], title[:lart]) - if article[-1] == ' ': - title = title[:-1] - break - ## XXX: an attempt using a dictionary lookup. - ##for artSeparator in (' ', "'", '-'): - ## article = _articlesDict.get(ltitle.split(artSeparator)[0]) - ## if article is not None: - ## lart = len(article) - ## # check titles like "una", "I'm Mad" and "L'abbacchio". - ## if title[lart:] == '' or (artSeparator != ' ' and - ## title[lart:][1] != artSeparator): continue - ## title = '%s, %s' % (title[lart:], title[:lart]) - ## if artSeparator == ' ': title = title[1:] - ## break - return title - -def normalizeTitle(title, lang=None): - """Return the title in the normal "The Title" format; - beware that it doesn't handle long imdb titles, but only the - title portion, without year[/imdbIndex] or special markup. - The 'lang' argument can be used to specify the language of the title. - """ - isUnicode = isinstance(title, unicode) - stitle = title.split(', ') - articlesDicts = articles.articlesDictsForLang(lang) - if len(stitle) > 1 and stitle[-1].lower() in articlesDicts[isUnicode]: - sep = ' ' - if stitle[-1][-1] in ("'", '-'): - sep = '' - if isUnicode: - _format = u'%s%s%s' - _joiner = u', ' - else: - _format = '%s%s%s' - _joiner = ', ' - title = _format % (stitle[-1], sep, _joiner.join(stitle[:-1])) - return title - - -def _split_series_episode(title): - """Return the series and the episode titles; if this is not a - series' episode, the returned series title is empty. 
- This function recognize two different styles: - "The Series" An Episode (2005) - "The Series" (2004) {An Episode (2005) (#season.episode)}""" - series_title = '' - episode_or_year = '' - if title[-1:] == '}': - # Title of the episode, as in the plain text data files. - begin_eps = title.rfind('{') - if begin_eps == -1: return '', '' - series_title = title[:begin_eps].rstrip() - # episode_or_year is returned with the {...} - episode_or_year = title[begin_eps:].strip() - if episode_or_year[:12] == '{SUSPENDED}}': return '', '' - # XXX: works only with tv series; it's still unclear whether - # IMDb will support episodes for tv mini series and tv movies... - elif title[0:1] == '"': - second_quot = title[1:].find('"') + 2 - if second_quot != 1: # a second " was found. - episode_or_year = title[second_quot:].lstrip() - first_char = episode_or_year[0:1] - if not first_char: return '', '' - if first_char != '(': - # There is not a (year) but the title of the episode; - # that means this is an episode title, as returned by - # the web server. - series_title = title[:second_quot] - ##elif episode_or_year[-1:] == '}': - ## # Title of the episode, as in the plain text data files. 
- ## begin_eps = episode_or_year.find('{') - ## if begin_eps == -1: return series_title, episode_or_year - ## series_title = title[:second_quot+begin_eps].rstrip() - ## # episode_or_year is returned with the {...} - ## episode_or_year = episode_or_year[begin_eps:] - return series_title, episode_or_year - - -def is_series_episode(title): - """Return True if 'title' is an series episode.""" - title = title.strip() - if _split_series_episode(title)[0]: return 1 - return 0 - - -def analyze_title(title, canonical=None, canonicalSeries=None, - canonicalEpisode=None, _emptyString=u''): - """Analyze the given title and return a dictionary with the - "stripped" title, the kind of the show ("movie", "tv series", etc.), - the year of production and the optional imdbIndex (a roman number - used to distinguish between movies with the same title and year). - - If canonical is None (default), the title is stored in its own style. - If canonical is True, the title is converted to canonical style. - If canonical is False, the title is converted to normal format. - - raise an IMDbParserError exception if the title is not valid. - """ - # XXX: introduce the 'lang' argument? - if canonical is not None: - canonicalSeries = canonicalEpisode = canonical - original_t = title - result = {} - title = title.strip() - year = _emptyString - kind = _emptyString - imdbIndex = _emptyString - series_title, episode_or_year = _split_series_episode(title) - if series_title: - # It's an episode of a series. - series_d = analyze_title(series_title, canonical=canonicalSeries) - oad = sen = ep_year = _emptyString - # Plain text data files format. 
- if episode_or_year[0:1] == '{' and episode_or_year[-1:] == '}': - match = re_episode_info.findall(episode_or_year) - if match: - # Episode title, original air date and #season.episode - episode_or_year, oad, sen = match[0] - episode_or_year = episode_or_year.strip() - if not oad: - # No year, but the title is something like (2005-04-12) - if episode_or_year and episode_or_year[0] == '(' and \ - episode_or_year[-1:] == ')' and \ - episode_or_year[1:2] != '#': - oad = episode_or_year - if oad[1:5] and oad[5:6] == '-': - try: - ep_year = int(oad[1:5]) - except (TypeError, ValueError): - pass - if not oad and not sen and episode_or_year.startswith('(#'): - sen = episode_or_year - elif episode_or_year.startswith('Episode dated'): - oad = episode_or_year[14:] - if oad[-4:].isdigit(): - try: - ep_year = int(oad[-4:]) - except (TypeError, ValueError): - pass - episode_d = analyze_title(episode_or_year, canonical=canonicalEpisode) - episode_d['kind'] = u'episode' - episode_d['episode of'] = series_d - if oad: - episode_d['original air date'] = oad[1:-1] - if ep_year and episode_d.get('year') is None: - episode_d['year'] = ep_year - if sen and sen[2:-1].find('.') != -1: - seas, epn = sen[2:-1].split('.') - if seas: - # Set season and episode. - try: seas = int(seas) - except: pass - try: epn = int(epn) - except: pass - episode_d['season'] = seas - if epn: - episode_d['episode'] = epn - return episode_d - # First of all, search for the kind of show. 
- # XXX: Number of entries at 17 Apr 2008: - # movie: 379,871 - # episode: 483,832 - # tv movie: 61,119 - # tv series: 44,795 - # video movie: 57,915 - # tv mini series: 5,497 - # video game: 5,490 - # More up-to-date statistics: http://us.imdb.com/database_statistics - if title.endswith('(TV)'): - kind = u'tv movie' - title = title[:-4].rstrip() - elif title.endswith('(V)'): - kind = u'video movie' - title = title[:-3].rstrip() - elif title.endswith('(video)'): - kind = u'video movie' - title = title[:-7].rstrip() - elif title.endswith('(mini)'): - kind = u'tv mini series' - title = title[:-6].rstrip() - elif title.endswith('(VG)'): - kind = u'video game' - title = title[:-4].rstrip() - # Search for the year and the optional imdbIndex (a roman number). - yi = re_year_index.findall(title) - if yi: - last_yi = yi[-1] - year = last_yi[0] - if last_yi[1]: - imdbIndex = last_yi[1][1:] - year = year[:-len(imdbIndex)-1] - i = title.rfind('(%s)' % last_yi[0]) - if i != -1: - title = title[:i-1].rstrip() - # This is a tv (mini) series: strip the '"' at the begin and at the end. - # XXX: strip('"') is not used for compatibility with Python 2.0. 
- if title and title[0] == title[-1] == '"': - if not kind: - kind = u'tv series' - title = title[1:-1].strip() - elif title.endswith('(TV series)'): - kind = u'tv series' - title = title[:-11].rstrip() - if not title: - raise IMDbParserError, 'invalid title: "%s"' % original_t - if canonical is not None: - if canonical: - title = canonicalTitle(title) - else: - title = normalizeTitle(title) - # 'kind' is one in ('movie', 'episode', 'tv series', 'tv mini series', - # 'tv movie', 'video movie', 'video game') - result['title'] = title - result['kind'] = kind or u'movie' - if year and year != '????': - try: - result['year'] = int(year) - except (TypeError, ValueError): - pass - if imdbIndex: - result['imdbIndex'] = imdbIndex - if isinstance(_emptyString, str): - result['kind'] = str(kind or 'movie') - return result - - -_web_format = '%d %B %Y' -_ptdf_format = '(%Y-%m-%d)' -def _convertTime(title, fromPTDFtoWEB=1, _emptyString=u''): - """Convert a time expressed in the pain text data files, to - the 'Episode dated ...' format used on the web site; if - fromPTDFtoWEB is false, the inverted conversion is applied.""" - try: - if fromPTDFtoWEB: - from_format = _ptdf_format - to_format = _web_format - else: - from_format = u'Episode dated %s' % _web_format - to_format = _ptdf_format - t = strptime(title, from_format) - title = strftime(to_format, t) - if fromPTDFtoWEB: - if title[0] == '0': title = title[1:] - title = u'Episode dated %s' % title - except ValueError: - pass - if isinstance(_emptyString, str): - try: - title = str(title) - except UnicodeDecodeError: - pass - return title - - -def build_title(title_dict, canonical=None, canonicalSeries=None, - canonicalEpisode=None, ptdf=0, lang=None, _doYear=1, - _emptyString=u''): - """Given a dictionary that represents a "long" IMDb title, - return a string. - - If canonical is None (default), the title is returned in the stored style. - If canonical is True, the title is converted to canonical style. 
- If canonical is False, the title is converted to normal format. - - lang can be used to specify the language of the title. - - If ptdf is true, the plain text data files format is used. - """ - if canonical is not None: - canonicalSeries = canonical - pre_title = _emptyString - kind = title_dict.get('kind') - episode_of = title_dict.get('episode of') - if kind == 'episode' and episode_of is not None: - # Works with both Movie instances and plain dictionaries. - doYear = 0 - if ptdf: - doYear = 1 - pre_title = build_title(episode_of, canonical=canonicalSeries, - ptdf=0, _doYear=doYear, - _emptyString=_emptyString) - ep_dict = {'title': title_dict.get('title', ''), - 'imdbIndex': title_dict.get('imdbIndex')} - ep_title = ep_dict['title'] - if not ptdf: - doYear = 1 - ep_dict['year'] = title_dict.get('year', '????') - if ep_title[0:1] == '(' and ep_title[-1:] == ')' and \ - ep_title[1:5].isdigit(): - ep_dict['title'] = _convertTime(ep_title, fromPTDFtoWEB=1, - _emptyString=_emptyString) - else: - doYear = 0 - if ep_title.startswith('Episode dated'): - ep_dict['title'] = _convertTime(ep_title, fromPTDFtoWEB=0, - _emptyString=_emptyString) - episode_title = build_title(ep_dict, - canonical=canonicalEpisode, ptdf=ptdf, - _doYear=doYear, _emptyString=_emptyString) - if ptdf: - oad = title_dict.get('original air date', _emptyString) - if len(oad) == 10 and oad[4] == '-' and oad[7] == '-' and \ - episode_title.find(oad) == -1: - episode_title += ' (%s)' % oad - seas = title_dict.get('season') - if seas is not None: - episode_title += ' (#%s' % seas - episode = title_dict.get('episode') - if episode is not None: - episode_title += '.%s' % episode - episode_title += ')' - episode_title = '{%s}' % episode_title - return '%s %s' % (pre_title, episode_title) - title = title_dict.get('title', '') - if not title: return _emptyString - if canonical is not None: - if canonical: - title = canonicalTitle(title, lang=lang) - else: - title = normalizeTitle(title, lang=lang) - if 
pre_title: - title = '%s %s' % (pre_title, title) - if kind in (u'tv series', u'tv mini series'): - title = '"%s"' % title - if _doYear: - imdbIndex = title_dict.get('imdbIndex') - year = title_dict.get('year') or u'????' - if isinstance(_emptyString, str): - year = str(year) - title += ' (%s' % year - if imdbIndex: - title += '/%s' % imdbIndex - title += ')' - if kind: - if kind == 'tv movie': - title += ' (TV)' - elif kind == 'video movie': - title += ' (V)' - elif kind == 'tv mini series': - title += ' (mini)' - elif kind == 'video game': - title += ' (VG)' - return title - - -def split_company_name_notes(name): - """Return two strings, the first representing the company name, - and the other representing the (optional) notes.""" - name = name.strip() - notes = u'' - if name.endswith(')'): - fpidx = name.find('(') - if fpidx != -1: - notes = name[fpidx:] - name = name[:fpidx].rstrip() - return name, notes - - -def analyze_company_name(name, stripNotes=False): - """Return a dictionary with the name and the optional 'country' - keys, from the given string. - If stripNotes is true, tries to not consider optional notes. - - raise an IMDbParserError exception if the name is not valid. - """ - if stripNotes: - name = split_company_name_notes(name)[0] - o_name = name - name = name.strip() - country = None - if name.endswith(']'): - idx = name.rfind('[') - if idx != -1: - country = name[idx:] - name = name[:idx].rstrip() - if not name: - raise IMDbParserError, 'invalid name: "%s"' % o_name - result = {'name': name} - if country: - result['country'] = country - return result - - -def build_company_name(name_dict, _emptyString=u''): - """Given a dictionary that represents a "long" IMDb company name, - return a string. 
- """ - name = name_dict.get('name') - if not name: - return _emptyString - country = name_dict.get('country') - if country is not None: - name += ' %s' % country - return name - - -class _LastC: - """Size matters.""" - def __cmp__(self, other): - if isinstance(other, self.__class__): return 0 - return 1 - -_last = _LastC() - -def cmpMovies(m1, m2): - """Compare two movies by year, in reverse order; the imdbIndex is checked - for movies with the same year of production and title.""" - # Sort tv series' episodes. - m1e = m1.get('episode of') - m2e = m2.get('episode of') - if m1e is not None and m2e is not None: - cmp_series = cmpMovies(m1e, m2e) - if cmp_series != 0: - return cmp_series - m1s = m1.get('season') - m2s = m2.get('season') - if m1s is not None and m2s is not None: - if m1s < m2s: - return 1 - elif m1s > m2s: - return -1 - m1p = m1.get('episode') - m2p = m2.get('episode') - if m1p < m2p: - return 1 - elif m1p > m2p: - return -1 - try: - if m1e is None: m1y = int(m1.get('year', 0)) - else: m1y = int(m1e.get('year', 0)) - except ValueError: - m1y = 0 - try: - if m2e is None: m2y = int(m2.get('year', 0)) - else: m2y = int(m2e.get('year', 0)) - except ValueError: - m2y = 0 - if m1y > m2y: return -1 - if m1y < m2y: return 1 - # Ok, these movies have the same production year... - #m1t = m1.get('canonical title', _last) - #m2t = m2.get('canonical title', _last) - # It should works also with normal dictionaries (returned from searches). - #if m1t is _last and m2t is _last: - m1t = m1.get('title', _last) - m2t = m2.get('title', _last) - if m1t < m2t: return -1 - if m1t > m2t: return 1 - # Ok, these movies have the same title... - m1i = m1.get('imdbIndex', _last) - m2i = m2.get('imdbIndex', _last) - if m1i > m2i: return -1 - if m1i < m2i: return 1 - m1id = getattr(m1, 'movieID', None) - # Introduce this check even for other comparisons functions? - # XXX: is it safe to check without knowning the data access system? - # probably not a great idea. 
Check for 'kind', instead? - if m1id is not None: - m2id = getattr(m2, 'movieID', None) - if m1id > m2id: return -1 - elif m1id < m2id: return 1 - return 0 - - -def cmpPeople(p1, p2): - """Compare two people by billingPos, name and imdbIndex.""" - p1b = getattr(p1, 'billingPos', None) or _last - p2b = getattr(p2, 'billingPos', None) or _last - if p1b > p2b: return 1 - if p1b < p2b: return -1 - p1n = p1.get('canonical name', _last) - p2n = p2.get('canonical name', _last) - if p1n is _last and p2n is _last: - p1n = p1.get('name', _last) - p2n = p2.get('name', _last) - if p1n > p2n: return 1 - if p1n < p2n: return -1 - p1i = p1.get('imdbIndex', _last) - p2i = p2.get('imdbIndex', _last) - if p1i > p2i: return 1 - if p1i < p2i: return -1 - return 0 - - -def cmpCompanies(p1, p2): - """Compare two companies.""" - p1n = p1.get('long imdb name', _last) - p2n = p2.get('long imdb name', _last) - if p1n is _last and p2n is _last: - p1n = p1.get('name', _last) - p2n = p2.get('name', _last) - if p1n > p2n: return 1 - if p1n < p2n: return -1 - p1i = p1.get('country', _last) - p2i = p2.get('country', _last) - if p1i > p2i: return 1 - if p1i < p2i: return -1 - return 0 - - -# References to titles, names and characters. -# XXX: find better regexp! -re_titleRef = re.compile(r'_(.+?(?: \([0-9\?]{4}(?:/[IVXLCDM]+)?\))?(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)_ \(qv\)') -# FIXME: doesn't match persons with ' in the name. -re_nameRef = re.compile(r"'([^']+?)' \(qv\)") -# XXX: good choice? Are there characters with # in the name? -re_characterRef = re.compile(r"#([^']+?)# \(qv\)") - -# Functions used to filter the text strings. 
-def modNull(s, titlesRefs, namesRefs, charactersRefs): - """Do nothing.""" - return s - -def modClearTitleRefs(s, titlesRefs, namesRefs, charactersRefs): - """Remove titles references.""" - return re_titleRef.sub(r'\1', s) - -def modClearNameRefs(s, titlesRefs, namesRefs, charactersRefs): - """Remove names references.""" - return re_nameRef.sub(r'\1', s) - -def modClearCharacterRefs(s, titlesRefs, namesRefs, charactersRefs): - """Remove characters references""" - return re_characterRef.sub(r'\1', s) - -def modClearRefs(s, titlesRefs, namesRefs, charactersRefs): - """Remove titles, names and characters references.""" - s = modClearTitleRefs(s, {}, {}, {}) - s = modClearCharacterRefs(s, {}, {}, {}) - return modClearNameRefs(s, {}, {}, {}) - - -def modifyStrings(o, modFunct, titlesRefs, namesRefs, charactersRefs): - """Modify a string (or string values in a dictionary or strings - in a list), using the provided modFunct function and titlesRefs - namesRefs and charactersRefs references dictionaries.""" - # Notice that it doesn't go any deeper than the first two levels in a list. 
- if isinstance(o, (unicode, str)): - return modFunct(o, titlesRefs, namesRefs, charactersRefs) - elif isinstance(o, (list, tuple, dict)): - _stillorig = 1 - if isinstance(o, (list, tuple)): keys = xrange(len(o)) - else: keys = o.keys() - for i in keys: - v = o[i] - if isinstance(v, (unicode, str)): - if _stillorig: - o = copy(o) - _stillorig = 0 - o[i] = modFunct(v, titlesRefs, namesRefs, charactersRefs) - elif isinstance(v, (list, tuple)): - modifyStrings(o[i], modFunct, titlesRefs, namesRefs, - charactersRefs) - return o - - -def date_and_notes(s): - """Parse (birth|death) date and notes; returns a tuple in the - form (date, notes).""" - s = s.strip() - if not s: return (u'', u'') - notes = u'' - if s[0].isdigit() or s.split()[0].lower() in ('c.', 'january', 'february', - 'march', 'april', 'may', 'june', - 'july', 'august', 'september', - 'october', 'november', - 'december', 'ca.', 'circa', - '????,'): - i = s.find(',') - if i != -1: - notes = s[i+1:].strip() - s = s[:i] - else: - notes = s - s = u'' - if s == '????': s = u'' - return s, notes - - -class RolesList(list): - """A list of Person or Character instances, used for the currentRole - property.""" - def __unicode__(self): - return u' / '.join([unicode(x) for x in self]) - - def __str__(self): - # FIXME: does it make sense at all? Return a unicode doesn't - # seem right, in __str__. - return u' / '.join([unicode(x).encode('utf8') for x in self]) - - -# Replace & with &, but only if it's not already part of a charref. 
-#_re_amp = re.compile(r'(&)(?!\w+;)', re.I) -#_re_amp = re.compile(r'(?<=\W)&(?=[^a-zA-Z0-9_#])') -_re_amp = re.compile(r'&(?![^a-zA-Z0-9_#]{1,5};)') - -def escape4xml(value): - """Escape some chars that can't be present in a XML value.""" - if isinstance(value, int): - value = str(value) - value = _re_amp.sub('&', value) - value = value.replace('"', '"').replace("'", ''') - value = value.replace('<', '<').replace('>', '>') - if isinstance(value, unicode): - value = value.encode('ascii', 'xmlcharrefreplace') - return value - - -def _refsToReplace(value, modFunct, titlesRefs, namesRefs, charactersRefs): - """Return three lists - for movie titles, persons and characters names - - with two items tuples: the first item is the reference once escaped - by the user-provided modFunct function, the second is the same - reference un-escaped.""" - mRefs = [] - for refRe, refTemplate in [(re_titleRef, u'_%s_ (qv)'), - (re_nameRef, u"'%s' (qv)"), - (re_characterRef, u'#%s# (qv)')]: - theseRefs = [] - for theRef in refRe.findall(value): - # refTemplate % theRef values don't change for a single - # _Container instance, so this is a good candidate for a - # cache or something - even if it's so rarely used that... - # Moreover, it can grow - ia.update(...) - and change if - # modFunct is modified. - goodValue = modFunct(refTemplate % theRef, titlesRefs, namesRefs, - charactersRefs) - # Prevents problems with crap in plain text data files. - # We should probably exclude invalid chars and string that - # are too long in the re_*Ref expressions. - if '_' in goodValue or len(goodValue) > 128: - continue - toReplace = escape4xml(goodValue) - # Only the 'value' portion is replaced. 
- replaceWith = goodValue.replace(theRef, escape4xml(theRef)) - theseRefs.append((toReplace, replaceWith)) - mRefs.append(theseRefs) - return mRefs - - -def _handleTextNotes(s): - """Split text::notes strings.""" - ssplit = s.split('::', 1) - if len(ssplit) == 1: - return s - return u'%s%s' % (ssplit[0], ssplit[1]) - - -def _normalizeValue(value, withRefs=False, modFunct=None, titlesRefs=None, - namesRefs=None, charactersRefs=None): - """Replace some chars that can't be present in a XML text.""" - # XXX: use s.encode(encoding, 'xmlcharrefreplace') ? Probably not - # a great idea: after all, returning a unicode is safe. - if isinstance(value, (unicode, str)): - if not withRefs: - value = _handleTextNotes(escape4xml(value)) - else: - # Replace references that were accidentally escaped. - replaceLists = _refsToReplace(value, modFunct, titlesRefs, - namesRefs, charactersRefs) - value = modFunct(value, titlesRefs or {}, namesRefs or {}, - charactersRefs or {}) - value = _handleTextNotes(escape4xml(value)) - for replaceList in replaceLists: - for toReplace, replaceWith in replaceList: - value = value.replace(toReplace, replaceWith) - else: - value = unicode(value) - return value - - -def _tag4TON(ton, addAccessSystem=False, _containerOnly=False): - """Build a tag for the given _Container instance; - both open and close tags are returned.""" - tag = ton.__class__.__name__.lower() - what = 'name' - if tag == 'movie': - value = ton.get('long imdb title') or ton.get('title', '') - what = 'title' - else: - value = ton.get('long imdb name') or ton.get('name', '') - value = _normalizeValue(value) - extras = u'' - crl = ton.currentRole - if crl: - if not isinstance(crl, list): - crl = [crl] - for cr in crl: - crTag = cr.__class__.__name__.lower() - crValue = cr['long imdb name'] - crValue = _normalizeValue(crValue) - crID = cr.getID() - if crID is not None: - extras += u'<%s id="%s">' \ - u'%s' % (crTag, crID, - crValue, crTag) - else: - extras += u'<%s>%s' % \ - (crTag, 
crValue, crTag) - if cr.notes: - extras += u'%s' % _normalizeValue(cr.notes) - extras += u'' - theID = ton.getID() - if theID is not None: - beginTag = u'<%s id="%s"' % (tag, theID) - if addAccessSystem and ton.accessSystem: - beginTag += ' access-system="%s"' % ton.accessSystem - if not _containerOnly: - beginTag += u'><%s>%s' % (what, value, what) - else: - beginTag += u'>' - else: - if not _containerOnly: - beginTag = u'<%s><%s>%s' % (tag, what, value, what) - else: - beginTag = u'<%s>' % tag - beginTag += extras - if ton.notes: - beginTag += u'%s' % _normalizeValue(ton.notes) - return (beginTag, u'' % tag) - - -TAGS_TO_MODIFY = { - 'movie.parents-guide': ('item', True), - 'movie.number-of-votes': ('item', True), - 'movie.soundtrack.item': ('item', True), - 'movie.quotes': ('quote', False), - 'movie.quotes.quote': ('line', False), - 'movie.demographic': ('item', True), - 'movie.episodes': ('season', True), - 'movie.episodes.season': ('episode', True), - 'person.merchandising-links': ('item', True), - 'person.genres': ('item', True), - 'person.quotes': ('quote', False), - 'person.keywords': ('item', True), - 'character.quotes': ('item', True), - 'character.quotes.item': ('quote', False), - 'character.quotes.item.quote': ('line', False) - } - -_allchars = string.maketrans('', '') -_keepchars = _allchars.translate(_allchars, string.ascii_lowercase + '-' + - string.digits) - -def _tagAttr(key, fullpath): - """Return a tuple with a tag name and a (possibly empty) attribute, - applying the conversions specified in TAGS_TO_MODIFY and checking - that the tag is safe for a XML document.""" - attrs = {} - _escapedKey = escape4xml(key) - if fullpath in TAGS_TO_MODIFY: - tagName, useTitle = TAGS_TO_MODIFY[fullpath] - if useTitle: - attrs['key'] = _escapedKey - elif not isinstance(key, unicode): - if isinstance(key, str): - tagName = unicode(key, 'ascii', 'ignore') - else: - strType = str(type(key)).replace("", "") - attrs['keytype'] = strType - tagName = unicode(key) - 
else: - tagName = key - if isinstance(key, int): - attrs['keytype'] = 'int' - origTagName = tagName - tagName = tagName.lower().replace(' ', '-') - tagName = str(tagName).translate(_allchars, _keepchars) - if origTagName != tagName: - if 'key' not in attrs: - attrs['key'] = _escapedKey - if (not tagName) or tagName[0].isdigit() or tagName[0] == '-': - # This is a fail-safe: we should never be here, since unpredictable - # keys must be listed in TAGS_TO_MODIFY. - # This will proably break the DTD/schema, but at least it will - # produce a valid XML. - tagName = 'item' - _utils_logger.error('invalid tag: %s [%s]' % (_escapedKey, fullpath)) - attrs['key'] = _escapedKey - return tagName, u' '.join([u'%s="%s"' % i for i in attrs.items()]) - - -def _seq2xml(seq, _l=None, withRefs=False, modFunct=None, - titlesRefs=None, namesRefs=None, charactersRefs=None, - _topLevel=True, key2infoset=None, fullpath=''): - """Convert a sequence or a dictionary to a list of XML - unicode strings.""" - if _l is None: - _l = [] - if isinstance(seq, dict): - for key in seq: - value = seq[key] - if isinstance(key, _Container): - # Here we're assuming that a _Container is never a top-level - # key (otherwise we should handle key2infoset). - openTag, closeTag = _tag4TON(key) - # So that fullpath will contains something meaningful. 
- tagName = key.__class__.__name__.lower() - else: - tagName, attrs = _tagAttr(key, fullpath) - openTag = u'<%s' % tagName - if attrs: - openTag += ' %s' % attrs - if _topLevel and key2infoset and key in key2infoset: - openTag += u' infoset="%s"' % key2infoset[key] - if isinstance(value, int): - openTag += ' type="int"' - elif isinstance(value, float): - openTag += ' type="float"' - openTag += u'>' - closeTag = u'' % tagName - _l.append(openTag) - _seq2xml(value, _l, withRefs, modFunct, titlesRefs, - namesRefs, charactersRefs, _topLevel=False, - fullpath='%s.%s' % (fullpath, tagName)) - _l.append(closeTag) - elif isinstance(seq, (list, tuple)): - tagName, attrs = _tagAttr('item', fullpath) - beginTag = u'<%s' % tagName - if attrs: - beginTag += u' %s' % attrs - #beginTag += u'>' - closeTag = u'' % tagName - for item in seq: - if isinstance(item, _Container): - _seq2xml(item, _l, withRefs, modFunct, titlesRefs, - namesRefs, charactersRefs, _topLevel=False, - fullpath='%s.%s' % (fullpath, - item.__class__.__name__.lower())) - else: - openTag = beginTag - if isinstance(item, int): - openTag += ' type="int"' - elif isinstance(item, float): - openTag += ' type="float"' - openTag += u'>' - _l.append(openTag) - _seq2xml(item, _l, withRefs, modFunct, titlesRefs, - namesRefs, charactersRefs, _topLevel=False, - fullpath='%s.%s' % (fullpath, tagName)) - _l.append(closeTag) - else: - if isinstance(seq, _Container): - _l.extend(_tag4TON(seq)) - else: - # Text, ints, floats and the like. - _l.append(_normalizeValue(seq, withRefs=withRefs, - modFunct=modFunct, - titlesRefs=titlesRefs, - namesRefs=namesRefs, - charactersRefs=charactersRefs)) - return _l - - -_xmlHead = u""" - - -""" -_xmlHead = _xmlHead.replace('{VERSION}', - VERSION.replace('.', '').split('dev')[0][:2]) - - -class _Container(object): - """Base class for Movie, Person, Character and Company classes.""" - # The default sets of information retrieved. - default_info = () - - # Aliases for some not-so-intuitive keys. 
- keys_alias = {} - - # List of keys to modify. - keys_tomodify_list = () - - # Function used to compare two instances of this class. - cmpFunct = None - - # Regular expression used to build the 'full-size (headshot|cover url)'. - _re_fullsizeURL = re.compile(r'\._V1\._SX(\d+)_SY(\d+)_') - - def __init__(self, myID=None, data=None, notes=u'', - currentRole=u'', roleID=None, roleIsPerson=False, - accessSystem=None, titlesRefs=None, namesRefs=None, - charactersRefs=None, modFunct=None, *args, **kwds): - """Initialize a Movie, Person, Character or Company object. - *myID* -- your personal identifier for this object. - *data* -- a dictionary used to initialize the object. - *notes* -- notes for the person referred in the currentRole - attribute; e.g.: '(voice)' or the alias used in the - movie credits. - *accessSystem* -- a string representing the data access system used. - *currentRole* -- a Character instance representing the current role - or duty of a person in this movie, or a Person - object representing the actor/actress who played - a given character in a Movie. If a string is - passed, an object is automatically build. - *roleID* -- if available, the characterID/personID of the currentRole - object. - *roleIsPerson* -- when False (default) the currentRole is assumed - to be a Character object, otherwise a Person. - *titlesRefs* -- a dictionary with references to movies. - *namesRefs* -- a dictionary with references to persons. - *charactersRefs* -- a dictionary with references to characters. - *modFunct* -- function called returning text fields. 
- """ - self.reset() - self.accessSystem = accessSystem - self.myID = myID - if data is None: data = {} - self.set_data(data, override=1) - self.notes = notes - if titlesRefs is None: titlesRefs = {} - self.update_titlesRefs(titlesRefs) - if namesRefs is None: namesRefs = {} - self.update_namesRefs(namesRefs) - if charactersRefs is None: charactersRefs = {} - self.update_charactersRefs(charactersRefs) - self.set_mod_funct(modFunct) - self.keys_tomodify = {} - for item in self.keys_tomodify_list: - self.keys_tomodify[item] = None - self._roleIsPerson = roleIsPerson - if not roleIsPerson: - from imdb.Character import Character - self._roleClass = Character - else: - from imdb.Person import Person - self._roleClass = Person - self.currentRole = currentRole - if roleID: - self.roleID = roleID - self._init(*args, **kwds) - - def _get_roleID(self): - """Return the characterID or personID of the currentRole object.""" - if not self.__role: - return None - if isinstance(self.__role, list): - return [x.getID() for x in self.__role] - return self.currentRole.getID() - - def _set_roleID(self, roleID): - """Set the characterID or personID of the currentRole object.""" - if not self.__role: - # XXX: needed? Just ignore it? It's probably safer to - # ignore it, to prevent some bugs in the parsers. - #raise IMDbError,"Can't set ID of an empty Character/Person object." 
- pass - if not self._roleIsPerson: - if not isinstance(roleID, (list, tuple)): - self.currentRole.characterID = roleID - else: - for index, item in enumerate(roleID): - self.__role[index].characterID = item - else: - if not isinstance(roleID, (list, tuple)): - self.currentRole.personID = roleID - else: - for index, item in enumerate(roleID): - self.__role[index].personID = item - - roleID = property(_get_roleID, _set_roleID, - doc="the characterID or personID of the currentRole object.") - - def _get_currentRole(self): - """Return a Character or Person instance.""" - if self.__role: - return self.__role - return self._roleClass(name=u'', accessSystem=self.accessSystem, - modFunct=self.modFunct) - - def _set_currentRole(self, role): - """Set self.currentRole to a Character or Person instance.""" - if isinstance(role, (unicode, str)): - if not role: - self.__role = None - else: - self.__role = self._roleClass(name=role, modFunct=self.modFunct, - accessSystem=self.accessSystem) - elif isinstance(role, (list, tuple)): - self.__role = RolesList() - for item in role: - if isinstance(item, (unicode, str)): - self.__role.append(self._roleClass(name=item, - accessSystem=self.accessSystem, - modFunct=self.modFunct)) - else: - self.__role.append(item) - if not self.__role: - self.__role = None - else: - self.__role = role - - currentRole = property(_get_currentRole, _set_currentRole, - doc="The role of a Person in a Movie" + \ - " or the interpreter of a Character in a Movie.") - - def _init(self, **kwds): pass - - def reset(self): - """Reset the object.""" - self.data = {} - self.myID = None - self.notes = u'' - self.titlesRefs = {} - self.namesRefs = {} - self.charactersRefs = {} - self.modFunct = modClearRefs - self.current_info = [] - self.infoset2keys = {} - self.key2infoset = {} - self.__role = None - self._reset() - - def _reset(self): pass - - def clear(self): - """Reset the dictionary.""" - self.data.clear() - self.notes = u'' - self.titlesRefs = {} - self.namesRefs 
= {} - self.charactersRefs = {} - self.current_info = [] - self.infoset2keys = {} - self.key2infoset = {} - self.__role = None - self._clear() - - def _clear(self): pass - - def get_current_info(self): - """Return the current set of information retrieved.""" - return self.current_info - - def update_infoset_map(self, infoset, keys, mainInfoset): - """Update the mappings between infoset and keys.""" - if keys is None: - keys = [] - if mainInfoset is not None: - theIS = mainInfoset - else: - theIS = infoset - self.infoset2keys[theIS] = keys - for key in keys: - self.key2infoset[key] = theIS - - def set_current_info(self, ci): - """Set the current set of information retrieved.""" - # XXX:Remove? It's never used and there's no way to update infoset2keys. - self.current_info = ci - - def add_to_current_info(self, val, keys=None, mainInfoset=None): - """Add a set of information to the current list.""" - if val not in self.current_info: - self.current_info.append(val) - self.update_infoset_map(val, keys, mainInfoset) - - def has_current_info(self, val): - """Return true if the given set of information is in the list.""" - return val in self.current_info - - def set_mod_funct(self, modFunct): - """Set the fuction used to modify the strings.""" - if modFunct is None: modFunct = modClearRefs - self.modFunct = modFunct - - def update_titlesRefs(self, titlesRefs): - """Update the dictionary with the references to movies.""" - self.titlesRefs.update(titlesRefs) - - def get_titlesRefs(self): - """Return the dictionary with the references to movies.""" - return self.titlesRefs - - def update_namesRefs(self, namesRefs): - """Update the dictionary with the references to names.""" - self.namesRefs.update(namesRefs) - - def get_namesRefs(self): - """Return the dictionary with the references to names.""" - return self.namesRefs - - def update_charactersRefs(self, charactersRefs): - """Update the dictionary with the references to characters.""" - 
self.charactersRefs.update(charactersRefs) - - def get_charactersRefs(self): - """Return the dictionary with the references to characters.""" - return self.charactersRefs - - def set_data(self, data, override=0): - """Set the movie data to the given dictionary; if 'override' is - set, the previous data is removed, otherwise the two dictionary - are merged. - """ - if not override: - self.data.update(data) - else: - self.data = data - - def getID(self): - """Return movieID, personID, characterID or companyID.""" - raise NotImplementedError, 'override this method' - - def __cmp__(self, other): - """Compare two Movie, Person, Character or Company objects.""" - # XXX: raise an exception? - if self.cmpFunct is None: return -1 - if not isinstance(other, self.__class__): return -1 - return self.cmpFunct(other) - - def __hash__(self): - """Hash for this object.""" - # XXX: does it always work correctly? - theID = self.getID() - if theID is not None and self.accessSystem not in ('UNKNOWN', None): - # Handle 'http' and 'mobile' as they are the same access system. - acs = self.accessSystem - if acs in ('mobile', 'httpThin'): - acs = 'http' - # There must be some indication of the kind of the object, too. - s4h = '%s:%s[%s]' % (self.__class__.__name__, theID, acs) - else: - s4h = repr(self) - return hash(s4h) - - def isSame(self, other): - """Return True if the two represent the same object.""" - if not isinstance(other, self.__class__): return 0 - if hash(self) == hash(other): return 1 - return 0 - - def __len__(self): - """Number of items in the data dictionary.""" - return len(self.data) - - def getAsXML(self, key, _with_add_keys=True): - """Return a XML representation of the specified key, or None - if empty. If _with_add_keys is False, dinamically generated - keys are excluded.""" - # Prevent modifyStrings in __getitem__ to be called; if needed, - # it will be called by the _normalizeValue function. 
- origModFunct = self.modFunct - self.modFunct = modNull - # XXX: not totally sure it's a good idea, but could prevent - # problems (i.e.: the returned string always contains - # a DTD valid tag, and not something that can be only in - # the keys_alias map). - key = self.keys_alias.get(key, key) - if (not _with_add_keys) and (key in self._additional_keys()): - self.modFunct = origModFunct - return None - try: - withRefs = False - if key in self.keys_tomodify and \ - origModFunct not in (None, modNull): - withRefs = True - value = self.get(key) - if value is None: - return None - tag = self.__class__.__name__.lower() - return u''.join(_seq2xml({key: value}, withRefs=withRefs, - modFunct=origModFunct, - titlesRefs=self.titlesRefs, - namesRefs=self.namesRefs, - charactersRefs=self.charactersRefs, - key2infoset=self.key2infoset, - fullpath=tag)) - finally: - self.modFunct = origModFunct - - def asXML(self, _with_add_keys=True): - """Return a XML representation of the whole object. - If _with_add_keys is False, dinamically generated keys are excluded.""" - beginTag, endTag = _tag4TON(self, addAccessSystem=True, - _containerOnly=True) - resList = [beginTag] - for key in self.keys(): - value = self.getAsXML(key, _with_add_keys=_with_add_keys) - if not value: - continue - resList.append(value) - resList.append(endTag) - head = _xmlHead % self.__class__.__name__.lower() - return head + u''.join(resList) - - def _getitem(self, key): - """Handle special keys.""" - return None - - def __getitem__(self, key): - """Return the value for a given key, checking key aliases; - a KeyError exception is raised if the key is not found. - """ - value = self._getitem(key) - if value is not None: return value - # Handle key aliases. 
- key = self.keys_alias.get(key, key) - rawData = self.data[key] - if key in self.keys_tomodify and \ - self.modFunct not in (None, modNull): - try: - return modifyStrings(rawData, self.modFunct, self.titlesRefs, - self.namesRefs, self.charactersRefs) - except RuntimeError, e: - # Symbian/python 2.2 has a poor regexp implementation. - import warnings - warnings.warn('RuntimeError in ' - "imdb.utils._Container.__getitem__; if it's not " - "a recursion limit exceeded and we're not running " - "in a Symbian environment, it's a bug:\n%s" % e) - return rawData - - def __setitem__(self, key, item): - """Directly store the item with the given key.""" - self.data[key] = item - - def __delitem__(self, key): - """Remove the given section or key.""" - # XXX: how to remove an item of a section? - del self.data[key] - - def _additional_keys(self): - """Valid keys to append to the data.keys() list.""" - return [] - - def keys(self): - """Return a list of valid keys.""" - return self.data.keys() + self._additional_keys() - - def items(self): - """Return the items in the dictionary.""" - return [(k, self.get(k)) for k in self.keys()] - - # XXX: is this enough? - def iteritems(self): return self.data.iteritems() - def iterkeys(self): return self.data.iterkeys() - def itervalues(self): return self.data.itervalues() - - def values(self): - """Return the values in the dictionary.""" - return [self.get(k) for k in self.keys()] - - def has_key(self, key): - """Return true if a given section is defined.""" - try: - self.__getitem__(key) - except KeyError: - return 0 - return 1 - - # XXX: really useful??? - # consider also that this will confuse people who meant to - # call ia.update(movieObject, 'data set') instead. 
- def update(self, dict): - self.data.update(dict) - - def get(self, key, failobj=None): - """Return the given section, or default if it's not found.""" - try: - return self.__getitem__(key) - except KeyError: - return failobj - - def setdefault(self, key, failobj=None): - if not self.has_key(key): - self[key] = failobj - return self[key] - - def pop(self, key, *args): - return self.data.pop(key, *args) - - def popitem(self): - return self.data.popitem() - - def __repr__(self): - """String representation of an object.""" - raise NotImplementedError, 'override this method' - - def __str__(self): - """Movie title or person name.""" - raise NotImplementedError, 'override this method' - - def __contains__(self, key): - raise NotImplementedError, 'override this method' - - def append_item(self, key, item): - """The item is appended to the list identified by the given key.""" - self.data.setdefault(key, []).append(item) - - def set_item(self, key, item): - """Directly store the item with the given key.""" - self.data[key] = item - - def __nonzero__(self): - """Return true if self.data contains something.""" - if self.data: return 1 - return 0 - - def __deepcopy__(self, memo): - raise NotImplementedError, 'override this method' - - def copy(self): - """Return a deep copy of the object itself.""" - return deepcopy(self) - - -def flatten(seq, toDescend=(list, dict, tuple), yieldDictKeys=0, - onlyKeysType=(_Container,), scalar=None): - """Iterate over nested lists and dictionaries; toDescend is a list - or a tuple of types to be considered non-scalar; if yieldDictKeys is - true, also dictionaries' keys are yielded; if scalar is not None, only - items of the given type(s) are yielded.""" - if scalar is None or isinstance(seq, scalar): - yield seq - if isinstance(seq, toDescend): - if isinstance(seq, (dict, _Container)): - if yieldDictKeys: - # Yield also the keys of the dictionary. 
- for key in seq.iterkeys(): - for k in flatten(key, toDescend=toDescend, - yieldDictKeys=yieldDictKeys, - onlyKeysType=onlyKeysType, scalar=scalar): - if onlyKeysType and isinstance(k, onlyKeysType): - yield k - for value in seq.itervalues(): - for v in flatten(value, toDescend=toDescend, - yieldDictKeys=yieldDictKeys, - onlyKeysType=onlyKeysType, scalar=scalar): - yield v - elif not isinstance(seq, (str, unicode, int, float)): - for item in seq: - for i in flatten(item, toDescend=toDescend, - yieldDictKeys=yieldDictKeys, - onlyKeysType=onlyKeysType, scalar=scalar): - yield i - -