| 1 | |
|---|
| 2 | """ |
|---|
| 3 | |
|---|
| 4 | pypi.py |
|---|
| 5 | ======= |
|---|
| 6 | |
|---|
| 7 | Desc: Library for getting information about Python packages by querying |
|---|
| 8 | The CheeseShop (PYPI a.k.a. Python Package Index). |
|---|
| 9 | |
|---|
| 10 | |
|---|
| 11 | Author: Rob Cakebread <gentoodev@gmail.com> |
|---|
| 12 | |
|---|
| 13 | License : GNU General Public License Version 2 |
|---|
| 14 | |
|---|
| 15 | """ |
|---|
| 16 | |
|---|
| 17 | __docformat__ = 'restructuredtext' |
|---|
| 18 | |
|---|
| 19 | import re |
|---|
| 20 | import xmlrpclib |
|---|
| 21 | import cPickle |
|---|
| 22 | import os |
|---|
| 23 | import time |
|---|
| 24 | import logging |
|---|
| 25 | import urllib2 |
|---|
| 26 | |
|---|
| 27 | from yolk.utils import get_yolk_dir |
|---|
| 28 | |
|---|
| 29 | |
|---|
# URL handed to xmlrpclib.Server() when CheeseShop builds its XML-RPC client.
XML_RPC_SERVER = 'http://pypi.python.org/pypi'
# Alternative endpoints, currently disabled:
#XML_RPC_SERVER = 'http://download.zope.org/ppix/'
#XML_RPC_SERVER = 'http://cheeseshop.python.org/simple'
|---|
| 33 | |
|---|
| 34 | |
|---|
class ProxyTransport(xmlrpclib.Transport):
    """
    Provides an XML-RPC transport routing via a http proxy.

    This is done by using urllib2, which in turn uses the environment
    variable http_proxy and whatever else it is built to use (e.g. the
    windows registry).

    NOTE: the environment variable http_proxy should be set correctly.
    See check_proxy_setting() below.

    Written from scratch but inspired by xmlrpc_urllib_transport.py
    file from http://starship.python.net/crew/jjkunce/ by jjk.

    A. Ellerton 2006-07-06
    """

    def request(self, host, handler, request_body, verbose=0):
        """
        Send an XML-RPC request through urllib2 and return the parsed reply.

        @param host: Host (and optional port) part of the server URL
        @param handler: Path part of the server URL, e.g. '/pypi'
        @param request_body: Serialised XML-RPC request to POST
        @param verbose: Stored on the instance for use by parse_response()

        Returns whatever `parse_response` produces for the server's reply.
        """
        #We get a traceback if we don't have this attribute:
        self.verbose = verbose
        # The scheme is always plain http here, whatever the server URL used.
        url = 'http://' + host + handler
        # Supplying the body as `data` turns the request into a POST.
        request = urllib2.Request(url, request_body)
        # Note: 'Host' and 'Content-Length' are added automatically
        request.add_header('User-Agent', self.user_agent)
        request.add_header('Content-Type', 'text/xml')
        # A ProxyHandler with no arguments uses the proxies urllib2 detects
        # for itself.
        opener = urllib2.build_opener(urllib2.ProxyHandler())
        fhandle = opener.open(request)
        try:
            return self.parse_response(fhandle)
        finally:
            # Close the response here so the connection is always released,
            # including when parsing fails.
            fhandle.close()
|---|
| 66 | |
|---|
| 67 | |
|---|
def check_proxy_setting():
    """
    Normalise the 'HTTP_PROXY' environment variable for urllib2.

    If the environmental variable 'HTTP_PROXY' is set, it will most likely be
    in one of these forms:

          proxyhost:8080
          http://proxyhost:8080

    urllib2 requires the proxy URL to start with 'http://'. A value of the
    bare ``host:port`` form is rewritten in ``os.environ`` with that prefix
    added. A value that is unset, already prefixed, or not recognised as
    ``host:port`` is left untouched.

    Always returns None.
    """
    http_proxy = os.environ.get('HTTP_PROXY')
    if http_proxy is None or http_proxy.startswith('http://'):
        return

    # Host names may contain digits (e.g. '10.0.0.1' or 'proxy1'), so they
    # are part of the accepted character set.
    match = re.match(r'([-_.A-Za-z0-9]+):(\d+)', http_proxy)
    if match is None:
        # Unknown format (empty value, credentials, another scheme, ...):
        # leave the variable alone instead of failing on a missing match.
        return
    os.environ['HTTP_PROXY'] = 'http://%s:%s' % match.groups()
    return
|---|
| 91 | |
|---|
| 92 | |
|---|
class CheeseShop:

    """Interface to Python Package Index"""

    def __init__(self, debug=False, no_cache=False, yolk_dir=None):
        """
        Set up the XML-RPC client and load the package name list.

        @param debug: When true, emit extra debug log messages
        @param no_cache: When true, never read or write the on-disk cache
        @param yolk_dir: Directory for cache files; defaults to
                         get_yolk_dir() when not given
        """
        self.debug = debug
        self.no_cache = no_cache
        # The logger has to exist before get_xmlrpc_server() is called,
        # because that method reports failures through it.
        self.logger = logging.getLogger("yolk")
        if yolk_dir:
            self.yolk_dir = yolk_dir
        else:
            self.yolk_dir = get_yolk_dir()
        self.xmlrpc = self.get_xmlrpc_server()
        self.pkg_cache_file = self.get_pkg_cache_file()
        self.last_sync_file = self.get_last_sync_file()
        self.pkg_list = None
        self.get_cache()

    def get_cache(self):
        """
        Get a package name list from disk cache or PyPI
        """
        #This is used by external programs that import `CheeseShop` and don't
        #want a cache file written to ~/.pypi and query PyPI every time.
        if self.no_cache:
            self.pkg_list = self.list_packages()
            return

        if not os.path.exists(self.yolk_dir):
            os.mkdir(self.yolk_dir)
        if os.path.exists(self.pkg_cache_file):
            self.pkg_list = self.query_cached_package_list()
        else:
            self.logger.debug("DEBUG: Fetching package list cache from PyPi...")
            self.fetch_pkg_list()

    def get_last_sync_file(self):
        """
        Returns absolute filename of the 'last_sync' file inside the yolk dir
        """
        return os.path.abspath(self.yolk_dir + "/last_sync")

    def get_xmlrpc_server(self):
        """
        Returns PyPI's XML-RPC server instance

        The proxy environment is normalised first, and XML-RPC verbose
        output is switched on when the 'XMLRPC_DEBUG' environment variable
        is present.

        Raises IOError (after logging it) if the server proxy can't be built.
        """
        check_proxy_setting()
        if 'XMLRPC_DEBUG' in os.environ:
            debug = 1
        else:
            debug = 0
        try:
            return xmlrpclib.Server(XML_RPC_SERVER,
                                    transport=ProxyTransport(),
                                    verbose=debug)
        except IOError:
            self.logger.error("ERROR: Can't connect to XML-RPC server: %s",
                              XML_RPC_SERVER)
            # Without a server object nothing else in this class can work,
            # so let the caller see the original error.
            raise

    def get_pkg_cache_file(self):
        """
        Returns filename of pkg cache
        """
        return os.path.abspath('%s/pkg_list.pkl' % self.yolk_dir)

    def query_versions_pypi(self, package_name):
        """
        Fetch list of available versions for a package from The CheeseShop

        The name is matched case-insensitively against the package list.
        Returns a tuple of (name as spelled on PyPI, list of versions); the
        name is returned unchanged with an empty list when nothing matches.
        """
        if package_name not in self.pkg_list:
            self.logger.debug("Package %s not in cache, querying PyPI...",
                              package_name)
            self.fetch_pkg_list()
        #I have to set version=[] for edge cases like "Magic file extensions"
        #but I'm not sure why this happens. It's included with Python or
        #because it has a space in its name?
        versions = []
        wanted = package_name.lower()
        for pypi_pkg in self.pkg_list:
            if pypi_pkg.lower() == wanted:
                if self.debug:
                    self.logger.debug("DEBUG: %s", package_name)
                versions = self.package_releases(pypi_pkg)
                package_name = pypi_pkg
                break
        return (package_name, versions)

    def query_cached_package_list(self):
        """Return list of package names unpickled from the cache file"""
        if self.debug:
            self.logger.debug("DEBUG: reading pickled cache file")
        # SECURITY NOTE: unpickling runs whatever the file contains; the
        # cache is assumed to be a trusted file written by fetch_pkg_list().
        cache = open(self.pkg_cache_file, "r")
        try:
            return cPickle.load(cache)
        finally:
            cache.close()

    def fetch_pkg_list(self):
        """
        Fetch master list of package names from PYPI and cache it

        The list is always stored on `self.pkg_list`; it is only written to
        the cache file when caching is enabled.
        """
        self.logger.debug("DEBUG: Fetching package name list from PyPI")
        package_list = self.list_packages()
        # Callers that asked for no_cache don't want a cache file written
        # (and the cache directory may not even exist for them).
        if not self.no_cache:
            cache = open(self.pkg_cache_file, "w")
            try:
                cPickle.dump(package_list, cache)
            finally:
                cache.close()
        self.pkg_list = package_list

    def search(self, spec, operator):
        '''Query PYPI via XMLRPC interface using search spec'''
        return self.xmlrpc.search(spec, operator.lower())

    def changelog(self, hours):
        '''Query PYPI via XMLRPC interface for changes in the last `hours`'''
        return self.xmlrpc.changelog(get_seconds(hours))

    def updated_releases(self, hours):
        '''Query PYPI via XMLRPC interface for releases updated in the last
        `hours`'''
        return self.xmlrpc.updated_releases(get_seconds(hours))

    def list_packages(self):
        """Query PYPI via XMLRPC interface for a list of all package names"""
        return self.xmlrpc.list_packages()

    def release_urls(self, package_name, version):
        """Query PYPI via XMLRPC interface for the URLs of a pkg's release"""
        return self.xmlrpc.release_urls(package_name, version)

    def release_data(self, package_name, version):
        """
        Query PYPI via XMLRPC interface for a pkg's metadata

        Returns None when the server answers with an XML-RPC fault.
        """
        try:
            return self.xmlrpc.release_data(package_name, version)
        except xmlrpclib.Fault:
            #XXX Raises xmlrpclib.Fault if you give non-existent version
            #Could this be server bug?
            return None

    def package_releases(self, package_name):
        """Query PYPI via XMLRPC interface for a pkg's available versions"""
        if self.debug:
            self.logger.debug("DEBUG: querying PyPI for versions of %s",
                              package_name)
        return self.xmlrpc.package_releases(package_name)

    def get_download_urls(self, package_name, version="", pkg_type="all"):
        """
        Query PyPI for pkg download URI for a package

        @param package_name: Name of the package
        @param version: Single version to look at; every version known to
                        PyPI is used when empty
        @param pkg_type: 'source', 'egg', or 'all'

        Returns a list of URLs.
        """
        if version:
            versions = [version]
        else:
            #If they don't specify version, show em all.
            (package_name, versions) = self.query_versions_pypi(package_name)

        all_urls = []
        for ver in versions:
            metadata = self.release_data(package_name, ver)
            for urls in self.release_urls(package_name, ver):
                if pkg_type == "source" and urls['packagetype'] == "sdist":
                    all_urls.append(urls['url'])
                elif pkg_type == "egg" and \
                        urls['packagetype'].startswith("bdist"):
                    all_urls.append(urls['url'])
                elif pkg_type == "all":
                    #All
                    all_urls.append(urls['url'])

            #Try the package's metadata directly in case there's nothing
            #returned by XML-RPC's release_urls()
            download_url = None
            if metadata:
                download_url = metadata.get('download_url')
            if download_url is None or download_url == "UNKNOWN":
                continue
            if download_url in all_urls:
                continue
            # NOTE(review): the metadata URL is never added for
            # pkg_type == "all". Presumably that is because it can't be
            # checked against a file extension -- confirm this is intended.
            if pkg_type != "all":
                url = filter_url(pkg_type, download_url)
                if url:
                    all_urls.append(url)
        return all_urls
|---|
| 260 | |
|---|
def filter_url(pkg_type, url):
    """
    Strip known trailing junk from `url` and return it if it fits `pkg_type`.

    @param pkg_type: 'source', 'egg', or 'all'
    @param url: Download URL to clean up and check

    Returns the cleaned URL, or None when it does not look like a file of
    the requested type.
    """
    # Cut the URL at the first marker (in this order) that it contains.
    for marker in ("?modtime", "#md5="):
        if marker in url:
            url = url.partition(marker)[0]
            break

    #pkg_spec==dev (svn)
    if url.endswith("-dev"):
        url = url.partition("#egg=")[0]

    if pkg_type == "all":
        return url

    lowered = url.lower()
    if pkg_type == "source":
        source_suffixes = (".tgz", ".tar.gz", ".zip", ".tbz2", ".tar.bz2")
        if lowered.endswith(source_suffixes):
            return url
    elif pkg_type == "egg" and lowered.endswith(".egg"):
        return url
    return None
|---|
| 288 | |
|---|
def get_seconds(hours):
    """
    Return the epoch timestamp of the moment `hours` hours before now.

    @param hours: Number of `hours` back in time we are checking
    @type hours: int

    Return integer number of seconds since the epoch for now minus hours

    """
    seconds_back = hours * (60 * 60)
    return int(time.time() - seconds_back)
|---|
| 300 | |
|---|