root/trunk/yolk/pypi.py

Revision 191, 10.0 KB (checked in by cakebread, 4 years ago)

Added xmlrpc debug info when XMLRPC_DEBUG env variable is set.

Line 
1
2"""
3
4pypi.py
5=======
6
7Desc: Library for getting information about Python packages by querying
8      The CheeseShop (PYPI a.k.a. Python Package Index).
9
10
11Author: Rob Cakebread <gentoodev@gmail.com>
12
13License  : GNU General Public License Version 2
14
15"""
16
17__docformat__ = 'restructuredtext'
18
19import re
20import xmlrpclib
21import cPickle
22import os
23import time
24import logging
25import urllib2
26
27from yolk.utils import get_yolk_dir
28
29
30XML_RPC_SERVER = 'http://pypi.python.org/pypi'
31#XML_RPC_SERVER = 'http://download.zope.org/ppix/'
32#XML_RPC_SERVER = 'http://cheeseshop.python.org/simple'
33
34
class ProxyTransport(xmlrpclib.Transport):
    """
    Provides an XML-RPC transport routing via an HTTP proxy.

    This is done by using urllib2, which in turn uses the environment
    variable http_proxy and whatever else it is built to use (e.g. the
    Windows registry).

    NOTE: the environment variable http_proxy should be set correctly.
    See check_proxy_setting() below.

    Written from scratch but inspired by xmlrpc_urllib_transport.py
    file from http://starship.python.net/crew/jjkunce/ by jjk.

    A. Ellerton 2006-07-06
    """

    def request(self, host, handler, request_body, verbose=0):
        """
        Send an XML-RPC request through urllib2 and return the parsed reply.

        @param host: Host part of the server URL (may include a port)
        @param handler: Path part of the server URL, e.g. '/pypi'
        @param request_body: Serialised XML-RPC request
        @param verbose: Debug flag; stored on the instance because
                        parse_response() reads self.verbose

        Returns whatever self.parse_response() produces for the reply.
        Errors raised by urllib2 while opening the URL propagate to the
        caller.
        """
        #We get a traceback if we don't have this attribute:
        self.verbose = verbose
        # Only plain HTTP is supported by this transport.
        url = 'http://' + host + handler
        request = urllib2.Request(url)
        request.add_data(request_body)
        # Note: 'Host' and 'Content-Length' are added automatically
        request.add_header('User-Agent', self.user_agent)
        request.add_header('Content-Type', 'text/xml')
        proxy_handler = urllib2.ProxyHandler()
        opener = urllib2.build_opener(proxy_handler)
        fhandle = opener.open(request)
        try:
            return self.parse_response(fhandle)
        finally:
            # Close the response explicitly so the connection is released
            # even when parsing fails or parse_response() leaves it open.
            fhandle.close()
66
67
def check_proxy_setting():
    """
    Normalise the 'HTTP_PROXY' environment variable for urllib2.

    If the environmental variable 'HTTP_PROXY' is set, it will most likely be
    in one of these forms:

          proxyhost:8080
          http://proxyhost:8080

    urllib2 requires the proxy URL to start with 'http://'. When the value
    has the bare 'host:port' form, os.environ['HTTP_PROXY'] is rewritten to
    'http://host:port'.

    The variable is left untouched when it is unset, already starts with
    'http://', or is in a form this routine does not recognise.

    Always returns None.
    """
    http_proxy = os.environ.get('HTTP_PROXY')
    if http_proxy is None or http_proxy.startswith('http://'):
        return

    # Host names may contain digits (e.g. 'proxy1' or '10.0.0.1'), so digits
    # are accepted in the host part alongside letters, '.', '-' and '_'.
    match = re.match(r'([-_.A-Za-z0-9]+):(\d+)', http_proxy)
    if match is None:
        # Unrecognised format: leave the value alone instead of crashing.
        return

    os.environ['HTTP_PROXY'] = 'http://%s:%s' % (match.group(1),
            match.group(2))
    return
91
92
class CheeseShop:

    """Interface to Python Package Index"""

    def __init__(self, debug=False, no_cache=False, yolk_dir=None):
        """
        Set up the XML-RPC proxy and load the package name list.

        @param debug: When true, extra debug messages are logged
        @param no_cache: When true, the package list is fetched from PyPI
                         instead of being read from / written to disk
        @param yolk_dir: Directory holding the cache files; defaults to
                         the value returned by get_yolk_dir()
        """
        self.debug = debug
        self.no_cache = no_cache
        if yolk_dir:
            self.yolk_dir = yolk_dir
        else:
            self.yolk_dir = get_yolk_dir()
        # The logger must exist before get_xmlrpc_server() runs, because
        # that method reports failures through it.
        self.logger = logging.getLogger("yolk")
        self.xmlrpc = self.get_xmlrpc_server()
        self.pkg_cache_file = self.get_pkg_cache_file()
        self.last_sync_file = self.get_last_sync_file()
        self.pkg_list = None
        self.get_cache()

    def get_cache(self):
        """
        Get a package name list from disk cache or PyPI
        """
        #This is used by external programs that import `CheeseShop` and don't
        #want a cache file written to ~/.pypi and query PyPI every time.
        if self.no_cache:
            self.pkg_list = self.list_packages()
            return

        if not os.path.exists(self.yolk_dir):
            os.mkdir(self.yolk_dir)
        if os.path.exists(self.pkg_cache_file):
            self.pkg_list = self.query_cached_package_list()
        else:
            self.logger.debug("DEBUG: Fetching package list cache from PyPi...")
            self.fetch_pkg_list()

    def get_last_sync_file(self):
        """
        Returns filename of the file recording the last pkg list sync
        """
        return os.path.abspath(self.yolk_dir + "/last_sync")

    def get_xmlrpc_server(self):
        """
        Returns PyPI's XML-RPC server instance

        The proxy is verbose when the XMLRPC_DEBUG environment variable is
        set. If creating the proxy raises IOError, the error is logged and
        None is returned.
        """
        check_proxy_setting()
        if 'XMLRPC_DEBUG' in os.environ:
            debug = 1
        else:
            debug = 0
        try:
            return xmlrpclib.Server(XML_RPC_SERVER,
                    transport=ProxyTransport(), verbose=debug)
        except IOError:
            self.logger.error("ERROR: Can't connect to XML-RPC server: %s" \
                    % XML_RPC_SERVER)
            return None

    def get_pkg_cache_file(self):
        """
        Returns filename of pkg cache
        """
        return os.path.abspath('%s/pkg_list.pkl' % self.yolk_dir)

    def query_versions_pypi(self, package_name):
        """
        Fetch list of available versions for a package from The CheeseShop

        The name is matched case-insensitively against the package list;
        the list is refreshed from PyPI first when the exact name is not
        in it.

        Returns a (package_name, versions) tuple where package_name is
        spelled the way PyPI spells it when a match was found, and
        versions is an empty list when there was no match.
        """
        if package_name not in self.pkg_list:
            self.logger.debug("Package %s not in cache, querying PyPI..." \
                    % package_name)
            self.fetch_pkg_list()
        #I have to set version=[] for edge cases like "Magic file extensions"
        #but I'm not sure why this happens. It's included with Python or
        #because it has a space in it's name?
        versions = []
        wanted = package_name.lower()
        for pypi_pkg in self.pkg_list:
            if pypi_pkg.lower() == wanted:
                if self.debug:
                    self.logger.debug("DEBUG: %s" % package_name)
                versions = self.package_releases(pypi_pkg)
                package_name = pypi_pkg
                break
        return (package_name, versions)

    def query_cached_package_list(self):
        """Return list of pickled package names from PYPI"""
        if self.debug:
            self.logger.debug("DEBUG: reading pickled cache file")
        # NOTE: unpickling executes whatever the file describes, so the
        # cache file must only ever be one this class wrote itself.
        cache_file = open(self.pkg_cache_file, "r")
        try:
            return cPickle.load(cache_file)
        finally:
            cache_file.close()

    def fetch_pkg_list(self):
        """
        Fetch and cache master list of package names from PYPI

        The list is always stored on self.pkg_list. It is written to the
        cache file only when caching is enabled (no_cache is false).
        """
        self.logger.debug("DEBUG: Fetching package name list from PyPI")
        package_list = self.list_packages()
        if not self.no_cache:
            cache_file = open(self.pkg_cache_file, "w")
            try:
                cPickle.dump(package_list, cache_file)
            finally:
                cache_file.close()
        self.pkg_list = package_list

    def search(self, spec, operator):
        '''Query PYPI via XMLRPC interface using search spec'''
        return self.xmlrpc.search(spec, operator.lower())

    def changelog(self, hours):
        '''Query PYPI via XMLRPC interface for changes in the last `hours`'''
        return self.xmlrpc.changelog(get_seconds(hours))

    def updated_releases(self, hours):
        '''Query PYPI via XMLRPC interface for releases updated in the
        last `hours`'''
        return self.xmlrpc.updated_releases(get_seconds(hours))

    def list_packages(self):
        """Query PYPI via XMLRPC interface for a list of all package names"""
        return self.xmlrpc.list_packages()

    def release_urls(self, package_name, version):
        """Query PYPI via XMLRPC interface for a pkg release's URLs"""
        return self.xmlrpc.release_urls(package_name, version)

    def release_data(self, package_name, version):
        """
        Query PYPI via XMLRPC interface for a pkg's metadata

        Returns None when the server answers with an xmlrpclib.Fault.
        """
        try:
            return self.xmlrpc.release_data(package_name, version)
        except xmlrpclib.Fault:
            #XXX Raises xmlrpclib.Fault if you give non-existant version
            #Could this be server bug?
            return None

    def package_releases(self, package_name):
        """Query PYPI via XMLRPC interface for a pkg's available versions"""
        if self.debug:
            self.logger.debug("DEBUG: querying PyPI for versions of " \
                    + package_name)
        return self.xmlrpc.package_releases(package_name)

    def get_download_urls(self, package_name, version="", pkg_type="all"):
        """
        Query PyPI for pkg download URI for a package

        @param package_name: Name of the package
        @param version: Single version to look at; when empty, every
                        version found by query_versions_pypi() is used
        @param pkg_type: 'source', 'egg', or 'all'

        Returns a list of URLs.
        """
        if version:
            versions = [version]
        else:
            #If they don't specify version, show em all.
            (package_name, versions) = self.query_versions_pypi(package_name)

        all_urls = []
        for ver in versions:
            metadata = self.release_data(package_name, ver)
            for urls in self.release_urls(package_name, ver):
                if pkg_type == "source" and urls['packagetype'] == "sdist":
                    all_urls.append(urls['url'])
                elif pkg_type == "egg" and \
                        urls['packagetype'].startswith("bdist"):
                    all_urls.append(urls['url'])
                elif pkg_type == "all":
                    #All
                    all_urls.append(urls['url'])

            #Try the package's metadata directly in case there's nothing
            #returned by XML-RPC's release_urls()
            # NOTE(review): the metadata download_url is never added when
            # pkg_type is "all" -- confirm whether that is intended.
            if not metadata or pkg_type == "all":
                continue
            download_url = metadata.get('download_url')
            if download_url is None or download_url == "UNKNOWN":
                continue
            if download_url in all_urls:
                continue
            url = filter_url(pkg_type, download_url)
            if url:
                all_urls.append(url)
        return all_urls
260       
def filter_url(pkg_type, url):
    """
    Return `url` if it points at a file of the requested kind.

    @param pkg_type: 'source', 'egg', or 'all'
    @param url: Candidate download URL

    A trailing '?modtime...' or '#md5=...' part is removed first (only the
    first of the two that is present, checked in that order), and for URLs
    ending in '-dev' everything from '#egg=' onwards is dropped.

    Returns the cleaned URL, or None when it does not match `pkg_type`.
    """
    for marker in ("?modtime", "#md5="):
        cut = url.find(marker)
        if cut != -1:
            url = url[:cut]
            break

    #pkg_spec==dev (svn)
    if url.endswith("-dev"):
        url = url.split("#egg=")[0]

    if pkg_type == "all":
        return url

    lowered = url.lower()

    if pkg_type == "source":
        for suffix in (".tgz", ".tar.gz", ".zip", ".tbz2", ".tar.bz2"):
            if lowered.endswith(suffix):
                return url
        return None

    if pkg_type == "egg" and lowered.endswith(".egg"):
        return url

    return None
288
def get_seconds(hours):
    """
    Return the epoch timestamp for the moment `hours` hours ago.

    @param hours: Number of `hours` back in time we are checking
    @type hours: int

    The result is the current time in seconds since the epoch, minus
    `hours` worth of seconds, truncated to an integer.

    """
    seconds_per_hour = 60 * 60
    offset = seconds_per_hour * hours
    return int(time.time() - offset)
300
Note: See TracBrowser for help on using the browser.