From fc5a835b2e628c4167ac8e1fcdc25839fdade136 Mon Sep 17 00:00:00 2001 From: Andrew Gerrand Date: Tue, 27 Jul 2010 15:02:44 +1000 Subject: [PATCH] dashboard: more aggressive caching for project/package pages Plus automatic package<->project association script. (This is just a helper script for now. I intend to fully automate the associations further down the track.) R=rsc CC=golang-dev https://golang.org/cl/1715054 --- misc/dashboard/godashboard/package.py | 170 ++++++++++++++++++-------- 1 file changed, 121 insertions(+), 49 deletions(-) diff --git a/misc/dashboard/godashboard/package.py b/misc/dashboard/godashboard/package.py index 6c3bd99956..27c51046e0 100644 --- a/misc/dashboard/godashboard/package.py +++ b/misc/dashboard/godashboard/package.py @@ -17,6 +17,7 @@ from google.appengine.ext.webapp import template from google.appengine.ext.webapp.util import run_wsgi_app from google.appengine.api import users from google.appengine.api import mail +from google.appengine.api import urlfetch import binascii import datetime import hashlib @@ -50,36 +51,91 @@ re_bitbucket = re.compile(r'^bitbucket\.org/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+$') re_googlecode = re.compile(r'^[a-z0-9\-]+\.googlecode\.com/(svn|hg)$') re_github = re.compile(r'^github\.com/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+$') +def vc_to_web(path): + if re_bitbucket.match(path): + check_url = 'http://' + path + '/?cmd=heads' + web = 'http://' + path + '/' + elif re_github.match(path): + # github doesn't let you fetch the .git directory anymore. + # fetch .git/info/refs instead, like git clone would. + check_url = 'http://'+path+'.git/info/refs' + web = 'http://' + path + elif re_googlecode.match(path): + check_url = 'http://'+path + web = 'http://code.google.com/p/' + path[:path.index('.')] + else: + return False, False + return web, check_url + +re_bitbucket_web = re.compile(r'bitbucket\.org/([a-z0-9A-Z_.\-]+)/([a-z0-9A-Z_.\-]+)') +re_googlecode_web = re.compile(r'code.google.com/p/([a-z0-9\-]+)') +re_github_web = re.compile(r'github\.com/([a-z0-9A-Z_.\-]+)/([a-z0-9A-Z_.\-]+)') +re_striphttp = re.compile(r'http://(www\.)?') + +def web_to_vc(url): + url = re_striphttp.sub('', url) + m = re_bitbucket_web.match(url) + if m: + return 'bitbucket.org/'+m.group(1)+'/'+m.group(2) + m = re_github_web.match(url) + if m: + return 'github.com/'+m.group(1)+'/'+m.group(2) + m = re_googlecode_web.match(url) + if m: + path = m.group(1)+'.googlecode.com/' + # perform http request to path/hg to check if they're using mercurial + vcs = 'svn' + try: + response = urlfetch.fetch('http://'+path+'hg', deadline=1) + if response.status_code == 200: + vcs = 'hg' + except: pass + return path + vcs + return False + MaxPathLength = 100 +CacheTimeout = 3600 class PackagePage(webapp.RequestHandler): def get(self): if self.request.get('fmt') == 'json': return self.json() - q = Package.all() - q.order('-last_install') - by_time = q.fetch(100) + html = memcache.get('view-package') + if not html: + q = Package.all() + q.order('-last_install') + by_time = q.fetch(100) - q = Package.all() - q.order('-count') - by_count = q.fetch(100) + q = Package.all() + q.order('-count') + by_count = q.fetch(100) - self.response.headers['Content-Type'] = 'text/html; charset=utf-8' - path = os.path.join(os.path.dirname(__file__), 'package.html') - self.response.out.write(template.render(path, {"by_time": by_time, "by_count": by_count})) + self.response.headers['Content-Type'] = 'text/html; charset=utf-8' + path = os.path.join(os.path.dirname(__file__), 'package.html') + html = template.render( + path, + {"by_time": by_time, "by_count": by_count} + ) + memcache.set('view-package', html, time=CacheTimeout) + + self.response.out.write(html) def json(self): - self.response.set_status(200) - self.response.headers['Content-Type'] = 'text/plain; charset=utf-8' - q = Package.all() - s = '{"packages": [' - sep = '' - for r in q.fetch(1000): - s += '%s\n\t{"path": "%s", "last_install": "%s", "count": "%s"}' % (sep, r.path, r.last_install, r.count) - sep = ',' - s += '\n]}\n' - self.response.out.write(s) + json = memcache.get('view-package-json') + if not json: + self.response.set_status(200) + self.response.headers['Content-Type'] = 'text/plain; charset=utf-8' + q = Package.all() + s = '{"packages": [' + sep = '' + for r in q.fetch(1000): + s += '%s\n\t{"path": "%s", "last_install": "%s", "count": "%s"}' % (sep, r.path, r.last_install, r.count) + sep = ',' + s += '\n]}\n' + json = s + memcache.set('view-package-json', json, time=CacheTimeoout) + self.response.out.write(json) def can_get_url(self, url): try: @@ -104,18 +160,8 @@ class PackagePage(webapp.RequestHandler): p = Package.get_by_key_name(key) if p is None: # not in datastore - verify URL before creating - if re_bitbucket.match(path): - check_url = 'http://' + path + '/?cmd=heads' - web = 'http://' + path + '/' - elif re_github.match(path): - # github doesn't let you fetch the .git directory anymore. - # fetch .git/info/refs instead, like git clone would. - check_url = 'http://'+path+'.git/info/refs' - web = 'http://' + path - elif re_googlecode.match(path): - check_url = 'http://'+path - web = 'http://code.google.com/p/' + path[:path.index('.')] - else: + web, check_url = vc_to_web(path) + if not web: logging.error('unrecognized path: %s', path) return False if not self.can_get_url(check_url): @@ -150,9 +196,27 @@ class ProjectPage(webapp.RequestHandler): self.redirect(users.create_logout_url("/project")) elif self.request.path == "/project/edit" and admin: self.edit() + elif self.request.path == "/project/assoc" and admin: + self.assoc() else: self.list() + def assoc(self): + projects = Project.all() + for p in projects: + if p.package: + continue + path = web_to_vc(p.web_url) + if not path: + continue + pkg = Package.get_by_key_name("pkg-"+path) + if not pkg: + self.response.out.write('no: %s %s
' % (p.web_url, path)) + continue + p.package = pkg + p.put() + self.response.out.write('yes: %s %s
' % (p.web_url, path)) + def post(self): if self.request.path == "/project/edit": self.edit(True) @@ -177,30 +241,37 @@ class ProjectPage(webapp.RequestHandler): self.list({"submitMsg": "Your project has been submitted."}) - def list(self, data={}): - projects = Project.all().order('category').order('name') - + def list(self, additional_data={}): + data = memcache.get('view-project-data') admin = users.is_current_user_admin() - if not admin: - projects = projects.filter('approved =', True) + if admin or not data: + projects = Project.all().order('category').order('name') + if not admin: + projects = projects.filter('approved =', True) + projects = list(projects) - projects = list(projects) + tags = sets.Set() + for p in projects: + for t in p.tags: + tags.add(t) - tags = sets.Set() - for p in projects: - for t in p.tags: - tags.add(t) + tag = self.request.get('tag', None) + if tag: + projects = filter(lambda x: tag in x.tags, projects) - tag = self.request.get("tag", None) - if tag: - projects = filter(lambda x: tag in x.tags, projects) + data = {} + data['tag'] = tag + data['tags'] = tags + data['projects'] = projects + data['admin']= admin + if not admin: + memcache.set('view-project-data', data, time=CacheTimeout) + + for k, v in additional_data.items(): + data[k] = v self.response.headers['Content-Type'] = 'text/html; charset=utf-8' path = os.path.join(os.path.dirname(__file__), 'project.html') - data["tag"] = tag - data["tags"] = tags - data["projects"] = projects - data["admin"] = admin self.response.out.write(template.render(path, data)) def edit(self, save=False): @@ -228,7 +299,8 @@ class ProjectPage(webapp.RequestHandler): p.approved = self.request.get("approved") == "1" p.tags = filter(lambda x: x, self.request.get("tags", "").split(",")) p.put() - self.redirect("/project") + memcache.delete('view-project-data') + self.redirect('/project') return # get all project categories and tags