1
0
mirror of https://github.com/golang/go synced 2024-11-21 23:54:40 -07:00

codereview: speed upload

Cuts time to upload trivial 160-file CL by 5x,
from 250 seconds to 50 seconds.

R=r
CC=golang-dev
https://golang.org/cl/1991047
This commit is contained in:
Russ Cox 2010-08-26 18:56:29 -04:00
parent d3a2dcf5a8
commit f7d87f355c

View File

@ -6,7 +6,7 @@
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
@ -237,13 +237,11 @@ class CL(object):
("subject", self.Subject()), ("subject", self.Subject()),
] ]
# NOTE(rsc): This duplicates too much of RealMain,
# but RealMain doesn't have the most reusable interface.
if self.name != "new": if self.name != "new":
form_fields.append(("issue", self.name)) form_fields.append(("issue", self.name))
vcs = None vcs = None
if self.files: if self.files:
vcs = MercurialVCS(upload_options, repo.root) vcs = MercurialVCS(upload_options, ui, repo)
data = vcs.GenerateDiff(self.files) data = vcs.GenerateDiff(self.files)
files = vcs.GetBaseFiles(data) files = vcs.GetBaseFiles(data)
if len(data) > MAX_UPLOAD_SIZE: if len(data) > MAX_UPLOAD_SIZE:
@ -641,7 +639,7 @@ def getremote(ui, repo, opts):
source = hg.parseurl(ui.expandpath("default"), None)[0] source = hg.parseurl(ui.expandpath("default"), None)[0]
try: try:
remoteui = hg.remoteui # hg 1.6 remoteui = hg.remoteui # hg 1.6
except: except:
remoteui = cmdutil.remoteui remoteui = cmdutil.remoteui
other = hg.repository(remoteui(repo, opts), source) other = hg.repository(remoteui(repo, opts), source)
if proxy is not None: if proxy is not None:
@ -712,7 +710,7 @@ original_match = None
def ReplacementForCmdutilMatch(repo, pats=[], opts={}, globbed=False, default='relpath'): def ReplacementForCmdutilMatch(repo, pats=[], opts={}, globbed=False, default='relpath'):
taken = [] taken = []
files = [] files = []
pats = pats or [] pats = pats or []
for p in pats: for p in pats:
if p.startswith('@'): if p.startswith('@'):
taken.append(p) taken.append(p)
@ -722,7 +720,7 @@ def ReplacementForCmdutilMatch(repo, pats=[], opts={}, globbed=False, default='r
cl, err = LoadCL(repo.ui, repo, clname, web=False) cl, err = LoadCL(repo.ui, repo, clname, web=False)
if err != '': if err != '':
raise util.Abort("loading CL " + clname + ": " + err) raise util.Abort("loading CL " + clname + ": " + err)
if cl.files == None: if not cl.files:
raise util.Abort("no files in CL " + clname) raise util.Abort("no files in CL " + clname)
files = Add(files, cl.files) files = Add(files, cl.files)
pats = Sub(pats, taken) + ['path:'+f for f in files] pats = Sub(pats, taken) + ['path:'+f for f in files]
@ -1543,19 +1541,18 @@ def DownloadCL(ui, repo, clname):
return cl, diffdata, "" return cl, diffdata, ""
def MySend(request_path, payload=None, def MySend(request_path, payload=None,
content_type="application/octet-stream", content_type="application/octet-stream",
timeout=None, force_auth=True, timeout=None, force_auth=True,
**kwargs): **kwargs):
"""Run MySend1 maybe twice, because Rietveld is unreliable.""" """Run MySend1 maybe twice, because Rietveld is unreliable."""
try: try:
return MySend1(request_path, payload, content_type, timeout, force_auth, **kwargs) return MySend1(request_path, payload, content_type, timeout, force_auth, **kwargs)
except Exception, e: except Exception, e:
if type(e) == urllib2.HTTPError and e.code == 403: # forbidden, it happens if type(e) == urllib2.HTTPError and e.code == 403: # forbidden, it happens
raise raise
print >>sys.stderr, "Loading "+request_path+": "+ExceptionDetail()+"; trying again in 2 seconds." print >>sys.stderr, "Loading "+request_path+": "+ExceptionDetail()+"; trying again in 2 seconds."
time.sleep(2) time.sleep(2)
return MySend1(request_path, payload, content_type, timeout, force_auth, **kwargs) return MySend1(request_path, payload, content_type, timeout, force_auth, **kwargs)
# Like upload.py Send but only authenticates when the # Like upload.py Send but only authenticates when the
# redirect is to www.google.com/accounts. This keeps # redirect is to www.google.com/accounts. This keeps
@ -1799,7 +1796,7 @@ def RietveldSetup(ui, repo):
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
@ -2427,6 +2424,39 @@ class VersionControlSystem(object):
StatusUpdate(" --> %s" % response_body) StatusUpdate(" --> %s" % response_body)
sys.exit(1) sys.exit(1)
# Don't want to spawn too many threads, nor do we want to
# hit Rietveld too hard, or it will start serving 500 errors.
# When 8 works, it's no better than 4, and sometimes 8 is
# too many for Rietveld to handle.
MAX_PARALLEL_UPLOADS = 4
sema = threading.BoundedSemaphore(MAX_PARALLEL_UPLOADS)
upload_threads = []
finished_upload_threads = []
class UploadFileThread(threading.Thread):
def __init__(self, args):
threading.Thread.__init__(self)
self.args = args
def run(self):
UploadFile(*self.args)
finished_upload_threads.append(self)
sema.release()
def StartUploadFile(*args):
sema.acquire()
while len(finished_upload_threads) > 0:
t = finished_upload_threads.pop()
upload_threads.remove(t)
t.join()
t = UploadFileThread(args)
upload_threads.append(t)
t.start()
def WaitForUploads():
for t in upload_threads:
t.join()
patches = dict() patches = dict()
[patches.setdefault(v, k) for k, v in patch_list] [patches.setdefault(v, k) for k, v in patch_list]
for filename in patches.keys(): for filename in patches.keys():
@ -2437,9 +2467,10 @@ class VersionControlSystem(object):
file_id_str = file_id_str[file_id_str.rfind("_") + 1:] file_id_str = file_id_str[file_id_str.rfind("_") + 1:]
file_id = int(file_id_str) file_id = int(file_id_str)
if base_content != None: if base_content != None:
UploadFile(filename, file_id, base_content, is_binary, status, True) StartUploadFile(filename, file_id, base_content, is_binary, status, True)
if new_content != None: if new_content != None:
UploadFile(filename, file_id, new_content, is_binary, status, False) StartUploadFile(filename, file_id, new_content, is_binary, status, False)
WaitForUploads()
def IsImage(self, filename): def IsImage(self, filename):
"""Returns true if the filename has an image extension.""" """Returns true if the filename has an image extension."""
@ -2458,14 +2489,25 @@ class VersionControlSystem(object):
return False return False
return not mimetype.startswith("text/") return not mimetype.startswith("text/")
class FakeMercurialUI(object):
def __init__(self):
self.quiet = True
self.output = ''
def write(self, s):
self.output += s
use_hg_shell = False # set to True to shell out to hg always; slower
class MercurialVCS(VersionControlSystem): class MercurialVCS(VersionControlSystem):
"""Implementation of the VersionControlSystem interface for Mercurial.""" """Implementation of the VersionControlSystem interface for Mercurial."""
def __init__(self, options, repo_dir): def __init__(self, options, ui, repo):
super(MercurialVCS, self).__init__(options) super(MercurialVCS, self).__init__(options)
self.ui = ui
self.repo = repo
# Absolute path to repository (we can be in a subdir) # Absolute path to repository (we can be in a subdir)
self.repo_dir = os.path.normpath(repo_dir) self.repo_dir = os.path.normpath(repo.root)
# Compute the subdir # Compute the subdir
cwd = os.path.normpath(os.getcwd()) cwd = os.path.normpath(os.getcwd())
assert cwd.startswith(self.repo_dir) assert cwd.startswith(self.repo_dir)
@ -2532,7 +2574,14 @@ class MercurialVCS(VersionControlSystem):
is_binary = False is_binary = False
oldrelpath = relpath = self._GetRelPath(filename) oldrelpath = relpath = self._GetRelPath(filename)
# "hg status -C" returns two lines for moved/copied files, one otherwise # "hg status -C" returns two lines for moved/copied files, one otherwise
out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath]) if use_hg_shell:
out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath])
else:
fui = FakeMercurialUI()
ret = commands.status(fui, self.repo, *[relpath], **{'rev': [self.base_rev], 'copies': True})
if ret:
raise util.Abort(ret)
out = fui.output
out = out.splitlines() out = out.splitlines()
# HACK: strip error message about missing file/directory if it isn't in # HACK: strip error message about missing file/directory if it isn't in
# the working copy # the working copy
@ -2547,13 +2596,15 @@ class MercurialVCS(VersionControlSystem):
else: else:
base_rev = self.base_rev base_rev = self.base_rev
if status != "A": if status != "A":
base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], if use_hg_shell:
silent_ok=True) base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], silent_ok=True)
else:
base_content = str(self.repo[base_rev][filename].data())
is_binary = "\0" in base_content # Mercurial's heuristic is_binary = "\0" in base_content # Mercurial's heuristic
if status != "R": if status != "R":
new_content = open(relpath, "rb").read() new_content = open(relpath, "rb").read()
is_binary = is_binary or "\0" in new_content is_binary = is_binary or "\0" in new_content
if is_binary and base_content: if is_binary and base_content and use_hg_shell:
# Fetch again without converting newlines # Fetch again without converting newlines
base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath], base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath],
silent_ok=True, universal_newlines=False) silent_ok=True, universal_newlines=False)